IMP  2.1.0
The Integrative Modeling Platform
proteomics.py
1 #!/usr/bin/env python
2 
3 __doc__ = "Generate proteomics info from anchor graph and fits."
4 
5 #read the anchors
6 #read the top fit for each protein, and assign the anchors
7 #add EV accordinly
8 
9 import IMP.multifit
10 from IMP import OptionParser
11 
12 def parse_args():
13  usage = """%prog [options] <asmb.input> <anchors.txt>
14  <output:proteomics>
15 
16 Generate a proteomics file automatically from the anchor graph and fitting
17 results. No interaction data is entered here, but the file can be modified
18 manually afterwards to add additional proteomics information.
19 """
20  parser = OptionParser(usage)
21  options, args = parser.parse_args()
22  if len(args) != 3:
23  parser.error("incorrect number of arguments")
24  return args
25 
26 def run(asmb_fn,anchors_fn,proteomics_fn):
27  asmb=IMP.multifit.read_settings(asmb_fn)
28  asmb.set_was_used(True)
29  ad=IMP.multifit.read_anchors_data(anchors_fn)
30 
31  #read molecules
32  mdl=IMP.kernel.Model()
33  mhs=[]
34  centroids=[]
35  for i in range(asmb.get_number_of_component_headers()):
36  fn = asmb.get_component_header(i).get_filename()
37  mhs.append(IMP.atom.read_pdb(fn, mdl))
38  centroids.append(IMP.core.get_centroid(IMP.core.get_leaves(mhs[i])))
39  #matched anchors
40  match=[]
41  for pt in ad.points_:
42  min_len=999999
43  min_ind=0
44  for j in range(len(mhs)):
45  dist = IMP.algebra.get_squared_distance(pt,centroids[j])
46  if dist < min_len:
47  min_len = dist
48  min_ind = j
49  match.append(min_ind)
50  #now add all the EV
51  ev_pairs=[]
52  for ind1,ind2 in ad.edges_:
53  ev_pairs.append([match[ind1],match[ind2]])
54  outf=open(proteomics_fn,"w")
55  outf.write("|proteins|\n")
56  for i,mh in enumerate(mhs):
57  numres = len(IMP.atom.get_by_type(mh,IMP.atom.RESIDUE_TYPE))
58  outf.write("|%s|1|%d|nn|nn|\n" \
59  % (asmb.get_component_header(i).get_name(), numres))
60  outf.write("|interactions|\n")
61  outf.write("|residue-xlink|\n")
62  outf.write("|ev-pairs|\n")
63  pairs_map={}
64  for evp in ev_pairs:
65  if evp[0] != evp[1]:
66  sortpair = (min(*evp), max(*evp))
67  if sortpair not in pairs_map:
68  name0 = asmb.get_component_header(evp[0]).get_name()
69  name1 = asmb.get_component_header(evp[1]).get_name()
70  outf.write("|%s|%s|\n" % (name0, name1))
71  pairs_map[sortpair]=1
72  outf.close()
73 
74 def main():
75  asmb_fn, anchors_fn, proteomics_fn = parse_args()
76  run(asmb_fn, anchors_fn, proteomics_fn)
77 
78 if __name__=="__main__":
79  main()
double get_squared_distance(const VectorD< D > &v1, const VectorD< D > &v2)
compute the squared distance between two vectors
Definition: VectorD.h:393
algebra::Vector3D get_centroid(const XYZs &ps)
Get the centroid.
SettingsData * read_settings(const char *filename)
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
See IMP.multifit for more information.
IMP::kernel::OptionParser OptionParser
AnchorsData read_anchors_data(const char *txt_filename)
void read_pdb(base::TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.