IMP  2.0.0
The Integrative Modeling Platform
proteomics.py
1 #!/usr/bin/env python
2 
3 __doc__ = "Generate proteomics info from anchor graph and fits."
4 
5 #read the anchors
6 #read the top fit for each protein, and assign the anchors
7 #add EV accordinly
8 
9 import IMP.multifit
10 from optparse import OptionParser
11 
12 def parse_args():
13  usage = """%prog [options] <asmb.input> <anchors.txt>
14  <output:proteomics>
15 
16 Generate a proteomics file automatically from the anchor graph and fitting
17 results. No interaction data is entered here, but the file can be modified
18 manually afterwards to add additional proteomics information.
19 """
20  parser = OptionParser(usage)
21  options, args = parser.parse_args()
22  if len(args) != 3:
23  parser.error("incorrect number of arguments")
24  return args
25 
26 def run(asmb_fn,anchors_fn,proteomics_fn):
27  asmb=IMP.multifit.read_settings(asmb_fn)
28  asmb.set_was_used(True)
29  ad=IMP.multifit.read_anchors_data(anchors_fn)
30 
31  #read molecules
32  mdl=IMP.Model()
33  mhs=[]
34  centroids=[]
35  for i in range(asmb.get_number_of_component_headers()):
36  fn = asmb.get_component_header(i).get_filename()
37  mhs.append(IMP.atom.read_pdb(fn, mdl))
38  centroids.append(IMP.core.get_centroid(IMP.core.get_leaves(mhs[i])))
39  #matched anchors
40  match=[]
41  for pt in ad.points_:
42  min_len=999999
43  min_ind=0
44  for j in range(len(mhs)):
45  dist = IMP.algebra.get_squared_distance(pt,centroids[j])
46  if dist < min_len:
47  min_len = dist
48  min_ind = j
49  match.append(min_ind)
50  #now add all the EV
51  ev_pairs=[]
52  for ind1,ind2 in ad.edges_:
53  ev_pairs.append([match[ind1],match[ind2]])
54  outf=open(proteomics_fn,"w")
55  outf.write("|proteins|\n")
56  for i,mh in enumerate(mhs):
57  numres = len(IMP.atom.get_by_type(mh,IMP.atom.RESIDUE_TYPE))
58  outf.write("|%s|1|%d|nn|nn|\n" \
59  % (asmb.get_component_header(i).get_name(), numres))
60  outf.write("|interactions|\n")
61  outf.write("|residue-xlink|\n")
62  outf.write("|ev-pairs|\n")
63  pairs_map={}
64  for evp in ev_pairs:
65  if evp[0] != evp[1]:
66  sortpair = (min(*evp), max(*evp))
67  if sortpair not in pairs_map:
68  name0 = asmb.get_component_header(evp[0]).get_name()
69  name1 = asmb.get_component_header(evp[1]).get_name()
70  outf.write("|%s|%s|\n" % (name0, name1))
71  pairs_map[sortpair]=1
72  outf.close()
73 
74 def main():
75  asmb_fn, anchors_fn, proteomics_fn = parse_args()
76  run(asmb_fn, anchors_fn, proteomics_fn)
77 
78 if __name__=="__main__":
79  main()