IMP  2.2.0
The Integrative Modeling Platform
proteomics.py
1 #!/usr/bin/env python
2 
3 __doc__ = "Generate proteomics info from anchor graph and fits."
4 
5 # read the anchors
6 # read the top fit for each protein, and assign the anchors
7 # add EV accordingly
8 
9 import IMP.multifit
10 from IMP import OptionParser
11 
12 
def parse_args():
    """Parse the command line and return its three positional arguments.

    The expected positional arguments are, in order: the assembly input
    file, the anchors text file, and the proteomics output file.  If the
    number of positional arguments is not exactly three, the parser's
    ``error`` method is invoked with an explanatory message.

    Returns:
        The list of positional arguments produced by the option parser.
    """
    usage = """%prog [options] <asmb.input> <anchors.txt>
 <output:proteomics>

Generate a proteomics file automatically from the anchor graph and fitting
results. No interaction data is entered here, but the file can be modified
manually afterwards to add additional proteomics information.
"""
    parser = OptionParser(usage)
    # No options are defined by this script, so only the positional
    # arguments are of interest.
    _unused_options, positional = parser.parse_args()
    expected_count = 3
    if len(positional) != expected_count:
        parser.error("incorrect number of arguments")
    return positional
26 
27 
def run(asmb_fn, anchors_fn, proteomics_fn):
    """Write a proteomics file derived from an anchor graph and components.

    Each component listed in the assembly settings is loaded from its PDB
    file and its centroid is computed.  Every anchor point is then matched
    to the component whose centroid is closest (by squared distance), and
    every anchor-graph edge joining two different components is written
    once to the ``|ev-pairs|`` section of the output.

    Args:
        asmb_fn: Path of the assembly settings file, read with
            ``IMP.multifit.read_settings``.
        anchors_fn: Path of the anchors file, read with
            ``IMP.multifit.read_anchors_data``.
        proteomics_fn: Path of the proteomics file to create; an existing
            file is overwritten.
    """
    asmb = IMP.multifit.read_settings(asmb_fn)
    # NOTE(review): presumably silences IMP's "object was never used"
    # check for the settings object -- confirm against the IMP docs.
    asmb.set_was_used(True)
    ad = IMP.multifit.read_anchors_data(anchors_fn)

    # Load every component and record the centroid of its leaves.
    mdl = IMP.kernel.Model()
    mhs = []
    centroids = []
    for i in range(asmb.get_number_of_component_headers()):
        fn = asmb.get_component_header(i).get_filename()
        mh = IMP.atom.read_pdb(fn, mdl)
        mhs.append(mh)
        centroids.append(IMP.core.get_centroid(IMP.core.get_leaves(mh)))

    # Match each anchor point to the index of the nearest centroid.
    match = []
    for pt in ad.points_:
        # Start from infinity rather than a fixed large number: the values
        # compared are *squared* distances, so a finite sentinel would make
        # far-away anchors silently fall back to component 0.
        min_len = float("inf")
        min_ind = 0  # kept as the fallback when there are no components
        for j, centroid in enumerate(centroids):
            dist = IMP.algebra.get_squared_distance(pt, centroid)
            if dist < min_len:  # strict '<' keeps the first of any ties
                min_len = dist
                min_ind = j
        match.append(min_ind)

    # Translate anchor-graph edges into pairs of component indices.
    # NOTE(review): "EV" presumably stands for excluded volume -- confirm.
    ev_pairs = [[match[ind1], match[ind2]] for ind1, ind2 in ad.edges_]

    # The context manager closes the file even if an IMP call or a write
    # raises part-way through.
    with open(proteomics_fn, "w") as outf:
        outf.write("|proteins|\n")
        for i, mh in enumerate(mhs):
            numres = len(IMP.atom.get_by_type(mh, IMP.atom.RESIDUE_TYPE))
            outf.write("|%s|1|%d|nn|nn|\n"
                       % (asmb.get_component_header(i).get_name(), numres))
        # These two sections are emitted empty; the usage text says the
        # file can be edited by hand afterwards to add such data.
        outf.write("|interactions|\n")
        outf.write("|residue-xlink|\n")
        outf.write("|ev-pairs|\n")
        seen_pairs = set()
        for first, second in ev_pairs:
            if first == second:
                # Both anchors belong to the same component: no pair.
                continue
            # Order-independent key so (a, b) and (b, a) are written once.
            sortpair = (min(first, second), max(first, second))
            if sortpair in seen_pairs:
                continue
            seen_pairs.add(sortpair)
            name0 = asmb.get_component_header(first).get_name()
            name1 = asmb.get_component_header(second).get_name()
            outf.write("|%s|%s|\n" % (name0, name1))
75 
76 
def main():
    """Script entry point: parse the command line, then call ``run``."""
    settings_path, anchors_path, output_path = parse_args()
    run(asmb_fn=settings_path,
        anchors_fn=anchors_path,
        proteomics_fn=output_path)


# Only execute when invoked as a script, not when imported as a module.
if __name__ == "__main__":
    main()
double get_squared_distance(const VectorD< D > &v1, const VectorD< D > &v2)
compute the squared distance between two vectors
Definition: VectorD.h:201
algebra::Vector3D get_centroid(const XYZs &ps)
Get the centroid.
SettingsData * read_settings(const char *filename)
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
See IMP.multifit for more information.
IMP::kernel::OptionParser OptionParser
AnchorsData read_anchors_data(const char *txt_filename)
void read_pdb(base::TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
Definition: kernel/Model.h:72