IMP logo
IMP Reference Guide  develop.7400db2aee,2024/11/24
The Integrative Modeling Platform
multifit/proteomics.py
1 #!/usr/bin/env python
2 
3 __doc__ = "Generate proteomics info from anchor graph and fits."
4 
5 # read the anchors
6 # read the top fit for each protein, and assign the anchors
7 # add EV accordingly
8 
9 import IMP.multifit
10 from IMP import ArgumentParser
11 
12 
13 def parse_args():
14  desc = """
15 Generate a proteomics file automatically from the anchor graph and fitting
16 results. No interaction data is entered here, but the file can be modified
17 manually afterwards to add additional proteomics information.
18 """
19  p = ArgumentParser(description=desc)
20  p.add_argument("assembly_file", help="assembly file name")
21  p.add_argument("anchors_file", help="anchors file name")
22  p.add_argument("proteomics_file", help="output proteomics file name")
23  return p.parse_args()
24 
25 
26 def run(asmb_fn, anchors_fn, proteomics_fn):
27  asmb = IMP.multifit.read_settings(asmb_fn)
28  asmb.set_was_used(True)
29  ad = IMP.multifit.read_anchors_data(anchors_fn)
30 
31  # read molecules
32  mdl = IMP.Model()
33  mhs = []
34  centroids = []
35  for i in range(asmb.get_number_of_component_headers()):
36  fn = asmb.get_component_header(i).get_filename()
37  mhs.append(IMP.atom.read_pdb(fn, mdl))
38  centroids.append(IMP.core.get_centroid(IMP.core.get_leaves(mhs[i])))
39  # matched anchors
40  match = []
41  for pt in ad.points_:
42  min_len = 999999
43  min_ind = 0
44  for j in range(len(mhs)):
45  dist = IMP.algebra.get_squared_distance(pt, centroids[j])
46  if dist < min_len:
47  min_len = dist
48  min_ind = j
49  match.append(min_ind)
50  # now add all the EV
51  ev_pairs = []
52  for ind1, ind2 in ad.edges_:
53  ev_pairs.append([match[ind1], match[ind2]])
54  outf = open(proteomics_fn, "w")
55  outf.write("|proteins|\n")
56  for i, mh in enumerate(mhs):
57  numres = len(IMP.atom.get_by_type(mh, IMP.atom.RESIDUE_TYPE))
58  outf.write("|%s|1|%d|nn|nn|\n"
59  % (asmb.get_component_header(i).get_name(), numres))
60  outf.write("|interactions|\n")
61  outf.write("|residue-xlink|\n")
62  outf.write("|ev-pairs|\n")
63  pairs_map = {}
64  for evp in ev_pairs:
65  if evp[0] != evp[1]:
66  sortpair = (min(*evp), max(*evp))
67  if sortpair not in pairs_map:
68  name0 = asmb.get_component_header(evp[0]).get_name()
69  name1 = asmb.get_component_header(evp[1]).get_name()
70  outf.write("|%s|%s|\n" % (name0, name1))
71  pairs_map[sortpair] = 1
72  outf.close()
73 
74 
75 def main():
76  args = parse_args()
77  run(args.assembly_file, args.anchors_file, args.proteomics_file)
78 
79 
80 if __name__ == "__main__":
81  main()
double get_squared_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the squared distance between two vectors.
Definition: VectorD.h:188
algebra::Vector3D get_centroid(const XYZs &ps)
Get the centroid.
SettingsData * read_settings(const char *filename)
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
Fitting atomic structures into a cryo-electron microscopy density map.
AnchorsData read_anchors_data(const char *txt_filename)