IMP logo
IMP Reference Guide  2.21.0
The Integrative Modeling Platform
align.py
1 #!/usr/bin/env python
2 
3 from __future__ import print_function, division
4 import sys
5 import IMP.multifit
6 from IMP import ArgumentParser
7 
8 __doc__ = "Align proteomics graph with the EM map."
9 
10 # analyse the ensemble, first we will do the rmsd stuff
11 
12 
13 class progressBar:
14 
15  def __init__(self, minValue=0, maxValue=10, totalWidth=12):
16  self.progBar = "[]" # This holds the progress bar string
17  self.min = minValue
18  self.max = maxValue
19  self.span = maxValue - minValue
20  self.width = totalWidth
21  self.amount = 0 # When amount == max, we are 100% done
22  self.updateAmount(0) # Build progress bar string
23 
24  def updateAmount(self, newAmount=0):
25  if newAmount < self.min:
26  newAmount = self.min
27  if newAmount > self.max:
28  newAmount = self.max
29  self.amount = newAmount
30 
31  # Figure out the new percent done, round to an integer
32  diffFromMin = float(self.amount - self.min)
33  percentDone = (diffFromMin / float(self.span)) * 100.0
34  percentDone = round(percentDone)
35  percentDone = int(percentDone)
36 
37  # Figure out how many hash bars the percentage should be
38  allFull = self.width - 2
39  numHashes = (percentDone / 100.0) * allFull
40  numHashes = int(round(numHashes))
41 
42  # build a progress bar with hashes and spaces
43  self.progBar = "[" + '#' * numHashes + \
44  ' ' * (allFull - numHashes) + "]"
45 
46  # figure out where to put the percentage, roughly centered
47  percentPlace = (len(self.progBar) // 2) - len(str(percentDone))
48  percentString = str(percentDone) + "%"
49 
50  # slice the percentage into the bar
51  self.progBar = self.progBar[
52  0:percentPlace] + percentString + self.progBar[
53  percentPlace + len(
54  percentString):]
55 
56  def __str__(self):
57  return str(self.progBar)
58 
59 
60 def parse_args():
61  desc = """Align proteomics graph with the EM map."""
62  p = ArgumentParser(description=desc)
63  p.add_argument("-m", "--max", type=int, dest="max", default=999999999,
64  help="maximum number of fits considered")
65  p.add_argument("assembly_file", help="assembly file name")
66  p.add_argument("proteomics_file", help="proteomics file name")
67  p.add_argument("mapping_file", help="mapping file name")
68  p.add_argument("param_file", help="parameter file name")
69  p.add_argument("combinations_file", help="combinations file name (output)")
70  p.add_argument("scores_file", help="fitting scores file name (output)")
71 
72  return p.parse_args()
73 
74 
75 def report_solutions(asmb, mdl, mhs, restraint_set, dmap, mapping_data, combs,
76  combs_fn_output_fn, scored_comb_output_fn, max_comb):
77  ensmb = IMP.multifit.Ensemble(asmb, mapping_data)
78  # report scores
79  for i, mh in enumerate(mhs):
80  fn = asmb.get_component_header(i).get_transformations_fn()
81  ensmb.add_component_and_fits(mh,
83  all_leaves = []
84  for mh in mhs:
85  mh_res = IMP.atom.get_by_type(mh, IMP.atom.RESIDUE_TYPE)
86  s1 = IMP.atom.Selection(mh_res)
87  s1.set_atom_types([IMP.atom.AtomType("CA")])
88  all_leaves = all_leaves + s1.get_selected_particles()
89  print("number of leaves:", len(all_leaves))
90  print("Get number of restraints:", len(restraint_set.get_restraints()))
91  pb = progressBar(0, len(combs))
92  fitr = IMP.em.FitRestraint(all_leaves, dmap)
93  ranked_combs = []
94  sorted_combs = []
95  print("going to calculate fits for:", len(combs))
96  for i, comb in enumerate(combs):
97  if i % 100 == 0:
98  print("i:", i)
99  ensmb.load_combination(comb)
100  ranked_combs.append([comb, fitr.evaluate(False)])
101  ensmb.unload_combination(comb)
102  pb.updateAmount(i)
103  # print pb
104  print("end fitting")
105  # Sort by score
106  ranked_combs.sort(key=lambda a: a[1])
107  # Remove excess combinations
108  print("ranked combs:", len(ranked_combs))
109  ranked_combs[max_comb:] = []
110  print("ranked combs:", len(ranked_combs))
111  for comb in ranked_combs:
112  sorted_combs.append(comb[0])
113  IMP.multifit.write_paths(sorted_combs, combs_fn_output_fn)
114  output = open(scored_comb_output_fn, "w")
115  for comb, score in ranked_combs:
116  output.write("|")
117  for ind in comb:
118  output.write(str(ind) + " ")
119  output.write("|" + str(1. - score) + "|\n")
120  output.close()
121 
122 
123 def run(asmb_fn, proteomics_fn, mapping_fn, params_fn,
124  combs_fn_output_fn, scored_comb_output_fn, max_comb):
125  asmb = IMP.multifit.read_settings(asmb_fn)
126  asmb.set_was_used(True)
127  dmap = IMP.em.read_map(asmb.get_assembly_header().get_dens_fn())
128  dmap.get_header().set_resolution(
129  asmb.get_assembly_header().get_resolution())
130  threshold = asmb.get_assembly_header().get_threshold()
131  dmap.update_voxel_size(asmb.get_assembly_header().get_spacing())
132  dmap.set_origin(asmb.get_assembly_header().get_origin())
133  # get rmsd for subunits
134  print(params_fn)
135  alignment_params = IMP.multifit.AlignmentParams(params_fn)
136  alignment_params.show()
137  IMP.set_log_level(IMP.WARNING)
138  prot_data = IMP.multifit.read_proteomics_data(proteomics_fn)
139  print("=========3")
140  mapping_data = IMP.multifit.read_protein_anchors_mapping(prot_data,
141  mapping_fn)
142  print("=========4")
143  _ = mapping_data.get_anchors()
144  print("=========5")
145  # load all proteomics restraints
146  align = IMP.multifit.ProteomicsEMAlignmentAtomic(mapping_data, asmb,
147  alignment_params)
148  print("align")
149  align.set_fast_scoring(False)
150  align.set_density_map(dmap, threshold)
151  align.add_states_and_filters()
152  align.add_all_restraints()
153 
154  print("before align")
155  align.align()
156  print("after align")
157  combs = align.get_combinations()
158  # print "after get combinations"
159  if len(combs) == 0:
160  f = open(combs_fn_output_fn, "w")
161  f.write("NO SOLUTIONS FOUND\n")
162  f.close()
163  sys.exit(0)
164 
165  report_solutions(asmb, align.get_model(), align.get_molecules(),
166  align.get_restraint_set(), dmap,
167  mapping_data, combs, combs_fn_output_fn,
168  scored_comb_output_fn, max_comb)
169 
170 
171 def main():
172  args = parse_args()
173  run(args.assembly_file, args.proteomics_file, args.mapping_file,
174  args.param_file, args.combinations_file, args.scores_file, args.max)
175 
176 
177 if __name__ == "__main__":
178  main()
An ensemble of fitting solutions.
def main
Run a set of tests; similar to unittest.main().
void write_paths(const IntsList &paths, const std::string &txt_filename)
The type of an atom.
SettingsData * read_settings(const char *filename)
ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths=INT_MAX)
Align proteomics graph to EM density map.
Fitting atomic structures into a cryo-electron microscopy density map.
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
void set_log_level(LogLevel l)
Set the current global log level.
Calculate score based on fit to EM map.
Definition: FitRestraint.h:39
FittingSolutionRecords read_fitting_solutions(const char *fitting_fn)
Fitting solutions reader.
double get_resolution(Model *m, ParticleIndex pi)
Estimate the resolution of the hierarchy as used by Representation.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70