IMP logo
IMP Reference Guide  2.5.0
The Integrative Modeling Platform
align.py
1 #!/usr/bin/env python
2 
3 from __future__ import print_function, division
4 import sys
5 import IMP.multifit
6 from IMP import OptionParser
7 
8 __doc__ = "Align proteomics graph with the EM map."
9 
10 # analyse the ensemble, first we will do the rmsd stuff
11 
12 class progressBar:
13 
14  def __init__(self, minValue=0, maxValue=10, totalWidth=12):
15  self.progBar = "[]" # This holds the progress bar string
16  self.min = minValue
17  self.max = maxValue
18  self.span = maxValue - minValue
19  self.width = totalWidth
20  self.amount = 0 # When amount == max, we are 100% done
21  self.updateAmount(0) # Build progress bar string
22 
23  def updateAmount(self, newAmount=0):
24  if newAmount < self.min:
25  newAmount = self.min
26  if newAmount > self.max:
27  newAmount = self.max
28  self.amount = newAmount
29 
30  # Figure out the new percent done, round to an integer
31  diffFromMin = float(self.amount - self.min)
32  percentDone = (diffFromMin / float(self.span)) * 100.0
33  percentDone = round(percentDone)
34  percentDone = int(percentDone)
35 
36  # Figure out how many hash bars the percentage should be
37  allFull = self.width - 2
38  numHashes = (percentDone / 100.0) * allFull
39  numHashes = int(round(numHashes))
40 
41  # build a progress bar with hashes and spaces
42  self.progBar = "[" + '#' * numHashes + \
43  ' ' * (allFull - numHashes) + "]"
44 
45  # figure out where to put the percentage, roughly centered
46  percentPlace = (len(self.progBar) // 2) - len(str(percentDone))
47  percentString = str(percentDone) + "%"
48 
49  # slice the percentage into the bar
50  self.progBar = self.progBar[
51  0:percentPlace] + percentString + self.progBar[
52  percentPlace + len(
53  percentString):]
54 
55  def __str__(self):
56  return str(self.progBar)
57 
58 
59 def parse_args():
60  usage = """%prog [options] <asmb> <asmb.proteomics> <asmb.mapping>
61  <alignment.params> <combinations[output]>
62  <Fitting scores[output]>
63 
64 Align proteomics graph with the EM map.
65 """
66  parser = OptionParser(usage)
67  parser.add_option("-m", "--max", type="int", dest="max", default=999999999,
68  help="maximum number of fits considered")
69 
70  options, args = parser.parse_args()
71  if len(args) != 6:
72  parser.error("incorrect number of arguments")
73  return options, args
74 
75 
76 def report_solutions(asmb, mdl, mhs, restraint_set, dmap, mapping_data, combs,
77  combs_fn_output_fn, scored_comb_output_fn, max_comb):
78  ensmb = IMP.multifit.Ensemble(asmb, mapping_data)
79  # report scores
80  for i, mh in enumerate(mhs):
81  fn = asmb.get_component_header(i).get_transformations_fn()
82  ensmb.add_component_and_fits(mh,
84  all_leaves = []
85  for mh in mhs:
86  mh_res = IMP.atom.get_by_type(mh, IMP.atom.RESIDUE_TYPE)
87  s1 = IMP.atom.Selection(mh_res)
88  s1.set_atom_types([IMP.atom.AtomType("CA")])
89  all_leaves = all_leaves + s1.get_selected_particles()
90  print("number of leaves:", len(all_leaves))
91  print("Get number of restraints:", len(restraint_set.get_restraints()))
92  pb = progressBar(0, len(combs))
93  fitr = IMP.em.FitRestraint(all_leaves, dmap)
94  ranked_combs = []
95  sorted_combs = []
96  print("going to calculate fits for:", len(combs))
97  for i, comb in enumerate(combs):
98  if i % 100 == 0:
99  print("i:", i)
100  ensmb.load_combination(comb)
101  ranked_combs.append([comb, fitr.evaluate(False)])
102  ensmb.unload_combination(comb)
103  pb.updateAmount(i)
104  # print pb
105  print("end fitting")
106  # Sort by score
107  ranked_combs.sort(key=lambda a: a[1])
108  # Remove excess combinations
109  print("ranked combs:", len(ranked_combs))
110  ranked_combs[max_comb:] = []
111  print("ranked combs:", len(ranked_combs))
112  for comb in ranked_combs:
113  sorted_combs.append(comb[0])
114  IMP.multifit.write_paths(sorted_combs, combs_fn_output_fn)
115  output = open(scored_comb_output_fn, "w")
116  for comb, score in ranked_combs:
117  output.write("|")
118  for ind in comb:
119  output.write(str(ind) + " ")
120  output.write("|" + str(1. - score) + "|\n")
121  output.close()
122 
123 
124 def run(asmb_fn, proteomics_fn, mapping_fn, params_fn,
125  combs_fn_output_fn, scored_comb_output_fn, max_comb):
126  asmb = IMP.multifit.read_settings(asmb_fn)
127  asmb.set_was_used(True)
128  dmap = IMP.em.read_map(asmb.get_assembly_header().get_dens_fn())
129  dmap.get_header().set_resolution(
130  asmb.get_assembly_header().get_resolution())
131  threshold = asmb.get_assembly_header().get_threshold()
132  dmap.update_voxel_size(asmb.get_assembly_header().get_spacing())
133  dmap.set_origin(asmb.get_assembly_header().get_origin())
134  # get rmsd for subunits
135  print(params_fn)
136  alignment_params = IMP.multifit.AlignmentParams(params_fn)
137  alignment_params.show()
138  IMP.set_log_level(IMP.WARNING)
139  prot_data = IMP.multifit.read_proteomics_data(proteomics_fn)
140  print("=========3")
141  mapping_data = IMP.multifit.read_protein_anchors_mapping(prot_data,
142  mapping_fn)
143  print("=========4")
144  em_anchors = mapping_data.get_anchors()
145  print("=========5")
146  # load all proteomics restraints
147  align = IMP.multifit.ProteomicsEMAlignmentAtomic(mapping_data, asmb,
148  alignment_params)
149  print("align")
150  align.set_fast_scoring(False)
151  align.set_density_map(dmap, threshold)
152  align.add_states_and_filters()
153  align.add_all_restraints()
154 
155  print("before align")
156  align.align()
157  print("after align")
158  combs = align.get_combinations()
159  # print "after get combinations"
160  if len(combs) == 0:
161  f = open(combs_fn_output_fn, "w")
162  f.write("NO SOLUTIONS FOUND\n")
163  f.close()
164  sys.exit(0)
165 
166  report_solutions(asmb, align.get_model(), align.get_molecules(),
167  align.get_restraint_set(), dmap,
168  mapping_data, combs, combs_fn_output_fn,
169  scored_comb_output_fn, max_comb)
170 
171 
172 def main():
173  options, args = parse_args()
174  run(args[0], args[1], args[2], args[3], args[4], args[5],
175  options.max)
176 if __name__ == "__main__":
177  main()
An ensemble of fitting solutions.
def main
Run a set of tests; similar to unittest.main().
void write_paths(const IntsList &paths, const std::string &txt_filename)
The type of an atom.
SettingsData * read_settings(const char *filename)
ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths=INT_MAX)
Align proteomics graph to EM density map.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
Gather all the molecular particles of a certain level in the hierarchy.
Fitting atomic structures into a cryo-electron microscopy density map.
DensityMap * read_map(std::string filename)
Read a density map from a file and return it.
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
void set_log_level(LogLevel l)
Set the current global log level.
Calculate score based on fit to EM map.
Definition: FitRestraint.h:32
FittingSolutionRecords read_fitting_solutions(const char *fitting_fn)
Fitting solutions reader.
double get_resolution(Model *m, ParticleIndex pi)
Estimate the resolution of the hierarchy as used by Representation.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:65