IMP  2.4.0
The Integrative Modeling Platform
align.py
1 #!/usr/bin/env python
2 
3 from __future__ import print_function, division
4 import sys
5 import IMP.multifit
6 from IMP import OptionParser
7 
8 __doc__ = "Align proteomics graph with the EM map."
9 
10 # analyse the ensemble, first we will do the rmsd stuff
11 
12 class progressBar:
13 
14  def __init__(self, minValue=0, maxValue=10, totalWidth=12):
15  self.progBar = "[]" # This holds the progress bar string
16  self.min = minValue
17  self.max = maxValue
18  self.span = maxValue - minValue
19  self.width = totalWidth
20  self.amount = 0 # When amount == max, we are 100% done
21  self.updateAmount(0) # Build progress bar string
22 
23  def updateAmount(self, newAmount=0):
24  if newAmount < self.min:
25  newAmount = self.min
26  if newAmount > self.max:
27  newAmount = self.max
28  self.amount = newAmount
29 
30  # Figure out the new percent done, round to an integer
31  diffFromMin = float(self.amount - self.min)
32  percentDone = (diffFromMin / float(self.span)) * 100.0
33  percentDone = round(percentDone)
34  percentDone = int(percentDone)
35 
36  # Figure out how many hash bars the percentage should be
37  allFull = self.width - 2
38  numHashes = (percentDone / 100.0) * allFull
39  numHashes = int(round(numHashes))
40 
41  # build a progress bar with hashes and spaces
42  self.progBar = "[" + '#' * numHashes + \
43  ' ' * (allFull - numHashes) + "]"
44 
45  # figure out where to put the percentage, roughly centered
46  percentPlace = (len(self.progBar) // 2) - len(str(percentDone))
47  percentString = str(percentDone) + "%"
48 
49  # slice the percentage into the bar
50  self.progBar = self.progBar[
51  0:percentPlace] + percentString + self.progBar[
52  percentPlace + len(
53  percentString):]
54 
55  def __str__(self):
56  return str(self.progBar)
57 
58 
59 def parse_args():
60  usage = """%prog [options] <asmb> <asmb.proteomics> <asmb.mapping>
61  <alignment.params> <combinations[output]>
62  <Fitting scores[output]>
63 
64 Align proteomics graph with the EM map.
65 """
66  parser = OptionParser(usage)
67  parser.add_option("-m", "--max", type="int", dest="max", default=999999999,
68  help="maximum number of fits considered")
69 
70  options, args = parser.parse_args()
71  if len(args) != 6:
72  parser.error("incorrect number of arguments")
73  return options, args
74 
75 
76 def report_solutions(asmb, mdl, mhs, dmap, mapping_data, combs,
77  combs_fn_output_fn, scored_comb_output_fn, max_comb):
78  ensmb = IMP.multifit.Ensemble(asmb, mapping_data)
79  # report scores
80  for i, mh in enumerate(mhs):
81  fn = asmb.get_component_header(i).get_transformations_fn()
82  ensmb.add_component_and_fits(mh,
84  all_leaves = []
85  for mh in mhs:
86  mh_res = IMP.atom.get_by_type(mh, IMP.atom.RESIDUE_TYPE)
87  s1 = IMP.atom.Selection(mh_res)
88  s1.set_atom_types([IMP.atom.AtomType("CA")])
89  all_leaves = all_leaves + s1.get_selected_particles()
90  print("number of leaves:", len(all_leaves))
91  print("Get number of restraints:", len(mdl.get_restraints()))
92  pb = progressBar(0, len(combs))
93  fitr = IMP.em.FitRestraint(all_leaves, dmap)
94  mdl.add_restraint(fitr)
95  ranked_combs = []
96  sorted_combs = []
97  print("going to calculate fits for:", len(combs))
98  for i, comb in enumerate(combs):
99  if i % 100 == 0:
100  print("i:", i)
101  ensmb.load_combination(comb)
102  ranked_combs.append([comb, fitr.evaluate(False)])
103  ensmb.unload_combination(comb)
104  pb.updateAmount(i)
105  # print pb
106  print("end fitting")
107  # Sort by score
108  ranked_combs.sort(key=lambda a: a[1])
109  # Remove excess combinations
110  print("ranked combs:", len(ranked_combs))
111  ranked_combs[max_comb:] = []
112  print("ranked combs:", len(ranked_combs))
113  for comb in ranked_combs:
114  sorted_combs.append(comb[0])
115  IMP.multifit.write_paths(sorted_combs, combs_fn_output_fn)
116  output = open(scored_comb_output_fn, "w")
117  for comb, score in ranked_combs:
118  output.write("|")
119  for ind in comb:
120  output.write(str(ind) + " ")
121  output.write("|" + str(1. - score) + "|\n")
122  output.close()
123 
124 
125 def run(asmb_fn, proteomics_fn, mapping_fn, params_fn,
126  combs_fn_output_fn, scored_comb_output_fn, max_comb):
127  asmb = IMP.multifit.read_settings(asmb_fn)
128  asmb.set_was_used(True)
129  dmap = IMP.em.read_map(asmb.get_assembly_header().get_dens_fn())
130  dmap.get_header().set_resolution(
131  asmb.get_assembly_header().get_resolution())
132  threshold = asmb.get_assembly_header().get_threshold()
133  dmap.update_voxel_size(asmb.get_assembly_header().get_spacing())
134  dmap.set_origin(asmb.get_assembly_header().get_origin())
135  # get rmsd for subunits
136  print(params_fn)
137  alignment_params = IMP.multifit.AlignmentParams(params_fn)
138  alignment_params.show()
139  IMP.base.set_log_level(IMP.WARNING)
140  prot_data = IMP.multifit.read_proteomics_data(proteomics_fn)
141  print("=========3")
142  mapping_data = IMP.multifit.read_protein_anchors_mapping(prot_data,
143  mapping_fn)
144  print("=========4")
145  em_anchors = mapping_data.get_anchors()
146  print("=========5")
147  # load all proteomics restraints
148  align = IMP.multifit.ProteomicsEMAlignmentAtomic(mapping_data, asmb,
149  alignment_params)
150  print("align")
151  align.set_fast_scoring(False)
152  align.set_density_map(dmap, threshold)
153  align.add_states_and_filters()
154  align.add_all_restraints()
155 
156  print("before align")
157  align.align()
158  print("after align")
159  combs = align.get_combinations()
160  # print "after get combinations"
161  if len(combs) == 0:
162  f = open(combs_fn_output_fn, "w")
163  f.write("NO SOLUTIONS FOUND\n")
164  f.close()
165  sys.exit(0)
166 
167  report_solutions(asmb, align.get_model(), align.get_molecules(), dmap,
168  mapping_data, combs, combs_fn_output_fn,
169  scored_comb_output_fn, max_comb)
170 
171 
172 def main():
173  options, args = parse_args()
174  run(args[0], args[1], args[2], args[3], args[4], args[5],
175  options.max)
176 if __name__ == "__main__":
177  main()
An ensemble of fitting solutions.
void set_log_level(LogLevel l)
Set the current global log level.
double get_resolution(kernel::Model *m, kernel::ParticleIndex pi)
void write_paths(const IntsList &paths, const std::string &txt_filename)
The type of an atom.
SettingsData * read_settings(const char *filename)
ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths=INT_MAX)
Align proteomics graph to EM density map.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
Fitting atomic structures into a cryo-electron microscopy density map.
DensityMap * read_map(std::string filename)
Read a density map from a file and return it.
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
IMP::kernel::OptionParser OptionParser
Calculate score based on fit to EM map.
Definition: FitRestraint.h:32
FittingSolutionRecords read_fitting_solutions(const char *fitting_fn)
Fitting solutions reader.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:62