IMP  2.3.0
The Integrative Modeling Platform
align.py
1 #!/usr/bin/env python
2 
3 __doc__ = "Align proteomics graph with the EM map."
4 
5 # analyse the ensemble, first we will do the rmsd stuff
6 import sys
7 import IMP.multifit
8 from IMP import OptionParser
9 
10 
11 class progressBar:
12 
13  def __init__(self, minValue=0, maxValue=10, totalWidth=12):
14  self.progBar = "[]" # This holds the progress bar string
15  self.min = minValue
16  self.max = maxValue
17  self.span = maxValue - minValue
18  self.width = totalWidth
19  self.amount = 0 # When amount == max, we are 100% done
20  self.updateAmount(0) # Build progress bar string
21 
22  def updateAmount(self, newAmount=0):
23  if newAmount < self.min:
24  newAmount = self.min
25  if newAmount > self.max:
26  newAmount = self.max
27  self.amount = newAmount
28 
29  # Figure out the new percent done, round to an integer
30  diffFromMin = float(self.amount - self.min)
31  percentDone = (diffFromMin / float(self.span)) * 100.0
32  percentDone = round(percentDone)
33  percentDone = int(percentDone)
34 
35  # Figure out how many hash bars the percentage should be
36  allFull = self.width - 2
37  numHashes = (percentDone / 100.0) * allFull
38  numHashes = int(round(numHashes))
39 
40  # build a progress bar with hashes and spaces
41  self.progBar = "[" + '#' * numHashes + \
42  ' ' * (allFull - numHashes) + "]"
43 
44  # figure out where to put the percentage, roughly centered
45  percentPlace = (len(self.progBar) / 2) - len(str(percentDone))
46  percentString = str(percentDone) + "%"
47 
48  # slice the percentage into the bar
49  self.progBar = self.progBar[
50  0:percentPlace] + percentString + self.progBar[
51  percentPlace + len(
52  percentString):]
53 
54  def __str__(self):
55  return str(self.progBar)
56 
57 
58 def parse_args():
59  usage = """%prog [options] <asmb> <asmb.proteomics> <asmb.mapping>
60  <alignment.params> <combinations[output]>
61  <Fitting scores[output]>
62 
63 Align proteomics graph with the EM map.
64 """
65  parser = OptionParser(usage)
66  parser.add_option("-m", "--max", type="int", dest="max", default=999999999,
67  help="maximum number of fits considered")
68 
69  options, args = parser.parse_args()
70  if len(args) != 6:
71  parser.error("incorrect number of arguments")
72  return options, args
73 
74 
75 def report_solutions(asmb, mdl, mhs, dmap, mapping_data, combs,
76  combs_fn_output_fn, scored_comb_output_fn, max_comb):
77  ensmb = IMP.multifit.Ensemble(asmb, mapping_data)
78  # report scores
79  for i, mh in enumerate(mhs):
80  fn = asmb.get_component_header(i).get_transformations_fn()
81  ensmb.add_component_and_fits(mh,
83  all_leaves = []
84  for mh in mhs:
85  mh_res = IMP.atom.get_by_type(mh, IMP.atom.RESIDUE_TYPE)
86  s1 = IMP.atom.Selection(mh_res)
87  s1.set_atom_types([IMP.atom.AtomType("CA")])
88  all_leaves = all_leaves + s1.get_selected_particles()
89  print "number of leaves:", len(all_leaves)
90  print "Get number of restraints:", len(mdl.get_restraints())
91  pb = progressBar(0, len(combs))
92  fitr = IMP.em.FitRestraint(all_leaves, dmap)
93  mdl.add_restraint(fitr)
94  ranked_combs = []
95  sorted_combs = []
96  print "going to calculate fits for:", len(combs)
97  for i, comb in enumerate(combs):
98  if i % 100 == 0:
99  print "i:", i
100  ensmb.load_combination(comb)
101  ranked_combs.append([comb, fitr.evaluate(False)])
102  ensmb.unload_combination(comb)
103  pb.updateAmount(i)
104  # print pb
105  print "end fitting"
106  # Sort by score
107  ranked_combs.sort(lambda a, b: cmp(a[1], b[1]))
108  # Remove excess combinations
109  print "ranked combs:", len(ranked_combs)
110  ranked_combs[max_comb:] = []
111  print "ranked combs:", len(ranked_combs)
112  for comb in ranked_combs:
113  sorted_combs.append(comb[0])
114  IMP.multifit.write_paths(sorted_combs, combs_fn_output_fn)
115  output = open(scored_comb_output_fn, "w")
116  for comb, score in ranked_combs:
117  output.write("|")
118  for ind in comb:
119  output.write(str(ind) + " ")
120  output.write("|" + str(1. - score) + "|\n")
121  output.close()
122 
123 
124 def run(asmb_fn, proteomics_fn, mapping_fn, params_fn,
125  combs_fn_output_fn, scored_comb_output_fn, max_comb):
126  asmb = IMP.multifit.read_settings(asmb_fn)
127  asmb.set_was_used(True)
128  dmap = IMP.em.read_map(asmb.get_assembly_header().get_dens_fn())
129  dmap.get_header().set_resolution(
130  asmb.get_assembly_header().get_resolution())
131  threshold = asmb.get_assembly_header().get_threshold()
132  dmap.update_voxel_size(asmb.get_assembly_header().get_spacing())
133  dmap.set_origin(asmb.get_assembly_header().get_origin())
134  # get rmsd for subunits
135  print params_fn
136  alignment_params = IMP.multifit.AlignmentParams(params_fn)
137  alignment_params.show()
138  IMP.base.set_log_level(IMP.WARNING)
139  prot_data = IMP.multifit.read_proteomics_data(proteomics_fn)
140  print "=========3"
141  mapping_data = IMP.multifit.read_protein_anchors_mapping(prot_data,
142  mapping_fn)
143  print "=========4"
144  em_anchors = mapping_data.get_anchors()
145  print "=========5"
146  # load all proteomics restraints
147  align = IMP.multifit.ProteomicsEMAlignmentAtomic(mapping_data, asmb,
148  alignment_params)
149  print "align"
150  align.set_fast_scoring(False)
151  align.set_density_map(dmap, threshold)
152  align.add_states_and_filters()
153  align.add_all_restraints()
154 
155  print "before align"
156  align.align()
157  print "after align"
158  combs = align.get_combinations()
159  # print "after get combinations"
160  if len(combs) == 0:
161  f = open(combs_fn_output_fn, "w")
162  f.write("NO SOLUTIONS FOUND\n")
163  f.close()
164  sys.exit(0)
165 
166  report_solutions(asmb, align.get_model(), align.get_molecules(), dmap,
167  mapping_data, combs, combs_fn_output_fn,
168  scored_comb_output_fn, max_comb)
169 
170 
171 def main():
172  options, args = parse_args()
173  run(args[0], args[1], args[2], args[3], args[4], args[5],
174  options.max)
175 if __name__ == "__main__":
176  main()
An ensemble of fitting solutions.
void set_log_level(LogLevel l)
Set the current global log level.
double get_resolution(kernel::Model *m, kernel::ParticleIndex pi)
void write_paths(const IntsList &paths, const std::string &txt_filename)
The type of an atom.
SettingsData * read_settings(const char *filename)
ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths=INT_MAX)
Align proteomics graph to EM density map.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
Fitting atomic structures into a cryo-electron microscopy density map.
DensityMap * read_map(std::string filename)
Read a density map from a file and return it.
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
IMP::kernel::OptionParser OptionParser
Calculate score based on fit to EM map.
Definition: FitRestraint.h:32
FittingSolutionRecords read_fitting_solutions(const char *fitting_fn)
Fitting solutions reader.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:62