IMP logo
IMP Reference Guide  2.20.0
The Integrative Modeling Platform
align.py
1 #!/usr/bin/env python
2 
3 from __future__ import print_function, division
4 import sys
5 import IMP.multifit
6 from IMP import ArgumentParser
7 
8 __doc__ = "Align proteomics graph with the EM map."
9 
10 # analyse the ensemble, first we will do the rmsd stuff
11 
12 class progressBar:
13 
14  def __init__(self, minValue=0, maxValue=10, totalWidth=12):
15  self.progBar = "[]" # This holds the progress bar string
16  self.min = minValue
17  self.max = maxValue
18  self.span = maxValue - minValue
19  self.width = totalWidth
20  self.amount = 0 # When amount == max, we are 100% done
21  self.updateAmount(0) # Build progress bar string
22 
23  def updateAmount(self, newAmount=0):
24  if newAmount < self.min:
25  newAmount = self.min
26  if newAmount > self.max:
27  newAmount = self.max
28  self.amount = newAmount
29 
30  # Figure out the new percent done, round to an integer
31  diffFromMin = float(self.amount - self.min)
32  percentDone = (diffFromMin / float(self.span)) * 100.0
33  percentDone = round(percentDone)
34  percentDone = int(percentDone)
35 
36  # Figure out how many hash bars the percentage should be
37  allFull = self.width - 2
38  numHashes = (percentDone / 100.0) * allFull
39  numHashes = int(round(numHashes))
40 
41  # build a progress bar with hashes and spaces
42  self.progBar = "[" + '#' * numHashes + \
43  ' ' * (allFull - numHashes) + "]"
44 
45  # figure out where to put the percentage, roughly centered
46  percentPlace = (len(self.progBar) // 2) - len(str(percentDone))
47  percentString = str(percentDone) + "%"
48 
49  # slice the percentage into the bar
50  self.progBar = self.progBar[
51  0:percentPlace] + percentString + self.progBar[
52  percentPlace + len(
53  percentString):]
54 
55  def __str__(self):
56  return str(self.progBar)
57 
58 
59 def parse_args():
60  desc = """Align proteomics graph with the EM map."""
61  p = ArgumentParser(description=desc)
62  p.add_argument("-m", "--max", type=int, dest="max", default=999999999,
63  help="maximum number of fits considered")
64  p.add_argument("assembly_file", help="assembly file name")
65  p.add_argument("proteomics_file", help="proteomics file name")
66  p.add_argument("mapping_file", help="mapping file name")
67  p.add_argument("param_file", help="parameter file name")
68  p.add_argument("combinations_file", help="combinations file name (output)")
69  p.add_argument("scores_file", help="fitting scores file name (output)")
70 
71  return p.parse_args()
72 
73 
74 def report_solutions(asmb, mdl, mhs, restraint_set, dmap, mapping_data, combs,
75  combs_fn_output_fn, scored_comb_output_fn, max_comb):
76  ensmb = IMP.multifit.Ensemble(asmb, mapping_data)
77  # report scores
78  for i, mh in enumerate(mhs):
79  fn = asmb.get_component_header(i).get_transformations_fn()
80  ensmb.add_component_and_fits(mh,
82  all_leaves = []
83  for mh in mhs:
84  mh_res = IMP.atom.get_by_type(mh, IMP.atom.RESIDUE_TYPE)
85  s1 = IMP.atom.Selection(mh_res)
86  s1.set_atom_types([IMP.atom.AtomType("CA")])
87  all_leaves = all_leaves + s1.get_selected_particles()
88  print("number of leaves:", len(all_leaves))
89  print("Get number of restraints:", len(restraint_set.get_restraints()))
90  pb = progressBar(0, len(combs))
91  fitr = IMP.em.FitRestraint(all_leaves, dmap)
92  ranked_combs = []
93  sorted_combs = []
94  print("going to calculate fits for:", len(combs))
95  for i, comb in enumerate(combs):
96  if i % 100 == 0:
97  print("i:", i)
98  ensmb.load_combination(comb)
99  ranked_combs.append([comb, fitr.evaluate(False)])
100  ensmb.unload_combination(comb)
101  pb.updateAmount(i)
102  # print pb
103  print("end fitting")
104  # Sort by score
105  ranked_combs.sort(key=lambda a: a[1])
106  # Remove excess combinations
107  print("ranked combs:", len(ranked_combs))
108  ranked_combs[max_comb:] = []
109  print("ranked combs:", len(ranked_combs))
110  for comb in ranked_combs:
111  sorted_combs.append(comb[0])
112  IMP.multifit.write_paths(sorted_combs, combs_fn_output_fn)
113  output = open(scored_comb_output_fn, "w")
114  for comb, score in ranked_combs:
115  output.write("|")
116  for ind in comb:
117  output.write(str(ind) + " ")
118  output.write("|" + str(1. - score) + "|\n")
119  output.close()
120 
121 
122 def run(asmb_fn, proteomics_fn, mapping_fn, params_fn,
123  combs_fn_output_fn, scored_comb_output_fn, max_comb):
124  asmb = IMP.multifit.read_settings(asmb_fn)
125  asmb.set_was_used(True)
126  dmap = IMP.em.read_map(asmb.get_assembly_header().get_dens_fn())
127  dmap.get_header().set_resolution(
128  asmb.get_assembly_header().get_resolution())
129  threshold = asmb.get_assembly_header().get_threshold()
130  dmap.update_voxel_size(asmb.get_assembly_header().get_spacing())
131  dmap.set_origin(asmb.get_assembly_header().get_origin())
132  # get rmsd for subunits
133  print(params_fn)
134  alignment_params = IMP.multifit.AlignmentParams(params_fn)
135  alignment_params.show()
136  IMP.set_log_level(IMP.WARNING)
137  prot_data = IMP.multifit.read_proteomics_data(proteomics_fn)
138  print("=========3")
139  mapping_data = IMP.multifit.read_protein_anchors_mapping(prot_data,
140  mapping_fn)
141  print("=========4")
142  em_anchors = mapping_data.get_anchors()
143  print("=========5")
144  # load all proteomics restraints
145  align = IMP.multifit.ProteomicsEMAlignmentAtomic(mapping_data, asmb,
146  alignment_params)
147  print("align")
148  align.set_fast_scoring(False)
149  align.set_density_map(dmap, threshold)
150  align.add_states_and_filters()
151  align.add_all_restraints()
152 
153  print("before align")
154  align.align()
155  print("after align")
156  combs = align.get_combinations()
157  # print "after get combinations"
158  if len(combs) == 0:
159  f = open(combs_fn_output_fn, "w")
160  f.write("NO SOLUTIONS FOUND\n")
161  f.close()
162  sys.exit(0)
163 
164  report_solutions(asmb, align.get_model(), align.get_molecules(),
165  align.get_restraint_set(), dmap,
166  mapping_data, combs, combs_fn_output_fn,
167  scored_comb_output_fn, max_comb)
168 
169 
170 def main():
171  args = parse_args()
172  run(args.assembly_file, args.proteomics_file, args.mapping_file,
173  args.param_file, args.combinations_file, args.scores_file, args.max)
174 
175 if __name__ == "__main__":
176  main()
An ensemble of fitting solutions.
def main
Run a set of tests; similar to unittest.main().
void write_paths(const IntsList &paths, const std::string &txt_filename)
The type of an atom.
SettingsData * read_settings(const char *filename)
ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths=INT_MAX)
Align proteomics graph to EM density map.
Fitting atomic structures into a cryo-electron microscopy density map.
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
void set_log_level(LogLevel l)
Set the current global log level.
Calculate score based on fit to EM map.
Definition: FitRestraint.h:39
FittingSolutionRecords read_fitting_solutions(const char *fitting_fn)
Fitting solutions reader.
double get_resolution(Model *m, ParticleIndex pi)
Estimate the resolution of the hierarchy as used by Representation.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70