IMP  2.1.0
The Integrative Modeling Platform
align.py
1 #!/usr/bin/env python
2 
3 __doc__ = "Align proteomics graph with the EM map."
4 
5 #analyse the ensemble, first we will do the rmsd stuff
6 import sys
7 import IMP.multifit
8 from IMP import OptionParser
9 
10 class progressBar:
11  def __init__(self, minValue = 0, maxValue = 10, totalWidth=12):
12  self.progBar = "[]" # This holds the progress bar string
13  self.min = minValue
14  self.max = maxValue
15  self.span = maxValue - minValue
16  self.width = totalWidth
17  self.amount = 0 # When amount == max, we are 100% done
18  self.updateAmount(0) # Build progress bar string
19 
20  def updateAmount(self, newAmount = 0):
21  if newAmount < self.min: newAmount = self.min
22  if newAmount > self.max: newAmount = self.max
23  self.amount = newAmount
24 
25  # Figure out the new percent done, round to an integer
26  diffFromMin = float(self.amount - self.min)
27  percentDone = (diffFromMin / float(self.span)) * 100.0
28  percentDone = round(percentDone)
29  percentDone = int(percentDone)
30 
31  # Figure out how many hash bars the percentage should be
32  allFull = self.width - 2
33  numHashes = (percentDone / 100.0) * allFull
34  numHashes = int(round(numHashes))
35 
36  # build a progress bar with hashes and spaces
37  self.progBar = "[" + '#'*numHashes + ' '*(allFull-numHashes) + "]"
38 
39  # figure out where to put the percentage, roughly centered
40  percentPlace = (len(self.progBar) / 2) - len(str(percentDone))
41  percentString = str(percentDone) + "%"
42 
43  # slice the percentage into the bar
44  self.progBar = self.progBar[0:percentPlace] + percentString + self.progBar[percentPlace+len(percentString):]
45 
46  def __str__(self):
47  return str(self.progBar)
48 
49 
50 def parse_args():
51  usage = """%prog [options] <asmb> <asmb.proteomics> <asmb.mapping>
52  <alignment.params> <combinations[output]>
53  <Fitting scores[output]>
54 
55 Align proteomics graph with the EM map.
56 """
57  parser = OptionParser(usage)
58  parser.add_option("-m", "--max", type="int", dest="max", default=999999999,
59  help="maximum number of fits considered")
60 
61  options, args = parser.parse_args()
62  if len(args) !=6:
63  parser.error("incorrect number of arguments")
64  return options,args
65 
66 def report_solutions(asmb, mdl, mhs, dmap, mapping_data, combs,
67  combs_fn_output_fn, scored_comb_output_fn, max_comb):
68  ensmb = IMP.multifit.Ensemble(asmb, mapping_data)
69  #report scores
70  for i,mh in enumerate(mhs):
71  fn = asmb.get_component_header(i).get_transformations_fn()
72  ensmb.add_component_and_fits(mh,
74  all_leaves=[]
75  for mh in mhs:
76  mh_res=IMP.atom.get_by_type(mh,IMP.atom.RESIDUE_TYPE)
77  s1=IMP.atom.Selection(mh_res);
78  s1.set_atom_types([IMP.atom.AtomType("CA")])
79  all_leaves=all_leaves+s1.get_selected_particles();
80  print "number of leaves:",len(all_leaves)
81  print "Get number of restraints:",len(mdl.get_restraints())
82  pb=progressBar(0,len(combs))
83  fitr=IMP.em.FitRestraint(all_leaves,dmap)
84  mdl.add_restraint(fitr)
85  ranked_combs=[]
86  sorted_combs=[]
87  print "going to calculate fits for:",len(combs)
88  for i,comb in enumerate(combs):
89  if i%100 ==0:
90  print "i:",i
91  ensmb.load_combination(comb)
92  ranked_combs.append([comb,fitr.evaluate(False)])
93  ensmb.unload_combination(comb)
94  pb.updateAmount(i)
95  #print pb
96  print "end fitting"
97  # Sort by score
98  ranked_combs.sort(lambda a,b: cmp(a[1], b[1]))
99  # Remove excess combinations
100  print "ranked combs:",len(ranked_combs)
101  ranked_combs[max_comb:] = []
102  print "ranked combs:",len(ranked_combs)
103  for comb in ranked_combs:
104  sorted_combs.append(comb[0])
105  IMP.multifit.write_paths(sorted_combs,combs_fn_output_fn)
106  output = open(scored_comb_output_fn,"w")
107  for comb,score in ranked_combs:
108  output.write("|")
109  for ind in comb:
110  output.write(str(ind)+" ")
111  output.write("|"+str(1.-score)+"|\n")
112  output.close()
113 
114 def run(asmb_fn, proteomics_fn, mapping_fn, params_fn,
115  combs_fn_output_fn, scored_comb_output_fn, max_comb):
116  asmb=IMP.multifit.read_settings(asmb_fn)
117  asmb.set_was_used(True)
118  dmap=IMP.em.read_map(asmb.get_assembly_header().get_dens_fn())
119  dmap.get_header().set_resolution(
120  asmb.get_assembly_header().get_resolution())
121  threshold=asmb.get_assembly_header().get_threshold()
122  dmap.update_voxel_size(asmb.get_assembly_header().get_spacing())
123  dmap.set_origin(asmb.get_assembly_header().get_origin())
124  #get rmsd for subunits
125  print params_fn
126  alignment_params = IMP.multifit.AlignmentParams(params_fn)
127  alignment_params.show()
128  IMP.base.set_log_level(IMP.WARNING)
129  prot_data=IMP.multifit.read_proteomics_data(proteomics_fn)
130  print "=========3"
131  mapping_data=IMP.multifit.read_protein_anchors_mapping(prot_data,
132  mapping_fn)
133  print "=========4"
134  em_anchors = mapping_data.get_anchors()
135  print "=========5"
136  #load all proteomics restraints
137  align=IMP.multifit.ProteomicsEMAlignmentAtomic(mapping_data,asmb,
138  alignment_params)
139  print "align"
140  align.set_fast_scoring(False)
141  align.set_density_map(dmap,threshold)
142  align.add_states_and_filters()
143  align.add_all_restraints()
144 
145  print "before align"
146  align.align()
147  print "after align"
148  combs=align.get_combinations()
149  #print "after get combinations"
150  if len(combs)==0:
151  f=open(combs_fn_output_fn,"w")
152  f.write("NO SOLUTIONS FOUND\n")
153  f.close()
154  sys.exit(0)
155 
156  report_solutions(asmb, align.get_model(), align.get_molecules(), dmap,
157  mapping_data, combs, combs_fn_output_fn,
158  scored_comb_output_fn, max_comb)
159 
160 def main():
161  options,args = parse_args()
162  run(args[0], args[1], args[2], args[3], args[4], args[5],
163  options.max)
164 if __name__ == "__main__":
165  main()
An ensemble of fitting solutions.
void set_log_level(LogLevel l)
Set the current global log level.
void write_paths(const IntsList &paths, const std::string &txt_filename)
The type of an atom.
SettingsData * read_settings(const char *filename)
ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths=INT_MAX)
Align proteomics graph to EM density map.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
See IMP.multifit for more information.
DensityMap * read_map(std::string filename)
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
IMP::kernel::OptionParser OptionParser
Calculate score based on fit to EM map.
Definition: FitRestraint.h:31
FittingSolutionRecords read_fitting_solutions(const char *fitting_fn)
Fitting solutions reader.