IMP  2.2.0
The Integrative Modeling Platform
score.py
1 #!/usr/bin/env python
2 
3 __doc__ = "Score each of a set of combinations."
4 
5 # analyse the ensemble, first we will do the rmsd stuff
6 import IMP.multifit
7 from IMP import OptionParser
8 
9 
10 def get_color_map():
11  colors = {}
12  colors["Rpt1"] = [0.78, 0.78, 0.73]
13  colors["Rpt2"] = [0.78, 0.66, 0.58]
14  colors["Rpt3"] = [0.77, 0.43, 0.5]
15  colors["Rpt4"] = [0.76, 0.29, 0.67]
16  colors["Rpt5"] = [0.51, 0.14, 0.75]
17  colors["Rpt6"] = [0.0, 0., 0.75]
18  colors["Rpn1"] = [0.34, 0.36, 0.27]
19  colors["Rpn2"] = [0.42, 0.43, 0.36]
20  colors["Rpn3"] = [0.49, 0.5, 0.44]
21  colors["Rpn5"] = [0.56, 0.57, 0.51]
22  colors["Rpn6"] = [0.64, 0.64, 0.59]
23  colors["Rpn7"] = [0.71, 0.71, 0.66]
24  colors["Rpn8"] = [0.78, 0.78, 0.74]
25  colors["Rpn9"] = [1, 0, 0]
26  colors["Rpn10"] = [0, 1, 0]
27  colors["Rpn11"] = [0, 0, 1]
28  colors["Rpn12"] = [0.5, 0.2, 0.4]
29  colors["a1"] = [0.78, 0.78, 0.73]
30  colors["a2"] = [0.78, 0.66, 0.58]
31  colors["a3"] = [0.77, 0.43, 0.5]
32  colors["a4"] = [0.76, 0.29, 0.67]
33  colors["a5"] = [0.51, 0.14, 0.75]
34  colors["a6"] = [0.0, 0., 0.75]
35  colors["a7"] = [0.34, 0.36, 0.27]
36  colors["a8"] = [0.42, 0.43, 0.36]
37  colors["a9"] = [0.49, 0.5, 0.44]
38  colors["a10"] = [0.56, 0.57, 0.51]
39 
40  colors["a11"] = [0.78, 0.78, 0.73]
41  colors["a12"] = [0.78, 0.66, 0.58]
42  colors["a13"] = [0.77, 0.43, 0.5]
43  colors["a14"] = [0.76, 0.29, 0.67]
44  colors["a15"] = [0.51, 0.14, 0.75]
45  colors["a16"] = [0.0, 0., 0.75]
46  colors["a17"] = [0.34, 0.36, 0.27]
47  colors["a18"] = [0.42, 0.43, 0.36]
48  colors["a19"] = [0.49, 0.5, 0.44]
49  colors["a20"] = [0.56, 0.57, 0.51]
50 
51  colors["a21"] = [0.78, 0.78, 0.73]
52  colors["a22"] = [0.78, 0.66, 0.58]
53  colors["a23"] = [0.77, 0.43, 0.5]
54  colors["a24"] = [0.76, 0.29, 0.67]
55  colors["a25"] = [0.51, 0.14, 0.75]
56  colors["a26"] = [0.0, 0., 0.75]
57  colors["a27"] = [0.34, 0.36, 0.27]
58  colors["a28"] = [0.42, 0.43, 0.36]
59  colors["a29"] = [0.49, 0.5, 0.44]
60  colors["a30"] = [0.56, 0.57, 0.51]
61  return colors
62 
63 
64 def decompose(dmap, mhs):
65  full_sampled_map = IMP.em.SampledDensityMap(dmap.get_header())
66  all_ps = []
67  for mh in mhs:
68  all_ps += IMP.core.get_leaves(mh)
69  full_sampled_map.set_particles(all_ps)
70  full_sampled_map.resample()
71  full_sampled_map.calcRMS()
72  upper = (
73  dmap.get_number_of_voxels(
74  ) * dmap.get_header(
75  ).dmean * full_sampled_map.get_header(
76  ).dmean) / len(
77  mhs)
78  lower = dmap.get_number_of_voxels(
79  ) * dmap.get_header(
80  ).rms * full_sampled_map.get_header(
81  ).rms
82  norm_factors = [upper, lower]
83  print "===============my norm factors:", upper, lower
84  return norm_factors
85 
86 
87 def score_each_protein(dmap, mhs, sd):
88  norm_factors = decompose(dmap, mhs)
89  scores = []
90  cc = IMP.em.CoarseCC()
91  mdl = mhs[0].get_model()
92  for i in range(len(mhs)):
93  leaves = IMP.core.get_leaves(mhs[i])
94  rb = IMP.core.RigidMember(leaves[0]).get_rigid_body()
95  mh_dmap = IMP.em.SampledDensityMap(dmap.get_header())
96  mh_dmap.set_particles(leaves)
97  mh_dmap.resample()
98  mh_dmap.calcRMS()
100  sd.get_component_header(i).get_transformations_fn())
101  mh_scores = []
102  for fit in fits[:15]:
103  IMP.core.transform(rb, fit.get_fit_transformation())
104  mh_dmap.resample()
105  mh_scores.append(
106  cc.cross_correlation_coefficient(
107  dmap,
108  mh_dmap,
109  0.,
110  False,
111  norm_factors))
112  IMP.core.transform(rb, fit.get_fit_transformation().get_inverse())
113  scores.append(mh_scores)
114  print "=====mol", i, mh_scores
115  return scores
116 
117 
118 def usage():
119  usage = """%prog [options] <asmb> <asmb.proteomics> <asmb.mapping>
120  <alignment.params> <combinatins> <score combinations [output]>
121 
122 Score each of a set of combinations.
123 """
124  parser = OptionParser(usage)
125  parser.add_option("-m", "--max", dest="max", default=999999999,
126  help="maximum number of fits considered")
127  (options, args) = parser.parse_args()
128  if len(args) != 6:
129  parser.error("incorrect number of arguments")
130  return [options, args]
131 
132 
133 def run(asmb_fn, proteomics_fn, mapping_fn, params_fn, combs_fn,
134  scored_comb_output_fn, max_comb):
135  asmb = IMP.multifit.read_settings(asmb_fn)
136  dmap = IMP.em.read_map(asmb.get_assembly_header().get_dens_fn())
137  dmap.get_header().set_resolution(
138  asmb.get_assembly_header().get_resolution())
139  dmap.update_voxel_size(asmb.get_assembly_header().get_spacing())
140  dmap.set_origin(asmb.get_assembly_header().get_origin())
141  threshold = asmb.get_assembly_header().get_threshold()
142  combs = IMP.multifit.read_paths(combs_fn)
143  # get rmsd for subunits
144  colors = get_color_map()
145  names = colors.keys()
146  print params_fn
147  alignment_params = IMP.multifit.AlignmentParams(params_fn)
148  alignment_params.show()
149 
150  IMP.base.set_log_level(IMP.base.TERSE)
151  print "=========", combs_fn
152  combs = IMP.multifit.read_paths(combs_fn)
153  print "=========1"
154  # sd=IMP.multifit.read_settings(asmb_fn)
155  print "=========2"
156  prot_data = IMP.multifit.read_proteomics_data(proteomics_fn)
157  print "=========3"
159  prot_data, mapping_fn)
160  print "=========4"
161  em_anchors = mapping_data.get_anchors()
162  print "=========5"
163  ensmb = IMP.multifit.Ensemble(asmb, mapping_data)
164  print "=========6"
165  # load all proteomics restraints
167  mapping_data, asmb, alignment_params)
168  align.set_fast_scoring(False)
169  print "align"
170  mdl = align.get_model()
171  mhs = align.get_molecules()
172  align.add_states_and_filters()
173  rbs = align.get_rigid_bodies()
174  print IMP.core.RigidMember(IMP.core.get_leaves(mhs[0])[0]).get_rigid_body()
175  align.set_density_map(dmap, threshold)
176  gs = []
177  for i, mh in enumerate(mhs):
178  ensmb.add_component_and_fits(mh,
179  IMP.multifit.read_fitting_solutions(asmb.get_component_header(i).get_transformations_fn()))
180  try:
181  rgb = colors[mh.get_name()]
182  except:
183  rgb = colors[names[i]]
184  color = IMP.display.Color(rgb[0], rgb[1], rgb[2])
185  '''
186  for p in IMP.core.get_leaves(mh):
187  g= IMP.display.XYZRGeometry(p)
188  g.set_color(color)
189  gs.append(g)
190  '''
191  all_leaves = []
192  for mh in mhs:
193  all_leaves += IMP.core.XYZs(IMP.core.get_leaves(mh))
194 
195  align.add_all_restraints()
196  print "====1"
197  print "Get number of restraints:", len(mdl.get_restraints())
198  rs = mdl.get_restraints()
199  for r in mdl.get_restraints():
200  rr = IMP.kernel.RestraintSet.get_from(r)
201  for i in range(rr.get_number_of_restraints()):
202  print rr.get_restraint(i).get_name()
203  output = open(scored_comb_output_fn, "w")
204  # load ref structure
205  ref_mhs = []
206  all_ref_leaves = []
207  for i in range(asmb.get_number_of_component_headers()):
208  c = asmb.get_component_header(i)
209  fn = c.get_reference_fn()
210  if fn:
211  ref_mhs.append(IMP.atom.read_pdb(fn, mdl))
212  all_ref_leaves += IMP.core.get_leaves(ref_mhs[-1])
213  for r in rs:
214  rr = IMP.kernel.RestraintSet.get_from(r)
215  for i in range(rr.get_number_of_restraints()):
216  output.write(rr.get_restraint(i).get_name() + "|")
217  output.write("\n")
218  # add fit restraint
219  fitr = IMP.em.FitRestraint(all_leaves, dmap)
220  mdl.add_restraint(fitr)
221  print "Number of combinations:", len(combs[:max_comb])
222 
223  print "native score"
224  num_violated = 0
225  for r in rs:
226  rr = IMP.kernel.RestraintSet.get_from(r)
227  for j in range(rr.get_number_of_restraints()):
228  print rr.get_restraint(j).get_name(), rr.evaluate(False)
229 
230  prev_name = ''
231  for i, comb in enumerate(combs[:max_comb]):
232  print "Scoring combination:", comb
233  ensmb.load_combination(comb)
234  num_violated = 0
235  for r in rs:
236  rr = IMP.kernel.RestraintSet.get_from(r)
237  for j in range(rr.get_number_of_restraints()):
238  print rr.get_restraint(j).get_name()
239  rscore = rr.evaluate(False)
240  if rscore > 5:
241  num_violated = num_violated + 1
242  IMP.atom.write_pdb(mhs, "model.%d.pdb" % (i))
243  print str(all_leaves[0]) + " :: " + str(all_leaves[-1])
244  score = mdl.evaluate(False)
245  num_violated = 0
246  msg = "COMB" + str(i) + "|"
247  for r in rs:
248  rr = IMP.kernel.RestraintSet.get_from(r)
249  for j in range(rr.get_number_of_restraints()):
250  current_name = rr.get_restraint(j).get_name()
251  if current_name != prev_name:
252  msg += ' ' + current_name + ' '
253  prev_name = current_name
254  rscore = rr.get_restraint(j).evaluate(False)
255  msg += str(rscore) + "|"
256  if rscore > 5:
257  num_violated = num_violated + 1
258  # msg+="|"+str(score)+"|"+str(num_violated)+"|\n"
259  msg += "|" + str(
260  score) + "|" + str(
261  num_violated) + "||||" + str(
262  fitr.evaluate(False)) + "||:"
263  if all_ref_leaves:
264  msg += str(IMP.atom.get_rmsd(IMP.core.XYZs(all_leaves),
265  IMP.core.XYZs(all_ref_leaves)))
266  output.write(msg + "\n")
267  print msg
268  ensmb.unload_combination(comb)
269  output.close()
270 
271 
272 def main():
273  (options, args) = usage()
274  run(args[0], args[1], args[2], args[3], args[4], args[5], int(options.max))
275 
276 if __name__ == "__main__":
277  main()
An ensemble of fitting solutions.
Represent an RGB color.
Definition: Color.h:24
void write_pdb(const Selection &mhd, base::TextOutput out, unsigned int model=1)
void set_log_level(LogLevel l)
Set the current global log level.
double get_resolution(kernel::Model *m, kernel::ParticleIndex pi)
double get_rmsd(const core::XYZs &s0, const core::XYZs &s1, const IMP::algebra::Transformation3D &tr_for_second)
SettingsData * read_settings(const char *filename)
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths=INT_MAX)
Responsible for performing coarse fitting between two density objects.
Definition: CoarseCC.h:28
Align proteomics graph to EM density map.
Class for sampling a density map from particles.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
See IMP.multifit for more information.
DensityMap * read_map(std::string filename)
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
IMP::kernel::OptionParser OptionParser
IntsList read_paths(const char *txt_filename, int max_paths=INT_MAX)
Read paths.
Calculate score based on fit to EM map.
Definition: FitRestraint.h:32
FittingSolutionRecords read_fitting_solutions(const char *fitting_fn)
Fitting solutions reader.
void read_pdb(base::TextInput input, int model, Hierarchy h)