IMP logo
IMP Reference Guide  2.11.0
The Integrative Modeling Platform
score.py
1 #!/usr/bin/env python
2 
3 from __future__ import print_function
4 import IMP.multifit
5 from IMP import ArgumentParser
6 
7 __doc__ = "Score each of a set of combinations."
8 
9 # analyse the ensemble, first we will do the rmsd stuff
10 
11 def get_color_map():
12  colors = {}
13  colors["Rpt1"] = [0.78, 0.78, 0.73]
14  colors["Rpt2"] = [0.78, 0.66, 0.58]
15  colors["Rpt3"] = [0.77, 0.43, 0.5]
16  colors["Rpt4"] = [0.76, 0.29, 0.67]
17  colors["Rpt5"] = [0.51, 0.14, 0.75]
18  colors["Rpt6"] = [0.0, 0., 0.75]
19  colors["Rpn1"] = [0.34, 0.36, 0.27]
20  colors["Rpn2"] = [0.42, 0.43, 0.36]
21  colors["Rpn3"] = [0.49, 0.5, 0.44]
22  colors["Rpn5"] = [0.56, 0.57, 0.51]
23  colors["Rpn6"] = [0.64, 0.64, 0.59]
24  colors["Rpn7"] = [0.71, 0.71, 0.66]
25  colors["Rpn8"] = [0.78, 0.78, 0.74]
26  colors["Rpn9"] = [1, 0, 0]
27  colors["Rpn10"] = [0, 1, 0]
28  colors["Rpn11"] = [0, 0, 1]
29  colors["Rpn12"] = [0.5, 0.2, 0.4]
30  colors["a1"] = [0.78, 0.78, 0.73]
31  colors["a2"] = [0.78, 0.66, 0.58]
32  colors["a3"] = [0.77, 0.43, 0.5]
33  colors["a4"] = [0.76, 0.29, 0.67]
34  colors["a5"] = [0.51, 0.14, 0.75]
35  colors["a6"] = [0.0, 0., 0.75]
36  colors["a7"] = [0.34, 0.36, 0.27]
37  colors["a8"] = [0.42, 0.43, 0.36]
38  colors["a9"] = [0.49, 0.5, 0.44]
39  colors["a10"] = [0.56, 0.57, 0.51]
40 
41  colors["a11"] = [0.78, 0.78, 0.73]
42  colors["a12"] = [0.78, 0.66, 0.58]
43  colors["a13"] = [0.77, 0.43, 0.5]
44  colors["a14"] = [0.76, 0.29, 0.67]
45  colors["a15"] = [0.51, 0.14, 0.75]
46  colors["a16"] = [0.0, 0., 0.75]
47  colors["a17"] = [0.34, 0.36, 0.27]
48  colors["a18"] = [0.42, 0.43, 0.36]
49  colors["a19"] = [0.49, 0.5, 0.44]
50  colors["a20"] = [0.56, 0.57, 0.51]
51 
52  colors["a21"] = [0.78, 0.78, 0.73]
53  colors["a22"] = [0.78, 0.66, 0.58]
54  colors["a23"] = [0.77, 0.43, 0.5]
55  colors["a24"] = [0.76, 0.29, 0.67]
56  colors["a25"] = [0.51, 0.14, 0.75]
57  colors["a26"] = [0.0, 0., 0.75]
58  colors["a27"] = [0.34, 0.36, 0.27]
59  colors["a28"] = [0.42, 0.43, 0.36]
60  colors["a29"] = [0.49, 0.5, 0.44]
61  colors["a30"] = [0.56, 0.57, 0.51]
62  return colors
63 
64 
65 def decompose(dmap, mhs):
66  full_sampled_map = IMP.em.SampledDensityMap(dmap.get_header())
67  all_ps = []
68  for mh in mhs:
69  all_ps += IMP.core.get_leaves(mh)
70  full_sampled_map.set_particles(all_ps)
71  full_sampled_map.resample()
72  full_sampled_map.calcRMS()
73  upper = (
74  dmap.get_number_of_voxels(
75  ) * dmap.get_header(
76  ).dmean * full_sampled_map.get_header(
77  ).dmean) / len(
78  mhs)
79  lower = dmap.get_number_of_voxels(
80  ) * dmap.get_header(
81  ).rms * full_sampled_map.get_header(
82  ).rms
83  norm_factors = [upper, lower]
84  print("===============my norm factors:", upper, lower)
85  return norm_factors
86 
87 
88 def score_each_protein(dmap, mhs, sd):
89  norm_factors = decompose(dmap, mhs)
90  scores = []
91  cc = IMP.em.CoarseCC()
92  mdl = mhs[0].get_model()
93  for i in range(len(mhs)):
94  leaves = IMP.core.get_leaves(mhs[i])
95  rb = IMP.core.RigidMember(leaves[0]).get_rigid_body()
96  mh_dmap = IMP.em.SampledDensityMap(dmap.get_header())
97  mh_dmap.set_particles(leaves)
98  mh_dmap.resample()
99  mh_dmap.calcRMS()
101  sd.get_component_header(i).get_transformations_fn())
102  mh_scores = []
103  for fit in fits[:15]:
104  IMP.core.transform(rb, fit.get_fit_transformation())
105  mh_dmap.resample()
106  mh_scores.append(
107  cc.cross_correlation_coefficient(
108  dmap,
109  mh_dmap,
110  0.,
111  False,
112  norm_factors))
113  IMP.core.transform(rb, fit.get_fit_transformation().get_inverse())
114  scores.append(mh_scores)
115  print("=====mol", i, mh_scores)
116  return scores
117 
118 
119 def usage():
120  usage = """%prog [options] <asmb> <asmb.proteomics> <asmb.mapping>
121  <alignment.params> <combinatins> <score combinations [output]>
122 
123 Score each of a set of combinations.
124 """
125  p = ArgumentParser(usage)
126  p.add_argument("-m", "--max", dest="max", type=int, default=999999999,
127  help="maximum number of fits considered")
128  p.add_argument("assembly_file", help="assembly file name")
129  p.add_argument("proteomics_file", help="proteomics file name")
130  p.add_argument("mapping_file", help="mapping file name")
131  p.add_argument("param_file", help="parameter file name")
132  p.add_argument("combinations_file", help="combinations file name")
133  p.add_argument("scores_file", help="output scores file name")
134  return p.parse_args()
135 
136 
137 def run(asmb_fn, proteomics_fn, mapping_fn, params_fn, combs_fn,
138  scored_comb_output_fn, max_comb):
139  asmb = IMP.multifit.read_settings(asmb_fn)
140  dmap = IMP.em.read_map(asmb.get_assembly_header().get_dens_fn())
141  dmap.get_header().set_resolution(
142  asmb.get_assembly_header().get_resolution())
143  dmap.update_voxel_size(asmb.get_assembly_header().get_spacing())
144  dmap.set_origin(asmb.get_assembly_header().get_origin())
145  threshold = asmb.get_assembly_header().get_threshold()
146  combs = IMP.multifit.read_paths(combs_fn)
147  # get rmsd for subunits
148  colors = get_color_map()
149  names = list(colors.keys())
150  print(params_fn)
151  alignment_params = IMP.multifit.AlignmentParams(params_fn)
152  alignment_params.show()
153 
154  IMP.set_log_level(IMP.TERSE)
155  print("=========", combs_fn)
156  combs = IMP.multifit.read_paths(combs_fn)
157  print("=========1")
158  # sd=IMP.multifit.read_settings(asmb_fn)
159  print("=========2")
160  prot_data = IMP.multifit.read_proteomics_data(proteomics_fn)
161  print("=========3")
163  prot_data, mapping_fn)
164  print("=========4")
165  em_anchors = mapping_data.get_anchors()
166  print("=========5")
167  ensmb = IMP.multifit.Ensemble(asmb, mapping_data)
168  print("=========6")
169  # load all proteomics restraints
171  mapping_data, asmb, alignment_params)
172  align.set_fast_scoring(False)
173  print("align")
174  mdl = align.get_model()
175  mhs = align.get_molecules()
176  align.add_states_and_filters()
177  rbs = align.get_rigid_bodies()
178  print(IMP.core.RigidMember(IMP.core.get_leaves(mhs[0])[0]).get_rigid_body())
179  align.set_density_map(dmap, threshold)
180  gs = []
181  for i, mh in enumerate(mhs):
182  ensmb.add_component_and_fits(mh,
183  IMP.multifit.read_fitting_solutions(asmb.get_component_header(i).get_transformations_fn()))
184  try:
185  rgb = colors[mh.get_name()]
186  except:
187  rgb = colors[names[i]]
188  color = IMP.display.Color(rgb[0], rgb[1], rgb[2])
189  '''
190  for p in IMP.core.get_leaves(mh):
191  g= IMP.display.XYZRGeometry(p)
192  g.set_color(color)
193  gs.append(g)
194  '''
195  all_leaves = []
196  for mh in mhs:
197  all_leaves += IMP.core.XYZs(IMP.core.get_leaves(mh))
198 
199  align.add_all_restraints()
200  print("====1")
201  rs = align.get_restraint_set().get_restraints()
202  print("Get number of restraints:", len(rs))
203  for r in rs:
204  rr = IMP.RestraintSet.get_from(r)
205  for i in range(rr.get_number_of_restraints()):
206  print(rr.get_restraint(i).get_name())
207  output = open(scored_comb_output_fn, "w")
208  # load ref structure
209  ref_mhs = []
210  all_ref_leaves = []
211  for i in range(asmb.get_number_of_component_headers()):
212  c = asmb.get_component_header(i)
213  fn = c.get_reference_fn()
214  if fn:
215  ref_mhs.append(IMP.atom.read_pdb(fn, mdl))
216  all_ref_leaves += IMP.core.get_leaves(ref_mhs[-1])
217  for r in rs:
218  rr = IMP.RestraintSet.get_from(r)
219  for i in range(rr.get_number_of_restraints()):
220  output.write(rr.get_restraint(i).get_name() + "|")
221  output.write("\n")
222  # add fit restraint
223  fitr = IMP.em.FitRestraint(all_leaves, dmap)
224  sf = IMP.core.RestraintsScoringFunction(rs + [fitr])
225  print("Number of combinations:", len(combs[:max_comb]))
226 
227  print("native score")
228  num_violated = 0
229  for r in rs:
230  rr = IMP.RestraintSet.get_from(r)
231  for j in range(rr.get_number_of_restraints()):
232  print(rr.get_restraint(j).get_name(), rr.evaluate(False))
233 
234  prev_name = ''
235  for i, comb in enumerate(combs[:max_comb]):
236  print("Scoring combination:", comb)
237  ensmb.load_combination(comb)
238  num_violated = 0
239  for r in rs:
240  rr = IMP.RestraintSet.get_from(r)
241  for j in range(rr.get_number_of_restraints()):
242  print(rr.get_restraint(j).get_name())
243  rscore = rr.evaluate(False)
244  if rscore > 5:
245  num_violated = num_violated + 1
246  IMP.atom.write_pdb(mhs, "model.%d.pdb" % (i))
247  print(str(all_leaves[0]) + " :: " + str(all_leaves[-1]))
248  score = sf.evaluate(False)
249  num_violated = 0
250  msg = "COMB" + str(i) + "|"
251  for r in rs:
252  rr = IMP.RestraintSet.get_from(r)
253  for j in range(rr.get_number_of_restraints()):
254  current_name = rr.get_restraint(j).get_name()
255  if current_name != prev_name:
256  msg += ' ' + current_name + ' '
257  prev_name = current_name
258  rscore = rr.get_restraint(j).evaluate(False)
259  msg += str(rscore) + "|"
260  if rscore > 5:
261  num_violated = num_violated + 1
262  # msg+="|"+str(score)+"|"+str(num_violated)+"|\n"
263  msg += "|" + str(
264  score) + "|" + str(
265  num_violated) + "||||" + str(
266  fitr.evaluate(False)) + "||:"
267  if all_ref_leaves:
268  msg += str(IMP.atom.get_rmsd(IMP.core.XYZs(all_leaves),
269  IMP.core.XYZs(all_ref_leaves)))
270  output.write(msg + "\n")
271  print(msg)
272  ensmb.unload_combination(comb)
273  output.close()
274 
275 
276 def main():
277  args = usage()
278  run(args.assembly_file, args.proteomics_file, args.mapping_file,
279  args.param_file, args.combinations_file, args.scores_file, args.max)
280 
281 if __name__ == "__main__":
282  main()
An ensemble of fitting solutions.
Represent an RGB color.
Definition: Color.h:24
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)
def evaluate
Evaluate the score of the restraint.
SettingsData * read_settings(const char *filename)
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
void read_pdb(TextInput input, int model, Hierarchy h)
ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths=INT_MAX)
Responsible for performing coarse fitting between two density objects.
Definition: CoarseCC.h:28
Align proteomics graph to EM density map.
Class for sampling a density map from particles.
double get_rmsd(const Selection &s0, const Selection &s1)
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
Fitting atomic structures into a cryo-electron microscopy density map.
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
void set_log_level(LogLevel l)
Set the current global log level.
IntsList read_paths(const char *txt_filename, int max_paths=INT_MAX)
Read paths.
Calculate score based on fit to EM map.
Definition: FitRestraint.h:39
FittingSolutionRecords read_fitting_solutions(const char *fitting_fn)
Fitting solutions reader.
double get_resolution(Model *m, ParticleIndex pi)
Estimate the resolution of the hierarchy as used by Representation.