IMP  2.1.1
The Integrative Modeling Platform
score.py
1 #!/usr/bin/env python
2 
3 __doc__ = "Score each of a set of combinations."
4 
5 #analyse the ensemble, first we will do the rmsd stuff
6 import IMP.multifit
7 from IMP import OptionParser
8 
9 def get_color_map():
10  colors={}
11  colors["Rpt1"]=[0.78,0.78,0.73]
12  colors["Rpt2"]=[0.78,0.66,0.58]
13  colors["Rpt3"]=[0.77,0.43,0.5]
14  colors["Rpt4"]=[0.76,0.29,0.67]
15  colors["Rpt5"]=[0.51,0.14,0.75]
16  colors["Rpt6"]=[0.0,0.,0.75]
17  colors["Rpn1"]=[0.34,0.36,0.27]
18  colors["Rpn2"]=[0.42,0.43,0.36]
19  colors["Rpn3"]=[0.49,0.5,0.44]
20  colors["Rpn5"]=[0.56,0.57,0.51]
21  colors["Rpn6"]=[0.64,0.64,0.59]
22  colors["Rpn7"]=[0.71,0.71,0.66]
23  colors["Rpn8"]=[0.78,0.78,0.74]
24  colors["Rpn9"]=[1,0,0]
25  colors["Rpn10"]=[0,1,0]
26  colors["Rpn11"]=[0,0,1]
27  colors["Rpn12"]=[0.5,0.2,0.4]
28  colors["a1"]=[0.78,0.78,0.73]
29  colors["a2"]=[0.78,0.66,0.58]
30  colors["a3"]=[0.77,0.43,0.5]
31  colors["a4"]=[0.76,0.29,0.67]
32  colors["a5"]=[0.51,0.14,0.75]
33  colors["a6"]=[0.0,0.,0.75]
34  colors["a7"]=[0.34,0.36,0.27]
35  colors["a8"]=[0.42,0.43,0.36]
36  colors["a9"]=[0.49,0.5,0.44]
37  colors["a10"]=[0.56,0.57,0.51]
38 
39  colors["a11"]=[0.78,0.78,0.73]
40  colors["a12"]=[0.78,0.66,0.58]
41  colors["a13"]=[0.77,0.43,0.5]
42  colors["a14"]=[0.76,0.29,0.67]
43  colors["a15"]=[0.51,0.14,0.75]
44  colors["a16"]=[0.0,0.,0.75]
45  colors["a17"]=[0.34,0.36,0.27]
46  colors["a18"]=[0.42,0.43,0.36]
47  colors["a19"]=[0.49,0.5,0.44]
48  colors["a20"]=[0.56,0.57,0.51]
49 
50 
51  colors["a21"]=[0.78,0.78,0.73]
52  colors["a22"]=[0.78,0.66,0.58]
53  colors["a23"]=[0.77,0.43,0.5]
54  colors["a24"]=[0.76,0.29,0.67]
55  colors["a25"]=[0.51,0.14,0.75]
56  colors["a26"]=[0.0,0.,0.75]
57  colors["a27"]=[0.34,0.36,0.27]
58  colors["a28"]=[0.42,0.43,0.36]
59  colors["a29"]=[0.49,0.5,0.44]
60  colors["a30"]=[0.56,0.57,0.51]
61  return colors
62 
63 
64 def decompose(dmap,mhs):
65  full_sampled_map=IMP.em.SampledDensityMap(dmap.get_header())
66  all_ps=[]
67  for mh in mhs:
68  all_ps+=IMP.core.get_leaves(mh)
69  full_sampled_map.set_particles(all_ps)
70  full_sampled_map.resample()
71  full_sampled_map.calcRMS()
72  upper=(dmap.get_number_of_voxels()*dmap.get_header().dmean*full_sampled_map.get_header().dmean)/len(mhs)
73  lower=dmap.get_number_of_voxels()*dmap.get_header().rms*full_sampled_map.get_header().rms
74  norm_factors=[upper,lower]
75  print "===============my norm factors:",upper,lower
76  return norm_factors
77 
78 
79 def score_each_protein(dmap,mhs,sd):
80  norm_factors=decompose(dmap,mhs)
81  scores=[]
82  cc=IMP.em.CoarseCC()
83  mdl=mhs[0].get_model()
84  for i in range(len(mhs)):
85  leaves=IMP.core.get_leaves(mhs[i])
86  rb=IMP.core.RigidMember(leaves[0]).get_rigid_body()
87  mh_dmap=IMP.em.SampledDensityMap(dmap.get_header())
88  mh_dmap.set_particles(leaves)
89  mh_dmap.resample()
90  mh_dmap.calcRMS()
91  fits=IMP.multifit.read_fitting_solutions(sd.get_component_header(i).get_transformations_fn())
92  mh_scores=[]
93  for fit in fits[:15]:
94  IMP.core.transform(rb,fit.get_fit_transformation())
95  mh_dmap.resample()
96  mh_scores.append(cc.cross_correlation_coefficient(dmap,mh_dmap,0.,False,norm_factors))
97  IMP.core.transform(rb,fit.get_fit_transformation().get_inverse())
98  scores.append(mh_scores)
99  print "=====mol",i,mh_scores
100  return scores
101 
102 def usage():
103  usage = """%prog [options] <asmb> <asmb.proteomics> <asmb.mapping>
104  <alignment.params> <combinatins> <score combinations [output]>
105 
106 Score each of a set of combinations.
107 """
108  parser = OptionParser(usage)
109  parser.add_option("-m", "--max", dest="max",default=999999999,
110  help="maximum number of fits considered")
111  (options, args) = parser.parse_args()
112  if len(args) != 6:
113  parser.error("incorrect number of arguments")
114  return [options,args]
115 
116 def run(asmb_fn,proteomics_fn,mapping_fn,params_fn,combs_fn,
117  scored_comb_output_fn,max_comb):
118  asmb=IMP.multifit.read_settings(asmb_fn)
119  dmap=IMP.em.read_map(asmb.get_assembly_header().get_dens_fn())
120  dmap.get_header().set_resolution(asmb.get_assembly_header().get_resolution())
121  dmap.update_voxel_size(asmb.get_assembly_header().get_spacing())
122  dmap.set_origin(asmb.get_assembly_header().get_origin())
123  threshold=asmb.get_assembly_header().get_threshold()
124  combs=IMP.multifit.read_paths(combs_fn)
125  #get rmsd for subunits
126  colors=get_color_map()
127  names=colors.keys()
128  print params_fn
129  alignment_params = IMP.multifit.AlignmentParams(params_fn)
130  alignment_params.show()
131 
132  IMP.base.set_log_level(IMP.base.TERSE)
133  print "=========",combs_fn
134  combs=IMP.multifit.read_paths(combs_fn)
135  print "=========1"
136  #sd=IMP.multifit.read_settings(asmb_fn)
137  print "=========2"
138  prot_data=IMP.multifit.read_proteomics_data(proteomics_fn)
139  print "=========3"
140  mapping_data=IMP.multifit.read_protein_anchors_mapping(prot_data,mapping_fn)
141  print "=========4"
142  em_anchors = mapping_data.get_anchors()
143  print "=========5"
144  ensmb=IMP.multifit.Ensemble(asmb,mapping_data)
145  print "=========6"
146  #load all proteomics restraints
147  align=IMP.multifit.ProteomicsEMAlignmentAtomic(mapping_data,asmb,alignment_params)
148  align.set_fast_scoring(False)
149  print "align"
150  mdl=align.get_model()
151  mhs=align.get_molecules()
152  align.add_states_and_filters()
153  rbs=align.get_rigid_bodies()
154  print IMP.core.RigidMember(IMP.core.get_leaves(mhs[0])[0]).get_rigid_body()
155  align.set_density_map(dmap,threshold)
156  gs=[]
157  for i,mh in enumerate(mhs):
158  ensmb.add_component_and_fits(mh,
159  IMP.multifit.read_fitting_solutions(asmb.get_component_header(i).get_transformations_fn()))
160  try:
161  rgb=colors[mh.get_name()]
162  except:
163  rgb=colors[names[i]]
164  color=IMP.display.Color(rgb[0],rgb[1],rgb[2])
165  '''
166  for p in IMP.core.get_leaves(mh):
167  g= IMP.display.XYZRGeometry(p)
168  g.set_color(color)
169  gs.append(g)
170  '''
171  all_leaves=[]
172  for mh in mhs:
173  all_leaves+=IMP.core.XYZs(IMP.core.get_leaves(mh))
174 
175  align.add_all_restraints()
176  print "====1"
177  print "Get number of restraints:",len(mdl.get_restraints())
178  rs=mdl.get_restraints()
179  for r in mdl.get_restraints():
180  rr=IMP.kernel.RestraintSet.get_from(r)
181  for i in range(rr.get_number_of_restraints()):
182  print rr.get_restraint(i).get_name()
183  output = open(scored_comb_output_fn,"w")
184  #load ref structure
185  ref_mhs=[]
186  all_ref_leaves=[]
187  for i in range(asmb.get_number_of_component_headers()):
188  c=asmb.get_component_header(i)
189  fn=c.get_reference_fn()
190  if fn:
191  ref_mhs.append(IMP.atom.read_pdb(fn,mdl))
192  all_ref_leaves+=IMP.core.get_leaves(ref_mhs[-1])
193  for r in rs:
194  rr=IMP.kernel.RestraintSet.get_from(r)
195  for i in range(rr.get_number_of_restraints()):
196  output.write(rr.get_restraint(i).get_name()+"|")
197  output.write("\n")
198  #add fit restraint
199  fitr=IMP.em.FitRestraint(all_leaves,dmap)
200  mdl.add_restraint(fitr)
201  print "Number of combinations:",len(combs[:max_comb])
202 
203  print "native score"
204  num_violated=0
205  for r in rs:
206  rr=IMP.kernel.RestraintSet.get_from(r)
207  for j in range(rr.get_number_of_restraints()):
208  print rr.get_restraint(j).get_name(),rr.evaluate(False)
209 
210  prev_name=''
211  for i,comb in enumerate(combs[:max_comb]):
212  print "Scoring combination:",comb
213  ensmb.load_combination(comb)
214  num_violated=0
215  for r in rs:
216  rr=IMP.kernel.RestraintSet.get_from(r)
217  for j in range(rr.get_number_of_restraints()):
218  print rr.get_restraint(j).get_name()
219  rscore=rr.evaluate(False)
220  if rscore>5:
221  num_violated=num_violated+1
222  IMP.atom.write_pdb(mhs,"model.%d.pdb"%(i))
223  print str(all_leaves[0])+" :: " + str(all_leaves[-1])
224  score=mdl.evaluate(None)
225  num_violated=0
226  msg="COMB"+str(i)+"|"
227  for r in rs:
228  rr=IMP.kernel.RestraintSet.get_from(r)
229  for j in range(rr.get_number_of_restraints()):
230  current_name=rr.get_restraint(j).get_name()
231  if current_name!=prev_name:
232  msg+=' '+current_name+' '
233  prev_name=current_name
234  rscore=rr.get_restraint(j).evaluate(False)
235  msg+=str(rscore)+"|"
236  if rscore>5:
237  num_violated=num_violated+1
238  #msg+="|"+str(score)+"|"+str(num_violated)+"|\n"
239  msg+="|"+str(score)+"|"+str(num_violated)+"||||"+str(fitr.evaluate(None))+"||:"
240  if all_ref_leaves:
241  msg+=str(IMP.atom.get_rmsd(IMP.core.XYZs(all_leaves),
242  IMP.core.XYZs(all_ref_leaves)))
243  output.write(msg+"\n")
244  print msg
245  ensmb.unload_combination(comb)
246  output.close()
247 
248 def main():
249  (options,args) = usage()
250  run(args[0],args[1],args[2],args[3],args[4],args[5],int(options.max))
251 
252 if __name__=="__main__":
253  main()
An ensemble of fitting solutions.
Represent an RGB color.
Definition: Color.h:24
void write_pdb(const Selection &mhd, base::TextOutput out, unsigned int model=1)
void set_log_level(LogLevel l)
Set the current global log level.
A decorator for a particle that is part of a rigid body.
Definition: rigid_bodies.h:349
SettingsData * read_settings(const char *filename)
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
ProteinsAnchorsSamplingSpace read_protein_anchors_mapping(multifit::ProteomicsData *prots, const std::string &anchors_prot_map_fn, int max_paths=INT_MAX)
Responsible for performing coarse fitting between two density objects.
Definition: CoarseCC.h:28
Align proteomics graph to EM density map.
Class for sampling a density map from particles.
double get_rmsd(const Selection &s0, const Selection &s1, const IMP::algebra::Transformation3D &tr_for_second=IMP::algebra::get_identity_transformation_3d())
Definition: distance.h:47
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
See IMP.multifit for more information.
DensityMap * read_map(std::string filename)
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
IMP::kernel::OptionParser OptionParser
IntsList read_paths(const char *txt_filename, int max_paths=INT_MAX)
Read paths.
Calculate score based on fit to EM map.
Definition: FitRestraint.h:31
FittingSolutionRecords read_fitting_solutions(const char *fitting_fn)
Fitting solutions reader.
void read_pdb(base::TextInput input, int model, Hierarchy h)