IMP  2.3.1
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
6 import IMP.pmi.tools
7 import IMP.pmi.samplers
8 import IMP.pmi.output
9 import IMP.pmi.analysis
10 import IMP.pmi.io.input
11 import IMP.rmf
12 import RMF
13 import os
14 import glob
15 from operator import itemgetter
16 from collections import defaultdict
17 import numpy as np
18 
19 class ReplicaExchange0(object):
20  """A macro to help setup and run replica exchange.
21  Supports monte carlo and molecular dynamics.
22  Produces trajectory RMF files, best PDB structures,
23  and output stat files.
24  @param model The IMP model
25  @param representation PMI.Representation() (or list of them, for multi-state modeling)
26  @param root_hier Instead of passing Representation, just pass a hierarchy
27  @param mote_carlo_sample_objcts Objects for MC sampling
28  @param molecular_dynamics_sample_objects Objects for MD sampling
29  @param output_objects Objects with get_output() for packing into stat files
30  @param crosslink_restraints Harmonic restraints to go in output RMF files
31  @param monte_carlo_temperature MC temp
32  @param replica_exchange_minimum_temperature Low temp for REX
33  @param replica_exchange_maximum_temperature High temp for REX
34  @param num_sample_rounds Number of rounds of MC/MD per cycle
35  @param number_of_best_scoring_models Number of top-scoring PDB models to keep around
36  @param monte_carlo_steps Number of MC steps per round
37  @param molecular_dynamics_steps Number of MD steps per round
38  @param number_of_frames Number of REX frames to run
39  @param nframes_write_coordinates How often to write the coordinates of a frame
40  @param write_initial_rmf Write the initial configuration
41  """
42  def __init__(self, model,
43  representation=None,
44  root_hier=None,
45  sample_objects=None, # DEPRECATED
46  monte_carlo_sample_objects=None,
47  molecular_dynamics_sample_objects=None,
48  output_objects=None,
49  crosslink_restraints=None,
50  monte_carlo_temperature=1.0,
51  simulated_annealing=False,
52  simulated_annealing_minimum_temperature=1.0,
53  simulated_annealing_maximum_temperature=2.5,
54  simulated_annealing_minimum_temperature_nframes=100,
55  simulated_annealing_maximum_temperature_nframes=100,
56  replica_exchange_minimum_temperature=1.0,
57  replica_exchange_maximum_temperature=2.5,
58  num_sample_rounds=1,
59  number_of_best_scoring_models=500,
60  monte_carlo_steps=10,
61  molecular_dynamics_steps=10,
62  number_of_frames=1000,
63  nframes_write_coordinates=1,
64  write_initial_rmf=True,
65  initial_rmf_name_suffix="initial",
66  stat_file_name_suffix="stat",
67  best_pdb_name_suffix="model",
68  do_clean_first=True,
69  do_create_directories=True,
70  global_output_directory="./",
71  rmf_dir="rmfs/",
72  best_pdb_dir="pdbs/",
73  replica_stat_file_suffix="stat_replica",
74  em_object_for_rmf=None,
75  atomistic=False,
76  replica_exchange_object=None):
77 
78  self.model = model
79  self.vars = {}
80 
81  ### add check hierarchy is multistate
82  if representation:
83  if type(representation) == list:
84  self.is_multi_state = True
85  self.root_hiers = [r.prot for r in representation]
86  self.vars["number_of_states"] = len(representation)
87  else:
88  self.is_multi_state = False
89  self.root_hier = representation.prot
90  self.vars["number_of_states"] = 1
91  elif root_hier:
92  states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
93  self.vars["number_of_states"] = len(states)
94  if len(states)>1:
95  self.root_hiers = states
96  self.is_multi_state = True
97  else:
98  self.root_hier = root_hier
99  self.is_multi_state = False
100  else:
101  print "ERROR: Must provide representation or root_hier"
102  return
103 
104  self.crosslink_restraints = crosslink_restraints
105  self.em_object_for_rmf = em_object_for_rmf
106  self.monte_carlo_sample_objects = monte_carlo_sample_objects
107  if sample_objects is not None:
108  self.monte_carlo_sample_objects+=sample_objects
109  self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
110  self.output_objects = output_objects
111  self.replica_exchange_object = replica_exchange_object
112  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
113  self.vars[
114  "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
115  self.vars[
116  "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
117 
118  self.vars["simulated_annealing"]=\
119  simulated_annealing
120  self.vars["simulated_annealing_minimum_temperature"]=\
121  simulated_annealing_minimum_temperature
122  self.vars["simulated_annealing_maximum_temperature"]=\
123  simulated_annealing_maximum_temperature
124  self.vars["simulated_annealing_minimum_temperature_nframes"]=\
125  simulated_annealing_minimum_temperature_nframes
126  self.vars["simulated_annealing_maximum_temperature_nframes"]=\
127  simulated_annealing_maximum_temperature_nframes
128 
129  self.vars["num_sample_rounds"] = num_sample_rounds
130  self.vars[
131  "number_of_best_scoring_models"] = number_of_best_scoring_models
132  self.vars["monte_carlo_steps"] = monte_carlo_steps
133  self.vars["molecular_dynamics_steps"]=molecular_dynamics_steps
134  self.vars["number_of_frames"] = number_of_frames
135  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
136  self.vars["write_initial_rmf"] = write_initial_rmf
137  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
138  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
139  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
140  self.vars["do_clean_first"] = do_clean_first
141  self.vars["do_create_directories"] = do_create_directories
142  self.vars["global_output_directory"] = global_output_directory
143  self.vars["rmf_dir"] = rmf_dir
144  self.vars["best_pdb_dir"] = best_pdb_dir
145  self.vars["atomistic"] = atomistic
146  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
147 
148  def show_info(self):
149  print "ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica "
150  print "--- it stores the best scoring pdb models in pdbs/"
151  print "--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature"
152  print "--- variables:"
153  keys = self.vars.keys()
154  keys.sort()
155  for v in keys:
156  print "------", v.ljust(30), self.vars[v]
157 
158  def get_replica_exchange_object(self):
159  return self.replica_exchange_object
160 
161  def execute_macro(self):
162 
163  temp_index_factor = 100000.0
164  samplers=[]
165  sampler_mc=None
166  sampler_md=None
167  if self.monte_carlo_sample_objects is not None:
168  print "Setting up MonteCarlo"
169  sampler_mc = IMP.pmi.samplers.MonteCarlo(self.model,
170  self.monte_carlo_sample_objects,
171  self.vars["monte_carlo_temperature"])
172  if self.vars["simulated_annealing"]:
173  tmin=self.vars["simulated_annealing_minimum_temperature"]
174  tmax=self.vars["simulated_annealing_maximum_temperature"]
175  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
176  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
177  sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
178  self.output_objects.append(sampler_mc)
179  samplers.append(sampler_mc)
180 
181 
182  if self.molecular_dynamics_sample_objects is not None:
183  print "Setting up MolecularDynamics"
184  sampler_md = IMP.pmi.samplers.MolecularDynamics(self.model,
185  self.molecular_dynamics_sample_objects,
186  self.vars["monte_carlo_temperature"])
187  if self.vars["simulated_annealing"]:
188  tmin=self.vars["simulated_annealing_minimum_temperature"]
189  tmax=self.vars["simulated_annealing_maximum_temperature"]
190  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
191  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
192  sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
193  self.output_objects.append(sampler_md)
194  samplers.append(sampler_md)
195 # -------------------------------------------------------------------------
196 
197  print "Setting up ReplicaExchange"
198  rex = IMP.pmi.samplers.ReplicaExchange(self.model,
199  self.vars[
200  "replica_exchange_minimum_temperature"],
201  self.vars[
202  "replica_exchange_maximum_temperature"],
203  samplers,
204  replica_exchange_object=self.replica_exchange_object)
205  self.replica_exchange_object = rex.rem
206 
207  myindex = rex.get_my_index()
208  self.output_objects.append(rex)
209 
210  # must reset the minimum temperature due to the
211  # different binary length of rem.get_my_parameter double and python
212  # float
213  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
214 
215 # -------------------------------------------------------------------------
216 
217  globaldir = self.vars["global_output_directory"] + "/"
218  rmf_dir = globaldir + self.vars["rmf_dir"]
219  pdb_dir = globaldir + self.vars["best_pdb_dir"]
220 
221  if self.vars["do_clean_first"]:
222  pass
223 
224  if self.vars["do_create_directories"]:
225 
226  try:
227  os.makedirs(globaldir)
228  except:
229  pass
230  try:
231  os.makedirs(rmf_dir)
232  except:
233  pass
234 
235  if not self.is_multi_state:
236  try:
237  os.makedirs(pdb_dir)
238  except:
239  pass
240  else:
241  for n in range(self.vars["number_of_states"]):
242  try:
243  os.makedirs(pdb_dir + "/" + str(n))
244  except:
245  pass
246 
247 # -------------------------------------------------------------------------
248 
249  sw = IMP.pmi.tools.Stopwatch()
250  self.output_objects.append(sw)
251 
252  print "Setting up stat file"
253  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
254  low_temp_stat_file = globaldir + \
255  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
256  output.init_stat2(low_temp_stat_file,
257  self.output_objects,
258  extralabels=["rmf_file", "rmf_frame_index"])
259 
260  print "Setting up replica stat file"
261  replica_stat_file = globaldir + \
262  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
263  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
264 
265  print "Setting up best pdb files"
266  if not self.is_multi_state:
267  if self.vars["number_of_best_scoring_models"] > 0:
268  output.init_pdb_best_scoring(pdb_dir + "/" +
269  self.vars["best_pdb_name_suffix"],
270  self.root_hier,
271  self.vars[
272  "number_of_best_scoring_models"],
273  replica_exchange=True)
274  else:
275  if self.vars["number_of_best_scoring_models"] > 0:
276  for n in range(self.vars["number_of_states"]):
277  output.init_pdb_best_scoring(pdb_dir + "/" + str(n) + "/" +
278  self.vars["best_pdb_name_suffix"],
279  self.root_hiers[n],
280  self.vars[
281  "number_of_best_scoring_models"],
282  replica_exchange=True)
283 
284 # ---------------------------------------------
285 
286  if not self.em_object_for_rmf is None:
287  if not self.is_multi_state:
288  output_hierarchies = [
289  self.root_hier,
290  self.em_object_for_rmf.get_density_as_hierarchy(
291  )]
292  else:
293  output_hierarchies = self.root_hiers
294  output_hierarchies.append(
295  self.em_object_for_rmf.get_density_as_hierarchy())
296  else:
297  if not self.is_multi_state:
298  output_hierarchies = [self.root_hier]
299  else:
300  output_hierarchies = self.root_hiers
301 
302 #----------------------------------------------
303  print "Setting up and writing initial rmf coordinate file"
304  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
305  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
306  output_hierarchies)
307  if self.crosslink_restraints:
308  output.add_restraints_to_rmf(
309  init_suffix + "." + str(myindex) + ".rmf3",
310  self.crosslink_restraints)
311  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
312  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
313 
314 #----------------------------------------------
315 
316  print "Setting up production rmf files"
317 
318  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
319  output.init_rmf(rmfname, output_hierarchies)
320 
321  if self.crosslink_restraints:
322  output.add_restraints_to_rmf(rmfname, self.crosslink_restraints)
323 
324  ntimes_at_low_temp = 0
325 
326  if myindex == 0:
327  self.show_info()
328 
329  for i in range(self.vars["number_of_frames"]):
330  for nr in range(self.vars["num_sample_rounds"]):
331  if sampler_mc is not None:
332  sampler_mc.optimize(self.vars["monte_carlo_steps"])
333  if sampler_md is not None:
334  sampler_md.optimize(self.vars["molecular_dynamics_steps"])
335  score = self.model.evaluate(False)
336  output.set_output_entry("score", score)
337 
338  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
339 
340  if min_temp_index == my_temp_index:
341  print "--- frame %s score %s " % (str(i), str(score))
342 
343  if i % self.vars["nframes_write_coordinates"]==0:
344  print '--- writing coordinates'
345  if self.vars["number_of_best_scoring_models"] > 0:
346  output.write_pdb_best_scoring(score)
347  output.write_rmf(rmfname)
348  output.set_output_entry("rmf_file", rmfname)
349  output.set_output_entry("rmf_frame_index", ntimes_at_low_temp)
350  else:
351  output.set_output_entry("rmf_file", rmfname)
352  output.set_output_entry("rmf_frame_index", '-1')
353  output.write_stat2(low_temp_stat_file)
354  ntimes_at_low_temp += 1
355 
356  output.write_stat2(replica_stat_file)
357  rex.swap_temp(i, score)
358 
359 
360 # -----------------------------------------------------------------------
361 
362 
364  m,
365  data,
366  resolutions=[1,
367  10],
368  missing_bead_size=20,
369  residue_per_gaussian=None):
370  '''
371  The macro construct a component for each subunit (no splitting, nothing fancy)
372  You can pass the resolutions and the bead size for the missing residue regions.
373  To use this macro, you must provide the following data structure:
374 
375  Component pdbfile chainid rgb color fastafile sequence id
376  in fastafile
377 
378 data = [("Rpb1", pdbfile, "A", 0.00000000, (fastafile, 0)),
379  ("Rpb2", pdbfile, "B", 0.09090909, (fastafile, 1)),
380  ("Rpb3", pdbfile, "C", 0.18181818, (fastafile, 2)),
381  ("Rpb4", pdbfile, "D", 0.27272727, (fastafile, 3)),
382  ("Rpb5", pdbfile, "E", 0.36363636, (fastafile, 4)),
383  ("Rpb6", pdbfile, "F", 0.45454545, (fastafile, 5)),
384  ("Rpb7", pdbfile, "G", 0.54545455, (fastafile, 6)),
385  ("Rpb8", pdbfile, "H", 0.63636364, (fastafile, 7)),
386  ("Rpb9", pdbfile, "I", 0.72727273, (fastafile, 8)),
387  ("Rpb10", pdbfile, "L", 0.81818182, (fastafile, 9)),
388  ("Rpb11", pdbfile, "J", 0.90909091, (fastafile, 10)),
389  ("Rpb12", pdbfile, "K", 1.00000000, (fastafile, 11))]
390 
391  '''
392 
393  r = IMP.pmi.representation.Representation(m)
394 
395  # the dictionary for the hierarchies,
396  hierarchies = {}
397 
398  for d in data:
399  # retrieve the information from the data structure
400  component_name = d[0]
401  pdb_file = d[1]
402  chain_id = d[2]
403  color_id = d[3]
404  fasta_file = d[4][0]
405  # this function
406  fastids = IMP.pmi.tools.get_ids_from_fasta_file(fasta_file)
407  fasta_file_id = d[4][1]
408  # avoid to add a component with the same name
409  r.create_component(component_name,
410  color=color_id)
411 
412  r.add_component_sequence(component_name,
413  fasta_file,
414  id=fastids[fasta_file_id])
415 
416  hierarchies = r.autobuild_model(component_name,
417  pdb_file,
418  chain_id,
419  resolutions=resolutions,
420  missingbeadsize=missing_bead_size)
421 
422  r.show_component_table(component_name)
423 
424  r.set_rigid_bodies([component_name])
425 
426  r.set_chain_of_super_rigid_bodies(
427  hierarchies,
428  min_length=2,
429  max_length=2)
430 
431  r.setup_component_sequence_connectivity(component_name, resolution=1)
432  r.setup_component_geometry(component_name)
433 
434  r.setup_bonds()
435  # put it at the end of rigid bodies
436  r.set_floppy_bodies()
437 
438  # set current coordinates as reference for RMSD calculation
439  r.set_current_coordinates_as_reference_for_rmsd("Reference")
440 
441  return r
442 
443 
444 # ----------------------------------------------------------------------
445 
446 class BuildModel(object):
447  """A macro to build a Representation based on a Topology and lists of movers
448  @param model The IMP model
449  @param component_topologies List of IMP.pmi.topology.ComponentTopology items
450  @param list_of_rigid_bodies List of lists of domain names from the components.
451  @param list_of_super_rigid_bodies List of lists of domain names from the components.
452  @param chain_of_super_rigid_bodies List of lists of domain names from the components
453  Choices can only be from the same molecule
454  @param sequence_connectivity_scale For scaling the connectivity restraint
455  @param add_each_domain_as_rigid_body That way you don't have to put all of them in the list
456  @param force_create_gmm_files If True, will sample and create GMMs no matter what.
457  If False, will only only sample if the files don't exist.
458  If number of Gaussians is zero, won't do anything.
459  """
460  def __init__(self,
461  model,
462  component_topologies,
463  list_of_rigid_bodies=[],
464  list_of_super_rigid_bodies=[],
465  chain_of_super_rigid_bodies=[],
466  sequence_connectivity_scale=4.0,
467  add_each_domain_as_rigid_body=False,
468  force_create_gmm_files=False):
469  self.m = model
470  self.simo = IMP.pmi.representation.Representation(self.m,
471  upperharmonic=True,
472  disorderedlength=False)
473 
474  data=component_topologies
475  if list_of_rigid_bodies==[]:
476  print "WARNING: No list of rigid bodies inputted to build_model()"
477  if list_of_super_rigid_bodies==[]:
478  print "WARNING: No list of super rigid bodies inputted to build_model()"
479  if chain_of_super_rigid_bodies==[]:
480  print "WARNING: No chain of super rigid bodies inputted to build_model()"
481  all_dnames = set([d for sublist in list_of_rigid_bodies+list_of_super_rigid_bodies\
482  +chain_of_super_rigid_bodies for d in sublist])
483  all_available = set([c.domain_name for c in component_topologies])
484  if not all_dnames <= all_available:
485  raise ValueError("All requested movers must reference domain "
486  "names in the component topologies")
487 
488  self.domain_dict={}
489  self.resdensities={}
490  super_rigid_bodies={}
491  chain_super_rigid_bodies={}
492  rigid_bodies={}
493 
494  for c in data:
495  comp_name = c.name
496  hier_name = c.domain_name
497  color = c.color
498  fasta_file = c.fasta_file
499  fasta_id = c.fasta_id
500  pdb_name = c.pdb_file
501  chain_id = c.chain
502  res_range = c.residue_range
503  offset = c.pdb_offset
504  bead_size = c.bead_size
505  em_num_components = c.em_residues_per_gaussian
506  em_txt_file_name = c.gmm_file
507  em_mrc_file_name = c.mrc_file
508 
509  if comp_name not in self.simo.get_component_names():
510  self.simo.create_component(comp_name,color=0.0)
511  self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
512 
513  # create hierarchy (adds resolutions, beads) with autobuild and optionally add EM data
514  if em_num_components==0:
515  read_em_files=False
516  include_res0=False
517  else:
518  if (not os.path.isfile(em_txt_file_name)) or force_create_gmm_files:
519  read_em_files=False
520  include_res0=True
521  else:
522  read_em_files=True
523  include_res0=False
524 
525  outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
526  res_range,include_res0,beadsize=bead_size,
527  color=color,offset=offset)
528  if em_num_components!=0:
529  if read_em_files:
530  print "will read GMM files"
531  else:
532  print "will calculate GMMs"
533 
534  dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
535  em_mrc_file_name,em_num_components,read_em_files)
536  self.simo.add_all_atom_densities(comp_name, hierarchies=beads)
537  dens_hier+=beads
538  else:
539  dens_hier=[]
540 
541  self.resdensities[hier_name]=dens_hier
542  self.domain_dict[hier_name]=outhier+dens_hier
543 
544  # setup basic restraints
545  for c in self.simo.get_component_names():
546  self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
547  self.simo.setup_component_geometry(c)
548 
549  # create movers
550  for rblist in list_of_rigid_bodies:
551  rb=[]
552  for rbmember in rblist:
553  rb+=[h for h in self.domain_dict[rbmember]]
554  self.simo.set_rigid_body_from_hierarchies(rb)
555  for srblist in list_of_super_rigid_bodies:
556  srb=[]
557  for srbmember in rblist:
558  srb+=[h for h in self.domain_dict[srbmember]]
559  self.simo.set_super_rigid_body_from_hierarchies(srb)
560  for clist in chain_of_super_rigid_bodies:
561  crb=[]
562  for crbmember in rblist:
563  crb+=[h for h in self.domain_dict[crbmember]]
564  self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
565 
566  self.simo.set_floppy_bodies()
567  self.simo.setup_bonds()
568 
570  '''Return the Representation object'''
571  return self.simo
572 
573  def get_density_hierarchies(self,hier_name_list):
574  # return a list of density hierarchies
575  # specify the list of hierarchy names
576  dens_hier_list=[]
577  for hn in hier_name_list:
578  print hn
579  dens_hier_list+=self.resdensities[hn]
580  return dens_hier_list
581 
582  def set_gmm_models_directory(self,directory_name):
583  self.gmm_models_directory=directory_name
584 
585  def get_pdb_bead_bits(self,hierarchy):
586  pdbbits=[]
587  beadbits=[]
588  helixbits=[]
589  for h in hierarchy:
590  if "_pdb" in h.get_name():pdbbits.append(h)
591  if "_bead" in h.get_name():beadbits.append(h)
592  if "_helix" in h.get_name():helixbits.append(h)
593  return (pdbbits,beadbits,helixbits)
594 
595  def scale_bead_radii(self,nresidues,scale):
596  scaled_beads=set()
597  for h in self.domain_dict:
598  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
599  slope=(1.0-scale)/(1.0-float(nresidues))
600 
601  for b in beadbits:
602  # I have to do the following
603  # because otherwise we'll scale more than once
604  if b not in scaled_beads:
605  scaled_beads.add(b)
606  else:
607  continue
608  radius=IMP.core.XYZR(b).get_radius()
609  num_residues=len(IMP.pmi.tools.get_residue_indexes(b))
610  scale_factor=slope*float(num_residues)+1.0
611  print scale_factor
612  new_radius=scale_factor*radius
613  IMP.core.XYZR(b).set_radius(new_radius)
614  print b.get_name()
615  print "particle with radius "+str(radius)+" and "+str(num_residues)+" residues scaled to a new radius "+str(new_radius)
616 
617 
618  def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
619  #density generation for the EM restraint
620  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
621  #get the number of residues from the pdb bits
622  res_ind=[]
623  for pb in pdbbits+helixbits:
624  for p in IMP.core.get_leaves(pb):
626 
627  number_of_residues=len(set(res_ind))
628  outhier=[]
629  if read:
630  if len(pdbbits)!=0:
631  outhier+=simo.add_component_density(compname,
632  pdbbits,
633  num_components=num_components,
634  resolution=0,
635  inputfile=txtfilename)
636  if len(helixbits)!=0:
637  outhier+=simo.add_component_density(compname,
638  helixbits,
639  num_components=num_components,
640  resolution=1,
641  inputfile=txtfilename)
642 
643 
644  else:
645  if len(pdbbits)!=0:
646  num_components=number_of_residues/abs(num_components)
647  outhier+=simo.add_component_density(compname,
648  pdbbits,
649  num_components=num_components,
650  resolution=0,
651  outputfile=txtfilename,
652  outputmap=mrcfilename,
653  multiply_by_total_mass=True)
654 
655  if len(helixbits)!=0:
656  num_components=number_of_residues/abs(num_components)
657  outhier+=simo.add_component_density(compname,
658  helixbits,
659  num_components=num_components,
660  resolution=1,
661  outputfile=txtfilename,
662  outputmap=mrcfilename,
663  multiply_by_total_mass=True)
664 
665  return outhier,beadbits
666 
667  def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
668  beadsize=5,color=0.0,offset=0):
669  if pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is not "BEADS" :
670  if resrange[-1]==-1:
671  resrange=(resrange[0],len(simo.sequence_dict[comname]))
672  if include_res0:
673  outhier=simo.autobuild_model(comname,
674  pdbname=pdbname,
675  chain=chain,
676  resrange=resrange,
677  resolutions=[0,1,10],
678  offset=offset,
679  color=color,
680  missingbeadsize=beadsize)
681  else:
682  outhier=simo.autobuild_model(comname,
683  pdbname=pdbname,
684  chain=chain,
685  resrange=resrange,
686  resolutions=[1,10],
687  offset=offset,
688  color=color,
689  missingbeadsize=beadsize)
690 
691 
692  elif pdbname is not None and pdbname is "IDEAL_HELIX" and pdbname is not "BEADS" :
693  outhier=simo.add_component_ideal_helix(comname,
694  resolutions=[1,10],
695  resrange=resrange,
696  color=color,
697  show=False)
698 
699  elif pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is "BEADS" :
700  outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
701 
702  else:
703 
704  seq_len=len(simo.sequence_dict[comname])
705  outhier=simo.add_component_necklace(comname,
706  begin=1,
707  end=seq_len,
708  length=beadsize)
709 
710  return outhier
711 
712 
713 class BuildModel1(object):
714  """Deprecated building macro - use BuildModel()
715  @param representation The PMI representation"""
716 
717  def __init__(self, representation):
718  self.simo=representation
719  self.gmm_models_directory="."
720 
721  def set_gmm_models_directory(self,directory_name):
722  self.gmm_models_directory=directory_name
723 
724  def build_model(self,data_structure,sequence_connectivity_scale=4.0):
725  """Create model.
726  @param data_structure List of lists containing these entries:
727  comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
728  res_range, read_em_files, bead_size, rb, super_rb,
729  em_num_components, em_txt_file_name, em_mrc_file_name
730  @param sequence_connectivity_scale
731  """
732  self.domain_dict={}
733  self.resdensities={}
734  super_rigid_bodies={}
735  chain_super_rigid_bodies={}
736  rigid_bodies={}
737 
738  for d in data_structure:
739  comp_name = d[0]
740  hier_name = d[1]
741  color = d[2]
742  fasta_file = d[3]
743  fasta_id = d[4]
744  pdb_name = d[5]
745  chain_id = d[6]
746  res_range = d[7][0:2]
747  try:
748  offset = d[7][2]
749  except:
750  offset = 0
751  read_em_files = d[8]
752  bead_size = d[9]
753  rb = d[10]
754  super_rb = d[11]
755  em_num_components = d[12]
756  em_txt_file_name = d[13]
757  em_mrc_file_name = d[14]
758  chain_of_super_rb = d[15]
759 
760  if comp_name not in self.simo.get_component_names():
761  self.simo.create_component(comp_name,color=0.0)
762  self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
763  outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
764 
765 
766  if not read_em_files is None:
767  if em_txt_file_name is " ": em_txt_file_name=self.gmm_models_directory+"/"+hier_name+".txt"
768  if em_mrc_file_name is " ": em_mrc_file_name=self.gmm_models_directory+"/"+hier_name+".mrc"
769 
770 
771  dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
772  self.simo.add_all_atom_densities(comp_name, hierarchies=beads)
773  dens_hier+=beads
774 
775  else:
776  dens_hier=[]
777 
778  self.resdensities[hier_name]=dens_hier
779  self.domain_dict[hier_name]=outhier+dens_hier
780 
781  if rb is not None:
782  if rb not in rigid_bodies:
783  rigid_bodies[rb]=[h for h in self.domain_dict[hier_name]]
784  else:
785  rigid_bodies[rb]+=[h for h in self.domain_dict[hier_name]]
786 
787 
788  if super_rb is not None:
789  for k in super_rb:
790  if k not in super_rigid_bodies:
791  super_rigid_bodies[k]=[h for h in self.domain_dict[hier_name]]
792  else:
793  super_rigid_bodies[k]+=[h for h in self.domain_dict[hier_name]]
794 
795  if chain_of_super_rb is not None:
796  for k in chain_of_super_rb:
797  if k not in chain_super_rigid_bodies:
798  chain_super_rigid_bodies[k]=[h for h in self.domain_dict[hier_name]]
799  else:
800  chain_super_rigid_bodies[k]+=[h for h in self.domain_dict[hier_name]]
801 
802  self.rigid_bodies=rigid_bodies
803 
804 
805  for c in self.simo.get_component_names():
806  self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
807  self.simo.setup_component_geometry(c)
808 
809 
810  for rb in rigid_bodies:
811  self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
812 
813  for k in super_rigid_bodies:
814  self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
815 
816  for k in chain_super_rigid_bodies:
817  self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
818 
819  self.simo.set_floppy_bodies()
820  self.simo.setup_bonds()
821 
822  def get_density_hierarchies(self,hier_name_list):
823  # return a list of density hierarchies
824  # specify the list of hierarchy names
825  dens_hier_list=[]
826  for hn in hier_name_list:
827  print hn
828  dens_hier_list+=self.resdensities[hn]
829  return dens_hier_list
830 
831  def get_pdb_bead_bits(self,hierarchy):
832  pdbbits=[]
833  beadbits=[]
834  helixbits=[]
835  for h in hierarchy:
836  if "_pdb" in h.get_name():pdbbits.append(h)
837  if "_bead" in h.get_name():beadbits.append(h)
838  if "_helix" in h.get_name():helixbits.append(h)
839  return (pdbbits,beadbits,helixbits)
840 
841  def scale_bead_radii(self,nresidues,scale):
842  scaled_beads=set()
843  for h in self.domain_dict:
844  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
845  slope=(1.0-scale)/(1.0-float(nresidues))
846 
847  for b in beadbits:
848  # I have to do the following
849  # because otherwise we'll scale more than once
850  if b not in scaled_beads:
851  scaled_beads.add(b)
852  else:
853  continue
854  radius=IMP.core.XYZR(b).get_radius()
855  num_residues=len(IMP.pmi.tools.get_residue_indexes(b))
856  scale_factor=slope*float(num_residues)+1.0
857  print scale_factor
858  new_radius=scale_factor*radius
859  IMP.core.XYZR(b).set_radius(new_radius)
860  print b.get_name()
861  print "particle with radius "+str(radius)+" and "+str(num_residues)+" residues scaled to a new radius "+str(new_radius)
862 
863 
864  def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
865  #density generation for the EM restraint
866  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
867 
868  #get the number of residues from the pdb bits
869  res_ind=[]
870  for pb in pdbbits+helixbits:
871  for p in IMP.core.get_leaves(pb):
873 
874  number_of_residues=len(set(res_ind))
875  outhier=[]
876  if read:
877  if len(pdbbits)!=0:
878  outhier+=simo.add_component_density(compname,
879  pdbbits,
880  num_components=num_components, # number of gaussian into which the simulated density is approximated
881  resolution=0, # resolution that you want to calculate the simulated density
882  inputfile=txtfilename) # read what it was calculated before
883  if len(helixbits)!=0:
884  outhier+=simo.add_component_density(compname,
885  helixbits,
886  num_components=num_components, # number of gaussian into which the simulated density is approximated
887  resolution=1, # resolution that you want to calculate the simulated density
888  inputfile=txtfilename) # read what it was calculated before
889 
890 
891  else:
892  if len(pdbbits)!=0:
893  if num_components<0:
894  #if negative calculate the number of gmm components automatically
895  # from the number of residues
896  num_components=number_of_residues/abs(num_components)
897  outhier+=simo.add_component_density(compname,
898  pdbbits,
899  num_components=num_components, # number of gaussian into which the simulated density is approximated
900  resolution=0, # resolution that you want to calculate the simulated density
901  outputfile=txtfilename, # do the calculation
902  outputmap=mrcfilename,
903  multiply_by_total_mass=True) # do the calculation and output the mrc
904 
905  if len(helixbits)!=0:
906  if num_components<0:
907  #if negative calculate the number of gmm components automatically
908  # from the number of residues
909  num_components=number_of_residues/abs(num_components)
910  outhier+=simo.add_component_density(compname,
911  helixbits,
912  num_components=num_components, # number of gaussian into which the simulated density is approximated
913  resolution=1, # resolution that you want to calculate the simulated density
914  outputfile=txtfilename, # do the calculation
915  outputmap=mrcfilename,
916  multiply_by_total_mass=True) # do the calculation and output the mrc
917 
918  return outhier,beadbits
919 
920  def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
921 
922  if pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is not "BEADS" :
923  if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
924  if read==False:
925  outhier=simo.autobuild_model(comname,
926  pdbname=pdbname,
927  chain=chain,
928  resrange=resrange,
929  resolutions=[0,1,10],
930  offset=offset,
931  color=color,
932  missingbeadsize=beadsize)
933  else:
934  outhier=simo.autobuild_model(comname,
935  pdbname=pdbname,
936  chain=chain,
937  resrange=resrange,
938  resolutions=[1,10],
939  offset=offset,
940  color=color,
941  missingbeadsize=beadsize)
942 
943 
944  elif pdbname is not None and pdbname is "IDEAL_HELIX" and pdbname is not "BEADS" :
945 
946  outhier=simo.add_component_ideal_helix(comname,
947  resolutions=[1,10],
948  resrange=resrange,
949  color=color,
950  show=False)
951 
952  elif pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is "BEADS" :
953  outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
954 
955  else:
956 
957  seq_len=len(simo.sequence_dict[comname])
958  outhier=simo.add_component_necklace(comname,
959  begin=1,
960  end=seq_len,
961  length=beadsize)
962 
963  return outhier
964 
965 
966 
967 # ----------------------------------------------------------------------
968 
970  """A macro for running all the basic operations of analysis.
971  Includes clustering, precision analysis, and making ensemble density maps.
972  A number of plots are also supported.
973  @param model The IMP model
974  @param stat_file_name_suffix
975  @param merge_directories The directories containing output files
976  @param best_pdb_name_suffix
977  @param do_clean_first
978  @param do_create_directories
979  @param global_output_directory Where everything is
980  @param replica_stat_file_suffix
981  @param global_analysis_result_directory
982  """
983  def __init__(self, model,
984  merge_directories=["./"],
985  stat_file_name_suffix="stat",
986  best_pdb_name_suffix="model",
987  do_clean_first=True,
988  do_create_directories=True,
989  global_output_directory="output/",
990  replica_stat_file_suffix="stat_replica",
991  global_analysis_result_directory="./analysis/"):
992 
993 
994  try:
995  from mpi4py import MPI
996  self.comm = MPI.COMM_WORLD
997  self.rank = self.comm.Get_rank()
998  self.number_of_processes = self.comm.size
999  except ImportError:
1000  self.rank = 0
1001  self.number_of_processes = 1
1002 
1003  self.model = model
1004  stat_dir = global_output_directory
1005  self.stat_files = []
1006  # it contains the position of the root directories
1007  for rd in merge_directories:
1008  stat_files = glob.glob(rd + "/" + stat_dir + "/stat.*.out")
1009  if len(stat_files)==0:
1010  print "WARNING: no stat files found in",rd + "/" + stat_dir + "/stat.*.out"
1011  self.stat_files += stat_files
1012 
1013 
1015  score_key="SimplifiedModel_Total_Score_None",
1016  rmf_file_key="rmf_file",
1017  rmf_file_frame_key="rmf_frame_index",
1018  outputdir="./",
1019  get_every=1,
1020  nframes_trajectory=10000):
1021  """ Get a trajectory of the modeling run, for generating demonstrative movies
1022  @param score_key The score for ranking models
1023  @param rmf_file_key Key pointing to RMF filename
1024  @param rmf_file_frame_key Key pointing to RMF frame number
1025  @param outputdir The local output directory used in the run
1026  @param get_every Extract every nth frame
1027  @param nframes_trajectory Total number of frames of the trajectory
1028  """
1029  from operator import itemgetter
1030  import math
1031 
1032  trajectory_models = IMP.pmi.io.get_trajectory_models(self.stat_files,
1033  score_key,
1034  rmf_file_key,
1035  rmf_file_frame_key,
1036  get_every)
1037  rmf_file_list=trajectory_models[0]
1038  rmf_file_frame_list=trajectory_models[1]
1039  score_list=map(float, trajectory_models[2])
1040 
1041  max_score=max(score_list)
1042  min_score=min(score_list)
1043 
1044 
1045  bins=[(max_score-min_score)*math.exp(-float(i))+min_score for i in range(nframes_trajectory)]
1046  binned_scores=[None]*nframes_trajectory
1047  binned_model_indexes=[-1]*nframes_trajectory
1048 
1049  for model_index,s in enumerate(score_list):
1050  bins_score_diffs=[abs(s-b) for b in bins]
1051  bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1052  if binned_scores[bin_index]==None:
1053  binned_scores[bin_index]=s
1054  binned_model_indexes[bin_index]=model_index
1055  else:
1056  old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1057  new_diff=abs(s-bins[bin_index])
1058  if new_diff < old_diff:
1059  binned_scores[bin_index]=s
1060  binned_model_indexes[bin_index]=model_index
1061 
1062  print binned_scores
1063  print binned_model_indexes
1064 
1065 
1066 
1067  def clustering(self,
1068  score_key="SimplifiedModel_Total_Score_None",
1069  rmf_file_key="rmf_file",
1070  rmf_file_frame_key="rmf_frame_index",
1071  prefiltervalue=None,
1072  feature_keys=[],
1073  outputdir="./",
1074  alignment_components=None,
1075  number_of_best_scoring_models=10,
1076  rmsd_calculation_components=None,
1077  distance_matrix_file=None,
1078  load_distance_matrix_file=False,
1079  skip_clustering=False,
1080  number_of_clusters=1,
1081  display_plot=False,
1082  exit_after_display=True,
1083  get_every=1,
1084  first_and_last_frames=None,
1085  density_custom_ranges=None,
1086  write_pdb_with_centered_coordinates=False,
1087  voxel_size=5.0):
1088  """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps
1089  @param score_key The score for ranking models
1090  @param rmf_file_key Key pointing to RMF filename
1091  @param rmf_file_frame_key Key pointing to RMF frame number
1092  @param prefiltervalue Only include frames where the score key is below this value
1093  @param feature_keys Keywords for which you want to calculate average,
1094  medians, etc,
1095  @param outputdir The local output directory used in the run
1096  @param alignment_components List of tuples for aligning the structures
1097  e.g. ["Rpb1", (20,100,"Rpb2"), .....]
1098  @param number_of_best_scoring_models Num models to keep per run
1099  @param rmsd_calculation_components List of tuples for calculating RMSD
1100  e.g. ["Rpb1", (20,100,"Rpb2"), .....]
1101  @param distance_matrix_file Where to store/read the distance matrix
1102  @param load_distance_matrix_file Try to load the distance matrix file
1103  @param skip_clustering Just extract the best scoring models and save the pdbs
1104  @param number_of_clusters Number of k-means clusters
1105  @param display_plot Display the distance matrix
1106  @param exit_after_display Exit after displaying distance matrix
1107  @param get_every Extract every nth frame
1108  @param first_and_last_frames A tuple with the first and last frames to be
1109  analyzed. Values are percentages!
1110  Default: get all frames
1111  @param density_custom_ranges List of tuples or strings for density calculation
1112  e.g. ["Rpb1", (20,100,"Rpb2"), .....]
1113  @param write_pdb_with_centered_coordinates
1114  @param voxel_size Used for the density output
1115  """
1116 
1117 
1118  if self.rank==0:
1119  try:
1120  os.mkdir(outputdir)
1121  except:
1122  pass
1123 
1124 
1125  if not load_distance_matrix_file:
1126  if len(self.stat_files)==0: print "ERROR: no stat file found in the given path"; return
1127  my_stat_files=IMP.pmi.tools.chunk_list_into_segments(
1128  self.stat_files,self.number_of_processes)[self.rank]
1129  best_models = IMP.pmi.io.get_best_models(my_stat_files,
1130  score_key,
1131  feature_keys,
1132  rmf_file_key,
1133  rmf_file_frame_key,
1134  prefiltervalue,
1135  get_every)
1136  rmf_file_list=best_models[0]
1137  rmf_file_frame_list=best_models[1]
1138  score_list=best_models[2]
1139  feature_keyword_list_dict=best_models[3]
1140 
1141 # ------------------------------------------------------------------------
1142 # collect all the files and scores
1143 # ------------------------------------------------------------------------
1144 
1145  if self.number_of_processes > 1:
1146  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1147  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1148  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1149  rmf_file_frame_list)
1150  for k in feature_keyword_list_dict:
1151  feature_keyword_list_dict[k] = IMP.pmi.tools.scatter_and_gather(
1152  feature_keyword_list_dict[k])
1153 
1154  # sort by score and get the best scoring ones
1155  score_rmf_tuples = zip(score_list,
1156  rmf_file_list,
1157  rmf_file_frame_list,
1158  range(len(score_list)))
1159 
1160  # keep subset of frames if requested
1161  if first_and_last_frames is not None:
1162  nframes = len(score_rmf_tuples)
1163  first_frame = int(first_and_last_frames[0] * nframes)
1164  last_frame = int(first_and_last_frames[1] * nframes)
1165  if last_frame > len(score_rmf_tuples):
1166  last_frame = -1
1167  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1168 
1169  # sort RMFs by the score_key in ascending order, and store the rank
1170  best_score_rmf_tuples = sorted(score_rmf_tuples,
1171  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1172  best_score_rmf_tuples=[t+(n,) for n,t in enumerate(best_score_rmf_tuples)]
1173 
1174  # sort the feature scores in the same way
1175  best_score_feature_keyword_list_dict = defaultdict(list)
1176  for tpl in best_score_rmf_tuples:
1177  index = tpl[3]
1178  for f in feature_keyword_list_dict:
1179  best_score_feature_keyword_list_dict[f].append(
1180  feature_keyword_list_dict[f][index])
1181 
1182  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1183  best_score_rmf_tuples,
1184  self.number_of_processes)[self.rank]
1185 
1186 # ------------------------------------------------------------------------
1187 # optionally don't compute distance matrix or cluster, just write top files
1188 # ------------------------------------------------------------------------
1189  if skip_clustering:
1190  dircluster=os.path.join(outputdir,"all_models."+str(n))
1191  try:
1192  os.mkdir(outputdir)
1193  except:
1194  pass
1195  try:
1196  os.mkdir(dircluster)
1197  except:
1198  pass
1199  clusstat=open(os.path.join(dircluster,"stat."+str(self.rank)+".out"),"w")
1200  for cnt,tpl in enumerate(my_best_score_rmf_tuples):
1201  rmf_name=tpl[1]
1202  rmf_frame_number=tpl[2]
1203  tmp_dict={}
1204  index=tpl[4]
1205  for key in best_score_feature_keyword_list_dict:
1206  tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1207 
1208  prot=IMP.pmi.analysis.get_hier_from_rmf(self.model,rmf_frame_number,rmf_name)
1209  if not prot:
1210  continue
1211 
1213  out_pdb_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".pdb")
1214  out_rmf_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".rmf")
1215  o.init_pdb(out_pdb_fn,prot)
1216  o.write_pdb(out_pdb_fn,
1217  translate_to_geometric_center=write_pdb_with_centered_coordinates)
1218 
1219  tmp_dict["local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1220  tmp_dict["rmf_file_full_path"]=rmf_name
1221  tmp_dict["local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1222  tmp_dict["local_rmf_frame_number"]=0
1223 
1224  clusstat.write(str(tmp_dict)+"\n")
1225  o.init_rmf(out_rmf_fn,[prot])
1226  o.write_rmf(out_rmf_fn)
1227  o.close_rmf(out_rmf_fn)
1228  return
1229 
1230 
1231 #-------------------------------------------------------------
1232 # read the coordinates
1233 # ------------------------------------------------------------
1234  rmsd_weights = IMP.pmi.io.get_bead_sizes(self.model,
1235  my_best_score_rmf_tuples[0],
1236  rmsd_calculation_components)
1237  got_coords = IMP.pmi.io.read_coordinates_of_rmfs(self.model,
1238  my_best_score_rmf_tuples,
1239  alignment_components,
1240  rmsd_calculation_components)
1241 
1242  # note! the coordinates are simple float tuples, NOT decorators, NOT Vector3D,
1243  # NOR particles, because these object cannot be serialized. We need serialization
1244  # for the parallel computation based on mpi.
1245  all_coordinates=got_coords[0] # dict:key=component name,val=coords per hit
1246  alignment_coordinates=got_coords[1] # same as above, limited to alignment bits
1247  rmsd_coordinates=got_coords[2] # same as above, limited to RMSD bits
1248  rmf_file_name_index_dict=got_coords[3] # dictionary with key=RMF, value=score rank
1249  all_rmf_file_names=got_coords[4] # RMF file per hit
1250 
1251 
1252  # broadcast the coordinates
1253  if self.number_of_processes > 1:
1254  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1255  all_coordinates)
1256  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1257  all_rmf_file_names)
1258  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1259  rmf_file_name_index_dict)
1260  alignment_coordinates=IMP.pmi.tools.scatter_and_gather(
1261  alignment_coordinates)
1262  rmsd_coordinates=IMP.pmi.tools.scatter_and_gather(
1263  rmsd_coordinates)
1264 
1265  if self.rank == 0:
1266  # save needed informations in external files
1267  self.save_objects(
1268  [best_score_feature_keyword_list_dict,
1269  rmf_file_name_index_dict],
1270  ".macro.pkl")
1271 
1272 # ------------------------------------------------------------------------
1273 # Calculate distance matrix and cluster
1274 # ------------------------------------------------------------------------
1275  print "setup clustering class"
1276  Clusters = IMP.pmi.analysis.Clustering(rmsd_weights)
1277 
1278  for n, model_coordinate_dict in enumerate(all_coordinates):
1279  template_coordinate_dict = {}
1280  # let's try to align
1281  if alignment_components is not None and len(Clusters.all_coords) == 0:
1282  # set the first model as template coordinates
1283  Clusters.set_template(alignment_coordinates[n])
1284  Clusters.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1285 
1286  print "Global calculating the distance matrix"
1287 
1288  # calculate distance matrix, all against all
1289  Clusters.dist_matrix()
1290 
1291  # perform clustering and optionally display
1292  if self.rank == 0:
1293  Clusters.do_cluster(number_of_clusters)
1294  if display_plot:
1295  if self.rank == 0:
1296  Clusters.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1297  if exit_after_display:
1298  exit()
1299  Clusters.save_distance_matrix_file(file_name=distance_matrix_file)
1300 
1301 # ------------------------------------------------------------------------
1302 # Alteratively, load the distance matrix from file and cluster that
1303 # ------------------------------------------------------------------------
1304  else:
1305  if self.rank==0:
1306  print "setup clustering class"
1307  Clusters = IMP.pmi.analysis.Clustering()
1308  Clusters.load_distance_matrix_file(file_name=distance_matrix_file)
1309  print "clustering with %s clusters" % str(number_of_clusters)
1310  Clusters.do_cluster(number_of_clusters)
1311  [best_score_feature_keyword_list_dict,
1312  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1313  if display_plot:
1314  if self.rank == 0:
1315  Clusters.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1316  if exit_after_display:
1317  exit()
1318  if self.number_of_processes > 1:
1319  self.comm.Barrier()
1320 
1321 # ------------------------------------------------------------------------
1322 # now save all informations about the clusters
1323 # ------------------------------------------------------------------------
1324 
1325  if self.rank == 0:
1326  print Clusters.get_cluster_labels()
1327  for n, cl in enumerate(Clusters.get_cluster_labels()):
1328  print "rank %s " % str(self.rank)
1329  print "cluster %s " % str(n)
1330  print "cluster label %s " % str(cl)
1331  print Clusters.get_cluster_label_names(cl)
1332 
1333  # first initialize the Density class if requested
1334 
1335  if density_custom_ranges:
1336  DensModule = IMP.pmi.analysis.GetModelDensity(
1337  density_custom_ranges,
1338  voxel=voxel_size)
1339 
1340  dircluster = outputdir + "/cluster." + str(n) + "/"
1341  try:
1342  os.mkdir(dircluster)
1343  except:
1344  pass
1345 
1346  rmsd_dict = {"AVERAGE_RMSD":
1347  str(Clusters.get_cluster_label_average_rmsd(cl))}
1348  clusstat = open(dircluster + "stat.out", "w")
1349  for k, structure_name in enumerate(Clusters.get_cluster_label_names(cl)):
1350 
1351  # extract the features
1352  tmp_dict = {}
1353  tmp_dict.update(rmsd_dict)
1354  index = rmf_file_name_index_dict[structure_name]
1355  for key in best_score_feature_keyword_list_dict:
1356  tmp_dict[
1357  key] = best_score_feature_keyword_list_dict[
1358  key][
1359  index]
1360  # get the rmf name and the frame number from the list of
1361  # frame names
1362  rmf_name = structure_name.split("|")[0]
1363  rmf_frame_number = int(structure_name.split("|")[1])
1364 
1365  clusstat.write(str(tmp_dict) + "\n")
1366  prot,rs = IMP.pmi.analysis.get_hier_and_restraints_from_rmf(
1367  self.model,
1368  rmf_frame_number,
1369  rmf_name)
1370  if not prot:
1371  continue
1372 
1373  if k > 0:
1374  model_index = Clusters.get_model_index_from_name(
1375  structure_name)
1376  transformation = Clusters.get_transformation_to_first_member(
1377  cl,
1378  model_index)
1379 
1380  rbs = set()
1381  for p in IMP.atom.get_leaves(prot):
1382  if not IMP.core.XYZR.get_is_setup(p):
1384  IMP.core.XYZR(p).set_radius(0.0001)
1385  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1386 
1388  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1389  rbs.add(rb)
1390  else:
1392  transformation)
1393  for rb in rbs:
1394  IMP.core.transform(rb,transformation)
1395 
1396  # add the density
1397  if density_custom_ranges:
1398  DensModule.add_subunits_density(prot)
1399 
1400  # pdb writing should be optimized!
1401  o = IMP.pmi.output.Output()
1402  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1403  o.write_pdb(dircluster + str(k) + ".pdb")
1404 
1405  o.init_rmf(dircluster + str(k) + ".rmf3", [prot],rs)
1406  # IMP.rmf.add_restraints(o.dictionary_rmfs[dircluster+str(n)+".rmf3"],restraints)
1407  o.write_rmf(dircluster + str(k) + ".rmf3")
1408  o.close_rmf(dircluster + str(k) + ".rmf3")
1409 
1410  del o
1411  # IMP.atom.destroy(prot)
1412 
1413  if density_custom_ranges:
1414  DensModule.write_mrc(path=dircluster)
1415  del DensModule
1416 
1417  if self.number_of_processes>1:
1418  self.comm.Barrier()
1419 
1420  def save_objects(self, objects, file_name):
1421  import pickle
1422  outf = open(file_name, 'w')
1423  pickle.dump(objects, outf)
1424  outf.close()
1425 
1426  def load_objects(self, file_name):
1427  import pickle
1428  inputf = open(file_name, 'r')
1429  objects = pickle.load(inputf)
1430  inputf.close()
1431  return objects
A macro for running all the basic operations of analysis.
Definition: macros.py:969
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:368
Miscellaneous utilities.
Definition: tools.py:1
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1067
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
A class to cluster structures.
Representation of the system.
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:1014
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
Definition: XYZR.h:47
static XYZR setup_particle(kernel::Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def build_model
Create model.
Definition: macros.py:724
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:961
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def BuildModel0
The macro construct a component for each subunit (no splitting, nothing fancy) You can pass the resol...
Definition: macros.py:363
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
Definition: rigid_bodies.h:369
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:19
A macro to build a Representation based on a Topology and lists of movers.
Definition: macros.py:446
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
Classes for writing output files and processing them.
Definition: output.py:1
Sampling of the system.
Definition: samplers.py:1
Deprecated building macro - use BuildModel()
Definition: macros.py:713
Hierarchies get_leaves(const Selection &h)
A class to compute mean density maps from structures Keeps a dictionary of density maps...
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
This "overloaded" function retrieves the residue indexes for each particle which is an instance of Fr...
Definition: tools.py:920
def get_representation
Return the Representation object.
Definition: macros.py:569
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27