IMP logo
IMP Reference Guide  2.8.0
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
8 import IMP.pmi.tools
9 import IMP.pmi.samplers
10 import IMP.pmi.output
11 import IMP.pmi.analysis
12 import IMP.pmi.io
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import RMF
17 import os
18 import glob
19 from operator import itemgetter
20 from collections import defaultdict
21 import numpy as np
22 import string
23 
24 class ReplicaExchange0(object):
25  """A macro to help setup and run replica exchange.
26  Supports Monte Carlo and molecular dynamics.
27  Produces trajectory RMF files, best PDB structures,
28  and output stat files.
29  """
30  def __init__(self, model,
31  representation=None,
32  root_hier=None,
33  sample_objects=None, # DEPRECATED
34  monte_carlo_sample_objects=None,
35  molecular_dynamics_sample_objects=None,
36  output_objects=[],
37  crosslink_restraints=None,
38  monte_carlo_temperature=1.0,
39  simulated_annealing=False,
40  simulated_annealing_minimum_temperature=1.0,
41  simulated_annealing_maximum_temperature=2.5,
42  simulated_annealing_minimum_temperature_nframes=100,
43  simulated_annealing_maximum_temperature_nframes=100,
44  replica_exchange_minimum_temperature=1.0,
45  replica_exchange_maximum_temperature=2.5,
46  replica_exchange_swap=True,
47  num_sample_rounds=1,
48  number_of_best_scoring_models=500,
49  monte_carlo_steps=10,
50  self_adaptive=False,
51  molecular_dynamics_steps=10,
52  molecular_dynamics_max_time_step=1.0,
53  number_of_frames=1000,
54  save_coordinates_mode="lowest_temperature",
55  nframes_write_coordinates=1,
56  write_initial_rmf=True,
57  initial_rmf_name_suffix="initial",
58  stat_file_name_suffix="stat",
59  best_pdb_name_suffix="model",
60  do_clean_first=True,
61  do_create_directories=True,
62  global_output_directory="./",
63  rmf_dir="rmfs/",
64  best_pdb_dir="pdbs/",
65  replica_stat_file_suffix="stat_replica",
66  em_object_for_rmf=None,
67  atomistic=False,
68  replica_exchange_object=None,
69  test_mode=False):
70  """Constructor.
71  @param model The IMP model
72  @param representation PMI.representation.Representation object
73  (or list of them, for multi-state modeling)
74  @param root_hier Instead of passing Representation, pass the System hierarchy
75  @param monte_carlo_sample_objects Objects for MC sampling; a list of
76  structural components (generally the representation) that will
77  be moved and restraints with parameters that need to
78  be sampled.
79  For PMI2: just pass flat list of movers
80  @param molecular_dynamics_sample_objects Objects for MD sampling
81  For PMI2: just pass flat list of particles
82  @param output_objects A list of structural objects and restraints
83  that will be included in output (statistics files). Any object
84  that provides a get_output() method can be used here.
85  @param crosslink_restraints List of cross-link restraints that will
86  be included in output RMF files (for visualization).
87  @param monte_carlo_temperature MC temp (may need to be optimized
88  based on post-sampling analysis)
89  @param simulated_annealing If True, perform simulated annealing
90  @param simulated_annealing_minimum_temperature Should generally be
91  the same as monte_carlo_temperature.
92  @param simulated_annealing_minimum_temperature_nframes Number of
93  frames to compute at minimum temperature.
94  @param simulated_annealing_maximum_temperature_nframes Number of
95  frames to compute at
96  temps > simulated_annealing_maximum_temperature.
97  @param replica_exchange_minimum_temperature Low temp for REX; should
98  generally be the same as monte_carlo_temperature.
99  @param replica_exchange_maximum_temperature High temp for REX
100  @param replica_exchange_swap Boolean, enable disable temperature
101  swap (Default=True)
102  @param num_sample_rounds Number of rounds of MC/MD per cycle
103  @param number_of_best_scoring_models Number of top-scoring PDB models
104  to keep around for analysis
105  @param monte_carlo_steps Number of MC steps per round
106  @param self_adaptive self adaptive scheme for monte carlo movers
107  @param molecular_dynamics_steps Number of MD steps per round
108  @param molecular_dynamics_max_time_step Max time step for MD
109  @param number_of_frames Number of REX frames to run
110  @param save_coordinates_mode string: how to save coordinates.
111  "lowest_temperature" (default) only the lowest temperatures is saved
112  "25th_score" all replicas whose score is below the 25th percentile
113  "50th_score" all replicas whose score is below the 50th percentile
114  "75th_score" all replicas whose score is below the 75th percentile
115  @param nframes_write_coordinates How often to write the coordinates
116  of a frame
117  @param write_initial_rmf Write the initial configuration
118  @param global_output_directory Folder that will be created to house
119  output.
120  @param test_mode Set to True to avoid writing any files, just test one frame.
121  """
122  self.model = model
123  self.vars = {}
124  self.pmi2 = False
125 
126  ### add check hierarchy is multistate
127  self.output_objects = output_objects
128  self.representation = representation
129  if representation:
130  if type(representation) == list:
131  self.is_multi_state = True
132  self.root_hiers = [r.prot for r in representation]
133  self.vars["number_of_states"] = len(representation)
134  else:
135  self.is_multi_state = False
136  self.root_hier = representation.prot
137  self.vars["number_of_states"] = 1
138  elif root_hier and type(root_hier) == IMP.atom.Hierarchy and root_hier.get_name()=='System':
139  self.pmi2 = True
140  self.output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
141  self.root_hier = root_hier
142  states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
143  self.vars["number_of_states"] = len(states)
144  if len(states)>1:
145  self.root_hiers = states
146  self.is_multi_state = True
147  else:
148  self.root_hier = root_hier
149  self.is_multi_state = False
150  else:
151  raise Exception("Must provide representation or System hierarchy (root_hier)")
152 
153  self.crosslink_restraints = crosslink_restraints
154  self.em_object_for_rmf = em_object_for_rmf
155  self.monte_carlo_sample_objects = monte_carlo_sample_objects
156  self.vars["self_adaptive"]=self_adaptive
157  if sample_objects is not None:
158  self.monte_carlo_sample_objects+=sample_objects
159  self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
160  self.replica_exchange_object = replica_exchange_object
161  self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
162  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
163  self.vars[
164  "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
165  self.vars[
166  "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
167  self.vars["replica_exchange_swap"] = replica_exchange_swap
168  self.vars["simulated_annealing"]=\
169  simulated_annealing
170  self.vars["simulated_annealing_minimum_temperature"]=\
171  simulated_annealing_minimum_temperature
172  self.vars["simulated_annealing_maximum_temperature"]=\
173  simulated_annealing_maximum_temperature
174  self.vars["simulated_annealing_minimum_temperature_nframes"]=\
175  simulated_annealing_minimum_temperature_nframes
176  self.vars["simulated_annealing_maximum_temperature_nframes"]=\
177  simulated_annealing_maximum_temperature_nframes
178 
179  self.vars["num_sample_rounds"] = num_sample_rounds
180  self.vars[
181  "number_of_best_scoring_models"] = number_of_best_scoring_models
182  self.vars["monte_carlo_steps"] = monte_carlo_steps
183  self.vars["molecular_dynamics_steps"]=molecular_dynamics_steps
184  self.vars["number_of_frames"] = number_of_frames
185  if not save_coordinates_mode in ["lowest_temperature","25th_score","50th_score","75th_score"]:
186  raise Exception("save_coordinates_mode has unrecognized value")
187  else:
188  self.vars["save_coordinates_mode"] = save_coordinates_mode
189  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
190  self.vars["write_initial_rmf"] = write_initial_rmf
191  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
192  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
193  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
194  self.vars["do_clean_first"] = do_clean_first
195  self.vars["do_create_directories"] = do_create_directories
196  self.vars["global_output_directory"] = global_output_directory
197  self.vars["rmf_dir"] = rmf_dir
198  self.vars["best_pdb_dir"] = best_pdb_dir
199  self.vars["atomistic"] = atomistic
200  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
201  self.vars["geometries"] = None
202  self.test_mode = test_mode
203 
204  def add_geometries(self, geometries):
205  if self.vars["geometries"] is None:
206  self.vars["geometries"] = list(geometries)
207  else:
208  self.vars["geometries"].extend(geometries)
209 
210  def show_info(self):
211  print("ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
212  print("--- it stores the best scoring pdb models in pdbs/")
213  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
214  print("--- variables:")
215  keys = list(self.vars.keys())
216  keys.sort()
217  for v in keys:
218  print("------", v.ljust(30), self.vars[v])
219 
220  def get_replica_exchange_object(self):
221  return self.replica_exchange_object
222 
223  def execute_macro(self):
224  temp_index_factor = 100000.0
225  samplers=[]
226  sampler_mc=None
227  sampler_md=None
228  if self.monte_carlo_sample_objects is not None:
229  print("Setting up MonteCarlo")
230  sampler_mc = IMP.pmi.samplers.MonteCarlo(self.model,
231  self.monte_carlo_sample_objects,
232  self.vars["monte_carlo_temperature"])
233  if self.vars["simulated_annealing"]:
234  tmin=self.vars["simulated_annealing_minimum_temperature"]
235  tmax=self.vars["simulated_annealing_maximum_temperature"]
236  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
237  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
238  sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
239  if self.vars["self_adaptive"]:
240  sampler_mc.set_self_adaptive(isselfadaptive=self.vars["self_adaptive"])
241  self.output_objects.append(sampler_mc)
242  samplers.append(sampler_mc)
243 
244 
245  if self.molecular_dynamics_sample_objects is not None:
246  print("Setting up MolecularDynamics")
247  sampler_md = IMP.pmi.samplers.MolecularDynamics(self.model,
248  self.molecular_dynamics_sample_objects,
249  self.vars["monte_carlo_temperature"],
250  maximum_time_step=self.molecular_dynamics_max_time_step)
251  if self.vars["simulated_annealing"]:
252  tmin=self.vars["simulated_annealing_minimum_temperature"]
253  tmax=self.vars["simulated_annealing_maximum_temperature"]
254  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
255  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
256  sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
257  self.output_objects.append(sampler_md)
258  samplers.append(sampler_md)
259 # -------------------------------------------------------------------------
260 
261  print("Setting up ReplicaExchange")
262  rex = IMP.pmi.samplers.ReplicaExchange(self.model,
263  self.vars[
264  "replica_exchange_minimum_temperature"],
265  self.vars[
266  "replica_exchange_maximum_temperature"],
267  samplers,
268  replica_exchange_object=self.replica_exchange_object)
269  self.replica_exchange_object = rex.rem
270 
271  myindex = rex.get_my_index()
272  self.output_objects.append(rex)
273 
274  # must reset the minimum temperature due to the
275  # different binary length of rem.get_my_parameter double and python
276  # float
277  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
278 
279 # -------------------------------------------------------------------------
280 
281  globaldir = self.vars["global_output_directory"] + "/"
282  rmf_dir = globaldir + self.vars["rmf_dir"]
283  pdb_dir = globaldir + self.vars["best_pdb_dir"]
284 
285  if not self.test_mode:
286  if self.vars["do_clean_first"]:
287  pass
288 
289  if self.vars["do_create_directories"]:
290 
291  try:
292  os.makedirs(globaldir)
293  except:
294  pass
295  try:
296  os.makedirs(rmf_dir)
297  except:
298  pass
299 
300  if not self.is_multi_state:
301  try:
302  os.makedirs(pdb_dir)
303  except:
304  pass
305  else:
306  for n in range(self.vars["number_of_states"]):
307  try:
308  os.makedirs(pdb_dir + "/" + str(n))
309  except:
310  pass
311 
312 # -------------------------------------------------------------------------
313 
315  self.output_objects.append(sw)
316 
317  print("Setting up stat file")
318  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
319  low_temp_stat_file = globaldir + \
320  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
321  if not self.test_mode:
322  output.init_stat2(low_temp_stat_file,
323  self.output_objects,
324  extralabels=["rmf_file", "rmf_frame_index"])
325 
326  print("Setting up replica stat file")
327  replica_stat_file = globaldir + \
328  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
329  if not self.test_mode:
330  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
331 
332  if not self.test_mode:
333  print("Setting up best pdb files")
334  if not self.is_multi_state:
335  if self.vars["number_of_best_scoring_models"] > 0:
336  output.init_pdb_best_scoring(pdb_dir + "/" +
337  self.vars["best_pdb_name_suffix"],
338  self.root_hier,
339  self.vars[
340  "number_of_best_scoring_models"],
341  replica_exchange=True)
342  output.write_psf(pdb_dir + "/" +"model.psf",pdb_dir + "/" +
343  self.vars["best_pdb_name_suffix"]+".0.pdb")
344  else:
345  if self.vars["number_of_best_scoring_models"] > 0:
346  for n in range(self.vars["number_of_states"]):
347  output.init_pdb_best_scoring(pdb_dir + "/" + str(n) + "/" +
348  self.vars["best_pdb_name_suffix"],
349  self.root_hiers[n],
350  self.vars[
351  "number_of_best_scoring_models"],
352  replica_exchange=True)
353  output.write_psf(pdb_dir + "/" + str(n) + "/" +"model.psf",pdb_dir + "/" + str(n) + "/" +
354  self.vars["best_pdb_name_suffix"]+".0.pdb")
355 # ---------------------------------------------
356 
357  if self.em_object_for_rmf is not None:
358  if not self.is_multi_state or self.pmi2:
359  output_hierarchies = [
360  self.root_hier,
361  self.em_object_for_rmf.get_density_as_hierarchy(
362  )]
363  else:
364  output_hierarchies = self.root_hiers
365  output_hierarchies.append(
366  self.em_object_for_rmf.get_density_as_hierarchy())
367  else:
368  if not self.is_multi_state or self.pmi2:
369  output_hierarchies = [self.root_hier]
370  else:
371  output_hierarchies = self.root_hiers
372 
373 #----------------------------------------------
374  if not self.test_mode:
375  print("Setting up and writing initial rmf coordinate file")
376  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
377  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
378  output_hierarchies)
379  if self.crosslink_restraints:
380  output.add_restraints_to_rmf(
381  init_suffix + "." + str(myindex) + ".rmf3",
382  self.crosslink_restraints)
383  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
384  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
385 
386 #----------------------------------------------
387 
388  if not self.test_mode:
389  mpivs=IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
390 
391 #----------------------------------------------
392 
393  if not self.test_mode:
394  print("Setting up production rmf files")
395  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
396  output.init_rmf(rmfname, output_hierarchies, geometries=self.vars["geometries"])
397 
398  if self.crosslink_restraints:
399  output.add_restraints_to_rmf(rmfname, self.crosslink_restraints)
400 
401  ntimes_at_low_temp = 0
402 
403  if myindex == 0:
404  self.show_info()
405  self.replica_exchange_object.set_was_used(True)
406  nframes = self.vars["number_of_frames"]
407  if self.test_mode:
408  nframes = 1
409  for i in range(nframes):
410  if self.test_mode:
411  score = 0.
412  else:
413  for nr in range(self.vars["num_sample_rounds"]):
414  if sampler_md is not None:
415  sampler_md.optimize(
416  self.vars["molecular_dynamics_steps"])
417  if sampler_mc is not None:
418  sampler_mc.optimize(self.vars["monte_carlo_steps"])
420  self.model).evaluate(False)
421  mpivs.set_value("score",score)
422  output.set_output_entry("score", score)
423 
424 
425 
426  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
427 
428  if self.vars["save_coordinates_mode"] == "lowest_temperature":
429  save_frame=(min_temp_index == my_temp_index)
430  elif self.vars["save_coordinates_mode"] == "25th_score":
431  score_perc=mpivs.get_percentile("score")
432  save_frame=(score_perc*100.0<=25.0)
433  elif self.vars["save_coordinates_mode"] == "50th_score":
434  score_perc=mpivs.get_percentile("score")
435  save_frame=(score_perc*100.0<=50.0)
436  elif self.vars["save_coordinates_mode"] == "75th_score":
437  score_perc=mpivs.get_percentile("score")
438  save_frame=(score_perc*100.0<=75.0)
439 
440  if save_frame:
441  print("--- frame %s score %s " % (str(i), str(score)))
442 
443  if not self.test_mode:
444  if i % self.vars["nframes_write_coordinates"]==0:
445  print('--- writing coordinates')
446  if self.vars["number_of_best_scoring_models"] > 0:
447  output.write_pdb_best_scoring(score)
448  output.write_rmf(rmfname)
449  output.set_output_entry("rmf_file", rmfname)
450  output.set_output_entry("rmf_frame_index", ntimes_at_low_temp)
451  else:
452  output.set_output_entry("rmf_file", rmfname)
453  output.set_output_entry("rmf_frame_index", '-1')
454  output.write_stat2(low_temp_stat_file)
455  ntimes_at_low_temp += 1
456 
457  if not self.test_mode:
458  output.write_stat2(replica_stat_file)
459  if self.vars["replica_exchange_swap"]:
460  rex.swap_temp(i, score)
461  if self.representation:
462  for p, state in self.representation._protocol_output:
463  p.add_replica_exchange(state, self)
464 
465  if not self.test_mode:
466  print("closing production rmf files")
467  output.close_rmf(rmfname)
468 
469 
470 
471 # ----------------------------------------------------------------------
472 class BuildSystem(object):
473  """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
474  Easily create multi-state systems by calling this macro
475  repeatedly with different TopologyReader objects!
476  A useful function is get_molecules() which returns the PMI Molecules grouped by state
477  as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
478  Quick multi-state system:
479  @code{.python}
480  mdl = IMP.Model()
481  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
482  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
483  bs = IMP.pmi.macros.BuildSystem(mdl)
484  bs.add_state(reader1)
485  bs.add_state(reader2)
486  bs.execute_macro() # build everything including degrees of freedom
487  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
488  ### now you have a two state system, you add restraints etc
489  @endcode
490  \note The "domain name" entry of the topology reader is not used.
491  All molecules are set up by the component name, but split into rigid bodies
492  as requested.
493  """
494  def __init__(self,
495  mdl,
496  sequence_connectivity_scale=4.0,
497  force_create_gmm_files=False,
498  resolutions=[1,10]):
499  """Constructor
500  @param mdl An IMP Model
501  @param sequence_connectivity_scale For scaling the connectivity restraint
502  @param force_create_gmm_files If True, will sample and create GMMs
503  no matter what. If False, will only only sample if the
504  files don't exist. If number of Gaussians is zero, won't
505  do anything.
506  @param resolutions The resolutions to build for structured regions
507  """
508  self.mdl = mdl
509  self.system = IMP.pmi.topology.System(self.mdl)
510  self._readers = [] # the TopologyReaders (one per state)
511  self._domain_res = [] # TempResidues for each domain key=unique name, value=(atomic_res,non_atomic_res).
512  self._domains = [] # key = domain unique name, value = Component
513  self.force_create_gmm_files = force_create_gmm_files
514  self.resolutions = resolutions
515 
516  def add_state(self,
517  reader,
518  keep_chain_id=False, fasta_name_map=None):
519  """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
520  When you are done adding states, call execute_macro()
521  @param reader The TopologyReader object
522  @param fasta_name_map dictionary for converting protein names found in the fasta file
523  """
524  state = self.system.create_state()
525  self._readers.append(reader)
526  these_domain_res = {} # key is unique name, value is (atomic res, nonatomicres)
527  these_domains = {} # key is unique name, value is _Component
528  chain_ids = string.ascii_uppercase+string.ascii_lowercase+'0123456789'
529  numchain = 0
530 
531  ### setup representation
532  # loop over molecules, copies, then domains
533  for molname in reader.get_molecules():
534  copies = reader.get_molecules()[molname].domains
535  for nc,copyname in enumerate(copies):
536  print("BuildSystem.add_state: setting up molecule %s copy number %s" % (molname,str(nc)))
537  copy = copies[copyname]
538  # option to not rename chains
539  if keep_chain_id == True:
540  chain_id = copy[0].chain
541  else:
542  chain_id = chain_ids[numchain]
543  if nc==0:
544  is_nucleic=False
545  fasta_flag=copy[0].fasta_flag
546  if fasta_flag == "RNA" or fasta_flag == "DNA": is_nucleic=True
547  seq = IMP.pmi.topology.Sequences(copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
548  print("BuildSystem.add_state: molecule %s sequence has %s residues" % (molname,len(seq)))
549  orig_mol = state.create_molecule(molname,
550  seq,
551  chain_id,is_nucleic)
552  mol = orig_mol
553  numchain+=1
554  else:
555  print("BuildSystem.add_state: creating a copy for molecule %s" % molname)
556  mol = orig_mol.create_copy(chain_id)
557  numchain+=1
558 
559  for domainnumber,domain in enumerate(copy):
560  print("BuildSystem.add_state: ---- setting up domain %s of molecule %s" % (domainnumber,molname))
561  # we build everything in the residue range, even if it
562  # extends beyond what's in the actual PDB file
563  these_domains[domain.get_unique_name()] = domain
564  if domain.residue_range==[] or domain.residue_range is None:
565  domain_res = mol.get_residues()
566  else:
567  start = domain.residue_range[0]+domain.pdb_offset
568  if domain.residue_range[1]=='END':
569  end = len(mol.sequence)
570  else:
571  end = domain.residue_range[1]+domain.pdb_offset
572  domain_res = mol.residue_range(start-1,end-1)
573  print("BuildSystem.add_state: -------- domain %s of molecule %s extends from residue %s to residue %s " % (domainnumber,molname,start,end))
574  if domain.pdb_file=="BEADS":
575  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by BEADS " % (domainnumber,molname))
576  mol.add_representation(
577  domain_res,
578  resolutions=[domain.bead_size],
579  setup_particles_as_densities=(
580  domain.em_residues_per_gaussian!=0),
581  color = domain.color)
582  these_domain_res[domain.get_unique_name()] = (set(),domain_res)
583  elif domain.pdb_file=="IDEAL_HELIX":
584  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by IDEAL_HELIX " % (domainnumber,molname))
585  mol.add_representation(
586  domain_res,
587  resolutions=self.resolutions,
588  ideal_helix=True,
589  density_residues_per_component=domain.em_residues_per_gaussian,
590  density_prefix=domain.density_prefix,
591  density_force_compute=self.force_create_gmm_files,
592  color = domain.color)
593  these_domain_res[domain.get_unique_name()] = (domain_res,set())
594  else:
595  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by pdb file %s " % (domainnumber,molname,domain.pdb_file))
596  domain_atomic = mol.add_structure(domain.pdb_file,
597  domain.chain,
598  domain.residue_range,
599  domain.pdb_offset,
600  soft_check=True)
601  domain_non_atomic = domain_res - domain_atomic
602  if not domain.em_residues_per_gaussian:
603  mol.add_representation(domain_atomic,
604  resolutions=self.resolutions,
605  color = domain.color)
606  if len(domain_non_atomic)>0:
607  mol.add_representation(domain_non_atomic,
608  resolutions=[domain.bead_size],
609  color = domain.color)
610  else:
611  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by gaussians " % (domainnumber,molname))
612  mol.add_representation(
613  domain_atomic,
614  resolutions=self.resolutions,
615  density_residues_per_component=domain.em_residues_per_gaussian,
616  density_prefix=domain.density_prefix,
617  density_force_compute=self.force_create_gmm_files,
618  color = domain.color)
619  if len(domain_non_atomic)>0:
620  mol.add_representation(domain_non_atomic,
621  resolutions=[domain.bead_size],
622  setup_particles_as_densities=True,
623  color = domain.color)
624  these_domain_res[domain.get_unique_name()] = (
625  domain_atomic,domain_non_atomic)
626  self._domain_res.append(these_domain_res)
627  self._domains.append(these_domains)
628  print('BuildSystem.add_state: State',len(self.system.states),'added')
629 
630  def get_molecules(self):
631  """Return list of all molecules grouped by state.
632  For each state, it's a dictionary of Molecules where key is the molecule name
633  """
634  return [s.get_molecules() for s in self.system.get_states()]
635 
636  def get_molecule(self,molname,copy_index=0,state_index=0):
637  return self.system.get_states()[state_index].get_molecules()[molname][copy_index]
638 
639  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04, max_bead_trans=4.0, max_srb_trans=4.0,max_srb_rot=0.04):
640  """Builds representations and sets up degrees of freedom"""
641  print("BuildSystem.execute_macro: building representations")
642  self.root_hier = self.system.build()
643 
644  print("BuildSystem.execute_macro: setting up degrees of freedom")
645  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.mdl)
646  for nstate,reader in enumerate(self._readers):
647  rbs = reader.get_rigid_bodies()
648  srbs = reader.get_super_rigid_bodies()
649  csrbs = reader.get_chains_of_super_rigid_bodies()
650 
651  # add rigid bodies
652  domains_in_rbs = set()
653  for rblist in rbs:
654  print("BuildSystem.execute_macro: -------- building rigid body %s" % (str(rblist)))
655  all_res = IMP.pmi.tools.OrderedSet()
656  bead_res = IMP.pmi.tools.OrderedSet()
657  for dname in rblist:
658  domain = self._domains[nstate][dname]
659  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
660  all_res|=self._domain_res[nstate][dname][0]
661  bead_res|=self._domain_res[nstate][dname][1]
662  domains_in_rbs.add(dname)
663  all_res|=bead_res
664  print("BuildSystem.execute_macro: -------- \
665 creating rigid body with max_trans %s \
666 max_rot %s non_rigid_max_trans %s" \
667  % (str(max_rb_trans),str(max_rb_rot),str(max_bead_trans)))
668  self.dof.create_rigid_body(all_res,
669  nonrigid_parts=bead_res,
670  max_trans=max_rb_trans,
671  max_rot=max_rb_rot,
672  nonrigid_max_trans=max_bead_trans)
673 
674  # if you have any BEAD domains not in an RB, set them as flexible beads
675  for dname in self._domains[nstate]:
676  domain = self._domains[nstate][dname]
677  if domain.pdb_file=="BEADS" and dname not in domains_in_rbs:
678  self.dof.create_flexible_beads(
679  self._domain_res[nstate][dname][1],max_trans=max_bead_trans)
680 
681  # add super rigid bodies
682  for srblist in srbs:
683  print("BuildSystem.execute_macro: -------- building super rigid body %s" % (str(srblist)))
684  all_res = IMP.pmi.tools.OrderedSet()
685  for dname in srblist:
686  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
687  all_res|=self._domain_res[nstate][dname][0]
688  all_res|=self._domain_res[nstate][dname][1]
689 
690  print("BuildSystem.execute_macro: -------- \
691 creating super rigid body with max_trans %s max_rot %s " \
692  % (str(max_srb_trans),str(max_srb_rot)))
693  self.dof.create_super_rigid_body(all_res,max_trans=max_srb_trans,max_rot=max_srb_rot)
694 
695  # add chains of super rigid bodies
696  for csrblist in csrbs:
697  all_res = IMP.pmi.tools.OrderedSet()
698  for dname in csrblist:
699  all_res|=self._domain_res[nstate][dname][0]
700  all_res|=self._domain_res[nstate][dname][1]
701  all_res = list(all_res)
702  all_res.sort(key=lambda r:r.get_index())
703  #self.dof.create_super_rigid_body(all_res,chain_min_length=2,chain_max_length=3)
704  self.dof.create_main_chain_mover(all_res)
705  return self.root_hier,self.dof
706 
707 @IMP.deprecated_object("2.5", "Use BuildSystem instead")
708 class BuildModel(object):
709  """A macro to build a Representation based on a Topology and lists of movers
710  DEPRECATED - Use BuildSystem instead.
711  """
712  def __init__(self,
713  model,
714  component_topologies,
715  list_of_rigid_bodies=[],
716  list_of_super_rigid_bodies=[],
717  chain_of_super_rigid_bodies=[],
718  sequence_connectivity_scale=4.0,
719  add_each_domain_as_rigid_body=False,
720  force_create_gmm_files=False):
721  """Constructor.
722  @param model The IMP model
723  @param component_topologies List of
724  IMP.pmi.topology.ComponentTopology items
725  @param list_of_rigid_bodies List of lists of domain names that will
726  be moved as rigid bodies.
727  @param list_of_super_rigid_bodies List of lists of domain names
728  that will move together in an additional Monte Carlo move.
729  @param chain_of_super_rigid_bodies List of lists of domain names
730  (choices can only be from the same molecule). Each of these
731  groups will be moved rigidly. This helps to sample more
732  efficiently complex topologies, made of several rigid bodies,
733  connected by flexible linkers.
734  @param sequence_connectivity_scale For scaling the connectivity
735  restraint
736  @param add_each_domain_as_rigid_body That way you don't have to
737  put all of them in the list
738  @param force_create_gmm_files If True, will sample and create GMMs
739  no matter what. If False, will only only sample if the
740  files don't exist. If number of Gaussians is zero, won't
741  do anything.
742  """
743  self.m = model
744  self.simo = IMP.pmi.representation.Representation(self.m,
745  upperharmonic=True,
746  disorderedlength=False)
747 
748  data=component_topologies
749  if list_of_rigid_bodies==[]:
750  print("WARNING: No list of rigid bodies inputted to build_model()")
751  if list_of_super_rigid_bodies==[]:
752  print("WARNING: No list of super rigid bodies inputted to build_model()")
753  if chain_of_super_rigid_bodies==[]:
754  print("WARNING: No chain of super rigid bodies inputted to build_model()")
755  all_dnames = set([d for sublist in list_of_rigid_bodies+list_of_super_rigid_bodies\
756  +chain_of_super_rigid_bodies for d in sublist])
757  all_available = set([c._domain_name for c in component_topologies])
758  if not all_dnames <= all_available:
759  raise ValueError("All requested movers must reference domain "
760  "names in the component topologies")
761 
762  self.domain_dict={}
763  self.resdensities={}
764  super_rigid_bodies={}
765  chain_super_rigid_bodies={}
766  rigid_bodies={}
767 
768  for c in data:
769  comp_name = c.molname
770  hier_name = c._domain_name
771  color = 0. # Can't use new-style string colors
772  fasta_file = c.fasta_file
773  fasta_id = c.fasta_id
774  pdb_name = c.pdb_file
775  chain_id = c.chain
776  res_range = c.residue_range
777  offset = c.pdb_offset
778  bead_size = c.bead_size
779  em_num_components = c.em_residues_per_gaussian
780  em_txt_file_name = c.gmm_file
781  em_mrc_file_name = c.mrc_file
782 
783  if comp_name not in self.simo.get_component_names():
784  self.simo.create_component(comp_name,color=0.0)
785  self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
786 
787  # create hierarchy (adds resolutions, beads) with autobuild and optionally add EM data
788  if em_num_components==0:
789  read_em_files=False
790  include_res0=False
791  else:
792  if (not os.path.isfile(em_txt_file_name)) or force_create_gmm_files:
793  read_em_files=False
794  include_res0=True
795  else:
796  read_em_files=True
797  include_res0=False
798 
799  outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
800  res_range,include_res0,beadsize=bead_size,
801  color=color,offset=offset)
802  if em_num_components!=0:
803  if read_em_files:
804  print("will read GMM files")
805  else:
806  print("will calculate GMMs")
807 
808  dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
809  em_mrc_file_name,em_num_components,read_em_files)
810  self.simo.add_all_atom_densities(comp_name, particles=beads)
811  dens_hier+=beads
812  else:
813  dens_hier=[]
814 
815  self.resdensities[hier_name]=dens_hier
816  self.domain_dict[hier_name]=outhier+dens_hier
817 
818  # setup basic restraints
819  for c in self.simo.get_component_names():
820  self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
821  self.simo.setup_component_geometry(c)
822 
823  # create movers
824  for rblist in list_of_rigid_bodies:
825  rb=[]
826  for rbmember in rblist:
827  rb+=[h for h in self.domain_dict[rbmember]]
828  self.simo.set_rigid_body_from_hierarchies(rb)
829  for srblist in list_of_super_rigid_bodies:
830  srb=[]
831  for srbmember in rblist:
832  srb+=[h for h in self.domain_dict[srbmember]]
833  self.simo.set_super_rigid_body_from_hierarchies(srb)
834  for clist in chain_of_super_rigid_bodies:
835  crb=[]
836  for crbmember in rblist:
837  crb+=[h for h in self.domain_dict[crbmember]]
838  self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
839 
840  self.simo.set_floppy_bodies()
841  self.simo.setup_bonds()
842 
844  '''Return the Representation object'''
845  return self.simo
846 
847  def get_density_hierarchies(self,hier_name_list):
848  # return a list of density hierarchies
849  # specify the list of hierarchy names
850  dens_hier_list=[]
851  for hn in hier_name_list:
852  print(hn)
853  dens_hier_list+=self.resdensities[hn]
854  return dens_hier_list
855 
856  def set_gmm_models_directory(self,directory_name):
857  self.gmm_models_directory=directory_name
858 
859  def get_pdb_bead_bits(self,hierarchy):
860  pdbbits=[]
861  beadbits=[]
862  helixbits=[]
863  for h in hierarchy:
864  if "_pdb" in h.get_name():pdbbits.append(h)
865  if "_bead" in h.get_name():beadbits.append(h)
866  if "_helix" in h.get_name():helixbits.append(h)
867  return (pdbbits,beadbits,helixbits)
868 
869  def scale_bead_radii(self,nresidues,scale):
870  scaled_beads=set()
871  for h in self.domain_dict:
872  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
873  slope=(1.0-scale)/(1.0-float(nresidues))
874 
875  for b in beadbits:
876  # I have to do the following
877  # because otherwise we'll scale more than once
878  if b not in scaled_beads:
879  scaled_beads.add(b)
880  else:
881  continue
882  radius=IMP.core.XYZR(b).get_radius()
883  num_residues=len(IMP.pmi.tools.get_residue_indexes(b))
884  scale_factor=slope*float(num_residues)+1.0
885  print(scale_factor)
886  new_radius=scale_factor*radius
887  IMP.core.XYZR(b).set_radius(new_radius)
888  print(b.get_name())
889  print("particle with radius "+str(radius)+" and "+str(num_residues)+" residues scaled to a new radius "+str(new_radius))
890 
891 
892  def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
893  #density generation for the EM restraint
894  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
895  #get the number of residues from the pdb bits
896  res_ind=[]
897  for pb in pdbbits+helixbits:
898  for p in IMP.core.get_leaves(pb):
900 
901  number_of_residues=len(set(res_ind))
902  outhier=[]
903  if read:
904  if len(pdbbits)!=0:
905  outhier+=simo.add_component_density(compname,
906  pdbbits,
907  num_components=num_components,
908  resolution=0,
909  inputfile=txtfilename)
910  if len(helixbits)!=0:
911  outhier+=simo.add_component_density(compname,
912  helixbits,
913  num_components=num_components,
914  resolution=1,
915  inputfile=txtfilename)
916 
917 
918  else:
919  if len(pdbbits)!=0:
920  num_components=number_of_residues//abs(num_components)+1
921  outhier+=simo.add_component_density(compname,
922  pdbbits,
923  num_components=num_components,
924  resolution=0,
925  outputfile=txtfilename,
926  outputmap=mrcfilename,
927  multiply_by_total_mass=True)
928 
929  if len(helixbits)!=0:
930  num_components=number_of_residues//abs(num_components)+1
931  outhier+=simo.add_component_density(compname,
932  helixbits,
933  num_components=num_components,
934  resolution=1,
935  outputfile=txtfilename,
936  outputmap=mrcfilename,
937  multiply_by_total_mass=True)
938 
939  return outhier,beadbits
940 
941  def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
942  beadsize=5,color=0.0,offset=0):
943  if pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is not "BEADS" :
944  if include_res0:
945  outhier=simo.autobuild_model(comname,
946  pdbname=pdbname,
947  chain=chain,
948  resrange=resrange,
949  resolutions=[0,1,10],
950  offset=offset,
951  color=color,
952  missingbeadsize=beadsize)
953  else:
954  outhier=simo.autobuild_model(comname,
955  pdbname=pdbname,
956  chain=chain,
957  resrange=resrange,
958  resolutions=[1,10],
959  offset=offset,
960  color=color,
961  missingbeadsize=beadsize)
962 
963 
964  elif pdbname is not None and pdbname is "IDEAL_HELIX" and pdbname is not "BEADS" :
965  outhier=simo.add_component_ideal_helix(comname,
966  resolutions=[1,10],
967  resrange=resrange,
968  color=color,
969  show=False)
970 
971  elif pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is "BEADS" :
972  outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
973 
974  else:
975 
976  seq_len=len(simo.sequence_dict[comname])
977  outhier=simo.add_component_necklace(comname,
978  begin=1,
979  end=seq_len,
980  length=beadsize)
981 
982  return outhier
983 
984 
985 @IMP.deprecated_object("2.5", "Use BuildSystem instead")
986 class BuildModel1(object):
987  """Deprecated building macro - use BuildSystem()"""
988 
989  def __init__(self, representation):
990  """Constructor.
991  @param representation The PMI representation
992  """
993  self.simo=representation
994  self.gmm_models_directory="."
995  self.rmf_file={}
996  self.rmf_frame_number={}
997  self.rmf_names_map={}
998  self.rmf_component_name={}
999 
1000  def set_gmm_models_directory(self,directory_name):
1001  self.gmm_models_directory=directory_name
1002 
1003  def build_model(self,data_structure,sequence_connectivity_scale=4.0,
1004  sequence_connectivity_resolution=10,
1005  rmf_file=None,rmf_frame_number=0,rmf_file_map=None,
1006  skip_connectivity_these_domains=None,
1007  skip_gaussian_in_rmf=False, skip_gaussian_in_representation=False):
1008  """Create model.
1009  @param data_structure List of lists containing these entries:
1010  comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
1011  res_range, read_em_files, bead_size, rb, super_rb,
1012  em_num_components, em_txt_file_name, em_mrc_file_name, chain_of_super_rb,
1013  keep_gaussian_flexible_beads (optional)
1014  @param sequence_connectivity_scale
1015  @param rmf_file
1016  @param rmf_frame_number
1017  @param rmf_file_map : a dictionary that map key=component_name:value=(rmf_file_name,
1018  rmf_frame_number,
1019  rmf_component_name)
1020  """
1021  self.domain_dict={}
1022  self.resdensities={}
1023  super_rigid_bodies={}
1024  chain_super_rigid_bodies={}
1025  rigid_bodies={}
1026 
1027  for d in data_structure:
1028  comp_name = d[0]
1029  hier_name = d[1]
1030  color = d[2]
1031  fasta_file = d[3]
1032  fasta_id = d[4]
1033  pdb_name = d[5]
1034  chain_id = d[6]
1035  res_range = d[7][0:2]
1036  try:
1037  offset = d[7][2]
1038  except:
1039  offset = 0
1040  read_em_files = d[8]
1041  bead_size = d[9]
1042  rb = d[10]
1043  super_rb = d[11]
1044  em_num_components = d[12]
1045  em_txt_file_name = d[13]
1046  em_mrc_file_name = d[14]
1047  chain_of_super_rb = d[15]
1048  try:
1049  keep_gaussian_flexible_beads = d[16]
1050  except:
1051  keep_gaussian_flexible_beads = True
1052 
1053  if comp_name not in self.simo.get_component_names():
1054  self.simo.create_component(comp_name,color=0.0)
1055  self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
1056  outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
1057 
1058 
1059  if not read_em_files is None:
1060  if em_txt_file_name is " ": em_txt_file_name=self.gmm_models_directory+"/"+hier_name+".txt"
1061  if em_mrc_file_name is " ": em_mrc_file_name=self.gmm_models_directory+"/"+hier_name+".mrc"
1062 
1063 
1064  dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
1065 
1066  if (keep_gaussian_flexible_beads):
1067  self.simo.add_all_atom_densities(comp_name, particles=beads)
1068  dens_hier+=beads
1069  else:
1070  dens_hier=[]
1071 
1072  self.resdensities[hier_name]=dens_hier
1073  self.domain_dict[hier_name]=outhier+dens_hier
1074 
1075  if rb is not None:
1076  if rb not in rigid_bodies:
1077  rigid_bodies[rb]=[h for h in self.domain_dict[hier_name]]
1078  else:
1079  rigid_bodies[rb]+=[h for h in self.domain_dict[hier_name]]
1080 
1081 
1082  if super_rb is not None:
1083  for k in super_rb:
1084  if k not in super_rigid_bodies:
1085  super_rigid_bodies[k]=[h for h in self.domain_dict[hier_name]]
1086  else:
1087  super_rigid_bodies[k]+=[h for h in self.domain_dict[hier_name]]
1088 
1089  if chain_of_super_rb is not None:
1090  for k in chain_of_super_rb:
1091  if k not in chain_super_rigid_bodies:
1092  chain_super_rigid_bodies[k]=[h for h in self.domain_dict[hier_name]]
1093  else:
1094  chain_super_rigid_bodies[k]+=[h for h in self.domain_dict[hier_name]]
1095 
1096 
1097 
1098  self.rigid_bodies=rigid_bodies
1099 
1100  for c in self.simo.get_component_names():
1101  if rmf_file is not None:
1102  rf=rmf_file
1103  rfn=rmf_frame_number
1104  self.simo.set_coordinates_from_rmf(c, rf,rfn,
1105  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1106  elif rmf_file_map:
1107  for k in rmf_file_map:
1108  cname=k
1109  rf=rmf_file_map[k][0]
1110  rfn=rmf_file_map[k][1]
1111  rcname=rmf_file_map[k][2]
1112  self.simo.set_coordinates_from_rmf(cname, rf,rfn,rcname,
1113  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1114  else:
1115  if c in self.rmf_file:
1116  rf=self.rmf_file[c]
1117  rfn=self.rmf_frame_number[c]
1118  rfm=self.rmf_names_map[c]
1119  rcname=self.rmf_component_name[c]
1120  self.simo.set_coordinates_from_rmf(c, rf,rfn,representation_name_to_rmf_name_map=rfm,
1121  rmf_component_name=rcname,
1122  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1123  if (not skip_connectivity_these_domains) or (c not in skip_connectivity_these_domains):
1124  self.simo.setup_component_sequence_connectivity(c,
1125  resolution=sequence_connectivity_resolution,
1126  scale=sequence_connectivity_scale)
1127  self.simo.setup_component_geometry(c)
1128 
1129  for rb in rigid_bodies:
1130  self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
1131 
1132  for k in super_rigid_bodies:
1133  self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
1134 
1135  for k in chain_super_rigid_bodies:
1136  self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
1137 
1138  self.simo.set_floppy_bodies()
1139  self.simo.setup_bonds()
1140 
1141  def set_main_chain_mover(self,hier_name,lengths=[5,10,20,30]):
1142  hiers=self.domain_dict[hier_name]
1143  for length in lengths:
1144  for n in range(len(hiers)-length):
1145  hs=hiers[n+1:n+length-1]
1146  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n+length].get_particle()),min_size=3)
1147  for n in range(1,len(hiers)-1,5):
1148  hs=hiers[n+1:]
1149  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1150  hs=hiers[:n-1]
1151  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1152 
1153 
1154  def set_rmf_file(self,component_name,rmf_file,rmf_frame_number,rmf_names_map=None,rmf_component_name=None):
1155  self.rmf_file[component_name]=rmf_file
1156  self.rmf_frame_number[component_name]=rmf_frame_number
1157  self.rmf_names_map[component_name]=rmf_names_map
1158  self.rmf_component_name[component_name]=rmf_component_name
1159 
1160  def get_density_hierarchies(self,hier_name_list):
1161  # return a list of density hierarchies
1162  # specify the list of hierarchy names
1163  dens_hier_list=[]
1164  for hn in hier_name_list:
1165  print(hn)
1166  dens_hier_list+=self.resdensities[hn]
1167  return dens_hier_list
1168 
1169  def get_pdb_bead_bits(self,hierarchy):
1170  pdbbits=[]
1171  beadbits=[]
1172  helixbits=[]
1173  for h in hierarchy:
1174  if "_pdb" in h.get_name():pdbbits.append(h)
1175  if "_bead" in h.get_name():beadbits.append(h)
1176  if "_helix" in h.get_name():helixbits.append(h)
1177  return (pdbbits,beadbits,helixbits)
1178 
1179  def scale_bead_radii(self,nresidues,scale):
1180  scaled_beads=set()
1181  for h in self.domain_dict:
1182  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
1183  slope=(1.0-scale)/(1.0-float(nresidues))
1184 
1185  for b in beadbits:
1186  # I have to do the following
1187  # because otherwise we'll scale more than once
1188  if b not in scaled_beads:
1189  scaled_beads.add(b)
1190  else:
1191  continue
1192  radius=IMP.core.XYZR(b).get_radius()
1193  num_residues=len(IMP.pmi.tools.get_residue_indexes(b))
1194  scale_factor=slope*float(num_residues)+1.0
1195  print(scale_factor)
1196  new_radius=scale_factor*radius
1197  IMP.core.XYZR(b).set_radius(new_radius)
1198  print(b.get_name())
1199  print("particle with radius "+str(radius)+" and "+str(num_residues)+" residues scaled to a new radius "+str(new_radius))
1200 
1201 
1202  def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
1203  #density generation for the EM restraint
1204  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
1205 
1206  #get the number of residues from the pdb bits
1207  res_ind=[]
1208  for pb in pdbbits+helixbits:
1209  for p in IMP.core.get_leaves(pb):
1211 
1212  number_of_residues=len(set(res_ind))
1213  outhier=[]
1214  if read:
1215  if len(pdbbits)!=0:
1216  outhier+=simo.add_component_density(compname,
1217  pdbbits,
1218  num_components=num_components, # number of gaussian into which the simulated density is approximated
1219  resolution=0, # resolution that you want to calculate the simulated density
1220  inputfile=txtfilename) # read what it was calculated before
1221  if len(helixbits)!=0:
1222  outhier+=simo.add_component_density(compname,
1223  helixbits,
1224  num_components=num_components, # number of gaussian into which the simulated density is approximated
1225  resolution=1, # resolution that you want to calculate the simulated density
1226  inputfile=txtfilename) # read what it was calculated before
1227 
1228 
1229  else:
1230  if len(pdbbits)!=0:
1231  if num_components<0:
1232  #if negative calculate the number of gmm components automatically
1233  # from the number of residues
1234  num_components=number_of_residues/abs(num_components)
1235  outhier+=simo.add_component_density(compname,
1236  pdbbits,
1237  num_components=num_components, # number of gaussian into which the simulated density is approximated
1238  resolution=0, # resolution that you want to calculate the simulated density
1239  outputfile=txtfilename, # do the calculation
1240  outputmap=mrcfilename,
1241  multiply_by_total_mass=True) # do the calculation and output the mrc
1242 
1243  if len(helixbits)!=0:
1244  if num_components<0:
1245  #if negative calculate the number of gmm components automatically
1246  # from the number of residues
1247  num_components=number_of_residues/abs(num_components)
1248  outhier+=simo.add_component_density(compname,
1249  helixbits,
1250  num_components=num_components, # number of gaussian into which the simulated density is approximated
1251  resolution=1, # resolution that you want to calculate the simulated density
1252  outputfile=txtfilename, # do the calculation
1253  outputmap=mrcfilename,
1254  multiply_by_total_mass=True) # do the calculation and output the mrc
1255 
1256  return outhier,beadbits
1257 
1258  def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
1259 
1260  if pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is not "BEADS" and pdbname is not "DENSITY" :
1261  if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
1262  if read==False:
1263  outhier=simo.autobuild_model(comname,
1264  pdbname=pdbname,
1265  chain=chain,
1266  resrange=resrange,
1267  resolutions=[0,1,10],
1268  offset=offset,
1269  color=color,
1270  missingbeadsize=beadsize)
1271  else:
1272  outhier=simo.autobuild_model(comname,
1273  pdbname=pdbname,
1274  chain=chain,
1275  resrange=resrange,
1276  resolutions=[1,10],
1277  offset=offset,
1278  color=color,
1279  missingbeadsize=beadsize)
1280 
1281  elif pdbname is not None and pdbname is "IDEAL_HELIX" and pdbname is not "BEADS" and pdbname is not "DENSITY" :
1282  outhier=simo.add_component_ideal_helix(comname,
1283  resolutions=[1,10],
1284  resrange=resrange,
1285  color=color,
1286  show=False)
1287 
1288  elif pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is "BEADS" and pdbname is not "DENSITY" :
1289  seq_len=resrange[1]
1290  if resrange[1]==-1:
1291  seq_len=len(simo.sequence_dict[comname])
1292  outhier=simo.add_component_necklace(comname,resrange[0],seq_len,beadsize,color=color)
1293 
1294  elif pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is not "BEADS" and pdbname is "DENSITY" :
1295  outhier=[]
1296 
1297  else:
1298 
1299  seq_len=len(simo.sequence_dict[comname])
1300  outhier=simo.add_component_necklace(comname,
1301  begin=1,
1302  end=seq_len,
1303  length=beadsize)
1304 
1305  return outhier
1306 
1307  def set_coordinates(self,hier_name,xyz_tuple):
1308  hier=self.domain_dict[hier_name]
1309  for h in IMP.atom.get_leaves(hier):
1310  p=h.get_particle()
1312  pass
1313  else:
1314  IMP.core.XYZ(p).set_coordinates(xyz_tuple)
1315 
1316  def save_rmf(self,rmfname):
1317 
1319  o.init_rmf(rmfname,[self.simo.prot])
1320  o.write_rmf(rmfname)
1321  o.close_rmf(rmfname)
1322 # -----------------------------------------------------------------------
1323 
1324 @IMP.deprecated_object("2.5", "Use BuildSystem instead")
1325 def BuildModel0(
1326  m,
1327  data,
1328  resolutions=[1,
1329  10],
1330  missing_bead_size=20,
1331  residue_per_gaussian=None):
1332  '''
1333  Construct a component for each subunit (no splitting, nothing fancy).
1334  You can pass the resolutions and the bead size for the missing residue regions.
1335  To use this macro, you must provide the following data structure:
1336  Component pdbfile chainid rgb color fastafile sequence id
1337  in fastafile
1338 data = [("Rpb1", pdbfile, "A", 0.00000000, (fastafile, 0)),
1339  ("Rpb2", pdbfile, "B", 0.09090909, (fastafile, 1)),
1340  ("Rpb3", pdbfile, "C", 0.18181818, (fastafile, 2)),
1341  ("Rpb4", pdbfile, "D", 0.27272727, (fastafile, 3)),
1342  ("Rpb5", pdbfile, "E", 0.36363636, (fastafile, 4)),
1343  ("Rpb6", pdbfile, "F", 0.45454545, (fastafile, 5)),
1344  ("Rpb7", pdbfile, "G", 0.54545455, (fastafile, 6)),
1345  ("Rpb8", pdbfile, "H", 0.63636364, (fastafile, 7)),
1346  ("Rpb9", pdbfile, "I", 0.72727273, (fastafile, 8)),
1347  ("Rpb10", pdbfile, "L", 0.81818182, (fastafile, 9)),
1348  ("Rpb11", pdbfile, "J", 0.90909091, (fastafile, 10)),
1349  ("Rpb12", pdbfile, "K", 1.00000000, (fastafile, 11))]
1350  '''
1351 
1353 
1354  # the dictionary for the hierarchies,
1355  hierarchies = {}
1356 
1357  for d in data:
1358  # retrieve the information from the data structure
1359  component_name = d[0]
1360  pdb_file = d[1]
1361  chain_id = d[2]
1362  color_id = d[3]
1363  fasta_file = d[4][0]
1364  # this function
1365  fastids = IMP.pmi.tools.get_ids_from_fasta_file(fasta_file)
1366  fasta_file_id = d[4][1]
1367  # avoid to add a component with the same name
1368  r.create_component(component_name,
1369  color=color_id)
1370 
1371  r.add_component_sequence(component_name,
1372  fasta_file,
1373  id=fastids[fasta_file_id])
1374 
1375  hierarchies = r.autobuild_model(component_name,
1376  pdb_file,
1377  chain_id,
1378  resolutions=resolutions,
1379  missingbeadsize=missing_bead_size)
1380 
1381  r.show_component_table(component_name)
1382 
1383  r.set_rigid_bodies([component_name])
1384 
1385  r.set_chain_of_super_rigid_bodies(
1386  hierarchies,
1387  min_length=2,
1388  max_length=2)
1389 
1390  r.setup_component_sequence_connectivity(component_name, resolution=1)
1391  r.setup_component_geometry(component_name)
1392 
1393  r.setup_bonds()
1394  # put it at the end of rigid bodies
1395  r.set_floppy_bodies()
1396 
1397  # set current coordinates as reference for RMSD calculation
1398  r.set_current_coordinates_as_reference_for_rmsd("Reference")
1399 
1400  return r
1401 
1402 # ----------------------------------------------------------------------
1403 
1405  """A macro for running all the basic operations of analysis.
1406  Includes clustering, precision analysis, and making ensemble density maps.
1407  A number of plots are also supported.
1408  """
1409  def __init__(self, model,
1410  merge_directories=["./"],
1411  stat_file_name_suffix="stat",
1412  best_pdb_name_suffix="model",
1413  do_clean_first=True,
1414  do_create_directories=True,
1415  global_output_directory="output/",
1416  replica_stat_file_suffix="stat_replica",
1417  global_analysis_result_directory="./analysis/",
1418  test_mode=False):
1419  """Constructor.
1420  @param model The IMP model
1421  @param stat_file_name_suffix
1422  @param merge_directories The directories containing output files
1423  @param best_pdb_name_suffix
1424  @param do_clean_first
1425  @param do_create_directories
1426  @param global_output_directory Where everything is
1427  @param replica_stat_file_suffix
1428  @param global_analysis_result_directory
1429  @param test_mode If True, nothing is changed on disk
1430  """
1431 
1432  try:
1433  from mpi4py import MPI
1434  self.comm = MPI.COMM_WORLD
1435  self.rank = self.comm.Get_rank()
1436  self.number_of_processes = self.comm.size
1437  except ImportError:
1438  self.rank = 0
1439  self.number_of_processes = 1
1440 
1441  self.test_mode = test_mode
1442  self._protocol_output = []
1443  self.cluster_obj = None
1444  self.model = model
1445  stat_dir = global_output_directory
1446  self.stat_files = []
1447  # it contains the position of the root directories
1448  for rd in merge_directories:
1449  stat_files = glob.glob(os.path.join(rd,stat_dir,"stat.*.out"))
1450  if len(stat_files)==0:
1451  print("WARNING: no stat files found in",os.path.join(rd,stat_dir))
1452  self.stat_files += stat_files
1453 
1454  def add_protocol_output(self, p):
1455  """Capture details of the modeling protocol.
1456  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
1457  """
1458  # Assume last state is the one we're interested in
1459  self._protocol_output.append((p, p._last_state))
1460 
1461  def get_modeling_trajectory(self,
1462  score_key="SimplifiedModel_Total_Score_None",
1463  rmf_file_key="rmf_file",
1464  rmf_file_frame_key="rmf_frame_index",
1465  outputdir="./",
1466  get_every=1,
1467  nframes_trajectory=10000):
1468  """ Get a trajectory of the modeling run, for generating demonstrative movies
1469  @param score_key The score for ranking models
1470  @param rmf_file_key Key pointing to RMF filename
1471  @param rmf_file_frame_key Key pointing to RMF frame number
1472  @param outputdir The local output directory used in the run
1473  @param get_every Extract every nth frame
1474  @param nframes_trajectory Total number of frames of the trajectory
1475  """
1476  from operator import itemgetter
1477  import math
1478 
1479  trajectory_models = IMP.pmi.io.get_trajectory_models(self.stat_files,
1480  score_key,
1481  rmf_file_key,
1482  rmf_file_frame_key,
1483  get_every)
1484  rmf_file_list=trajectory_models[0]
1485  rmf_file_frame_list=trajectory_models[1]
1486  score_list=list(map(float, trajectory_models[2]))
1487 
1488  max_score=max(score_list)
1489  min_score=min(score_list)
1490 
1491 
1492  bins=[(max_score-min_score)*math.exp(-float(i))+min_score for i in range(nframes_trajectory)]
1493  binned_scores=[None]*nframes_trajectory
1494  binned_model_indexes=[-1]*nframes_trajectory
1495 
1496  for model_index,s in enumerate(score_list):
1497  bins_score_diffs=[abs(s-b) for b in bins]
1498  bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1499  if binned_scores[bin_index]==None:
1500  binned_scores[bin_index]=s
1501  binned_model_indexes[bin_index]=model_index
1502  else:
1503  old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1504  new_diff=abs(s-bins[bin_index])
1505  if new_diff < old_diff:
1506  binned_scores[bin_index]=s
1507  binned_model_indexes[bin_index]=model_index
1508 
1509  print(binned_scores)
1510  print(binned_model_indexes)
1511 
1512 
1513  def _expand_ambiguity(self,prot,d):
1514  """If using PMI2, expand the dictionary to include copies as ambiguous options
1515  This also keeps the states separate.
1516  """
1517  newdict = {}
1518  for key in d:
1519  val = d[key]
1520  if '..' in key or (type(val) is tuple and len(val)>=3):
1521  newdict[key] = val
1522  continue
1523  states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
1524  if type(val) is tuple:
1525  start = val[0]
1526  stop = val[1]
1527  name = val[2]
1528  else:
1529  start = 1
1530  stop = -1
1531  name = val
1532  for nst in range(len(states)):
1533  sel = IMP.atom.Selection(prot,molecule=name,state_index=nst)
1534  copies = sel.get_selected_particles(with_representation=False)
1535  if len(copies)>1:
1536  for nc in range(len(copies)):
1537  if len(states)>1:
1538  newdict['%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
1539  else:
1540  newdict['%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
1541  else:
1542  newdict[key] = val
1543  return newdict
1544 
1545 
1546  def clustering(self,
1547  score_key="SimplifiedModel_Total_Score_None",
1548  rmf_file_key="rmf_file",
1549  rmf_file_frame_key="rmf_frame_index",
1550  state_number=0,
1551  prefiltervalue=None,
1552  feature_keys=[],
1553  outputdir="./",
1554  alignment_components=None,
1555  number_of_best_scoring_models=10,
1556  rmsd_calculation_components=None,
1557  distance_matrix_file='distances.mat',
1558  load_distance_matrix_file=False,
1559  skip_clustering=False,
1560  number_of_clusters=1,
1561  display_plot=False,
1562  exit_after_display=True,
1563  get_every=1,
1564  first_and_last_frames=None,
1565  density_custom_ranges=None,
1566  write_pdb_with_centered_coordinates=False,
1567  voxel_size=5.0):
1568  """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
1569  Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
1570  Can pass None for copy or state to ignore that field.
1571  If you don't pass a specific copy number
1572  @param score_key The score for ranking models. Use "Total_Score"
1573  instead of default for PMI2.
1574  @param rmf_file_key Key pointing to RMF filename
1575  @param rmf_file_frame_key Key pointing to RMF frame number
1576  @param state_number State number to analyze
1577  @param prefiltervalue Only include frames where the
1578  score key is below this value
1579  @param feature_keys Keywords for which you want to
1580  calculate average, medians, etc.
1581  If you pass "Keyname" it'll include everything that matches "*Keyname*"
1582  @param outputdir The local output directory used in the run
1583  @param alignment_components Dictionary with keys=groupname, values are tuples
1584  for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1585  @param number_of_best_scoring_models Num models to keep per run
1586  @param rmsd_calculation_components For calculating RMSD
1587  (same format as alignment_components)
1588  @param distance_matrix_file Where to store/read the distance matrix
1589  @param load_distance_matrix_file Try to load the distance matrix file
1590  @param skip_clustering Just extract the best scoring models
1591  and save the pdbs
1592  @param number_of_clusters Number of k-means clusters
1593  @param display_plot Display the distance matrix
1594  @param exit_after_display Exit after displaying distance matrix
1595  @param get_every Extract every nth frame
1596  @param first_and_last_frames A tuple with the first and last frames to be
1597  analyzed. Values are percentages!
1598  Default: get all frames
1599  @param density_custom_ranges For density calculation
1600  (same format as alignment_components)
1601  @param write_pdb_with_centered_coordinates
1602  @param voxel_size Used for the density output
1603  """
1604  self._outputdir = os.path.abspath(outputdir)
1605  self._number_of_clusters = number_of_clusters
1606  for p, state in self._protocol_output:
1607  p.add_replica_exchange_analysis(state, self)
1608 
1609  if self.test_mode:
1610  return
1611 
1612  if self.rank==0:
1613  try:
1614  os.mkdir(outputdir)
1615  except:
1616  pass
1617 
1618  if not load_distance_matrix_file:
1619  if len(self.stat_files)==0: print("ERROR: no stat file found in the given path"); return
1620  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1621  self.stat_files,self.number_of_processes)[self.rank]
1622 
1623  # read ahead to check if you need the PMI2 score key instead
1624  po = IMP.pmi.output.ProcessOutput(my_stat_files[0])
1625  orig_score_key = score_key
1626  if score_key not in po.get_keys():
1627  if 'Total_Score' in po.get_keys():
1628  score_key = 'Total_Score'
1629  print("WARNING: Using 'Total_Score' instead of "
1630  "'SimplifiedModel_Total_Score_None' for the score key")
1631  for k in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1632  if k in feature_keys:
1633  print("WARNING: no need to pass " +k+" to feature_keys.")
1634  feature_keys.remove(k)
1635 
1636  best_models = IMP.pmi.io.get_best_models(my_stat_files,
1637  score_key,
1638  feature_keys,
1639  rmf_file_key,
1640  rmf_file_frame_key,
1641  prefiltervalue,
1642  get_every)
1643  rmf_file_list=best_models[0]
1644  rmf_file_frame_list=best_models[1]
1645  score_list=best_models[2]
1646  feature_keyword_list_dict=best_models[3]
1647 
1648 # ------------------------------------------------------------------------
1649 # collect all the files and scores
1650 # ------------------------------------------------------------------------
1651 
1652  if self.number_of_processes > 1:
1653  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1654  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1655  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1656  rmf_file_frame_list)
1657  for k in feature_keyword_list_dict:
1658  feature_keyword_list_dict[k] = IMP.pmi.tools.scatter_and_gather(
1659  feature_keyword_list_dict[k])
1660 
1661  # sort by score and get the best scoring ones
1662  score_rmf_tuples = list(zip(score_list,
1663  rmf_file_list,
1664  rmf_file_frame_list,
1665  list(range(len(score_list)))))
1666 
1667  if density_custom_ranges:
1668  for k in density_custom_ranges:
1669  if type(density_custom_ranges[k]) is not list:
1670  raise Exception("Density custom ranges: values must be lists of tuples")
1671 
1672  # keep subset of frames if requested
1673  if first_and_last_frames is not None:
1674  nframes = len(score_rmf_tuples)
1675  first_frame = int(first_and_last_frames[0] * nframes)
1676  last_frame = int(first_and_last_frames[1] * nframes)
1677  if last_frame > len(score_rmf_tuples):
1678  last_frame = -1
1679  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1680 
1681  # sort RMFs by the score_key in ascending order, and store the rank
1682  best_score_rmf_tuples = sorted(score_rmf_tuples,
1683  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1684  best_score_rmf_tuples=[t+(n,) for n,t in enumerate(best_score_rmf_tuples)]
1685  # sort the feature scores in the same way
1686  best_score_feature_keyword_list_dict = defaultdict(list)
1687  for tpl in best_score_rmf_tuples:
1688  index = tpl[3]
1689  for f in feature_keyword_list_dict:
1690  best_score_feature_keyword_list_dict[f].append(
1691  feature_keyword_list_dict[f][index])
1692  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1693  best_score_rmf_tuples,
1694  self.number_of_processes)[self.rank]
1695 
1696  # expand the dictionaries to include ambiguous copies
1697  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(self.model,
1698  0,
1699  my_best_score_rmf_tuples[0][1])[0]
1700  if IMP.pmi.get_is_canonical(prot_ahead):
1701  if rmsd_calculation_components is not None:
1702  tmp = self._expand_ambiguity(prot_ahead,rmsd_calculation_components)
1703  if tmp!=rmsd_calculation_components:
1704  print('Detected ambiguity, expand rmsd components to',tmp)
1705  rmsd_calculation_components = tmp
1706  if alignment_components is not None:
1707  tmp = self._expand_ambiguity(prot_ahead,alignment_components)
1708  if tmp!=alignment_components:
1709  print('Detected ambiguity, expand alignment components to',tmp)
1710  alignment_components = tmp
1711 
1712 #-------------------------------------------------------------
1713 # read the coordinates
1714 # ------------------------------------------------------------
1715  rmsd_weights = IMP.pmi.io.get_bead_sizes(self.model,
1716  my_best_score_rmf_tuples[0],
1717  rmsd_calculation_components,
1718  state_number=state_number)
1719  got_coords = IMP.pmi.io.read_coordinates_of_rmfs(self.model,
1720  my_best_score_rmf_tuples,
1721  alignment_components,
1722  rmsd_calculation_components,
1723  state_number=state_number)
1724 
1725  # note! the coordinates are simply float tuples, NOT decorators, NOT Vector3D,
1726  # NOR particles, because these object cannot be serialized. We need serialization
1727  # for the parallel computation based on mpi.
1728  all_coordinates=got_coords[0] # dict:key=component name,val=coords per hit
1729  alignment_coordinates=got_coords[1] # same as above, limited to alignment bits
1730  rmsd_coordinates=got_coords[2] # same as above, limited to RMSD bits
1731  rmf_file_name_index_dict=got_coords[3] # dictionary with key=RMF, value=score rank
1732  all_rmf_file_names=got_coords[4] # RMF file per hit
1733 
1734 # ------------------------------------------------------------------------
1735 # optionally don't compute distance matrix or cluster, just write top files
1736 # ------------------------------------------------------------------------
1737  if skip_clustering:
1738  if density_custom_ranges:
1739  DensModule = IMP.pmi.analysis.GetModelDensity(
1740  density_custom_ranges,
1741  voxel=voxel_size)
1742 
1743  dircluster=os.path.join(outputdir,"all_models."+str(n))
1744  try:
1745  os.mkdir(outputdir)
1746  except:
1747  pass
1748  try:
1749  os.mkdir(dircluster)
1750  except:
1751  pass
1752  clusstat=open(os.path.join(dircluster,"stat."+str(self.rank)+".out"),"w")
1753  for cnt,tpl in enumerate(my_best_score_rmf_tuples):
1754  rmf_name=tpl[1]
1755  rmf_frame_number=tpl[2]
1756  tmp_dict={}
1757  index=tpl[4]
1758  for key in best_score_feature_keyword_list_dict:
1759  tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1760 
1761  if cnt==0:
1762  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1763  self.model,
1764  rmf_frame_number,
1765  rmf_name)
1766  else:
1767  linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1768  self.model,
1769  prots,
1770  rs,
1771  rmf_frame_number,
1772  rmf_name)
1773  if not linking_successful:
1774  continue
1775 
1776  if not prots:
1777  continue
1778 
1779  if IMP.pmi.get_is_canonical(prots[0]):
1780  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1781  prot = states[state_number]
1782  else:
1783  prot = prots[state_number]
1784 
1785  # get transformation aligning coordinates of requested tuples
1786  # to the first RMF file
1787  if cnt==0:
1788  coords_f1=alignment_coordinates[cnt]
1789  if cnt > 0:
1790  coords_f2=alignment_coordinates[cnt]
1791  if coords_f2:
1792  Ali = IMP.pmi.analysis.Alignment(coords_f1, coords_f2)
1793  transformation = Ali.align()[1]
1794  else:
1796 
1797  rbs = set()
1798  for p in IMP.atom.get_leaves(prot):
1799  if not IMP.core.XYZR.get_is_setup(p):
1801  IMP.core.XYZR(p).set_radius(0.0001)
1802  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1803 
1805  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1806  rbs.add(rb)
1807  else:
1809  transformation)
1810  for rb in rbs:
1811  IMP.core.transform(rb,transformation)
1812 
1814  out_pdb_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".pdb")
1815  out_rmf_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".rmf3")
1816  o.init_pdb(out_pdb_fn,prot)
1817  o.write_pdb(out_pdb_fn,
1818  translate_to_geometric_center=write_pdb_with_centered_coordinates)
1819 
1820  tmp_dict["local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1821  tmp_dict["rmf_file_full_path"]=rmf_name
1822  tmp_dict["local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1823  tmp_dict["local_rmf_frame_number"]=0
1824 
1825  clusstat.write(str(tmp_dict)+"\n")
1826 
1827  if IMP.pmi.get_is_canonical(prot):
1828  # create a single-state System and write that
1830  h.set_name("System")
1831  h.add_child(prot)
1832  o.init_rmf(out_rmf_fn, [h], rs)
1833  else:
1834  o.init_rmf(out_rmf_fn, [prot],rs)
1835 
1836  o.write_rmf(out_rmf_fn)
1837  o.close_rmf(out_rmf_fn)
1838  # add the density
1839  if density_custom_ranges:
1840  DensModule.add_subunits_density(prot)
1841 
1842  if density_custom_ranges:
1843  DensModule.write_mrc(path=dircluster)
1844  del DensModule
1845  return
1846 
1847 
1848 
1849  # broadcast the coordinates
1850  if self.number_of_processes > 1:
1851  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1852  all_coordinates)
1853  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1854  all_rmf_file_names)
1855  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1856  rmf_file_name_index_dict)
1857  alignment_coordinates=IMP.pmi.tools.scatter_and_gather(
1858  alignment_coordinates)
1859  rmsd_coordinates=IMP.pmi.tools.scatter_and_gather(
1860  rmsd_coordinates)
1861 
1862  if self.rank == 0:
1863  # save needed informations in external files
1864  self.save_objects(
1865  [best_score_feature_keyword_list_dict,
1866  rmf_file_name_index_dict],
1867  ".macro.pkl")
1868 
1869 # ------------------------------------------------------------------------
1870 # Calculate distance matrix and cluster
1871 # ------------------------------------------------------------------------
1872  print("setup clustering class")
1873  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1874 
1875  for n, model_coordinate_dict in enumerate(all_coordinates):
1876  template_coordinate_dict = {}
1877  # let's try to align
1878  if alignment_components is not None and len(self.cluster_obj.all_coords) == 0:
1879  # set the first model as template coordinates
1880  self.cluster_obj.set_template(alignment_coordinates[n])
1881  self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1882  print("Global calculating the distance matrix")
1883 
1884  # calculate distance matrix, all against all
1885  self.cluster_obj.dist_matrix()
1886 
1887  # perform clustering and optionally display
1888  if self.rank == 0:
1889  self.cluster_obj.do_cluster(number_of_clusters)
1890  if display_plot:
1891  if self.rank == 0:
1892  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1893  if exit_after_display:
1894  exit()
1895  self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1896 
1897 # ------------------------------------------------------------------------
1898 # Alteratively, load the distance matrix from file and cluster that
1899 # ------------------------------------------------------------------------
1900  else:
1901  if self.rank==0:
1902  print("setup clustering class")
1903  self.cluster_obj = IMP.pmi.analysis.Clustering()
1904  self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1905  print("clustering with %s clusters" % str(number_of_clusters))
1906  self.cluster_obj.do_cluster(number_of_clusters)
1907  [best_score_feature_keyword_list_dict,
1908  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1909  if display_plot:
1910  if self.rank == 0:
1911  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1912  if exit_after_display:
1913  exit()
1914  if self.number_of_processes > 1:
1915  self.comm.Barrier()
1916 
1917 # ------------------------------------------------------------------------
1918 # now save all informations about the clusters
1919 # ------------------------------------------------------------------------
1920 
1921  if self.rank == 0:
1922  print(self.cluster_obj.get_cluster_labels())
1923  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1924  print("rank %s " % str(self.rank))
1925  print("cluster %s " % str(n))
1926  print("cluster label %s " % str(cl))
1927  print(self.cluster_obj.get_cluster_label_names(cl))
1928 
1929  # first initialize the Density class if requested
1930  if density_custom_ranges:
1931  DensModule = IMP.pmi.analysis.GetModelDensity(
1932  density_custom_ranges,
1933  voxel=voxel_size)
1934 
1935  dircluster = outputdir + "/cluster." + str(n) + "/"
1936  try:
1937  os.mkdir(dircluster)
1938  except:
1939  pass
1940 
1941  rmsd_dict = {"AVERAGE_RMSD":
1942  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1943  clusstat = open(dircluster + "stat.out", "w")
1944  for k, structure_name in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1945  # extract the features
1946  tmp_dict = {}
1947  tmp_dict.update(rmsd_dict)
1948  index = rmf_file_name_index_dict[structure_name]
1949  for key in best_score_feature_keyword_list_dict:
1950  tmp_dict[
1951  key] = best_score_feature_keyword_list_dict[
1952  key][
1953  index]
1954 
1955  # get the rmf name and the frame number from the list of
1956  # frame names
1957  rmf_name = structure_name.split("|")[0]
1958  rmf_frame_number = int(structure_name.split("|")[1])
1959  clusstat.write(str(tmp_dict) + "\n")
1960 
1961  # extract frame (open or link to existing)
1962  if k==0:
1963  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1964  self.model,
1965  rmf_frame_number,
1966  rmf_name)
1967  else:
1968  linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1969  self.model,
1970  prots,
1971  rs,
1972  rmf_frame_number,
1973  rmf_name)
1974  if not linking_successful:
1975  continue
1976  if not prots:
1977  continue
1978 
1979  if IMP.pmi.get_is_canonical(prots[0]):
1980  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1981  prot = states[state_number]
1982  else:
1983  prot = prots[state_number]
1984 
1985  # transform clusters onto first
1986  if k > 0:
1987  model_index = self.cluster_obj.get_model_index_from_name(
1988  structure_name)
1989  transformation = self.cluster_obj.get_transformation_to_first_member(
1990  cl,
1991  model_index)
1992  rbs = set()
1993  for p in IMP.atom.get_leaves(prot):
1994  if not IMP.core.XYZR.get_is_setup(p):
1996  IMP.core.XYZR(p).set_radius(0.0001)
1997  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1998 
2000  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
2001  rbs.add(rb)
2002  else:
2004  transformation)
2005  for rb in rbs:
2006  IMP.core.transform(rb,transformation)
2007 
2008  # add the density
2009  if density_custom_ranges:
2010  DensModule.add_subunits_density(prot)
2011 
2012  # pdb writing should be optimized!
2013  o = IMP.pmi.output.Output()
2014  o.init_pdb(dircluster + str(k) + ".pdb", prot)
2015  o.write_pdb(dircluster + str(k) + ".pdb")
2016 
2017  if IMP.pmi.get_is_canonical(prot):
2018  # create a single-state System and write that
2020  h.set_name("System")
2021  h.add_child(prot)
2022  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
2023  else:
2024  o.init_rmf(dircluster + str(k) + ".rmf3", [prot],rs)
2025  o.write_rmf(dircluster + str(k) + ".rmf3")
2026  o.close_rmf(dircluster + str(k) + ".rmf3")
2027 
2028  del o
2029  # IMP.atom.destroy(prot)
2030 
2031  if density_custom_ranges:
2032  DensModule.write_mrc(path=dircluster)
2033  del DensModule
2034 
2035  if self.number_of_processes>1:
2036  self.comm.Barrier()
2037 
2038  def get_cluster_rmsd(self,cluster_num):
2039  if self.cluster_obj is None:
2040  raise Exception("Run clustering first")
2041  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
2042 
2043  def save_objects(self, objects, file_name):
2044  import pickle
2045  outf = open(file_name, 'wb')
2046  pickle.dump(objects, outf)
2047  outf.close()
2048 
2049  def load_objects(self, file_name):
2050  import pickle
2051  inputf = open(file_name, 'rb')
2052  objects = pickle.load(inputf)
2053  inputf.close()
2054  return objects
A class to simplify create of constraints and movers for an IMP Hierarchy.
A macro for running all the basic operations of analysis.
Definition: macros.py:1404
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:50
Sample using molecular dynamics.
Definition: samplers.py:392
A class for reading stat files.
Definition: output.py:696
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:508
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:509
def __init__
Constructor.
Definition: macros.py:729
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:630
def __init__
Constructor.
Definition: macros.py:76
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1546
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:472
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
This class initializes the root node of the global IMP.atom.Hierarchy.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:1454
Representation of the system.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:1463
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def build_model
Create model.
Definition: macros.py:1003
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:23
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:1079
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def BuildModel0
Construct a component for each subunit (no splitting, nothing fancy).
Definition: macros.py:1324
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:47
Collect timing information.
Definition: tools.py:58
def __init__
Constructor.
Definition: macros.py:989
A macro to build a Representation based on a Topology and lists of movers DEPRECATED - Use BuildSyste...
Definition: macros.py:707
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: utilities.h:91
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9583
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:36
Create movers and setup constraints for PMI objects.
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:516
Deprecated building macro - use BuildSystem()
Definition: macros.py:985
The general base class for IMP exceptions.
Definition: exception.h:49
Class to handle individual particles of a Model object.
Definition: Particle.h:41
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:639
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:494
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
Set up the representation of all proteins and nucleic acid macromolecules.
A macro to help setup and run replica exchange.
Definition: macros.py:24
A dictionary-like wrapper for reading and storing sequence data.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Compute mean density maps from structures.
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:1037
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:640
Sample using replica exchange.
Definition: samplers.py:502
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
def get_representation
Return the Representation object.
Definition: macros.py:843
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27