IMP logo
IMP Reference Guide  2.7.0
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
8 import IMP.pmi.tools
9 import IMP.pmi.samplers
10 import IMP.pmi.output
11 import IMP.pmi.analysis
12 import IMP.pmi.io
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import RMF
17 import os
18 import glob
19 from operator import itemgetter
20 from collections import defaultdict
21 import numpy as np
22 import string
23 
24 class ReplicaExchange0(object):
25  """A macro to help setup and run replica exchange.
26  Supports Monte Carlo and molecular dynamics.
27  Produces trajectory RMF files, best PDB structures,
28  and output stat files.
29  """
30  def __init__(self, model,
31  representation=None,
32  root_hier=None,
33  sample_objects=None, # DEPRECATED
34  monte_carlo_sample_objects=None,
35  molecular_dynamics_sample_objects=None,
36  output_objects=[],
37  crosslink_restraints=None,
38  monte_carlo_temperature=1.0,
39  simulated_annealing=False,
40  simulated_annealing_minimum_temperature=1.0,
41  simulated_annealing_maximum_temperature=2.5,
42  simulated_annealing_minimum_temperature_nframes=100,
43  simulated_annealing_maximum_temperature_nframes=100,
44  replica_exchange_minimum_temperature=1.0,
45  replica_exchange_maximum_temperature=2.5,
46  num_sample_rounds=1,
47  number_of_best_scoring_models=500,
48  monte_carlo_steps=10,
49  molecular_dynamics_steps=10,
50  molecular_dynamics_max_time_step=1.0,
51  number_of_frames=1000,
52  nframes_write_coordinates=1,
53  write_initial_rmf=True,
54  initial_rmf_name_suffix="initial",
55  stat_file_name_suffix="stat",
56  best_pdb_name_suffix="model",
57  do_clean_first=True,
58  do_create_directories=True,
59  global_output_directory="./",
60  rmf_dir="rmfs/",
61  best_pdb_dir="pdbs/",
62  replica_stat_file_suffix="stat_replica",
63  em_object_for_rmf=None,
64  atomistic=False,
65  replica_exchange_object=None,
66  test_mode=False):
67  """Constructor.
68  @param model The IMP model
69  @param representation PMI.representation.Representation object
70  (or list of them, for multi-state modeling)
71  @param root_hier Instead of passing Representation, pass the System hierarchy
72  @param monte_carlo_sample_objects Objects for MC sampling; a list of
73  structural components (generally the representation) that will
74  be moved and restraints with parameters that need to
75  be sampled.
76  For PMI2: just pass flat list of movers
77  @param molecular_dynamics_sample_objects Objects for MD sampling
78  For PMI2: just pass flat list of particles
79  @param output_objects A list of structural objects and restraints
80  that will be included in output (statistics files). Any object
81  that provides a get_output() method can be used here.
82  @param crosslink_restraints List of cross-link restraints that will
83  be included in output RMF files (for visualization).
84  @param monte_carlo_temperature MC temp (may need to be optimized
85  based on post-sampling analysis)
86  @param simulated_annealing If True, perform simulated annealing
87  @param simulated_annealing_minimum_temperature Should generally be
88  the same as monte_carlo_temperature.
89  @param simulated_annealing_minimum_temperature_nframes Number of
90  frames to compute at minimum temperature.
91  @param simulated_annealing_maximum_temperature_nframes Number of
92  frames to compute at
93  temps > simulated_annealing_maximum_temperature.
94  @param replica_exchange_minimum_temperature Low temp for REX; should
95  generally be the same as monte_carlo_temperature.
96  @param replica_exchange_maximum_temperature High temp for REX
97  @param num_sample_rounds Number of rounds of MC/MD per cycle
98  @param number_of_best_scoring_models Number of top-scoring PDB models
99  to keep around for analysis
100  @param monte_carlo_steps Number of MC steps per round
101  @param molecular_dynamics_steps Number of MD steps per round
102  @param molecular_dynamics_max_time_step Max time step for MD
103  @param number_of_frames Number of REX frames to run
104  @param nframes_write_coordinates How often to write the coordinates
105  of a frame
106  @param write_initial_rmf Write the initial configuration
107  @param global_output_directory Folder that will be created to house
108  output.
109  @param test_mode Set to True to avoid writing any files, just test one frame.
110  """
111  self.model = model
112  self.vars = {}
113  self.pmi2 = False
114 
115  ### add check hierarchy is multistate
116  self.output_objects = output_objects
117  self.representation = representation
118  if representation:
119  if type(representation) == list:
120  self.is_multi_state = True
121  self.root_hiers = [r.prot for r in representation]
122  self.vars["number_of_states"] = len(representation)
123  else:
124  self.is_multi_state = False
125  self.root_hier = representation.prot
126  self.vars["number_of_states"] = 1
127  elif root_hier and type(root_hier) == IMP.atom.Hierarchy and root_hier.get_name()=='System':
128  self.pmi2 = True
129  self.output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
130  self.root_hier = root_hier
131  states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
132  self.vars["number_of_states"] = len(states)
133  if len(states)>1:
134  self.root_hiers = states
135  self.is_multi_state = True
136  else:
137  self.root_hier = root_hier
138  self.is_multi_state = False
139  else:
140  raise Exception("Must provide representation or System hierarchy (root_hier)")
141 
142  self.crosslink_restraints = crosslink_restraints
143  self.em_object_for_rmf = em_object_for_rmf
144  self.monte_carlo_sample_objects = monte_carlo_sample_objects
145  if sample_objects is not None:
146  self.monte_carlo_sample_objects+=sample_objects
147  self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
148  self.replica_exchange_object = replica_exchange_object
149  self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
150  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
151  self.vars[
152  "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
153  self.vars[
154  "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
155 
156  self.vars["simulated_annealing"]=\
157  simulated_annealing
158  self.vars["simulated_annealing_minimum_temperature"]=\
159  simulated_annealing_minimum_temperature
160  self.vars["simulated_annealing_maximum_temperature"]=\
161  simulated_annealing_maximum_temperature
162  self.vars["simulated_annealing_minimum_temperature_nframes"]=\
163  simulated_annealing_minimum_temperature_nframes
164  self.vars["simulated_annealing_maximum_temperature_nframes"]=\
165  simulated_annealing_maximum_temperature_nframes
166 
167  self.vars["num_sample_rounds"] = num_sample_rounds
168  self.vars[
169  "number_of_best_scoring_models"] = number_of_best_scoring_models
170  self.vars["monte_carlo_steps"] = monte_carlo_steps
171  self.vars["molecular_dynamics_steps"]=molecular_dynamics_steps
172  self.vars["number_of_frames"] = number_of_frames
173  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
174  self.vars["write_initial_rmf"] = write_initial_rmf
175  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
176  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
177  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
178  self.vars["do_clean_first"] = do_clean_first
179  self.vars["do_create_directories"] = do_create_directories
180  self.vars["global_output_directory"] = global_output_directory
181  self.vars["rmf_dir"] = rmf_dir
182  self.vars["best_pdb_dir"] = best_pdb_dir
183  self.vars["atomistic"] = atomistic
184  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
185  self.vars["geometries"] = None
186  self.test_mode = test_mode
187 
188  def add_geometries(self, geometries):
189  if self.vars["geometries"] is None:
190  self.vars["geometries"] = list(geometries)
191  else:
192  self.vars["geometries"].extend(geometries)
193 
194  def show_info(self):
195  print("ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
196  print("--- it stores the best scoring pdb models in pdbs/")
197  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
198  print("--- variables:")
199  keys = list(self.vars.keys())
200  keys.sort()
201  for v in keys:
202  print("------", v.ljust(30), self.vars[v])
203 
204  def get_replica_exchange_object(self):
205  return self.replica_exchange_object
206 
207  def execute_macro(self):
208  temp_index_factor = 100000.0
209  samplers=[]
210  sampler_mc=None
211  sampler_md=None
212  if self.monte_carlo_sample_objects is not None:
213  print("Setting up MonteCarlo")
214  sampler_mc = IMP.pmi.samplers.MonteCarlo(self.model,
215  self.monte_carlo_sample_objects,
216  self.vars["monte_carlo_temperature"])
217  if self.vars["simulated_annealing"]:
218  tmin=self.vars["simulated_annealing_minimum_temperature"]
219  tmax=self.vars["simulated_annealing_maximum_temperature"]
220  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
221  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
222  sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
223  self.output_objects.append(sampler_mc)
224  samplers.append(sampler_mc)
225 
226 
227  if self.molecular_dynamics_sample_objects is not None:
228  print("Setting up MolecularDynamics")
229  sampler_md = IMP.pmi.samplers.MolecularDynamics(self.model,
230  self.molecular_dynamics_sample_objects,
231  self.vars["monte_carlo_temperature"],
232  maximum_time_step=self.molecular_dynamics_max_time_step)
233  if self.vars["simulated_annealing"]:
234  tmin=self.vars["simulated_annealing_minimum_temperature"]
235  tmax=self.vars["simulated_annealing_maximum_temperature"]
236  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
237  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
238  sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
239  self.output_objects.append(sampler_md)
240  samplers.append(sampler_md)
241 # -------------------------------------------------------------------------
242 
243  print("Setting up ReplicaExchange")
244  rex = IMP.pmi.samplers.ReplicaExchange(self.model,
245  self.vars[
246  "replica_exchange_minimum_temperature"],
247  self.vars[
248  "replica_exchange_maximum_temperature"],
249  samplers,
250  replica_exchange_object=self.replica_exchange_object)
251  self.replica_exchange_object = rex.rem
252 
253  myindex = rex.get_my_index()
254  self.output_objects.append(rex)
255 
256  # must reset the minimum temperature due to the
257  # different binary length of rem.get_my_parameter double and python
258  # float
259  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
260 
261 # -------------------------------------------------------------------------
262 
263  globaldir = self.vars["global_output_directory"] + "/"
264  rmf_dir = globaldir + self.vars["rmf_dir"]
265  pdb_dir = globaldir + self.vars["best_pdb_dir"]
266 
267  if not self.test_mode:
268  if self.vars["do_clean_first"]:
269  pass
270 
271  if self.vars["do_create_directories"]:
272 
273  try:
274  os.makedirs(globaldir)
275  except:
276  pass
277  try:
278  os.makedirs(rmf_dir)
279  except:
280  pass
281 
282  if not self.is_multi_state:
283  try:
284  os.makedirs(pdb_dir)
285  except:
286  pass
287  else:
288  for n in range(self.vars["number_of_states"]):
289  try:
290  os.makedirs(pdb_dir + "/" + str(n))
291  except:
292  pass
293 
294 # -------------------------------------------------------------------------
295 
296  sw = IMP.pmi.tools.Stopwatch()
297  self.output_objects.append(sw)
298 
299  print("Setting up stat file")
300  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
301  low_temp_stat_file = globaldir + \
302  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
303  if not self.test_mode:
304  output.init_stat2(low_temp_stat_file,
305  self.output_objects,
306  extralabels=["rmf_file", "rmf_frame_index"])
307 
308  print("Setting up replica stat file")
309  replica_stat_file = globaldir + \
310  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
311  if not self.test_mode:
312  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
313 
314  if not self.test_mode:
315  print("Setting up best pdb files")
316  if not self.is_multi_state:
317  if self.vars["number_of_best_scoring_models"] > 0:
318  output.init_pdb_best_scoring(pdb_dir + "/" +
319  self.vars["best_pdb_name_suffix"],
320  self.root_hier,
321  self.vars[
322  "number_of_best_scoring_models"],
323  replica_exchange=True)
324  output.write_psf(pdb_dir + "/" +"model.psf",pdb_dir + "/" +
325  self.vars["best_pdb_name_suffix"]+".0.pdb")
326  else:
327  if self.vars["number_of_best_scoring_models"] > 0:
328  for n in range(self.vars["number_of_states"]):
329  output.init_pdb_best_scoring(pdb_dir + "/" + str(n) + "/" +
330  self.vars["best_pdb_name_suffix"],
331  self.root_hiers[n],
332  self.vars[
333  "number_of_best_scoring_models"],
334  replica_exchange=True)
335  output.write_psf(pdb_dir + "/" + str(n) + "/" +"model.psf",pdb_dir + "/" + str(n) + "/" +
336  self.vars["best_pdb_name_suffix"]+".0.pdb")
337 # ---------------------------------------------
338 
339  if self.em_object_for_rmf is not None:
340  if not self.is_multi_state or self.pmi2:
341  output_hierarchies = [
342  self.root_hier,
343  self.em_object_for_rmf.get_density_as_hierarchy(
344  )]
345  else:
346  output_hierarchies = self.root_hiers
347  output_hierarchies.append(
348  self.em_object_for_rmf.get_density_as_hierarchy())
349  else:
350  if not self.is_multi_state or self.pmi2:
351  output_hierarchies = [self.root_hier]
352  else:
353  output_hierarchies = self.root_hiers
354 
355 #----------------------------------------------
356  if not self.test_mode:
357  print("Setting up and writing initial rmf coordinate file")
358  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
359  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
360  output_hierarchies)
361  if self.crosslink_restraints:
362  output.add_restraints_to_rmf(
363  init_suffix + "." + str(myindex) + ".rmf3",
364  self.crosslink_restraints)
365  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
366  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
367 
368 #----------------------------------------------
369 
370  if not self.test_mode:
371  print("Setting up production rmf files")
372  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
373  output.init_rmf(rmfname, output_hierarchies, geometries=self.vars["geometries"])
374 
375  if self.crosslink_restraints:
376  output.add_restraints_to_rmf(rmfname, self.crosslink_restraints)
377 
378  ntimes_at_low_temp = 0
379 
380  if myindex == 0:
381  self.show_info()
382  self.replica_exchange_object.set_was_used(True)
383  nframes = self.vars["number_of_frames"]
384  if self.test_mode:
385  nframes = 1
386  for i in range(nframes):
387  if self.test_mode:
388  score = 0.
389  else:
390  for nr in range(self.vars["num_sample_rounds"]):
391  if sampler_md is not None:
392  sampler_md.optimize(
393  self.vars["molecular_dynamics_steps"])
394  if sampler_mc is not None:
395  sampler_mc.optimize(self.vars["monte_carlo_steps"])
397  self.model).evaluate(False)
398  output.set_output_entry("score", score)
399 
400  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
401 
402  if min_temp_index == my_temp_index:
403  print("--- frame %s score %s " % (str(i), str(score)))
404 
405  if not self.test_mode:
406  if i % self.vars["nframes_write_coordinates"]==0:
407  print('--- writing coordinates')
408  if self.vars["number_of_best_scoring_models"] > 0:
409  output.write_pdb_best_scoring(score)
410  output.write_rmf(rmfname)
411  output.set_output_entry("rmf_file", rmfname)
412  output.set_output_entry("rmf_frame_index", ntimes_at_low_temp)
413  else:
414  output.set_output_entry("rmf_file", rmfname)
415  output.set_output_entry("rmf_frame_index", '-1')
416  output.write_stat2(low_temp_stat_file)
417  ntimes_at_low_temp += 1
418 
419  if not self.test_mode:
420  output.write_stat2(replica_stat_file)
421  rex.swap_temp(i, score)
422  if self.representation:
423  for p in self.representation._protocol_output:
424  p.add_replica_exchange(self)
425 
426  if not self.test_mode:
427  print("closing production rmf files")
428  output.close_rmf(rmfname)
429 
430 
431 
432 # ----------------------------------------------------------------------
433 class BuildSystem(object):
434  """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
435  Easily create multi-state systems by calling this macro
436  repeatedly with different TopologyReader objects!
437  A useful function is get_molecules() which returns the PMI Molecules grouped by state
438  as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
439  Quick multi-state system:
440  @code{.python}
441  mdl = IMP.Model()
442  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
443  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
444  bs = IMP.pmi.macros.BuildSystem(mdl)
445  bs.add_state(reader1)
446  bs.add_state(reader2)
447  bs.execute_macro() # build everything including degrees of freedom
448  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
449  ### now you have a two state system, you add restraints etc
450  @endcode
451  \note The "domain name" entry of the topology reader is not used.
452  All molecules are set up by the component name, but split into rigid bodies
453  as requested.
454  """
455  def __init__(self,
456  mdl,
457  sequence_connectivity_scale=4.0,
458  force_create_gmm_files=False,
459  resolutions=[1,10]):
460  """Constructor
461  @param mdl An IMP Model
462  @param sequence_connectivity_scale For scaling the connectivity restraint
463  @param force_create_gmm_files If True, will sample and create GMMs
464  no matter what. If False, will only only sample if the
465  files don't exist. If number of Gaussians is zero, won't
466  do anything.
467  @param resolutions The resolutions to build for structured regions
468  """
469  self.mdl = mdl
470  self.system = IMP.pmi.topology.System(self.mdl)
471  self._readers = [] # the TopologyReaders (one per state)
472  self._domain_res = [] # TempResidues for each domain key=unique name, value=(atomic_res,non_atomic_res).
473  self._domains = [] # key = domain unique name, value = Component
474  self.force_create_gmm_files = force_create_gmm_files
475  self.resolutions = resolutions
476 
477  def add_state(self,
478  reader,
479  keep_chain_id=False):
480  """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
481  When you are done adding states, call execute_macro()
482  @param reader The TopologyReader object
483  """
484  state = self.system.create_state()
485  self._readers.append(reader)
486  these_domain_res = {} # key is unique name, value is (atomic res, nonatomicres)
487  these_domains = {} # key is unique name, value is _Component
488  chain_ids = string.ascii_uppercase+string.ascii_lowercase+'0123456789'
489  numchain = 0
490 
491  ### setup representation
492  # loop over molecules, copies, then domains
493  for molname in reader.get_molecules():
494  copies = reader.get_molecules()[molname].domains
495  for nc,copyname in enumerate(copies):
496  print("BuildSystem.add_state: setting up molecule %s copy number %s" % (molname,str(nc)))
497  copy = copies[copyname]
498  # option to not rename chains
499  if keep_chain_id == True:
500  chain_id = copy[0].chain
501  else:
502  chain_id = chain_ids[numchain]
503  if nc==0:
504  is_nucleic=False
505  fasta_flag=copy[0].fasta_flag
506  if fasta_flag == "RNA" or fasta_flag == "DNA": is_nucleic=True
507 
508  seq = IMP.pmi.topology.Sequences(copy[0].fasta_file)[copy[0].fasta_id]
509  print("BuildSystem.add_state: molecule %s sequence has %s residues" % (molname,len(seq)))
510  orig_mol = state.create_molecule(molname,
511  seq,
512  chain_id,is_nucleic)
513  mol = orig_mol
514  numchain+=1
515  else:
516  print("BuildSystem.add_state: creating a copy for molecule %s" % molname)
517  mol = orig_mol.create_copy(chain_id)
518  numchain+=1
519 
520  for domainnumber,domain in enumerate(copy):
521  print("BuildSystem.add_state: ---- setting up domain %s of molecule %s" % (domainnumber,molname))
522  # we build everything in the residue range, even if it
523  # extends beyond what's in the actual PDB file
524  these_domains[domain.get_unique_name()] = domain
525  if domain.residue_range==[] or domain.residue_range is None:
526  domain_res = mol.get_residues()
527  else:
528  start = domain.residue_range[0]+domain.pdb_offset
529  if domain.residue_range[1]=='END':
530  end = len(mol.sequence)
531  else:
532  end = domain.residue_range[1]+domain.pdb_offset
533  domain_res = mol.residue_range(start-1,end-1)
534  print("BuildSystem.add_state: -------- domain %s of molecule %s extends from residue %s to residue %s " % (domainnumber,molname,start,end))
535  if domain.pdb_file=="BEADS":
536  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by BEADS " % (domainnumber,molname))
537  mol.add_representation(
538  domain_res,
539  resolutions=[domain.bead_size],
540  setup_particles_as_densities=(
541  domain.em_residues_per_gaussian!=0),
542  color = domain.color)
543  these_domain_res[domain.get_unique_name()] = (set(),domain_res)
544  elif domain.pdb_file=="IDEAL_HELIX":
545  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by IDEAL_HELIX " % (domainnumber,molname))
546  mol.add_representation(
547  domain_res,
548  resolutions=self.resolutions,
549  ideal_helix=True,
550  density_residues_per_component=domain.em_residues_per_gaussian,
551  density_prefix=domain.density_prefix,
552  density_force_compute=self.force_create_gmm_files,
553  color = domain.color)
554  these_domain_res[domain.get_unique_name()] = (domain_res,set())
555  else:
556  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by pdb file %s " % (domainnumber,molname,domain.pdb_file))
557  domain_atomic = mol.add_structure(domain.pdb_file,
558  domain.chain,
559  domain.residue_range,
560  domain.pdb_offset,
561  soft_check=True)
562  domain_non_atomic = domain_res - domain_atomic
563  if not domain.em_residues_per_gaussian:
564  mol.add_representation(domain_atomic,
565  resolutions=self.resolutions,
566  color = domain.color)
567  if len(domain_non_atomic)>0:
568  mol.add_representation(domain_non_atomic,
569  resolutions=[domain.bead_size],
570  color = domain.color)
571  else:
572  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by gaussians " % (domainnumber,molname))
573  mol.add_representation(
574  domain_atomic,
575  resolutions=self.resolutions,
576  density_residues_per_component=domain.em_residues_per_gaussian,
577  density_prefix=domain.density_prefix,
578  density_force_compute=self.force_create_gmm_files,
579  color = domain.color)
580  if len(domain_non_atomic)>0:
581  mol.add_representation(domain_non_atomic,
582  resolutions=[domain.bead_size],
583  setup_particles_as_densities=True,
584  color = domain.color)
585  these_domain_res[domain.get_unique_name()] = (
586  domain_atomic,domain_non_atomic)
587  self._domain_res.append(these_domain_res)
588  self._domains.append(these_domains)
589  print('BuildSystem.add_state: State',len(self.system.states),'added')
590 
591  def get_molecules(self):
592  """Return list of all molecules grouped by state.
593  For each state, it's a dictionary of Molecules where key is the molecule name
594  """
595  return [s.get_molecules() for s in self.system.get_states()]
596 
597  def get_molecule(self,molname,copy_index=0,state_index=0):
598  return self.system.get_states()[state_index].get_molecules()[molname][copy_index]
599 
600  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04, max_bead_trans=4.0, max_srb_trans=4.0,max_srb_rot=0.04):
601  """Builds representations and sets up degrees of freedom"""
602  print("BuildSystem.execute_macro: building representations")
603  self.root_hier = self.system.build()
604 
605  print("BuildSystem.execute_macro: setting up degrees of freedom")
606  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.mdl)
607  for nstate,reader in enumerate(self._readers):
608  rbs = reader.get_rigid_bodies()
609  srbs = reader.get_super_rigid_bodies()
610  csrbs = reader.get_chains_of_super_rigid_bodies()
611 
612  # add rigid bodies
613  domains_in_rbs = set()
614  for rblist in rbs:
615  print("BuildSystem.execute_macro: -------- building rigid body %s" % (str(rblist)))
616  all_res = IMP.pmi.tools.OrderedSet()
617  bead_res = IMP.pmi.tools.OrderedSet()
618  for dname in rblist:
619  domain = self._domains[nstate][dname]
620  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
621  all_res|=self._domain_res[nstate][dname][0]
622  bead_res|=self._domain_res[nstate][dname][1]
623  domains_in_rbs.add(dname)
624  all_res|=bead_res
625  print("BuildSystem.execute_macro: -------- \
626 creating rigid body with max_trans %s \
627 max_rot %s non_rigid_max_trans %s" \
628  % (str(max_rb_trans),str(max_rb_rot),str(max_bead_trans)))
629  self.dof.create_rigid_body(all_res,
630  nonrigid_parts=bead_res,
631  max_trans=max_rb_trans,
632  max_rot=max_rb_rot,
633  nonrigid_max_trans=max_bead_trans)
634 
635  # if you have any BEAD domains not in an RB, set them as flexible beads
636  for dname in self._domains[nstate]:
637  domain = self._domains[nstate][dname]
638  if domain.pdb_file=="BEADS" and dname not in domains_in_rbs:
639  self.dof.create_flexible_beads(
640  self._domain_res[nstate][dname][1],max_trans=max_bead_trans)
641 
642  # add super rigid bodies
643  for srblist in srbs:
644  print("BuildSystem.execute_macro: -------- building super rigid body %s" % (str(srblist)))
645  all_res = IMP.pmi.tools.OrderedSet()
646  for dname in srblist:
647  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
648  all_res|=self._domain_res[nstate][dname][0]
649  all_res|=self._domain_res[nstate][dname][1]
650 
651  print("BuildSystem.execute_macro: -------- \
652 creating super rigid body with max_trans %s max_rot %s " \
653  % (str(max_srb_trans),str(max_srb_rot)))
654  self.dof.create_super_rigid_body(all_res,max_trans=max_srb_trans,max_rot=max_srb_rot)
655 
656  # add chains of super rigid bodies
657  for csrblist in csrbs:
658  all_res = IMP.pmi.tools.OrderedSet()
659  for dname in csrblist:
660  all_res|=self._domain_res[nstate][dname][0]
661  all_res|=self._domain_res[nstate][dname][1]
662  all_res = list(all_res)
663  all_res.sort(key=lambda r:r.get_index())
664  #self.dof.create_super_rigid_body(all_res,chain_min_length=2,chain_max_length=3)
665  self.dof.create_main_chain_mover(all_res)
666  return self.root_hier,self.dof
667 
668 @IMP.deprecated_object("2.5", "Use BuildSystem instead")
669 class BuildModel(object):
670  """A macro to build a Representation based on a Topology and lists of movers
671  DEPRECATED - Use BuildSystem instead.
672  """
673  def __init__(self,
674  model,
675  component_topologies,
676  list_of_rigid_bodies=[],
677  list_of_super_rigid_bodies=[],
678  chain_of_super_rigid_bodies=[],
679  sequence_connectivity_scale=4.0,
680  add_each_domain_as_rigid_body=False,
681  force_create_gmm_files=False):
682  """Constructor.
683  @param model The IMP model
684  @param component_topologies List of
685  IMP.pmi.topology.ComponentTopology items
686  @param list_of_rigid_bodies List of lists of domain names that will
687  be moved as rigid bodies.
688  @param list_of_super_rigid_bodies List of lists of domain names
689  that will move together in an additional Monte Carlo move.
690  @param chain_of_super_rigid_bodies List of lists of domain names
691  (choices can only be from the same molecule). Each of these
692  groups will be moved rigidly. This helps to sample more
693  efficiently complex topologies, made of several rigid bodies,
694  connected by flexible linkers.
695  @param sequence_connectivity_scale For scaling the connectivity
696  restraint
697  @param add_each_domain_as_rigid_body That way you don't have to
698  put all of them in the list
699  @param force_create_gmm_files If True, will sample and create GMMs
700  no matter what. If False, will only only sample if the
701  files don't exist. If number of Gaussians is zero, won't
702  do anything.
703  """
704  self.m = model
705  self.simo = IMP.pmi.representation.Representation(self.m,
706  upperharmonic=True,
707  disorderedlength=False)
708 
709  data=component_topologies
710  if list_of_rigid_bodies==[]:
711  print("WARNING: No list of rigid bodies inputted to build_model()")
712  if list_of_super_rigid_bodies==[]:
713  print("WARNING: No list of super rigid bodies inputted to build_model()")
714  if chain_of_super_rigid_bodies==[]:
715  print("WARNING: No chain of super rigid bodies inputted to build_model()")
716  all_dnames = set([d for sublist in list_of_rigid_bodies+list_of_super_rigid_bodies\
717  +chain_of_super_rigid_bodies for d in sublist])
718  all_available = set([c._domain_name for c in component_topologies])
719  if not all_dnames <= all_available:
720  raise ValueError("All requested movers must reference domain "
721  "names in the component topologies")
722 
723  self.domain_dict={}
724  self.resdensities={}
725  super_rigid_bodies={}
726  chain_super_rigid_bodies={}
727  rigid_bodies={}
728 
729  for c in data:
730  comp_name = c.molname
731  hier_name = c._domain_name
732  color = 0. # Can't use new-style string colors
733  fasta_file = c.fasta_file
734  fasta_id = c.fasta_id
735  pdb_name = c.pdb_file
736  chain_id = c.chain
737  res_range = c.residue_range
738  offset = c.pdb_offset
739  bead_size = c.bead_size
740  em_num_components = c.em_residues_per_gaussian
741  em_txt_file_name = c.gmm_file
742  em_mrc_file_name = c.mrc_file
743 
744  if comp_name not in self.simo.get_component_names():
745  self.simo.create_component(comp_name,color=0.0)
746  self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
747 
748  # create hierarchy (adds resolutions, beads) with autobuild and optionally add EM data
749  if em_num_components==0:
750  read_em_files=False
751  include_res0=False
752  else:
753  if (not os.path.isfile(em_txt_file_name)) or force_create_gmm_files:
754  read_em_files=False
755  include_res0=True
756  else:
757  read_em_files=True
758  include_res0=False
759 
760  outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
761  res_range,include_res0,beadsize=bead_size,
762  color=color,offset=offset)
763  if em_num_components!=0:
764  if read_em_files:
765  print("will read GMM files")
766  else:
767  print("will calculate GMMs")
768 
769  dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
770  em_mrc_file_name,em_num_components,read_em_files)
771  self.simo.add_all_atom_densities(comp_name, particles=beads)
772  dens_hier+=beads
773  else:
774  dens_hier=[]
775 
776  self.resdensities[hier_name]=dens_hier
777  self.domain_dict[hier_name]=outhier+dens_hier
778 
779  # setup basic restraints
780  for c in self.simo.get_component_names():
781  self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
782  self.simo.setup_component_geometry(c)
783 
784  # create movers
785  for rblist in list_of_rigid_bodies:
786  rb=[]
787  for rbmember in rblist:
788  rb+=[h for h in self.domain_dict[rbmember]]
789  self.simo.set_rigid_body_from_hierarchies(rb)
790  for srblist in list_of_super_rigid_bodies:
791  srb=[]
792  for srbmember in rblist:
793  srb+=[h for h in self.domain_dict[srbmember]]
794  self.simo.set_super_rigid_body_from_hierarchies(srb)
795  for clist in chain_of_super_rigid_bodies:
796  crb=[]
797  for crbmember in rblist:
798  crb+=[h for h in self.domain_dict[crbmember]]
799  self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
800 
801  self.simo.set_floppy_bodies()
802  self.simo.setup_bonds()
803 
805  '''Return the Representation object'''
806  return self.simo
807 
808  def get_density_hierarchies(self,hier_name_list):
809  # return a list of density hierarchies
810  # specify the list of hierarchy names
811  dens_hier_list=[]
812  for hn in hier_name_list:
813  print(hn)
814  dens_hier_list+=self.resdensities[hn]
815  return dens_hier_list
816 
817  def set_gmm_models_directory(self,directory_name):
818  self.gmm_models_directory=directory_name
819 
820  def get_pdb_bead_bits(self,hierarchy):
821  pdbbits=[]
822  beadbits=[]
823  helixbits=[]
824  for h in hierarchy:
825  if "_pdb" in h.get_name():pdbbits.append(h)
826  if "_bead" in h.get_name():beadbits.append(h)
827  if "_helix" in h.get_name():helixbits.append(h)
828  return (pdbbits,beadbits,helixbits)
829 
830  def scale_bead_radii(self,nresidues,scale):
831  scaled_beads=set()
832  for h in self.domain_dict:
833  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
834  slope=(1.0-scale)/(1.0-float(nresidues))
835 
836  for b in beadbits:
837  # I have to do the following
838  # because otherwise we'll scale more than once
839  if b not in scaled_beads:
840  scaled_beads.add(b)
841  else:
842  continue
843  radius=IMP.core.XYZR(b).get_radius()
844  num_residues=len(IMP.pmi.tools.get_residue_indexes(b))
845  scale_factor=slope*float(num_residues)+1.0
846  print(scale_factor)
847  new_radius=scale_factor*radius
848  IMP.core.XYZR(b).set_radius(new_radius)
849  print(b.get_name())
850  print("particle with radius "+str(radius)+" and "+str(num_residues)+" residues scaled to a new radius "+str(new_radius))
851 
852 
853  def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
854  #density generation for the EM restraint
855  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
856  #get the number of residues from the pdb bits
857  res_ind=[]
858  for pb in pdbbits+helixbits:
859  for p in IMP.core.get_leaves(pb):
861 
862  number_of_residues=len(set(res_ind))
863  outhier=[]
864  if read:
865  if len(pdbbits)!=0:
866  outhier+=simo.add_component_density(compname,
867  pdbbits,
868  num_components=num_components,
869  resolution=0,
870  inputfile=txtfilename)
871  if len(helixbits)!=0:
872  outhier+=simo.add_component_density(compname,
873  helixbits,
874  num_components=num_components,
875  resolution=1,
876  inputfile=txtfilename)
877 
878 
879  else:
880  if len(pdbbits)!=0:
881  num_components=number_of_residues//abs(num_components)+1
882  outhier+=simo.add_component_density(compname,
883  pdbbits,
884  num_components=num_components,
885  resolution=0,
886  outputfile=txtfilename,
887  outputmap=mrcfilename,
888  multiply_by_total_mass=True)
889 
890  if len(helixbits)!=0:
891  num_components=number_of_residues//abs(num_components)+1
892  outhier+=simo.add_component_density(compname,
893  helixbits,
894  num_components=num_components,
895  resolution=1,
896  outputfile=txtfilename,
897  outputmap=mrcfilename,
898  multiply_by_total_mass=True)
899 
900  return outhier,beadbits
901 
902  def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
903  beadsize=5,color=0.0,offset=0):
904  if pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is not "BEADS" :
905  if include_res0:
906  outhier=simo.autobuild_model(comname,
907  pdbname=pdbname,
908  chain=chain,
909  resrange=resrange,
910  resolutions=[0,1,10],
911  offset=offset,
912  color=color,
913  missingbeadsize=beadsize)
914  else:
915  outhier=simo.autobuild_model(comname,
916  pdbname=pdbname,
917  chain=chain,
918  resrange=resrange,
919  resolutions=[1,10],
920  offset=offset,
921  color=color,
922  missingbeadsize=beadsize)
923 
924 
925  elif pdbname is not None and pdbname is "IDEAL_HELIX" and pdbname is not "BEADS" :
926  outhier=simo.add_component_ideal_helix(comname,
927  resolutions=[1,10],
928  resrange=resrange,
929  color=color,
930  show=False)
931 
932  elif pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is "BEADS" :
933  outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
934 
935  else:
936 
937  seq_len=len(simo.sequence_dict[comname])
938  outhier=simo.add_component_necklace(comname,
939  begin=1,
940  end=seq_len,
941  length=beadsize)
942 
943  return outhier
944 
945 
946 @IMP.deprecated_object("2.5", "Use BuildSystem instead")
947 class BuildModel1(object):
948  """Deprecated building macro - use BuildSystem()"""
949 
950  def __init__(self, representation):
951  """Constructor.
952  @param representation The PMI representation
953  """
954  self.simo=representation
955  self.gmm_models_directory="."
956  self.rmf_file={}
957  self.rmf_frame_number={}
958  self.rmf_names_map={}
959  self.rmf_component_name={}
960 
961  def set_gmm_models_directory(self,directory_name):
962  self.gmm_models_directory=directory_name
963 
964  def build_model(self,data_structure,sequence_connectivity_scale=4.0,
965  sequence_connectivity_resolution=10,
966  rmf_file=None,rmf_frame_number=0,rmf_file_map=None,
967  skip_connectivity_these_domains=None,
968  skip_gaussian_in_rmf=False, skip_gaussian_in_representation=False):
969  """Create model.
970  @param data_structure List of lists containing these entries:
971  comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
972  res_range, read_em_files, bead_size, rb, super_rb,
973  em_num_components, em_txt_file_name, em_mrc_file_name, chain_of_super_rb,
974  keep_gaussian_flexible_beads (optional)
975  @param sequence_connectivity_scale
976  @param rmf_file
977  @param rmf_frame_number
978  @param rmf_file_map : a dictionary that map key=component_name:value=(rmf_file_name,
979  rmf_frame_number,
980  rmf_component_name)
981  """
982  self.domain_dict={}
983  self.resdensities={}
984  super_rigid_bodies={}
985  chain_super_rigid_bodies={}
986  rigid_bodies={}
987 
988  for d in data_structure:
989  comp_name = d[0]
990  hier_name = d[1]
991  color = d[2]
992  fasta_file = d[3]
993  fasta_id = d[4]
994  pdb_name = d[5]
995  chain_id = d[6]
996  res_range = d[7][0:2]
997  try:
998  offset = d[7][2]
999  except:
1000  offset = 0
1001  read_em_files = d[8]
1002  bead_size = d[9]
1003  rb = d[10]
1004  super_rb = d[11]
1005  em_num_components = d[12]
1006  em_txt_file_name = d[13]
1007  em_mrc_file_name = d[14]
1008  chain_of_super_rb = d[15]
1009  try:
1010  keep_gaussian_flexible_beads = d[16]
1011  except:
1012  keep_gaussian_flexible_beads = True
1013 
1014  if comp_name not in self.simo.get_component_names():
1015  self.simo.create_component(comp_name,color=0.0)
1016  self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
1017  outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
1018 
1019 
1020  if not read_em_files is None:
1021  if em_txt_file_name is " ": em_txt_file_name=self.gmm_models_directory+"/"+hier_name+".txt"
1022  if em_mrc_file_name is " ": em_mrc_file_name=self.gmm_models_directory+"/"+hier_name+".mrc"
1023 
1024 
1025  dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
1026 
1027  if (keep_gaussian_flexible_beads):
1028  self.simo.add_all_atom_densities(comp_name, particles=beads)
1029  dens_hier+=beads
1030  else:
1031  dens_hier=[]
1032 
1033  self.resdensities[hier_name]=dens_hier
1034  self.domain_dict[hier_name]=outhier+dens_hier
1035 
1036  if rb is not None:
1037  if rb not in rigid_bodies:
1038  rigid_bodies[rb]=[h for h in self.domain_dict[hier_name]]
1039  else:
1040  rigid_bodies[rb]+=[h for h in self.domain_dict[hier_name]]
1041 
1042 
1043  if super_rb is not None:
1044  for k in super_rb:
1045  if k not in super_rigid_bodies:
1046  super_rigid_bodies[k]=[h for h in self.domain_dict[hier_name]]
1047  else:
1048  super_rigid_bodies[k]+=[h for h in self.domain_dict[hier_name]]
1049 
1050  if chain_of_super_rb is not None:
1051  for k in chain_of_super_rb:
1052  if k not in chain_super_rigid_bodies:
1053  chain_super_rigid_bodies[k]=[h for h in self.domain_dict[hier_name]]
1054  else:
1055  chain_super_rigid_bodies[k]+=[h for h in self.domain_dict[hier_name]]
1056 
1057 
1058 
1059  self.rigid_bodies=rigid_bodies
1060 
1061  for c in self.simo.get_component_names():
1062  if rmf_file is not None:
1063  rf=rmf_file
1064  rfn=rmf_frame_number
1065  self.simo.set_coordinates_from_rmf(c, rf,rfn,
1066  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1067  elif rmf_file_map:
1068  for k in rmf_file_map:
1069  cname=k
1070  rf=rmf_file_map[k][0]
1071  rfn=rmf_file_map[k][1]
1072  rcname=rmf_file_map[k][2]
1073  self.simo.set_coordinates_from_rmf(cname, rf,rfn,rcname,
1074  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1075  else:
1076  if c in self.rmf_file:
1077  rf=self.rmf_file[c]
1078  rfn=self.rmf_frame_number[c]
1079  rfm=self.rmf_names_map[c]
1080  rcname=self.rmf_component_name[c]
1081  self.simo.set_coordinates_from_rmf(c, rf,rfn,representation_name_to_rmf_name_map=rfm,
1082  rmf_component_name=rcname,
1083  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1084  if (not skip_connectivity_these_domains) or (c not in skip_connectivity_these_domains):
1085  self.simo.setup_component_sequence_connectivity(c,
1086  resolution=sequence_connectivity_resolution,
1087  scale=sequence_connectivity_scale)
1088  self.simo.setup_component_geometry(c)
1089 
1090  for rb in rigid_bodies:
1091  self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
1092 
1093  for k in super_rigid_bodies:
1094  self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
1095 
1096  for k in chain_super_rigid_bodies:
1097  self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
1098 
1099  self.simo.set_floppy_bodies()
1100  self.simo.setup_bonds()
1101 
1102  def set_main_chain_mover(self,hier_name,lengths=[5,10,20,30]):
1103  hiers=self.domain_dict[hier_name]
1104  for length in lengths:
1105  for n in range(len(hiers)-length):
1106  hs=hiers[n+1:n+length-1]
1107  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n+length].get_particle()),min_size=3)
1108  for n in range(1,len(hiers)-1,5):
1109  hs=hiers[n+1:]
1110  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1111  hs=hiers[:n-1]
1112  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1113 
1114 
1115  def set_rmf_file(self,component_name,rmf_file,rmf_frame_number,rmf_names_map=None,rmf_component_name=None):
1116  self.rmf_file[component_name]=rmf_file
1117  self.rmf_frame_number[component_name]=rmf_frame_number
1118  self.rmf_names_map[component_name]=rmf_names_map
1119  self.rmf_component_name[component_name]=rmf_component_name
1120 
1121  def get_density_hierarchies(self,hier_name_list):
1122  # return a list of density hierarchies
1123  # specify the list of hierarchy names
1124  dens_hier_list=[]
1125  for hn in hier_name_list:
1126  print(hn)
1127  dens_hier_list+=self.resdensities[hn]
1128  return dens_hier_list
1129 
1130  def get_pdb_bead_bits(self,hierarchy):
1131  pdbbits=[]
1132  beadbits=[]
1133  helixbits=[]
1134  for h in hierarchy:
1135  if "_pdb" in h.get_name():pdbbits.append(h)
1136  if "_bead" in h.get_name():beadbits.append(h)
1137  if "_helix" in h.get_name():helixbits.append(h)
1138  return (pdbbits,beadbits,helixbits)
1139 
1140  def scale_bead_radii(self,nresidues,scale):
1141  scaled_beads=set()
1142  for h in self.domain_dict:
1143  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
1144  slope=(1.0-scale)/(1.0-float(nresidues))
1145 
1146  for b in beadbits:
1147  # I have to do the following
1148  # because otherwise we'll scale more than once
1149  if b not in scaled_beads:
1150  scaled_beads.add(b)
1151  else:
1152  continue
1153  radius=IMP.core.XYZR(b).get_radius()
1154  num_residues=len(IMP.pmi.tools.get_residue_indexes(b))
1155  scale_factor=slope*float(num_residues)+1.0
1156  print(scale_factor)
1157  new_radius=scale_factor*radius
1158  IMP.core.XYZR(b).set_radius(new_radius)
1159  print(b.get_name())
1160  print("particle with radius "+str(radius)+" and "+str(num_residues)+" residues scaled to a new radius "+str(new_radius))
1161 
1162 
1163  def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
1164  #density generation for the EM restraint
1165  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
1166 
1167  #get the number of residues from the pdb bits
1168  res_ind=[]
1169  for pb in pdbbits+helixbits:
1170  for p in IMP.core.get_leaves(pb):
1172 
1173  number_of_residues=len(set(res_ind))
1174  outhier=[]
1175  if read:
1176  if len(pdbbits)!=0:
1177  outhier+=simo.add_component_density(compname,
1178  pdbbits,
1179  num_components=num_components, # number of gaussian into which the simulated density is approximated
1180  resolution=0, # resolution that you want to calculate the simulated density
1181  inputfile=txtfilename) # read what it was calculated before
1182  if len(helixbits)!=0:
1183  outhier+=simo.add_component_density(compname,
1184  helixbits,
1185  num_components=num_components, # number of gaussian into which the simulated density is approximated
1186  resolution=1, # resolution that you want to calculate the simulated density
1187  inputfile=txtfilename) # read what it was calculated before
1188 
1189 
1190  else:
1191  if len(pdbbits)!=0:
1192  if num_components<0:
1193  #if negative calculate the number of gmm components automatically
1194  # from the number of residues
1195  num_components=number_of_residues/abs(num_components)
1196  outhier+=simo.add_component_density(compname,
1197  pdbbits,
1198  num_components=num_components, # number of gaussian into which the simulated density is approximated
1199  resolution=0, # resolution that you want to calculate the simulated density
1200  outputfile=txtfilename, # do the calculation
1201  outputmap=mrcfilename,
1202  multiply_by_total_mass=True) # do the calculation and output the mrc
1203 
1204  if len(helixbits)!=0:
1205  if num_components<0:
1206  #if negative calculate the number of gmm components automatically
1207  # from the number of residues
1208  num_components=number_of_residues/abs(num_components)
1209  outhier+=simo.add_component_density(compname,
1210  helixbits,
1211  num_components=num_components, # number of gaussian into which the simulated density is approximated
1212  resolution=1, # resolution that you want to calculate the simulated density
1213  outputfile=txtfilename, # do the calculation
1214  outputmap=mrcfilename,
1215  multiply_by_total_mass=True) # do the calculation and output the mrc
1216 
1217  return outhier,beadbits
1218 
1219  def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
1220 
1221  if pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is not "BEADS" and pdbname is not "DENSITY" :
1222  if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
1223  if read==False:
1224  outhier=simo.autobuild_model(comname,
1225  pdbname=pdbname,
1226  chain=chain,
1227  resrange=resrange,
1228  resolutions=[0,1,10],
1229  offset=offset,
1230  color=color,
1231  missingbeadsize=beadsize)
1232  else:
1233  outhier=simo.autobuild_model(comname,
1234  pdbname=pdbname,
1235  chain=chain,
1236  resrange=resrange,
1237  resolutions=[1,10],
1238  offset=offset,
1239  color=color,
1240  missingbeadsize=beadsize)
1241 
1242  elif pdbname is not None and pdbname is "IDEAL_HELIX" and pdbname is not "BEADS" and pdbname is not "DENSITY" :
1243  outhier=simo.add_component_ideal_helix(comname,
1244  resolutions=[1,10],
1245  resrange=resrange,
1246  color=color,
1247  show=False)
1248 
1249  elif pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is "BEADS" and pdbname is not "DENSITY" :
1250  seq_len=resrange[1]
1251  if resrange[1]==-1:
1252  seq_len=len(simo.sequence_dict[comname])
1253  outhier=simo.add_component_necklace(comname,resrange[0],seq_len,beadsize,color=color)
1254 
1255  elif pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is not "BEADS" and pdbname is "DENSITY" :
1256  outhier=[]
1257 
1258  else:
1259 
1260  seq_len=len(simo.sequence_dict[comname])
1261  outhier=simo.add_component_necklace(comname,
1262  begin=1,
1263  end=seq_len,
1264  length=beadsize)
1265 
1266  return outhier
1267 
1268  def set_coordinates(self,hier_name,xyz_tuple):
1269  hier=self.domain_dict[hier_name]
1270  for h in IMP.atom.get_leaves(hier):
1271  p=h.get_particle()
1273  pass
1274  else:
1275  IMP.core.XYZ(p).set_coordinates(xyz_tuple)
1276 
1277  def save_rmf(self,rmfname):
1278 
1280  o.init_rmf(rmfname,[self.simo.prot])
1281  o.write_rmf(rmfname)
1282  o.close_rmf(rmfname)
1283 # -----------------------------------------------------------------------
1284 
1285 @IMP.deprecated_object("2.5", "Use BuildSystem instead")
1286 def BuildModel0(
1287  m,
1288  data,
1289  resolutions=[1,
1290  10],
1291  missing_bead_size=20,
1292  residue_per_gaussian=None):
1293  '''
1294  Construct a component for each subunit (no splitting, nothing fancy).
1295  You can pass the resolutions and the bead size for the missing residue regions.
1296  To use this macro, you must provide the following data structure:
1297  Component pdbfile chainid rgb color fastafile sequence id
1298  in fastafile
1299 data = [("Rpb1", pdbfile, "A", 0.00000000, (fastafile, 0)),
1300  ("Rpb2", pdbfile, "B", 0.09090909, (fastafile, 1)),
1301  ("Rpb3", pdbfile, "C", 0.18181818, (fastafile, 2)),
1302  ("Rpb4", pdbfile, "D", 0.27272727, (fastafile, 3)),
1303  ("Rpb5", pdbfile, "E", 0.36363636, (fastafile, 4)),
1304  ("Rpb6", pdbfile, "F", 0.45454545, (fastafile, 5)),
1305  ("Rpb7", pdbfile, "G", 0.54545455, (fastafile, 6)),
1306  ("Rpb8", pdbfile, "H", 0.63636364, (fastafile, 7)),
1307  ("Rpb9", pdbfile, "I", 0.72727273, (fastafile, 8)),
1308  ("Rpb10", pdbfile, "L", 0.81818182, (fastafile, 9)),
1309  ("Rpb11", pdbfile, "J", 0.90909091, (fastafile, 10)),
1310  ("Rpb12", pdbfile, "K", 1.00000000, (fastafile, 11))]
1311  '''
1312 
1314 
1315  # the dictionary for the hierarchies,
1316  hierarchies = {}
1317 
1318  for d in data:
1319  # retrieve the information from the data structure
1320  component_name = d[0]
1321  pdb_file = d[1]
1322  chain_id = d[2]
1323  color_id = d[3]
1324  fasta_file = d[4][0]
1325  # this function
1326  fastids = IMP.pmi.tools.get_ids_from_fasta_file(fasta_file)
1327  fasta_file_id = d[4][1]
1328  # avoid to add a component with the same name
1329  r.create_component(component_name,
1330  color=color_id)
1331 
1332  r.add_component_sequence(component_name,
1333  fasta_file,
1334  id=fastids[fasta_file_id])
1335 
1336  hierarchies = r.autobuild_model(component_name,
1337  pdb_file,
1338  chain_id,
1339  resolutions=resolutions,
1340  missingbeadsize=missing_bead_size)
1341 
1342  r.show_component_table(component_name)
1343 
1344  r.set_rigid_bodies([component_name])
1345 
1346  r.set_chain_of_super_rigid_bodies(
1347  hierarchies,
1348  min_length=2,
1349  max_length=2)
1350 
1351  r.setup_component_sequence_connectivity(component_name, resolution=1)
1352  r.setup_component_geometry(component_name)
1353 
1354  r.setup_bonds()
1355  # put it at the end of rigid bodies
1356  r.set_floppy_bodies()
1357 
1358  # set current coordinates as reference for RMSD calculation
1359  r.set_current_coordinates_as_reference_for_rmsd("Reference")
1360 
1361  return r
1362 
1363 # ----------------------------------------------------------------------
1364 
1366  """A macro for running all the basic operations of analysis.
1367  Includes clustering, precision analysis, and making ensemble density maps.
1368  A number of plots are also supported.
1369  """
1370  def __init__(self, model,
1371  merge_directories=["./"],
1372  stat_file_name_suffix="stat",
1373  best_pdb_name_suffix="model",
1374  do_clean_first=True,
1375  do_create_directories=True,
1376  global_output_directory="output/",
1377  replica_stat_file_suffix="stat_replica",
1378  global_analysis_result_directory="./analysis/",
1379  test_mode=False):
1380  """Constructor.
1381  @param model The IMP model
1382  @param stat_file_name_suffix
1383  @param merge_directories The directories containing output files
1384  @param best_pdb_name_suffix
1385  @param do_clean_first
1386  @param do_create_directories
1387  @param global_output_directory Where everything is
1388  @param replica_stat_file_suffix
1389  @param global_analysis_result_directory
1390  @param test_mode If True, nothing is changed on disk
1391  """
1392 
1393  try:
1394  from mpi4py import MPI
1395  self.comm = MPI.COMM_WORLD
1396  self.rank = self.comm.Get_rank()
1397  self.number_of_processes = self.comm.size
1398  except ImportError:
1399  self.rank = 0
1400  self.number_of_processes = 1
1401 
1402  self.test_mode = test_mode
1403  self._protocol_output = []
1404  self.cluster_obj = None
1405  self.model = model
1406  stat_dir = global_output_directory
1407  self.stat_files = []
1408  # it contains the position of the root directories
1409  for rd in merge_directories:
1410  stat_files = glob.glob(os.path.join(rd,stat_dir,"stat.*.out"))
1411  if len(stat_files)==0:
1412  print("WARNING: no stat files found in",os.path.join(rd,stat_dir))
1413  self.stat_files += stat_files
1414 
1415  def add_protocol_output(self, p):
1416  """Capture details of the modeling protocol.
1417  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
1418  """
1419  self._protocol_output.append(p)
1420 
1421  def get_modeling_trajectory(self,
1422  score_key="SimplifiedModel_Total_Score_None",
1423  rmf_file_key="rmf_file",
1424  rmf_file_frame_key="rmf_frame_index",
1425  outputdir="./",
1426  get_every=1,
1427  nframes_trajectory=10000):
1428  """ Get a trajectory of the modeling run, for generating demonstrative movies
1429  @param score_key The score for ranking models
1430  @param rmf_file_key Key pointing to RMF filename
1431  @param rmf_file_frame_key Key pointing to RMF frame number
1432  @param outputdir The local output directory used in the run
1433  @param get_every Extract every nth frame
1434  @param nframes_trajectory Total number of frames of the trajectory
1435  """
1436  from operator import itemgetter
1437  import math
1438 
1439  trajectory_models = IMP.pmi.io.get_trajectory_models(self.stat_files,
1440  score_key,
1441  rmf_file_key,
1442  rmf_file_frame_key,
1443  get_every)
1444  rmf_file_list=trajectory_models[0]
1445  rmf_file_frame_list=trajectory_models[1]
1446  score_list=list(map(float, trajectory_models[2]))
1447 
1448  max_score=max(score_list)
1449  min_score=min(score_list)
1450 
1451 
1452  bins=[(max_score-min_score)*math.exp(-float(i))+min_score for i in range(nframes_trajectory)]
1453  binned_scores=[None]*nframes_trajectory
1454  binned_model_indexes=[-1]*nframes_trajectory
1455 
1456  for model_index,s in enumerate(score_list):
1457  bins_score_diffs=[abs(s-b) for b in bins]
1458  bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1459  if binned_scores[bin_index]==None:
1460  binned_scores[bin_index]=s
1461  binned_model_indexes[bin_index]=model_index
1462  else:
1463  old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1464  new_diff=abs(s-bins[bin_index])
1465  if new_diff < old_diff:
1466  binned_scores[bin_index]=s
1467  binned_model_indexes[bin_index]=model_index
1468 
1469  print(binned_scores)
1470  print(binned_model_indexes)
1471 
1472 
1473  def _expand_ambiguity(self,prot,d):
1474  """If using PMI2, expand the dictionary to include copies as ambiguous options
1475  This also keeps the states separate.
1476  """
1477  newdict = {}
1478  for key in d:
1479  val = d[key]
1480  if '..' in key or (type(val) is tuple and len(val)>=3):
1481  newdict[key] = val
1482  continue
1483  states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
1484  if type(val) is tuple:
1485  start = val[0]
1486  stop = val[1]
1487  name = val[2]
1488  else:
1489  start = 1
1490  stop = -1
1491  name = val
1492  for nst in range(len(states)):
1493  sel = IMP.atom.Selection(prot,molecule=name,state_index=nst)
1494  copies = sel.get_selected_particles(with_representation=False)
1495  if len(copies)>1:
1496  for nc in range(len(copies)):
1497  if len(states)>1:
1498  newdict['%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
1499  else:
1500  newdict['%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
1501  else:
1502  newdict[key] = val
1503  return newdict
1504 
1505 
1506  def clustering(self,
1507  score_key="SimplifiedModel_Total_Score_None",
1508  rmf_file_key="rmf_file",
1509  rmf_file_frame_key="rmf_frame_index",
1510  state_number=0,
1511  prefiltervalue=None,
1512  feature_keys=[],
1513  outputdir="./",
1514  alignment_components=None,
1515  number_of_best_scoring_models=10,
1516  rmsd_calculation_components=None,
1517  distance_matrix_file='distances.mat',
1518  load_distance_matrix_file=False,
1519  skip_clustering=False,
1520  number_of_clusters=1,
1521  display_plot=False,
1522  exit_after_display=True,
1523  get_every=1,
1524  first_and_last_frames=None,
1525  density_custom_ranges=None,
1526  write_pdb_with_centered_coordinates=False,
1527  voxel_size=5.0):
1528  """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
1529  Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
1530  Can pass None for copy or state to ignore that field.
1531  If you don't pass a specific copy number
1532  @param score_key The score for ranking models. Use "Total_Score"
1533  instead of default for PMI2.
1534  @param rmf_file_key Key pointing to RMF filename
1535  @param rmf_file_frame_key Key pointing to RMF frame number
1536  @param state_number State number to analyze
1537  @param prefiltervalue Only include frames where the
1538  score key is below this value
1539  @param feature_keys Keywords for which you want to
1540  calculate average, medians, etc.
1541  If you pass "Keyname" it'll include everything that matches "*Keyname*"
1542  @param outputdir The local output directory used in the run
1543  @param alignment_components Dictionary with keys=groupname, values are tuples
1544  for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1545  @param number_of_best_scoring_models Num models to keep per run
1546  @param rmsd_calculation_components For calculating RMSD
1547  (same format as alignment_components)
1548  @param distance_matrix_file Where to store/read the distance matrix
1549  @param load_distance_matrix_file Try to load the distance matrix file
1550  @param skip_clustering Just extract the best scoring models
1551  and save the pdbs
1552  @param number_of_clusters Number of k-means clusters
1553  @param display_plot Display the distance matrix
1554  @param exit_after_display Exit after displaying distance matrix
1555  @param get_every Extract every nth frame
1556  @param first_and_last_frames A tuple with the first and last frames to be
1557  analyzed. Values are percentages!
1558  Default: get all frames
1559  @param density_custom_ranges For density calculation
1560  (same format as alignment_components)
1561  @param write_pdb_with_centered_coordinates
1562  @param voxel_size Used for the density output
1563  """
1564  self._outputdir = outputdir
1565  self._number_of_clusters = number_of_clusters
1566  for p in self._protocol_output:
1567  p.add_replica_exchange_analysis(self)
1568 
1569  if self.test_mode:
1570  return
1571 
1572  if self.rank==0:
1573  try:
1574  os.mkdir(outputdir)
1575  except:
1576  pass
1577 
1578  if not load_distance_matrix_file:
1579  if len(self.stat_files)==0: print("ERROR: no stat file found in the given path"); return
1580  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1581  self.stat_files,self.number_of_processes)[self.rank]
1582 
1583  # read ahead to check if you need the PMI2 score key instead
1584  po = IMP.pmi.output.ProcessOutput(my_stat_files[0])
1585  orig_score_key = score_key
1586  if score_key not in po.get_keys():
1587  if 'Total_Score' in po.get_keys():
1588  score_key = 'Total_Score'
1589  print("WARNING: Using 'Total_Score' instead of "
1590  "'SimplifiedModel_Total_Score_None' for the score key")
1591  for k in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1592  if k in feature_keys:
1593  print("WARNING: no need to pass " +k+" to feature_keys.")
1594  feature_keys.remove(k)
1595 
1596  best_models = IMP.pmi.io.get_best_models(my_stat_files,
1597  score_key,
1598  feature_keys,
1599  rmf_file_key,
1600  rmf_file_frame_key,
1601  prefiltervalue,
1602  get_every)
1603  rmf_file_list=best_models[0]
1604  rmf_file_frame_list=best_models[1]
1605  score_list=best_models[2]
1606  feature_keyword_list_dict=best_models[3]
1607 
1608 # ------------------------------------------------------------------------
1609 # collect all the files and scores
1610 # ------------------------------------------------------------------------
1611 
1612  if self.number_of_processes > 1:
1613  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1614  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1615  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1616  rmf_file_frame_list)
1617  for k in feature_keyword_list_dict:
1618  feature_keyword_list_dict[k] = IMP.pmi.tools.scatter_and_gather(
1619  feature_keyword_list_dict[k])
1620 
1621  # sort by score and get the best scoring ones
1622  score_rmf_tuples = list(zip(score_list,
1623  rmf_file_list,
1624  rmf_file_frame_list,
1625  list(range(len(score_list)))))
1626 
1627  if density_custom_ranges:
1628  for k in density_custom_ranges:
1629  if type(density_custom_ranges[k]) is not list:
1630  raise Exception("Density custom ranges: values must be lists of tuples")
1631 
1632  # keep subset of frames if requested
1633  if first_and_last_frames is not None:
1634  nframes = len(score_rmf_tuples)
1635  first_frame = int(first_and_last_frames[0] * nframes)
1636  last_frame = int(first_and_last_frames[1] * nframes)
1637  if last_frame > len(score_rmf_tuples):
1638  last_frame = -1
1639  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1640 
1641  # sort RMFs by the score_key in ascending order, and store the rank
1642  best_score_rmf_tuples = sorted(score_rmf_tuples,
1643  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1644  best_score_rmf_tuples=[t+(n,) for n,t in enumerate(best_score_rmf_tuples)]
1645  # sort the feature scores in the same way
1646  best_score_feature_keyword_list_dict = defaultdict(list)
1647  for tpl in best_score_rmf_tuples:
1648  index = tpl[3]
1649  for f in feature_keyword_list_dict:
1650  best_score_feature_keyword_list_dict[f].append(
1651  feature_keyword_list_dict[f][index])
1652  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1653  best_score_rmf_tuples,
1654  self.number_of_processes)[self.rank]
1655 
1656  # expand the dictionaries to include ambiguous copies
1657  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(self.model,
1658  0,
1659  my_best_score_rmf_tuples[0][1])[0]
1660  if IMP.pmi.get_is_canonical(prot_ahead):
1661  if rmsd_calculation_components is not None:
1662  tmp = self._expand_ambiguity(prot_ahead,rmsd_calculation_components)
1663  if tmp!=rmsd_calculation_components:
1664  print('Detected ambiguity, expand rmsd components to',tmp)
1665  rmsd_calculation_components = tmp
1666  if alignment_components is not None:
1667  tmp = self._expand_ambiguity(prot_ahead,alignment_components)
1668  if tmp!=alignment_components:
1669  print('Detected ambiguity, expand alignment components to',tmp)
1670  alignment_components = tmp
1671 
1672 #-------------------------------------------------------------
1673 # read the coordinates
1674 # ------------------------------------------------------------
1675  rmsd_weights = IMP.pmi.io.get_bead_sizes(self.model,
1676  my_best_score_rmf_tuples[0],
1677  rmsd_calculation_components,
1678  state_number=state_number)
1679  got_coords = IMP.pmi.io.read_coordinates_of_rmfs(self.model,
1680  my_best_score_rmf_tuples,
1681  alignment_components,
1682  rmsd_calculation_components,
1683  state_number=state_number)
1684 
1685  # note! the coordinates are simply float tuples, NOT decorators, NOT Vector3D,
1686  # NOR particles, because these object cannot be serialized. We need serialization
1687  # for the parallel computation based on mpi.
1688  all_coordinates=got_coords[0] # dict:key=component name,val=coords per hit
1689  alignment_coordinates=got_coords[1] # same as above, limited to alignment bits
1690  rmsd_coordinates=got_coords[2] # same as above, limited to RMSD bits
1691  rmf_file_name_index_dict=got_coords[3] # dictionary with key=RMF, value=score rank
1692  all_rmf_file_names=got_coords[4] # RMF file per hit
1693 
1694 # ------------------------------------------------------------------------
1695 # optionally don't compute distance matrix or cluster, just write top files
1696 # ------------------------------------------------------------------------
1697  if skip_clustering:
1698  if density_custom_ranges:
1699  DensModule = IMP.pmi.analysis.GetModelDensity(
1700  density_custom_ranges,
1701  voxel=voxel_size)
1702 
1703  dircluster=os.path.join(outputdir,"all_models."+str(n))
1704  try:
1705  os.mkdir(outputdir)
1706  except:
1707  pass
1708  try:
1709  os.mkdir(dircluster)
1710  except:
1711  pass
1712  clusstat=open(os.path.join(dircluster,"stat."+str(self.rank)+".out"),"w")
1713  for cnt,tpl in enumerate(my_best_score_rmf_tuples):
1714  rmf_name=tpl[1]
1715  rmf_frame_number=tpl[2]
1716  tmp_dict={}
1717  index=tpl[4]
1718  for key in best_score_feature_keyword_list_dict:
1719  tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1720 
1721  if cnt==0:
1722  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1723  self.model,
1724  rmf_frame_number,
1725  rmf_name)
1726  else:
1727  linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1728  self.model,
1729  prots,
1730  rs,
1731  rmf_frame_number,
1732  rmf_name)
1733  if not linking_successful:
1734  continue
1735 
1736  if not prots:
1737  continue
1738 
1739  if IMP.pmi.get_is_canonical(prots[0]):
1740  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1741  prot = states[state_number]
1742  else:
1743  prot = prots[state_number]
1744 
1745  # get transformation aligning coordinates of requested tuples
1746  # to the first RMF file
1747  if cnt==0:
1748  coords_f1=alignment_coordinates[cnt]
1749  if cnt > 0:
1750  coords_f2=alignment_coordinates[cnt]
1751  if coords_f2:
1752  Ali = IMP.pmi.analysis.Alignment(coords_f1, coords_f2)
1753  transformation = Ali.align()[1]
1754  else:
1756 
1757  rbs = set()
1758  for p in IMP.atom.get_leaves(prot):
1759  if not IMP.core.XYZR.get_is_setup(p):
1761  IMP.core.XYZR(p).set_radius(0.0001)
1762  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1763 
1765  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1766  rbs.add(rb)
1767  else:
1769  transformation)
1770  for rb in rbs:
1771  IMP.core.transform(rb,transformation)
1772 
1774  out_pdb_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".pdb")
1775  out_rmf_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".rmf3")
1776  o.init_pdb(out_pdb_fn,prot)
1777  o.write_pdb(out_pdb_fn,
1778  translate_to_geometric_center=write_pdb_with_centered_coordinates)
1779 
1780  tmp_dict["local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1781  tmp_dict["rmf_file_full_path"]=rmf_name
1782  tmp_dict["local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1783  tmp_dict["local_rmf_frame_number"]=0
1784 
1785  clusstat.write(str(tmp_dict)+"\n")
1786 
1787  if IMP.pmi.get_is_canonical(prot):
1788  # create a single-state System and write that
1790  h.set_name("System")
1791  h.add_child(prot)
1792  o.init_rmf(out_rmf_fn, [h], rs)
1793  else:
1794  o.init_rmf(out_rmf_fn, [prot],rs)
1795 
1796  o.write_rmf(out_rmf_fn)
1797  o.close_rmf(out_rmf_fn)
1798  # add the density
1799  if density_custom_ranges:
1800  DensModule.add_subunits_density(prot)
1801 
1802  if density_custom_ranges:
1803  DensModule.write_mrc(path=dircluster)
1804  del DensModule
1805  return
1806 
1807 
1808 
1809  # broadcast the coordinates
1810  if self.number_of_processes > 1:
1811  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1812  all_coordinates)
1813  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1814  all_rmf_file_names)
1815  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1816  rmf_file_name_index_dict)
1817  alignment_coordinates=IMP.pmi.tools.scatter_and_gather(
1818  alignment_coordinates)
1819  rmsd_coordinates=IMP.pmi.tools.scatter_and_gather(
1820  rmsd_coordinates)
1821 
1822  if self.rank == 0:
1823  # save needed informations in external files
1824  self.save_objects(
1825  [best_score_feature_keyword_list_dict,
1826  rmf_file_name_index_dict],
1827  ".macro.pkl")
1828 
1829 # ------------------------------------------------------------------------
1830 # Calculate distance matrix and cluster
1831 # ------------------------------------------------------------------------
1832  print("setup clustering class")
1833  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1834 
1835  for n, model_coordinate_dict in enumerate(all_coordinates):
1836  template_coordinate_dict = {}
1837  # let's try to align
1838  if alignment_components is not None and len(self.cluster_obj.all_coords) == 0:
1839  # set the first model as template coordinates
1840  self.cluster_obj.set_template(alignment_coordinates[n])
1841  self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1842  print("Global calculating the distance matrix")
1843 
1844  # calculate distance matrix, all against all
1845  self.cluster_obj.dist_matrix()
1846 
1847  # perform clustering and optionally display
1848  if self.rank == 0:
1849  self.cluster_obj.do_cluster(number_of_clusters)
1850  if display_plot:
1851  if self.rank == 0:
1852  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1853  if exit_after_display:
1854  exit()
1855  self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1856 
1857 # ------------------------------------------------------------------------
1858 # Alteratively, load the distance matrix from file and cluster that
1859 # ------------------------------------------------------------------------
1860  else:
1861  if self.rank==0:
1862  print("setup clustering class")
1863  self.cluster_obj = IMP.pmi.analysis.Clustering()
1864  self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1865  print("clustering with %s clusters" % str(number_of_clusters))
1866  self.cluster_obj.do_cluster(number_of_clusters)
1867  [best_score_feature_keyword_list_dict,
1868  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1869  if display_plot:
1870  if self.rank == 0:
1871  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1872  if exit_after_display:
1873  exit()
1874  if self.number_of_processes > 1:
1875  self.comm.Barrier()
1876 
1877 # ------------------------------------------------------------------------
1878 # now save all informations about the clusters
1879 # ------------------------------------------------------------------------
1880 
1881  if self.rank == 0:
1882  print(self.cluster_obj.get_cluster_labels())
1883  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1884  print("rank %s " % str(self.rank))
1885  print("cluster %s " % str(n))
1886  print("cluster label %s " % str(cl))
1887  print(self.cluster_obj.get_cluster_label_names(cl))
1888 
1889  # first initialize the Density class if requested
1890  if density_custom_ranges:
1891  DensModule = IMP.pmi.analysis.GetModelDensity(
1892  density_custom_ranges,
1893  voxel=voxel_size)
1894 
1895  dircluster = outputdir + "/cluster." + str(n) + "/"
1896  try:
1897  os.mkdir(dircluster)
1898  except:
1899  pass
1900 
1901  rmsd_dict = {"AVERAGE_RMSD":
1902  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1903  clusstat = open(dircluster + "stat.out", "w")
1904  for k, structure_name in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1905  # extract the features
1906  tmp_dict = {}
1907  tmp_dict.update(rmsd_dict)
1908  index = rmf_file_name_index_dict[structure_name]
1909  for key in best_score_feature_keyword_list_dict:
1910  tmp_dict[
1911  key] = best_score_feature_keyword_list_dict[
1912  key][
1913  index]
1914 
1915  # get the rmf name and the frame number from the list of
1916  # frame names
1917  rmf_name = structure_name.split("|")[0]
1918  rmf_frame_number = int(structure_name.split("|")[1])
1919  clusstat.write(str(tmp_dict) + "\n")
1920 
1921  # extract frame (open or link to existing)
1922  if k==0:
1923  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1924  self.model,
1925  rmf_frame_number,
1926  rmf_name)
1927  else:
1928  linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1929  self.model,
1930  prots,
1931  rs,
1932  rmf_frame_number,
1933  rmf_name)
1934  if not linking_successful:
1935  continue
1936  if not prots:
1937  continue
1938 
1939  if IMP.pmi.get_is_canonical(prots[0]):
1940  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1941  prot = states[state_number]
1942  else:
1943  prot = prots[state_number]
1944 
1945  # transform clusters onto first
1946  if k > 0:
1947  model_index = self.cluster_obj.get_model_index_from_name(
1948  structure_name)
1949  transformation = self.cluster_obj.get_transformation_to_first_member(
1950  cl,
1951  model_index)
1952  rbs = set()
1953  for p in IMP.atom.get_leaves(prot):
1954  if not IMP.core.XYZR.get_is_setup(p):
1956  IMP.core.XYZR(p).set_radius(0.0001)
1957  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1958 
1960  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1961  rbs.add(rb)
1962  else:
1964  transformation)
1965  for rb in rbs:
1966  IMP.core.transform(rb,transformation)
1967 
1968  # add the density
1969  if density_custom_ranges:
1970  DensModule.add_subunits_density(prot)
1971 
1972  # pdb writing should be optimized!
1973  o = IMP.pmi.output.Output()
1974  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1975  o.write_pdb(dircluster + str(k) + ".pdb")
1976 
1977  if IMP.pmi.get_is_canonical(prot):
1978  # create a single-state System and write that
1980  h.set_name("System")
1981  h.add_child(prot)
1982  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1983  else:
1984  o.init_rmf(dircluster + str(k) + ".rmf3", [prot],rs)
1985  o.write_rmf(dircluster + str(k) + ".rmf3")
1986  o.close_rmf(dircluster + str(k) + ".rmf3")
1987 
1988  del o
1989  # IMP.atom.destroy(prot)
1990 
1991  if density_custom_ranges:
1992  DensModule.write_mrc(path=dircluster)
1993  del DensModule
1994 
1995  if self.number_of_processes>1:
1996  self.comm.Barrier()
1997 
1998  def get_cluster_rmsd(self,cluster_num):
1999  if self.cluster_obj is None:
2000  raise Exception("Run clustering first")
2001  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
2002 
2003  def save_objects(self, objects, file_name):
2004  import pickle
2005  outf = open(file_name, 'wb')
2006  pickle.dump(objects, outf)
2007  outf.close()
2008 
2009  def load_objects(self, file_name):
2010  import pickle
2011  inputf = open(file_name, 'rb')
2012  objects = pickle.load(inputf)
2013  inputf.close()
2014  return objects
A class to simplify create of constraints and movers for an IMP Hierarchy.
A macro for running all the basic operations of analysis.
Definition: macros.py:1365
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:49
Sample using molecular dynamics.
Definition: samplers.py:384
A class for reading stat files.
Definition: output.py:696
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:508
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:509
def __init__
Constructor.
Definition: macros.py:690
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:591
def __init__
Constructor.
Definition: macros.py:68
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1506
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:433
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
This class initializes the root node of the global IMP.atom.Hierarchy.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:1415
Representation of the system.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:1424
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def build_model
Create model.
Definition: macros.py:964
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:23
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:1110
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def BuildModel0
Construct a component for each subunit (no splitting, nothing fancy).
Definition: macros.py:1285
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:47
def __init__
Constructor.
Definition: macros.py:950
A macro to build a Representation based on a Topology and lists of movers DEPRECATED - Use BuildSyste...
Definition: macros.py:668
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: utilities.h:91
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9619
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:36
Create movers and setup constraints for PMI objects.
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:477
Deprecated building macro - use BuildSystem()
Definition: macros.py:946
The general base class for IMP exceptions.
Definition: exception.h:49
Class to handle individual particles of a Model object.
Definition: Particle.h:41
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:600
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:455
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
Set up the representation of all proteins and nucleic acid macromolecules.
A macro to help setup and run replica exchange.
Definition: macros.py:24
A dictionary-like wrapper for reading and storing sequence data.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Compute mean density maps from structures.
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:1068
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:640
Sample using replica exchange.
Definition: samplers.py:490
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
def get_representation
Return the Representation object.
Definition: macros.py:804
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27