IMP logo
IMP Reference Guide  2.15.0
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
7 import IMP.pmi.tools
8 import IMP.pmi.samplers
9 import IMP.pmi.output
10 import IMP.pmi.analysis
11 import IMP.pmi.io
12 import IMP.pmi.alphabets
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import os
17 import glob
18 from operator import itemgetter
19 from collections import defaultdict
20 import numpy as np
21 import string
22 import itertools
23 import warnings
24 import math
25 
26 
27 class _RMFRestraints(object):
28  """All restraints that are written out to the RMF file"""
29  def __init__(self, model, user_restraints):
30  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
31  self._user_restraints = user_restraints if user_restraints else []
32 
33  def __len__(self):
34  return (len(self._user_restraints)
35  + self._rmf_rs.get_number_of_restraints())
36 
37  def __bool__(self):
38  return len(self) > 0
39  __nonzero__ = __bool__ # Python 2 compatibility
40 
41  def __getitem__(self, i):
42  class FakePMIWrapper(object):
43  def __init__(self, r):
44  self.r = IMP.RestraintSet.get_from(r)
45 
46  def get_restraint(self):
47  return self.r
48 
49  lenuser = len(self._user_restraints)
50  if 0 <= i < lenuser:
51  return self._user_restraints[i]
52  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
53  r = self._rmf_rs.get_restraint(i - lenuser)
54  return FakePMIWrapper(r)
55  else:
56  raise IndexError("Out of range")
57 
58 
59 class ReplicaExchange0(object):
60  """A macro to help setup and run replica exchange.
61  Supports Monte Carlo and molecular dynamics.
62  Produces trajectory RMF files, best PDB structures,
63  and output stat files.
64  """
65  def __init__(self, model, root_hier,
66  sample_objects=None, # DEPRECATED
67  monte_carlo_sample_objects=None,
68  molecular_dynamics_sample_objects=None,
69  output_objects=[],
70  rmf_output_objects=None,
71  crosslink_restraints=None, # DEPRECATED
72  monte_carlo_temperature=1.0,
73  simulated_annealing=False,
74  simulated_annealing_minimum_temperature=1.0,
75  simulated_annealing_maximum_temperature=2.5,
76  simulated_annealing_minimum_temperature_nframes=100,
77  simulated_annealing_maximum_temperature_nframes=100,
78  replica_exchange_minimum_temperature=1.0,
79  replica_exchange_maximum_temperature=2.5,
80  replica_exchange_swap=True,
81  num_sample_rounds=1,
82  number_of_best_scoring_models=500,
83  monte_carlo_steps=10,
84  self_adaptive=False,
85  molecular_dynamics_steps=10,
86  molecular_dynamics_max_time_step=1.0,
87  number_of_frames=1000,
88  save_coordinates_mode="lowest_temperature",
89  nframes_write_coordinates=1,
90  write_initial_rmf=True,
91  initial_rmf_name_suffix="initial",
92  stat_file_name_suffix="stat",
93  best_pdb_name_suffix="model",
94  do_clean_first=True,
95  do_create_directories=True,
96  global_output_directory="./",
97  rmf_dir="rmfs/",
98  best_pdb_dir="pdbs/",
99  replica_stat_file_suffix="stat_replica",
100  em_object_for_rmf=None,
101  atomistic=False,
102  replica_exchange_object=None,
103  test_mode=False):
104  """Constructor.
105  @param model The IMP model
106  @param root_hier Top-level (System)hierarchy
107  @param monte_carlo_sample_objects Objects for MC sampling; a list of
108  structural components (generally the representation) that
109  will be moved and restraints with parameters that need to
110  be sampled.
111  For PMI2: just pass flat list of movers
112  @param molecular_dynamics_sample_objects Objects for MD sampling
113  For PMI2: just pass flat list of particles
114  @param output_objects A list of structural objects and restraints
115  that will be included in output (ie, statistics "stat"
116  files). Any object that provides a get_output() method
117  can be used here. If None is passed
118  the macro will not write stat files.
119  @param rmf_output_objects A list of structural objects and
120  restraints that will be included in rmf. Any object
121  that provides a get_output() method can be used here.
122  @param monte_carlo_temperature MC temp (may need to be optimized
123  based on post-sampling analysis)
124  @param simulated_annealing If True, perform simulated annealing
125  @param simulated_annealing_minimum_temperature Should generally be
126  the same as monte_carlo_temperature.
127  @param simulated_annealing_minimum_temperature_nframes Number of
128  frames to compute at minimum temperature.
129  @param simulated_annealing_maximum_temperature_nframes Number of
130  frames to compute at
131  temps > simulated_annealing_maximum_temperature.
132  @param replica_exchange_minimum_temperature Low temp for REX; should
133  generally be the same as monte_carlo_temperature.
134  @param replica_exchange_maximum_temperature High temp for REX
135  @param replica_exchange_swap Boolean, enable disable temperature
136  swap (Default=True)
137  @param num_sample_rounds Number of rounds of MC/MD per cycle
138  @param number_of_best_scoring_models Number of top-scoring PDB
139  models to keep around for analysis
140  @param monte_carlo_steps Number of MC steps per round
141  @param self_adaptive self adaptive scheme for Monte Carlo movers
142  @param molecular_dynamics_steps Number of MD steps per round
143  @param molecular_dynamics_max_time_step Max time step for MD
144  @param number_of_frames Number of REX frames to run
145  @param save_coordinates_mode string: how to save coordinates.
146  "lowest_temperature" (default) only the lowest temperatures
147  is saved
148  "25th_score" all replicas whose score is below the 25th
149  percentile
150  "50th_score" all replicas whose score is below the 50th
151  percentile
152  "75th_score" all replicas whose score is below the 75th
153  percentile
154  @param nframes_write_coordinates How often to write the coordinates
155  of a frame
156  @param write_initial_rmf Write the initial configuration
157  @param global_output_directory Folder that will be created to house
158  output.
159  @param test_mode Set to True to avoid writing any files, just test
160  one frame.
161  """
162  self.model = model
163  self.vars = {}
164 
165  # add check hierarchy is multistate
166  self.output_objects = output_objects
167  self.rmf_output_objects = rmf_output_objects
168  if (isinstance(root_hier, IMP.atom.Hierarchy)
169  and root_hier.get_name() == 'System'):
170  if self.output_objects is not None:
171  self.output_objects.append(
172  IMP.pmi.io.TotalScoreOutput(self.model))
173  if self.rmf_output_objects is not None:
174  self.rmf_output_objects.append(
175  IMP.pmi.io.TotalScoreOutput(self.model))
176  self.root_hier = root_hier
177  states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
178  self.vars["number_of_states"] = len(states)
179  if len(states) > 1:
180  self.root_hiers = states
181  self.is_multi_state = True
182  else:
183  self.root_hier = root_hier
184  self.is_multi_state = False
185  else:
186  raise TypeError("Must provide System hierarchy (root_hier)")
187 
188  if crosslink_restraints:
190  "crosslink_restraints is deprecated and is ignored; "
191  "all cross-link restraints should be automatically "
192  "added to output RMF files")
193  self._rmf_restraints = _RMFRestraints(model, None)
194  self.em_object_for_rmf = em_object_for_rmf
195  self.monte_carlo_sample_objects = monte_carlo_sample_objects
196  self.vars["self_adaptive"] = self_adaptive
197  if sample_objects is not None:
199  "sample_objects is deprecated; use monte_carlo_sample_objects "
200  "(or molecular_dynamics_sample_objects) instead")
201  self.monte_carlo_sample_objects += sample_objects
202  self.molecular_dynamics_sample_objects = \
203  molecular_dynamics_sample_objects
204  self.replica_exchange_object = replica_exchange_object
205  self.molecular_dynamics_max_time_step = \
206  molecular_dynamics_max_time_step
207  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
208  self.vars["replica_exchange_minimum_temperature"] = \
209  replica_exchange_minimum_temperature
210  self.vars["replica_exchange_maximum_temperature"] = \
211  replica_exchange_maximum_temperature
212  self.vars["replica_exchange_swap"] = replica_exchange_swap
213  self.vars["simulated_annealing"] = simulated_annealing
214  self.vars["simulated_annealing_minimum_temperature"] = \
215  simulated_annealing_minimum_temperature
216  self.vars["simulated_annealing_maximum_temperature"] = \
217  simulated_annealing_maximum_temperature
218  self.vars["simulated_annealing_minimum_temperature_nframes"] = \
219  simulated_annealing_minimum_temperature_nframes
220  self.vars["simulated_annealing_maximum_temperature_nframes"] = \
221  simulated_annealing_maximum_temperature_nframes
222 
223  self.vars["num_sample_rounds"] = num_sample_rounds
224  self.vars[
225  "number_of_best_scoring_models"] = number_of_best_scoring_models
226  self.vars["monte_carlo_steps"] = monte_carlo_steps
227  self.vars["molecular_dynamics_steps"] = molecular_dynamics_steps
228  self.vars["number_of_frames"] = number_of_frames
229  if save_coordinates_mode not in ("lowest_temperature", "25th_score",
230  "50th_score", "75th_score"):
231  raise Exception("save_coordinates_mode has unrecognized value")
232  else:
233  self.vars["save_coordinates_mode"] = save_coordinates_mode
234  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
235  self.vars["write_initial_rmf"] = write_initial_rmf
236  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
237  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
238  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
239  self.vars["do_clean_first"] = do_clean_first
240  self.vars["do_create_directories"] = do_create_directories
241  self.vars["global_output_directory"] = global_output_directory
242  self.vars["rmf_dir"] = rmf_dir
243  self.vars["best_pdb_dir"] = best_pdb_dir
244  self.vars["atomistic"] = atomistic
245  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
246  self.vars["geometries"] = None
247  self.test_mode = test_mode
248 
249  def add_geometries(self, geometries):
250  if self.vars["geometries"] is None:
251  self.vars["geometries"] = list(geometries)
252  else:
253  self.vars["geometries"].extend(geometries)
254 
255  def show_info(self):
256  print("ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, "
257  "rmfs/*.rmf3 for each replica ")
258  print("--- it stores the best scoring pdb models in pdbs/")
259  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
260  "lowest temperature")
261  print("--- variables:")
262  keys = list(self.vars.keys())
263  keys.sort()
264  for v in keys:
265  print("------", v.ljust(30), self.vars[v])
266 
267  def get_replica_exchange_object(self):
268  return self.replica_exchange_object
269 
270  def _add_provenance(self, sampler_md, sampler_mc):
271  """Record details about the sampling in the IMP Hierarchies"""
272  iterations = 0
273  if sampler_md:
274  method = "Molecular Dynamics"
275  iterations += self.vars["molecular_dynamics_steps"]
276  if sampler_mc:
277  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
278  iterations += self.vars["monte_carlo_steps"]
279  # If no sampling is actually done, no provenance to write
280  if iterations == 0 or self.vars["number_of_frames"] == 0:
281  return
282  iterations *= self.vars["num_sample_rounds"]
283 
284  pi = self.model.add_particle("sampling")
286  self.model, pi, method, self.vars["number_of_frames"],
287  iterations)
288  p.set_number_of_replicas(
289  self.replica_exchange_object.get_number_of_replicas())
290  IMP.pmi.tools._add_pmi_provenance(self.root_hier)
291  IMP.core.add_provenance(self.model, self.root_hier, p)
292 
293  def execute_macro(self):
294  temp_index_factor = 100000.0
295  samplers = []
296  sampler_mc = None
297  sampler_md = None
298  if self.monte_carlo_sample_objects is not None:
299  print("Setting up MonteCarlo")
300  sampler_mc = IMP.pmi.samplers.MonteCarlo(
301  self.model, self.monte_carlo_sample_objects,
302  self.vars["monte_carlo_temperature"])
303  if self.vars["simulated_annealing"]:
304  tmin = self.vars["simulated_annealing_minimum_temperature"]
305  tmax = self.vars["simulated_annealing_maximum_temperature"]
306  nfmin = self.vars[
307  "simulated_annealing_minimum_temperature_nframes"]
308  nfmax = self.vars[
309  "simulated_annealing_maximum_temperature_nframes"]
310  sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
311  if self.vars["self_adaptive"]:
312  sampler_mc.set_self_adaptive(
313  isselfadaptive=self.vars["self_adaptive"])
314  if self.output_objects is not None:
315  self.output_objects.append(sampler_mc)
316  if self.rmf_output_objects is not None:
317  self.rmf_output_objects.append(sampler_mc)
318  samplers.append(sampler_mc)
319 
320  if self.molecular_dynamics_sample_objects is not None:
321  print("Setting up MolecularDynamics")
323  self.model, self.molecular_dynamics_sample_objects,
324  self.vars["monte_carlo_temperature"],
325  maximum_time_step=self.molecular_dynamics_max_time_step)
326  if self.vars["simulated_annealing"]:
327  tmin = self.vars["simulated_annealing_minimum_temperature"]
328  tmax = self.vars["simulated_annealing_maximum_temperature"]
329  nfmin = self.vars[
330  "simulated_annealing_minimum_temperature_nframes"]
331  nfmax = self.vars[
332  "simulated_annealing_maximum_temperature_nframes"]
333  sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
334  if self.output_objects is not None:
335  self.output_objects.append(sampler_md)
336  if self.rmf_output_objects is not None:
337  self.rmf_output_objects.append(sampler_md)
338  samplers.append(sampler_md)
339 # -------------------------------------------------------------------------
340 
341  print("Setting up ReplicaExchange")
343  self.model, self.vars["replica_exchange_minimum_temperature"],
344  self.vars["replica_exchange_maximum_temperature"], samplers,
345  replica_exchange_object=self.replica_exchange_object)
346  self.replica_exchange_object = rex.rem
347 
348  myindex = rex.get_my_index()
349  if self.output_objects is not None:
350  self.output_objects.append(rex)
351  if self.rmf_output_objects is not None:
352  self.rmf_output_objects.append(rex)
353  # must reset the minimum temperature due to the
354  # different binary length of rem.get_my_parameter double and python
355  # float
356  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
357 
358 # -------------------------------------------------------------------------
359 
360  globaldir = self.vars["global_output_directory"] + "/"
361  rmf_dir = globaldir + self.vars["rmf_dir"]
362  pdb_dir = globaldir + self.vars["best_pdb_dir"]
363 
364  if not self.test_mode:
365  if self.vars["do_clean_first"]:
366  pass
367 
368  if self.vars["do_create_directories"]:
369 
370  try:
371  os.makedirs(globaldir)
372  except: # noqa: E722
373  pass
374  try:
375  os.makedirs(rmf_dir)
376  except: # noqa: E722
377  pass
378 
379  if not self.is_multi_state:
380  try:
381  os.makedirs(pdb_dir)
382  except: # noqa: E722
383  pass
384  else:
385  for n in range(self.vars["number_of_states"]):
386  try:
387  os.makedirs(pdb_dir + "/" + str(n))
388  except: # noqa: E722
389  pass
390 
391 # -------------------------------------------------------------------------
392 
394  if self.output_objects is not None:
395  self.output_objects.append(sw)
396  if self.rmf_output_objects is not None:
397  self.rmf_output_objects.append(sw)
398 
399  print("Setting up stat file")
400  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
401  low_temp_stat_file = globaldir + \
402  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
403 
404  # Ensure model is updated before saving init files
405  if not self.test_mode:
406  self.model.update()
407 
408  if not self.test_mode:
409  if self.output_objects is not None:
410  output.init_stat2(low_temp_stat_file,
411  self.output_objects,
412  extralabels=["rmf_file", "rmf_frame_index"])
413  else:
414  print("Stat file writing is disabled")
415 
416  if self.rmf_output_objects is not None:
417  print("Stat info being written in the rmf file")
418 
419  print("Setting up replica stat file")
420  replica_stat_file = globaldir + \
421  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
422  if not self.test_mode:
423  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
424 
425  if not self.test_mode:
426  print("Setting up best pdb files")
427  if not self.is_multi_state:
428  if self.vars["number_of_best_scoring_models"] > 0:
429  output.init_pdb_best_scoring(
430  pdb_dir + "/" + self.vars["best_pdb_name_suffix"],
431  self.root_hier,
432  self.vars["number_of_best_scoring_models"],
433  replica_exchange=True)
434  output.write_psf(
435  pdb_dir + "/" + "model.psf",
436  pdb_dir + "/" +
437  self.vars["best_pdb_name_suffix"] + ".0.pdb")
438  else:
439  if self.vars["number_of_best_scoring_models"] > 0:
440  for n in range(self.vars["number_of_states"]):
441  output.init_pdb_best_scoring(
442  pdb_dir + "/" + str(n) + "/" +
443  self.vars["best_pdb_name_suffix"],
444  self.root_hiers[n],
445  self.vars["number_of_best_scoring_models"],
446  replica_exchange=True)
447  output.write_psf(
448  pdb_dir + "/" + str(n) + "/" + "model.psf",
449  pdb_dir + "/" + str(n) + "/" +
450  self.vars["best_pdb_name_suffix"] + ".0.pdb")
451 # ---------------------------------------------
452 
453  if self.em_object_for_rmf is not None:
454  output_hierarchies = [
455  self.root_hier,
456  self.em_object_for_rmf.get_density_as_hierarchy(
457  )]
458  else:
459  output_hierarchies = [self.root_hier]
460 
461  if not self.test_mode:
462  print("Setting up and writing initial rmf coordinate file")
463  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
464  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
465  output_hierarchies,
466  listofobjects=self.rmf_output_objects)
467  if self._rmf_restraints:
468  output.add_restraints_to_rmf(
469  init_suffix + "." + str(myindex) + ".rmf3",
470  self._rmf_restraints)
471  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
472  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
473 
474  if not self.test_mode:
475  mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
476 
477  self._add_provenance(sampler_md, sampler_mc)
478 
479  if not self.test_mode:
480  print("Setting up production rmf files")
481  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
482  output.init_rmf(rmfname, output_hierarchies,
483  geometries=self.vars["geometries"],
484  listofobjects=self.rmf_output_objects)
485 
486  if self._rmf_restraints:
487  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
488 
489  ntimes_at_low_temp = 0
490 
491  if myindex == 0:
492  self.show_info()
493  self.replica_exchange_object.set_was_used(True)
494  nframes = self.vars["number_of_frames"]
495  if self.test_mode:
496  nframes = 1
497  for i in range(nframes):
498  if self.test_mode:
499  score = 0.
500  else:
501  for nr in range(self.vars["num_sample_rounds"]):
502  if sampler_md is not None:
503  sampler_md.optimize(
504  self.vars["molecular_dynamics_steps"])
505  if sampler_mc is not None:
506  sampler_mc.optimize(self.vars["monte_carlo_steps"])
508  self.model).evaluate(False)
509  mpivs.set_value("score", score)
510  output.set_output_entry("score", score)
511 
512  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
513 
514  if self.vars["save_coordinates_mode"] == "lowest_temperature":
515  save_frame = (min_temp_index == my_temp_index)
516  elif self.vars["save_coordinates_mode"] == "25th_score":
517  score_perc = mpivs.get_percentile("score")
518  save_frame = (score_perc*100.0 <= 25.0)
519  elif self.vars["save_coordinates_mode"] == "50th_score":
520  score_perc = mpivs.get_percentile("score")
521  save_frame = (score_perc*100.0 <= 50.0)
522  elif self.vars["save_coordinates_mode"] == "75th_score":
523  score_perc = mpivs.get_percentile("score")
524  save_frame = (score_perc*100.0 <= 75.0)
525 
526  # Ensure model is updated before saving output files
527  if save_frame and not self.test_mode:
528  self.model.update()
529 
530  if save_frame:
531  print("--- frame %s score %s " % (str(i), str(score)))
532 
533  if not self.test_mode:
534  if i % self.vars["nframes_write_coordinates"] == 0:
535  print('--- writing coordinates')
536  if self.vars["number_of_best_scoring_models"] > 0:
537  output.write_pdb_best_scoring(score)
538  output.write_rmf(rmfname)
539  output.set_output_entry("rmf_file", rmfname)
540  output.set_output_entry("rmf_frame_index",
541  ntimes_at_low_temp)
542  else:
543  output.set_output_entry("rmf_file", rmfname)
544  output.set_output_entry("rmf_frame_index", '-1')
545  if self.output_objects is not None:
546  output.write_stat2(low_temp_stat_file)
547  ntimes_at_low_temp += 1
548 
549  if not self.test_mode:
550  output.write_stat2(replica_stat_file)
551  if self.vars["replica_exchange_swap"]:
552  rex.swap_temp(i, score)
553  for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
554  p.add_replica_exchange(state, self)
555 
556  if not self.test_mode:
557  print("closing production rmf files")
558  output.close_rmf(rmfname)
559 
560 
561 class BuildSystem(object):
562  """A macro to build a IMP::pmi::topology::System based on a
563  TopologyReader object.
564 
565  Easily create multi-state systems by calling this macro
566  repeatedly with different TopologyReader objects!
567  A useful function is get_molecules() which returns the PMI Molecules
568  grouped by state as a dictionary with key = (molecule name),
569  value = IMP.pmi.topology.Molecule
570  Quick multi-state system:
571  @code{.python}
572  model = IMP.Model()
573  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
574  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
575  bs = IMP.pmi.macros.BuildSystem(model)
576  bs.add_state(reader1)
577  bs.add_state(reader2)
578  bs.execute_macro() # build everything including degrees of freedom
579  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
580  ### now you have a two state system, you add restraints etc
581  @endcode
582  @note The "domain name" entry of the topology reader is not used.
583  All molecules are set up by the component name, but split into rigid bodies
584  as requested.
585  """
586 
587  _alphabets = {'DNA': IMP.pmi.alphabets.dna,
588  'RNA': IMP.pmi.alphabets.rna}
589 
590  def __init__(self, model, sequence_connectivity_scale=4.0,
591  force_create_gmm_files=False, resolutions=[1, 10]):
592  """Constructor
593  @param model An IMP Model
594  @param sequence_connectivity_scale For scaling the connectivity
595  restraint
596  @param force_create_gmm_files If True, will sample and create GMMs
597  no matter what. If False, will only sample if the
598  files don't exist. If number of Gaussians is zero, won't
599  do anything.
600  @param resolutions The resolutions to build for structured regions
601  """
602  self.model = model
603  self.system = IMP.pmi.topology.System(self.model)
604  self._readers = [] # the TopologyReaders (one per state)
605  # TempResidues for each domain key=unique name,
606  # value=(atomic_res,non_atomic_res).
607  self._domain_res = []
608  self._domains = [] # key = domain unique name, value = Component
609  self.force_create_gmm_files = force_create_gmm_files
610  self.resolutions = resolutions
611 
612  def add_state(self, reader, keep_chain_id=False, fasta_name_map=None):
613  """Add a state using the topology info in a
614  IMP::pmi::topology::TopologyReader object.
615  When you are done adding states, call execute_macro()
616  @param reader The TopologyReader object
617  @param fasta_name_map dictionary for converting protein names
618  found in the fasta file
619  """
620  state = self.system.create_state()
621  self._readers.append(reader)
622  # key is unique name, value is (atomic res, nonatomicres)
623  these_domain_res = {}
624  these_domains = {} # key is unique name, value is _Component
625  chain_ids = string.ascii_uppercase+string.ascii_lowercase+'0123456789'
626  numchain = 0
627 
628  # setup representation
629  # loop over molecules, copies, then domains
630  for molname in reader.get_molecules():
631  copies = reader.get_molecules()[molname].domains
632  for nc, copyname in enumerate(copies):
633  print("BuildSystem.add_state: setting up molecule %s copy "
634  "number %s" % (molname, str(nc)))
635  copy = copies[copyname]
636  # option to not rename chains
637  if keep_chain_id:
638  all_chains = [c for c in copy if c.chain is not None]
639  if all_chains:
640  chain_id = all_chains[0].chain
641  else:
642  chain_id = chain_ids[numchain]
643  warnings.warn(
644  "No PDBs specified for %s, so keep_chain_id has "
645  "no effect; using default chain ID '%s'"
646  % (molname, chain_id), IMP.pmi.ParameterWarning)
647  else:
648  chain_id = chain_ids[numchain]
649  if nc == 0:
650  alphabet = IMP.pmi.alphabets.amino_acid
651  fasta_flag = copy[0].fasta_flag
652  if fasta_flag in self._alphabets:
653  alphabet = self._alphabets[fasta_flag]
655  copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
656  print("BuildSystem.add_state: molecule %s sequence has "
657  "%s residues" % (molname, len(seq)))
658  orig_mol = state.create_molecule(molname, seq, chain_id,
659  alphabet=alphabet)
660  mol = orig_mol
661  numchain += 1
662  else:
663  print("BuildSystem.add_state: creating a copy for "
664  "molecule %s" % molname)
665  mol = orig_mol.create_copy(chain_id)
666  numchain += 1
667 
668  for domainnumber, domain in enumerate(copy):
669  print("BuildSystem.add_state: ---- setting up domain %s "
670  "of molecule %s" % (domainnumber, molname))
671  # we build everything in the residue range, even if it
672  # extends beyond what's in the actual PDB file
673  these_domains[domain.get_unique_name()] = domain
674  if domain.residue_range == [] or \
675  domain.residue_range is None:
676  domain_res = mol.get_residues()
677  else:
678  start = domain.residue_range[0]+domain.pdb_offset
679  if domain.residue_range[1] == 'END':
680  end = len(mol.sequence)
681  else:
682  end = domain.residue_range[1]+domain.pdb_offset
683  domain_res = mol.residue_range(start-1, end-1)
684  print("BuildSystem.add_state: -------- domain %s of "
685  "molecule %s extends from residue %s to "
686  "residue %s "
687  % (domainnumber, molname, start, end))
688  if domain.pdb_file == "BEADS":
689  print("BuildSystem.add_state: -------- domain %s of "
690  "molecule %s represented by BEADS "
691  % (domainnumber, molname))
692  mol.add_representation(
693  domain_res,
694  resolutions=[domain.bead_size],
695  setup_particles_as_densities=(
696  domain.em_residues_per_gaussian != 0),
697  color=domain.color)
698  these_domain_res[domain.get_unique_name()] = \
699  (set(), domain_res)
700  elif domain.pdb_file == "IDEAL_HELIX":
701  print("BuildSystem.add_state: -------- domain %s of "
702  "molecule %s represented by IDEAL_HELIX "
703  % (domainnumber, molname))
704  emper = domain.em_residues_per_gaussian
705  mol.add_representation(
706  domain_res,
707  resolutions=self.resolutions,
708  ideal_helix=True,
709  density_residues_per_component=emper,
710  density_prefix=domain.density_prefix,
711  density_force_compute=self.force_create_gmm_files,
712  color=domain.color)
713  these_domain_res[domain.get_unique_name()] = \
714  (domain_res, set())
715  else:
716  print("BuildSystem.add_state: -------- domain %s of "
717  "molecule %s represented by pdb file %s "
718  % (domainnumber, molname, domain.pdb_file))
719  domain_atomic = mol.add_structure(domain.pdb_file,
720  domain.chain,
721  domain.residue_range,
722  domain.pdb_offset,
723  soft_check=True)
724  domain_non_atomic = domain_res - domain_atomic
725  if not domain.em_residues_per_gaussian:
726  mol.add_representation(
727  domain_atomic, resolutions=self.resolutions,
728  color=domain.color)
729  if len(domain_non_atomic) > 0:
730  mol.add_representation(
731  domain_non_atomic,
732  resolutions=[domain.bead_size],
733  color=domain.color)
734  else:
735  print("BuildSystem.add_state: -------- domain %s "
736  "of molecule %s represented by gaussians "
737  % (domainnumber, molname))
738  emper = domain.em_residues_per_gaussian
739  creategmm = self.force_create_gmm_files
740  mol.add_representation(
741  domain_atomic,
742  resolutions=self.resolutions,
743  density_residues_per_component=emper,
744  density_prefix=domain.density_prefix,
745  density_force_compute=creategmm,
746  color=domain.color)
747  if len(domain_non_atomic) > 0:
748  mol.add_representation(
749  domain_non_atomic,
750  resolutions=[domain.bead_size],
751  setup_particles_as_densities=True,
752  color=domain.color)
753  these_domain_res[domain.get_unique_name()] = (
754  domain_atomic, domain_non_atomic)
755  self._domain_res.append(these_domain_res)
756  self._domains.append(these_domains)
757  print('BuildSystem.add_state: State', len(self.system.states), 'added')
758 
759  def get_molecules(self):
760  """Return list of all molecules grouped by state.
761  For each state, it's a dictionary of Molecules where key is the
762  molecule name
763  """
764  return [s.get_molecules() for s in self.system.get_states()]
765 
766  def get_molecule(self, molname, copy_index=0, state_index=0):
767  return self.system.get_states()[state_index].get_molecules()[
768  molname][copy_index]
769 
770  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04,
771  max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
772  """Builds representations and sets up degrees of freedom"""
773  print("BuildSystem.execute_macro: building representations")
774  self.root_hier = self.system.build()
775 
776  print("BuildSystem.execute_macro: setting up degrees of freedom")
777  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
778  for nstate, reader in enumerate(self._readers):
779  rbs = reader.get_rigid_bodies()
780  srbs = reader.get_super_rigid_bodies()
781  csrbs = reader.get_chains_of_super_rigid_bodies()
782 
783  # add rigid bodies
784  domains_in_rbs = set()
785  for rblist in rbs:
786  print("BuildSystem.execute_macro: -------- building rigid "
787  "body %s" % (str(rblist)))
788  all_res = IMP.pmi.tools.OrderedSet()
789  bead_res = IMP.pmi.tools.OrderedSet()
790  for dname in rblist:
791  domain = self._domains[nstate][dname]
792  print("BuildSystem.execute_macro: -------- adding %s"
793  % (str(dname)))
794  all_res |= self._domain_res[nstate][dname][0]
795  bead_res |= self._domain_res[nstate][dname][1]
796  domains_in_rbs.add(dname)
797  all_res |= bead_res
798  print("BuildSystem.execute_macro: -------- creating rigid "
799  "body with max_trans %s max_rot %s "
800  "non_rigid_max_trans %s"
801  % (str(max_rb_trans), str(max_rb_rot),
802  str(max_bead_trans)))
803  self.dof.create_rigid_body(all_res,
804  nonrigid_parts=bead_res,
805  max_trans=max_rb_trans,
806  max_rot=max_rb_rot,
807  nonrigid_max_trans=max_bead_trans)
808 
809  # if you have any domains not in an RB, set them as flexible beads
810  for dname, domain in self._domains[nstate].items():
811  if dname not in domains_in_rbs:
812  if domain.pdb_file != "BEADS":
813  warnings.warn(
814  "No rigid bodies set for %s. Residues read from "
815  "the PDB file will not be sampled - only regions "
816  "missing from the PDB will be treated flexibly. "
817  "To sample the entire sequence, use BEADS instead "
818  "of a PDB file name" % dname,
820  self.dof.create_flexible_beads(
821  self._domain_res[nstate][dname][1],
822  max_trans=max_bead_trans)
823 
824  # add super rigid bodies
825  for srblist in srbs:
826  print("BuildSystem.execute_macro: -------- building "
827  "super rigid body %s" % (str(srblist)))
828  all_res = IMP.pmi.tools.OrderedSet()
829  for dname in srblist:
830  print("BuildSystem.execute_macro: -------- adding %s"
831  % (str(dname)))
832  all_res |= self._domain_res[nstate][dname][0]
833  all_res |= self._domain_res[nstate][dname][1]
834 
835  print("BuildSystem.execute_macro: -------- creating super "
836  "rigid body with max_trans %s max_rot %s "
837  % (str(max_srb_trans), str(max_srb_rot)))
838  self.dof.create_super_rigid_body(
839  all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
840 
841  # add chains of super rigid bodies
842  for csrblist in csrbs:
843  all_res = IMP.pmi.tools.OrderedSet()
844  for dname in csrblist:
845  all_res |= self._domain_res[nstate][dname][0]
846  all_res |= self._domain_res[nstate][dname][1]
847  all_res = list(all_res)
848  all_res.sort(key=lambda r: r.get_index())
849  self.dof.create_main_chain_mover(all_res)
850  return self.root_hier, self.dof
851 
852 
853 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
854 class AnalysisReplicaExchange0(object):
855  """A macro for running all the basic operations of analysis.
856  Includes clustering, precision analysis, and making ensemble density maps.
857  A number of plots are also supported.
858  """
859  def __init__(self, model,
860  merge_directories=["./"],
861  stat_file_name_suffix="stat",
862  best_pdb_name_suffix="model",
863  do_clean_first=True,
864  do_create_directories=True,
865  global_output_directory="output/",
866  replica_stat_file_suffix="stat_replica",
867  global_analysis_result_directory="./analysis/",
868  test_mode=False):
869  """Constructor.
870  @param model The IMP model
871  @param stat_file_name_suffix
872  @param merge_directories The directories containing output files
873  @param best_pdb_name_suffix
874  @param do_clean_first
875  @param do_create_directories
876  @param global_output_directory Where everything is
877  @param replica_stat_file_suffix
878  @param global_analysis_result_directory
879  @param test_mode If True, nothing is changed on disk
880  """
881 
882  try:
883  from mpi4py import MPI
884  self.comm = MPI.COMM_WORLD
885  self.rank = self.comm.Get_rank()
886  self.number_of_processes = self.comm.size
887  except ImportError:
888  self.rank = 0
889  self.number_of_processes = 1
890 
891  self.test_mode = test_mode
892  self._protocol_output = []
893  self.cluster_obj = None
894  self.model = model
895  stat_dir = global_output_directory
896  self.stat_files = []
897  # it contains the position of the root directories
898  for rd in merge_directories:
899  stat_files = glob.glob(os.path.join(rd, stat_dir, "stat.*.out"))
900  if len(stat_files) == 0:
901  warnings.warn("no stat files found in %s"
902  % os.path.join(rd, stat_dir),
904  self.stat_files += stat_files
905 
906  def add_protocol_output(self, p):
907  """Capture details of the modeling protocol.
908  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
909  """
910  # Assume last state is the one we're interested in
911  self._protocol_output.append((p, p._last_state))
912 
913  def get_modeling_trajectory(self,
914  score_key="SimplifiedModel_Total_Score_None",
915  rmf_file_key="rmf_file",
916  rmf_file_frame_key="rmf_frame_index",
917  outputdir="./",
918  get_every=1,
919  nframes_trajectory=10000):
920  """ Get a trajectory of the modeling run, for generating
921  demonstrative movies
922 
923  @param score_key The score for ranking models
924  @param rmf_file_key Key pointing to RMF filename
925  @param rmf_file_frame_key Key pointing to RMF frame number
926  @param outputdir The local output directory used in the run
927  @param get_every Extract every nth frame
928  @param nframes_trajectory Total number of frames of the trajectory
929  """
930  import math
931 
932  trajectory_models = IMP.pmi.io.get_trajectory_models(
933  self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
934  get_every)
935  score_list = list(map(float, trajectory_models[2]))
936 
937  max_score = max(score_list)
938  min_score = min(score_list)
939 
940  bins = [(max_score-min_score)*math.exp(-float(i))+min_score
941  for i in range(nframes_trajectory)]
942  binned_scores = [None]*nframes_trajectory
943  binned_model_indexes = [-1]*nframes_trajectory
944 
945  for model_index, s in enumerate(score_list):
946  bins_score_diffs = [abs(s-b) for b in bins]
947  bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
948  if binned_scores[bin_index] is None:
949  binned_scores[bin_index] = s
950  binned_model_indexes[bin_index] = model_index
951  else:
952  old_diff = abs(binned_scores[bin_index]-bins[bin_index])
953  new_diff = abs(s-bins[bin_index])
954  if new_diff < old_diff:
955  binned_scores[bin_index] = s
956  binned_model_indexes[bin_index] = model_index
957 
958  print(binned_scores)
959  print(binned_model_indexes)
960 
961  def _expand_ambiguity(self, prot, d):
962  """If using PMI2, expand the dictionary to include copies as
963  ambiguous options
964 
965  This also keeps the states separate.
966  """
967  newdict = {}
968  for key in d:
969  val = d[key]
970  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
971  newdict[key] = val
972  continue
973  states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
974  if isinstance(val, tuple):
975  start = val[0]
976  stop = val[1]
977  name = val[2]
978  else:
979  start = 1
980  stop = -1
981  name = val
982  for nst in range(len(states)):
983  sel = IMP.atom.Selection(prot, molecule=name, state_index=nst)
984  copies = sel.get_selected_particles(with_representation=False)
985  if len(copies) > 1:
986  for nc in range(len(copies)):
987  if len(states) > 1:
988  newdict['%s.%i..%i' % (name, nst, nc)] = \
989  (start, stop, name, nc, nst)
990  else:
991  newdict['%s..%i' % (name, nc)] = \
992  (start, stop, name, nc, nst)
993  else:
994  newdict[key] = val
995  return newdict
996 
997  def clustering(self,
998  score_key="SimplifiedModel_Total_Score_None",
999  rmf_file_key="rmf_file",
1000  rmf_file_frame_key="rmf_frame_index",
1001  state_number=0,
1002  prefiltervalue=None,
1003  feature_keys=[],
1004  outputdir="./",
1005  alignment_components=None,
1006  number_of_best_scoring_models=10,
1007  rmsd_calculation_components=None,
1008  distance_matrix_file='distances.mat',
1009  load_distance_matrix_file=False,
1010  skip_clustering=False,
1011  number_of_clusters=1,
1012  display_plot=False,
1013  exit_after_display=True,
1014  get_every=1,
1015  first_and_last_frames=None,
1016  density_custom_ranges=None,
1017  write_pdb_with_centered_coordinates=False,
1018  voxel_size=5.0):
1019  """Get the best scoring models, compute a distance matrix,
1020  cluster them, and create density maps.
1021 
1022  Tuple format: "molname" just the molecule,
1023  or (start,stop,molname,copy_num(optional),state_num(optional)
1024  Can pass None for copy or state to ignore that field.
1025  If you don't pass a specific copy number
1026 
1027  @param score_key The score for ranking models.
1028  Use "Total_Score" instead of default for PMI2.
1029  @param rmf_file_key Key pointing to RMF filename
1030  @param rmf_file_frame_key Key pointing to RMF frame number
1031  @param state_number State number to analyze
1032  @param prefiltervalue Only include frames where the
1033  score key is below this value
1034  @param feature_keys Keywords for which you want to
1035  calculate average, medians, etc.
1036  If you pass "Keyname" it'll include everything that matches
1037  "*Keyname*"
1038  @param outputdir The local output directory used in
1039  the run
1040  @param alignment_components Dictionary with keys=groupname,
1041  values are tuples for aligning the structures
1042  e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1043  @param number_of_best_scoring_models Num models to keep per run
1044  @param rmsd_calculation_components For calculating RMSD
1045  (same format as alignment_components)
1046  @param distance_matrix_file Where to store/read the
1047  distance matrix
1048  @param load_distance_matrix_file Try to load the distance
1049  matrix file
1050  @param skip_clustering Just extract the best scoring
1051  models and save the pdbs
1052  @param number_of_clusters Number of k-means clusters
1053  @param display_plot Display the distance matrix
1054  @param exit_after_display Exit after displaying distance
1055  matrix
1056  @param get_every Extract every nth frame
1057  @param first_and_last_frames A tuple with the first and last
1058  frames to be analyzed. Values are percentages!
1059  Default: get all frames
1060  @param density_custom_ranges For density calculation
1061  (same format as alignment_components)
1062  @param write_pdb_with_centered_coordinates
1063  @param voxel_size Used for the density output
1064  """
1065  # Track provenance information to be added to each output model
1066  prov = []
1067  self._outputdir = os.path.abspath(outputdir)
1068  self._number_of_clusters = number_of_clusters
1069  for p, state in self._protocol_output:
1070  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1071 
1072  if self.test_mode:
1073  return
1074 
1075  if self.rank == 0:
1076  try:
1077  os.mkdir(outputdir)
1078  except: # noqa: E722
1079  pass
1080 
1081  if not load_distance_matrix_file:
1082  if len(self.stat_files) == 0:
1083  print("ERROR: no stat file found in the given path")
1084  return
1085  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1086  self.stat_files, self.number_of_processes)[self.rank]
1087 
1088  # read ahead to check if you need the PMI2 score key instead
1089  po = IMP.pmi.output.ProcessOutput(my_stat_files[0])
1090  orig_score_key = score_key
1091  if score_key not in po.get_keys():
1092  if 'Total_Score' in po.get_keys():
1093  score_key = 'Total_Score'
1094  warnings.warn(
1095  "Using 'Total_Score' instead of "
1096  "'SimplifiedModel_Total_Score_None' for the score key",
1098  for k in [orig_score_key, score_key, rmf_file_key,
1099  rmf_file_frame_key]:
1100  if k in feature_keys:
1101  warnings.warn(
1102  "no need to pass " + k + " to feature_keys.",
1104  feature_keys.remove(k)
1105 
1106  best_models = IMP.pmi.io.get_best_models(
1107  my_stat_files, score_key, feature_keys, rmf_file_key,
1108  rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1109  rmf_file_list = best_models[0]
1110  rmf_file_frame_list = best_models[1]
1111  score_list = best_models[2]
1112  feature_keyword_list_dict = best_models[3]
1113 
1114 # ------------------------------------------------------------------------
1115 # collect all the files and scores
1116 # ------------------------------------------------------------------------
1117 
1118  if self.number_of_processes > 1:
1119  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1120  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1121  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1122  rmf_file_frame_list)
1123  for k in feature_keyword_list_dict:
1124  feature_keyword_list_dict[k] = \
1126  feature_keyword_list_dict[k])
1127 
1128  # sort by score and get the best scoring ones
1129  score_rmf_tuples = list(zip(score_list,
1130  rmf_file_list,
1131  rmf_file_frame_list,
1132  list(range(len(score_list)))))
1133 
1134  if density_custom_ranges:
1135  for k in density_custom_ranges:
1136  if not isinstance(density_custom_ranges[k], list):
1137  raise Exception("Density custom ranges: values must "
1138  "be lists of tuples")
1139 
1140  # keep subset of frames if requested
1141  if first_and_last_frames is not None:
1142  nframes = len(score_rmf_tuples)
1143  first_frame = int(first_and_last_frames[0] * nframes)
1144  last_frame = int(first_and_last_frames[1] * nframes)
1145  if last_frame > len(score_rmf_tuples):
1146  last_frame = -1
1147  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1148 
1149  # sort RMFs by the score_key in ascending order, and store the rank
1150  best_score_rmf_tuples = sorted(
1151  score_rmf_tuples,
1152  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1153  best_score_rmf_tuples = [t+(n,) for n, t in
1154  enumerate(best_score_rmf_tuples)]
1155  # Note in the provenance info that we only kept best-scoring models
1156  prov.append(IMP.pmi.io.FilterProvenance(
1157  "Best scoring", 0, number_of_best_scoring_models))
1158  # sort the feature scores in the same way
1159  best_score_feature_keyword_list_dict = defaultdict(list)
1160  for tpl in best_score_rmf_tuples:
1161  index = tpl[3]
1162  for f in feature_keyword_list_dict:
1163  best_score_feature_keyword_list_dict[f].append(
1164  feature_keyword_list_dict[f][index])
1165  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1166  best_score_rmf_tuples,
1167  self.number_of_processes)[self.rank]
1168 
1169  # expand the dictionaries to include ambiguous copies
1170  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1171  self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1172  if IMP.pmi.get_is_canonical(prot_ahead):
1173  if rmsd_calculation_components is not None:
1174  tmp = self._expand_ambiguity(
1175  prot_ahead, rmsd_calculation_components)
1176  if tmp != rmsd_calculation_components:
1177  print('Detected ambiguity, expand rmsd components to',
1178  tmp)
1179  rmsd_calculation_components = tmp
1180  if alignment_components is not None:
1181  tmp = self._expand_ambiguity(prot_ahead,
1182  alignment_components)
1183  if tmp != alignment_components:
1184  print('Detected ambiguity, expand alignment '
1185  'components to', tmp)
1186  alignment_components = tmp
1187 
1188 # -------------------------------------------------------------
1189 # read the coordinates
1190 # ------------------------------------------------------------
1191  rmsd_weights = IMP.pmi.io.get_bead_sizes(
1192  self.model, my_best_score_rmf_tuples[0],
1193  rmsd_calculation_components, state_number=state_number)
1195  self.model, my_best_score_rmf_tuples, alignment_components,
1196  rmsd_calculation_components, state_number=state_number)
1197 
1198  # note! the coordinates are simply float tuples, NOT decorators,
1199  # NOT Vector3D, NOR particles, because these object cannot be
1200  # serialized. We need serialization
1201  # for the parallel computation based on mpi.
1202 
1203  # dict:key=component name,val=coords per hit
1204  all_coordinates = got_coords[0]
1205 
1206  # same as above, limited to alignment bits
1207  alignment_coordinates = got_coords[1]
1208 
1209  # same as above, limited to RMSD bits
1210  rmsd_coordinates = got_coords[2]
1211 
1212  # dictionary with key=RMF, value=score rank
1213  rmf_file_name_index_dict = got_coords[3]
1214 
1215  # RMF file per hit
1216  all_rmf_file_names = got_coords[4]
1217 
1218 # ------------------------------------------------------------------------
1219 # optionally don't compute distance matrix or cluster, just write top files
1220 # ------------------------------------------------------------------------
1221  if skip_clustering:
1222  if density_custom_ranges:
1223  DensModule = IMP.pmi.analysis.GetModelDensity(
1224  density_custom_ranges, voxel=voxel_size)
1225 
1226  dircluster = os.path.join(outputdir,
1227  "all_models."+str(self.rank))
1228  try:
1229  os.mkdir(outputdir)
1230  except: # noqa: E722
1231  pass
1232  try:
1233  os.mkdir(dircluster)
1234  except: # noqa: E722
1235  pass
1236  clusstat = open(os.path.join(
1237  dircluster, "stat."+str(self.rank)+".out"), "w")
1238  for cnt, tpl in enumerate(my_best_score_rmf_tuples):
1239  rmf_name = tpl[1]
1240  rmf_frame_number = tpl[2]
1241  tmp_dict = {}
1242  index = tpl[4]
1243  for key in best_score_feature_keyword_list_dict:
1244  tmp_dict[key] = \
1245  best_score_feature_keyword_list_dict[key][index]
1246 
1247  if cnt == 0:
1248  prots, rs = \
1249  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1250  self.model, rmf_frame_number, rmf_name)
1251  else:
1252  linking_successful = \
1253  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1254  self.model, prots, rs, rmf_frame_number,
1255  rmf_name)
1256  if not linking_successful:
1257  continue
1258 
1259  if not prots:
1260  continue
1261 
1262  if IMP.pmi.get_is_canonical(prots[0]):
1263  states = IMP.atom.get_by_type(
1264  prots[0], IMP.atom.STATE_TYPE)
1265  prot = states[state_number]
1266  else:
1267  prot = prots[state_number]
1268 
1269  # get transformation aligning coordinates of
1270  # requested tuples to the first RMF file
1271  if cnt == 0:
1272  coords_f1 = alignment_coordinates[cnt]
1273  if cnt > 0:
1274  coords_f2 = alignment_coordinates[cnt]
1275  if coords_f2:
1277  coords_f1, coords_f2)
1278  transformation = Ali.align()[1]
1279  else:
1280  transformation = \
1282 
1283  rbs = set()
1284  for p in IMP.atom.get_leaves(prot):
1285  if not IMP.core.XYZR.get_is_setup(p):
1287  IMP.core.XYZR(p).set_radius(0.0001)
1288  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1289 
1291  rbm = IMP.core.RigidBodyMember(p)
1292  rb = rbm.get_rigid_body()
1293  rbs.add(rb)
1294  else:
1296  transformation)
1297  for rb in rbs:
1298  IMP.core.transform(rb, transformation)
1299 
1300  o = IMP.pmi.output.Output()
1301  self.model.update()
1302  out_pdb_fn = os.path.join(
1303  dircluster, str(cnt)+"."+str(self.rank)+".pdb")
1304  out_rmf_fn = os.path.join(
1305  dircluster, str(cnt)+"."+str(self.rank)+".rmf3")
1306  o.init_pdb(out_pdb_fn, prot)
1307  tc = write_pdb_with_centered_coordinates
1308  o.write_pdb(out_pdb_fn,
1309  translate_to_geometric_center=tc)
1310 
1311  tmp_dict["local_pdb_file_name"] = \
1312  os.path.basename(out_pdb_fn)
1313  tmp_dict["rmf_file_full_path"] = rmf_name
1314  tmp_dict["local_rmf_file_name"] = \
1315  os.path.basename(out_rmf_fn)
1316  tmp_dict["local_rmf_frame_number"] = 0
1317 
1318  clusstat.write(str(tmp_dict)+"\n")
1319 
1320  if IMP.pmi.get_is_canonical(prot):
1321  # create a single-state System and write that
1323  IMP.Particle(self.model))
1324  h.set_name("System")
1325  h.add_child(prot)
1326  o.init_rmf(out_rmf_fn, [h], rs)
1327  else:
1328  o.init_rmf(out_rmf_fn, [prot], rs)
1329 
1330  o.write_rmf(out_rmf_fn)
1331  o.close_rmf(out_rmf_fn)
1332  # add the density
1333  if density_custom_ranges:
1334  DensModule.add_subunits_density(prot)
1335 
1336  if density_custom_ranges:
1337  DensModule.write_mrc(path=dircluster)
1338  del DensModule
1339  return
1340 
1341  # broadcast the coordinates
1342  if self.number_of_processes > 1:
1343  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1344  all_coordinates)
1345  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1346  all_rmf_file_names)
1347  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1348  rmf_file_name_index_dict)
1349  alignment_coordinates = IMP.pmi.tools.scatter_and_gather(
1350  alignment_coordinates)
1351  rmsd_coordinates = IMP.pmi.tools.scatter_and_gather(
1352  rmsd_coordinates)
1353 
1354  if self.rank == 0:
1355  # save needed informations in external files
1356  self.save_objects(
1357  [best_score_feature_keyword_list_dict,
1358  rmf_file_name_index_dict],
1359  ".macro.pkl")
1360 
1361 # ------------------------------------------------------------------------
1362 # Calculate distance matrix and cluster
1363 # ------------------------------------------------------------------------
1364  print("setup clustering class")
1365  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1366 
1367  for n, model_coordinate_dict in enumerate(all_coordinates):
1368  # let's try to align
1369  if (alignment_components is not None
1370  and len(self.cluster_obj.all_coords) == 0):
1371  # set the first model as template coordinates
1372  self.cluster_obj.set_template(alignment_coordinates[n])
1373  self.cluster_obj.fill(all_rmf_file_names[n],
1374  rmsd_coordinates[n])
1375  print("Global calculating the distance matrix")
1376 
1377  # calculate distance matrix, all against all
1378  self.cluster_obj.dist_matrix()
1379 
1380  # perform clustering and optionally display
1381  if self.rank == 0:
1382  self.cluster_obj.do_cluster(number_of_clusters)
1383  if display_plot:
1384  if self.rank == 0:
1385  self.cluster_obj.plot_matrix(
1386  figurename=os.path.join(outputdir,
1387  'dist_matrix.pdf'))
1388  if exit_after_display:
1389  exit()
1390  self.cluster_obj.save_distance_matrix_file(
1391  file_name=distance_matrix_file)
1392 
1393 # ------------------------------------------------------------------------
1394 # Alteratively, load the distance matrix from file and cluster that
1395 # ------------------------------------------------------------------------
1396  else:
1397  if self.rank == 0:
1398  print("setup clustering class")
1399  self.cluster_obj = IMP.pmi.analysis.Clustering()
1400  self.cluster_obj.load_distance_matrix_file(
1401  file_name=distance_matrix_file)
1402  print("clustering with %s clusters" % str(number_of_clusters))
1403  self.cluster_obj.do_cluster(number_of_clusters)
1404  [best_score_feature_keyword_list_dict,
1405  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1406  if display_plot:
1407  if self.rank == 0:
1408  self.cluster_obj.plot_matrix(figurename=os.path.join(
1409  outputdir, 'dist_matrix.pdf'))
1410  if exit_after_display:
1411  exit()
1412  if self.number_of_processes > 1:
1413  self.comm.Barrier()
1414 
1415 # ------------------------------------------------------------------------
1416 # now save all informations about the clusters
1417 # ------------------------------------------------------------------------
1418 
1419  if self.rank == 0:
1420  print(self.cluster_obj.get_cluster_labels())
1421  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1422  print("rank %s " % str(self.rank))
1423  print("cluster %s " % str(n))
1424  print("cluster label %s " % str(cl))
1425  print(self.cluster_obj.get_cluster_label_names(cl))
1426  cluster_size = \
1427  len(self.cluster_obj.get_cluster_label_names(cl))
1428  cluster_prov = \
1429  prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1430 
1431  # first initialize the Density class if requested
1432  if density_custom_ranges:
1433  DensModule = IMP.pmi.analysis.GetModelDensity(
1434  density_custom_ranges,
1435  voxel=voxel_size)
1436 
1437  dircluster = outputdir + "/cluster." + str(n) + "/"
1438  try:
1439  os.mkdir(dircluster)
1440  except: # noqa: E722
1441  pass
1442 
1443  rmsd_dict = {
1444  "AVERAGE_RMSD":
1445  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1446  clusstat = open(dircluster + "stat.out", "w")
1447  for k, structure_name in enumerate(
1448  self.cluster_obj.get_cluster_label_names(cl)):
1449  # extract the features
1450  tmp_dict = {}
1451  tmp_dict.update(rmsd_dict)
1452  index = rmf_file_name_index_dict[structure_name]
1453  for key in best_score_feature_keyword_list_dict:
1454  tmp_dict[
1455  key] = best_score_feature_keyword_list_dict[
1456  key][
1457  index]
1458 
1459  # get the rmf name and the frame number from the list of
1460  # frame names
1461  rmf_name = structure_name.split("|")[0]
1462  rmf_frame_number = int(structure_name.split("|")[1])
1463  clusstat.write(str(tmp_dict) + "\n")
1464 
1465  # extract frame (open or link to existing)
1466  if k == 0:
1467  prots, rs = \
1468  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1469  self.model, rmf_frame_number, rmf_name)
1470  else:
1471  linking_successful = \
1472  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1473  self.model, prots, rs, rmf_frame_number,
1474  rmf_name)
1475  if not linking_successful:
1476  continue
1477  if not prots:
1478  continue
1479 
1480  if IMP.pmi.get_is_canonical(prots[0]):
1481  states = IMP.atom.get_by_type(
1482  prots[0], IMP.atom.STATE_TYPE)
1483  prot = states[state_number]
1484  else:
1485  prot = prots[state_number]
1486  if k == 0:
1487  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1488 
1489  # transform clusters onto first
1490  if k > 0:
1491  co = self.cluster_obj
1492  model_index = co.get_model_index_from_name(
1493  structure_name)
1494  transformation = co.get_transformation_to_first_member(
1495  cl, model_index)
1496  rbs = set()
1497  for p in IMP.atom.get_leaves(prot):
1498  if not IMP.core.XYZR.get_is_setup(p):
1500  IMP.core.XYZR(p).set_radius(0.0001)
1501  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1502 
1504  rbm = IMP.core.RigidBodyMember(p)
1505  rb = rbm.get_rigid_body()
1506  rbs.add(rb)
1507  else:
1509  transformation)
1510  for rb in rbs:
1511  IMP.core.transform(rb, transformation)
1512 
1513  # add the density
1514  if density_custom_ranges:
1515  DensModule.add_subunits_density(prot)
1516 
1517  # pdb writing should be optimized!
1518  o = IMP.pmi.output.Output()
1519  self.model.update()
1520  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1521  o.write_pdb(dircluster + str(k) + ".pdb")
1522 
1523  if IMP.pmi.get_is_canonical(prot):
1524  # create a single-state System and write that
1526  IMP.Particle(self.model))
1527  h.set_name("System")
1528  h.add_child(prot)
1529  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1530  else:
1531  o.init_rmf(dircluster + str(k) + ".rmf3", [prot], rs)
1532  o.write_rmf(dircluster + str(k) + ".rmf3")
1533  o.close_rmf(dircluster + str(k) + ".rmf3")
1534 
1535  del o
1536  # IMP.atom.destroy(prot)
1537 
1538  if density_custom_ranges:
1539  DensModule.write_mrc(path=dircluster)
1540  del DensModule
1541 
1542  if self.number_of_processes > 1:
1543  self.comm.Barrier()
1544 
1545  def get_cluster_rmsd(self, cluster_num):
1546  if self.cluster_obj is None:
1547  raise Exception("Run clustering first")
1548  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1549 
1550  def save_objects(self, objects, file_name):
1551  import pickle
1552  outf = open(file_name, 'wb')
1553  pickle.dump(objects, outf)
1554  outf.close()
1555 
1556  def load_objects(self, file_name):
1557  import pickle
1558  inputf = open(file_name, 'rb')
1559  objects = pickle.load(inputf)
1560  inputf.close()
1561  return objects
1562 
1563 
1565 
1566  """
1567  This class contains analysis utilities to investigate ReplicaExchange
1568  results.
1569  """
1570 
1571  ########################
1572  # Construction and Setup
1573  ########################
1574 
1575  def __init__(self, model, stat_files, best_models=None, score_key=None,
1576  alignment=True):
1577  """
1578  Construction of the Class.
1579  @param model IMP.Model()
1580  @param stat_files list of string. Can be ascii stat files,
1581  rmf files names
1582  @param best_models Integer. Number of best scoring models,
1583  if None: all models will be read
1584  @param alignment boolean (Default=True). Align before computing
1585  the rmsd.
1586  """
1587 
1588  self.model = model
1589  self.best_models = best_models
1591  model, stat_files, self.best_models, score_key, cache=True)
1593  StatHierarchyHandler=self.stath0)
1594 
1595  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(
1597  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(
1599  self.sel0_rmsd = IMP.atom.Selection(self.stath0)
1600  self.sel1_rmsd = IMP.atom.Selection(self.stath1)
1601  self.sel0_alignment = IMP.atom.Selection(self.stath0)
1602  self.sel1_alignment = IMP.atom.Selection(self.stath1)
1603  self.clusters = []
1604  # fill the cluster list with a single cluster containing all models
1605  c = IMP.pmi.output.Cluster(0)
1606  self.clusters.append(c)
1607  for n0 in range(len(self.stath0)):
1608  c.add_member(n0)
1609  self.pairwise_rmsd = {}
1610  self.pairwise_molecular_assignment = {}
1611  self.alignment = alignment
1612  self.symmetric_molecules = {}
1613  self.issymmetricsel = {}
1614  self.update_seldicts()
1615  self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1616  IMP.atom.get_leaves(self.stath0))
1617  self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1618  IMP.atom.get_leaves(self.stath1))
1619 
1620  def set_rmsd_selection(self, **kwargs):
1621  """
1622  Setup the selection onto which the rmsd is computed
1623  @param kwargs use IMP.atom.Selection keywords
1624  """
1625  self.sel0_rmsd = IMP.atom.Selection(self.stath0, **kwargs)
1626  self.sel1_rmsd = IMP.atom.Selection(self.stath1, **kwargs)
1627  self.update_seldicts()
1628 
1629  def set_symmetric(self, molecule_name):
1630  """
1631  Store names of symmetric molecules
1632  """
1633  self.symmetric_molecules[molecule_name] = 0
1634  self.update_seldicts()
1635 
1636  def set_alignment_selection(self, **kwargs):
1637  """
1638  Setup the selection onto which the alignment is computed
1639  @param kwargs use IMP.atom.Selection keywords
1640  """
1641  self.sel0_alignment = IMP.atom.Selection(self.stath0, **kwargs)
1642  self.sel1_alignment = IMP.atom.Selection(self.stath1, **kwargs)
1643 
1644  ######################
1645  # Clustering functions
1646  ######################
1647  def clean_clusters(self):
1648  for c in self.clusters:
1649  del c
1650  self.clusters = []
1651 
1652  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1653  """
1654  Cluster the models based on RMSD.
1655  @param rmsd_cutoff Float the distance cutoff in Angstrom
1656  @param metric (Default=IMP.atom.get_rmsd) the metric that will
1657  be used to compute rmsds
1658  """
1659  self.clean_clusters()
1660  not_clustered = set(range(len(self.stath1)))
1661  while len(not_clustered) > 0:
1662  self.aggregate(not_clustered, rmsd_cutoff, metric)
1663  self.update_clusters()
1664 
1665  def refine(self, rmsd_cutoff=10):
1666  """
1667  Refine the clusters by merging the ones whose centers are close
1668  @param rmsd_cutoff cutoff distance in Angstorms
1669  """
1670  clusters_copy = self.clusters
1671  for c0, c1 in itertools.combinations(self.clusters, 2):
1672  if c0.center_index is None:
1673  self.compute_cluster_center(c0)
1674  if c1.center_index is None:
1675  self.compute_cluster_center(c1)
1676  _ = self.stath0[c0.center_index]
1677  _ = self.stath1[c1.center_index]
1678  rmsd, molecular_assignment = self.rmsd()
1679  if rmsd <= rmsd_cutoff:
1680  if c1 in self.clusters:
1681  clusters_copy.remove(c1)
1682  c0 += c1
1683  self.clusters = clusters_copy
1684  self.update_clusters()
1685 
1686  ####################
1687  # Input Output
1688  ####################
1689 
1690  def set_cluster_assignments(self, cluster_ids):
1691  if len(cluster_ids) != len(self.stath0):
1692  raise ValueError('cluster ids has to be same length as '
1693  'number of frames')
1694 
1695  self.clusters = []
1696  for i in sorted(list(set(cluster_ids))):
1697  self.clusters.append(IMP.pmi.output.Cluster(i))
1698  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1699  self.clusters[idx].add_member(i, d)
1700 
1701  def get_cluster_data(self, cluster):
1702  """
1703  Return the model data from a cluster
1704  @param cluster IMP.pmi.output.Cluster object
1705  """
1706  data = []
1707  for m in cluster:
1708  data.append(m)
1709  return data
1710 
1711  def save_data(self, filename='data.pkl'):
1712  """
1713  Save the data for the whole models into a pickle file
1714  @param filename string
1715  """
1716  self.stath0.save_data(filename)
1717 
1718  def set_data(self, data):
1719  """
1720  Set the data from an external IMP.pmi.output.Data
1721  @param data IMP.pmi.output.Data
1722  """
1723  self.stath0.data = data
1724  self.stath1.data = data
1725 
1726  def load_data(self, filename='data.pkl'):
1727  """
1728  Load the data from an external pickled file
1729  @param filename string
1730  """
1731  self.stath0.load_data(filename)
1732  self.stath1.load_data(filename)
1733  self.best_models = len(self.stath0)
1734 
1735  def add_cluster(self, rmf_name_list):
1736  c = IMP.pmi.output.Cluster(len(self.clusters))
1737  print("creating cluster index "+str(len(self.clusters)))
1738  self.clusters.append(c)
1739  current_len = len(self.stath0)
1740 
1741  for rmf in rmf_name_list:
1742  print("adding rmf "+rmf)
1743  self.stath0.add_stat_file(rmf)
1744  self.stath1.add_stat_file(rmf)
1745 
1746  for n0 in range(current_len, len(self.stath0)):
1747  d0 = self.stath0[n0]
1748  c.add_member(n0, d0)
1749  self.update_clusters()
1750 
1751  def save_clusters(self, filename='clusters.pkl'):
1752  """
1753  Save the clusters into a pickle file
1754  @param filename string
1755  """
1756  try:
1757  import cPickle as pickle
1758  except ImportError:
1759  import pickle
1760  fl = open(filename, 'wb')
1761  pickle.dump(self.clusters, fl)
1762 
1763  def load_clusters(self, filename='clusters.pkl', append=False):
1764  """
1765  Load the clusters from a pickle file
1766  @param filename string
1767  @param append bool (Default=False), if True. append the clusters
1768  to the ones currently present
1769  """
1770  try:
1771  import cPickle as pickle
1772  except ImportError:
1773  import pickle
1774  fl = open(filename, 'rb')
1775  self.clean_clusters()
1776  if append:
1777  self.clusters += pickle.load(fl)
1778  else:
1779  self.clusters = pickle.load(fl)
1780  self.update_clusters()
1781 
1782  ####################
1783  # Analysis Functions
1784  ####################
1785 
1786  def compute_cluster_center(self, cluster):
1787  """
1788  Compute the cluster center for a given cluster
1789  """
1790  member_distance = defaultdict(float)
1791 
1792  for n0, n1 in itertools.combinations(cluster.members, 2):
1793  _ = self.stath0[n0]
1794  _ = self.stath1[n1]
1795  rmsd, _ = self.rmsd()
1796  member_distance[n0] += rmsd
1797 
1798  if len(member_distance) > 0:
1799  cluster.center_index = min(member_distance,
1800  key=member_distance.get)
1801  else:
1802  cluster.center_index = cluster.members[0]
1803 
1804  def save_coordinates(self, cluster, rmf_name=None, reference="Absolute",
1805  prefix="./"):
1806  """
1807  Save the coordinates of the current cluster a single rmf file
1808  """
1809  print("saving coordinates", cluster)
1810  if self.alignment:
1811  self.set_reference(reference, cluster)
1812  o = IMP.pmi.output.Output()
1813  if rmf_name is None:
1814  rmf_name = prefix+'/'+str(cluster.cluster_id)+".rmf3"
1815 
1816  _ = self.stath1[cluster.members[0]]
1817  self.model.update()
1818  o.init_rmf(rmf_name, [self.stath1])
1819  for n1 in cluster.members:
1820  _ = self.stath1[n1]
1821  self.model.update()
1823  if self.alignment:
1824  self.align()
1825  o.write_rmf(rmf_name)
1827  o.close_rmf(rmf_name)
1828 
1829  def prune_redundant_structures(self, rmsd_cutoff=10):
1830  """
1831  remove structures that are similar
1832  append it to a new cluster
1833  """
1834  print("pruning models")
1835  selected = 0
1836  filtered = [selected]
1837  remaining = range(1, len(self.stath1), 10)
1838 
1839  while len(remaining) > 0:
1840  d0 = self.stath0[selected]
1841  rm = []
1842  for n1 in remaining:
1843  _ = self.stath1[n1]
1844  if self.alignment:
1845  self.align()
1846  d, _ = self.rmsd()
1847  if d <= rmsd_cutoff:
1848  rm.append(n1)
1849  print("pruning model %s, similar to model %s, rmsd %s"
1850  % (str(n1), str(selected), str(d)))
1851  remaining = [x for x in remaining if x not in rm]
1852  if len(remaining) == 0:
1853  break
1854  selected = remaining[0]
1855  filtered.append(selected)
1856  remaining.pop(0)
1857  c = IMP.pmi.output.Cluster(len(self.clusters))
1858  self.clusters.append(c)
1859  for n0 in filtered:
1860  d0 = self.stath0[n0]
1861  c.add_member(n0, d0)
1862  self.update_clusters()
1863 
1864  def precision(self, cluster):
1865  """
1866  Compute the precision of a cluster
1867  """
1868  npairs = 0
1869  rmsd = 0.0
1870  precision = None
1871 
1872  if cluster.center_index is not None:
1873  members1 = [cluster.center_index]
1874  else:
1875  members1 = cluster.members
1876 
1877  for n0 in members1:
1878  _ = self.stath0[n0]
1879  for n1 in cluster.members:
1880  if n0 != n1:
1881  npairs += 1
1882  _ = self.stath1[n1]
1884  tmp_rmsd, _ = self.rmsd()
1885  rmsd += tmp_rmsd
1887 
1888  if npairs > 0:
1889  precision = rmsd/npairs
1890  cluster.precision = precision
1891  return precision
1892 
1893  def bipartite_precision(self, cluster1, cluster2, verbose=False):
1894  """
1895  Compute the bipartite precision (ie the cross-precision)
1896  between two clusters
1897  """
1898  npairs = 0
1899  rmsd = 0.0
1900  for cn0, n0 in enumerate(cluster1.members):
1901  _ = self.stath0[n0]
1902  for cn1, n1 in enumerate(cluster2.members):
1903  _ = self.stath1[n1]
1904  tmp_rmsd, _ = self.rmsd()
1905  if verbose:
1906  print("--- rmsd between structure %s and structure "
1907  "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1908  rmsd += tmp_rmsd
1909  npairs += 1
1910  precision = rmsd/npairs
1911  return precision
1912 
1913  def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1914  cluster_ref=None, step=1):
1915  """
1916  Compute the Root mean square fluctuations
1917  of a molecule in a cluster
1918  Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1919  residue indexes and the value is the rmsf
1920  """
1921  rmsf = IMP.pmi.tools.OrderedDict()
1922 
1923  # assumes that residue indexes are identical for stath0 and stath1
1924  if cluster_ref is not None:
1925  if cluster_ref.center_index is not None:
1926  members0 = [cluster_ref.center_index]
1927  else:
1928  members0 = cluster_ref.members
1929  else:
1930  if cluster.center_index is not None:
1931  members0 = [cluster.center_index]
1932  else:
1933  members0 = cluster.members
1934 
1935  s0 = IMP.atom.Selection(self.stath0, molecule=molecule, resolution=1,
1936  copy_index=copy_index, state_index=state_index)
1937  ps0 = s0.get_selected_particles()
1938  # get the residue indexes
1939  residue_indexes = list(IMP.pmi.tools.OrderedSet(
1940  [IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
1941 
1942  # get the corresponding particles
1943  npairs = 0
1944  for n0 in members0:
1945  d0 = self.stath0[n0]
1946  for n1 in cluster.members[::step]:
1947  if n0 != n1:
1948  print("--- rmsf %s %s" % (str(n0), str(n1)))
1950 
1951  s1 = IMP.atom.Selection(
1952  self.stath1, molecule=molecule,
1953  residue_indexes=residue_indexes, resolution=1,
1954  copy_index=copy_index, state_index=state_index)
1955  ps1 = s1.get_selected_particles()
1956 
1957  d1 = self.stath1[n1]
1958  if self.alignment:
1959  self.align()
1960  for n, (p0, p1) in enumerate(zip(ps0, ps1)):
1961  r = residue_indexes[n]
1962  d0 = IMP.core.XYZ(p0)
1963  d1 = IMP.core.XYZ(p1)
1964  if r in rmsf:
1965  rmsf[r] += IMP.core.get_distance(d0, d1)
1966  else:
1967  rmsf[r] = IMP.core.get_distance(d0, d1)
1968  npairs += 1
1970  for r in rmsf:
1971  rmsf[r] /= npairs
1972 
1973  for stath in [self.stath0, self.stath1]:
1974  if molecule not in self.symmetric_molecules:
1975  s = IMP.atom.Selection(
1976  stath, molecule=molecule, residue_index=r,
1977  resolution=1, copy_index=copy_index,
1978  state_index=state_index)
1979  else:
1980  s = IMP.atom.Selection(
1981  stath, molecule=molecule, residue_index=r,
1982  resolution=1, state_index=state_index)
1983 
1984  ps = s.get_selected_particles()
1985  for p in ps:
1987  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
1988  else:
1990 
1991  return rmsf
1992 
1993  def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
1994  reference="Absolute", prefix="./", step=1):
1995  if self.alignment:
1996  self.set_reference(reference, cluster)
1997  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
1998  voxel=voxel_size)
1999 
2000  for n1 in cluster.members[::step]:
2001  print("density "+str(n1))
2002  _ = self.stath1[n1]
2004  if self.alignment:
2005  self.align()
2006  dens.add_subunits_density(self.stath1)
2008  dens.write_mrc(path=prefix+'/', suffix=str(cluster.cluster_id))
2009  del dens
2010 
2011  def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2012  consolidate=False, molecules=None, prefix='./',
2013  reference="Absolute"):
2014  if self.alignment:
2015  self.set_reference(reference, cluster)
2016  import numpy as np
2017  import matplotlib.pyplot as plt
2018  import matplotlib.cm as cm
2019  from scipy.spatial.distance import cdist
2020  import IMP.pmi.topology
2021  if molecules is None:
2023  for mol in IMP.pmi.tools.get_molecules(
2024  IMP.atom.get_leaves(self.stath1))]
2025  else:
2027  for mol in IMP.pmi.tools.get_molecules(
2029  self.stath1,
2030  molecules=molecules).get_selected_particles())]
2031  unique_copies = [mol for mol in mols if mol.get_copy_index() == 0]
2032  mol_names_unique = dict((mol.get_name(), mol) for mol in unique_copies)
2033  total_len_unique = sum(max(mol.get_residue_indexes())
2034  for mol in unique_copies)
2035 
2036  index_dict = {}
2037  prev_stop = 0
2038 
2039  if not consolidate:
2040  for mol in mols:
2041  seqlen = max(mol.get_residue_indexes())
2042  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2043  prev_stop += seqlen
2044 
2045  else:
2046  for mol in unique_copies:
2047  seqlen = max(mol.get_residue_indexes())
2048  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2049  prev_stop += seqlen
2050 
2051  for ncl, n1 in enumerate(cluster.members):
2052  print(ncl)
2053  _ = self.stath1[n1]
2054  coord_dict = IMP.pmi.tools.OrderedDict()
2055  for mol in mols:
2056  rindexes = mol.get_residue_indexes()
2057  coords = np.ones((max(rindexes), 3))
2058  for rnum in rindexes:
2059  sel = IMP.atom.Selection(mol, residue_index=rnum,
2060  resolution=1)
2061  selpart = sel.get_selected_particles()
2062  if len(selpart) == 0:
2063  continue
2064  selpart = selpart[0]
2065  coords[rnum - 1, :] = \
2066  IMP.core.XYZ(selpart).get_coordinates()
2067  coord_dict[mol] = coords
2068 
2069  if not consolidate:
2070  coords = np.concatenate(list(coord_dict.values()))
2071  dists = cdist(coords, coords)
2072  binary_dists = np.where((dists <= contact_threshold)
2073  & (dists >= 1.0), 1.0, 0.0)
2074  else:
2075  binary_dists_dict = {}
2076  for mol1 in mols:
2077  len1 = max(mol1.get_residue_indexes())
2078  for mol2 in mols:
2079  name1 = mol1.get_name()
2080  name2 = mol2.get_name()
2081  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2082  if (name1, name2) not in binary_dists_dict:
2083  binary_dists_dict[(name1, name2)] = \
2084  np.zeros((len1, len1))
2085  binary_dists_dict[(name1, name2)] += \
2086  np.where((dists <= contact_threshold)
2087  & (dists >= 1.0), 1.0, 0.0)
2088  binary_dists = np.zeros((total_len_unique, total_len_unique))
2089 
2090  for name1, name2 in binary_dists_dict:
2091  r1 = index_dict[mol_names_unique[name1]]
2092  r2 = index_dict[mol_names_unique[name2]]
2093  binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2094  np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2095  1.0, 0.0)
2096 
2097  if ncl == 0:
2098  dist_maps = [dists]
2099  av_dist_map = dists
2100  contact_freqs = binary_dists
2101  else:
2102  dist_maps.append(dists)
2103  av_dist_map += dists
2104  contact_freqs += binary_dists
2105 
2106  if log_scale:
2107  contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2108  else:
2109  contact_freqs = 1.0/len(cluster)*contact_freqs
2110  av_dist_map = 1.0/len(cluster)*contact_freqs
2111 
2112  fig = plt.figure(figsize=(100, 100))
2113  ax = fig.add_subplot(111)
2114  ax.set_xticks([])
2115  ax.set_yticks([])
2116  gap_between_components = 50
2117  colormap = cm.Blues
2118  colornorm = None
2119 
2120  if not consolidate:
2121  sorted_tuple = sorted(
2123  mol).get_extended_name(), mol) for mol in mols)
2124  prot_list = list(zip(*sorted_tuple))[1]
2125  else:
2126  sorted_tuple = sorted(
2127  (IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(), mol)
2128  for mol in unique_copies)
2129  prot_list = list(zip(*sorted_tuple))[1]
2130 
2131  prot_listx = prot_list
2132  nresx = gap_between_components + \
2133  sum([max(mol.get_residue_indexes())
2134  + gap_between_components for mol in prot_listx])
2135 
2136  # set the list of proteins on the y axis
2137  prot_listy = prot_list
2138  nresy = gap_between_components + \
2139  sum([max(mol.get_residue_indexes())
2140  + gap_between_components for mol in prot_listy])
2141 
2142  # this is the residue offset for each protein
2143  resoffsetx = {}
2144  resendx = {}
2145  res = gap_between_components
2146  for mol in prot_listx:
2147  resoffsetx[mol] = res
2148  res += max(mol.get_residue_indexes())
2149  resendx[mol] = res
2150  res += gap_between_components
2151 
2152  resoffsety = {}
2153  resendy = {}
2154  res = gap_between_components
2155  for mol in prot_listy:
2156  resoffsety[mol] = res
2157  res += max(mol.get_residue_indexes())
2158  resendy[mol] = res
2159  res += gap_between_components
2160 
2161  resoffsetdiagonal = {}
2162  res = gap_between_components
2163  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2164  resoffsetdiagonal[mol] = res
2165  res += max(mol.get_residue_indexes())
2166  res += gap_between_components
2167 
2168  # plot protein boundaries
2169  xticks = []
2170  xlabels = []
2171  for n, prot in enumerate(prot_listx):
2172  res = resoffsetx[prot]
2173  end = resendx[prot]
2174  for proty in prot_listy:
2175  resy = resoffsety[proty]
2176  endy = resendy[proty]
2177  ax.plot([res, res], [resy, endy], linestyle='-',
2178  color='gray', lw=0.4)
2179  ax.plot([end, end], [resy, endy], linestyle='-',
2180  color='gray', lw=0.4)
2181  xticks.append((float(res) + float(end)) / 2)
2183  prot).get_extended_name())
2184 
2185  yticks = []
2186  ylabels = []
2187  for n, prot in enumerate(prot_listy):
2188  res = resoffsety[prot]
2189  end = resendy[prot]
2190  for protx in prot_listx:
2191  resx = resoffsetx[protx]
2192  endx = resendx[protx]
2193  ax.plot([resx, endx], [res, res], linestyle='-',
2194  color='gray', lw=0.4)
2195  ax.plot([resx, endx], [end, end], linestyle='-',
2196  color='gray', lw=0.4)
2197  yticks.append((float(res) + float(end)) / 2)
2199  prot).get_extended_name())
2200 
2201  # plot the contact map
2202 
2203  tmp_array = np.zeros((nresx, nresy))
2204  ret = {}
2205  for px in prot_listx:
2206  for py in prot_listy:
2207  resx = resoffsetx[px]
2208  lengx = resendx[px] - 1
2209  resy = resoffsety[py]
2210  lengy = resendy[py] - 1
2211  indexes_x = index_dict[px]
2212  minx = min(indexes_x)
2213  maxx = max(indexes_x)
2214  indexes_y = index_dict[py]
2215  miny = min(indexes_y)
2216  maxy = max(indexes_y)
2217  tmp_array[resx:lengx, resy:lengy] = \
2218  contact_freqs[minx:maxx, miny:maxy]
2219  ret[(px, py)] = np.argwhere(
2220  contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2221 
2222  ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2223  origin='lower', alpha=0.6, interpolation='nearest')
2224 
2225  ax.set_xticks(xticks)
2226  ax.set_xticklabels(xlabels, rotation=90)
2227  ax.set_yticks(yticks)
2228  ax.set_yticklabels(ylabels)
2229  plt.setp(ax.get_xticklabels(), fontsize=6)
2230  plt.setp(ax.get_yticklabels(), fontsize=6)
2231 
2232  # display and write to file
2233  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2234  [i.set_linewidth(2.0) for i in ax.spines.values()]
2235 
2236  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf",
2237  dpi=300, transparent="False")
2238  return ret
2239 
2240  def plot_rmsd_matrix(self, filename):
2241  self.compute_all_pairwise_rmsd()
2242  distance_matrix = np.zeros(
2243  (len(self.stath0), len(self.stath1)))
2244  for (n0, n1) in self.pairwise_rmsd:
2245  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2246 
2247  import matplotlib as mpl
2248  mpl.use('Agg')
2249  import matplotlib.pylab as pl
2250  from scipy.cluster import hierarchy as hrc
2251 
2252  fig = pl.figure(figsize=(10, 8))
2253  ax = fig.add_subplot(212)
2254  dendrogram = hrc.dendrogram(
2255  hrc.linkage(distance_matrix),
2256  color_threshold=7,
2257  no_labels=True)
2258  leaves_order = dendrogram['leaves']
2259  ax.set_xlabel('Model')
2260  ax.set_ylabel('RMSD [Angstroms]')
2261 
2262  ax2 = fig.add_subplot(221)
2263  cax = ax2.imshow(
2264  distance_matrix[leaves_order, :][:, leaves_order],
2265  interpolation='nearest')
2266  cb = fig.colorbar(cax)
2267  cb.set_label('RMSD [Angstroms]')
2268  ax2.set_xlabel('Model')
2269  ax2.set_ylabel('Model')
2270 
2271  pl.savefig(filename, dpi=300)
2272  pl.close(fig)
2273 
2274  ####################
2275  # Internal Functions
2276  ####################
2277 
2278  def update_clusters(self):
2279  """
2280  Update the cluster id numbers
2281  """
2282  for n, c in enumerate(self.clusters):
2283  c.cluster_id = n
2284 
2285  def get_molecule(self, hier, name, copy):
2286  s = IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2287  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2288 
2289  def update_seldicts(self):
2290  """
2291  Update the seldicts
2292  """
2293  self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2294  self.sel0_rmsd.get_selected_particles())
2295  self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2296  self.sel1_rmsd.get_selected_particles())
2297  for mol in self.seldict0:
2298  for sel in self.seldict0[mol]:
2299  self.issymmetricsel[sel] = False
2300  for mol in self.symmetric_molecules:
2301  self.symmetric_molecules[mol] = len(self.seldict0[mol])
2302  for sel in self.seldict0[mol]:
2303  self.issymmetricsel[sel] = True
2304 
2305  def align(self):
2307  self.sel1_alignment, self.sel0_alignment)
2308 
2309  for rb in self.rbs1:
2310  IMP.core.transform(rb, tr)
2311 
2312  for bead in self.beads1:
2313  try:
2314  IMP.core.transform(IMP.core.XYZ(bead), tr)
2315  except: # noqa: E722
2316  continue
2317 
2318  self.model.update()
2319 
2320  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2321  '''
2322  initial filling of the clusters.
2323  '''
2324  n0 = idxs.pop()
2325  print("clustering model "+str(n0))
2326  d0 = self.stath0[n0]
2327  c = IMP.pmi.output.Cluster(len(self.clusters))
2328  print("creating cluster index "+str(len(self.clusters)))
2329  self.clusters.append(c)
2330  c.add_member(n0, d0)
2331  clustered = set([n0])
2332  for n1 in idxs:
2333  print("--- trying to add model " + str(n1) + " to cluster "
2334  + str(len(self.clusters)))
2335  d1 = self.stath1[n1]
2336  if self.alignment:
2337  self.align()
2338  rmsd, _ = self.rmsd(metric=metric)
2339  if rmsd < rmsd_cutoff:
2340  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2341  c.add_member(n1, d1)
2342  clustered.add(n1)
2343  else:
2344  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2345  idxs -= clustered
2346 
2347  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2348  """
2349  merge the clusters that have close members
2350  @param rmsd_cutoff cutoff distance in Angstorms
2351  """
2352  # before merging, clusters are spheres of radius rmsd_cutoff
2353  # centered on the 1st element
2354  # here we only try to merge clusters whose centers are closer
2355  # than 2*rmsd_cutoff
2356  to_merge = []
2357  print("merging...")
2358  for c0, c1 in filter(lambda x: len(x[0].members) > 1,
2359  itertools.combinations(self.clusters, 2)):
2360  n0, n1 = [c.members[0] for c in (c0, c1)]
2361  _ = self.stath0[n0]
2362  _ = self.stath1[n1]
2363  rmsd, _ = self.rmsd()
2364  if (rmsd < 2*rmsd_cutoff and
2365  self.have_close_members(c0, c1, rmsd_cutoff, metric)):
2366  to_merge.append((c0, c1))
2367 
2368  for c0, c in reversed(to_merge):
2369  self.merge(c0, c)
2370 
2371  # keep only full clusters
2372  self.clusters = [c for c in
2373  filter(lambda x: len(x.members) > 0, self.clusters)]
2374 
2375  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2376  '''
2377  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2378  '''
2379  print("check close members for clusters " + str(c0.cluster_id) +
2380  " and " + str(c1.cluster_id))
2381  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2382  _ = self.stath0[n0]
2383  _ = self.stath1[n1]
2384  rmsd, _ = self.rmsd(metric=metric)
2385  if rmsd < rmsd_cutoff:
2386  return True
2387 
2388  return False
2389 
2390  def merge(self, c0, c1):
2391  '''
2392  merge two clusters
2393  '''
2394  c0 += c1
2395  c1.members = []
2396  c1.data = {}
2397 
2398  def rmsd_helper(self, sels0, sels1, metric):
2399  '''
2400  a function that returns the permutation best_sel of sels0 that
2401  minimizes metric
2402  '''
2403  best_rmsd2 = float('inf')
2404  best_sel = None
2405  if self.issymmetricsel[sels0[0]]:
2406  # this cases happens when symmetries were defined
2407  N = len(sels0)
2408  for offset in range(N):
2409  sels = [sels0[(offset+i) % N] for i in range(N)]
2410  sel0 = sels[0]
2411  sel1 = sels1[0]
2412  r = metric(sel0, sel1)
2413  rmsd2 = r*r*N
2414  if rmsd2 < best_rmsd2:
2415  best_rmsd2 = rmsd2
2416  best_sel = sels
2417  else:
2418  for sels in itertools.permutations(sels0):
2419  rmsd2 = 0.0
2420  for sel0, sel1 in itertools.takewhile(
2421  lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2422  r = metric(sel0, sel1)
2423  rmsd2 += r*r
2424  if rmsd2 < best_rmsd2:
2425  best_rmsd2 = rmsd2
2426  best_sel = sels
2427  return best_sel, best_rmsd2
2428 
2429  def compute_all_pairwise_rmsd(self):
2430  for d0 in self.stath0:
2431  for d1 in self.stath1:
2432  rmsd, _ = self.rmsd()
2433 
2434  def rmsd(self, metric=IMP.atom.get_rmsd):
2435  '''
2436  Computes the RMSD. Resolves ambiguous pairs assignments
2437  '''
2438  # here we memoize the rmsd and molecular assignment so that it's
2439  # not done multiple times
2440  n0 = self.stath0.current_index
2441  n1 = self.stath1.current_index
2442  if ((n0, n1) in self.pairwise_rmsd) \
2443  and ((n0, n1) in self.pairwise_molecular_assignment):
2444  return (self.pairwise_rmsd[(n0, n1)],
2445  self.pairwise_molecular_assignment[(n0, n1)])
2446 
2447  if self.alignment:
2448  self.align()
2449  # if it's not yet memoized
2450  total_rmsd = 0.0
2451  total_N = 0
2452  # this is a dictionary which keys are the molecule names, and values
2453  # are the list of IMP.atom.Selection for all molecules that share
2454  # the molecule name
2455  molecular_assignment = {}
2456  for molname, sels0 in self.seldict0.items():
2457  sels_best_order, best_rmsd2 = \
2458  self.rmsd_helper(sels0, self.seldict1[molname], metric)
2459 
2460  Ncoords = len(sels_best_order[0].get_selected_particles())
2461  Ncopies = len(self.seldict1[molname])
2462  total_rmsd += Ncoords*best_rmsd2
2463  total_N += Ncoords*Ncopies
2464 
2465  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2466  p0 = sel0.get_selected_particles()[0]
2467  p1 = sel1.get_selected_particles()[0]
2468  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2469  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2470  c0 = IMP.atom.Copy(m0).get_copy_index()
2471  c1 = IMP.atom.Copy(m1).get_copy_index()
2472  molecular_assignment[(molname, c0)] = (molname, c1)
2473 
2474  total_rmsd = math.sqrt(total_rmsd/total_N)
2475 
2476  self.pairwise_rmsd[(n0, n1)] = total_rmsd
2477  self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2478  self.pairwise_rmsd[(n1, n0)] = total_rmsd
2479  self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2480  return total_rmsd, molecular_assignment
2481 
2482  def set_reference(self, reference, cluster):
2483  """
2484  Fix the reference structure for structural alignment, rmsd and
2485  chain assignemnt
2486 
2487  @param reference can be either "Absolute" (cluster center of the
2488  first cluster) or Relative (cluster center of the current
2489  cluster)
2490  """
2491  if reference == "Absolute":
2492  _ = self.stath0[0]
2493  elif reference == "Relative":
2494  if cluster.center_index:
2495  n0 = cluster.center_index
2496  else:
2497  n0 = cluster.members[0]
2498  _ = self.stath0[n0]
2499 
2501  """
2502  compute the molecular assignemnts between multiple copies
2503  of the same sequence. It changes the Copy index of Molecules
2504  """
2505  _ = self.stath1[n1]
2506  _, molecular_assignment = self.rmsd()
2507  for (m0, c0), (m1, c1) in molecular_assignment.items():
2508  mol0 = self.molcopydict0[m0][c0]
2509  mol1 = self.molcopydict1[m1][c1]
2510  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2511  p1 = IMP.atom.Copy(mol1).get_particle()
2512  p1.set_value(cik0, c0)
2513 
2515  """
2516  Undo the Copy index assignment
2517  """
2518  _ = self.stath1[n1]
2519  _, molecular_assignment = self.rmsd()
2520  for (m0, c0), (m1, c1) in molecular_assignment.items():
2521  mol0 = self.molcopydict0[m0][c0]
2522  mol1 = self.molcopydict1[m1][c1]
2523  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2524  p1 = IMP.atom.Copy(mol1).get_particle()
2525  p1.set_value(cik0, c1)
2526 
2527  ####################
2528  # Container Functions
2529  ####################
2530 
2531  def __repr__(self):
2532  s = "AnalysisReplicaExchange\n"
2533  s += "---- number of clusters %s \n" % str(len(self.clusters))
2534  s += "---- number of models %s \n" % str(len(self.stath0))
2535  return s
2536 
2537  def __getitem__(self, int_slice_adaptor):
2538  if isinstance(int_slice_adaptor, int):
2539  return self.clusters[int_slice_adaptor]
2540  elif isinstance(int_slice_adaptor, slice):
2541  return self.__iter__(int_slice_adaptor)
2542  else:
2543  raise TypeError("Unknown Type")
2544 
2545  def __len__(self):
2546  return len(self.clusters)
2547 
2548  def __iter__(self, slice_key=None):
2549  if slice_key is None:
2550  for i in range(len(self)):
2551  yield self[i]
2552  else:
2553  for i in range(len(self))[slice_key]:
2554  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2434
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
Definition: macros.py:2482
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1763
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1134
def precision
Compute the precision of a cluster.
Definition: macros.py:1864
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:853
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:107
A container for models organized into clusters.
Definition: output.py:1501
Sample using molecular dynamics.
Definition: samplers.py:392
def aggregate
initial filling of the clusters.
Definition: macros.py:2320
A class for reading stat files (either rmf or ascii v1 and v2)
Definition: output.py:926
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:616
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:617
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1829
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1913
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1231
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1620
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1701
def __init__
Construction of the Class.
Definition: macros.py:1575
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:759
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1718
def __init__
Constructor.
Definition: macros.py:117
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2514
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1636
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2398
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1804
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:997
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
Definition: macros.py:2500
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1564
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:561
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2347
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:906
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1786
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:917
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:22
def update_seldicts
Update the seldicts.
Definition: macros.py:2289
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2278
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:608
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1665
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:200
Collect timing information.
Definition: tools.py:122
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1629
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: pmi/utilities.h:91
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:10197
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:47
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2390
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:612
The general base class for IMP exceptions.
Definition: exception.h:49
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1248
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1711
Class to handle individual particles of a Model object.
Definition: Particle.h:41
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:770
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1893
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:590
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1652
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1751
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2375
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
A macro to help setup and run replica exchange.
Definition: macros.py:59
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1207
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1726
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:565
Sample using replica exchange.
Definition: samplers.py:497
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27