IMP logo
IMP Reference Guide  develop.5de2a9ba5d,2024/02/25
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
7 import IMP.pmi.tools
8 import IMP.pmi.samplers
9 import IMP.pmi.output
10 import IMP.pmi.analysis
11 import IMP.pmi.io
12 import IMP.pmi.alphabets
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import os
17 try:
18  from pathlib import Path
19 except ImportError: # Use bundled pathlib on Python 2 without pathlib
20  from IMP._compat_pathlib import Path
21 import glob
22 from operator import itemgetter
23 from collections import defaultdict
24 import numpy as np
25 import itertools
26 import warnings
27 import math
28 
29 
30 class _MockMPIValues(object):
31  """Replace samplers.MPI_values when in test mode"""
32  def get_percentile(self, name):
33  return 0.
34 
35 
36 class _RMFRestraints(object):
37  """All restraints that are written out to the RMF file"""
38  def __init__(self, model, user_restraints):
39  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
40  self._user_restraints = user_restraints if user_restraints else []
41 
42  def __len__(self):
43  return (len(self._user_restraints)
44  + self._rmf_rs.get_number_of_restraints())
45 
46  def __bool__(self):
47  return len(self) > 0
48  __nonzero__ = __bool__ # Python 2 compatibility
49 
50  def __getitem__(self, i):
51  class FakePMIWrapper(object):
52  def __init__(self, r):
53  self.r = IMP.RestraintSet.get_from(r)
54 
55  def get_restraint(self):
56  return self.r
57 
58  lenuser = len(self._user_restraints)
59  if 0 <= i < lenuser:
60  return self._user_restraints[i]
61  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
62  r = self._rmf_rs.get_restraint(i - lenuser)
63  return FakePMIWrapper(r)
64  else:
65  raise IndexError("Out of range")
66 
67 
68 class ReplicaExchange(object):
69  """A macro to help setup and run replica exchange.
70  Supports Monte Carlo and molecular dynamics.
71  Produces trajectory RMF files, best PDB structures,
72  and output stat files.
73  """
74  def __init__(self, model, root_hier,
75  monte_carlo_sample_objects=None,
76  molecular_dynamics_sample_objects=None,
77  output_objects=[],
78  rmf_output_objects=None,
79  monte_carlo_temperature=1.0,
80  simulated_annealing=False,
81  simulated_annealing_minimum_temperature=1.0,
82  simulated_annealing_maximum_temperature=2.5,
83  simulated_annealing_minimum_temperature_nframes=100,
84  simulated_annealing_maximum_temperature_nframes=100,
85  replica_exchange_minimum_temperature=1.0,
86  replica_exchange_maximum_temperature=2.5,
87  replica_exchange_swap=True,
88  num_sample_rounds=1,
89  number_of_best_scoring_models=500,
90  monte_carlo_steps=10,
91  self_adaptive=False,
92  molecular_dynamics_steps=10,
93  molecular_dynamics_max_time_step=1.0,
94  number_of_frames=1000,
95  save_coordinates_mode="lowest_temperature",
96  nframes_write_coordinates=1,
97  write_initial_rmf=True,
98  initial_rmf_name_suffix="initial",
99  stat_file_name_suffix="stat",
100  best_pdb_name_suffix="model",
101  mmcif=False,
102  do_clean_first=True,
103  do_create_directories=True,
104  global_output_directory="./",
105  rmf_dir="rmfs/",
106  best_pdb_dir="pdbs/",
107  replica_stat_file_suffix="stat_replica",
108  em_object_for_rmf=None,
109  atomistic=False,
110  replica_exchange_object=None,
111  test_mode=False,
112  score_moved=False):
113  """Constructor.
114  @param model The IMP model
115  @param root_hier Top-level (System)hierarchy
116  @param monte_carlo_sample_objects Objects for MC sampling, which
117  should generally be a simple list of Mover objects, e.g.
118  from DegreesOfFreedom.get_movers().
119  @param molecular_dynamics_sample_objects Objects for MD sampling,
120  which should generally be a simple list of particles.
121  @param output_objects A list of structural objects and restraints
122  that will be included in output (ie, statistics "stat"
123  files). Any object that provides a get_output() method
124  can be used here. If None is passed
125  the macro will not write stat files.
126  @param rmf_output_objects A list of structural objects and
127  restraints that will be included in rmf. Any object
128  that provides a get_output() method can be used here.
129  @param monte_carlo_temperature MC temp (may need to be optimized
130  based on post-sampling analysis)
131  @param simulated_annealing If True, perform simulated annealing
132  @param simulated_annealing_minimum_temperature Should generally be
133  the same as monte_carlo_temperature.
134  @param simulated_annealing_minimum_temperature_nframes Number of
135  frames to compute at minimum temperature.
136  @param simulated_annealing_maximum_temperature_nframes Number of
137  frames to compute at
138  temps > simulated_annealing_maximum_temperature.
139  @param replica_exchange_minimum_temperature Low temp for REX; should
140  generally be the same as monte_carlo_temperature.
141  @param replica_exchange_maximum_temperature High temp for REX
142  @param replica_exchange_swap Boolean, enable disable temperature
143  swap (Default=True)
144  @param num_sample_rounds Number of rounds of MC/MD per cycle
145  @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
146  models to keep around for analysis.
147  @param mmcif If True, write best scoring models in mmCIF format;
148  if False (the default), write in legacy PDB format.
149  @param best_pdb_dir The directory under `global_output_directory`
150  where best-scoring PDB/mmCIF files are written.
151  @param best_pdb_name_suffix Part of the file name for best-scoring
152  PDB/mmCIF files.
153  @param monte_carlo_steps Number of MC steps per round
154  @param self_adaptive self adaptive scheme for Monte Carlo movers
155  @param molecular_dynamics_steps Number of MD steps per round
156  @param molecular_dynamics_max_time_step Max time step for MD
157  @param number_of_frames Number of REX frames to run
158  @param save_coordinates_mode string: how to save coordinates.
159  "lowest_temperature" (default) only the lowest temperatures
160  is saved
161  "25th_score" all replicas whose score is below the 25th
162  percentile
163  "50th_score" all replicas whose score is below the 50th
164  percentile
165  "75th_score" all replicas whose score is below the 75th
166  percentile
167  @param nframes_write_coordinates How often to write the coordinates
168  of a frame
169  @param write_initial_rmf Write the initial configuration
170  @param global_output_directory Folder that will be created to house
171  output.
172  @param test_mode Set to True to avoid writing any files, just test
173  one frame.
174  @param score_moved If True, attempt to speed up Monte Carlo
175  sampling by caching scoring function terms on particles
176  that didn't move.
177  """
178  self.model = model
179  self.vars = {}
180 
181  # add check hierarchy is multistate
182  if output_objects == []:
183  # The "[]" in the default parameters is a global object, so make
184  # our own copy here
185  self.output_objects = []
186  else:
187  self.output_objects = output_objects
188  self.rmf_output_objects = rmf_output_objects
189  if (isinstance(root_hier, IMP.atom.Hierarchy)
190  and not root_hier.get_parent()):
191  if self.output_objects is not None:
192  self.output_objects.append(
193  IMP.pmi.io.TotalScoreOutput(self.model))
194  if self.rmf_output_objects is not None:
195  self.rmf_output_objects.append(
196  IMP.pmi.io.TotalScoreOutput(self.model))
197  self.root_hier = root_hier
198  states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
199  self.vars["number_of_states"] = len(states)
200  if len(states) > 1:
201  self.root_hiers = states
202  self.is_multi_state = True
203  else:
204  self.root_hier = root_hier
205  self.is_multi_state = False
206  else:
207  raise TypeError("Must provide System hierarchy (root_hier)")
208 
209  self._rmf_restraints = _RMFRestraints(model, None)
210  self.em_object_for_rmf = em_object_for_rmf
211  self.monte_carlo_sample_objects = monte_carlo_sample_objects
212  self.vars["self_adaptive"] = self_adaptive
213  self.molecular_dynamics_sample_objects = \
214  molecular_dynamics_sample_objects
215  self.replica_exchange_object = replica_exchange_object
216  self.molecular_dynamics_max_time_step = \
217  molecular_dynamics_max_time_step
218  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
219  self.vars["replica_exchange_minimum_temperature"] = \
220  replica_exchange_minimum_temperature
221  self.vars["replica_exchange_maximum_temperature"] = \
222  replica_exchange_maximum_temperature
223  self.vars["replica_exchange_swap"] = replica_exchange_swap
224  self.vars["simulated_annealing"] = simulated_annealing
225  self.vars["simulated_annealing_minimum_temperature"] = \
226  simulated_annealing_minimum_temperature
227  self.vars["simulated_annealing_maximum_temperature"] = \
228  simulated_annealing_maximum_temperature
229  self.vars["simulated_annealing_minimum_temperature_nframes"] = \
230  simulated_annealing_minimum_temperature_nframes
231  self.vars["simulated_annealing_maximum_temperature_nframes"] = \
232  simulated_annealing_maximum_temperature_nframes
233 
234  self.vars["num_sample_rounds"] = num_sample_rounds
235  self.vars[
236  "number_of_best_scoring_models"] = number_of_best_scoring_models
237  self.vars["monte_carlo_steps"] = monte_carlo_steps
238  self.vars["molecular_dynamics_steps"] = molecular_dynamics_steps
239  self.vars["number_of_frames"] = number_of_frames
240  if save_coordinates_mode not in ("lowest_temperature", "25th_score",
241  "50th_score", "75th_score"):
242  raise Exception("save_coordinates_mode has unrecognized value")
243  else:
244  self.vars["save_coordinates_mode"] = save_coordinates_mode
245  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
246  self.vars["write_initial_rmf"] = write_initial_rmf
247  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
248  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
249  self.vars["mmcif"] = mmcif
250  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
251  self.vars["do_clean_first"] = do_clean_first
252  self.vars["do_create_directories"] = do_create_directories
253  self.vars["global_output_directory"] = global_output_directory
254  self.vars["rmf_dir"] = rmf_dir
255  self.vars["best_pdb_dir"] = best_pdb_dir
256  self.vars["atomistic"] = atomistic
257  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
258  self.vars["geometries"] = None
259  self.test_mode = test_mode
260  self.score_moved = score_moved
261 
262  def add_geometries(self, geometries):
263  if self.vars["geometries"] is None:
264  self.vars["geometries"] = list(geometries)
265  else:
266  self.vars["geometries"].extend(geometries)
267 
268  def show_info(self):
269  print("ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
270  "rmfs/*.rmf3 for each replica ")
271  print("--- it stores the best scoring pdb models in pdbs/")
272  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
273  "lowest temperature")
274  print("--- variables:")
275  keys = list(self.vars.keys())
276  keys.sort()
277  for v in keys:
278  print("------", v.ljust(30), self.vars[v])
279 
280  def get_replica_exchange_object(self):
281  return self.replica_exchange_object
282 
283  def _add_provenance(self, sampler_md, sampler_mc):
284  """Record details about the sampling in the IMP Hierarchies"""
285  iterations = 0
286  if sampler_md:
287  method = "Molecular Dynamics"
288  iterations += self.vars["molecular_dynamics_steps"]
289  if sampler_mc:
290  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
291  iterations += self.vars["monte_carlo_steps"]
292  # If no sampling is actually done, no provenance to write
293  if iterations == 0 or self.vars["number_of_frames"] == 0:
294  return
295  iterations *= self.vars["num_sample_rounds"]
296 
297  pi = self.model.add_particle("sampling")
299  self.model, pi, method, self.vars["number_of_frames"],
300  iterations)
301  p.set_number_of_replicas(
302  self.replica_exchange_object.get_number_of_replicas())
303  IMP.pmi.tools._add_pmi_provenance(self.root_hier)
304  IMP.core.add_provenance(self.model, self.root_hier, p)
305 
306  def execute_macro(self):
307  temp_index_factor = 100000.0
308  samplers = []
309  sampler_mc = None
310  sampler_md = None
311  if self.monte_carlo_sample_objects is not None:
312  print("Setting up MonteCarlo")
313  sampler_mc = IMP.pmi.samplers.MonteCarlo(
314  self.model, self.monte_carlo_sample_objects,
315  self.vars["monte_carlo_temperature"],
316  score_moved=self.score_moved)
317  if self.vars["simulated_annealing"]:
318  tmin = self.vars["simulated_annealing_minimum_temperature"]
319  tmax = self.vars["simulated_annealing_maximum_temperature"]
320  nfmin = self.vars[
321  "simulated_annealing_minimum_temperature_nframes"]
322  nfmax = self.vars[
323  "simulated_annealing_maximum_temperature_nframes"]
324  sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
325  if self.vars["self_adaptive"]:
326  sampler_mc.set_self_adaptive(
327  isselfadaptive=self.vars["self_adaptive"])
328  if self.output_objects is not None:
329  self.output_objects.append(sampler_mc)
330  if self.rmf_output_objects is not None:
331  self.rmf_output_objects.append(sampler_mc)
332  samplers.append(sampler_mc)
333 
334  if self.molecular_dynamics_sample_objects is not None:
335  print("Setting up MolecularDynamics")
337  self.model, self.molecular_dynamics_sample_objects,
338  self.vars["monte_carlo_temperature"],
339  maximum_time_step=self.molecular_dynamics_max_time_step)
340  if self.vars["simulated_annealing"]:
341  tmin = self.vars["simulated_annealing_minimum_temperature"]
342  tmax = self.vars["simulated_annealing_maximum_temperature"]
343  nfmin = self.vars[
344  "simulated_annealing_minimum_temperature_nframes"]
345  nfmax = self.vars[
346  "simulated_annealing_maximum_temperature_nframes"]
347  sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
348  if self.output_objects is not None:
349  self.output_objects.append(sampler_md)
350  if self.rmf_output_objects is not None:
351  self.rmf_output_objects.append(sampler_md)
352  samplers.append(sampler_md)
353 # -------------------------------------------------------------------------
354 
355  print("Setting up ReplicaExchange")
357  self.model, self.vars["replica_exchange_minimum_temperature"],
358  self.vars["replica_exchange_maximum_temperature"], samplers,
359  replica_exchange_object=self.replica_exchange_object)
360  self.replica_exchange_object = rex.rem
361 
362  myindex = rex.get_my_index()
363  if self.output_objects is not None:
364  self.output_objects.append(rex)
365  if self.rmf_output_objects is not None:
366  self.rmf_output_objects.append(rex)
367  # must reset the minimum temperature due to the
368  # different binary length of rem.get_my_parameter double and python
369  # float
370  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
371 
372 # -------------------------------------------------------------------------
373 
374  globaldir = self.vars["global_output_directory"] + "/"
375  rmf_dir = globaldir + self.vars["rmf_dir"]
376  pdb_dir = globaldir + self.vars["best_pdb_dir"]
377 
378  if not self.test_mode:
379  if self.vars["do_clean_first"]:
380  pass
381 
382  if self.vars["do_create_directories"]:
383 
384  try:
385  os.makedirs(globaldir)
386  except: # noqa: E722
387  pass
388  try:
389  os.makedirs(rmf_dir)
390  except: # noqa: E722
391  pass
392 
393  if not self.is_multi_state:
394  try:
395  os.makedirs(pdb_dir)
396  except: # noqa: E722
397  pass
398  else:
399  for n in range(self.vars["number_of_states"]):
400  try:
401  os.makedirs(pdb_dir + "/" + str(n))
402  except: # noqa: E722
403  pass
404 
405 # -------------------------------------------------------------------------
406 
408  if self.output_objects is not None:
409  self.output_objects.append(sw)
410  if self.rmf_output_objects is not None:
411  self.rmf_output_objects.append(sw)
412 
413  print("Setting up stat file")
414  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
415  low_temp_stat_file = globaldir + \
416  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
417 
418  # Ensure model is updated before saving init files
419  if not self.test_mode:
420  self.model.update()
421 
422  if not self.test_mode:
423  if self.output_objects is not None:
424  output.init_stat2(low_temp_stat_file,
425  self.output_objects,
426  extralabels=["rmf_file", "rmf_frame_index"])
427  else:
428  print("Stat file writing is disabled")
429 
430  if self.rmf_output_objects is not None:
431  print("Stat info being written in the rmf file")
432 
433  print("Setting up replica stat file")
434  replica_stat_file = globaldir + \
435  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
436  if not self.test_mode:
437  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
438 
439  if not self.test_mode:
440  print("Setting up best pdb files")
441  if not self.is_multi_state:
442  if self.vars["number_of_best_scoring_models"] > 0:
443  output.init_pdb_best_scoring(
444  pdb_dir + "/" + self.vars["best_pdb_name_suffix"],
445  self.root_hier,
446  self.vars["number_of_best_scoring_models"],
447  replica_exchange=True,
448  mmcif=self.vars['mmcif'],
449  best_score_file=globaldir + "best.scores.rex.py")
450  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
451  output.write_psf(
452  pdb_dir + "/" + "model.psf",
453  pdb_dir + "/" +
454  self.vars["best_pdb_name_suffix"] + pdbext)
455  else:
456  if self.vars["number_of_best_scoring_models"] > 0:
457  for n in range(self.vars["number_of_states"]):
458  output.init_pdb_best_scoring(
459  pdb_dir + "/" + str(n) + "/" +
460  self.vars["best_pdb_name_suffix"],
461  self.root_hiers[n],
462  self.vars["number_of_best_scoring_models"],
463  replica_exchange=True,
464  mmcif=self.vars['mmcif'],
465  best_score_file=globaldir + "best.scores.rex.py")
466  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
467  output.write_psf(
468  pdb_dir + "/" + str(n) + "/" + "model.psf",
469  pdb_dir + "/" + str(n) + "/" +
470  self.vars["best_pdb_name_suffix"] + pdbext)
471 # ---------------------------------------------
472 
473  if self.em_object_for_rmf is not None:
474  output_hierarchies = [
475  self.root_hier,
476  self.em_object_for_rmf.get_density_as_hierarchy(
477  )]
478  else:
479  output_hierarchies = [self.root_hier]
480 
481  if not self.test_mode:
482  print("Setting up and writing initial rmf coordinate file")
483  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
484  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
485  output_hierarchies,
486  listofobjects=self.rmf_output_objects)
487  if self._rmf_restraints:
488  output.add_restraints_to_rmf(
489  init_suffix + "." + str(myindex) + ".rmf3",
490  self._rmf_restraints)
491  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
492  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
493 
494  if not self.test_mode:
495  mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
496  else:
497  mpivs = _MockMPIValues()
498 
499  self._add_provenance(sampler_md, sampler_mc)
500 
501  if not self.test_mode:
502  print("Setting up production rmf files")
503  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
504  output.init_rmf(rmfname, output_hierarchies,
505  geometries=self.vars["geometries"],
506  listofobjects=self.rmf_output_objects)
507 
508  if self._rmf_restraints:
509  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
510 
511  ntimes_at_low_temp = 0
512 
513  if myindex == 0:
514  self.show_info()
515  self.replica_exchange_object.set_was_used(True)
516  nframes = self.vars["number_of_frames"]
517  if self.test_mode:
518  nframes = 1
519  for i in range(nframes):
520  if self.test_mode:
521  score = 0.
522  else:
523  for nr in range(self.vars["num_sample_rounds"]):
524  if sampler_md is not None:
525  sampler_md.optimize(
526  self.vars["molecular_dynamics_steps"])
527  if sampler_mc is not None:
528  sampler_mc.optimize(self.vars["monte_carlo_steps"])
530  self.model).evaluate(False)
531  mpivs.set_value("score", score)
532  output.set_output_entry("score", score)
533 
534  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
535 
536  if self.vars["save_coordinates_mode"] == "lowest_temperature":
537  save_frame = (min_temp_index == my_temp_index)
538  elif self.vars["save_coordinates_mode"] == "25th_score":
539  score_perc = mpivs.get_percentile("score")
540  save_frame = (score_perc*100.0 <= 25.0)
541  elif self.vars["save_coordinates_mode"] == "50th_score":
542  score_perc = mpivs.get_percentile("score")
543  save_frame = (score_perc*100.0 <= 50.0)
544  elif self.vars["save_coordinates_mode"] == "75th_score":
545  score_perc = mpivs.get_percentile("score")
546  save_frame = (score_perc*100.0 <= 75.0)
547 
548  # Ensure model is updated before saving output files
549  if save_frame and not self.test_mode:
550  self.model.update()
551 
552  if save_frame:
553  print("--- frame %s score %s " % (str(i), str(score)))
554 
555  if not self.test_mode:
556  if i % self.vars["nframes_write_coordinates"] == 0:
557  print('--- writing coordinates')
558  if self.vars["number_of_best_scoring_models"] > 0:
559  output.write_pdb_best_scoring(score)
560  output.write_rmf(rmfname)
561  output.set_output_entry("rmf_file", rmfname)
562  output.set_output_entry("rmf_frame_index",
563  ntimes_at_low_temp)
564  else:
565  output.set_output_entry("rmf_file", rmfname)
566  output.set_output_entry("rmf_frame_index", '-1')
567  if self.output_objects is not None:
568  output.write_stat2(low_temp_stat_file)
569  ntimes_at_low_temp += 1
570 
571  if not self.test_mode:
572  output.write_stat2(replica_stat_file)
573  if self.vars["replica_exchange_swap"]:
574  rex.swap_temp(i, score)
575  for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
576  p.add_replica_exchange(state, self)
577 
578  if not self.test_mode:
579  print("closing production rmf files")
580  output.close_rmf(rmfname)
581 
582 
583 class BuildSystem(object):
584  """A macro to build a IMP::pmi::topology::System based on a
585  TopologyReader object.
586 
587  Easily create multi-state systems by calling this macro
588  repeatedly with different TopologyReader objects!
589  A useful function is get_molecules() which returns the PMI Molecules
590  grouped by state as a dictionary with key = (molecule name),
591  value = IMP.pmi.topology.Molecule
592  Quick multi-state system:
593  @code{.python}
594  model = IMP.Model()
595  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
596  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
597  bs = IMP.pmi.macros.BuildSystem(model)
598  bs.add_state(reader1)
599  bs.add_state(reader2)
600  bs.execute_macro() # build everything including degrees of freedom
601  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
602  ### now you have a two state system, you add restraints etc
603  @endcode
604  @note The "domain name" entry of the topology reader is not used.
605  All molecules are set up by the component name, but split into rigid bodies
606  as requested.
607  """
608 
609  _alphabets = {'DNA': IMP.pmi.alphabets.dna,
610  'RNA': IMP.pmi.alphabets.rna}
611 
612  def __init__(self, model, sequence_connectivity_scale=4.0,
613  force_create_gmm_files=False, resolutions=[1, 10],
614  name='System'):
615  """Constructor
616  @param model An IMP Model
617  @param sequence_connectivity_scale For scaling the connectivity
618  restraint
619  @param force_create_gmm_files If True, will sample and create GMMs
620  no matter what. If False, will only sample if the
621  files don't exist. If number of Gaussians is zero, won't
622  do anything.
623  @param resolutions The resolutions to build for structured regions
624  @param name The name of the top-level hierarchy node.
625  """
626  self.model = model
627  self.system = IMP.pmi.topology.System(self.model, name=name)
628  self._readers = [] # the TopologyReaders (one per state)
629  # TempResidues for each domain key=unique name,
630  # value=(atomic_res,non_atomic_res).
631  self._domain_res = []
632  self._domains = [] # key = domain unique name, value = Component
633  self.force_create_gmm_files = force_create_gmm_files
634  self.resolutions = resolutions
635 
636  def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
637  chain_ids=None):
638  """Add a state using the topology info in a
639  IMP::pmi::topology::TopologyReader object.
640  When you are done adding states, call execute_macro()
641  @param reader The TopologyReader object
642  @param fasta_name_map dictionary for converting protein names
643  found in the fasta file
644  @param chain_ids A list or string of chain IDs for assigning to
645  newly-created molecules, e.g.
646  `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
647  If not specified, chain IDs A through Z are assigned, then
648  AA through AZ, then BA through BZ, and so on, in the same
649  fashion as PDB.
650  """
651  state = self.system.create_state()
652  self._readers.append(reader)
653  # key is unique name, value is (atomic res, nonatomicres)
654  these_domain_res = {}
655  these_domains = {} # key is unique name, value is _Component
656  if chain_ids is None:
657  chain_ids = IMP.pmi.output._ChainIDs()
658  numchain = 0
659 
660  # setup representation
661  # loop over molecules, copies, then domains
662  for molname in reader.get_molecules():
663  copies = reader.get_molecules()[molname].domains
664  for nc, copyname in enumerate(copies):
665  print("BuildSystem.add_state: setting up molecule %s copy "
666  "number %s" % (molname, str(nc)))
667  copy = copies[copyname]
668  # option to not rename chains
669  if keep_chain_id:
670  all_chains = [c for c in copy if c.chain is not None]
671  if all_chains:
672  chain_id = all_chains[0].chain
673  else:
674  chain_id = chain_ids[numchain]
675  warnings.warn(
676  "No PDBs specified for %s, so keep_chain_id has "
677  "no effect; using default chain ID '%s'"
678  % (molname, chain_id), IMP.pmi.ParameterWarning)
679  else:
680  chain_id = chain_ids[numchain]
681  if nc == 0:
682  alphabet = IMP.pmi.alphabets.amino_acid
683  fasta_flag = copy[0].fasta_flag
684  if fasta_flag in self._alphabets:
685  alphabet = self._alphabets[fasta_flag]
687  copy[0].fasta_file, fasta_name_map)
688  seq = seqs[copy[0].fasta_id]
689  print("BuildSystem.add_state: molecule %s sequence has "
690  "%s residues" % (molname, len(seq)))
691  orig_mol = state.create_molecule(
692  molname, seq, chain_id, alphabet=alphabet,
693  uniprot=seqs.uniprot.get(copy[0].fasta_id))
694  mol = orig_mol
695  numchain += 1
696  else:
697  print("BuildSystem.add_state: creating a copy for "
698  "molecule %s" % molname)
699  mol = orig_mol.create_copy(chain_id)
700  numchain += 1
701 
702  for domainnumber, domain in enumerate(copy):
703  print("BuildSystem.add_state: ---- setting up domain %s "
704  "of molecule %s" % (domainnumber, molname))
705  # we build everything in the residue range, even if it
706  # extends beyond what's in the actual PDB file
707  these_domains[domain.get_unique_name()] = domain
708  if domain.residue_range == [] or \
709  domain.residue_range is None:
710  domain_res = mol.get_residues()
711  else:
712  start = domain.residue_range[0]+domain.pdb_offset
713  if domain.residue_range[1] == 'END':
714  end = len(mol.sequence)
715  else:
716  end = domain.residue_range[1]+domain.pdb_offset
717  domain_res = mol.residue_range(start-1, end-1)
718  print("BuildSystem.add_state: -------- domain %s of "
719  "molecule %s extends from residue %s to "
720  "residue %s "
721  % (domainnumber, molname, start, end))
722  if domain.pdb_file == "BEADS":
723  print("BuildSystem.add_state: -------- domain %s of "
724  "molecule %s represented by BEADS "
725  % (domainnumber, molname))
726  mol.add_representation(
727  domain_res,
728  resolutions=[domain.bead_size],
729  setup_particles_as_densities=(
730  domain.em_residues_per_gaussian != 0),
731  color=domain.color)
732  these_domain_res[domain.get_unique_name()] = \
733  (set(), domain_res)
734  elif domain.pdb_file == "IDEAL_HELIX":
735  print("BuildSystem.add_state: -------- domain %s of "
736  "molecule %s represented by IDEAL_HELIX "
737  % (domainnumber, molname))
738  emper = domain.em_residues_per_gaussian
739  mol.add_representation(
740  domain_res,
741  resolutions=self.resolutions,
742  ideal_helix=True,
743  density_residues_per_component=emper,
744  density_prefix=domain.density_prefix,
745  density_force_compute=self.force_create_gmm_files,
746  color=domain.color)
747  these_domain_res[domain.get_unique_name()] = \
748  (domain_res, set())
749  else:
750  print("BuildSystem.add_state: -------- domain %s of "
751  "molecule %s represented by pdb file %s "
752  % (domainnumber, molname, domain.pdb_file))
753  domain_atomic = mol.add_structure(domain.pdb_file,
754  domain.chain,
755  domain.residue_range,
756  domain.pdb_offset,
757  soft_check=True)
758  domain_non_atomic = domain_res - domain_atomic
759  if not domain.em_residues_per_gaussian:
760  mol.add_representation(
761  domain_atomic, resolutions=self.resolutions,
762  color=domain.color)
763  if len(domain_non_atomic) > 0:
764  mol.add_representation(
765  domain_non_atomic,
766  resolutions=[domain.bead_size],
767  color=domain.color)
768  else:
769  print("BuildSystem.add_state: -------- domain %s "
770  "of molecule %s represented by gaussians "
771  % (domainnumber, molname))
772  emper = domain.em_residues_per_gaussian
773  creategmm = self.force_create_gmm_files
774  mol.add_representation(
775  domain_atomic,
776  resolutions=self.resolutions,
777  density_residues_per_component=emper,
778  density_prefix=domain.density_prefix,
779  density_force_compute=creategmm,
780  color=domain.color)
781  if len(domain_non_atomic) > 0:
782  mol.add_representation(
783  domain_non_atomic,
784  resolutions=[domain.bead_size],
785  setup_particles_as_densities=True,
786  color=domain.color)
787  these_domain_res[domain.get_unique_name()] = (
788  domain_atomic, domain_non_atomic)
789  self._domain_res.append(these_domain_res)
790  self._domains.append(these_domains)
791  print('BuildSystem.add_state: State', len(self.system.states), 'added')
792  return state
793 
794  def get_molecules(self):
795  """Return list of all molecules grouped by state.
796  For each state, it's a dictionary of Molecules where key is the
797  molecule name
798  """
799  return [s.get_molecules() for s in self.system.get_states()]
800 
801  def get_molecule(self, molname, copy_index=0, state_index=0):
802  return self.system.get_states()[state_index].get_molecules()[
803  molname][copy_index]
804 
805  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04,
806  max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
807  """Builds representations and sets up degrees of freedom"""
808  print("BuildSystem.execute_macro: building representations")
809  self.root_hier = self.system.build()
810 
811  print("BuildSystem.execute_macro: setting up degrees of freedom")
812  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
813  for nstate, reader in enumerate(self._readers):
814  rbs = reader.get_rigid_bodies()
815  srbs = reader.get_super_rigid_bodies()
816  csrbs = reader.get_chains_of_super_rigid_bodies()
817 
818  # add rigid bodies
819  domains_in_rbs = set()
820  for rblist in rbs:
821  print("BuildSystem.execute_macro: -------- building rigid "
822  "body %s" % (str(rblist)))
823  all_res = IMP.pmi.tools.OrderedSet()
824  bead_res = IMP.pmi.tools.OrderedSet()
825  for dname in rblist:
826  domain = self._domains[nstate][dname]
827  print("BuildSystem.execute_macro: -------- adding %s"
828  % (str(dname)))
829  all_res |= self._domain_res[nstate][dname][0]
830  bead_res |= self._domain_res[nstate][dname][1]
831  domains_in_rbs.add(dname)
832  all_res |= bead_res
833  print("BuildSystem.execute_macro: -------- creating rigid "
834  "body with max_trans %s max_rot %s "
835  "non_rigid_max_trans %s"
836  % (str(max_rb_trans), str(max_rb_rot),
837  str(max_bead_trans)))
838  self.dof.create_rigid_body(all_res,
839  nonrigid_parts=bead_res,
840  max_trans=max_rb_trans,
841  max_rot=max_rb_rot,
842  nonrigid_max_trans=max_bead_trans,
843  name="RigidBody %s" % dname)
844 
845  # if you have any domains not in an RB, set them as flexible beads
846  for dname, domain in self._domains[nstate].items():
847  if dname not in domains_in_rbs:
848  if domain.pdb_file != "BEADS":
849  warnings.warn(
850  "No rigid bodies set for %s. Residues read from "
851  "the PDB file will not be sampled - only regions "
852  "missing from the PDB will be treated flexibly. "
853  "To sample the entire sequence, use BEADS instead "
854  "of a PDB file name" % dname,
856  self.dof.create_flexible_beads(
857  self._domain_res[nstate][dname][1],
858  max_trans=max_bead_trans)
859 
860  # add super rigid bodies
861  for srblist in srbs:
862  print("BuildSystem.execute_macro: -------- building "
863  "super rigid body %s" % (str(srblist)))
864  all_res = IMP.pmi.tools.OrderedSet()
865  for dname in srblist:
866  print("BuildSystem.execute_macro: -------- adding %s"
867  % (str(dname)))
868  all_res |= self._domain_res[nstate][dname][0]
869  all_res |= self._domain_res[nstate][dname][1]
870 
871  print("BuildSystem.execute_macro: -------- creating super "
872  "rigid body with max_trans %s max_rot %s "
873  % (str(max_srb_trans), str(max_srb_rot)))
874  self.dof.create_super_rigid_body(
875  all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
876 
877  # add chains of super rigid bodies
878  for csrblist in csrbs:
879  all_res = IMP.pmi.tools.OrderedSet()
880  for dname in csrblist:
881  all_res |= self._domain_res[nstate][dname][0]
882  all_res |= self._domain_res[nstate][dname][1]
883  all_res = list(all_res)
884  all_res.sort(key=lambda r: r.get_index())
885  self.dof.create_main_chain_mover(all_res)
886  return self.root_hier, self.dof
887 
888 
889 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
890 class AnalysisReplicaExchange0(object):
891  """A macro for running all the basic operations of analysis.
892  Includes clustering, precision analysis, and making ensemble density maps.
893  A number of plots are also supported.
894  """
895  def __init__(self, model,
896  merge_directories=["./"],
897  stat_file_name_suffix="stat",
898  best_pdb_name_suffix="model",
899  do_clean_first=True,
900  do_create_directories=True,
901  global_output_directory="output/",
902  replica_stat_file_suffix="stat_replica",
903  global_analysis_result_directory="./analysis/",
904  test_mode=False):
905  """Constructor.
906  @param model The IMP model
907  @param stat_file_name_suffix
908  @param merge_directories The directories containing output files
909  @param best_pdb_name_suffix
910  @param do_clean_first
911  @param do_create_directories
912  @param global_output_directory Where everything is
913  @param replica_stat_file_suffix
914  @param global_analysis_result_directory
915  @param test_mode If True, nothing is changed on disk
916  """
917 
918  try:
919  from mpi4py import MPI
920  self.comm = MPI.COMM_WORLD
921  self.rank = self.comm.Get_rank()
922  self.number_of_processes = self.comm.size
923  except ImportError:
924  self.rank = 0
925  self.number_of_processes = 1
926 
927  self.test_mode = test_mode
928  self._protocol_output = []
929  self.cluster_obj = None
930  self.model = model
931  stat_dir = global_output_directory
932  self.stat_files = []
933  # it contains the position of the root directories
934  for rd in merge_directories:
935  stat_files = glob.glob(os.path.join(rd, stat_dir, "stat.*.out"))
936  if len(stat_files) == 0:
937  warnings.warn("no stat files found in %s"
938  % os.path.join(rd, stat_dir),
940  self.stat_files += stat_files
941 
942  def add_protocol_output(self, p):
943  """Capture details of the modeling protocol.
944  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
945  """
946  # Assume last state is the one we're interested in
947  self._protocol_output.append((p, p._last_state))
948 
949  def get_modeling_trajectory(self,
950  score_key="Total_Score",
951  rmf_file_key="rmf_file",
952  rmf_file_frame_key="rmf_frame_index",
953  outputdir="./",
954  get_every=1,
955  nframes_trajectory=10000):
956  """ Get a trajectory of the modeling run, for generating
957  demonstrative movies
958 
959  @param score_key The score for ranking models
960  @param rmf_file_key Key pointing to RMF filename
961  @param rmf_file_frame_key Key pointing to RMF frame number
962  @param outputdir The local output directory used in the run
963  @param get_every Extract every nth frame
964  @param nframes_trajectory Total number of frames of the trajectory
965  """
966  import math
967 
968  trajectory_models = IMP.pmi.io.get_trajectory_models(
969  self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
970  get_every)
971  score_list = list(map(float, trajectory_models[2]))
972 
973  max_score = max(score_list)
974  min_score = min(score_list)
975 
976  bins = [(max_score-min_score)*math.exp(-float(i))+min_score
977  for i in range(nframes_trajectory)]
978  binned_scores = [None]*nframes_trajectory
979  binned_model_indexes = [-1]*nframes_trajectory
980 
981  for model_index, s in enumerate(score_list):
982  bins_score_diffs = [abs(s-b) for b in bins]
983  bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
984  if binned_scores[bin_index] is None:
985  binned_scores[bin_index] = s
986  binned_model_indexes[bin_index] = model_index
987  else:
988  old_diff = abs(binned_scores[bin_index]-bins[bin_index])
989  new_diff = abs(s-bins[bin_index])
990  if new_diff < old_diff:
991  binned_scores[bin_index] = s
992  binned_model_indexes[bin_index] = model_index
993 
994  print(binned_scores)
995  print(binned_model_indexes)
996 
997  def _expand_ambiguity(self, prot, d):
998  """If using PMI2, expand the dictionary to include copies as
999  ambiguous options
1000 
1001  This also keeps the states separate.
1002  """
1003  newdict = {}
1004  for key in d:
1005  val = d[key]
1006  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
1007  newdict[key] = val
1008  continue
1009  states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1010  if isinstance(val, tuple):
1011  start = val[0]
1012  stop = val[1]
1013  name = val[2]
1014  else:
1015  start = 1
1016  stop = -1
1017  name = val
1018  for nst in range(len(states)):
1019  sel = IMP.atom.Selection(prot, molecule=name, state_index=nst)
1020  copies = sel.get_selected_particles(with_representation=False)
1021  if len(copies) > 1:
1022  for nc in range(len(copies)):
1023  if len(states) > 1:
1024  newdict['%s.%i..%i' % (name, nst, nc)] = \
1025  (start, stop, name, nc, nst)
1026  else:
1027  newdict['%s..%i' % (name, nc)] = \
1028  (start, stop, name, nc, nst)
1029  else:
1030  newdict[key] = val
1031  return newdict
1032 
1033  def clustering(self,
1034  score_key="Total_Score",
1035  rmf_file_key="rmf_file",
1036  rmf_file_frame_key="rmf_frame_index",
1037  state_number=0,
1038  prefiltervalue=None,
1039  feature_keys=[],
1040  outputdir="./",
1041  alignment_components=None,
1042  number_of_best_scoring_models=10,
1043  rmsd_calculation_components=None,
1044  distance_matrix_file='distances.mat',
1045  load_distance_matrix_file=False,
1046  skip_clustering=False,
1047  number_of_clusters=1,
1048  display_plot=False,
1049  exit_after_display=True,
1050  get_every=1,
1051  first_and_last_frames=None,
1052  density_custom_ranges=None,
1053  write_pdb_with_centered_coordinates=False,
1054  voxel_size=5.0):
1055  """Get the best scoring models, compute a distance matrix,
1056  cluster them, and create density maps.
1057 
1058  Tuple format: "molname" just the molecule,
1059  or (start,stop,molname,copy_num(optional),state_num(optional)
1060  Can pass None for copy or state to ignore that field.
1061  If you don't pass a specific copy number
1062 
1063  @param score_key The score for ranking models.
1064  @param rmf_file_key Key pointing to RMF filename
1065  @param rmf_file_frame_key Key pointing to RMF frame number
1066  @param state_number State number to analyze
1067  @param prefiltervalue Only include frames where the
1068  score key is below this value
1069  @param feature_keys Keywords for which you want to
1070  calculate average, medians, etc.
1071  If you pass "Keyname" it'll include everything that matches
1072  "*Keyname*"
1073  @param outputdir The local output directory used in
1074  the run
1075  @param alignment_components Dictionary with keys=groupname,
1076  values are tuples for aligning the structures
1077  e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1078  @param number_of_best_scoring_models Num models to keep per run
1079  @param rmsd_calculation_components For calculating RMSD
1080  (same format as alignment_components)
1081  @param distance_matrix_file Where to store/read the
1082  distance matrix
1083  @param load_distance_matrix_file Try to load the distance
1084  matrix file
1085  @param skip_clustering Just extract the best scoring
1086  models and save the pdbs
1087  @param number_of_clusters Number of k-means clusters
1088  @param display_plot Display the distance matrix
1089  @param exit_after_display Exit after displaying distance
1090  matrix
1091  @param get_every Extract every nth frame
1092  @param first_and_last_frames A tuple with the first and last
1093  frames to be analyzed. Values are percentages!
1094  Default: get all frames
1095  @param density_custom_ranges For density calculation
1096  (same format as alignment_components)
1097  @param write_pdb_with_centered_coordinates
1098  @param voxel_size Used for the density output
1099  """
1100  # Track provenance information to be added to each output model
1101  prov = []
1102  self._outputdir = Path(outputdir).absolute()
1103  self._number_of_clusters = number_of_clusters
1104  for p, state in self._protocol_output:
1105  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1106 
1107  if self.test_mode:
1108  return
1109 
1110  if self.rank == 0:
1111  try:
1112  os.mkdir(outputdir)
1113  except: # noqa: E722
1114  pass
1115 
1116  if not load_distance_matrix_file:
1117  if len(self.stat_files) == 0:
1118  print("ERROR: no stat file found in the given path")
1119  return
1120  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1121  self.stat_files, self.number_of_processes)[self.rank]
1122 
1123  # read ahead to check if you need the PMI2 score key instead
1124  for k in (score_key, rmf_file_key, rmf_file_frame_key):
1125  if k in feature_keys:
1126  warnings.warn(
1127  "no need to pass " + k + " to feature_keys.",
1129  feature_keys.remove(k)
1130 
1131  best_models = IMP.pmi.io.get_best_models(
1132  my_stat_files, score_key, feature_keys, rmf_file_key,
1133  rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1134  rmf_file_list = best_models[0]
1135  rmf_file_frame_list = best_models[1]
1136  score_list = best_models[2]
1137  feature_keyword_list_dict = best_models[3]
1138 
1139 # ------------------------------------------------------------------------
1140 # collect all the files and scores
1141 # ------------------------------------------------------------------------
1142 
1143  if self.number_of_processes > 1:
1144  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1145  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1146  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1147  rmf_file_frame_list)
1148  for k in feature_keyword_list_dict:
1149  feature_keyword_list_dict[k] = \
1151  feature_keyword_list_dict[k])
1152 
1153  # sort by score and get the best scoring ones
1154  score_rmf_tuples = list(zip(score_list,
1155  rmf_file_list,
1156  rmf_file_frame_list,
1157  list(range(len(score_list)))))
1158 
1159  if density_custom_ranges:
1160  for k in density_custom_ranges:
1161  if not isinstance(density_custom_ranges[k], list):
1162  raise Exception("Density custom ranges: values must "
1163  "be lists of tuples")
1164 
1165  # keep subset of frames if requested
1166  if first_and_last_frames is not None:
1167  nframes = len(score_rmf_tuples)
1168  first_frame = int(first_and_last_frames[0] * nframes)
1169  last_frame = int(first_and_last_frames[1] * nframes)
1170  if last_frame > len(score_rmf_tuples):
1171  last_frame = -1
1172  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1173 
1174  # sort RMFs by the score_key in ascending order, and store the rank
1175  best_score_rmf_tuples = sorted(
1176  score_rmf_tuples,
1177  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1178  best_score_rmf_tuples = [t+(n,) for n, t in
1179  enumerate(best_score_rmf_tuples)]
1180  # Note in the provenance info that we only kept best-scoring models
1181  prov.append(IMP.pmi.io.FilterProvenance(
1182  "Best scoring", 0, number_of_best_scoring_models))
1183  # sort the feature scores in the same way
1184  best_score_feature_keyword_list_dict = defaultdict(list)
1185  for tpl in best_score_rmf_tuples:
1186  index = tpl[3]
1187  for f in feature_keyword_list_dict:
1188  best_score_feature_keyword_list_dict[f].append(
1189  feature_keyword_list_dict[f][index])
1190  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1191  best_score_rmf_tuples,
1192  self.number_of_processes)[self.rank]
1193 
1194  # expand the dictionaries to include ambiguous copies
1195  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1196  self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1197  if rmsd_calculation_components is not None:
1198  tmp = self._expand_ambiguity(
1199  prot_ahead, rmsd_calculation_components)
1200  if tmp != rmsd_calculation_components:
1201  print('Detected ambiguity, expand rmsd components to',
1202  tmp)
1203  rmsd_calculation_components = tmp
1204  if alignment_components is not None:
1205  tmp = self._expand_ambiguity(prot_ahead,
1206  alignment_components)
1207  if tmp != alignment_components:
1208  print('Detected ambiguity, expand alignment '
1209  'components to', tmp)
1210  alignment_components = tmp
1211 
1212 # -------------------------------------------------------------
1213 # read the coordinates
1214 # ------------------------------------------------------------
1215  rmsd_weights = IMP.pmi.io.get_bead_sizes(
1216  self.model, my_best_score_rmf_tuples[0],
1217  rmsd_calculation_components, state_number=state_number)
1219  self.model, my_best_score_rmf_tuples, alignment_components,
1220  rmsd_calculation_components, state_number=state_number)
1221 
1222  # note! the coordinates are simply float tuples, NOT decorators,
1223  # NOT Vector3D, NOR particles, because these object cannot be
1224  # serialized. We need serialization
1225  # for the parallel computation based on mpi.
1226 
1227  # dict:key=component name,val=coords per hit
1228  all_coordinates = got_coords[0]
1229 
1230  # same as above, limited to alignment bits
1231  alignment_coordinates = got_coords[1]
1232 
1233  # same as above, limited to RMSD bits
1234  rmsd_coordinates = got_coords[2]
1235 
1236  # dictionary with key=RMF, value=score rank
1237  rmf_file_name_index_dict = got_coords[3]
1238 
1239  # RMF file per hit
1240  all_rmf_file_names = got_coords[4]
1241 
1242 # ------------------------------------------------------------------------
1243 # optionally don't compute distance matrix or cluster, just write top files
1244 # ------------------------------------------------------------------------
1245  if skip_clustering:
1246  if density_custom_ranges:
1247  DensModule = IMP.pmi.analysis.GetModelDensity(
1248  density_custom_ranges, voxel=voxel_size)
1249 
1250  dircluster = os.path.join(outputdir,
1251  "all_models."+str(self.rank))
1252  try:
1253  os.mkdir(outputdir)
1254  except: # noqa: E722
1255  pass
1256  try:
1257  os.mkdir(dircluster)
1258  except: # noqa: E722
1259  pass
1260  clusstat = open(os.path.join(
1261  dircluster, "stat."+str(self.rank)+".out"), "w")
1262  for cnt, tpl in enumerate(my_best_score_rmf_tuples):
1263  rmf_name = tpl[1]
1264  rmf_frame_number = tpl[2]
1265  tmp_dict = {}
1266  index = tpl[4]
1267  for key in best_score_feature_keyword_list_dict:
1268  tmp_dict[key] = \
1269  best_score_feature_keyword_list_dict[key][index]
1270 
1271  if cnt == 0:
1272  prots, rs = \
1273  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1274  self.model, rmf_frame_number, rmf_name)
1275  else:
1276  linking_successful = \
1277  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1278  self.model, prots, rs, rmf_frame_number,
1279  rmf_name)
1280  if not linking_successful:
1281  continue
1282 
1283  if not prots:
1284  continue
1285 
1286  states = IMP.atom.get_by_type(
1287  prots[0], IMP.atom.STATE_TYPE)
1288  prot = states[state_number]
1289 
1290  # get transformation aligning coordinates of
1291  # requested tuples to the first RMF file
1292  if cnt == 0:
1293  coords_f1 = alignment_coordinates[cnt]
1294  if cnt > 0:
1295  coords_f2 = alignment_coordinates[cnt]
1296  if coords_f2:
1298  coords_f1, coords_f2)
1299  transformation = Ali.align()[1]
1300  else:
1301  transformation = \
1303 
1304  rbs = set()
1305  for p in IMP.atom.get_leaves(prot):
1306  if not IMP.core.XYZR.get_is_setup(p):
1308  IMP.core.XYZR(p).set_radius(0.0001)
1309  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1310 
1312  rbm = IMP.core.RigidBodyMember(p)
1313  rb = rbm.get_rigid_body()
1314  rbs.add(rb)
1315  else:
1317  transformation)
1318  for rb in rbs:
1319  IMP.core.transform(rb, transformation)
1320 
1321  o = IMP.pmi.output.Output()
1322  self.model.update()
1323  out_pdb_fn = os.path.join(
1324  dircluster, str(cnt)+"."+str(self.rank)+".pdb")
1325  out_rmf_fn = os.path.join(
1326  dircluster, str(cnt)+"."+str(self.rank)+".rmf3")
1327  o.init_pdb(out_pdb_fn, prot)
1328  tc = write_pdb_with_centered_coordinates
1329  o.write_pdb(out_pdb_fn,
1330  translate_to_geometric_center=tc)
1331 
1332  tmp_dict["local_pdb_file_name"] = \
1333  os.path.basename(out_pdb_fn)
1334  tmp_dict["rmf_file_full_path"] = rmf_name
1335  tmp_dict["local_rmf_file_name"] = \
1336  os.path.basename(out_rmf_fn)
1337  tmp_dict["local_rmf_frame_number"] = 0
1338 
1339  clusstat.write(str(tmp_dict)+"\n")
1340 
1341  # create a single-state System and write that
1343  IMP.Particle(self.model))
1344  h.set_name("System")
1345  h.add_child(prot)
1346  o.init_rmf(out_rmf_fn, [h], rs)
1347 
1348  o.write_rmf(out_rmf_fn)
1349  o.close_rmf(out_rmf_fn)
1350  # add the density
1351  if density_custom_ranges:
1352  DensModule.add_subunits_density(prot)
1353 
1354  if density_custom_ranges:
1355  DensModule.write_mrc(path=dircluster)
1356  del DensModule
1357  return
1358 
1359  # broadcast the coordinates
1360  if self.number_of_processes > 1:
1361  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1362  all_coordinates)
1363  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1364  all_rmf_file_names)
1365  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1366  rmf_file_name_index_dict)
1367  alignment_coordinates = IMP.pmi.tools.scatter_and_gather(
1368  alignment_coordinates)
1369  rmsd_coordinates = IMP.pmi.tools.scatter_and_gather(
1370  rmsd_coordinates)
1371 
1372  if self.rank == 0:
1373  # save needed information in external files
1374  self.save_objects(
1375  [best_score_feature_keyword_list_dict,
1376  rmf_file_name_index_dict],
1377  ".macro.pkl")
1378 
1379 # ------------------------------------------------------------------------
1380 # Calculate distance matrix and cluster
1381 # ------------------------------------------------------------------------
1382  print("setup clustering class")
1383  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1384 
1385  for n, model_coordinate_dict in enumerate(all_coordinates):
1386  # let's try to align
1387  if (alignment_components is not None
1388  and len(self.cluster_obj.all_coords) == 0):
1389  # set the first model as template coordinates
1390  self.cluster_obj.set_template(alignment_coordinates[n])
1391  self.cluster_obj.fill(all_rmf_file_names[n],
1392  rmsd_coordinates[n])
1393  print("Global calculating the distance matrix")
1394 
1395  # calculate distance matrix, all against all
1396  self.cluster_obj.dist_matrix()
1397 
1398  # perform clustering and optionally display
1399  if self.rank == 0:
1400  self.cluster_obj.do_cluster(number_of_clusters)
1401  if display_plot:
1402  if self.rank == 0:
1403  self.cluster_obj.plot_matrix(
1404  figurename=os.path.join(outputdir,
1405  'dist_matrix.pdf'))
1406  if exit_after_display:
1407  exit()
1408  self.cluster_obj.save_distance_matrix_file(
1409  file_name=distance_matrix_file)
1410 
1411 # ------------------------------------------------------------------------
1412 # Alternatively, load the distance matrix from file and cluster that
1413 # ------------------------------------------------------------------------
1414  else:
1415  if self.rank == 0:
1416  print("setup clustering class")
1417  self.cluster_obj = IMP.pmi.analysis.Clustering()
1418  self.cluster_obj.load_distance_matrix_file(
1419  file_name=distance_matrix_file)
1420  print("clustering with %s clusters" % str(number_of_clusters))
1421  self.cluster_obj.do_cluster(number_of_clusters)
1422  [best_score_feature_keyword_list_dict,
1423  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1424  if display_plot:
1425  if self.rank == 0:
1426  self.cluster_obj.plot_matrix(figurename=os.path.join(
1427  outputdir, 'dist_matrix.pdf'))
1428  if exit_after_display:
1429  exit()
1430  if self.number_of_processes > 1:
1431  self.comm.Barrier()
1432 
1433 # ------------------------------------------------------------------------
1434 # now save all information about the clusters
1435 # ------------------------------------------------------------------------
1436 
1437  if self.rank == 0:
1438  print(self.cluster_obj.get_cluster_labels())
1439  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1440  print("rank %s " % str(self.rank))
1441  print("cluster %s " % str(n))
1442  print("cluster label %s " % str(cl))
1443  print(self.cluster_obj.get_cluster_label_names(cl))
1444  cluster_size = \
1445  len(self.cluster_obj.get_cluster_label_names(cl))
1446  cluster_prov = \
1447  prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1448 
1449  # first initialize the Density class if requested
1450  if density_custom_ranges:
1451  DensModule = IMP.pmi.analysis.GetModelDensity(
1452  density_custom_ranges,
1453  voxel=voxel_size)
1454 
1455  dircluster = outputdir + "/cluster." + str(n) + "/"
1456  try:
1457  os.mkdir(dircluster)
1458  except: # noqa: E722
1459  pass
1460 
1461  rmsd_dict = {
1462  "AVERAGE_RMSD":
1463  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1464  clusstat = open(dircluster + "stat.out", "w")
1465  for k, structure_name in enumerate(
1466  self.cluster_obj.get_cluster_label_names(cl)):
1467  # extract the features
1468  tmp_dict = {}
1469  tmp_dict.update(rmsd_dict)
1470  index = rmf_file_name_index_dict[structure_name]
1471  for key in best_score_feature_keyword_list_dict:
1472  tmp_dict[
1473  key] = best_score_feature_keyword_list_dict[
1474  key][
1475  index]
1476 
1477  # get the rmf name and the frame number from the list of
1478  # frame names
1479  rmf_name = structure_name.split("|")[0]
1480  rmf_frame_number = int(structure_name.split("|")[1])
1481  clusstat.write(str(tmp_dict) + "\n")
1482 
1483  # extract frame (open or link to existing)
1484  if k == 0:
1485  prots, rs = \
1486  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1487  self.model, rmf_frame_number, rmf_name)
1488  else:
1489  linking_successful = \
1490  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1491  self.model, prots, rs, rmf_frame_number,
1492  rmf_name)
1493  if not linking_successful:
1494  continue
1495  if not prots:
1496  continue
1497 
1498  states = IMP.atom.get_by_type(
1499  prots[0], IMP.atom.STATE_TYPE)
1500  prot = states[state_number]
1501  if k == 0:
1502  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1503 
1504  # transform clusters onto first
1505  if k > 0:
1506  co = self.cluster_obj
1507  model_index = co.get_model_index_from_name(
1508  structure_name)
1509  transformation = co.get_transformation_to_first_member(
1510  cl, model_index)
1511  rbs = set()
1512  for p in IMP.atom.get_leaves(prot):
1513  if not IMP.core.XYZR.get_is_setup(p):
1515  IMP.core.XYZR(p).set_radius(0.0001)
1516  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1517 
1519  rbm = IMP.core.RigidBodyMember(p)
1520  rb = rbm.get_rigid_body()
1521  rbs.add(rb)
1522  else:
1524  transformation)
1525  for rb in rbs:
1526  IMP.core.transform(rb, transformation)
1527 
1528  # add the density
1529  if density_custom_ranges:
1530  DensModule.add_subunits_density(prot)
1531 
1532  # pdb writing should be optimized!
1533  o = IMP.pmi.output.Output()
1534  self.model.update()
1535  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1536  o.write_pdb(dircluster + str(k) + ".pdb")
1537 
1538  # create a single-state System and write that
1540  IMP.Particle(self.model))
1541  h.set_name("System")
1542  h.add_child(prot)
1543  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1544  o.write_rmf(dircluster + str(k) + ".rmf3")
1545  o.close_rmf(dircluster + str(k) + ".rmf3")
1546 
1547  del o
1548  # IMP.atom.destroy(prot)
1549 
1550  if density_custom_ranges:
1551  DensModule.write_mrc(path=dircluster)
1552  del DensModule
1553 
1554  if self.number_of_processes > 1:
1555  self.comm.Barrier()
1556 
1557  def get_cluster_rmsd(self, cluster_num):
1558  if self.cluster_obj is None:
1559  raise Exception("Run clustering first")
1560  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1561 
1562  def save_objects(self, objects, file_name):
1563  import pickle
1564  outf = open(file_name, 'wb')
1565  pickle.dump(objects, outf)
1566  outf.close()
1567 
1568  def load_objects(self, file_name):
1569  import pickle
1570  inputf = open(file_name, 'rb')
1571  objects = pickle.load(inputf)
1572  inputf.close()
1573  return objects
1574 
1575 
1577 
1578  """
1579  This class contains analysis utilities to investigate ReplicaExchange
1580  results.
1581  """
1582 
1583  ########################
1584  # Construction and Setup
1585  ########################
1586 
1587  def __init__(self, model, stat_files, best_models=None, score_key=None,
1588  alignment=True):
1589  """
1590  Construction of the Class.
1591  @param model IMP.Model()
1592  @param stat_files list of string. Can be ascii stat files,
1593  rmf files names
1594  @param best_models Integer. Number of best scoring models,
1595  if None: all models will be read
1596  @param alignment boolean (Default=True). Align before computing
1597  the rmsd.
1598  """
1599 
1600  self.model = model
1601  self.best_models = best_models
1603  model, stat_files, self.best_models, score_key, cache=True)
1605  StatHierarchyHandler=self.stath0)
1606 
1607  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(
1609  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(
1611  self.sel0_rmsd = IMP.atom.Selection(self.stath0)
1612  self.sel1_rmsd = IMP.atom.Selection(self.stath1)
1613  self.sel0_alignment = IMP.atom.Selection(self.stath0)
1614  self.sel1_alignment = IMP.atom.Selection(self.stath1)
1615  self.clusters = []
1616  # fill the cluster list with a single cluster containing all models
1617  c = IMP.pmi.output.Cluster(0)
1618  self.clusters.append(c)
1619  for n0 in range(len(self.stath0)):
1620  c.add_member(n0)
1621  self.pairwise_rmsd = {}
1622  self.pairwise_molecular_assignment = {}
1623  self.alignment = alignment
1624  self.symmetric_molecules = {}
1625  self.issymmetricsel = {}
1626  self.update_seldicts()
1627  self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1628  IMP.atom.get_leaves(self.stath0))
1629  self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1630  IMP.atom.get_leaves(self.stath1))
1631 
1632  def set_rmsd_selection(self, **kwargs):
1633  """
1634  Setup the selection onto which the rmsd is computed
1635  @param kwargs use IMP.atom.Selection keywords
1636  """
1637  self.sel0_rmsd = IMP.atom.Selection(self.stath0, **kwargs)
1638  self.sel1_rmsd = IMP.atom.Selection(self.stath1, **kwargs)
1639  self.update_seldicts()
1640 
1641  def set_symmetric(self, molecule_name):
1642  """
1643  Store names of symmetric molecules
1644  """
1645  self.symmetric_molecules[molecule_name] = 0
1646  self.update_seldicts()
1647 
1648  def set_alignment_selection(self, **kwargs):
1649  """
1650  Setup the selection onto which the alignment is computed
1651  @param kwargs use IMP.atom.Selection keywords
1652  """
1653  self.sel0_alignment = IMP.atom.Selection(self.stath0, **kwargs)
1654  self.sel1_alignment = IMP.atom.Selection(self.stath1, **kwargs)
1655 
1656  ######################
1657  # Clustering functions
1658  ######################
1659  def clean_clusters(self):
1660  for c in self.clusters:
1661  del c
1662  self.clusters = []
1663 
1664  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1665  """
1666  Cluster the models based on RMSD.
1667  @param rmsd_cutoff Float the distance cutoff in Angstrom
1668  @param metric (Default=IMP.atom.get_rmsd) the metric that will
1669  be used to compute rmsds
1670  """
1671  self.clean_clusters()
1672  not_clustered = set(range(len(self.stath1)))
1673  while len(not_clustered) > 0:
1674  self.aggregate(not_clustered, rmsd_cutoff, metric)
1675  self.update_clusters()
1676 
1677  def refine(self, rmsd_cutoff=10):
1678  """
1679  Refine the clusters by merging the ones whose centers are close
1680  @param rmsd_cutoff cutoff distance in Angstorms
1681  """
1682  clusters_copy = self.clusters
1683  for c0, c1 in itertools.combinations(self.clusters, 2):
1684  if c0.center_index is None:
1685  self.compute_cluster_center(c0)
1686  if c1.center_index is None:
1687  self.compute_cluster_center(c1)
1688  _ = self.stath0[c0.center_index]
1689  _ = self.stath1[c1.center_index]
1690  rmsd, molecular_assignment = self.rmsd()
1691  if rmsd <= rmsd_cutoff:
1692  if c1 in self.clusters:
1693  clusters_copy.remove(c1)
1694  c0 += c1
1695  self.clusters = clusters_copy
1696  self.update_clusters()
1697 
1698  ####################
1699  # Input Output
1700  ####################
1701 
1702  def set_cluster_assignments(self, cluster_ids):
1703  if len(cluster_ids) != len(self.stath0):
1704  raise ValueError('cluster ids has to be same length as '
1705  'number of frames')
1706 
1707  self.clusters = []
1708  for i in sorted(list(set(cluster_ids))):
1709  self.clusters.append(IMP.pmi.output.Cluster(i))
1710  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1711  self.clusters[idx].add_member(i, d)
1712 
1713  def get_cluster_data(self, cluster):
1714  """
1715  Return the model data from a cluster
1716  @param cluster IMP.pmi.output.Cluster object
1717  """
1718  data = []
1719  for m in cluster:
1720  data.append(m)
1721  return data
1722 
1723  def save_data(self, filename='data.pkl'):
1724  """
1725  Save the data for the whole models into a pickle file
1726  @param filename string
1727  """
1728  self.stath0.save_data(filename)
1729 
1730  def set_data(self, data):
1731  """
1732  Set the data from an external IMP.pmi.output.Data
1733  @param data IMP.pmi.output.Data
1734  """
1735  self.stath0.data = data
1736  self.stath1.data = data
1737 
1738  def load_data(self, filename='data.pkl'):
1739  """
1740  Load the data from an external pickled file
1741  @param filename string
1742  """
1743  self.stath0.load_data(filename)
1744  self.stath1.load_data(filename)
1745  self.best_models = len(self.stath0)
1746 
1747  def add_cluster(self, rmf_name_list):
1748  c = IMP.pmi.output.Cluster(len(self.clusters))
1749  print("creating cluster index "+str(len(self.clusters)))
1750  self.clusters.append(c)
1751  current_len = len(self.stath0)
1752 
1753  for rmf in rmf_name_list:
1754  print("adding rmf "+rmf)
1755  self.stath0.add_stat_file(rmf)
1756  self.stath1.add_stat_file(rmf)
1757 
1758  for n0 in range(current_len, len(self.stath0)):
1759  d0 = self.stath0[n0]
1760  c.add_member(n0, d0)
1761  self.update_clusters()
1762 
1763  def save_clusters(self, filename='clusters.pkl'):
1764  """
1765  Save the clusters into a pickle file
1766  @param filename string
1767  """
1768  try:
1769  import cPickle as pickle
1770  except ImportError:
1771  import pickle
1772  fl = open(filename, 'wb')
1773  pickle.dump(self.clusters, fl)
1774 
1775  def load_clusters(self, filename='clusters.pkl', append=False):
1776  """
1777  Load the clusters from a pickle file
1778  @param filename string
1779  @param append bool (Default=False), if True. append the clusters
1780  to the ones currently present
1781  """
1782  try:
1783  import cPickle as pickle
1784  except ImportError:
1785  import pickle
1786  fl = open(filename, 'rb')
1787  self.clean_clusters()
1788  if append:
1789  self.clusters += pickle.load(fl)
1790  else:
1791  self.clusters = pickle.load(fl)
1792  self.update_clusters()
1793 
1794  ####################
1795  # Analysis Functions
1796  ####################
1797 
1798  def compute_cluster_center(self, cluster):
1799  """
1800  Compute the cluster center for a given cluster
1801  """
1802  member_distance = defaultdict(float)
1803 
1804  for n0, n1 in itertools.combinations(cluster.members, 2):
1805  _ = self.stath0[n0]
1806  _ = self.stath1[n1]
1807  rmsd, _ = self.rmsd()
1808  member_distance[n0] += rmsd
1809 
1810  if len(member_distance) > 0:
1811  cluster.center_index = min(member_distance,
1812  key=member_distance.get)
1813  else:
1814  cluster.center_index = cluster.members[0]
1815 
1816  def save_coordinates(self, cluster, rmf_name=None, reference="Absolute",
1817  prefix="./"):
1818  """
1819  Save the coordinates of the current cluster a single rmf file
1820  """
1821  print("saving coordinates", cluster)
1822  if self.alignment:
1823  self.set_reference(reference, cluster)
1824  o = IMP.pmi.output.Output()
1825  if rmf_name is None:
1826  rmf_name = prefix+'/'+str(cluster.cluster_id)+".rmf3"
1827 
1828  _ = self.stath1[cluster.members[0]]
1829  self.model.update()
1830  o.init_rmf(rmf_name, [self.stath1])
1831  for n1 in cluster.members:
1832  _ = self.stath1[n1]
1833  self.model.update()
1835  if self.alignment:
1836  self.align()
1837  o.write_rmf(rmf_name)
1839  o.close_rmf(rmf_name)
1840 
1841  def prune_redundant_structures(self, rmsd_cutoff=10):
1842  """
1843  remove structures that are similar
1844  append it to a new cluster
1845  """
1846  print("pruning models")
1847  selected = 0
1848  filtered = [selected]
1849  remaining = range(1, len(self.stath1), 10)
1850 
1851  while len(remaining) > 0:
1852  d0 = self.stath0[selected]
1853  rm = []
1854  for n1 in remaining:
1855  _ = self.stath1[n1]
1856  if self.alignment:
1857  self.align()
1858  d, _ = self.rmsd()
1859  if d <= rmsd_cutoff:
1860  rm.append(n1)
1861  print("pruning model %s, similar to model %s, rmsd %s"
1862  % (str(n1), str(selected), str(d)))
1863  remaining = [x for x in remaining if x not in rm]
1864  if len(remaining) == 0:
1865  break
1866  selected = remaining[0]
1867  filtered.append(selected)
1868  remaining.pop(0)
1869  c = IMP.pmi.output.Cluster(len(self.clusters))
1870  self.clusters.append(c)
1871  for n0 in filtered:
1872  d0 = self.stath0[n0]
1873  c.add_member(n0, d0)
1874  self.update_clusters()
1875 
1876  def precision(self, cluster):
1877  """
1878  Compute the precision of a cluster
1879  """
1880  npairs = 0
1881  rmsd = 0.0
1882  precision = None
1883 
1884  if cluster.center_index is not None:
1885  members1 = [cluster.center_index]
1886  else:
1887  members1 = cluster.members
1888 
1889  for n0 in members1:
1890  _ = self.stath0[n0]
1891  for n1 in cluster.members:
1892  if n0 != n1:
1893  npairs += 1
1894  _ = self.stath1[n1]
1896  tmp_rmsd, _ = self.rmsd()
1897  rmsd += tmp_rmsd
1899 
1900  if npairs > 0:
1901  precision = rmsd/npairs
1902  cluster.precision = precision
1903  return precision
1904 
1905  def bipartite_precision(self, cluster1, cluster2, verbose=False):
1906  """
1907  Compute the bipartite precision (ie the cross-precision)
1908  between two clusters
1909  """
1910  npairs = 0
1911  rmsd = 0.0
1912  for cn0, n0 in enumerate(cluster1.members):
1913  _ = self.stath0[n0]
1914  for cn1, n1 in enumerate(cluster2.members):
1915  _ = self.stath1[n1]
1916  tmp_rmsd, _ = self.rmsd()
1917  if verbose:
1918  print("--- rmsd between structure %s and structure "
1919  "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1920  rmsd += tmp_rmsd
1921  npairs += 1
1922  precision = rmsd/npairs
1923  return precision
1924 
1925  def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1926  cluster_ref=None, step=1):
1927  """
1928  Compute the Root mean square fluctuations
1929  of a molecule in a cluster
1930  Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1931  residue indexes and the value is the rmsf
1932  """
1933  rmsf = IMP.pmi.tools.OrderedDict()
1934 
1935  # assumes that residue indexes are identical for stath0 and stath1
1936  if cluster_ref is not None:
1937  if cluster_ref.center_index is not None:
1938  members0 = [cluster_ref.center_index]
1939  else:
1940  members0 = cluster_ref.members
1941  else:
1942  if cluster.center_index is not None:
1943  members0 = [cluster.center_index]
1944  else:
1945  members0 = cluster.members
1946 
1947  s0 = IMP.atom.Selection(self.stath0, molecule=molecule, resolution=1,
1948  copy_index=copy_index, state_index=state_index)
1949  ps0 = s0.get_selected_particles()
1950  # get the residue indexes
1951  residue_indexes = list(IMP.pmi.tools.OrderedSet(
1952  [IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
1953 
1954  # get the corresponding particles
1955  npairs = 0
1956  for n0 in members0:
1957  d0 = self.stath0[n0]
1958  for n1 in cluster.members[::step]:
1959  if n0 != n1:
1960  print("--- rmsf %s %s" % (str(n0), str(n1)))
1962 
1963  s1 = IMP.atom.Selection(
1964  self.stath1, molecule=molecule,
1965  residue_indexes=residue_indexes, resolution=1,
1966  copy_index=copy_index, state_index=state_index)
1967  ps1 = s1.get_selected_particles()
1968 
1969  d1 = self.stath1[n1]
1970  if self.alignment:
1971  self.align()
1972  for n, (p0, p1) in enumerate(zip(ps0, ps1)):
1973  r = residue_indexes[n]
1974  d0 = IMP.core.XYZ(p0)
1975  d1 = IMP.core.XYZ(p1)
1976  if r in rmsf:
1977  rmsf[r] += IMP.core.get_distance(d0, d1)
1978  else:
1979  rmsf[r] = IMP.core.get_distance(d0, d1)
1980  npairs += 1
1982  for r in rmsf:
1983  rmsf[r] /= npairs
1984 
1985  for stath in [self.stath0, self.stath1]:
1986  if molecule not in self.symmetric_molecules:
1987  s = IMP.atom.Selection(
1988  stath, molecule=molecule, residue_index=r,
1989  resolution=1, copy_index=copy_index,
1990  state_index=state_index)
1991  else:
1992  s = IMP.atom.Selection(
1993  stath, molecule=molecule, residue_index=r,
1994  resolution=1, state_index=state_index)
1995 
1996  ps = s.get_selected_particles()
1997  for p in ps:
1999  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
2000  else:
2002 
2003  return rmsf
2004 
2005  def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2006  reference="Absolute", prefix="./", step=1):
2007  if self.alignment:
2008  self.set_reference(reference, cluster)
2009  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
2010  voxel=voxel_size)
2011 
2012  for n1 in cluster.members[::step]:
2013  print("density "+str(n1))
2014  _ = self.stath1[n1]
2016  if self.alignment:
2017  self.align()
2018  dens.add_subunits_density(self.stath1)
2020  dens.write_mrc(path=prefix+'/', suffix=str(cluster.cluster_id))
2021  del dens
2022 
2023  def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2024  consolidate=False, molecules=None, prefix='./',
2025  reference="Absolute"):
2026  if self.alignment:
2027  self.set_reference(reference, cluster)
2028  import numpy as np
2029  import matplotlib.pyplot as plt
2030  import matplotlib.cm as cm
2031  from scipy.spatial.distance import cdist
2032  import IMP.pmi.topology
2033  if molecules is None:
2035  for mol in IMP.pmi.tools.get_molecules(
2036  IMP.atom.get_leaves(self.stath1))]
2037  else:
2039  for mol in IMP.pmi.tools.get_molecules(
2041  self.stath1,
2042  molecules=molecules).get_selected_particles())]
2043  unique_copies = [mol for mol in mols if mol.get_copy_index() == 0]
2044  mol_names_unique = dict((mol.get_name(), mol) for mol in unique_copies)
2045  total_len_unique = sum(max(mol.get_residue_indexes())
2046  for mol in unique_copies)
2047 
2048  index_dict = {}
2049  prev_stop = 0
2050 
2051  if not consolidate:
2052  for mol in mols:
2053  seqlen = max(mol.get_residue_indexes())
2054  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2055  prev_stop += seqlen
2056 
2057  else:
2058  for mol in unique_copies:
2059  seqlen = max(mol.get_residue_indexes())
2060  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2061  prev_stop += seqlen
2062 
2063  for ncl, n1 in enumerate(cluster.members):
2064  print(ncl)
2065  _ = self.stath1[n1]
2066  coord_dict = IMP.pmi.tools.OrderedDict()
2067  for mol in mols:
2068  rindexes = mol.get_residue_indexes()
2069  coords = np.ones((max(rindexes), 3))
2070  for rnum in rindexes:
2071  sel = IMP.atom.Selection(mol, residue_index=rnum,
2072  resolution=1)
2073  selpart = sel.get_selected_particles()
2074  if len(selpart) == 0:
2075  continue
2076  selpart = selpart[0]
2077  coords[rnum - 1, :] = \
2078  IMP.core.XYZ(selpart).get_coordinates()
2079  coord_dict[mol] = coords
2080 
2081  if not consolidate:
2082  coords = np.concatenate(list(coord_dict.values()))
2083  dists = cdist(coords, coords)
2084  binary_dists = np.where((dists <= contact_threshold)
2085  & (dists >= 1.0), 1.0, 0.0)
2086  else:
2087  binary_dists_dict = {}
2088  for mol1 in mols:
2089  len1 = max(mol1.get_residue_indexes())
2090  for mol2 in mols:
2091  name1 = mol1.get_name()
2092  name2 = mol2.get_name()
2093  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2094  if (name1, name2) not in binary_dists_dict:
2095  binary_dists_dict[(name1, name2)] = \
2096  np.zeros((len1, len1))
2097  binary_dists_dict[(name1, name2)] += \
2098  np.where((dists <= contact_threshold)
2099  & (dists >= 1.0), 1.0, 0.0)
2100  binary_dists = np.zeros((total_len_unique, total_len_unique))
2101 
2102  for name1, name2 in binary_dists_dict:
2103  r1 = index_dict[mol_names_unique[name1]]
2104  r2 = index_dict[mol_names_unique[name2]]
2105  binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2106  np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2107  1.0, 0.0)
2108 
2109  if ncl == 0:
2110  dist_maps = [dists]
2111  av_dist_map = dists
2112  contact_freqs = binary_dists
2113  else:
2114  dist_maps.append(dists)
2115  av_dist_map += dists
2116  contact_freqs += binary_dists
2117 
2118  if log_scale:
2119  contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2120  else:
2121  contact_freqs = 1.0/len(cluster)*contact_freqs
2122  av_dist_map = 1.0/len(cluster)*contact_freqs
2123 
2124  fig = plt.figure(figsize=(100, 100))
2125  ax = fig.add_subplot(111)
2126  ax.set_xticks([])
2127  ax.set_yticks([])
2128  gap_between_components = 50
2129  colormap = cm.Blues
2130  colornorm = None
2131 
2132  if not consolidate:
2133  sorted_tuple = sorted(
2135  mol).get_extended_name(), mol) for mol in mols)
2136  prot_list = list(zip(*sorted_tuple))[1]
2137  else:
2138  sorted_tuple = sorted(
2139  (IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(), mol)
2140  for mol in unique_copies)
2141  prot_list = list(zip(*sorted_tuple))[1]
2142 
2143  prot_listx = prot_list
2144  nresx = gap_between_components + \
2145  sum([max(mol.get_residue_indexes())
2146  + gap_between_components for mol in prot_listx])
2147 
2148  # set the list of proteins on the y axis
2149  prot_listy = prot_list
2150  nresy = gap_between_components + \
2151  sum([max(mol.get_residue_indexes())
2152  + gap_between_components for mol in prot_listy])
2153 
2154  # this is the residue offset for each protein
2155  resoffsetx = {}
2156  resendx = {}
2157  res = gap_between_components
2158  for mol in prot_listx:
2159  resoffsetx[mol] = res
2160  res += max(mol.get_residue_indexes())
2161  resendx[mol] = res
2162  res += gap_between_components
2163 
2164  resoffsety = {}
2165  resendy = {}
2166  res = gap_between_components
2167  for mol in prot_listy:
2168  resoffsety[mol] = res
2169  res += max(mol.get_residue_indexes())
2170  resendy[mol] = res
2171  res += gap_between_components
2172 
2173  resoffsetdiagonal = {}
2174  res = gap_between_components
2175  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2176  resoffsetdiagonal[mol] = res
2177  res += max(mol.get_residue_indexes())
2178  res += gap_between_components
2179 
2180  # plot protein boundaries
2181  xticks = []
2182  xlabels = []
2183  for n, prot in enumerate(prot_listx):
2184  res = resoffsetx[prot]
2185  end = resendx[prot]
2186  for proty in prot_listy:
2187  resy = resoffsety[proty]
2188  endy = resendy[proty]
2189  ax.plot([res, res], [resy, endy], linestyle='-',
2190  color='gray', lw=0.4)
2191  ax.plot([end, end], [resy, endy], linestyle='-',
2192  color='gray', lw=0.4)
2193  xticks.append((float(res) + float(end)) / 2)
2195  prot).get_extended_name())
2196 
2197  yticks = []
2198  ylabels = []
2199  for n, prot in enumerate(prot_listy):
2200  res = resoffsety[prot]
2201  end = resendy[prot]
2202  for protx in prot_listx:
2203  resx = resoffsetx[protx]
2204  endx = resendx[protx]
2205  ax.plot([resx, endx], [res, res], linestyle='-',
2206  color='gray', lw=0.4)
2207  ax.plot([resx, endx], [end, end], linestyle='-',
2208  color='gray', lw=0.4)
2209  yticks.append((float(res) + float(end)) / 2)
2211  prot).get_extended_name())
2212 
2213  # plot the contact map
2214 
2215  tmp_array = np.zeros((nresx, nresy))
2216  ret = {}
2217  for px in prot_listx:
2218  for py in prot_listy:
2219  resx = resoffsetx[px]
2220  lengx = resendx[px] - 1
2221  resy = resoffsety[py]
2222  lengy = resendy[py] - 1
2223  indexes_x = index_dict[px]
2224  minx = min(indexes_x)
2225  maxx = max(indexes_x)
2226  indexes_y = index_dict[py]
2227  miny = min(indexes_y)
2228  maxy = max(indexes_y)
2229  tmp_array[resx:lengx, resy:lengy] = \
2230  contact_freqs[minx:maxx, miny:maxy]
2231  ret[(px, py)] = np.argwhere(
2232  contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2233 
2234  ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2235  origin='lower', alpha=0.6, interpolation='nearest')
2236 
2237  ax.set_xticks(xticks)
2238  ax.set_xticklabels(xlabels, rotation=90)
2239  ax.set_yticks(yticks)
2240  ax.set_yticklabels(ylabels)
2241  plt.setp(ax.get_xticklabels(), fontsize=6)
2242  plt.setp(ax.get_yticklabels(), fontsize=6)
2243 
2244  # display and write to file
2245  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2246  [i.set_linewidth(2.0) for i in ax.spines.values()]
2247 
2248  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf",
2249  dpi=300, transparent="False")
2250  return ret
2251 
2252  def plot_rmsd_matrix(self, filename):
2253  self.compute_all_pairwise_rmsd()
2254  distance_matrix = np.zeros(
2255  (len(self.stath0), len(self.stath1)))
2256  for (n0, n1) in self.pairwise_rmsd:
2257  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2258 
2259  import matplotlib as mpl
2260  mpl.use('Agg')
2261  import matplotlib.pylab as pl
2262  from scipy.cluster import hierarchy as hrc
2263 
2264  fig = pl.figure(figsize=(10, 8))
2265  ax = fig.add_subplot(212)
2266  dendrogram = hrc.dendrogram(
2267  hrc.linkage(distance_matrix),
2268  color_threshold=7,
2269  no_labels=True)
2270  leaves_order = dendrogram['leaves']
2271  ax.set_xlabel('Model')
2272  ax.set_ylabel('RMSD [Angstroms]')
2273 
2274  ax2 = fig.add_subplot(221)
2275  cax = ax2.imshow(
2276  distance_matrix[leaves_order, :][:, leaves_order],
2277  interpolation='nearest')
2278  cb = fig.colorbar(cax)
2279  cb.set_label('RMSD [Angstroms]')
2280  ax2.set_xlabel('Model')
2281  ax2.set_ylabel('Model')
2282 
2283  pl.savefig(filename, dpi=300)
2284  pl.close(fig)
2285 
2286  ####################
2287  # Internal Functions
2288  ####################
2289 
2290  def update_clusters(self):
2291  """
2292  Update the cluster id numbers
2293  """
2294  for n, c in enumerate(self.clusters):
2295  c.cluster_id = n
2296 
2297  def get_molecule(self, hier, name, copy):
2298  s = IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2299  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2300 
2301  def update_seldicts(self):
2302  """
2303  Update the seldicts
2304  """
2305  self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2306  self.sel0_rmsd.get_selected_particles())
2307  self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2308  self.sel1_rmsd.get_selected_particles())
2309  for mol in self.seldict0:
2310  for sel in self.seldict0[mol]:
2311  self.issymmetricsel[sel] = False
2312  for mol in self.symmetric_molecules:
2313  self.symmetric_molecules[mol] = len(self.seldict0[mol])
2314  for sel in self.seldict0[mol]:
2315  self.issymmetricsel[sel] = True
2316 
2317  def align(self):
2319  self.sel1_alignment, self.sel0_alignment)
2320 
2321  for rb in self.rbs1:
2322  IMP.core.transform(rb, tr)
2323 
2324  for bead in self.beads1:
2325  try:
2326  IMP.core.transform(IMP.core.XYZ(bead), tr)
2327  except: # noqa: E722
2328  continue
2329 
2330  self.model.update()
2331 
2332  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2333  '''
2334  initial filling of the clusters.
2335  '''
2336  n0 = idxs.pop()
2337  print("clustering model "+str(n0))
2338  d0 = self.stath0[n0]
2339  c = IMP.pmi.output.Cluster(len(self.clusters))
2340  print("creating cluster index "+str(len(self.clusters)))
2341  self.clusters.append(c)
2342  c.add_member(n0, d0)
2343  clustered = set([n0])
2344  for n1 in idxs:
2345  print("--- trying to add model " + str(n1) + " to cluster "
2346  + str(len(self.clusters)))
2347  d1 = self.stath1[n1]
2348  if self.alignment:
2349  self.align()
2350  rmsd, _ = self.rmsd(metric=metric)
2351  if rmsd < rmsd_cutoff:
2352  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2353  c.add_member(n1, d1)
2354  clustered.add(n1)
2355  else:
2356  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2357  idxs -= clustered
2358 
2359  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2360  """
2361  merge the clusters that have close members
2362  @param rmsd_cutoff cutoff distance in Angstorms
2363  """
2364  # before merging, clusters are spheres of radius rmsd_cutoff
2365  # centered on the 1st element
2366  # here we only try to merge clusters whose centers are closer
2367  # than 2*rmsd_cutoff
2368  to_merge = []
2369  print("merging...")
2370  for c0, c1 in filter(lambda x: len(x[0].members) > 1,
2371  itertools.combinations(self.clusters, 2)):
2372  n0, n1 = [c.members[0] for c in (c0, c1)]
2373  _ = self.stath0[n0]
2374  _ = self.stath1[n1]
2375  rmsd, _ = self.rmsd()
2376  if (rmsd < 2*rmsd_cutoff and
2377  self.have_close_members(c0, c1, rmsd_cutoff, metric)):
2378  to_merge.append((c0, c1))
2379 
2380  for c0, c in reversed(to_merge):
2381  self.merge(c0, c)
2382 
2383  # keep only full clusters
2384  self.clusters = [c for c in
2385  filter(lambda x: len(x.members) > 0, self.clusters)]
2386 
2387  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2388  '''
2389  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2390  '''
2391  print("check close members for clusters " + str(c0.cluster_id) +
2392  " and " + str(c1.cluster_id))
2393  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2394  _ = self.stath0[n0]
2395  _ = self.stath1[n1]
2396  rmsd, _ = self.rmsd(metric=metric)
2397  if rmsd < rmsd_cutoff:
2398  return True
2399 
2400  return False
2401 
2402  def merge(self, c0, c1):
2403  '''
2404  merge two clusters
2405  '''
2406  c0 += c1
2407  c1.members = []
2408  c1.data = {}
2409 
2410  def rmsd_helper(self, sels0, sels1, metric):
2411  '''
2412  a function that returns the permutation best_sel of sels0 that
2413  minimizes metric
2414  '''
2415  best_rmsd2 = float('inf')
2416  best_sel = None
2417  if self.issymmetricsel[sels0[0]]:
2418  # this cases happens when symmetries were defined
2419  N = len(sels0)
2420  for offset in range(N):
2421  sels = [sels0[(offset+i) % N] for i in range(N)]
2422  sel0 = sels[0]
2423  sel1 = sels1[0]
2424  r = metric(sel0, sel1)
2425  rmsd2 = r*r*N
2426  if rmsd2 < best_rmsd2:
2427  best_rmsd2 = rmsd2
2428  best_sel = sels
2429  else:
2430  for sels in itertools.permutations(sels0):
2431  rmsd2 = 0.0
2432  for sel0, sel1 in itertools.takewhile(
2433  lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2434  r = metric(sel0, sel1)
2435  rmsd2 += r*r
2436  if rmsd2 < best_rmsd2:
2437  best_rmsd2 = rmsd2
2438  best_sel = sels
2439  return best_sel, best_rmsd2
2440 
2441  def compute_all_pairwise_rmsd(self):
2442  for d0 in self.stath0:
2443  for d1 in self.stath1:
2444  rmsd, _ = self.rmsd()
2445 
2446  def rmsd(self, metric=IMP.atom.get_rmsd):
2447  '''
2448  Computes the RMSD. Resolves ambiguous pairs assignments
2449  '''
2450  # here we memoize the rmsd and molecular assignment so that it's
2451  # not done multiple times
2452  n0 = self.stath0.current_index
2453  n1 = self.stath1.current_index
2454  if ((n0, n1) in self.pairwise_rmsd) \
2455  and ((n0, n1) in self.pairwise_molecular_assignment):
2456  return (self.pairwise_rmsd[(n0, n1)],
2457  self.pairwise_molecular_assignment[(n0, n1)])
2458 
2459  if self.alignment:
2460  self.align()
2461  # if it's not yet memoized
2462  total_rmsd = 0.0
2463  total_N = 0
2464  # this is a dictionary which keys are the molecule names, and values
2465  # are the list of IMP.atom.Selection for all molecules that share
2466  # the molecule name
2467  molecular_assignment = {}
2468  for molname, sels0 in self.seldict0.items():
2469  sels_best_order, best_rmsd2 = \
2470  self.rmsd_helper(sels0, self.seldict1[molname], metric)
2471 
2472  Ncoords = len(sels_best_order[0].get_selected_particles())
2473  Ncopies = len(self.seldict1[molname])
2474  total_rmsd += Ncoords*best_rmsd2
2475  total_N += Ncoords*Ncopies
2476 
2477  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2478  p0 = sel0.get_selected_particles()[0]
2479  p1 = sel1.get_selected_particles()[0]
2480  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2481  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2482  c0 = IMP.atom.Copy(m0).get_copy_index()
2483  c1 = IMP.atom.Copy(m1).get_copy_index()
2484  molecular_assignment[(molname, c0)] = (molname, c1)
2485 
2486  total_rmsd = math.sqrt(total_rmsd/total_N)
2487 
2488  self.pairwise_rmsd[(n0, n1)] = total_rmsd
2489  self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2490  self.pairwise_rmsd[(n1, n0)] = total_rmsd
2491  self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2492  return total_rmsd, molecular_assignment
2493 
2494  def set_reference(self, reference, cluster):
2495  """
2496  Fix the reference structure for structural alignment, rmsd and
2497  chain assignment
2498 
2499  @param reference can be either "Absolute" (cluster center of the
2500  first cluster) or Relative (cluster center of the current
2501  cluster)
2502  """
2503  if reference == "Absolute":
2504  _ = self.stath0[0]
2505  elif reference == "Relative":
2506  if cluster.center_index:
2507  n0 = cluster.center_index
2508  else:
2509  n0 = cluster.members[0]
2510  _ = self.stath0[n0]
2511 
2513  """
2514  compute the molecular assignments between multiple copies
2515  of the same sequence. It changes the Copy index of Molecules
2516  """
2517  _ = self.stath1[n1]
2518  _, molecular_assignment = self.rmsd()
2519  for (m0, c0), (m1, c1) in molecular_assignment.items():
2520  mol0 = self.molcopydict0[m0][c0]
2521  mol1 = self.molcopydict1[m1][c1]
2522  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2523  p1 = IMP.atom.Copy(mol1).get_particle()
2524  p1.set_value(cik0, c0)
2525 
2527  """
2528  Undo the Copy index assignment
2529  """
2530  _ = self.stath1[n1]
2531  _, molecular_assignment = self.rmsd()
2532  for (m0, c0), (m1, c1) in molecular_assignment.items():
2533  mol0 = self.molcopydict0[m0][c0]
2534  mol1 = self.molcopydict1[m1][c1]
2535  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2536  p1 = IMP.atom.Copy(mol1).get_particle()
2537  p1.set_value(cik0, c1)
2538 
2539  ####################
2540  # Container Functions
2541  ####################
2542 
2543  def __repr__(self):
2544  s = "AnalysisReplicaExchange\n"
2545  s += "---- number of clusters %s \n" % str(len(self.clusters))
2546  s += "---- number of models %s \n" % str(len(self.stath0))
2547  return s
2548 
2549  def __getitem__(self, int_slice_adaptor):
2550  if isinstance(int_slice_adaptor, int):
2551  return self.clusters[int_slice_adaptor]
2552  elif isinstance(int_slice_adaptor, slice):
2553  return self.__iter__(int_slice_adaptor)
2554  else:
2555  raise TypeError("Unknown Type")
2556 
2557  def __len__(self):
2558  return len(self.clusters)
2559 
2560  def __iter__(self, slice_key=None):
2561  if slice_key is None:
2562  for i in range(len(self)):
2563  yield self[i]
2564  else:
2565  for i in range(len(self))[slice_key]:
2566  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2446
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignment.
Definition: macros.py:2494
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1775
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1078
def precision
Compute the precision of a cluster.
Definition: macros.py:1876
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:889
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:112
A container for models organized into clusters.
Definition: output.py:1510
Sample using molecular dynamics.
Definition: samplers.py:327
def aggregate
initial filling of the clusters.
Definition: macros.py:2332
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:634
A macro to help setup and run replica exchange.
Definition: macros.py:68
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:635
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1841
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1925
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1175
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1632
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1713
def __init__
Construction of the Class.
Definition: macros.py:1587
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:794
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1730
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2526
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1648
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2410
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1816
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1033
def apply_molecular_assignments
compute the molecular assignments between multiple copies of the same sequence.
Definition: macros.py:2512
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1576
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:583
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2359
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:942
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1798
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:953
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def __init__
Constructor.
Definition: macros.py:133
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:22
def update_seldicts
Update the seldicts.
Definition: macros.py:2301
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2290
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:555
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1677
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:203
Collect timing information.
Definition: tools.py:127
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1641
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9522
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:47
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2402
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:636
The general base class for IMP exceptions.
Definition: exception.h:48
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1257
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1723
Class to handle individual particles of a Model object.
Definition: Particle.h:43
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:805
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1905
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:612
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1664
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1763
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2387
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1151
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1738
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:512
Sample using replica exchange.
Definition: samplers.py:432
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27