IMP logo
IMP Reference Guide  2.18.0
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
7 import IMP.pmi.tools
8 import IMP.pmi.samplers
9 import IMP.pmi.output
10 import IMP.pmi.analysis
11 import IMP.pmi.io
12 import IMP.pmi.alphabets
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import os
17 try:
18  from pathlib import Path
19 except ImportError: # Use bundled pathlib on Python 2 without pathlib
20  from IMP._compat_pathlib import Path
21 import glob
22 from operator import itemgetter
23 from collections import defaultdict
24 import numpy as np
25 import itertools
26 import warnings
27 import math
28 
29 
30 class _MockMPIValues(object):
31  """Replace samplers.MPI_values when in test mode"""
32  def get_percentile(self, name):
33  return 0.
34 
35 
36 class _RMFRestraints(object):
37  """All restraints that are written out to the RMF file"""
38  def __init__(self, model, user_restraints):
39  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
40  self._user_restraints = user_restraints if user_restraints else []
41 
42  def __len__(self):
43  return (len(self._user_restraints)
44  + self._rmf_rs.get_number_of_restraints())
45 
46  def __bool__(self):
47  return len(self) > 0
48  __nonzero__ = __bool__ # Python 2 compatibility
49 
50  def __getitem__(self, i):
51  class FakePMIWrapper(object):
52  def __init__(self, r):
53  self.r = IMP.RestraintSet.get_from(r)
54 
55  def get_restraint(self):
56  return self.r
57 
58  lenuser = len(self._user_restraints)
59  if 0 <= i < lenuser:
60  return self._user_restraints[i]
61  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
62  r = self._rmf_rs.get_restraint(i - lenuser)
63  return FakePMIWrapper(r)
64  else:
65  raise IndexError("Out of range")
66 
67 
68 class ReplicaExchange(object):
69  """A macro to help setup and run replica exchange.
70  Supports Monte Carlo and molecular dynamics.
71  Produces trajectory RMF files, best PDB structures,
72  and output stat files.
73  """
74  def __init__(self, model, root_hier,
75  monte_carlo_sample_objects=None,
76  molecular_dynamics_sample_objects=None,
77  output_objects=[],
78  rmf_output_objects=None,
79  monte_carlo_temperature=1.0,
80  simulated_annealing=False,
81  simulated_annealing_minimum_temperature=1.0,
82  simulated_annealing_maximum_temperature=2.5,
83  simulated_annealing_minimum_temperature_nframes=100,
84  simulated_annealing_maximum_temperature_nframes=100,
85  replica_exchange_minimum_temperature=1.0,
86  replica_exchange_maximum_temperature=2.5,
87  replica_exchange_swap=True,
88  num_sample_rounds=1,
89  number_of_best_scoring_models=500,
90  monte_carlo_steps=10,
91  self_adaptive=False,
92  molecular_dynamics_steps=10,
93  molecular_dynamics_max_time_step=1.0,
94  number_of_frames=1000,
95  save_coordinates_mode="lowest_temperature",
96  nframes_write_coordinates=1,
97  write_initial_rmf=True,
98  initial_rmf_name_suffix="initial",
99  stat_file_name_suffix="stat",
100  best_pdb_name_suffix="model",
101  mmcif=False,
102  do_clean_first=True,
103  do_create_directories=True,
104  global_output_directory="./",
105  rmf_dir="rmfs/",
106  best_pdb_dir="pdbs/",
107  replica_stat_file_suffix="stat_replica",
108  em_object_for_rmf=None,
109  atomistic=False,
110  replica_exchange_object=None,
111  test_mode=False,
112  score_moved=False):
113  """Constructor.
114  @param model The IMP model
115  @param root_hier Top-level (System)hierarchy
116  @param monte_carlo_sample_objects Objects for MC sampling, which
117  should generally be a simple list of Mover objects, e.g.
118  from DegreesOfFreedom.get_movers().
119  @param molecular_dynamics_sample_objects Objects for MD sampling,
120  which should generally be a simple list of particles.
121  @param output_objects A list of structural objects and restraints
122  that will be included in output (ie, statistics "stat"
123  files). Any object that provides a get_output() method
124  can be used here. If None is passed
125  the macro will not write stat files.
126  @param rmf_output_objects A list of structural objects and
127  restraints that will be included in rmf. Any object
128  that provides a get_output() method can be used here.
129  @param monte_carlo_temperature MC temp (may need to be optimized
130  based on post-sampling analysis)
131  @param simulated_annealing If True, perform simulated annealing
132  @param simulated_annealing_minimum_temperature Should generally be
133  the same as monte_carlo_temperature.
134  @param simulated_annealing_minimum_temperature_nframes Number of
135  frames to compute at minimum temperature.
136  @param simulated_annealing_maximum_temperature_nframes Number of
137  frames to compute at
138  temps > simulated_annealing_maximum_temperature.
139  @param replica_exchange_minimum_temperature Low temp for REX; should
140  generally be the same as monte_carlo_temperature.
141  @param replica_exchange_maximum_temperature High temp for REX
142  @param replica_exchange_swap Boolean, enable disable temperature
143  swap (Default=True)
144  @param num_sample_rounds Number of rounds of MC/MD per cycle
145  @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
146  models to keep around for analysis.
147  @param mmcif If True, write best scoring models in mmCIF format;
148  if False (the default), write in legacy PDB format.
149  @param best_pdb_dir The directory under `global_output_directory`
150  where best-scoring PDB/mmCIF files are written.
151  @param best_pdb_name_suffix Part of the file name for best-scoring
152  PDB/mmCIF files.
153  @param monte_carlo_steps Number of MC steps per round
154  @param self_adaptive self adaptive scheme for Monte Carlo movers
155  @param molecular_dynamics_steps Number of MD steps per round
156  @param molecular_dynamics_max_time_step Max time step for MD
157  @param number_of_frames Number of REX frames to run
158  @param save_coordinates_mode string: how to save coordinates.
159  "lowest_temperature" (default) only the lowest temperatures
160  is saved
161  "25th_score" all replicas whose score is below the 25th
162  percentile
163  "50th_score" all replicas whose score is below the 50th
164  percentile
165  "75th_score" all replicas whose score is below the 75th
166  percentile
167  @param nframes_write_coordinates How often to write the coordinates
168  of a frame
169  @param write_initial_rmf Write the initial configuration
170  @param global_output_directory Folder that will be created to house
171  output.
172  @param test_mode Set to True to avoid writing any files, just test
173  one frame.
174  @param score_moved If True, attempt to speed up Monte Carlo
175  sampling by caching scoring function terms on particles
176  that didn't move.
177  """
178  self.model = model
179  self.vars = {}
180 
181  # add check hierarchy is multistate
182  self.output_objects = output_objects
183  self.rmf_output_objects = rmf_output_objects
184  if (isinstance(root_hier, IMP.atom.Hierarchy)
185  and root_hier.get_name() == 'System'):
186  if self.output_objects is not None:
187  self.output_objects.append(
188  IMP.pmi.io.TotalScoreOutput(self.model))
189  if self.rmf_output_objects is not None:
190  self.rmf_output_objects.append(
191  IMP.pmi.io.TotalScoreOutput(self.model))
192  self.root_hier = root_hier
193  states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
194  self.vars["number_of_states"] = len(states)
195  if len(states) > 1:
196  self.root_hiers = states
197  self.is_multi_state = True
198  else:
199  self.root_hier = root_hier
200  self.is_multi_state = False
201  else:
202  raise TypeError("Must provide System hierarchy (root_hier)")
203 
204  self._rmf_restraints = _RMFRestraints(model, None)
205  self.em_object_for_rmf = em_object_for_rmf
206  self.monte_carlo_sample_objects = monte_carlo_sample_objects
207  self.vars["self_adaptive"] = self_adaptive
208  self.molecular_dynamics_sample_objects = \
209  molecular_dynamics_sample_objects
210  self.replica_exchange_object = replica_exchange_object
211  self.molecular_dynamics_max_time_step = \
212  molecular_dynamics_max_time_step
213  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
214  self.vars["replica_exchange_minimum_temperature"] = \
215  replica_exchange_minimum_temperature
216  self.vars["replica_exchange_maximum_temperature"] = \
217  replica_exchange_maximum_temperature
218  self.vars["replica_exchange_swap"] = replica_exchange_swap
219  self.vars["simulated_annealing"] = simulated_annealing
220  self.vars["simulated_annealing_minimum_temperature"] = \
221  simulated_annealing_minimum_temperature
222  self.vars["simulated_annealing_maximum_temperature"] = \
223  simulated_annealing_maximum_temperature
224  self.vars["simulated_annealing_minimum_temperature_nframes"] = \
225  simulated_annealing_minimum_temperature_nframes
226  self.vars["simulated_annealing_maximum_temperature_nframes"] = \
227  simulated_annealing_maximum_temperature_nframes
228 
229  self.vars["num_sample_rounds"] = num_sample_rounds
230  self.vars[
231  "number_of_best_scoring_models"] = number_of_best_scoring_models
232  self.vars["monte_carlo_steps"] = monte_carlo_steps
233  self.vars["molecular_dynamics_steps"] = molecular_dynamics_steps
234  self.vars["number_of_frames"] = number_of_frames
235  if save_coordinates_mode not in ("lowest_temperature", "25th_score",
236  "50th_score", "75th_score"):
237  raise Exception("save_coordinates_mode has unrecognized value")
238  else:
239  self.vars["save_coordinates_mode"] = save_coordinates_mode
240  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
241  self.vars["write_initial_rmf"] = write_initial_rmf
242  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
243  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
244  self.vars["mmcif"] = mmcif
245  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
246  self.vars["do_clean_first"] = do_clean_first
247  self.vars["do_create_directories"] = do_create_directories
248  self.vars["global_output_directory"] = global_output_directory
249  self.vars["rmf_dir"] = rmf_dir
250  self.vars["best_pdb_dir"] = best_pdb_dir
251  self.vars["atomistic"] = atomistic
252  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
253  self.vars["geometries"] = None
254  self.test_mode = test_mode
255  self.score_moved = score_moved
256 
257  def add_geometries(self, geometries):
258  if self.vars["geometries"] is None:
259  self.vars["geometries"] = list(geometries)
260  else:
261  self.vars["geometries"].extend(geometries)
262 
263  def show_info(self):
264  print("ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
265  "rmfs/*.rmf3 for each replica ")
266  print("--- it stores the best scoring pdb models in pdbs/")
267  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
268  "lowest temperature")
269  print("--- variables:")
270  keys = list(self.vars.keys())
271  keys.sort()
272  for v in keys:
273  print("------", v.ljust(30), self.vars[v])
274 
275  def get_replica_exchange_object(self):
276  return self.replica_exchange_object
277 
278  def _add_provenance(self, sampler_md, sampler_mc):
279  """Record details about the sampling in the IMP Hierarchies"""
280  iterations = 0
281  if sampler_md:
282  method = "Molecular Dynamics"
283  iterations += self.vars["molecular_dynamics_steps"]
284  if sampler_mc:
285  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
286  iterations += self.vars["monte_carlo_steps"]
287  # If no sampling is actually done, no provenance to write
288  if iterations == 0 or self.vars["number_of_frames"] == 0:
289  return
290  iterations *= self.vars["num_sample_rounds"]
291 
292  pi = self.model.add_particle("sampling")
294  self.model, pi, method, self.vars["number_of_frames"],
295  iterations)
296  p.set_number_of_replicas(
297  self.replica_exchange_object.get_number_of_replicas())
298  IMP.pmi.tools._add_pmi_provenance(self.root_hier)
299  IMP.core.add_provenance(self.model, self.root_hier, p)
300 
301  def execute_macro(self):
302  temp_index_factor = 100000.0
303  samplers = []
304  sampler_mc = None
305  sampler_md = None
306  if self.monte_carlo_sample_objects is not None:
307  print("Setting up MonteCarlo")
308  sampler_mc = IMP.pmi.samplers.MonteCarlo(
309  self.model, self.monte_carlo_sample_objects,
310  self.vars["monte_carlo_temperature"],
311  score_moved=self.score_moved)
312  if self.vars["simulated_annealing"]:
313  tmin = self.vars["simulated_annealing_minimum_temperature"]
314  tmax = self.vars["simulated_annealing_maximum_temperature"]
315  nfmin = self.vars[
316  "simulated_annealing_minimum_temperature_nframes"]
317  nfmax = self.vars[
318  "simulated_annealing_maximum_temperature_nframes"]
319  sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
320  if self.vars["self_adaptive"]:
321  sampler_mc.set_self_adaptive(
322  isselfadaptive=self.vars["self_adaptive"])
323  if self.output_objects is not None:
324  self.output_objects.append(sampler_mc)
325  if self.rmf_output_objects is not None:
326  self.rmf_output_objects.append(sampler_mc)
327  samplers.append(sampler_mc)
328 
329  if self.molecular_dynamics_sample_objects is not None:
330  print("Setting up MolecularDynamics")
332  self.model, self.molecular_dynamics_sample_objects,
333  self.vars["monte_carlo_temperature"],
334  maximum_time_step=self.molecular_dynamics_max_time_step)
335  if self.vars["simulated_annealing"]:
336  tmin = self.vars["simulated_annealing_minimum_temperature"]
337  tmax = self.vars["simulated_annealing_maximum_temperature"]
338  nfmin = self.vars[
339  "simulated_annealing_minimum_temperature_nframes"]
340  nfmax = self.vars[
341  "simulated_annealing_maximum_temperature_nframes"]
342  sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
343  if self.output_objects is not None:
344  self.output_objects.append(sampler_md)
345  if self.rmf_output_objects is not None:
346  self.rmf_output_objects.append(sampler_md)
347  samplers.append(sampler_md)
348 # -------------------------------------------------------------------------
349 
350  print("Setting up ReplicaExchange")
352  self.model, self.vars["replica_exchange_minimum_temperature"],
353  self.vars["replica_exchange_maximum_temperature"], samplers,
354  replica_exchange_object=self.replica_exchange_object)
355  self.replica_exchange_object = rex.rem
356 
357  myindex = rex.get_my_index()
358  if self.output_objects is not None:
359  self.output_objects.append(rex)
360  if self.rmf_output_objects is not None:
361  self.rmf_output_objects.append(rex)
362  # must reset the minimum temperature due to the
363  # different binary length of rem.get_my_parameter double and python
364  # float
365  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
366 
367 # -------------------------------------------------------------------------
368 
369  globaldir = self.vars["global_output_directory"] + "/"
370  rmf_dir = globaldir + self.vars["rmf_dir"]
371  pdb_dir = globaldir + self.vars["best_pdb_dir"]
372 
373  if not self.test_mode:
374  if self.vars["do_clean_first"]:
375  pass
376 
377  if self.vars["do_create_directories"]:
378 
379  try:
380  os.makedirs(globaldir)
381  except: # noqa: E722
382  pass
383  try:
384  os.makedirs(rmf_dir)
385  except: # noqa: E722
386  pass
387 
388  if not self.is_multi_state:
389  try:
390  os.makedirs(pdb_dir)
391  except: # noqa: E722
392  pass
393  else:
394  for n in range(self.vars["number_of_states"]):
395  try:
396  os.makedirs(pdb_dir + "/" + str(n))
397  except: # noqa: E722
398  pass
399 
400 # -------------------------------------------------------------------------
401 
403  if self.output_objects is not None:
404  self.output_objects.append(sw)
405  if self.rmf_output_objects is not None:
406  self.rmf_output_objects.append(sw)
407 
408  print("Setting up stat file")
409  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
410  low_temp_stat_file = globaldir + \
411  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
412 
413  # Ensure model is updated before saving init files
414  if not self.test_mode:
415  self.model.update()
416 
417  if not self.test_mode:
418  if self.output_objects is not None:
419  output.init_stat2(low_temp_stat_file,
420  self.output_objects,
421  extralabels=["rmf_file", "rmf_frame_index"])
422  else:
423  print("Stat file writing is disabled")
424 
425  if self.rmf_output_objects is not None:
426  print("Stat info being written in the rmf file")
427 
428  print("Setting up replica stat file")
429  replica_stat_file = globaldir + \
430  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
431  if not self.test_mode:
432  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
433 
434  if not self.test_mode:
435  print("Setting up best pdb files")
436  if not self.is_multi_state:
437  if self.vars["number_of_best_scoring_models"] > 0:
438  output.init_pdb_best_scoring(
439  pdb_dir + "/" + self.vars["best_pdb_name_suffix"],
440  self.root_hier,
441  self.vars["number_of_best_scoring_models"],
442  replica_exchange=True,
443  mmcif=self.vars['mmcif'],
444  best_score_file=globaldir + "best.scores.rex.py")
445  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
446  output.write_psf(
447  pdb_dir + "/" + "model.psf",
448  pdb_dir + "/" +
449  self.vars["best_pdb_name_suffix"] + pdbext)
450  else:
451  if self.vars["number_of_best_scoring_models"] > 0:
452  for n in range(self.vars["number_of_states"]):
453  output.init_pdb_best_scoring(
454  pdb_dir + "/" + str(n) + "/" +
455  self.vars["best_pdb_name_suffix"],
456  self.root_hiers[n],
457  self.vars["number_of_best_scoring_models"],
458  replica_exchange=True,
459  mmcif=self.vars['mmcif'],
460  best_score_file=globaldir + "best.scores.rex.py")
461  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
462  output.write_psf(
463  pdb_dir + "/" + str(n) + "/" + "model.psf",
464  pdb_dir + "/" + str(n) + "/" +
465  self.vars["best_pdb_name_suffix"] + pdbext)
466 # ---------------------------------------------
467 
468  if self.em_object_for_rmf is not None:
469  output_hierarchies = [
470  self.root_hier,
471  self.em_object_for_rmf.get_density_as_hierarchy(
472  )]
473  else:
474  output_hierarchies = [self.root_hier]
475 
476  if not self.test_mode:
477  print("Setting up and writing initial rmf coordinate file")
478  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
479  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
480  output_hierarchies,
481  listofobjects=self.rmf_output_objects)
482  if self._rmf_restraints:
483  output.add_restraints_to_rmf(
484  init_suffix + "." + str(myindex) + ".rmf3",
485  self._rmf_restraints)
486  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
487  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
488 
489  if not self.test_mode:
490  mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
491  else:
492  mpivs = _MockMPIValues()
493 
494  self._add_provenance(sampler_md, sampler_mc)
495 
496  if not self.test_mode:
497  print("Setting up production rmf files")
498  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
499  output.init_rmf(rmfname, output_hierarchies,
500  geometries=self.vars["geometries"],
501  listofobjects=self.rmf_output_objects)
502 
503  if self._rmf_restraints:
504  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
505 
506  ntimes_at_low_temp = 0
507 
508  if myindex == 0:
509  self.show_info()
510  self.replica_exchange_object.set_was_used(True)
511  nframes = self.vars["number_of_frames"]
512  if self.test_mode:
513  nframes = 1
514  for i in range(nframes):
515  if self.test_mode:
516  score = 0.
517  else:
518  for nr in range(self.vars["num_sample_rounds"]):
519  if sampler_md is not None:
520  sampler_md.optimize(
521  self.vars["molecular_dynamics_steps"])
522  if sampler_mc is not None:
523  sampler_mc.optimize(self.vars["monte_carlo_steps"])
525  self.model).evaluate(False)
526  mpivs.set_value("score", score)
527  output.set_output_entry("score", score)
528 
529  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
530 
531  if self.vars["save_coordinates_mode"] == "lowest_temperature":
532  save_frame = (min_temp_index == my_temp_index)
533  elif self.vars["save_coordinates_mode"] == "25th_score":
534  score_perc = mpivs.get_percentile("score")
535  save_frame = (score_perc*100.0 <= 25.0)
536  elif self.vars["save_coordinates_mode"] == "50th_score":
537  score_perc = mpivs.get_percentile("score")
538  save_frame = (score_perc*100.0 <= 50.0)
539  elif self.vars["save_coordinates_mode"] == "75th_score":
540  score_perc = mpivs.get_percentile("score")
541  save_frame = (score_perc*100.0 <= 75.0)
542 
543  # Ensure model is updated before saving output files
544  if save_frame and not self.test_mode:
545  self.model.update()
546 
547  if save_frame:
548  print("--- frame %s score %s " % (str(i), str(score)))
549 
550  if not self.test_mode:
551  if i % self.vars["nframes_write_coordinates"] == 0:
552  print('--- writing coordinates')
553  if self.vars["number_of_best_scoring_models"] > 0:
554  output.write_pdb_best_scoring(score)
555  output.write_rmf(rmfname)
556  output.set_output_entry("rmf_file", rmfname)
557  output.set_output_entry("rmf_frame_index",
558  ntimes_at_low_temp)
559  else:
560  output.set_output_entry("rmf_file", rmfname)
561  output.set_output_entry("rmf_frame_index", '-1')
562  if self.output_objects is not None:
563  output.write_stat2(low_temp_stat_file)
564  ntimes_at_low_temp += 1
565 
566  if not self.test_mode:
567  output.write_stat2(replica_stat_file)
568  if self.vars["replica_exchange_swap"]:
569  rex.swap_temp(i, score)
570  for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
571  p.add_replica_exchange(state, self)
572 
573  if not self.test_mode:
574  print("closing production rmf files")
575  output.close_rmf(rmfname)
576 
577 
578 @IMP.deprecated_object("2.18", "Use ReplicaExchange instead")
579 class ReplicaExchange0(ReplicaExchange):
580  pass
581 
582 
583 class BuildSystem(object):
584  """A macro to build a IMP::pmi::topology::System based on a
585  TopologyReader object.
586 
587  Easily create multi-state systems by calling this macro
588  repeatedly with different TopologyReader objects!
589  A useful function is get_molecules() which returns the PMI Molecules
590  grouped by state as a dictionary with key = (molecule name),
591  value = IMP.pmi.topology.Molecule
592  Quick multi-state system:
593  @code{.python}
594  model = IMP.Model()
595  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
596  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
597  bs = IMP.pmi.macros.BuildSystem(model)
598  bs.add_state(reader1)
599  bs.add_state(reader2)
600  bs.execute_macro() # build everything including degrees of freedom
601  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
602  ### now you have a two state system, you add restraints etc
603  @endcode
604  @note The "domain name" entry of the topology reader is not used.
605  All molecules are set up by the component name, but split into rigid bodies
606  as requested.
607  """
608 
609  _alphabets = {'DNA': IMP.pmi.alphabets.dna,
610  'RNA': IMP.pmi.alphabets.rna}
611 
612  def __init__(self, model, sequence_connectivity_scale=4.0,
613  force_create_gmm_files=False, resolutions=[1, 10]):
614  """Constructor
615  @param model An IMP Model
616  @param sequence_connectivity_scale For scaling the connectivity
617  restraint
618  @param force_create_gmm_files If True, will sample and create GMMs
619  no matter what. If False, will only sample if the
620  files don't exist. If number of Gaussians is zero, won't
621  do anything.
622  @param resolutions The resolutions to build for structured regions
623  """
624  self.model = model
625  self.system = IMP.pmi.topology.System(self.model)
626  self._readers = [] # the TopologyReaders (one per state)
627  # TempResidues for each domain key=unique name,
628  # value=(atomic_res,non_atomic_res).
629  self._domain_res = []
630  self._domains = [] # key = domain unique name, value = Component
631  self.force_create_gmm_files = force_create_gmm_files
632  self.resolutions = resolutions
633 
634  def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
635  chain_ids=None):
636  """Add a state using the topology info in a
637  IMP::pmi::topology::TopologyReader object.
638  When you are done adding states, call execute_macro()
639  @param reader The TopologyReader object
640  @param fasta_name_map dictionary for converting protein names
641  found in the fasta file
642  @param chain_ids A list or string of chain IDs for assigning to
643  newly-created molecules, e.g.
644  `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
645  If not specified, chain IDs A through Z are assigned, then
646  AA through AZ, then BA through BZ, and so on, in the same
647  fashion as PDB.
648  """
649  state = self.system.create_state()
650  self._readers.append(reader)
651  # key is unique name, value is (atomic res, nonatomicres)
652  these_domain_res = {}
653  these_domains = {} # key is unique name, value is _Component
654  if chain_ids is None:
655  chain_ids = IMP.pmi.output._ChainIDs()
656  numchain = 0
657 
658  # setup representation
659  # loop over molecules, copies, then domains
660  for molname in reader.get_molecules():
661  copies = reader.get_molecules()[molname].domains
662  for nc, copyname in enumerate(copies):
663  print("BuildSystem.add_state: setting up molecule %s copy "
664  "number %s" % (molname, str(nc)))
665  copy = copies[copyname]
666  # option to not rename chains
667  if keep_chain_id:
668  all_chains = [c for c in copy if c.chain is not None]
669  if all_chains:
670  chain_id = all_chains[0].chain
671  else:
672  chain_id = chain_ids[numchain]
673  warnings.warn(
674  "No PDBs specified for %s, so keep_chain_id has "
675  "no effect; using default chain ID '%s'"
676  % (molname, chain_id), IMP.pmi.ParameterWarning)
677  else:
678  chain_id = chain_ids[numchain]
679  if nc == 0:
680  alphabet = IMP.pmi.alphabets.amino_acid
681  fasta_flag = copy[0].fasta_flag
682  if fasta_flag in self._alphabets:
683  alphabet = self._alphabets[fasta_flag]
685  copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
686  print("BuildSystem.add_state: molecule %s sequence has "
687  "%s residues" % (molname, len(seq)))
688  orig_mol = state.create_molecule(molname, seq, chain_id,
689  alphabet=alphabet)
690  mol = orig_mol
691  numchain += 1
692  else:
693  print("BuildSystem.add_state: creating a copy for "
694  "molecule %s" % molname)
695  mol = orig_mol.create_copy(chain_id)
696  numchain += 1
697 
698  for domainnumber, domain in enumerate(copy):
699  print("BuildSystem.add_state: ---- setting up domain %s "
700  "of molecule %s" % (domainnumber, molname))
701  # we build everything in the residue range, even if it
702  # extends beyond what's in the actual PDB file
703  these_domains[domain.get_unique_name()] = domain
704  if domain.residue_range == [] or \
705  domain.residue_range is None:
706  domain_res = mol.get_residues()
707  else:
708  start = domain.residue_range[0]+domain.pdb_offset
709  if domain.residue_range[1] == 'END':
710  end = len(mol.sequence)
711  else:
712  end = domain.residue_range[1]+domain.pdb_offset
713  domain_res = mol.residue_range(start-1, end-1)
714  print("BuildSystem.add_state: -------- domain %s of "
715  "molecule %s extends from residue %s to "
716  "residue %s "
717  % (domainnumber, molname, start, end))
718  if domain.pdb_file == "BEADS":
719  print("BuildSystem.add_state: -------- domain %s of "
720  "molecule %s represented by BEADS "
721  % (domainnumber, molname))
722  mol.add_representation(
723  domain_res,
724  resolutions=[domain.bead_size],
725  setup_particles_as_densities=(
726  domain.em_residues_per_gaussian != 0),
727  color=domain.color)
728  these_domain_res[domain.get_unique_name()] = \
729  (set(), domain_res)
730  elif domain.pdb_file == "IDEAL_HELIX":
731  print("BuildSystem.add_state: -------- domain %s of "
732  "molecule %s represented by IDEAL_HELIX "
733  % (domainnumber, molname))
734  emper = domain.em_residues_per_gaussian
735  mol.add_representation(
736  domain_res,
737  resolutions=self.resolutions,
738  ideal_helix=True,
739  density_residues_per_component=emper,
740  density_prefix=domain.density_prefix,
741  density_force_compute=self.force_create_gmm_files,
742  color=domain.color)
743  these_domain_res[domain.get_unique_name()] = \
744  (domain_res, set())
745  else:
746  print("BuildSystem.add_state: -------- domain %s of "
747  "molecule %s represented by pdb file %s "
748  % (domainnumber, molname, domain.pdb_file))
749  domain_atomic = mol.add_structure(domain.pdb_file,
750  domain.chain,
751  domain.residue_range,
752  domain.pdb_offset,
753  soft_check=True)
754  domain_non_atomic = domain_res - domain_atomic
755  if not domain.em_residues_per_gaussian:
756  mol.add_representation(
757  domain_atomic, resolutions=self.resolutions,
758  color=domain.color)
759  if len(domain_non_atomic) > 0:
760  mol.add_representation(
761  domain_non_atomic,
762  resolutions=[domain.bead_size],
763  color=domain.color)
764  else:
765  print("BuildSystem.add_state: -------- domain %s "
766  "of molecule %s represented by gaussians "
767  % (domainnumber, molname))
768  emper = domain.em_residues_per_gaussian
769  creategmm = self.force_create_gmm_files
770  mol.add_representation(
771  domain_atomic,
772  resolutions=self.resolutions,
773  density_residues_per_component=emper,
774  density_prefix=domain.density_prefix,
775  density_force_compute=creategmm,
776  color=domain.color)
777  if len(domain_non_atomic) > 0:
778  mol.add_representation(
779  domain_non_atomic,
780  resolutions=[domain.bead_size],
781  setup_particles_as_densities=True,
782  color=domain.color)
783  these_domain_res[domain.get_unique_name()] = (
784  domain_atomic, domain_non_atomic)
785  self._domain_res.append(these_domain_res)
786  self._domains.append(these_domains)
787  print('BuildSystem.add_state: State', len(self.system.states), 'added')
788 
789  def get_molecules(self):
790  """Return list of all molecules grouped by state.
791  For each state, it's a dictionary of Molecules where key is the
792  molecule name
793  """
794  return [s.get_molecules() for s in self.system.get_states()]
795 
796  def get_molecule(self, molname, copy_index=0, state_index=0):
797  return self.system.get_states()[state_index].get_molecules()[
798  molname][copy_index]
799 
800  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04,
801  max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
802  """Builds representations and sets up degrees of freedom"""
803  print("BuildSystem.execute_macro: building representations")
804  self.root_hier = self.system.build()
805 
806  print("BuildSystem.execute_macro: setting up degrees of freedom")
807  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
808  for nstate, reader in enumerate(self._readers):
809  rbs = reader.get_rigid_bodies()
810  srbs = reader.get_super_rigid_bodies()
811  csrbs = reader.get_chains_of_super_rigid_bodies()
812 
813  # add rigid bodies
814  domains_in_rbs = set()
815  for rblist in rbs:
816  print("BuildSystem.execute_macro: -------- building rigid "
817  "body %s" % (str(rblist)))
818  all_res = IMP.pmi.tools.OrderedSet()
819  bead_res = IMP.pmi.tools.OrderedSet()
820  for dname in rblist:
821  domain = self._domains[nstate][dname]
822  print("BuildSystem.execute_macro: -------- adding %s"
823  % (str(dname)))
824  all_res |= self._domain_res[nstate][dname][0]
825  bead_res |= self._domain_res[nstate][dname][1]
826  domains_in_rbs.add(dname)
827  all_res |= bead_res
828  print("BuildSystem.execute_macro: -------- creating rigid "
829  "body with max_trans %s max_rot %s "
830  "non_rigid_max_trans %s"
831  % (str(max_rb_trans), str(max_rb_rot),
832  str(max_bead_trans)))
833  self.dof.create_rigid_body(all_res,
834  nonrigid_parts=bead_res,
835  max_trans=max_rb_trans,
836  max_rot=max_rb_rot,
837  nonrigid_max_trans=max_bead_trans,
838  name="RigidBody %s" % dname)
839 
840  # if you have any domains not in an RB, set them as flexible beads
841  for dname, domain in self._domains[nstate].items():
842  if dname not in domains_in_rbs:
843  if domain.pdb_file != "BEADS":
844  warnings.warn(
845  "No rigid bodies set for %s. Residues read from "
846  "the PDB file will not be sampled - only regions "
847  "missing from the PDB will be treated flexibly. "
848  "To sample the entire sequence, use BEADS instead "
849  "of a PDB file name" % dname,
851  self.dof.create_flexible_beads(
852  self._domain_res[nstate][dname][1],
853  max_trans=max_bead_trans)
854 
855  # add super rigid bodies
856  for srblist in srbs:
857  print("BuildSystem.execute_macro: -------- building "
858  "super rigid body %s" % (str(srblist)))
859  all_res = IMP.pmi.tools.OrderedSet()
860  for dname in srblist:
861  print("BuildSystem.execute_macro: -------- adding %s"
862  % (str(dname)))
863  all_res |= self._domain_res[nstate][dname][0]
864  all_res |= self._domain_res[nstate][dname][1]
865 
866  print("BuildSystem.execute_macro: -------- creating super "
867  "rigid body with max_trans %s max_rot %s "
868  % (str(max_srb_trans), str(max_srb_rot)))
869  self.dof.create_super_rigid_body(
870  all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
871 
872  # add chains of super rigid bodies
873  for csrblist in csrbs:
874  all_res = IMP.pmi.tools.OrderedSet()
875  for dname in csrblist:
876  all_res |= self._domain_res[nstate][dname][0]
877  all_res |= self._domain_res[nstate][dname][1]
878  all_res = list(all_res)
879  all_res.sort(key=lambda r: r.get_index())
880  self.dof.create_main_chain_mover(all_res)
881  return self.root_hier, self.dof
882 
883 
884 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
885 class AnalysisReplicaExchange0(object):
886  """A macro for running all the basic operations of analysis.
887  Includes clustering, precision analysis, and making ensemble density maps.
888  A number of plots are also supported.
889  """
890  def __init__(self, model,
891  merge_directories=["./"],
892  stat_file_name_suffix="stat",
893  best_pdb_name_suffix="model",
894  do_clean_first=True,
895  do_create_directories=True,
896  global_output_directory="output/",
897  replica_stat_file_suffix="stat_replica",
898  global_analysis_result_directory="./analysis/",
899  test_mode=False):
900  """Constructor.
901  @param model The IMP model
902  @param stat_file_name_suffix
903  @param merge_directories The directories containing output files
904  @param best_pdb_name_suffix
905  @param do_clean_first
906  @param do_create_directories
907  @param global_output_directory Where everything is
908  @param replica_stat_file_suffix
909  @param global_analysis_result_directory
910  @param test_mode If True, nothing is changed on disk
911  """
912 
913  try:
914  from mpi4py import MPI
915  self.comm = MPI.COMM_WORLD
916  self.rank = self.comm.Get_rank()
917  self.number_of_processes = self.comm.size
918  except ImportError:
919  self.rank = 0
920  self.number_of_processes = 1
921 
922  self.test_mode = test_mode
923  self._protocol_output = []
924  self.cluster_obj = None
925  self.model = model
926  stat_dir = global_output_directory
927  self.stat_files = []
928  # it contains the position of the root directories
929  for rd in merge_directories:
930  stat_files = glob.glob(os.path.join(rd, stat_dir, "stat.*.out"))
931  if len(stat_files) == 0:
932  warnings.warn("no stat files found in %s"
933  % os.path.join(rd, stat_dir),
935  self.stat_files += stat_files
936 
937  def add_protocol_output(self, p):
938  """Capture details of the modeling protocol.
939  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
940  """
941  # Assume last state is the one we're interested in
942  self._protocol_output.append((p, p._last_state))
943 
944  def get_modeling_trajectory(self,
945  score_key="Total_Score",
946  rmf_file_key="rmf_file",
947  rmf_file_frame_key="rmf_frame_index",
948  outputdir="./",
949  get_every=1,
950  nframes_trajectory=10000):
951  """ Get a trajectory of the modeling run, for generating
952  demonstrative movies
953 
954  @param score_key The score for ranking models
955  @param rmf_file_key Key pointing to RMF filename
956  @param rmf_file_frame_key Key pointing to RMF frame number
957  @param outputdir The local output directory used in the run
958  @param get_every Extract every nth frame
959  @param nframes_trajectory Total number of frames of the trajectory
960  """
961  import math
962 
963  trajectory_models = IMP.pmi.io.get_trajectory_models(
964  self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
965  get_every)
966  score_list = list(map(float, trajectory_models[2]))
967 
968  max_score = max(score_list)
969  min_score = min(score_list)
970 
971  bins = [(max_score-min_score)*math.exp(-float(i))+min_score
972  for i in range(nframes_trajectory)]
973  binned_scores = [None]*nframes_trajectory
974  binned_model_indexes = [-1]*nframes_trajectory
975 
976  for model_index, s in enumerate(score_list):
977  bins_score_diffs = [abs(s-b) for b in bins]
978  bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
979  if binned_scores[bin_index] is None:
980  binned_scores[bin_index] = s
981  binned_model_indexes[bin_index] = model_index
982  else:
983  old_diff = abs(binned_scores[bin_index]-bins[bin_index])
984  new_diff = abs(s-bins[bin_index])
985  if new_diff < old_diff:
986  binned_scores[bin_index] = s
987  binned_model_indexes[bin_index] = model_index
988 
989  print(binned_scores)
990  print(binned_model_indexes)
991 
992  def _expand_ambiguity(self, prot, d):
993  """If using PMI2, expand the dictionary to include copies as
994  ambiguous options
995 
996  This also keeps the states separate.
997  """
998  newdict = {}
999  for key in d:
1000  val = d[key]
1001  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
1002  newdict[key] = val
1003  continue
1004  states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1005  if isinstance(val, tuple):
1006  start = val[0]
1007  stop = val[1]
1008  name = val[2]
1009  else:
1010  start = 1
1011  stop = -1
1012  name = val
1013  for nst in range(len(states)):
1014  sel = IMP.atom.Selection(prot, molecule=name, state_index=nst)
1015  copies = sel.get_selected_particles(with_representation=False)
1016  if len(copies) > 1:
1017  for nc in range(len(copies)):
1018  if len(states) > 1:
1019  newdict['%s.%i..%i' % (name, nst, nc)] = \
1020  (start, stop, name, nc, nst)
1021  else:
1022  newdict['%s..%i' % (name, nc)] = \
1023  (start, stop, name, nc, nst)
1024  else:
1025  newdict[key] = val
1026  return newdict
1027 
1028  def clustering(self,
1029  score_key="Total_Score",
1030  rmf_file_key="rmf_file",
1031  rmf_file_frame_key="rmf_frame_index",
1032  state_number=0,
1033  prefiltervalue=None,
1034  feature_keys=[],
1035  outputdir="./",
1036  alignment_components=None,
1037  number_of_best_scoring_models=10,
1038  rmsd_calculation_components=None,
1039  distance_matrix_file='distances.mat',
1040  load_distance_matrix_file=False,
1041  skip_clustering=False,
1042  number_of_clusters=1,
1043  display_plot=False,
1044  exit_after_display=True,
1045  get_every=1,
1046  first_and_last_frames=None,
1047  density_custom_ranges=None,
1048  write_pdb_with_centered_coordinates=False,
1049  voxel_size=5.0):
1050  """Get the best scoring models, compute a distance matrix,
1051  cluster them, and create density maps.
1052 
1053  Tuple format: "molname" just the molecule,
1054  or (start,stop,molname,copy_num(optional),state_num(optional)
1055  Can pass None for copy or state to ignore that field.
1056  If you don't pass a specific copy number
1057 
1058  @param score_key The score for ranking models.
1059  @param rmf_file_key Key pointing to RMF filename
1060  @param rmf_file_frame_key Key pointing to RMF frame number
1061  @param state_number State number to analyze
1062  @param prefiltervalue Only include frames where the
1063  score key is below this value
1064  @param feature_keys Keywords for which you want to
1065  calculate average, medians, etc.
1066  If you pass "Keyname" it'll include everything that matches
1067  "*Keyname*"
1068  @param outputdir The local output directory used in
1069  the run
1070  @param alignment_components Dictionary with keys=groupname,
1071  values are tuples for aligning the structures
1072  e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1073  @param number_of_best_scoring_models Num models to keep per run
1074  @param rmsd_calculation_components For calculating RMSD
1075  (same format as alignment_components)
1076  @param distance_matrix_file Where to store/read the
1077  distance matrix
1078  @param load_distance_matrix_file Try to load the distance
1079  matrix file
1080  @param skip_clustering Just extract the best scoring
1081  models and save the pdbs
1082  @param number_of_clusters Number of k-means clusters
1083  @param display_plot Display the distance matrix
1084  @param exit_after_display Exit after displaying distance
1085  matrix
1086  @param get_every Extract every nth frame
1087  @param first_and_last_frames A tuple with the first and last
1088  frames to be analyzed. Values are percentages!
1089  Default: get all frames
1090  @param density_custom_ranges For density calculation
1091  (same format as alignment_components)
1092  @param write_pdb_with_centered_coordinates
1093  @param voxel_size Used for the density output
1094  """
1095  # Track provenance information to be added to each output model
1096  prov = []
1097  self._outputdir = Path(outputdir).absolute()
1098  self._number_of_clusters = number_of_clusters
1099  for p, state in self._protocol_output:
1100  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1101 
1102  if self.test_mode:
1103  return
1104 
1105  if self.rank == 0:
1106  try:
1107  os.mkdir(outputdir)
1108  except: # noqa: E722
1109  pass
1110 
1111  if not load_distance_matrix_file:
1112  if len(self.stat_files) == 0:
1113  print("ERROR: no stat file found in the given path")
1114  return
1115  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1116  self.stat_files, self.number_of_processes)[self.rank]
1117 
1118  # read ahead to check if you need the PMI2 score key instead
1119  for k in (score_key, rmf_file_key, rmf_file_frame_key):
1120  if k in feature_keys:
1121  warnings.warn(
1122  "no need to pass " + k + " to feature_keys.",
1124  feature_keys.remove(k)
1125 
1126  best_models = IMP.pmi.io.get_best_models(
1127  my_stat_files, score_key, feature_keys, rmf_file_key,
1128  rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1129  rmf_file_list = best_models[0]
1130  rmf_file_frame_list = best_models[1]
1131  score_list = best_models[2]
1132  feature_keyword_list_dict = best_models[3]
1133 
1134 # ------------------------------------------------------------------------
1135 # collect all the files and scores
1136 # ------------------------------------------------------------------------
1137 
1138  if self.number_of_processes > 1:
1139  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1140  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1141  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1142  rmf_file_frame_list)
1143  for k in feature_keyword_list_dict:
1144  feature_keyword_list_dict[k] = \
1146  feature_keyword_list_dict[k])
1147 
1148  # sort by score and get the best scoring ones
1149  score_rmf_tuples = list(zip(score_list,
1150  rmf_file_list,
1151  rmf_file_frame_list,
1152  list(range(len(score_list)))))
1153 
1154  if density_custom_ranges:
1155  for k in density_custom_ranges:
1156  if not isinstance(density_custom_ranges[k], list):
1157  raise Exception("Density custom ranges: values must "
1158  "be lists of tuples")
1159 
1160  # keep subset of frames if requested
1161  if first_and_last_frames is not None:
1162  nframes = len(score_rmf_tuples)
1163  first_frame = int(first_and_last_frames[0] * nframes)
1164  last_frame = int(first_and_last_frames[1] * nframes)
1165  if last_frame > len(score_rmf_tuples):
1166  last_frame = -1
1167  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1168 
1169  # sort RMFs by the score_key in ascending order, and store the rank
1170  best_score_rmf_tuples = sorted(
1171  score_rmf_tuples,
1172  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1173  best_score_rmf_tuples = [t+(n,) for n, t in
1174  enumerate(best_score_rmf_tuples)]
1175  # Note in the provenance info that we only kept best-scoring models
1176  prov.append(IMP.pmi.io.FilterProvenance(
1177  "Best scoring", 0, number_of_best_scoring_models))
1178  # sort the feature scores in the same way
1179  best_score_feature_keyword_list_dict = defaultdict(list)
1180  for tpl in best_score_rmf_tuples:
1181  index = tpl[3]
1182  for f in feature_keyword_list_dict:
1183  best_score_feature_keyword_list_dict[f].append(
1184  feature_keyword_list_dict[f][index])
1185  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1186  best_score_rmf_tuples,
1187  self.number_of_processes)[self.rank]
1188 
1189  # expand the dictionaries to include ambiguous copies
1190  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1191  self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1192  if IMP.pmi.get_is_canonical(prot_ahead):
1193  if rmsd_calculation_components is not None:
1194  tmp = self._expand_ambiguity(
1195  prot_ahead, rmsd_calculation_components)
1196  if tmp != rmsd_calculation_components:
1197  print('Detected ambiguity, expand rmsd components to',
1198  tmp)
1199  rmsd_calculation_components = tmp
1200  if alignment_components is not None:
1201  tmp = self._expand_ambiguity(prot_ahead,
1202  alignment_components)
1203  if tmp != alignment_components:
1204  print('Detected ambiguity, expand alignment '
1205  'components to', tmp)
1206  alignment_components = tmp
1207 
1208 # -------------------------------------------------------------
1209 # read the coordinates
1210 # ------------------------------------------------------------
1211  rmsd_weights = IMP.pmi.io.get_bead_sizes(
1212  self.model, my_best_score_rmf_tuples[0],
1213  rmsd_calculation_components, state_number=state_number)
1215  self.model, my_best_score_rmf_tuples, alignment_components,
1216  rmsd_calculation_components, state_number=state_number)
1217 
1218  # note! the coordinates are simply float tuples, NOT decorators,
1219  # NOT Vector3D, NOR particles, because these object cannot be
1220  # serialized. We need serialization
1221  # for the parallel computation based on mpi.
1222 
1223  # dict:key=component name,val=coords per hit
1224  all_coordinates = got_coords[0]
1225 
1226  # same as above, limited to alignment bits
1227  alignment_coordinates = got_coords[1]
1228 
1229  # same as above, limited to RMSD bits
1230  rmsd_coordinates = got_coords[2]
1231 
1232  # dictionary with key=RMF, value=score rank
1233  rmf_file_name_index_dict = got_coords[3]
1234 
1235  # RMF file per hit
1236  all_rmf_file_names = got_coords[4]
1237 
1238 # ------------------------------------------------------------------------
1239 # optionally don't compute distance matrix or cluster, just write top files
1240 # ------------------------------------------------------------------------
1241  if skip_clustering:
1242  if density_custom_ranges:
1243  DensModule = IMP.pmi.analysis.GetModelDensity(
1244  density_custom_ranges, voxel=voxel_size)
1245 
1246  dircluster = os.path.join(outputdir,
1247  "all_models."+str(self.rank))
1248  try:
1249  os.mkdir(outputdir)
1250  except: # noqa: E722
1251  pass
1252  try:
1253  os.mkdir(dircluster)
1254  except: # noqa: E722
1255  pass
1256  clusstat = open(os.path.join(
1257  dircluster, "stat."+str(self.rank)+".out"), "w")
1258  for cnt, tpl in enumerate(my_best_score_rmf_tuples):
1259  rmf_name = tpl[1]
1260  rmf_frame_number = tpl[2]
1261  tmp_dict = {}
1262  index = tpl[4]
1263  for key in best_score_feature_keyword_list_dict:
1264  tmp_dict[key] = \
1265  best_score_feature_keyword_list_dict[key][index]
1266 
1267  if cnt == 0:
1268  prots, rs = \
1269  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1270  self.model, rmf_frame_number, rmf_name)
1271  else:
1272  linking_successful = \
1273  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1274  self.model, prots, rs, rmf_frame_number,
1275  rmf_name)
1276  if not linking_successful:
1277  continue
1278 
1279  if not prots:
1280  continue
1281 
1282  if IMP.pmi.get_is_canonical(prots[0]):
1283  states = IMP.atom.get_by_type(
1284  prots[0], IMP.atom.STATE_TYPE)
1285  prot = states[state_number]
1286  else:
1287  prot = prots[state_number]
1288 
1289  # get transformation aligning coordinates of
1290  # requested tuples to the first RMF file
1291  if cnt == 0:
1292  coords_f1 = alignment_coordinates[cnt]
1293  if cnt > 0:
1294  coords_f2 = alignment_coordinates[cnt]
1295  if coords_f2:
1297  coords_f1, coords_f2)
1298  transformation = Ali.align()[1]
1299  else:
1300  transformation = \
1302 
1303  rbs = set()
1304  for p in IMP.atom.get_leaves(prot):
1305  if not IMP.core.XYZR.get_is_setup(p):
1307  IMP.core.XYZR(p).set_radius(0.0001)
1308  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1309 
1311  rbm = IMP.core.RigidBodyMember(p)
1312  rb = rbm.get_rigid_body()
1313  rbs.add(rb)
1314  else:
1316  transformation)
1317  for rb in rbs:
1318  IMP.core.transform(rb, transformation)
1319 
1320  o = IMP.pmi.output.Output()
1321  self.model.update()
1322  out_pdb_fn = os.path.join(
1323  dircluster, str(cnt)+"."+str(self.rank)+".pdb")
1324  out_rmf_fn = os.path.join(
1325  dircluster, str(cnt)+"."+str(self.rank)+".rmf3")
1326  o.init_pdb(out_pdb_fn, prot)
1327  tc = write_pdb_with_centered_coordinates
1328  o.write_pdb(out_pdb_fn,
1329  translate_to_geometric_center=tc)
1330 
1331  tmp_dict["local_pdb_file_name"] = \
1332  os.path.basename(out_pdb_fn)
1333  tmp_dict["rmf_file_full_path"] = rmf_name
1334  tmp_dict["local_rmf_file_name"] = \
1335  os.path.basename(out_rmf_fn)
1336  tmp_dict["local_rmf_frame_number"] = 0
1337 
1338  clusstat.write(str(tmp_dict)+"\n")
1339 
1340  if IMP.pmi.get_is_canonical(prot):
1341  # create a single-state System and write that
1343  IMP.Particle(self.model))
1344  h.set_name("System")
1345  h.add_child(prot)
1346  o.init_rmf(out_rmf_fn, [h], rs)
1347  else:
1348  o.init_rmf(out_rmf_fn, [prot], rs)
1349 
1350  o.write_rmf(out_rmf_fn)
1351  o.close_rmf(out_rmf_fn)
1352  # add the density
1353  if density_custom_ranges:
1354  DensModule.add_subunits_density(prot)
1355 
1356  if density_custom_ranges:
1357  DensModule.write_mrc(path=dircluster)
1358  del DensModule
1359  return
1360 
1361  # broadcast the coordinates
1362  if self.number_of_processes > 1:
1363  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1364  all_coordinates)
1365  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1366  all_rmf_file_names)
1367  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1368  rmf_file_name_index_dict)
1369  alignment_coordinates = IMP.pmi.tools.scatter_and_gather(
1370  alignment_coordinates)
1371  rmsd_coordinates = IMP.pmi.tools.scatter_and_gather(
1372  rmsd_coordinates)
1373 
1374  if self.rank == 0:
1375  # save needed informations in external files
1376  self.save_objects(
1377  [best_score_feature_keyword_list_dict,
1378  rmf_file_name_index_dict],
1379  ".macro.pkl")
1380 
1381 # ------------------------------------------------------------------------
1382 # Calculate distance matrix and cluster
1383 # ------------------------------------------------------------------------
1384  print("setup clustering class")
1385  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1386 
1387  for n, model_coordinate_dict in enumerate(all_coordinates):
1388  # let's try to align
1389  if (alignment_components is not None
1390  and len(self.cluster_obj.all_coords) == 0):
1391  # set the first model as template coordinates
1392  self.cluster_obj.set_template(alignment_coordinates[n])
1393  self.cluster_obj.fill(all_rmf_file_names[n],
1394  rmsd_coordinates[n])
1395  print("Global calculating the distance matrix")
1396 
1397  # calculate distance matrix, all against all
1398  self.cluster_obj.dist_matrix()
1399 
1400  # perform clustering and optionally display
1401  if self.rank == 0:
1402  self.cluster_obj.do_cluster(number_of_clusters)
1403  if display_plot:
1404  if self.rank == 0:
1405  self.cluster_obj.plot_matrix(
1406  figurename=os.path.join(outputdir,
1407  'dist_matrix.pdf'))
1408  if exit_after_display:
1409  exit()
1410  self.cluster_obj.save_distance_matrix_file(
1411  file_name=distance_matrix_file)
1412 
1413 # ------------------------------------------------------------------------
1414 # Alteratively, load the distance matrix from file and cluster that
1415 # ------------------------------------------------------------------------
1416  else:
1417  if self.rank == 0:
1418  print("setup clustering class")
1419  self.cluster_obj = IMP.pmi.analysis.Clustering()
1420  self.cluster_obj.load_distance_matrix_file(
1421  file_name=distance_matrix_file)
1422  print("clustering with %s clusters" % str(number_of_clusters))
1423  self.cluster_obj.do_cluster(number_of_clusters)
1424  [best_score_feature_keyword_list_dict,
1425  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1426  if display_plot:
1427  if self.rank == 0:
1428  self.cluster_obj.plot_matrix(figurename=os.path.join(
1429  outputdir, 'dist_matrix.pdf'))
1430  if exit_after_display:
1431  exit()
1432  if self.number_of_processes > 1:
1433  self.comm.Barrier()
1434 
1435 # ------------------------------------------------------------------------
1436 # now save all informations about the clusters
1437 # ------------------------------------------------------------------------
1438 
1439  if self.rank == 0:
1440  print(self.cluster_obj.get_cluster_labels())
1441  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1442  print("rank %s " % str(self.rank))
1443  print("cluster %s " % str(n))
1444  print("cluster label %s " % str(cl))
1445  print(self.cluster_obj.get_cluster_label_names(cl))
1446  cluster_size = \
1447  len(self.cluster_obj.get_cluster_label_names(cl))
1448  cluster_prov = \
1449  prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1450 
1451  # first initialize the Density class if requested
1452  if density_custom_ranges:
1453  DensModule = IMP.pmi.analysis.GetModelDensity(
1454  density_custom_ranges,
1455  voxel=voxel_size)
1456 
1457  dircluster = outputdir + "/cluster." + str(n) + "/"
1458  try:
1459  os.mkdir(dircluster)
1460  except: # noqa: E722
1461  pass
1462 
1463  rmsd_dict = {
1464  "AVERAGE_RMSD":
1465  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1466  clusstat = open(dircluster + "stat.out", "w")
1467  for k, structure_name in enumerate(
1468  self.cluster_obj.get_cluster_label_names(cl)):
1469  # extract the features
1470  tmp_dict = {}
1471  tmp_dict.update(rmsd_dict)
1472  index = rmf_file_name_index_dict[structure_name]
1473  for key in best_score_feature_keyword_list_dict:
1474  tmp_dict[
1475  key] = best_score_feature_keyword_list_dict[
1476  key][
1477  index]
1478 
1479  # get the rmf name and the frame number from the list of
1480  # frame names
1481  rmf_name = structure_name.split("|")[0]
1482  rmf_frame_number = int(structure_name.split("|")[1])
1483  clusstat.write(str(tmp_dict) + "\n")
1484 
1485  # extract frame (open or link to existing)
1486  if k == 0:
1487  prots, rs = \
1488  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1489  self.model, rmf_frame_number, rmf_name)
1490  else:
1491  linking_successful = \
1492  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1493  self.model, prots, rs, rmf_frame_number,
1494  rmf_name)
1495  if not linking_successful:
1496  continue
1497  if not prots:
1498  continue
1499 
1500  if IMP.pmi.get_is_canonical(prots[0]):
1501  states = IMP.atom.get_by_type(
1502  prots[0], IMP.atom.STATE_TYPE)
1503  prot = states[state_number]
1504  else:
1505  prot = prots[state_number]
1506  if k == 0:
1507  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1508 
1509  # transform clusters onto first
1510  if k > 0:
1511  co = self.cluster_obj
1512  model_index = co.get_model_index_from_name(
1513  structure_name)
1514  transformation = co.get_transformation_to_first_member(
1515  cl, model_index)
1516  rbs = set()
1517  for p in IMP.atom.get_leaves(prot):
1518  if not IMP.core.XYZR.get_is_setup(p):
1520  IMP.core.XYZR(p).set_radius(0.0001)
1521  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1522 
1524  rbm = IMP.core.RigidBodyMember(p)
1525  rb = rbm.get_rigid_body()
1526  rbs.add(rb)
1527  else:
1529  transformation)
1530  for rb in rbs:
1531  IMP.core.transform(rb, transformation)
1532 
1533  # add the density
1534  if density_custom_ranges:
1535  DensModule.add_subunits_density(prot)
1536 
1537  # pdb writing should be optimized!
1538  o = IMP.pmi.output.Output()
1539  self.model.update()
1540  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1541  o.write_pdb(dircluster + str(k) + ".pdb")
1542 
1543  if IMP.pmi.get_is_canonical(prot):
1544  # create a single-state System and write that
1546  IMP.Particle(self.model))
1547  h.set_name("System")
1548  h.add_child(prot)
1549  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1550  else:
1551  o.init_rmf(dircluster + str(k) + ".rmf3", [prot], rs)
1552  o.write_rmf(dircluster + str(k) + ".rmf3")
1553  o.close_rmf(dircluster + str(k) + ".rmf3")
1554 
1555  del o
1556  # IMP.atom.destroy(prot)
1557 
1558  if density_custom_ranges:
1559  DensModule.write_mrc(path=dircluster)
1560  del DensModule
1561 
1562  if self.number_of_processes > 1:
1563  self.comm.Barrier()
1564 
1565  def get_cluster_rmsd(self, cluster_num):
1566  if self.cluster_obj is None:
1567  raise Exception("Run clustering first")
1568  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1569 
1570  def save_objects(self, objects, file_name):
1571  import pickle
1572  outf = open(file_name, 'wb')
1573  pickle.dump(objects, outf)
1574  outf.close()
1575 
1576  def load_objects(self, file_name):
1577  import pickle
1578  inputf = open(file_name, 'rb')
1579  objects = pickle.load(inputf)
1580  inputf.close()
1581  return objects
1582 
1583 
1585 
1586  """
1587  This class contains analysis utilities to investigate ReplicaExchange
1588  results.
1589  """
1590 
1591  ########################
1592  # Construction and Setup
1593  ########################
1594 
1595  def __init__(self, model, stat_files, best_models=None, score_key=None,
1596  alignment=True):
1597  """
1598  Construction of the Class.
1599  @param model IMP.Model()
1600  @param stat_files list of string. Can be ascii stat files,
1601  rmf files names
1602  @param best_models Integer. Number of best scoring models,
1603  if None: all models will be read
1604  @param alignment boolean (Default=True). Align before computing
1605  the rmsd.
1606  """
1607 
1608  self.model = model
1609  self.best_models = best_models
1611  model, stat_files, self.best_models, score_key, cache=True)
1613  StatHierarchyHandler=self.stath0)
1614 
1615  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(
1617  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(
1619  self.sel0_rmsd = IMP.atom.Selection(self.stath0)
1620  self.sel1_rmsd = IMP.atom.Selection(self.stath1)
1621  self.sel0_alignment = IMP.atom.Selection(self.stath0)
1622  self.sel1_alignment = IMP.atom.Selection(self.stath1)
1623  self.clusters = []
1624  # fill the cluster list with a single cluster containing all models
1625  c = IMP.pmi.output.Cluster(0)
1626  self.clusters.append(c)
1627  for n0 in range(len(self.stath0)):
1628  c.add_member(n0)
1629  self.pairwise_rmsd = {}
1630  self.pairwise_molecular_assignment = {}
1631  self.alignment = alignment
1632  self.symmetric_molecules = {}
1633  self.issymmetricsel = {}
1634  self.update_seldicts()
1635  self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1636  IMP.atom.get_leaves(self.stath0))
1637  self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1638  IMP.atom.get_leaves(self.stath1))
1639 
1640  def set_rmsd_selection(self, **kwargs):
1641  """
1642  Setup the selection onto which the rmsd is computed
1643  @param kwargs use IMP.atom.Selection keywords
1644  """
1645  self.sel0_rmsd = IMP.atom.Selection(self.stath0, **kwargs)
1646  self.sel1_rmsd = IMP.atom.Selection(self.stath1, **kwargs)
1647  self.update_seldicts()
1648 
1649  def set_symmetric(self, molecule_name):
1650  """
1651  Store names of symmetric molecules
1652  """
1653  self.symmetric_molecules[molecule_name] = 0
1654  self.update_seldicts()
1655 
1656  def set_alignment_selection(self, **kwargs):
1657  """
1658  Setup the selection onto which the alignment is computed
1659  @param kwargs use IMP.atom.Selection keywords
1660  """
1661  self.sel0_alignment = IMP.atom.Selection(self.stath0, **kwargs)
1662  self.sel1_alignment = IMP.atom.Selection(self.stath1, **kwargs)
1663 
1664  ######################
1665  # Clustering functions
1666  ######################
1667  def clean_clusters(self):
1668  for c in self.clusters:
1669  del c
1670  self.clusters = []
1671 
1672  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1673  """
1674  Cluster the models based on RMSD.
1675  @param rmsd_cutoff Float the distance cutoff in Angstrom
1676  @param metric (Default=IMP.atom.get_rmsd) the metric that will
1677  be used to compute rmsds
1678  """
1679  self.clean_clusters()
1680  not_clustered = set(range(len(self.stath1)))
1681  while len(not_clustered) > 0:
1682  self.aggregate(not_clustered, rmsd_cutoff, metric)
1683  self.update_clusters()
1684 
1685  def refine(self, rmsd_cutoff=10):
1686  """
1687  Refine the clusters by merging the ones whose centers are close
1688  @param rmsd_cutoff cutoff distance in Angstorms
1689  """
1690  clusters_copy = self.clusters
1691  for c0, c1 in itertools.combinations(self.clusters, 2):
1692  if c0.center_index is None:
1693  self.compute_cluster_center(c0)
1694  if c1.center_index is None:
1695  self.compute_cluster_center(c1)
1696  _ = self.stath0[c0.center_index]
1697  _ = self.stath1[c1.center_index]
1698  rmsd, molecular_assignment = self.rmsd()
1699  if rmsd <= rmsd_cutoff:
1700  if c1 in self.clusters:
1701  clusters_copy.remove(c1)
1702  c0 += c1
1703  self.clusters = clusters_copy
1704  self.update_clusters()
1705 
1706  ####################
1707  # Input Output
1708  ####################
1709 
1710  def set_cluster_assignments(self, cluster_ids):
1711  if len(cluster_ids) != len(self.stath0):
1712  raise ValueError('cluster ids has to be same length as '
1713  'number of frames')
1714 
1715  self.clusters = []
1716  for i in sorted(list(set(cluster_ids))):
1717  self.clusters.append(IMP.pmi.output.Cluster(i))
1718  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1719  self.clusters[idx].add_member(i, d)
1720 
1721  def get_cluster_data(self, cluster):
1722  """
1723  Return the model data from a cluster
1724  @param cluster IMP.pmi.output.Cluster object
1725  """
1726  data = []
1727  for m in cluster:
1728  data.append(m)
1729  return data
1730 
1731  def save_data(self, filename='data.pkl'):
1732  """
1733  Save the data for the whole models into a pickle file
1734  @param filename string
1735  """
1736  self.stath0.save_data(filename)
1737 
1738  def set_data(self, data):
1739  """
1740  Set the data from an external IMP.pmi.output.Data
1741  @param data IMP.pmi.output.Data
1742  """
1743  self.stath0.data = data
1744  self.stath1.data = data
1745 
1746  def load_data(self, filename='data.pkl'):
1747  """
1748  Load the data from an external pickled file
1749  @param filename string
1750  """
1751  self.stath0.load_data(filename)
1752  self.stath1.load_data(filename)
1753  self.best_models = len(self.stath0)
1754 
1755  def add_cluster(self, rmf_name_list):
1756  c = IMP.pmi.output.Cluster(len(self.clusters))
1757  print("creating cluster index "+str(len(self.clusters)))
1758  self.clusters.append(c)
1759  current_len = len(self.stath0)
1760 
1761  for rmf in rmf_name_list:
1762  print("adding rmf "+rmf)
1763  self.stath0.add_stat_file(rmf)
1764  self.stath1.add_stat_file(rmf)
1765 
1766  for n0 in range(current_len, len(self.stath0)):
1767  d0 = self.stath0[n0]
1768  c.add_member(n0, d0)
1769  self.update_clusters()
1770 
1771  def save_clusters(self, filename='clusters.pkl'):
1772  """
1773  Save the clusters into a pickle file
1774  @param filename string
1775  """
1776  try:
1777  import cPickle as pickle
1778  except ImportError:
1779  import pickle
1780  fl = open(filename, 'wb')
1781  pickle.dump(self.clusters, fl)
1782 
1783  def load_clusters(self, filename='clusters.pkl', append=False):
1784  """
1785  Load the clusters from a pickle file
1786  @param filename string
1787  @param append bool (Default=False), if True. append the clusters
1788  to the ones currently present
1789  """
1790  try:
1791  import cPickle as pickle
1792  except ImportError:
1793  import pickle
1794  fl = open(filename, 'rb')
1795  self.clean_clusters()
1796  if append:
1797  self.clusters += pickle.load(fl)
1798  else:
1799  self.clusters = pickle.load(fl)
1800  self.update_clusters()
1801 
1802  ####################
1803  # Analysis Functions
1804  ####################
1805 
1806  def compute_cluster_center(self, cluster):
1807  """
1808  Compute the cluster center for a given cluster
1809  """
1810  member_distance = defaultdict(float)
1811 
1812  for n0, n1 in itertools.combinations(cluster.members, 2):
1813  _ = self.stath0[n0]
1814  _ = self.stath1[n1]
1815  rmsd, _ = self.rmsd()
1816  member_distance[n0] += rmsd
1817 
1818  if len(member_distance) > 0:
1819  cluster.center_index = min(member_distance,
1820  key=member_distance.get)
1821  else:
1822  cluster.center_index = cluster.members[0]
1823 
1824  def save_coordinates(self, cluster, rmf_name=None, reference="Absolute",
1825  prefix="./"):
1826  """
1827  Save the coordinates of the current cluster a single rmf file
1828  """
1829  print("saving coordinates", cluster)
1830  if self.alignment:
1831  self.set_reference(reference, cluster)
1832  o = IMP.pmi.output.Output()
1833  if rmf_name is None:
1834  rmf_name = prefix+'/'+str(cluster.cluster_id)+".rmf3"
1835 
1836  _ = self.stath1[cluster.members[0]]
1837  self.model.update()
1838  o.init_rmf(rmf_name, [self.stath1])
1839  for n1 in cluster.members:
1840  _ = self.stath1[n1]
1841  self.model.update()
1843  if self.alignment:
1844  self.align()
1845  o.write_rmf(rmf_name)
1847  o.close_rmf(rmf_name)
1848 
1849  def prune_redundant_structures(self, rmsd_cutoff=10):
1850  """
1851  remove structures that are similar
1852  append it to a new cluster
1853  """
1854  print("pruning models")
1855  selected = 0
1856  filtered = [selected]
1857  remaining = range(1, len(self.stath1), 10)
1858 
1859  while len(remaining) > 0:
1860  d0 = self.stath0[selected]
1861  rm = []
1862  for n1 in remaining:
1863  _ = self.stath1[n1]
1864  if self.alignment:
1865  self.align()
1866  d, _ = self.rmsd()
1867  if d <= rmsd_cutoff:
1868  rm.append(n1)
1869  print("pruning model %s, similar to model %s, rmsd %s"
1870  % (str(n1), str(selected), str(d)))
1871  remaining = [x for x in remaining if x not in rm]
1872  if len(remaining) == 0:
1873  break
1874  selected = remaining[0]
1875  filtered.append(selected)
1876  remaining.pop(0)
1877  c = IMP.pmi.output.Cluster(len(self.clusters))
1878  self.clusters.append(c)
1879  for n0 in filtered:
1880  d0 = self.stath0[n0]
1881  c.add_member(n0, d0)
1882  self.update_clusters()
1883 
1884  def precision(self, cluster):
1885  """
1886  Compute the precision of a cluster
1887  """
1888  npairs = 0
1889  rmsd = 0.0
1890  precision = None
1891 
1892  if cluster.center_index is not None:
1893  members1 = [cluster.center_index]
1894  else:
1895  members1 = cluster.members
1896 
1897  for n0 in members1:
1898  _ = self.stath0[n0]
1899  for n1 in cluster.members:
1900  if n0 != n1:
1901  npairs += 1
1902  _ = self.stath1[n1]
1904  tmp_rmsd, _ = self.rmsd()
1905  rmsd += tmp_rmsd
1907 
1908  if npairs > 0:
1909  precision = rmsd/npairs
1910  cluster.precision = precision
1911  return precision
1912 
1913  def bipartite_precision(self, cluster1, cluster2, verbose=False):
1914  """
1915  Compute the bipartite precision (ie the cross-precision)
1916  between two clusters
1917  """
1918  npairs = 0
1919  rmsd = 0.0
1920  for cn0, n0 in enumerate(cluster1.members):
1921  _ = self.stath0[n0]
1922  for cn1, n1 in enumerate(cluster2.members):
1923  _ = self.stath1[n1]
1924  tmp_rmsd, _ = self.rmsd()
1925  if verbose:
1926  print("--- rmsd between structure %s and structure "
1927  "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1928  rmsd += tmp_rmsd
1929  npairs += 1
1930  precision = rmsd/npairs
1931  return precision
1932 
1933  def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1934  cluster_ref=None, step=1):
1935  """
1936  Compute the Root mean square fluctuations
1937  of a molecule in a cluster
1938  Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1939  residue indexes and the value is the rmsf
1940  """
1941  rmsf = IMP.pmi.tools.OrderedDict()
1942 
1943  # assumes that residue indexes are identical for stath0 and stath1
1944  if cluster_ref is not None:
1945  if cluster_ref.center_index is not None:
1946  members0 = [cluster_ref.center_index]
1947  else:
1948  members0 = cluster_ref.members
1949  else:
1950  if cluster.center_index is not None:
1951  members0 = [cluster.center_index]
1952  else:
1953  members0 = cluster.members
1954 
1955  s0 = IMP.atom.Selection(self.stath0, molecule=molecule, resolution=1,
1956  copy_index=copy_index, state_index=state_index)
1957  ps0 = s0.get_selected_particles()
1958  # get the residue indexes
1959  residue_indexes = list(IMP.pmi.tools.OrderedSet(
1960  [IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
1961 
1962  # get the corresponding particles
1963  npairs = 0
1964  for n0 in members0:
1965  d0 = self.stath0[n0]
1966  for n1 in cluster.members[::step]:
1967  if n0 != n1:
1968  print("--- rmsf %s %s" % (str(n0), str(n1)))
1970 
1971  s1 = IMP.atom.Selection(
1972  self.stath1, molecule=molecule,
1973  residue_indexes=residue_indexes, resolution=1,
1974  copy_index=copy_index, state_index=state_index)
1975  ps1 = s1.get_selected_particles()
1976 
1977  d1 = self.stath1[n1]
1978  if self.alignment:
1979  self.align()
1980  for n, (p0, p1) in enumerate(zip(ps0, ps1)):
1981  r = residue_indexes[n]
1982  d0 = IMP.core.XYZ(p0)
1983  d1 = IMP.core.XYZ(p1)
1984  if r in rmsf:
1985  rmsf[r] += IMP.core.get_distance(d0, d1)
1986  else:
1987  rmsf[r] = IMP.core.get_distance(d0, d1)
1988  npairs += 1
1990  for r in rmsf:
1991  rmsf[r] /= npairs
1992 
1993  for stath in [self.stath0, self.stath1]:
1994  if molecule not in self.symmetric_molecules:
1995  s = IMP.atom.Selection(
1996  stath, molecule=molecule, residue_index=r,
1997  resolution=1, copy_index=copy_index,
1998  state_index=state_index)
1999  else:
2000  s = IMP.atom.Selection(
2001  stath, molecule=molecule, residue_index=r,
2002  resolution=1, state_index=state_index)
2003 
2004  ps = s.get_selected_particles()
2005  for p in ps:
2007  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
2008  else:
2010 
2011  return rmsf
2012 
2013  def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2014  reference="Absolute", prefix="./", step=1):
2015  if self.alignment:
2016  self.set_reference(reference, cluster)
2017  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
2018  voxel=voxel_size)
2019 
2020  for n1 in cluster.members[::step]:
2021  print("density "+str(n1))
2022  _ = self.stath1[n1]
2024  if self.alignment:
2025  self.align()
2026  dens.add_subunits_density(self.stath1)
2028  dens.write_mrc(path=prefix+'/', suffix=str(cluster.cluster_id))
2029  del dens
2030 
2031  def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2032  consolidate=False, molecules=None, prefix='./',
2033  reference="Absolute"):
2034  if self.alignment:
2035  self.set_reference(reference, cluster)
2036  import numpy as np
2037  import matplotlib.pyplot as plt
2038  import matplotlib.cm as cm
2039  from scipy.spatial.distance import cdist
2040  import IMP.pmi.topology
2041  if molecules is None:
2043  for mol in IMP.pmi.tools.get_molecules(
2044  IMP.atom.get_leaves(self.stath1))]
2045  else:
2047  for mol in IMP.pmi.tools.get_molecules(
2049  self.stath1,
2050  molecules=molecules).get_selected_particles())]
2051  unique_copies = [mol for mol in mols if mol.get_copy_index() == 0]
2052  mol_names_unique = dict((mol.get_name(), mol) for mol in unique_copies)
2053  total_len_unique = sum(max(mol.get_residue_indexes())
2054  for mol in unique_copies)
2055 
2056  index_dict = {}
2057  prev_stop = 0
2058 
2059  if not consolidate:
2060  for mol in mols:
2061  seqlen = max(mol.get_residue_indexes())
2062  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2063  prev_stop += seqlen
2064 
2065  else:
2066  for mol in unique_copies:
2067  seqlen = max(mol.get_residue_indexes())
2068  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2069  prev_stop += seqlen
2070 
2071  for ncl, n1 in enumerate(cluster.members):
2072  print(ncl)
2073  _ = self.stath1[n1]
2074  coord_dict = IMP.pmi.tools.OrderedDict()
2075  for mol in mols:
2076  rindexes = mol.get_residue_indexes()
2077  coords = np.ones((max(rindexes), 3))
2078  for rnum in rindexes:
2079  sel = IMP.atom.Selection(mol, residue_index=rnum,
2080  resolution=1)
2081  selpart = sel.get_selected_particles()
2082  if len(selpart) == 0:
2083  continue
2084  selpart = selpart[0]
2085  coords[rnum - 1, :] = \
2086  IMP.core.XYZ(selpart).get_coordinates()
2087  coord_dict[mol] = coords
2088 
2089  if not consolidate:
2090  coords = np.concatenate(list(coord_dict.values()))
2091  dists = cdist(coords, coords)
2092  binary_dists = np.where((dists <= contact_threshold)
2093  & (dists >= 1.0), 1.0, 0.0)
2094  else:
2095  binary_dists_dict = {}
2096  for mol1 in mols:
2097  len1 = max(mol1.get_residue_indexes())
2098  for mol2 in mols:
2099  name1 = mol1.get_name()
2100  name2 = mol2.get_name()
2101  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2102  if (name1, name2) not in binary_dists_dict:
2103  binary_dists_dict[(name1, name2)] = \
2104  np.zeros((len1, len1))
2105  binary_dists_dict[(name1, name2)] += \
2106  np.where((dists <= contact_threshold)
2107  & (dists >= 1.0), 1.0, 0.0)
2108  binary_dists = np.zeros((total_len_unique, total_len_unique))
2109 
2110  for name1, name2 in binary_dists_dict:
2111  r1 = index_dict[mol_names_unique[name1]]
2112  r2 = index_dict[mol_names_unique[name2]]
2113  binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2114  np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2115  1.0, 0.0)
2116 
2117  if ncl == 0:
2118  dist_maps = [dists]
2119  av_dist_map = dists
2120  contact_freqs = binary_dists
2121  else:
2122  dist_maps.append(dists)
2123  av_dist_map += dists
2124  contact_freqs += binary_dists
2125 
2126  if log_scale:
2127  contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2128  else:
2129  contact_freqs = 1.0/len(cluster)*contact_freqs
2130  av_dist_map = 1.0/len(cluster)*contact_freqs
2131 
2132  fig = plt.figure(figsize=(100, 100))
2133  ax = fig.add_subplot(111)
2134  ax.set_xticks([])
2135  ax.set_yticks([])
2136  gap_between_components = 50
2137  colormap = cm.Blues
2138  colornorm = None
2139 
2140  if not consolidate:
2141  sorted_tuple = sorted(
2143  mol).get_extended_name(), mol) for mol in mols)
2144  prot_list = list(zip(*sorted_tuple))[1]
2145  else:
2146  sorted_tuple = sorted(
2147  (IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(), mol)
2148  for mol in unique_copies)
2149  prot_list = list(zip(*sorted_tuple))[1]
2150 
2151  prot_listx = prot_list
2152  nresx = gap_between_components + \
2153  sum([max(mol.get_residue_indexes())
2154  + gap_between_components for mol in prot_listx])
2155 
2156  # set the list of proteins on the y axis
2157  prot_listy = prot_list
2158  nresy = gap_between_components + \
2159  sum([max(mol.get_residue_indexes())
2160  + gap_between_components for mol in prot_listy])
2161 
2162  # this is the residue offset for each protein
2163  resoffsetx = {}
2164  resendx = {}
2165  res = gap_between_components
2166  for mol in prot_listx:
2167  resoffsetx[mol] = res
2168  res += max(mol.get_residue_indexes())
2169  resendx[mol] = res
2170  res += gap_between_components
2171 
2172  resoffsety = {}
2173  resendy = {}
2174  res = gap_between_components
2175  for mol in prot_listy:
2176  resoffsety[mol] = res
2177  res += max(mol.get_residue_indexes())
2178  resendy[mol] = res
2179  res += gap_between_components
2180 
2181  resoffsetdiagonal = {}
2182  res = gap_between_components
2183  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2184  resoffsetdiagonal[mol] = res
2185  res += max(mol.get_residue_indexes())
2186  res += gap_between_components
2187 
2188  # plot protein boundaries
2189  xticks = []
2190  xlabels = []
2191  for n, prot in enumerate(prot_listx):
2192  res = resoffsetx[prot]
2193  end = resendx[prot]
2194  for proty in prot_listy:
2195  resy = resoffsety[proty]
2196  endy = resendy[proty]
2197  ax.plot([res, res], [resy, endy], linestyle='-',
2198  color='gray', lw=0.4)
2199  ax.plot([end, end], [resy, endy], linestyle='-',
2200  color='gray', lw=0.4)
2201  xticks.append((float(res) + float(end)) / 2)
2203  prot).get_extended_name())
2204 
2205  yticks = []
2206  ylabels = []
2207  for n, prot in enumerate(prot_listy):
2208  res = resoffsety[prot]
2209  end = resendy[prot]
2210  for protx in prot_listx:
2211  resx = resoffsetx[protx]
2212  endx = resendx[protx]
2213  ax.plot([resx, endx], [res, res], linestyle='-',
2214  color='gray', lw=0.4)
2215  ax.plot([resx, endx], [end, end], linestyle='-',
2216  color='gray', lw=0.4)
2217  yticks.append((float(res) + float(end)) / 2)
2219  prot).get_extended_name())
2220 
2221  # plot the contact map
2222 
2223  tmp_array = np.zeros((nresx, nresy))
2224  ret = {}
2225  for px in prot_listx:
2226  for py in prot_listy:
2227  resx = resoffsetx[px]
2228  lengx = resendx[px] - 1
2229  resy = resoffsety[py]
2230  lengy = resendy[py] - 1
2231  indexes_x = index_dict[px]
2232  minx = min(indexes_x)
2233  maxx = max(indexes_x)
2234  indexes_y = index_dict[py]
2235  miny = min(indexes_y)
2236  maxy = max(indexes_y)
2237  tmp_array[resx:lengx, resy:lengy] = \
2238  contact_freqs[minx:maxx, miny:maxy]
2239  ret[(px, py)] = np.argwhere(
2240  contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2241 
2242  ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2243  origin='lower', alpha=0.6, interpolation='nearest')
2244 
2245  ax.set_xticks(xticks)
2246  ax.set_xticklabels(xlabels, rotation=90)
2247  ax.set_yticks(yticks)
2248  ax.set_yticklabels(ylabels)
2249  plt.setp(ax.get_xticklabels(), fontsize=6)
2250  plt.setp(ax.get_yticklabels(), fontsize=6)
2251 
2252  # display and write to file
2253  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2254  [i.set_linewidth(2.0) for i in ax.spines.values()]
2255 
2256  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf",
2257  dpi=300, transparent="False")
2258  return ret
2259 
2260  def plot_rmsd_matrix(self, filename):
2261  self.compute_all_pairwise_rmsd()
2262  distance_matrix = np.zeros(
2263  (len(self.stath0), len(self.stath1)))
2264  for (n0, n1) in self.pairwise_rmsd:
2265  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2266 
2267  import matplotlib as mpl
2268  mpl.use('Agg')
2269  import matplotlib.pylab as pl
2270  from scipy.cluster import hierarchy as hrc
2271 
2272  fig = pl.figure(figsize=(10, 8))
2273  ax = fig.add_subplot(212)
2274  dendrogram = hrc.dendrogram(
2275  hrc.linkage(distance_matrix),
2276  color_threshold=7,
2277  no_labels=True)
2278  leaves_order = dendrogram['leaves']
2279  ax.set_xlabel('Model')
2280  ax.set_ylabel('RMSD [Angstroms]')
2281 
2282  ax2 = fig.add_subplot(221)
2283  cax = ax2.imshow(
2284  distance_matrix[leaves_order, :][:, leaves_order],
2285  interpolation='nearest')
2286  cb = fig.colorbar(cax)
2287  cb.set_label('RMSD [Angstroms]')
2288  ax2.set_xlabel('Model')
2289  ax2.set_ylabel('Model')
2290 
2291  pl.savefig(filename, dpi=300)
2292  pl.close(fig)
2293 
2294  ####################
2295  # Internal Functions
2296  ####################
2297 
2298  def update_clusters(self):
2299  """
2300  Update the cluster id numbers
2301  """
2302  for n, c in enumerate(self.clusters):
2303  c.cluster_id = n
2304 
2305  def get_molecule(self, hier, name, copy):
2306  s = IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2307  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2308 
2309  def update_seldicts(self):
2310  """
2311  Update the seldicts
2312  """
2313  self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2314  self.sel0_rmsd.get_selected_particles())
2315  self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2316  self.sel1_rmsd.get_selected_particles())
2317  for mol in self.seldict0:
2318  for sel in self.seldict0[mol]:
2319  self.issymmetricsel[sel] = False
2320  for mol in self.symmetric_molecules:
2321  self.symmetric_molecules[mol] = len(self.seldict0[mol])
2322  for sel in self.seldict0[mol]:
2323  self.issymmetricsel[sel] = True
2324 
2325  def align(self):
2327  self.sel1_alignment, self.sel0_alignment)
2328 
2329  for rb in self.rbs1:
2330  IMP.core.transform(rb, tr)
2331 
2332  for bead in self.beads1:
2333  try:
2334  IMP.core.transform(IMP.core.XYZ(bead), tr)
2335  except: # noqa: E722
2336  continue
2337 
2338  self.model.update()
2339 
2340  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2341  '''
2342  initial filling of the clusters.
2343  '''
2344  n0 = idxs.pop()
2345  print("clustering model "+str(n0))
2346  d0 = self.stath0[n0]
2347  c = IMP.pmi.output.Cluster(len(self.clusters))
2348  print("creating cluster index "+str(len(self.clusters)))
2349  self.clusters.append(c)
2350  c.add_member(n0, d0)
2351  clustered = set([n0])
2352  for n1 in idxs:
2353  print("--- trying to add model " + str(n1) + " to cluster "
2354  + str(len(self.clusters)))
2355  d1 = self.stath1[n1]
2356  if self.alignment:
2357  self.align()
2358  rmsd, _ = self.rmsd(metric=metric)
2359  if rmsd < rmsd_cutoff:
2360  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2361  c.add_member(n1, d1)
2362  clustered.add(n1)
2363  else:
2364  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2365  idxs -= clustered
2366 
2367  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2368  """
2369  merge the clusters that have close members
2370  @param rmsd_cutoff cutoff distance in Angstorms
2371  """
2372  # before merging, clusters are spheres of radius rmsd_cutoff
2373  # centered on the 1st element
2374  # here we only try to merge clusters whose centers are closer
2375  # than 2*rmsd_cutoff
2376  to_merge = []
2377  print("merging...")
2378  for c0, c1 in filter(lambda x: len(x[0].members) > 1,
2379  itertools.combinations(self.clusters, 2)):
2380  n0, n1 = [c.members[0] for c in (c0, c1)]
2381  _ = self.stath0[n0]
2382  _ = self.stath1[n1]
2383  rmsd, _ = self.rmsd()
2384  if (rmsd < 2*rmsd_cutoff and
2385  self.have_close_members(c0, c1, rmsd_cutoff, metric)):
2386  to_merge.append((c0, c1))
2387 
2388  for c0, c in reversed(to_merge):
2389  self.merge(c0, c)
2390 
2391  # keep only full clusters
2392  self.clusters = [c for c in
2393  filter(lambda x: len(x.members) > 0, self.clusters)]
2394 
2395  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2396  '''
2397  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2398  '''
2399  print("check close members for clusters " + str(c0.cluster_id) +
2400  " and " + str(c1.cluster_id))
2401  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2402  _ = self.stath0[n0]
2403  _ = self.stath1[n1]
2404  rmsd, _ = self.rmsd(metric=metric)
2405  if rmsd < rmsd_cutoff:
2406  return True
2407 
2408  return False
2409 
2410  def merge(self, c0, c1):
2411  '''
2412  merge two clusters
2413  '''
2414  c0 += c1
2415  c1.members = []
2416  c1.data = {}
2417 
2418  def rmsd_helper(self, sels0, sels1, metric):
2419  '''
2420  a function that returns the permutation best_sel of sels0 that
2421  minimizes metric
2422  '''
2423  best_rmsd2 = float('inf')
2424  best_sel = None
2425  if self.issymmetricsel[sels0[0]]:
2426  # this cases happens when symmetries were defined
2427  N = len(sels0)
2428  for offset in range(N):
2429  sels = [sels0[(offset+i) % N] for i in range(N)]
2430  sel0 = sels[0]
2431  sel1 = sels1[0]
2432  r = metric(sel0, sel1)
2433  rmsd2 = r*r*N
2434  if rmsd2 < best_rmsd2:
2435  best_rmsd2 = rmsd2
2436  best_sel = sels
2437  else:
2438  for sels in itertools.permutations(sels0):
2439  rmsd2 = 0.0
2440  for sel0, sel1 in itertools.takewhile(
2441  lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2442  r = metric(sel0, sel1)
2443  rmsd2 += r*r
2444  if rmsd2 < best_rmsd2:
2445  best_rmsd2 = rmsd2
2446  best_sel = sels
2447  return best_sel, best_rmsd2
2448 
2449  def compute_all_pairwise_rmsd(self):
2450  for d0 in self.stath0:
2451  for d1 in self.stath1:
2452  rmsd, _ = self.rmsd()
2453 
2454  def rmsd(self, metric=IMP.atom.get_rmsd):
2455  '''
2456  Computes the RMSD. Resolves ambiguous pairs assignments
2457  '''
2458  # here we memoize the rmsd and molecular assignment so that it's
2459  # not done multiple times
2460  n0 = self.stath0.current_index
2461  n1 = self.stath1.current_index
2462  if ((n0, n1) in self.pairwise_rmsd) \
2463  and ((n0, n1) in self.pairwise_molecular_assignment):
2464  return (self.pairwise_rmsd[(n0, n1)],
2465  self.pairwise_molecular_assignment[(n0, n1)])
2466 
2467  if self.alignment:
2468  self.align()
2469  # if it's not yet memoized
2470  total_rmsd = 0.0
2471  total_N = 0
2472  # this is a dictionary which keys are the molecule names, and values
2473  # are the list of IMP.atom.Selection for all molecules that share
2474  # the molecule name
2475  molecular_assignment = {}
2476  for molname, sels0 in self.seldict0.items():
2477  sels_best_order, best_rmsd2 = \
2478  self.rmsd_helper(sels0, self.seldict1[molname], metric)
2479 
2480  Ncoords = len(sels_best_order[0].get_selected_particles())
2481  Ncopies = len(self.seldict1[molname])
2482  total_rmsd += Ncoords*best_rmsd2
2483  total_N += Ncoords*Ncopies
2484 
2485  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2486  p0 = sel0.get_selected_particles()[0]
2487  p1 = sel1.get_selected_particles()[0]
2488  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2489  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2490  c0 = IMP.atom.Copy(m0).get_copy_index()
2491  c1 = IMP.atom.Copy(m1).get_copy_index()
2492  molecular_assignment[(molname, c0)] = (molname, c1)
2493 
2494  total_rmsd = math.sqrt(total_rmsd/total_N)
2495 
2496  self.pairwise_rmsd[(n0, n1)] = total_rmsd
2497  self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2498  self.pairwise_rmsd[(n1, n0)] = total_rmsd
2499  self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2500  return total_rmsd, molecular_assignment
2501 
2502  def set_reference(self, reference, cluster):
2503  """
2504  Fix the reference structure for structural alignment, rmsd and
2505  chain assignemnt
2506 
2507  @param reference can be either "Absolute" (cluster center of the
2508  first cluster) or Relative (cluster center of the current
2509  cluster)
2510  """
2511  if reference == "Absolute":
2512  _ = self.stath0[0]
2513  elif reference == "Relative":
2514  if cluster.center_index:
2515  n0 = cluster.center_index
2516  else:
2517  n0 = cluster.members[0]
2518  _ = self.stath0[n0]
2519 
2521  """
2522  compute the molecular assignemnts between multiple copies
2523  of the same sequence. It changes the Copy index of Molecules
2524  """
2525  _ = self.stath1[n1]
2526  _, molecular_assignment = self.rmsd()
2527  for (m0, c0), (m1, c1) in molecular_assignment.items():
2528  mol0 = self.molcopydict0[m0][c0]
2529  mol1 = self.molcopydict1[m1][c1]
2530  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2531  p1 = IMP.atom.Copy(mol1).get_particle()
2532  p1.set_value(cik0, c0)
2533 
2535  """
2536  Undo the Copy index assignment
2537  """
2538  _ = self.stath1[n1]
2539  _, molecular_assignment = self.rmsd()
2540  for (m0, c0), (m1, c1) in molecular_assignment.items():
2541  mol0 = self.molcopydict0[m0][c0]
2542  mol1 = self.molcopydict1[m1][c1]
2543  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2544  p1 = IMP.atom.Copy(mol1).get_particle()
2545  p1.set_value(cik0, c1)
2546 
2547  ####################
2548  # Container Functions
2549  ####################
2550 
2551  def __repr__(self):
2552  s = "AnalysisReplicaExchange\n"
2553  s += "---- number of clusters %s \n" % str(len(self.clusters))
2554  s += "---- number of models %s \n" % str(len(self.stath0))
2555  return s
2556 
2557  def __getitem__(self, int_slice_adaptor):
2558  if isinstance(int_slice_adaptor, int):
2559  return self.clusters[int_slice_adaptor]
2560  elif isinstance(int_slice_adaptor, slice):
2561  return self.__iter__(int_slice_adaptor)
2562  else:
2563  raise TypeError("Unknown Type")
2564 
2565  def __len__(self):
2566  return len(self.clusters)
2567 
2568  def __iter__(self, slice_key=None):
2569  if slice_key is None:
2570  for i in range(len(self)):
2571  yield self[i]
2572  else:
2573  for i in range(len(self))[slice_key]:
2574  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2454
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
Definition: macros.py:2502
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1783
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1136
def precision
Compute the precision of a cluster.
Definition: macros.py:1884
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:884
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:111
A container for models organized into clusters.
Definition: output.py:1522
Sample using molecular dynamics.
Definition: samplers.py:400
def aggregate
initial filling of the clusters.
Definition: macros.py:2340
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:616
A macro to help setup and run replica exchange.
Definition: macros.py:68
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:617
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1849
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1933
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1233
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1640
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1721
def __init__
Construction of the Class.
Definition: macros.py:1595
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:789
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1738
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2534
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1656
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2418
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1824
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1028
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
Definition: macros.py:2520
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1584
Add uncertainty to a particle.
Definition: Uncertainty.h:24
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2367
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:937
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1806
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:948
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def __init__
Constructor.
Definition: macros.py:133
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:22
def update_seldicts
Update the seldicts.
Definition: macros.py:2309
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2298
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:613
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1685
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:203
Collect timing information.
Definition: tools.py:126
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1649
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: pmi/utilities.h:91
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:10663
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:47
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2410
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:634
The general base class for IMP exceptions.
Definition: exception.h:48
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1269
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1731
Class to handle individual particles of a Model object.
Definition: Particle.h:41
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:800
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1913
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:612
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1672
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1771
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2395
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1209
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1746
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:570
Sample using replica exchange.
Definition: samplers.py:505
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27