IMP logo
IMP Reference Guide  develop.5d113ecbfd,2023/06/06
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
7 import IMP.pmi.tools
8 import IMP.pmi.samplers
9 import IMP.pmi.output
10 import IMP.pmi.analysis
11 import IMP.pmi.io
12 import IMP.pmi.alphabets
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import os
17 try:
18  from pathlib import Path
19 except ImportError: # Use bundled pathlib on Python 2 without pathlib
20  from IMP._compat_pathlib import Path
21 import glob
22 from operator import itemgetter
23 from collections import defaultdict
24 import numpy as np
25 import itertools
26 import warnings
27 import math
28 
29 
30 class _MockMPIValues(object):
31  """Replace samplers.MPI_values when in test mode"""
32  def get_percentile(self, name):
33  return 0.
34 
35 
36 class _RMFRestraints(object):
37  """All restraints that are written out to the RMF file"""
38  def __init__(self, model, user_restraints):
39  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
40  self._user_restraints = user_restraints if user_restraints else []
41 
42  def __len__(self):
43  return (len(self._user_restraints)
44  + self._rmf_rs.get_number_of_restraints())
45 
46  def __bool__(self):
47  return len(self) > 0
48  __nonzero__ = __bool__ # Python 2 compatibility
49 
50  def __getitem__(self, i):
51  class FakePMIWrapper(object):
52  def __init__(self, r):
53  self.r = IMP.RestraintSet.get_from(r)
54 
55  def get_restraint(self):
56  return self.r
57 
58  lenuser = len(self._user_restraints)
59  if 0 <= i < lenuser:
60  return self._user_restraints[i]
61  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
62  r = self._rmf_rs.get_restraint(i - lenuser)
63  return FakePMIWrapper(r)
64  else:
65  raise IndexError("Out of range")
66 
67 
68 class ReplicaExchange(object):
69  """A macro to help setup and run replica exchange.
70  Supports Monte Carlo and molecular dynamics.
71  Produces trajectory RMF files, best PDB structures,
72  and output stat files.
73  """
74  def __init__(self, model, root_hier,
75  monte_carlo_sample_objects=None,
76  molecular_dynamics_sample_objects=None,
77  output_objects=[],
78  rmf_output_objects=None,
79  monte_carlo_temperature=1.0,
80  simulated_annealing=False,
81  simulated_annealing_minimum_temperature=1.0,
82  simulated_annealing_maximum_temperature=2.5,
83  simulated_annealing_minimum_temperature_nframes=100,
84  simulated_annealing_maximum_temperature_nframes=100,
85  replica_exchange_minimum_temperature=1.0,
86  replica_exchange_maximum_temperature=2.5,
87  replica_exchange_swap=True,
88  num_sample_rounds=1,
89  number_of_best_scoring_models=500,
90  monte_carlo_steps=10,
91  self_adaptive=False,
92  molecular_dynamics_steps=10,
93  molecular_dynamics_max_time_step=1.0,
94  number_of_frames=1000,
95  save_coordinates_mode="lowest_temperature",
96  nframes_write_coordinates=1,
97  write_initial_rmf=True,
98  initial_rmf_name_suffix="initial",
99  stat_file_name_suffix="stat",
100  best_pdb_name_suffix="model",
101  mmcif=False,
102  do_clean_first=True,
103  do_create_directories=True,
104  global_output_directory="./",
105  rmf_dir="rmfs/",
106  best_pdb_dir="pdbs/",
107  replica_stat_file_suffix="stat_replica",
108  em_object_for_rmf=None,
109  atomistic=False,
110  replica_exchange_object=None,
111  test_mode=False,
112  score_moved=False):
113  """Constructor.
114  @param model The IMP model
115  @param root_hier Top-level (System)hierarchy
116  @param monte_carlo_sample_objects Objects for MC sampling, which
117  should generally be a simple list of Mover objects, e.g.
118  from DegreesOfFreedom.get_movers().
119  @param molecular_dynamics_sample_objects Objects for MD sampling,
120  which should generally be a simple list of particles.
121  @param output_objects A list of structural objects and restraints
122  that will be included in output (ie, statistics "stat"
123  files). Any object that provides a get_output() method
124  can be used here. If None is passed
125  the macro will not write stat files.
126  @param rmf_output_objects A list of structural objects and
127  restraints that will be included in rmf. Any object
128  that provides a get_output() method can be used here.
129  @param monte_carlo_temperature MC temp (may need to be optimized
130  based on post-sampling analysis)
131  @param simulated_annealing If True, perform simulated annealing
132  @param simulated_annealing_minimum_temperature Should generally be
133  the same as monte_carlo_temperature.
134  @param simulated_annealing_minimum_temperature_nframes Number of
135  frames to compute at minimum temperature.
136  @param simulated_annealing_maximum_temperature_nframes Number of
137  frames to compute at
138  temps > simulated_annealing_maximum_temperature.
139  @param replica_exchange_minimum_temperature Low temp for REX; should
140  generally be the same as monte_carlo_temperature.
141  @param replica_exchange_maximum_temperature High temp for REX
142  @param replica_exchange_swap Boolean, enable disable temperature
143  swap (Default=True)
144  @param num_sample_rounds Number of rounds of MC/MD per cycle
145  @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
146  models to keep around for analysis.
147  @param mmcif If True, write best scoring models in mmCIF format;
148  if False (the default), write in legacy PDB format.
149  @param best_pdb_dir The directory under `global_output_directory`
150  where best-scoring PDB/mmCIF files are written.
151  @param best_pdb_name_suffix Part of the file name for best-scoring
152  PDB/mmCIF files.
153  @param monte_carlo_steps Number of MC steps per round
154  @param self_adaptive self adaptive scheme for Monte Carlo movers
155  @param molecular_dynamics_steps Number of MD steps per round
156  @param molecular_dynamics_max_time_step Max time step for MD
157  @param number_of_frames Number of REX frames to run
158  @param save_coordinates_mode string: how to save coordinates.
159  "lowest_temperature" (default) only the lowest temperatures
160  is saved
161  "25th_score" all replicas whose score is below the 25th
162  percentile
163  "50th_score" all replicas whose score is below the 50th
164  percentile
165  "75th_score" all replicas whose score is below the 75th
166  percentile
167  @param nframes_write_coordinates How often to write the coordinates
168  of a frame
169  @param write_initial_rmf Write the initial configuration
170  @param global_output_directory Folder that will be created to house
171  output.
172  @param test_mode Set to True to avoid writing any files, just test
173  one frame.
174  @param score_moved If True, attempt to speed up Monte Carlo
175  sampling by caching scoring function terms on particles
176  that didn't move.
177  """
178  self.model = model
179  self.vars = {}
180 
181  # add check hierarchy is multistate
182  self.output_objects = output_objects
183  self.rmf_output_objects = rmf_output_objects
184  if (isinstance(root_hier, IMP.atom.Hierarchy)
185  and not root_hier.get_parent()):
186  if self.output_objects is not None:
187  self.output_objects.append(
188  IMP.pmi.io.TotalScoreOutput(self.model))
189  if self.rmf_output_objects is not None:
190  self.rmf_output_objects.append(
191  IMP.pmi.io.TotalScoreOutput(self.model))
192  self.root_hier = root_hier
193  states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
194  self.vars["number_of_states"] = len(states)
195  if len(states) > 1:
196  self.root_hiers = states
197  self.is_multi_state = True
198  else:
199  self.root_hier = root_hier
200  self.is_multi_state = False
201  else:
202  raise TypeError("Must provide System hierarchy (root_hier)")
203 
204  self._rmf_restraints = _RMFRestraints(model, None)
205  self.em_object_for_rmf = em_object_for_rmf
206  self.monte_carlo_sample_objects = monte_carlo_sample_objects
207  self.vars["self_adaptive"] = self_adaptive
208  self.molecular_dynamics_sample_objects = \
209  molecular_dynamics_sample_objects
210  self.replica_exchange_object = replica_exchange_object
211  self.molecular_dynamics_max_time_step = \
212  molecular_dynamics_max_time_step
213  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
214  self.vars["replica_exchange_minimum_temperature"] = \
215  replica_exchange_minimum_temperature
216  self.vars["replica_exchange_maximum_temperature"] = \
217  replica_exchange_maximum_temperature
218  self.vars["replica_exchange_swap"] = replica_exchange_swap
219  self.vars["simulated_annealing"] = simulated_annealing
220  self.vars["simulated_annealing_minimum_temperature"] = \
221  simulated_annealing_minimum_temperature
222  self.vars["simulated_annealing_maximum_temperature"] = \
223  simulated_annealing_maximum_temperature
224  self.vars["simulated_annealing_minimum_temperature_nframes"] = \
225  simulated_annealing_minimum_temperature_nframes
226  self.vars["simulated_annealing_maximum_temperature_nframes"] = \
227  simulated_annealing_maximum_temperature_nframes
228 
229  self.vars["num_sample_rounds"] = num_sample_rounds
230  self.vars[
231  "number_of_best_scoring_models"] = number_of_best_scoring_models
232  self.vars["monte_carlo_steps"] = monte_carlo_steps
233  self.vars["molecular_dynamics_steps"] = molecular_dynamics_steps
234  self.vars["number_of_frames"] = number_of_frames
235  if save_coordinates_mode not in ("lowest_temperature", "25th_score",
236  "50th_score", "75th_score"):
237  raise Exception("save_coordinates_mode has unrecognized value")
238  else:
239  self.vars["save_coordinates_mode"] = save_coordinates_mode
240  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
241  self.vars["write_initial_rmf"] = write_initial_rmf
242  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
243  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
244  self.vars["mmcif"] = mmcif
245  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
246  self.vars["do_clean_first"] = do_clean_first
247  self.vars["do_create_directories"] = do_create_directories
248  self.vars["global_output_directory"] = global_output_directory
249  self.vars["rmf_dir"] = rmf_dir
250  self.vars["best_pdb_dir"] = best_pdb_dir
251  self.vars["atomistic"] = atomistic
252  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
253  self.vars["geometries"] = None
254  self.test_mode = test_mode
255  self.score_moved = score_moved
256 
257  def add_geometries(self, geometries):
258  if self.vars["geometries"] is None:
259  self.vars["geometries"] = list(geometries)
260  else:
261  self.vars["geometries"].extend(geometries)
262 
263  def show_info(self):
264  print("ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
265  "rmfs/*.rmf3 for each replica ")
266  print("--- it stores the best scoring pdb models in pdbs/")
267  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
268  "lowest temperature")
269  print("--- variables:")
270  keys = list(self.vars.keys())
271  keys.sort()
272  for v in keys:
273  print("------", v.ljust(30), self.vars[v])
274 
275  def get_replica_exchange_object(self):
276  return self.replica_exchange_object
277 
278  def _add_provenance(self, sampler_md, sampler_mc):
279  """Record details about the sampling in the IMP Hierarchies"""
280  iterations = 0
281  if sampler_md:
282  method = "Molecular Dynamics"
283  iterations += self.vars["molecular_dynamics_steps"]
284  if sampler_mc:
285  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
286  iterations += self.vars["monte_carlo_steps"]
287  # If no sampling is actually done, no provenance to write
288  if iterations == 0 or self.vars["number_of_frames"] == 0:
289  return
290  iterations *= self.vars["num_sample_rounds"]
291 
292  pi = self.model.add_particle("sampling")
294  self.model, pi, method, self.vars["number_of_frames"],
295  iterations)
296  p.set_number_of_replicas(
297  self.replica_exchange_object.get_number_of_replicas())
298  IMP.pmi.tools._add_pmi_provenance(self.root_hier)
299  IMP.core.add_provenance(self.model, self.root_hier, p)
300 
301  def execute_macro(self):
302  temp_index_factor = 100000.0
303  samplers = []
304  sampler_mc = None
305  sampler_md = None
306  if self.monte_carlo_sample_objects is not None:
307  print("Setting up MonteCarlo")
308  sampler_mc = IMP.pmi.samplers.MonteCarlo(
309  self.model, self.monte_carlo_sample_objects,
310  self.vars["monte_carlo_temperature"],
311  score_moved=self.score_moved)
312  if self.vars["simulated_annealing"]:
313  tmin = self.vars["simulated_annealing_minimum_temperature"]
314  tmax = self.vars["simulated_annealing_maximum_temperature"]
315  nfmin = self.vars[
316  "simulated_annealing_minimum_temperature_nframes"]
317  nfmax = self.vars[
318  "simulated_annealing_maximum_temperature_nframes"]
319  sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
320  if self.vars["self_adaptive"]:
321  sampler_mc.set_self_adaptive(
322  isselfadaptive=self.vars["self_adaptive"])
323  if self.output_objects is not None:
324  self.output_objects.append(sampler_mc)
325  if self.rmf_output_objects is not None:
326  self.rmf_output_objects.append(sampler_mc)
327  samplers.append(sampler_mc)
328 
329  if self.molecular_dynamics_sample_objects is not None:
330  print("Setting up MolecularDynamics")
332  self.model, self.molecular_dynamics_sample_objects,
333  self.vars["monte_carlo_temperature"],
334  maximum_time_step=self.molecular_dynamics_max_time_step)
335  if self.vars["simulated_annealing"]:
336  tmin = self.vars["simulated_annealing_minimum_temperature"]
337  tmax = self.vars["simulated_annealing_maximum_temperature"]
338  nfmin = self.vars[
339  "simulated_annealing_minimum_temperature_nframes"]
340  nfmax = self.vars[
341  "simulated_annealing_maximum_temperature_nframes"]
342  sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
343  if self.output_objects is not None:
344  self.output_objects.append(sampler_md)
345  if self.rmf_output_objects is not None:
346  self.rmf_output_objects.append(sampler_md)
347  samplers.append(sampler_md)
348 # -------------------------------------------------------------------------
349 
350  print("Setting up ReplicaExchange")
352  self.model, self.vars["replica_exchange_minimum_temperature"],
353  self.vars["replica_exchange_maximum_temperature"], samplers,
354  replica_exchange_object=self.replica_exchange_object)
355  self.replica_exchange_object = rex.rem
356 
357  myindex = rex.get_my_index()
358  if self.output_objects is not None:
359  self.output_objects.append(rex)
360  if self.rmf_output_objects is not None:
361  self.rmf_output_objects.append(rex)
362  # must reset the minimum temperature due to the
363  # different binary length of rem.get_my_parameter double and python
364  # float
365  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
366 
367 # -------------------------------------------------------------------------
368 
369  globaldir = self.vars["global_output_directory"] + "/"
370  rmf_dir = globaldir + self.vars["rmf_dir"]
371  pdb_dir = globaldir + self.vars["best_pdb_dir"]
372 
373  if not self.test_mode:
374  if self.vars["do_clean_first"]:
375  pass
376 
377  if self.vars["do_create_directories"]:
378 
379  try:
380  os.makedirs(globaldir)
381  except: # noqa: E722
382  pass
383  try:
384  os.makedirs(rmf_dir)
385  except: # noqa: E722
386  pass
387 
388  if not self.is_multi_state:
389  try:
390  os.makedirs(pdb_dir)
391  except: # noqa: E722
392  pass
393  else:
394  for n in range(self.vars["number_of_states"]):
395  try:
396  os.makedirs(pdb_dir + "/" + str(n))
397  except: # noqa: E722
398  pass
399 
400 # -------------------------------------------------------------------------
401 
403  if self.output_objects is not None:
404  self.output_objects.append(sw)
405  if self.rmf_output_objects is not None:
406  self.rmf_output_objects.append(sw)
407 
408  print("Setting up stat file")
409  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
410  low_temp_stat_file = globaldir + \
411  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
412 
413  # Ensure model is updated before saving init files
414  if not self.test_mode:
415  self.model.update()
416 
417  if not self.test_mode:
418  if self.output_objects is not None:
419  output.init_stat2(low_temp_stat_file,
420  self.output_objects,
421  extralabels=["rmf_file", "rmf_frame_index"])
422  else:
423  print("Stat file writing is disabled")
424 
425  if self.rmf_output_objects is not None:
426  print("Stat info being written in the rmf file")
427 
428  print("Setting up replica stat file")
429  replica_stat_file = globaldir + \
430  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
431  if not self.test_mode:
432  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
433 
434  if not self.test_mode:
435  print("Setting up best pdb files")
436  if not self.is_multi_state:
437  if self.vars["number_of_best_scoring_models"] > 0:
438  output.init_pdb_best_scoring(
439  pdb_dir + "/" + self.vars["best_pdb_name_suffix"],
440  self.root_hier,
441  self.vars["number_of_best_scoring_models"],
442  replica_exchange=True,
443  mmcif=self.vars['mmcif'],
444  best_score_file=globaldir + "best.scores.rex.py")
445  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
446  output.write_psf(
447  pdb_dir + "/" + "model.psf",
448  pdb_dir + "/" +
449  self.vars["best_pdb_name_suffix"] + pdbext)
450  else:
451  if self.vars["number_of_best_scoring_models"] > 0:
452  for n in range(self.vars["number_of_states"]):
453  output.init_pdb_best_scoring(
454  pdb_dir + "/" + str(n) + "/" +
455  self.vars["best_pdb_name_suffix"],
456  self.root_hiers[n],
457  self.vars["number_of_best_scoring_models"],
458  replica_exchange=True,
459  mmcif=self.vars['mmcif'],
460  best_score_file=globaldir + "best.scores.rex.py")
461  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
462  output.write_psf(
463  pdb_dir + "/" + str(n) + "/" + "model.psf",
464  pdb_dir + "/" + str(n) + "/" +
465  self.vars["best_pdb_name_suffix"] + pdbext)
466 # ---------------------------------------------
467 
468  if self.em_object_for_rmf is not None:
469  output_hierarchies = [
470  self.root_hier,
471  self.em_object_for_rmf.get_density_as_hierarchy(
472  )]
473  else:
474  output_hierarchies = [self.root_hier]
475 
476  if not self.test_mode:
477  print("Setting up and writing initial rmf coordinate file")
478  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
479  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
480  output_hierarchies,
481  listofobjects=self.rmf_output_objects)
482  if self._rmf_restraints:
483  output.add_restraints_to_rmf(
484  init_suffix + "." + str(myindex) + ".rmf3",
485  self._rmf_restraints)
486  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
487  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
488 
489  if not self.test_mode:
490  mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
491  else:
492  mpivs = _MockMPIValues()
493 
494  self._add_provenance(sampler_md, sampler_mc)
495 
496  if not self.test_mode:
497  print("Setting up production rmf files")
498  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
499  output.init_rmf(rmfname, output_hierarchies,
500  geometries=self.vars["geometries"],
501  listofobjects=self.rmf_output_objects)
502 
503  if self._rmf_restraints:
504  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
505 
506  ntimes_at_low_temp = 0
507 
508  if myindex == 0:
509  self.show_info()
510  self.replica_exchange_object.set_was_used(True)
511  nframes = self.vars["number_of_frames"]
512  if self.test_mode:
513  nframes = 1
514  for i in range(nframes):
515  if self.test_mode:
516  score = 0.
517  else:
518  for nr in range(self.vars["num_sample_rounds"]):
519  if sampler_md is not None:
520  sampler_md.optimize(
521  self.vars["molecular_dynamics_steps"])
522  if sampler_mc is not None:
523  sampler_mc.optimize(self.vars["monte_carlo_steps"])
525  self.model).evaluate(False)
526  mpivs.set_value("score", score)
527  output.set_output_entry("score", score)
528 
529  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
530 
531  if self.vars["save_coordinates_mode"] == "lowest_temperature":
532  save_frame = (min_temp_index == my_temp_index)
533  elif self.vars["save_coordinates_mode"] == "25th_score":
534  score_perc = mpivs.get_percentile("score")
535  save_frame = (score_perc*100.0 <= 25.0)
536  elif self.vars["save_coordinates_mode"] == "50th_score":
537  score_perc = mpivs.get_percentile("score")
538  save_frame = (score_perc*100.0 <= 50.0)
539  elif self.vars["save_coordinates_mode"] == "75th_score":
540  score_perc = mpivs.get_percentile("score")
541  save_frame = (score_perc*100.0 <= 75.0)
542 
543  # Ensure model is updated before saving output files
544  if save_frame and not self.test_mode:
545  self.model.update()
546 
547  if save_frame:
548  print("--- frame %s score %s " % (str(i), str(score)))
549 
550  if not self.test_mode:
551  if i % self.vars["nframes_write_coordinates"] == 0:
552  print('--- writing coordinates')
553  if self.vars["number_of_best_scoring_models"] > 0:
554  output.write_pdb_best_scoring(score)
555  output.write_rmf(rmfname)
556  output.set_output_entry("rmf_file", rmfname)
557  output.set_output_entry("rmf_frame_index",
558  ntimes_at_low_temp)
559  else:
560  output.set_output_entry("rmf_file", rmfname)
561  output.set_output_entry("rmf_frame_index", '-1')
562  if self.output_objects is not None:
563  output.write_stat2(low_temp_stat_file)
564  ntimes_at_low_temp += 1
565 
566  if not self.test_mode:
567  output.write_stat2(replica_stat_file)
568  if self.vars["replica_exchange_swap"]:
569  rex.swap_temp(i, score)
570  for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
571  p.add_replica_exchange(state, self)
572 
573  if not self.test_mode:
574  print("closing production rmf files")
575  output.close_rmf(rmfname)
576 
577 
578 class BuildSystem(object):
579  """A macro to build a IMP::pmi::topology::System based on a
580  TopologyReader object.
581 
582  Easily create multi-state systems by calling this macro
583  repeatedly with different TopologyReader objects!
584  A useful function is get_molecules() which returns the PMI Molecules
585  grouped by state as a dictionary with key = (molecule name),
586  value = IMP.pmi.topology.Molecule
587  Quick multi-state system:
588  @code{.python}
589  model = IMP.Model()
590  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
591  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
592  bs = IMP.pmi.macros.BuildSystem(model)
593  bs.add_state(reader1)
594  bs.add_state(reader2)
595  bs.execute_macro() # build everything including degrees of freedom
596  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
597  ### now you have a two state system, you add restraints etc
598  @endcode
599  @note The "domain name" entry of the topology reader is not used.
600  All molecules are set up by the component name, but split into rigid bodies
601  as requested.
602  """
603 
604  _alphabets = {'DNA': IMP.pmi.alphabets.dna,
605  'RNA': IMP.pmi.alphabets.rna}
606 
607  def __init__(self, model, sequence_connectivity_scale=4.0,
608  force_create_gmm_files=False, resolutions=[1, 10],
609  name='System'):
610  """Constructor
611  @param model An IMP Model
612  @param sequence_connectivity_scale For scaling the connectivity
613  restraint
614  @param force_create_gmm_files If True, will sample and create GMMs
615  no matter what. If False, will only sample if the
616  files don't exist. If number of Gaussians is zero, won't
617  do anything.
618  @param resolutions The resolutions to build for structured regions
619  @param name The name of the top-level hierarchy node.
620  """
621  self.model = model
622  self.system = IMP.pmi.topology.System(self.model, name=name)
623  self._readers = [] # the TopologyReaders (one per state)
624  # TempResidues for each domain key=unique name,
625  # value=(atomic_res,non_atomic_res).
626  self._domain_res = []
627  self._domains = [] # key = domain unique name, value = Component
628  self.force_create_gmm_files = force_create_gmm_files
629  self.resolutions = resolutions
630 
631  def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
632  chain_ids=None):
633  """Add a state using the topology info in a
634  IMP::pmi::topology::TopologyReader object.
635  When you are done adding states, call execute_macro()
636  @param reader The TopologyReader object
637  @param fasta_name_map dictionary for converting protein names
638  found in the fasta file
639  @param chain_ids A list or string of chain IDs for assigning to
640  newly-created molecules, e.g.
641  `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
642  If not specified, chain IDs A through Z are assigned, then
643  AA through AZ, then BA through BZ, and so on, in the same
644  fashion as PDB.
645  """
646  state = self.system.create_state()
647  self._readers.append(reader)
648  # key is unique name, value is (atomic res, nonatomicres)
649  these_domain_res = {}
650  these_domains = {} # key is unique name, value is _Component
651  if chain_ids is None:
652  chain_ids = IMP.pmi.output._ChainIDs()
653  numchain = 0
654 
655  # setup representation
656  # loop over molecules, copies, then domains
657  for molname in reader.get_molecules():
658  copies = reader.get_molecules()[molname].domains
659  for nc, copyname in enumerate(copies):
660  print("BuildSystem.add_state: setting up molecule %s copy "
661  "number %s" % (molname, str(nc)))
662  copy = copies[copyname]
663  # option to not rename chains
664  if keep_chain_id:
665  all_chains = [c for c in copy if c.chain is not None]
666  if all_chains:
667  chain_id = all_chains[0].chain
668  else:
669  chain_id = chain_ids[numchain]
670  warnings.warn(
671  "No PDBs specified for %s, so keep_chain_id has "
672  "no effect; using default chain ID '%s'"
673  % (molname, chain_id), IMP.pmi.ParameterWarning)
674  else:
675  chain_id = chain_ids[numchain]
676  if nc == 0:
677  alphabet = IMP.pmi.alphabets.amino_acid
678  fasta_flag = copy[0].fasta_flag
679  if fasta_flag in self._alphabets:
680  alphabet = self._alphabets[fasta_flag]
682  copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
683  print("BuildSystem.add_state: molecule %s sequence has "
684  "%s residues" % (molname, len(seq)))
685  orig_mol = state.create_molecule(molname, seq, chain_id,
686  alphabet=alphabet)
687  mol = orig_mol
688  numchain += 1
689  else:
690  print("BuildSystem.add_state: creating a copy for "
691  "molecule %s" % molname)
692  mol = orig_mol.create_copy(chain_id)
693  numchain += 1
694 
695  for domainnumber, domain in enumerate(copy):
696  print("BuildSystem.add_state: ---- setting up domain %s "
697  "of molecule %s" % (domainnumber, molname))
698  # we build everything in the residue range, even if it
699  # extends beyond what's in the actual PDB file
700  these_domains[domain.get_unique_name()] = domain
701  if domain.residue_range == [] or \
702  domain.residue_range is None:
703  domain_res = mol.get_residues()
704  else:
705  start = domain.residue_range[0]+domain.pdb_offset
706  if domain.residue_range[1] == 'END':
707  end = len(mol.sequence)
708  else:
709  end = domain.residue_range[1]+domain.pdb_offset
710  domain_res = mol.residue_range(start-1, end-1)
711  print("BuildSystem.add_state: -------- domain %s of "
712  "molecule %s extends from residue %s to "
713  "residue %s "
714  % (domainnumber, molname, start, end))
715  if domain.pdb_file == "BEADS":
716  print("BuildSystem.add_state: -------- domain %s of "
717  "molecule %s represented by BEADS "
718  % (domainnumber, molname))
719  mol.add_representation(
720  domain_res,
721  resolutions=[domain.bead_size],
722  setup_particles_as_densities=(
723  domain.em_residues_per_gaussian != 0),
724  color=domain.color)
725  these_domain_res[domain.get_unique_name()] = \
726  (set(), domain_res)
727  elif domain.pdb_file == "IDEAL_HELIX":
728  print("BuildSystem.add_state: -------- domain %s of "
729  "molecule %s represented by IDEAL_HELIX "
730  % (domainnumber, molname))
731  emper = domain.em_residues_per_gaussian
732  mol.add_representation(
733  domain_res,
734  resolutions=self.resolutions,
735  ideal_helix=True,
736  density_residues_per_component=emper,
737  density_prefix=domain.density_prefix,
738  density_force_compute=self.force_create_gmm_files,
739  color=domain.color)
740  these_domain_res[domain.get_unique_name()] = \
741  (domain_res, set())
742  else:
743  print("BuildSystem.add_state: -------- domain %s of "
744  "molecule %s represented by pdb file %s "
745  % (domainnumber, molname, domain.pdb_file))
746  domain_atomic = mol.add_structure(domain.pdb_file,
747  domain.chain,
748  domain.residue_range,
749  domain.pdb_offset,
750  soft_check=True)
751  domain_non_atomic = domain_res - domain_atomic
752  if not domain.em_residues_per_gaussian:
753  mol.add_representation(
754  domain_atomic, resolutions=self.resolutions,
755  color=domain.color)
756  if len(domain_non_atomic) > 0:
757  mol.add_representation(
758  domain_non_atomic,
759  resolutions=[domain.bead_size],
760  color=domain.color)
761  else:
762  print("BuildSystem.add_state: -------- domain %s "
763  "of molecule %s represented by gaussians "
764  % (domainnumber, molname))
765  emper = domain.em_residues_per_gaussian
766  creategmm = self.force_create_gmm_files
767  mol.add_representation(
768  domain_atomic,
769  resolutions=self.resolutions,
770  density_residues_per_component=emper,
771  density_prefix=domain.density_prefix,
772  density_force_compute=creategmm,
773  color=domain.color)
774  if len(domain_non_atomic) > 0:
775  mol.add_representation(
776  domain_non_atomic,
777  resolutions=[domain.bead_size],
778  setup_particles_as_densities=True,
779  color=domain.color)
780  these_domain_res[domain.get_unique_name()] = (
781  domain_atomic, domain_non_atomic)
782  self._domain_res.append(these_domain_res)
783  self._domains.append(these_domains)
784  print('BuildSystem.add_state: State', len(self.system.states), 'added')
785  return state
786 
787  def get_molecules(self):
788  """Return list of all molecules grouped by state.
789  For each state, it's a dictionary of Molecules where key is the
790  molecule name
791  """
792  return [s.get_molecules() for s in self.system.get_states()]
793 
794  def get_molecule(self, molname, copy_index=0, state_index=0):
795  return self.system.get_states()[state_index].get_molecules()[
796  molname][copy_index]
797 
798  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04,
799  max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
800  """Builds representations and sets up degrees of freedom"""
801  print("BuildSystem.execute_macro: building representations")
802  self.root_hier = self.system.build()
803 
804  print("BuildSystem.execute_macro: setting up degrees of freedom")
805  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
806  for nstate, reader in enumerate(self._readers):
807  rbs = reader.get_rigid_bodies()
808  srbs = reader.get_super_rigid_bodies()
809  csrbs = reader.get_chains_of_super_rigid_bodies()
810 
811  # add rigid bodies
812  domains_in_rbs = set()
813  for rblist in rbs:
814  print("BuildSystem.execute_macro: -------- building rigid "
815  "body %s" % (str(rblist)))
816  all_res = IMP.pmi.tools.OrderedSet()
817  bead_res = IMP.pmi.tools.OrderedSet()
818  for dname in rblist:
819  domain = self._domains[nstate][dname]
820  print("BuildSystem.execute_macro: -------- adding %s"
821  % (str(dname)))
822  all_res |= self._domain_res[nstate][dname][0]
823  bead_res |= self._domain_res[nstate][dname][1]
824  domains_in_rbs.add(dname)
825  all_res |= bead_res
826  print("BuildSystem.execute_macro: -------- creating rigid "
827  "body with max_trans %s max_rot %s "
828  "non_rigid_max_trans %s"
829  % (str(max_rb_trans), str(max_rb_rot),
830  str(max_bead_trans)))
831  self.dof.create_rigid_body(all_res,
832  nonrigid_parts=bead_res,
833  max_trans=max_rb_trans,
834  max_rot=max_rb_rot,
835  nonrigid_max_trans=max_bead_trans,
836  name="RigidBody %s" % dname)
837 
838  # if you have any domains not in an RB, set them as flexible beads
839  for dname, domain in self._domains[nstate].items():
840  if dname not in domains_in_rbs:
841  if domain.pdb_file != "BEADS":
842  warnings.warn(
843  "No rigid bodies set for %s. Residues read from "
844  "the PDB file will not be sampled - only regions "
845  "missing from the PDB will be treated flexibly. "
846  "To sample the entire sequence, use BEADS instead "
847  "of a PDB file name" % dname,
849  self.dof.create_flexible_beads(
850  self._domain_res[nstate][dname][1],
851  max_trans=max_bead_trans)
852 
853  # add super rigid bodies
854  for srblist in srbs:
855  print("BuildSystem.execute_macro: -------- building "
856  "super rigid body %s" % (str(srblist)))
857  all_res = IMP.pmi.tools.OrderedSet()
858  for dname in srblist:
859  print("BuildSystem.execute_macro: -------- adding %s"
860  % (str(dname)))
861  all_res |= self._domain_res[nstate][dname][0]
862  all_res |= self._domain_res[nstate][dname][1]
863 
864  print("BuildSystem.execute_macro: -------- creating super "
865  "rigid body with max_trans %s max_rot %s "
866  % (str(max_srb_trans), str(max_srb_rot)))
867  self.dof.create_super_rigid_body(
868  all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
869 
870  # add chains of super rigid bodies
871  for csrblist in csrbs:
872  all_res = IMP.pmi.tools.OrderedSet()
873  for dname in csrblist:
874  all_res |= self._domain_res[nstate][dname][0]
875  all_res |= self._domain_res[nstate][dname][1]
876  all_res = list(all_res)
877  all_res.sort(key=lambda r: r.get_index())
878  self.dof.create_main_chain_mover(all_res)
879  return self.root_hier, self.dof
880 
881 
882 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
883 class AnalysisReplicaExchange0(object):
884  """A macro for running all the basic operations of analysis.
885  Includes clustering, precision analysis, and making ensemble density maps.
886  A number of plots are also supported.
887  """
888  def __init__(self, model,
889  merge_directories=["./"],
890  stat_file_name_suffix="stat",
891  best_pdb_name_suffix="model",
892  do_clean_first=True,
893  do_create_directories=True,
894  global_output_directory="output/",
895  replica_stat_file_suffix="stat_replica",
896  global_analysis_result_directory="./analysis/",
897  test_mode=False):
898  """Constructor.
899  @param model The IMP model
900  @param stat_file_name_suffix
901  @param merge_directories The directories containing output files
902  @param best_pdb_name_suffix
903  @param do_clean_first
904  @param do_create_directories
905  @param global_output_directory Where everything is
906  @param replica_stat_file_suffix
907  @param global_analysis_result_directory
908  @param test_mode If True, nothing is changed on disk
909  """
910 
911  try:
912  from mpi4py import MPI
913  self.comm = MPI.COMM_WORLD
914  self.rank = self.comm.Get_rank()
915  self.number_of_processes = self.comm.size
916  except ImportError:
917  self.rank = 0
918  self.number_of_processes = 1
919 
920  self.test_mode = test_mode
921  self._protocol_output = []
922  self.cluster_obj = None
923  self.model = model
924  stat_dir = global_output_directory
925  self.stat_files = []
926  # it contains the position of the root directories
927  for rd in merge_directories:
928  stat_files = glob.glob(os.path.join(rd, stat_dir, "stat.*.out"))
929  if len(stat_files) == 0:
930  warnings.warn("no stat files found in %s"
931  % os.path.join(rd, stat_dir),
933  self.stat_files += stat_files
934 
935  def add_protocol_output(self, p):
936  """Capture details of the modeling protocol.
937  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
938  """
939  # Assume last state is the one we're interested in
940  self._protocol_output.append((p, p._last_state))
941 
942  def get_modeling_trajectory(self,
943  score_key="Total_Score",
944  rmf_file_key="rmf_file",
945  rmf_file_frame_key="rmf_frame_index",
946  outputdir="./",
947  get_every=1,
948  nframes_trajectory=10000):
949  """ Get a trajectory of the modeling run, for generating
950  demonstrative movies
951 
952  @param score_key The score for ranking models
953  @param rmf_file_key Key pointing to RMF filename
954  @param rmf_file_frame_key Key pointing to RMF frame number
955  @param outputdir The local output directory used in the run
956  @param get_every Extract every nth frame
957  @param nframes_trajectory Total number of frames of the trajectory
958  """
959  import math
960 
961  trajectory_models = IMP.pmi.io.get_trajectory_models(
962  self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
963  get_every)
964  score_list = list(map(float, trajectory_models[2]))
965 
966  max_score = max(score_list)
967  min_score = min(score_list)
968 
969  bins = [(max_score-min_score)*math.exp(-float(i))+min_score
970  for i in range(nframes_trajectory)]
971  binned_scores = [None]*nframes_trajectory
972  binned_model_indexes = [-1]*nframes_trajectory
973 
974  for model_index, s in enumerate(score_list):
975  bins_score_diffs = [abs(s-b) for b in bins]
976  bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
977  if binned_scores[bin_index] is None:
978  binned_scores[bin_index] = s
979  binned_model_indexes[bin_index] = model_index
980  else:
981  old_diff = abs(binned_scores[bin_index]-bins[bin_index])
982  new_diff = abs(s-bins[bin_index])
983  if new_diff < old_diff:
984  binned_scores[bin_index] = s
985  binned_model_indexes[bin_index] = model_index
986 
987  print(binned_scores)
988  print(binned_model_indexes)
989 
990  def _expand_ambiguity(self, prot, d):
991  """If using PMI2, expand the dictionary to include copies as
992  ambiguous options
993 
994  This also keeps the states separate.
995  """
996  newdict = {}
997  for key in d:
998  val = d[key]
999  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
1000  newdict[key] = val
1001  continue
1002  states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1003  if isinstance(val, tuple):
1004  start = val[0]
1005  stop = val[1]
1006  name = val[2]
1007  else:
1008  start = 1
1009  stop = -1
1010  name = val
1011  for nst in range(len(states)):
1012  sel = IMP.atom.Selection(prot, molecule=name, state_index=nst)
1013  copies = sel.get_selected_particles(with_representation=False)
1014  if len(copies) > 1:
1015  for nc in range(len(copies)):
1016  if len(states) > 1:
1017  newdict['%s.%i..%i' % (name, nst, nc)] = \
1018  (start, stop, name, nc, nst)
1019  else:
1020  newdict['%s..%i' % (name, nc)] = \
1021  (start, stop, name, nc, nst)
1022  else:
1023  newdict[key] = val
1024  return newdict
1025 
1026  def clustering(self,
1027  score_key="Total_Score",
1028  rmf_file_key="rmf_file",
1029  rmf_file_frame_key="rmf_frame_index",
1030  state_number=0,
1031  prefiltervalue=None,
1032  feature_keys=[],
1033  outputdir="./",
1034  alignment_components=None,
1035  number_of_best_scoring_models=10,
1036  rmsd_calculation_components=None,
1037  distance_matrix_file='distances.mat',
1038  load_distance_matrix_file=False,
1039  skip_clustering=False,
1040  number_of_clusters=1,
1041  display_plot=False,
1042  exit_after_display=True,
1043  get_every=1,
1044  first_and_last_frames=None,
1045  density_custom_ranges=None,
1046  write_pdb_with_centered_coordinates=False,
1047  voxel_size=5.0):
1048  """Get the best scoring models, compute a distance matrix,
1049  cluster them, and create density maps.
1050 
1051  Tuple format: "molname" just the molecule,
1052  or (start,stop,molname,copy_num(optional),state_num(optional)
1053  Can pass None for copy or state to ignore that field.
1054  If you don't pass a specific copy number
1055 
1056  @param score_key The score for ranking models.
1057  @param rmf_file_key Key pointing to RMF filename
1058  @param rmf_file_frame_key Key pointing to RMF frame number
1059  @param state_number State number to analyze
1060  @param prefiltervalue Only include frames where the
1061  score key is below this value
1062  @param feature_keys Keywords for which you want to
1063  calculate average, medians, etc.
1064  If you pass "Keyname" it'll include everything that matches
1065  "*Keyname*"
1066  @param outputdir The local output directory used in
1067  the run
1068  @param alignment_components Dictionary with keys=groupname,
1069  values are tuples for aligning the structures
1070  e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1071  @param number_of_best_scoring_models Num models to keep per run
1072  @param rmsd_calculation_components For calculating RMSD
1073  (same format as alignment_components)
1074  @param distance_matrix_file Where to store/read the
1075  distance matrix
1076  @param load_distance_matrix_file Try to load the distance
1077  matrix file
1078  @param skip_clustering Just extract the best scoring
1079  models and save the pdbs
1080  @param number_of_clusters Number of k-means clusters
1081  @param display_plot Display the distance matrix
1082  @param exit_after_display Exit after displaying distance
1083  matrix
1084  @param get_every Extract every nth frame
1085  @param first_and_last_frames A tuple with the first and last
1086  frames to be analyzed. Values are percentages!
1087  Default: get all frames
1088  @param density_custom_ranges For density calculation
1089  (same format as alignment_components)
1090  @param write_pdb_with_centered_coordinates
1091  @param voxel_size Used for the density output
1092  """
1093  # Track provenance information to be added to each output model
1094  prov = []
1095  self._outputdir = Path(outputdir).absolute()
1096  self._number_of_clusters = number_of_clusters
1097  for p, state in self._protocol_output:
1098  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1099 
1100  if self.test_mode:
1101  return
1102 
1103  if self.rank == 0:
1104  try:
1105  os.mkdir(outputdir)
1106  except: # noqa: E722
1107  pass
1108 
1109  if not load_distance_matrix_file:
1110  if len(self.stat_files) == 0:
1111  print("ERROR: no stat file found in the given path")
1112  return
1113  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1114  self.stat_files, self.number_of_processes)[self.rank]
1115 
1116  # read ahead to check if you need the PMI2 score key instead
1117  for k in (score_key, rmf_file_key, rmf_file_frame_key):
1118  if k in feature_keys:
1119  warnings.warn(
1120  "no need to pass " + k + " to feature_keys.",
1122  feature_keys.remove(k)
1123 
1124  best_models = IMP.pmi.io.get_best_models(
1125  my_stat_files, score_key, feature_keys, rmf_file_key,
1126  rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1127  rmf_file_list = best_models[0]
1128  rmf_file_frame_list = best_models[1]
1129  score_list = best_models[2]
1130  feature_keyword_list_dict = best_models[3]
1131 
1132 # ------------------------------------------------------------------------
1133 # collect all the files and scores
1134 # ------------------------------------------------------------------------
1135 
1136  if self.number_of_processes > 1:
1137  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1138  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1139  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1140  rmf_file_frame_list)
1141  for k in feature_keyword_list_dict:
1142  feature_keyword_list_dict[k] = \
1144  feature_keyword_list_dict[k])
1145 
1146  # sort by score and get the best scoring ones
1147  score_rmf_tuples = list(zip(score_list,
1148  rmf_file_list,
1149  rmf_file_frame_list,
1150  list(range(len(score_list)))))
1151 
1152  if density_custom_ranges:
1153  for k in density_custom_ranges:
1154  if not isinstance(density_custom_ranges[k], list):
1155  raise Exception("Density custom ranges: values must "
1156  "be lists of tuples")
1157 
1158  # keep subset of frames if requested
1159  if first_and_last_frames is not None:
1160  nframes = len(score_rmf_tuples)
1161  first_frame = int(first_and_last_frames[0] * nframes)
1162  last_frame = int(first_and_last_frames[1] * nframes)
1163  if last_frame > len(score_rmf_tuples):
1164  last_frame = -1
1165  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1166 
1167  # sort RMFs by the score_key in ascending order, and store the rank
1168  best_score_rmf_tuples = sorted(
1169  score_rmf_tuples,
1170  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1171  best_score_rmf_tuples = [t+(n,) for n, t in
1172  enumerate(best_score_rmf_tuples)]
1173  # Note in the provenance info that we only kept best-scoring models
1174  prov.append(IMP.pmi.io.FilterProvenance(
1175  "Best scoring", 0, number_of_best_scoring_models))
1176  # sort the feature scores in the same way
1177  best_score_feature_keyword_list_dict = defaultdict(list)
1178  for tpl in best_score_rmf_tuples:
1179  index = tpl[3]
1180  for f in feature_keyword_list_dict:
1181  best_score_feature_keyword_list_dict[f].append(
1182  feature_keyword_list_dict[f][index])
1183  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1184  best_score_rmf_tuples,
1185  self.number_of_processes)[self.rank]
1186 
1187  # expand the dictionaries to include ambiguous copies
1188  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1189  self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1190  if IMP.pmi.get_is_canonical(prot_ahead):
1191  if rmsd_calculation_components is not None:
1192  tmp = self._expand_ambiguity(
1193  prot_ahead, rmsd_calculation_components)
1194  if tmp != rmsd_calculation_components:
1195  print('Detected ambiguity, expand rmsd components to',
1196  tmp)
1197  rmsd_calculation_components = tmp
1198  if alignment_components is not None:
1199  tmp = self._expand_ambiguity(prot_ahead,
1200  alignment_components)
1201  if tmp != alignment_components:
1202  print('Detected ambiguity, expand alignment '
1203  'components to', tmp)
1204  alignment_components = tmp
1205 
1206 # -------------------------------------------------------------
1207 # read the coordinates
1208 # ------------------------------------------------------------
1209  rmsd_weights = IMP.pmi.io.get_bead_sizes(
1210  self.model, my_best_score_rmf_tuples[0],
1211  rmsd_calculation_components, state_number=state_number)
1213  self.model, my_best_score_rmf_tuples, alignment_components,
1214  rmsd_calculation_components, state_number=state_number)
1215 
1216  # note! the coordinates are simply float tuples, NOT decorators,
1217  # NOT Vector3D, NOR particles, because these object cannot be
1218  # serialized. We need serialization
1219  # for the parallel computation based on mpi.
1220 
1221  # dict:key=component name,val=coords per hit
1222  all_coordinates = got_coords[0]
1223 
1224  # same as above, limited to alignment bits
1225  alignment_coordinates = got_coords[1]
1226 
1227  # same as above, limited to RMSD bits
1228  rmsd_coordinates = got_coords[2]
1229 
1230  # dictionary with key=RMF, value=score rank
1231  rmf_file_name_index_dict = got_coords[3]
1232 
1233  # RMF file per hit
1234  all_rmf_file_names = got_coords[4]
1235 
1236 # ------------------------------------------------------------------------
1237 # optionally don't compute distance matrix or cluster, just write top files
1238 # ------------------------------------------------------------------------
1239  if skip_clustering:
1240  if density_custom_ranges:
1241  DensModule = IMP.pmi.analysis.GetModelDensity(
1242  density_custom_ranges, voxel=voxel_size)
1243 
1244  dircluster = os.path.join(outputdir,
1245  "all_models."+str(self.rank))
1246  try:
1247  os.mkdir(outputdir)
1248  except: # noqa: E722
1249  pass
1250  try:
1251  os.mkdir(dircluster)
1252  except: # noqa: E722
1253  pass
1254  clusstat = open(os.path.join(
1255  dircluster, "stat."+str(self.rank)+".out"), "w")
1256  for cnt, tpl in enumerate(my_best_score_rmf_tuples):
1257  rmf_name = tpl[1]
1258  rmf_frame_number = tpl[2]
1259  tmp_dict = {}
1260  index = tpl[4]
1261  for key in best_score_feature_keyword_list_dict:
1262  tmp_dict[key] = \
1263  best_score_feature_keyword_list_dict[key][index]
1264 
1265  if cnt == 0:
1266  prots, rs = \
1267  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1268  self.model, rmf_frame_number, rmf_name)
1269  else:
1270  linking_successful = \
1271  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1272  self.model, prots, rs, rmf_frame_number,
1273  rmf_name)
1274  if not linking_successful:
1275  continue
1276 
1277  if not prots:
1278  continue
1279 
1280  if IMP.pmi.get_is_canonical(prots[0]):
1281  states = IMP.atom.get_by_type(
1282  prots[0], IMP.atom.STATE_TYPE)
1283  prot = states[state_number]
1284  else:
1285  prot = prots[state_number]
1286 
1287  # get transformation aligning coordinates of
1288  # requested tuples to the first RMF file
1289  if cnt == 0:
1290  coords_f1 = alignment_coordinates[cnt]
1291  if cnt > 0:
1292  coords_f2 = alignment_coordinates[cnt]
1293  if coords_f2:
1295  coords_f1, coords_f2)
1296  transformation = Ali.align()[1]
1297  else:
1298  transformation = \
1300 
1301  rbs = set()
1302  for p in IMP.atom.get_leaves(prot):
1303  if not IMP.core.XYZR.get_is_setup(p):
1305  IMP.core.XYZR(p).set_radius(0.0001)
1306  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1307 
1309  rbm = IMP.core.RigidBodyMember(p)
1310  rb = rbm.get_rigid_body()
1311  rbs.add(rb)
1312  else:
1314  transformation)
1315  for rb in rbs:
1316  IMP.core.transform(rb, transformation)
1317 
1318  o = IMP.pmi.output.Output()
1319  self.model.update()
1320  out_pdb_fn = os.path.join(
1321  dircluster, str(cnt)+"."+str(self.rank)+".pdb")
1322  out_rmf_fn = os.path.join(
1323  dircluster, str(cnt)+"."+str(self.rank)+".rmf3")
1324  o.init_pdb(out_pdb_fn, prot)
1325  tc = write_pdb_with_centered_coordinates
1326  o.write_pdb(out_pdb_fn,
1327  translate_to_geometric_center=tc)
1328 
1329  tmp_dict["local_pdb_file_name"] = \
1330  os.path.basename(out_pdb_fn)
1331  tmp_dict["rmf_file_full_path"] = rmf_name
1332  tmp_dict["local_rmf_file_name"] = \
1333  os.path.basename(out_rmf_fn)
1334  tmp_dict["local_rmf_frame_number"] = 0
1335 
1336  clusstat.write(str(tmp_dict)+"\n")
1337 
1338  if IMP.pmi.get_is_canonical(prot):
1339  # create a single-state System and write that
1341  IMP.Particle(self.model))
1342  h.set_name("System")
1343  h.add_child(prot)
1344  o.init_rmf(out_rmf_fn, [h], rs)
1345  else:
1346  o.init_rmf(out_rmf_fn, [prot], rs)
1347 
1348  o.write_rmf(out_rmf_fn)
1349  o.close_rmf(out_rmf_fn)
1350  # add the density
1351  if density_custom_ranges:
1352  DensModule.add_subunits_density(prot)
1353 
1354  if density_custom_ranges:
1355  DensModule.write_mrc(path=dircluster)
1356  del DensModule
1357  return
1358 
1359  # broadcast the coordinates
1360  if self.number_of_processes > 1:
1361  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1362  all_coordinates)
1363  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1364  all_rmf_file_names)
1365  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1366  rmf_file_name_index_dict)
1367  alignment_coordinates = IMP.pmi.tools.scatter_and_gather(
1368  alignment_coordinates)
1369  rmsd_coordinates = IMP.pmi.tools.scatter_and_gather(
1370  rmsd_coordinates)
1371 
1372  if self.rank == 0:
1373  # save needed informations in external files
1374  self.save_objects(
1375  [best_score_feature_keyword_list_dict,
1376  rmf_file_name_index_dict],
1377  ".macro.pkl")
1378 
1379 # ------------------------------------------------------------------------
1380 # Calculate distance matrix and cluster
1381 # ------------------------------------------------------------------------
1382  print("setup clustering class")
1383  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1384 
1385  for n, model_coordinate_dict in enumerate(all_coordinates):
1386  # let's try to align
1387  if (alignment_components is not None
1388  and len(self.cluster_obj.all_coords) == 0):
1389  # set the first model as template coordinates
1390  self.cluster_obj.set_template(alignment_coordinates[n])
1391  self.cluster_obj.fill(all_rmf_file_names[n],
1392  rmsd_coordinates[n])
1393  print("Global calculating the distance matrix")
1394 
1395  # calculate distance matrix, all against all
1396  self.cluster_obj.dist_matrix()
1397 
1398  # perform clustering and optionally display
1399  if self.rank == 0:
1400  self.cluster_obj.do_cluster(number_of_clusters)
1401  if display_plot:
1402  if self.rank == 0:
1403  self.cluster_obj.plot_matrix(
1404  figurename=os.path.join(outputdir,
1405  'dist_matrix.pdf'))
1406  if exit_after_display:
1407  exit()
1408  self.cluster_obj.save_distance_matrix_file(
1409  file_name=distance_matrix_file)
1410 
1411 # ------------------------------------------------------------------------
1412 # Alteratively, load the distance matrix from file and cluster that
1413 # ------------------------------------------------------------------------
1414  else:
1415  if self.rank == 0:
1416  print("setup clustering class")
1417  self.cluster_obj = IMP.pmi.analysis.Clustering()
1418  self.cluster_obj.load_distance_matrix_file(
1419  file_name=distance_matrix_file)
1420  print("clustering with %s clusters" % str(number_of_clusters))
1421  self.cluster_obj.do_cluster(number_of_clusters)
1422  [best_score_feature_keyword_list_dict,
1423  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1424  if display_plot:
1425  if self.rank == 0:
1426  self.cluster_obj.plot_matrix(figurename=os.path.join(
1427  outputdir, 'dist_matrix.pdf'))
1428  if exit_after_display:
1429  exit()
1430  if self.number_of_processes > 1:
1431  self.comm.Barrier()
1432 
1433 # ------------------------------------------------------------------------
1434 # now save all informations about the clusters
1435 # ------------------------------------------------------------------------
1436 
1437  if self.rank == 0:
1438  print(self.cluster_obj.get_cluster_labels())
1439  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1440  print("rank %s " % str(self.rank))
1441  print("cluster %s " % str(n))
1442  print("cluster label %s " % str(cl))
1443  print(self.cluster_obj.get_cluster_label_names(cl))
1444  cluster_size = \
1445  len(self.cluster_obj.get_cluster_label_names(cl))
1446  cluster_prov = \
1447  prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1448 
1449  # first initialize the Density class if requested
1450  if density_custom_ranges:
1451  DensModule = IMP.pmi.analysis.GetModelDensity(
1452  density_custom_ranges,
1453  voxel=voxel_size)
1454 
1455  dircluster = outputdir + "/cluster." + str(n) + "/"
1456  try:
1457  os.mkdir(dircluster)
1458  except: # noqa: E722
1459  pass
1460 
1461  rmsd_dict = {
1462  "AVERAGE_RMSD":
1463  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1464  clusstat = open(dircluster + "stat.out", "w")
1465  for k, structure_name in enumerate(
1466  self.cluster_obj.get_cluster_label_names(cl)):
1467  # extract the features
1468  tmp_dict = {}
1469  tmp_dict.update(rmsd_dict)
1470  index = rmf_file_name_index_dict[structure_name]
1471  for key in best_score_feature_keyword_list_dict:
1472  tmp_dict[
1473  key] = best_score_feature_keyword_list_dict[
1474  key][
1475  index]
1476 
1477  # get the rmf name and the frame number from the list of
1478  # frame names
1479  rmf_name = structure_name.split("|")[0]
1480  rmf_frame_number = int(structure_name.split("|")[1])
1481  clusstat.write(str(tmp_dict) + "\n")
1482 
1483  # extract frame (open or link to existing)
1484  if k == 0:
1485  prots, rs = \
1486  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1487  self.model, rmf_frame_number, rmf_name)
1488  else:
1489  linking_successful = \
1490  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1491  self.model, prots, rs, rmf_frame_number,
1492  rmf_name)
1493  if not linking_successful:
1494  continue
1495  if not prots:
1496  continue
1497 
1498  if IMP.pmi.get_is_canonical(prots[0]):
1499  states = IMP.atom.get_by_type(
1500  prots[0], IMP.atom.STATE_TYPE)
1501  prot = states[state_number]
1502  else:
1503  prot = prots[state_number]
1504  if k == 0:
1505  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1506 
1507  # transform clusters onto first
1508  if k > 0:
1509  co = self.cluster_obj
1510  model_index = co.get_model_index_from_name(
1511  structure_name)
1512  transformation = co.get_transformation_to_first_member(
1513  cl, model_index)
1514  rbs = set()
1515  for p in IMP.atom.get_leaves(prot):
1516  if not IMP.core.XYZR.get_is_setup(p):
1518  IMP.core.XYZR(p).set_radius(0.0001)
1519  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1520 
1522  rbm = IMP.core.RigidBodyMember(p)
1523  rb = rbm.get_rigid_body()
1524  rbs.add(rb)
1525  else:
1527  transformation)
1528  for rb in rbs:
1529  IMP.core.transform(rb, transformation)
1530 
1531  # add the density
1532  if density_custom_ranges:
1533  DensModule.add_subunits_density(prot)
1534 
1535  # pdb writing should be optimized!
1536  o = IMP.pmi.output.Output()
1537  self.model.update()
1538  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1539  o.write_pdb(dircluster + str(k) + ".pdb")
1540 
1541  if IMP.pmi.get_is_canonical(prot):
1542  # create a single-state System and write that
1544  IMP.Particle(self.model))
1545  h.set_name("System")
1546  h.add_child(prot)
1547  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1548  else:
1549  o.init_rmf(dircluster + str(k) + ".rmf3", [prot], rs)
1550  o.write_rmf(dircluster + str(k) + ".rmf3")
1551  o.close_rmf(dircluster + str(k) + ".rmf3")
1552 
1553  del o
1554  # IMP.atom.destroy(prot)
1555 
1556  if density_custom_ranges:
1557  DensModule.write_mrc(path=dircluster)
1558  del DensModule
1559 
1560  if self.number_of_processes > 1:
1561  self.comm.Barrier()
1562 
1563  def get_cluster_rmsd(self, cluster_num):
1564  if self.cluster_obj is None:
1565  raise Exception("Run clustering first")
1566  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1567 
1568  def save_objects(self, objects, file_name):
1569  import pickle
1570  outf = open(file_name, 'wb')
1571  pickle.dump(objects, outf)
1572  outf.close()
1573 
1574  def load_objects(self, file_name):
1575  import pickle
1576  inputf = open(file_name, 'rb')
1577  objects = pickle.load(inputf)
1578  inputf.close()
1579  return objects
1580 
1581 
1583 
1584  """
1585  This class contains analysis utilities to investigate ReplicaExchange
1586  results.
1587  """
1588 
1589  ########################
1590  # Construction and Setup
1591  ########################
1592 
1593  def __init__(self, model, stat_files, best_models=None, score_key=None,
1594  alignment=True):
1595  """
1596  Construction of the Class.
1597  @param model IMP.Model()
1598  @param stat_files list of string. Can be ascii stat files,
1599  rmf files names
1600  @param best_models Integer. Number of best scoring models,
1601  if None: all models will be read
1602  @param alignment boolean (Default=True). Align before computing
1603  the rmsd.
1604  """
1605 
1606  self.model = model
1607  self.best_models = best_models
1609  model, stat_files, self.best_models, score_key, cache=True)
1611  StatHierarchyHandler=self.stath0)
1612 
1613  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(
1615  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(
1617  self.sel0_rmsd = IMP.atom.Selection(self.stath0)
1618  self.sel1_rmsd = IMP.atom.Selection(self.stath1)
1619  self.sel0_alignment = IMP.atom.Selection(self.stath0)
1620  self.sel1_alignment = IMP.atom.Selection(self.stath1)
1621  self.clusters = []
1622  # fill the cluster list with a single cluster containing all models
1623  c = IMP.pmi.output.Cluster(0)
1624  self.clusters.append(c)
1625  for n0 in range(len(self.stath0)):
1626  c.add_member(n0)
1627  self.pairwise_rmsd = {}
1628  self.pairwise_molecular_assignment = {}
1629  self.alignment = alignment
1630  self.symmetric_molecules = {}
1631  self.issymmetricsel = {}
1632  self.update_seldicts()
1633  self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1634  IMP.atom.get_leaves(self.stath0))
1635  self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1636  IMP.atom.get_leaves(self.stath1))
1637 
1638  def set_rmsd_selection(self, **kwargs):
1639  """
1640  Setup the selection onto which the rmsd is computed
1641  @param kwargs use IMP.atom.Selection keywords
1642  """
1643  self.sel0_rmsd = IMP.atom.Selection(self.stath0, **kwargs)
1644  self.sel1_rmsd = IMP.atom.Selection(self.stath1, **kwargs)
1645  self.update_seldicts()
1646 
1647  def set_symmetric(self, molecule_name):
1648  """
1649  Store names of symmetric molecules
1650  """
1651  self.symmetric_molecules[molecule_name] = 0
1652  self.update_seldicts()
1653 
1654  def set_alignment_selection(self, **kwargs):
1655  """
1656  Setup the selection onto which the alignment is computed
1657  @param kwargs use IMP.atom.Selection keywords
1658  """
1659  self.sel0_alignment = IMP.atom.Selection(self.stath0, **kwargs)
1660  self.sel1_alignment = IMP.atom.Selection(self.stath1, **kwargs)
1661 
1662  ######################
1663  # Clustering functions
1664  ######################
1665  def clean_clusters(self):
1666  for c in self.clusters:
1667  del c
1668  self.clusters = []
1669 
1670  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1671  """
1672  Cluster the models based on RMSD.
1673  @param rmsd_cutoff Float the distance cutoff in Angstrom
1674  @param metric (Default=IMP.atom.get_rmsd) the metric that will
1675  be used to compute rmsds
1676  """
1677  self.clean_clusters()
1678  not_clustered = set(range(len(self.stath1)))
1679  while len(not_clustered) > 0:
1680  self.aggregate(not_clustered, rmsd_cutoff, metric)
1681  self.update_clusters()
1682 
1683  def refine(self, rmsd_cutoff=10):
1684  """
1685  Refine the clusters by merging the ones whose centers are close
1686  @param rmsd_cutoff cutoff distance in Angstorms
1687  """
1688  clusters_copy = self.clusters
1689  for c0, c1 in itertools.combinations(self.clusters, 2):
1690  if c0.center_index is None:
1691  self.compute_cluster_center(c0)
1692  if c1.center_index is None:
1693  self.compute_cluster_center(c1)
1694  _ = self.stath0[c0.center_index]
1695  _ = self.stath1[c1.center_index]
1696  rmsd, molecular_assignment = self.rmsd()
1697  if rmsd <= rmsd_cutoff:
1698  if c1 in self.clusters:
1699  clusters_copy.remove(c1)
1700  c0 += c1
1701  self.clusters = clusters_copy
1702  self.update_clusters()
1703 
1704  ####################
1705  # Input Output
1706  ####################
1707 
1708  def set_cluster_assignments(self, cluster_ids):
1709  if len(cluster_ids) != len(self.stath0):
1710  raise ValueError('cluster ids has to be same length as '
1711  'number of frames')
1712 
1713  self.clusters = []
1714  for i in sorted(list(set(cluster_ids))):
1715  self.clusters.append(IMP.pmi.output.Cluster(i))
1716  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1717  self.clusters[idx].add_member(i, d)
1718 
1719  def get_cluster_data(self, cluster):
1720  """
1721  Return the model data from a cluster
1722  @param cluster IMP.pmi.output.Cluster object
1723  """
1724  data = []
1725  for m in cluster:
1726  data.append(m)
1727  return data
1728 
1729  def save_data(self, filename='data.pkl'):
1730  """
1731  Save the data for the whole models into a pickle file
1732  @param filename string
1733  """
1734  self.stath0.save_data(filename)
1735 
1736  def set_data(self, data):
1737  """
1738  Set the data from an external IMP.pmi.output.Data
1739  @param data IMP.pmi.output.Data
1740  """
1741  self.stath0.data = data
1742  self.stath1.data = data
1743 
1744  def load_data(self, filename='data.pkl'):
1745  """
1746  Load the data from an external pickled file
1747  @param filename string
1748  """
1749  self.stath0.load_data(filename)
1750  self.stath1.load_data(filename)
1751  self.best_models = len(self.stath0)
1752 
1753  def add_cluster(self, rmf_name_list):
1754  c = IMP.pmi.output.Cluster(len(self.clusters))
1755  print("creating cluster index "+str(len(self.clusters)))
1756  self.clusters.append(c)
1757  current_len = len(self.stath0)
1758 
1759  for rmf in rmf_name_list:
1760  print("adding rmf "+rmf)
1761  self.stath0.add_stat_file(rmf)
1762  self.stath1.add_stat_file(rmf)
1763 
1764  for n0 in range(current_len, len(self.stath0)):
1765  d0 = self.stath0[n0]
1766  c.add_member(n0, d0)
1767  self.update_clusters()
1768 
1769  def save_clusters(self, filename='clusters.pkl'):
1770  """
1771  Save the clusters into a pickle file
1772  @param filename string
1773  """
1774  try:
1775  import cPickle as pickle
1776  except ImportError:
1777  import pickle
1778  fl = open(filename, 'wb')
1779  pickle.dump(self.clusters, fl)
1780 
1781  def load_clusters(self, filename='clusters.pkl', append=False):
1782  """
1783  Load the clusters from a pickle file
1784  @param filename string
1785  @param append bool (Default=False), if True. append the clusters
1786  to the ones currently present
1787  """
1788  try:
1789  import cPickle as pickle
1790  except ImportError:
1791  import pickle
1792  fl = open(filename, 'rb')
1793  self.clean_clusters()
1794  if append:
1795  self.clusters += pickle.load(fl)
1796  else:
1797  self.clusters = pickle.load(fl)
1798  self.update_clusters()
1799 
1800  ####################
1801  # Analysis Functions
1802  ####################
1803 
1804  def compute_cluster_center(self, cluster):
1805  """
1806  Compute the cluster center for a given cluster
1807  """
1808  member_distance = defaultdict(float)
1809 
1810  for n0, n1 in itertools.combinations(cluster.members, 2):
1811  _ = self.stath0[n0]
1812  _ = self.stath1[n1]
1813  rmsd, _ = self.rmsd()
1814  member_distance[n0] += rmsd
1815 
1816  if len(member_distance) > 0:
1817  cluster.center_index = min(member_distance,
1818  key=member_distance.get)
1819  else:
1820  cluster.center_index = cluster.members[0]
1821 
1822  def save_coordinates(self, cluster, rmf_name=None, reference="Absolute",
1823  prefix="./"):
1824  """
1825  Save the coordinates of the current cluster a single rmf file
1826  """
1827  print("saving coordinates", cluster)
1828  if self.alignment:
1829  self.set_reference(reference, cluster)
1830  o = IMP.pmi.output.Output()
1831  if rmf_name is None:
1832  rmf_name = prefix+'/'+str(cluster.cluster_id)+".rmf3"
1833 
1834  _ = self.stath1[cluster.members[0]]
1835  self.model.update()
1836  o.init_rmf(rmf_name, [self.stath1])
1837  for n1 in cluster.members:
1838  _ = self.stath1[n1]
1839  self.model.update()
1841  if self.alignment:
1842  self.align()
1843  o.write_rmf(rmf_name)
1845  o.close_rmf(rmf_name)
1846 
1847  def prune_redundant_structures(self, rmsd_cutoff=10):
1848  """
1849  remove structures that are similar
1850  append it to a new cluster
1851  """
1852  print("pruning models")
1853  selected = 0
1854  filtered = [selected]
1855  remaining = range(1, len(self.stath1), 10)
1856 
1857  while len(remaining) > 0:
1858  d0 = self.stath0[selected]
1859  rm = []
1860  for n1 in remaining:
1861  _ = self.stath1[n1]
1862  if self.alignment:
1863  self.align()
1864  d, _ = self.rmsd()
1865  if d <= rmsd_cutoff:
1866  rm.append(n1)
1867  print("pruning model %s, similar to model %s, rmsd %s"
1868  % (str(n1), str(selected), str(d)))
1869  remaining = [x for x in remaining if x not in rm]
1870  if len(remaining) == 0:
1871  break
1872  selected = remaining[0]
1873  filtered.append(selected)
1874  remaining.pop(0)
1875  c = IMP.pmi.output.Cluster(len(self.clusters))
1876  self.clusters.append(c)
1877  for n0 in filtered:
1878  d0 = self.stath0[n0]
1879  c.add_member(n0, d0)
1880  self.update_clusters()
1881 
1882  def precision(self, cluster):
1883  """
1884  Compute the precision of a cluster
1885  """
1886  npairs = 0
1887  rmsd = 0.0
1888  precision = None
1889 
1890  if cluster.center_index is not None:
1891  members1 = [cluster.center_index]
1892  else:
1893  members1 = cluster.members
1894 
1895  for n0 in members1:
1896  _ = self.stath0[n0]
1897  for n1 in cluster.members:
1898  if n0 != n1:
1899  npairs += 1
1900  _ = self.stath1[n1]
1902  tmp_rmsd, _ = self.rmsd()
1903  rmsd += tmp_rmsd
1905 
1906  if npairs > 0:
1907  precision = rmsd/npairs
1908  cluster.precision = precision
1909  return precision
1910 
1911  def bipartite_precision(self, cluster1, cluster2, verbose=False):
1912  """
1913  Compute the bipartite precision (ie the cross-precision)
1914  between two clusters
1915  """
1916  npairs = 0
1917  rmsd = 0.0
1918  for cn0, n0 in enumerate(cluster1.members):
1919  _ = self.stath0[n0]
1920  for cn1, n1 in enumerate(cluster2.members):
1921  _ = self.stath1[n1]
1922  tmp_rmsd, _ = self.rmsd()
1923  if verbose:
1924  print("--- rmsd between structure %s and structure "
1925  "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1926  rmsd += tmp_rmsd
1927  npairs += 1
1928  precision = rmsd/npairs
1929  return precision
1930 
1931  def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1932  cluster_ref=None, step=1):
1933  """
1934  Compute the Root mean square fluctuations
1935  of a molecule in a cluster
1936  Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1937  residue indexes and the value is the rmsf
1938  """
1939  rmsf = IMP.pmi.tools.OrderedDict()
1940 
1941  # assumes that residue indexes are identical for stath0 and stath1
1942  if cluster_ref is not None:
1943  if cluster_ref.center_index is not None:
1944  members0 = [cluster_ref.center_index]
1945  else:
1946  members0 = cluster_ref.members
1947  else:
1948  if cluster.center_index is not None:
1949  members0 = [cluster.center_index]
1950  else:
1951  members0 = cluster.members
1952 
1953  s0 = IMP.atom.Selection(self.stath0, molecule=molecule, resolution=1,
1954  copy_index=copy_index, state_index=state_index)
1955  ps0 = s0.get_selected_particles()
1956  # get the residue indexes
1957  residue_indexes = list(IMP.pmi.tools.OrderedSet(
1958  [IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
1959 
1960  # get the corresponding particles
1961  npairs = 0
1962  for n0 in members0:
1963  d0 = self.stath0[n0]
1964  for n1 in cluster.members[::step]:
1965  if n0 != n1:
1966  print("--- rmsf %s %s" % (str(n0), str(n1)))
1968 
1969  s1 = IMP.atom.Selection(
1970  self.stath1, molecule=molecule,
1971  residue_indexes=residue_indexes, resolution=1,
1972  copy_index=copy_index, state_index=state_index)
1973  ps1 = s1.get_selected_particles()
1974 
1975  d1 = self.stath1[n1]
1976  if self.alignment:
1977  self.align()
1978  for n, (p0, p1) in enumerate(zip(ps0, ps1)):
1979  r = residue_indexes[n]
1980  d0 = IMP.core.XYZ(p0)
1981  d1 = IMP.core.XYZ(p1)
1982  if r in rmsf:
1983  rmsf[r] += IMP.core.get_distance(d0, d1)
1984  else:
1985  rmsf[r] = IMP.core.get_distance(d0, d1)
1986  npairs += 1
1988  for r in rmsf:
1989  rmsf[r] /= npairs
1990 
1991  for stath in [self.stath0, self.stath1]:
1992  if molecule not in self.symmetric_molecules:
1993  s = IMP.atom.Selection(
1994  stath, molecule=molecule, residue_index=r,
1995  resolution=1, copy_index=copy_index,
1996  state_index=state_index)
1997  else:
1998  s = IMP.atom.Selection(
1999  stath, molecule=molecule, residue_index=r,
2000  resolution=1, state_index=state_index)
2001 
2002  ps = s.get_selected_particles()
2003  for p in ps:
2005  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
2006  else:
2008 
2009  return rmsf
2010 
2011  def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2012  reference="Absolute", prefix="./", step=1):
2013  if self.alignment:
2014  self.set_reference(reference, cluster)
2015  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
2016  voxel=voxel_size)
2017 
2018  for n1 in cluster.members[::step]:
2019  print("density "+str(n1))
2020  _ = self.stath1[n1]
2022  if self.alignment:
2023  self.align()
2024  dens.add_subunits_density(self.stath1)
2026  dens.write_mrc(path=prefix+'/', suffix=str(cluster.cluster_id))
2027  del dens
2028 
2029  def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2030  consolidate=False, molecules=None, prefix='./',
2031  reference="Absolute"):
2032  if self.alignment:
2033  self.set_reference(reference, cluster)
2034  import numpy as np
2035  import matplotlib.pyplot as plt
2036  import matplotlib.cm as cm
2037  from scipy.spatial.distance import cdist
2038  import IMP.pmi.topology
2039  if molecules is None:
2041  for mol in IMP.pmi.tools.get_molecules(
2042  IMP.atom.get_leaves(self.stath1))]
2043  else:
2045  for mol in IMP.pmi.tools.get_molecules(
2047  self.stath1,
2048  molecules=molecules).get_selected_particles())]
2049  unique_copies = [mol for mol in mols if mol.get_copy_index() == 0]
2050  mol_names_unique = dict((mol.get_name(), mol) for mol in unique_copies)
2051  total_len_unique = sum(max(mol.get_residue_indexes())
2052  for mol in unique_copies)
2053 
2054  index_dict = {}
2055  prev_stop = 0
2056 
2057  if not consolidate:
2058  for mol in mols:
2059  seqlen = max(mol.get_residue_indexes())
2060  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2061  prev_stop += seqlen
2062 
2063  else:
2064  for mol in unique_copies:
2065  seqlen = max(mol.get_residue_indexes())
2066  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2067  prev_stop += seqlen
2068 
2069  for ncl, n1 in enumerate(cluster.members):
2070  print(ncl)
2071  _ = self.stath1[n1]
2072  coord_dict = IMP.pmi.tools.OrderedDict()
2073  for mol in mols:
2074  rindexes = mol.get_residue_indexes()
2075  coords = np.ones((max(rindexes), 3))
2076  for rnum in rindexes:
2077  sel = IMP.atom.Selection(mol, residue_index=rnum,
2078  resolution=1)
2079  selpart = sel.get_selected_particles()
2080  if len(selpart) == 0:
2081  continue
2082  selpart = selpart[0]
2083  coords[rnum - 1, :] = \
2084  IMP.core.XYZ(selpart).get_coordinates()
2085  coord_dict[mol] = coords
2086 
2087  if not consolidate:
2088  coords = np.concatenate(list(coord_dict.values()))
2089  dists = cdist(coords, coords)
2090  binary_dists = np.where((dists <= contact_threshold)
2091  & (dists >= 1.0), 1.0, 0.0)
2092  else:
2093  binary_dists_dict = {}
2094  for mol1 in mols:
2095  len1 = max(mol1.get_residue_indexes())
2096  for mol2 in mols:
2097  name1 = mol1.get_name()
2098  name2 = mol2.get_name()
2099  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2100  if (name1, name2) not in binary_dists_dict:
2101  binary_dists_dict[(name1, name2)] = \
2102  np.zeros((len1, len1))
2103  binary_dists_dict[(name1, name2)] += \
2104  np.where((dists <= contact_threshold)
2105  & (dists >= 1.0), 1.0, 0.0)
2106  binary_dists = np.zeros((total_len_unique, total_len_unique))
2107 
2108  for name1, name2 in binary_dists_dict:
2109  r1 = index_dict[mol_names_unique[name1]]
2110  r2 = index_dict[mol_names_unique[name2]]
2111  binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2112  np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2113  1.0, 0.0)
2114 
2115  if ncl == 0:
2116  dist_maps = [dists]
2117  av_dist_map = dists
2118  contact_freqs = binary_dists
2119  else:
2120  dist_maps.append(dists)
2121  av_dist_map += dists
2122  contact_freqs += binary_dists
2123 
2124  if log_scale:
2125  contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2126  else:
2127  contact_freqs = 1.0/len(cluster)*contact_freqs
2128  av_dist_map = 1.0/len(cluster)*contact_freqs
2129 
2130  fig = plt.figure(figsize=(100, 100))
2131  ax = fig.add_subplot(111)
2132  ax.set_xticks([])
2133  ax.set_yticks([])
2134  gap_between_components = 50
2135  colormap = cm.Blues
2136  colornorm = None
2137 
2138  if not consolidate:
2139  sorted_tuple = sorted(
2141  mol).get_extended_name(), mol) for mol in mols)
2142  prot_list = list(zip(*sorted_tuple))[1]
2143  else:
2144  sorted_tuple = sorted(
2145  (IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(), mol)
2146  for mol in unique_copies)
2147  prot_list = list(zip(*sorted_tuple))[1]
2148 
2149  prot_listx = prot_list
2150  nresx = gap_between_components + \
2151  sum([max(mol.get_residue_indexes())
2152  + gap_between_components for mol in prot_listx])
2153 
2154  # set the list of proteins on the y axis
2155  prot_listy = prot_list
2156  nresy = gap_between_components + \
2157  sum([max(mol.get_residue_indexes())
2158  + gap_between_components for mol in prot_listy])
2159 
2160  # this is the residue offset for each protein
2161  resoffsetx = {}
2162  resendx = {}
2163  res = gap_between_components
2164  for mol in prot_listx:
2165  resoffsetx[mol] = res
2166  res += max(mol.get_residue_indexes())
2167  resendx[mol] = res
2168  res += gap_between_components
2169 
2170  resoffsety = {}
2171  resendy = {}
2172  res = gap_between_components
2173  for mol in prot_listy:
2174  resoffsety[mol] = res
2175  res += max(mol.get_residue_indexes())
2176  resendy[mol] = res
2177  res += gap_between_components
2178 
2179  resoffsetdiagonal = {}
2180  res = gap_between_components
2181  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2182  resoffsetdiagonal[mol] = res
2183  res += max(mol.get_residue_indexes())
2184  res += gap_between_components
2185 
2186  # plot protein boundaries
2187  xticks = []
2188  xlabels = []
2189  for n, prot in enumerate(prot_listx):
2190  res = resoffsetx[prot]
2191  end = resendx[prot]
2192  for proty in prot_listy:
2193  resy = resoffsety[proty]
2194  endy = resendy[proty]
2195  ax.plot([res, res], [resy, endy], linestyle='-',
2196  color='gray', lw=0.4)
2197  ax.plot([end, end], [resy, endy], linestyle='-',
2198  color='gray', lw=0.4)
2199  xticks.append((float(res) + float(end)) / 2)
2201  prot).get_extended_name())
2202 
2203  yticks = []
2204  ylabels = []
2205  for n, prot in enumerate(prot_listy):
2206  res = resoffsety[prot]
2207  end = resendy[prot]
2208  for protx in prot_listx:
2209  resx = resoffsetx[protx]
2210  endx = resendx[protx]
2211  ax.plot([resx, endx], [res, res], linestyle='-',
2212  color='gray', lw=0.4)
2213  ax.plot([resx, endx], [end, end], linestyle='-',
2214  color='gray', lw=0.4)
2215  yticks.append((float(res) + float(end)) / 2)
2217  prot).get_extended_name())
2218 
2219  # plot the contact map
2220 
2221  tmp_array = np.zeros((nresx, nresy))
2222  ret = {}
2223  for px in prot_listx:
2224  for py in prot_listy:
2225  resx = resoffsetx[px]
2226  lengx = resendx[px] - 1
2227  resy = resoffsety[py]
2228  lengy = resendy[py] - 1
2229  indexes_x = index_dict[px]
2230  minx = min(indexes_x)
2231  maxx = max(indexes_x)
2232  indexes_y = index_dict[py]
2233  miny = min(indexes_y)
2234  maxy = max(indexes_y)
2235  tmp_array[resx:lengx, resy:lengy] = \
2236  contact_freqs[minx:maxx, miny:maxy]
2237  ret[(px, py)] = np.argwhere(
2238  contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2239 
2240  ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2241  origin='lower', alpha=0.6, interpolation='nearest')
2242 
2243  ax.set_xticks(xticks)
2244  ax.set_xticklabels(xlabels, rotation=90)
2245  ax.set_yticks(yticks)
2246  ax.set_yticklabels(ylabels)
2247  plt.setp(ax.get_xticklabels(), fontsize=6)
2248  plt.setp(ax.get_yticklabels(), fontsize=6)
2249 
2250  # display and write to file
2251  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2252  [i.set_linewidth(2.0) for i in ax.spines.values()]
2253 
2254  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf",
2255  dpi=300, transparent="False")
2256  return ret
2257 
2258  def plot_rmsd_matrix(self, filename):
2259  self.compute_all_pairwise_rmsd()
2260  distance_matrix = np.zeros(
2261  (len(self.stath0), len(self.stath1)))
2262  for (n0, n1) in self.pairwise_rmsd:
2263  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2264 
2265  import matplotlib as mpl
2266  mpl.use('Agg')
2267  import matplotlib.pylab as pl
2268  from scipy.cluster import hierarchy as hrc
2269 
2270  fig = pl.figure(figsize=(10, 8))
2271  ax = fig.add_subplot(212)
2272  dendrogram = hrc.dendrogram(
2273  hrc.linkage(distance_matrix),
2274  color_threshold=7,
2275  no_labels=True)
2276  leaves_order = dendrogram['leaves']
2277  ax.set_xlabel('Model')
2278  ax.set_ylabel('RMSD [Angstroms]')
2279 
2280  ax2 = fig.add_subplot(221)
2281  cax = ax2.imshow(
2282  distance_matrix[leaves_order, :][:, leaves_order],
2283  interpolation='nearest')
2284  cb = fig.colorbar(cax)
2285  cb.set_label('RMSD [Angstroms]')
2286  ax2.set_xlabel('Model')
2287  ax2.set_ylabel('Model')
2288 
2289  pl.savefig(filename, dpi=300)
2290  pl.close(fig)
2291 
2292  ####################
2293  # Internal Functions
2294  ####################
2295 
2296  def update_clusters(self):
2297  """
2298  Update the cluster id numbers
2299  """
2300  for n, c in enumerate(self.clusters):
2301  c.cluster_id = n
2302 
2303  def get_molecule(self, hier, name, copy):
2304  s = IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2305  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2306 
2307  def update_seldicts(self):
2308  """
2309  Update the seldicts
2310  """
2311  self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2312  self.sel0_rmsd.get_selected_particles())
2313  self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2314  self.sel1_rmsd.get_selected_particles())
2315  for mol in self.seldict0:
2316  for sel in self.seldict0[mol]:
2317  self.issymmetricsel[sel] = False
2318  for mol in self.symmetric_molecules:
2319  self.symmetric_molecules[mol] = len(self.seldict0[mol])
2320  for sel in self.seldict0[mol]:
2321  self.issymmetricsel[sel] = True
2322 
2323  def align(self):
2325  self.sel1_alignment, self.sel0_alignment)
2326 
2327  for rb in self.rbs1:
2328  IMP.core.transform(rb, tr)
2329 
2330  for bead in self.beads1:
2331  try:
2332  IMP.core.transform(IMP.core.XYZ(bead), tr)
2333  except: # noqa: E722
2334  continue
2335 
2336  self.model.update()
2337 
2338  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2339  '''
2340  initial filling of the clusters.
2341  '''
2342  n0 = idxs.pop()
2343  print("clustering model "+str(n0))
2344  d0 = self.stath0[n0]
2345  c = IMP.pmi.output.Cluster(len(self.clusters))
2346  print("creating cluster index "+str(len(self.clusters)))
2347  self.clusters.append(c)
2348  c.add_member(n0, d0)
2349  clustered = set([n0])
2350  for n1 in idxs:
2351  print("--- trying to add model " + str(n1) + " to cluster "
2352  + str(len(self.clusters)))
2353  d1 = self.stath1[n1]
2354  if self.alignment:
2355  self.align()
2356  rmsd, _ = self.rmsd(metric=metric)
2357  if rmsd < rmsd_cutoff:
2358  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2359  c.add_member(n1, d1)
2360  clustered.add(n1)
2361  else:
2362  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2363  idxs -= clustered
2364 
2365  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2366  """
2367  merge the clusters that have close members
2368  @param rmsd_cutoff cutoff distance in Angstorms
2369  """
2370  # before merging, clusters are spheres of radius rmsd_cutoff
2371  # centered on the 1st element
2372  # here we only try to merge clusters whose centers are closer
2373  # than 2*rmsd_cutoff
2374  to_merge = []
2375  print("merging...")
2376  for c0, c1 in filter(lambda x: len(x[0].members) > 1,
2377  itertools.combinations(self.clusters, 2)):
2378  n0, n1 = [c.members[0] for c in (c0, c1)]
2379  _ = self.stath0[n0]
2380  _ = self.stath1[n1]
2381  rmsd, _ = self.rmsd()
2382  if (rmsd < 2*rmsd_cutoff and
2383  self.have_close_members(c0, c1, rmsd_cutoff, metric)):
2384  to_merge.append((c0, c1))
2385 
2386  for c0, c in reversed(to_merge):
2387  self.merge(c0, c)
2388 
2389  # keep only full clusters
2390  self.clusters = [c for c in
2391  filter(lambda x: len(x.members) > 0, self.clusters)]
2392 
2393  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2394  '''
2395  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2396  '''
2397  print("check close members for clusters " + str(c0.cluster_id) +
2398  " and " + str(c1.cluster_id))
2399  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2400  _ = self.stath0[n0]
2401  _ = self.stath1[n1]
2402  rmsd, _ = self.rmsd(metric=metric)
2403  if rmsd < rmsd_cutoff:
2404  return True
2405 
2406  return False
2407 
2408  def merge(self, c0, c1):
2409  '''
2410  merge two clusters
2411  '''
2412  c0 += c1
2413  c1.members = []
2414  c1.data = {}
2415 
2416  def rmsd_helper(self, sels0, sels1, metric):
2417  '''
2418  a function that returns the permutation best_sel of sels0 that
2419  minimizes metric
2420  '''
2421  best_rmsd2 = float('inf')
2422  best_sel = None
2423  if self.issymmetricsel[sels0[0]]:
2424  # this cases happens when symmetries were defined
2425  N = len(sels0)
2426  for offset in range(N):
2427  sels = [sels0[(offset+i) % N] for i in range(N)]
2428  sel0 = sels[0]
2429  sel1 = sels1[0]
2430  r = metric(sel0, sel1)
2431  rmsd2 = r*r*N
2432  if rmsd2 < best_rmsd2:
2433  best_rmsd2 = rmsd2
2434  best_sel = sels
2435  else:
2436  for sels in itertools.permutations(sels0):
2437  rmsd2 = 0.0
2438  for sel0, sel1 in itertools.takewhile(
2439  lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2440  r = metric(sel0, sel1)
2441  rmsd2 += r*r
2442  if rmsd2 < best_rmsd2:
2443  best_rmsd2 = rmsd2
2444  best_sel = sels
2445  return best_sel, best_rmsd2
2446 
2447  def compute_all_pairwise_rmsd(self):
2448  for d0 in self.stath0:
2449  for d1 in self.stath1:
2450  rmsd, _ = self.rmsd()
2451 
2452  def rmsd(self, metric=IMP.atom.get_rmsd):
2453  '''
2454  Computes the RMSD. Resolves ambiguous pairs assignments
2455  '''
2456  # here we memoize the rmsd and molecular assignment so that it's
2457  # not done multiple times
2458  n0 = self.stath0.current_index
2459  n1 = self.stath1.current_index
2460  if ((n0, n1) in self.pairwise_rmsd) \
2461  and ((n0, n1) in self.pairwise_molecular_assignment):
2462  return (self.pairwise_rmsd[(n0, n1)],
2463  self.pairwise_molecular_assignment[(n0, n1)])
2464 
2465  if self.alignment:
2466  self.align()
2467  # if it's not yet memoized
2468  total_rmsd = 0.0
2469  total_N = 0
2470  # this is a dictionary which keys are the molecule names, and values
2471  # are the list of IMP.atom.Selection for all molecules that share
2472  # the molecule name
2473  molecular_assignment = {}
2474  for molname, sels0 in self.seldict0.items():
2475  sels_best_order, best_rmsd2 = \
2476  self.rmsd_helper(sels0, self.seldict1[molname], metric)
2477 
2478  Ncoords = len(sels_best_order[0].get_selected_particles())
2479  Ncopies = len(self.seldict1[molname])
2480  total_rmsd += Ncoords*best_rmsd2
2481  total_N += Ncoords*Ncopies
2482 
2483  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2484  p0 = sel0.get_selected_particles()[0]
2485  p1 = sel1.get_selected_particles()[0]
2486  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2487  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2488  c0 = IMP.atom.Copy(m0).get_copy_index()
2489  c1 = IMP.atom.Copy(m1).get_copy_index()
2490  molecular_assignment[(molname, c0)] = (molname, c1)
2491 
2492  total_rmsd = math.sqrt(total_rmsd/total_N)
2493 
2494  self.pairwise_rmsd[(n0, n1)] = total_rmsd
2495  self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2496  self.pairwise_rmsd[(n1, n0)] = total_rmsd
2497  self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2498  return total_rmsd, molecular_assignment
2499 
2500  def set_reference(self, reference, cluster):
2501  """
2502  Fix the reference structure for structural alignment, rmsd and
2503  chain assignemnt
2504 
2505  @param reference can be either "Absolute" (cluster center of the
2506  first cluster) or Relative (cluster center of the current
2507  cluster)
2508  """
2509  if reference == "Absolute":
2510  _ = self.stath0[0]
2511  elif reference == "Relative":
2512  if cluster.center_index:
2513  n0 = cluster.center_index
2514  else:
2515  n0 = cluster.members[0]
2516  _ = self.stath0[n0]
2517 
2519  """
2520  compute the molecular assignemnts between multiple copies
2521  of the same sequence. It changes the Copy index of Molecules
2522  """
2523  _ = self.stath1[n1]
2524  _, molecular_assignment = self.rmsd()
2525  for (m0, c0), (m1, c1) in molecular_assignment.items():
2526  mol0 = self.molcopydict0[m0][c0]
2527  mol1 = self.molcopydict1[m1][c1]
2528  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2529  p1 = IMP.atom.Copy(mol1).get_particle()
2530  p1.set_value(cik0, c0)
2531 
2533  """
2534  Undo the Copy index assignment
2535  """
2536  _ = self.stath1[n1]
2537  _, molecular_assignment = self.rmsd()
2538  for (m0, c0), (m1, c1) in molecular_assignment.items():
2539  mol0 = self.molcopydict0[m0][c0]
2540  mol1 = self.molcopydict1[m1][c1]
2541  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2542  p1 = IMP.atom.Copy(mol1).get_particle()
2543  p1.set_value(cik0, c1)
2544 
2545  ####################
2546  # Container Functions
2547  ####################
2548 
2549  def __repr__(self):
2550  s = "AnalysisReplicaExchange\n"
2551  s += "---- number of clusters %s \n" % str(len(self.clusters))
2552  s += "---- number of models %s \n" % str(len(self.stath0))
2553  return s
2554 
2555  def __getitem__(self, int_slice_adaptor):
2556  if isinstance(int_slice_adaptor, int):
2557  return self.clusters[int_slice_adaptor]
2558  elif isinstance(int_slice_adaptor, slice):
2559  return self.__iter__(int_slice_adaptor)
2560  else:
2561  raise TypeError("Unknown Type")
2562 
2563  def __len__(self):
2564  return len(self.clusters)
2565 
2566  def __iter__(self, slice_key=None):
2567  if slice_key is None:
2568  for i in range(len(self)):
2569  yield self[i]
2570  else:
2571  for i in range(len(self))[slice_key]:
2572  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2452
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
Definition: macros.py:2500
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1781
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1077
def precision
Compute the precision of a cluster.
Definition: macros.py:1882
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:882
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:111
A container for models organized into clusters.
Definition: output.py:1522
Sample using molecular dynamics.
Definition: samplers.py:327
def aggregate
initial filling of the clusters.
Definition: macros.py:2338
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:634
A macro to help setup and run replica exchange.
Definition: macros.py:68
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:635
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1847
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1931
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1174
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1638
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1719
def __init__
Construction of the Class.
Definition: macros.py:1593
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:787
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1736
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2532
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1654
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2416
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1822
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1026
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
Definition: macros.py:2518
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1582
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:578
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2365
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:935
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1804
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:946
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def __init__
Constructor.
Definition: macros.py:133
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:22
def update_seldicts
Update the seldicts.
Definition: macros.py:2307
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2296
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:554
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1683
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:203
Collect timing information.
Definition: tools.py:126
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1647
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: pmi/utilities.h:91
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:8845
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:47
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2408
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:631
The general base class for IMP exceptions.
Definition: exception.h:48
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1269
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1729
Class to handle individual particles of a Model object.
Definition: Particle.h:43
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:798
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1911
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:607
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1670
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1769
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2393
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1150
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1744
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:511
Sample using replica exchange.
Definition: samplers.py:432
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27