IMP logo
IMP Reference Guide  develop.23016263b1,2026/04/24
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 import IMP
6 import IMP.pmi.tools
7 import IMP.pmi.samplers
8 import IMP.pmi.output
9 import IMP.pmi.analysis
10 import IMP.pmi.io
11 import IMP.pmi.alphabets
12 import IMP.rmf
13 import IMP.isd
14 import IMP.pmi.dof
15 import os
16 from pathlib import Path
17 import glob
18 from operator import itemgetter
19 from collections import defaultdict
20 import numpy as np
21 import itertools
22 import warnings
23 import math
24 
25 import pickle
26 
27 
28 class _MockMPIValues:
29  """Replace samplers.MPI_values when in test mode"""
30  def get_percentile(self, name):
31  return 0.
32 
33 
34 class _RMFRestraints:
35  """All restraints that are written out to the RMF file"""
36  def __init__(self, model, user_restraints):
37  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
38  self._user_restraints = user_restraints if user_restraints else []
39 
40  def __len__(self):
41  return (len(self._user_restraints)
42  + self._rmf_rs.get_number_of_restraints())
43 
44  def __bool__(self):
45  return len(self) > 0
46 
47  def __getitem__(self, i):
48  class FakePMIWrapper:
49  def __init__(self, r):
50  self.r = IMP.RestraintSet.get_from(r)
51 
52  def get_restraint(self):
53  return self.r
54 
55  lenuser = len(self._user_restraints)
56  if 0 <= i < lenuser:
57  return self._user_restraints[i]
58  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
59  r = self._rmf_rs.get_restraint(i - lenuser)
60  return FakePMIWrapper(r)
61  else:
62  raise IndexError("Out of range")
63 
64 
66  """A macro to help setup and run replica exchange.
67  Supports Monte Carlo and molecular dynamics.
68  Produces trajectory RMF files, best PDB structures,
69  and output stat files.
70  """
71  def __init__(self, model, root_hier,
72  monte_carlo_sample_objects=None,
73  molecular_dynamics_sample_objects=None,
74  output_objects=[],
75  rmf_output_objects=None,
76  monte_carlo_temperature=1.0,
77  simulated_annealing=False,
78  simulated_annealing_minimum_temperature=1.0,
79  simulated_annealing_maximum_temperature=2.5,
80  simulated_annealing_minimum_temperature_nframes=100,
81  simulated_annealing_maximum_temperature_nframes=100,
82  replica_exchange_minimum_temperature=1.0,
83  replica_exchange_maximum_temperature=2.5,
84  replica_exchange_swap=True,
85  num_sample_rounds=1,
86  number_of_best_scoring_models=500,
87  monte_carlo_steps=10,
88  self_adaptive=False,
89  molecular_dynamics_steps=10,
90  molecular_dynamics_max_time_step=1.0,
91  number_of_frames=1000,
92  save_coordinates_mode="lowest_temperature",
93  nframes_write_coordinates=1,
94  write_initial_rmf=True,
95  initial_rmf_name_suffix="initial",
96  stat_file_name_suffix="stat",
97  best_pdb_name_suffix="model",
98  mmcif=False,
99  do_clean_first=True,
100  do_create_directories=True,
101  global_output_directory="./",
102  rmf_dir="rmfs/",
103  best_pdb_dir="pdbs/",
104  replica_stat_file_suffix="stat_replica",
105  em_object_for_rmf=None,
106  atomistic=False,
107  replica_exchange_object=None,
108  test_mode=False,
109  score_moved=False,
110  use_nestor=False,
111  nestor_restraints=None,
112  nestor_rmf_fname_prefix="nested",
113  use_jax=False):
114  """Constructor.
115  @param model The IMP model
116  @param root_hier Top-level (System)hierarchy
117  @param monte_carlo_sample_objects Objects for MC sampling, which
118  should generally be a simple list of Mover objects, e.g.
119  from DegreesOfFreedom.get_movers().
120  @param molecular_dynamics_sample_objects Objects for MD sampling,
121  which should generally be a simple list of particles.
122  @param output_objects A list of structural objects and restraints
123  that will be included in output (ie, statistics "stat"
124  files). Any object that provides a get_output() method
125  can be used here. If None is passed
126  the macro will not write stat files.
127  @param rmf_output_objects A list of structural objects and
128  restraints that will be included in rmf. Any object
129  that provides a get_output() method can be used here.
130  @param monte_carlo_temperature MC temp (may need to be optimized
131  based on post-sampling analysis)
132  @param simulated_annealing If True, perform simulated annealing
133  @param simulated_annealing_minimum_temperature Should generally be
134  the same as monte_carlo_temperature.
135  @param simulated_annealing_minimum_temperature_nframes Number of
136  frames to compute at minimum temperature.
137  @param simulated_annealing_maximum_temperature_nframes Number of
138  frames to compute at
139  temps > simulated_annealing_maximum_temperature.
140  @param replica_exchange_minimum_temperature Low temp for REX; should
141  generally be the same as monte_carlo_temperature.
142  @param replica_exchange_maximum_temperature High temp for REX
143  @param replica_exchange_swap Boolean, enable disable temperature
144  swap (Default=True)
145  @param num_sample_rounds Number of rounds of MC/MD per cycle
146  @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
147  models to keep around for analysis.
148  @param mmcif If True, write best scoring models in mmCIF format;
149  if False (the default), write in legacy PDB format.
150  @param best_pdb_dir The directory under `global_output_directory`
151  where best-scoring PDB/mmCIF files are written.
152  @param best_pdb_name_suffix Part of the file name for best-scoring
153  PDB/mmCIF files.
154  @param monte_carlo_steps Number of MC steps per round
155  @param self_adaptive self adaptive scheme for Monte Carlo movers
156  @param molecular_dynamics_steps Number of MD steps per round
157  @param molecular_dynamics_max_time_step Max time step for MD
158  @param number_of_frames Number of REX frames to run
159  @param save_coordinates_mode string: how to save coordinates.
160  "lowest_temperature" (default) only the lowest temperatures
161  is saved
162  "25th_score" all replicas whose score is below the 25th
163  percentile
164  "50th_score" all replicas whose score is below the 50th
165  percentile
166  "75th_score" all replicas whose score is below the 75th
167  percentile
168  @param nframes_write_coordinates How often to write the coordinates
169  of a frame
170  @param write_initial_rmf Write the initial configuration
171  @param global_output_directory Folder that will be created to house
172  output.
173  @param test_mode Set to True to avoid writing any files, just test
174  one frame.
175  @param score_moved If True, attempt to speed up Monte Carlo
176  sampling by caching scoring function terms on particles
177  that didn't move.
178  @param use_nestor If True, follows the Nested Sampling workflow
179  of the NestOR module and skips writing stat files and
180  replica stat files.
181  @param nestor_restraints A list of restraints for which
182  likelihoods are to be computed for use by NestOR module.
183  @param nestor_rmf_fname_prefix Prefix to be used for storing .rmf3
184  files generated by NestOR .
185  @param use_jax If set to True, sample the scoring function using
186  JAX instead of IMP's internal C++ implementation (requires
187  that all PMI restraints used have a JAX implementation).
188  """
189  self.model = model
190  self.vars = {}
191 
192  # add check hierarchy is multistate
193  if output_objects == []:
194  # The "[]" in the default parameters is a global object, so make
195  # our own copy here
196  self.output_objects = []
197  else:
198  self.output_objects = output_objects
199  self.rmf_output_objects = rmf_output_objects
200  if (isinstance(root_hier, IMP.atom.Hierarchy)
201  and not root_hier.get_parent()):
202  if self.output_objects is not None:
203  self.output_objects.append(
204  IMP.pmi.io.TotalScoreOutput(self.model))
205  if self.rmf_output_objects is not None:
206  self.rmf_output_objects.append(
207  IMP.pmi.io.TotalScoreOutput(self.model))
208  self.root_hier = root_hier
209  states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
210  self.vars["number_of_states"] = len(states)
211  if len(states) > 1:
212  self.root_hiers = states
213  self.is_multi_state = True
214  else:
215  self.root_hier = root_hier
216  self.is_multi_state = False
217  else:
218  raise TypeError("Must provide System hierarchy (root_hier)")
219 
220  self._rmf_restraints = _RMFRestraints(model, None)
221  self.em_object_for_rmf = em_object_for_rmf
222  self.monte_carlo_sample_objects = monte_carlo_sample_objects
223  self.vars["self_adaptive"] = self_adaptive
224  self.molecular_dynamics_sample_objects = \
225  molecular_dynamics_sample_objects
226  self.replica_exchange_object = replica_exchange_object
227  self.molecular_dynamics_max_time_step = \
228  molecular_dynamics_max_time_step
229  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
230  self.vars["replica_exchange_minimum_temperature"] = \
231  replica_exchange_minimum_temperature
232  self.vars["replica_exchange_maximum_temperature"] = \
233  replica_exchange_maximum_temperature
234  self.vars["replica_exchange_swap"] = replica_exchange_swap
235  self.vars["simulated_annealing"] = simulated_annealing
236  self.vars["simulated_annealing_minimum_temperature"] = \
237  simulated_annealing_minimum_temperature
238  self.vars["simulated_annealing_maximum_temperature"] = \
239  simulated_annealing_maximum_temperature
240  self.vars["simulated_annealing_minimum_temperature_nframes"] = \
241  simulated_annealing_minimum_temperature_nframes
242  self.vars["simulated_annealing_maximum_temperature_nframes"] = \
243  simulated_annealing_maximum_temperature_nframes
244 
245  self.vars["num_sample_rounds"] = num_sample_rounds
246  self.vars[
247  "number_of_best_scoring_models"] = number_of_best_scoring_models
248  self.vars["monte_carlo_steps"] = monte_carlo_steps
249  self.vars["molecular_dynamics_steps"] = molecular_dynamics_steps
250  self.vars["number_of_frames"] = number_of_frames
251  if save_coordinates_mode not in ("lowest_temperature", "25th_score",
252  "50th_score", "75th_score"):
253  raise Exception("save_coordinates_mode has unrecognized value")
254  else:
255  self.vars["save_coordinates_mode"] = save_coordinates_mode
256  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
257  self.vars["write_initial_rmf"] = write_initial_rmf
258  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
259  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
260  self.vars["mmcif"] = mmcif
261  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
262  self.vars["do_clean_first"] = do_clean_first
263  self.vars["do_create_directories"] = do_create_directories
264  self.vars["global_output_directory"] = global_output_directory
265  self.vars["rmf_dir"] = rmf_dir
266  self.vars["best_pdb_dir"] = best_pdb_dir
267  self.vars["atomistic"] = atomistic
268  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
269  self.vars["geometries"] = None
270  self.test_mode = test_mode
271  self.score_moved = score_moved
272  self.use_jax = use_jax
273  self.vars["use_nestor"] = self.nest = use_nestor
274  self.nestor_restraints = nestor_restraints
275  self.nestor_rmf_fname = nestor_rmf_fname_prefix
276 
277  def add_geometries(self, geometries):
278  if self.vars["geometries"] is None:
279  self.vars["geometries"] = list(geometries)
280  else:
281  self.vars["geometries"].extend(geometries)
282 
283  def show_info(self):
284  print("ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
285  "rmfs/*.rmf3 for each replica ")
286  print("--- it stores the best scoring pdb models in pdbs/")
287  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
288  "lowest temperature")
289  print("--- variables:")
290  for k, v in sorted(self.vars.items(), key=itemgetter(0)):
291  print("------", k.ljust(30), v)
292 
293  def get_replica_exchange_object(self):
294  return self.replica_exchange_object
295 
296  def _add_provenance(self, sampler_md, sampler_mc):
297  """Record details about the sampling in the IMP Hierarchies"""
298  iterations = 0
299  if sampler_md:
300  method = "Molecular Dynamics"
301  iterations += self.vars["molecular_dynamics_steps"]
302  if sampler_mc:
303  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
304  iterations += self.vars["monte_carlo_steps"]
305  # If no sampling is actually done, no provenance to write
306  if iterations == 0 or self.vars["number_of_frames"] == 0:
307  return
308  iterations *= self.vars["num_sample_rounds"]
309 
310  pi = self.model.add_particle("sampling")
312  self.model, pi, method, self.vars["number_of_frames"],
313  iterations)
314  p.set_number_of_replicas(
315  self.replica_exchange_object.get_number_of_replicas())
316  IMP.pmi.tools._add_pmi_provenance(self.root_hier)
317  IMP.core.add_provenance(self.model, self.root_hier, p)
318 
319  def _setup_mc_sampler(self):
320  sampler_mc = IMP.pmi.samplers.MonteCarlo(
321  self.model, self.monte_carlo_sample_objects,
322  self.vars["monte_carlo_temperature"],
323  score_moved=self.score_moved)
324  if self.use_jax:
325  sampler_mc.set_use_jax(self.vars["monte_carlo_steps"])
326  if self.vars["simulated_annealing"]:
327  tmin = self.vars["simulated_annealing_minimum_temperature"]
328  tmax = self.vars["simulated_annealing_maximum_temperature"]
329  nfmin = self.vars[
330  "simulated_annealing_minimum_temperature_nframes"]
331  nfmax = self.vars[
332  "simulated_annealing_maximum_temperature_nframes"]
333  sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
334  if self.vars["self_adaptive"]:
335  sampler_mc.set_self_adaptive(
336  isselfadaptive=self.vars["self_adaptive"])
337  if self.output_objects is not None:
338  self.output_objects.append(sampler_mc)
339  if self.rmf_output_objects is not None:
340  self.rmf_output_objects.append(sampler_mc)
341  return sampler_mc
342 
343  def _setup_md_sampler(self):
345  self.model, self.molecular_dynamics_sample_objects,
346  self.vars["monte_carlo_temperature"],
347  maximum_time_step=self.molecular_dynamics_max_time_step)
348  if self.use_jax:
349  sampler_md.set_use_jax(self.vars["molecular_dynamics_steps"])
350  if self.vars["simulated_annealing"]:
351  tmin = self.vars["simulated_annealing_minimum_temperature"]
352  tmax = self.vars["simulated_annealing_maximum_temperature"]
353  nfmin = self.vars[
354  "simulated_annealing_minimum_temperature_nframes"]
355  nfmax = self.vars[
356  "simulated_annealing_maximum_temperature_nframes"]
357  sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
358  if self.output_objects is not None:
359  self.output_objects.append(sampler_md)
360  if self.rmf_output_objects is not None:
361  self.rmf_output_objects.append(sampler_md)
362  return sampler_md
363 
364  def _get_jax_model(self, sampler_mc):
365  if self.use_jax:
366  return sampler_mc.get_jax_model()
367 
368  def execute_macro(self):
369  temp_index_factor = 100000.0
370  samplers = []
371  sampler_mc = None
372  sampler_md = None
373  if self.monte_carlo_sample_objects is not None:
374  print("Setting up MonteCarlo")
375  sampler_mc = self._setup_mc_sampler()
376  samplers.append(sampler_mc)
377 
378  if self.molecular_dynamics_sample_objects is not None:
379  print("Setting up MolecularDynamics")
380  sampler_md = self._setup_md_sampler()
381  samplers.append(sampler_md)
382 
383 # -------------------------------------------------------------------------
384 
385  print("Setting up ReplicaExchange")
387  self.model, self.vars["replica_exchange_minimum_temperature"],
388  self.vars["replica_exchange_maximum_temperature"], samplers,
389  replica_exchange_object=self.replica_exchange_object)
390  self.replica_exchange_object = rex.rem
391 
392  myindex = rex.get_my_index()
393  if self.output_objects is not None:
394  self.output_objects.append(rex)
395  if self.rmf_output_objects is not None:
396  self.rmf_output_objects.append(rex)
397  # must reset the minimum temperature due to the
398  # different binary length of rem.get_my_parameter double and python
399  # float
400  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
401 
402 # -------------------------------------------------------------------------
403 
404  globaldir = self.vars["global_output_directory"] + "/"
405  rmf_dir = globaldir + self.vars["rmf_dir"]
406  pdb_dir = globaldir + self.vars["best_pdb_dir"]
407 
408  if not self.test_mode and not self.nest:
409  if self.vars["do_clean_first"]:
410  pass
411 
412  if self.vars["do_create_directories"]:
413 
414  os.makedirs(globaldir, exist_ok=True)
415  os.makedirs(rmf_dir, exist_ok=True)
416  if not self.is_multi_state:
417  os.makedirs(pdb_dir, exist_ok=True)
418  else:
419  for n in range(self.vars["number_of_states"]):
420  os.makedirs(pdb_dir + "/" + str(n), exist_ok=True)
421 
422 # -------------------------------------------------------------------------
423 
425  if self.output_objects is not None:
426  self.output_objects.append(sw)
427  if self.rmf_output_objects is not None:
428  self.rmf_output_objects.append(sw)
429 
430  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
431 
432  if not self.nest:
433  print("Setting up stat file")
434  low_temp_stat_file = globaldir + \
435  self.vars["stat_file_name_suffix"] + "." + \
436  str(myindex) + ".out"
437 
438  # Ensure model is updated before saving init files
439  if not self.test_mode:
440  self.model.update()
441 
442  if not self.test_mode and not self.nest:
443  if self.output_objects is not None:
444  output.init_stat2(low_temp_stat_file,
445  self.output_objects,
446  extralabels=["rmf_file", "rmf_frame_index"],
447  jax_model=self._get_jax_model(sampler_mc))
448  else:
449  print("Stat file writing is disabled")
450 
451  if self.rmf_output_objects is not None and not self.nest:
452  print("Stat info being written in the rmf file")
453 
454  if not self.test_mode and not self.nest:
455  print("Setting up replica stat file")
456  replica_stat_file = globaldir + \
457  self.vars["replica_stat_file_suffix"] + "." + \
458  str(myindex) + ".out"
459  if not self.test_mode:
460  output.init_stat2(replica_stat_file, [rex],
461  extralabels=["score"],
462  jax_model=self._get_jax_model(sampler_mc))
463 
464  print("Setting up best pdb files")
465  if not self.is_multi_state:
466  if self.vars["number_of_best_scoring_models"] > 0:
467  output.init_pdb_best_scoring(
468  pdb_dir + "/" + self.vars["best_pdb_name_suffix"],
469  self.root_hier,
470  self.vars["number_of_best_scoring_models"],
471  replica_exchange=True,
472  mmcif=self.vars['mmcif'],
473  best_score_file=globaldir + "best.scores.rex.py")
474  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
475  output.write_psf(
476  pdb_dir + "/" + "model.psf",
477  pdb_dir + "/" +
478  self.vars["best_pdb_name_suffix"] + pdbext)
479  else:
480  if self.vars["number_of_best_scoring_models"] > 0:
481  for n in range(self.vars["number_of_states"]):
482  output.init_pdb_best_scoring(
483  pdb_dir + "/" + str(n) + "/" +
484  self.vars["best_pdb_name_suffix"],
485  self.root_hiers[n],
486  self.vars["number_of_best_scoring_models"],
487  replica_exchange=True,
488  mmcif=self.vars['mmcif'],
489  best_score_file=globaldir + "best.scores.rex.py")
490  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
491  output.write_psf(
492  pdb_dir + "/" + str(n) + "/" + "model.psf",
493  pdb_dir + "/" + str(n) + "/" +
494  self.vars["best_pdb_name_suffix"] + pdbext)
495 # ---------------------------------------------
496 
497  if self.em_object_for_rmf is not None:
498  output_hierarchies = [
499  self.root_hier,
500  self.em_object_for_rmf.get_density_as_hierarchy(
501  )]
502  else:
503  output_hierarchies = [self.root_hier]
504 
505  if not self.test_mode and not self.nest:
506  print("Setting up and writing initial rmf coordinate file")
507  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
508  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
509  output_hierarchies,
510  listofobjects=self.rmf_output_objects)
511  if self._rmf_restraints:
512  output.add_restraints_to_rmf(
513  init_suffix + "." + str(myindex) + ".rmf3",
514  self._rmf_restraints)
515  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
516  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
517 
518  if not self.test_mode:
519  mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
520  else:
521  mpivs = _MockMPIValues()
522 
523  self._add_provenance(sampler_md, sampler_mc)
524 
525  if not self.test_mode and not self.nest:
526  print("Setting up production rmf files")
527  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
528  output.init_rmf(rmfname, output_hierarchies,
529  geometries=self.vars["geometries"],
530  listofobjects=self.rmf_output_objects)
531 
532  if self._rmf_restraints:
533  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
534 
535  if not self.test_mode and self.nest:
536  print("Setting up NestOR rmf files")
537  nestor_rmf_fname = str(self.nestor_rmf_fname) + '_' + \
538  str(self.replica_exchange_object.get_my_index()) + '.rmf3'
539 
540  output.init_rmf(nestor_rmf_fname, output_hierarchies,
541  geometries=self.vars["geometries"],
542  listofobjects=self.rmf_output_objects)
543 
544  ntimes_at_low_temp = 0
545 
546  if myindex == 0 and not self.nest:
547  self.show_info()
548  self.replica_exchange_object.set_was_used(True)
549  nframes = self.vars["number_of_frames"]
550  if self.test_mode:
551  nframes = 1
552 
553  sampled_likelihoods = []
554  for i in range(nframes):
555  if self.test_mode:
556  score = 0.
557  else:
558  score = None
559  for nr in range(self.vars["num_sample_rounds"]):
560  if sampler_md is not None:
561  score = sampler_md.optimize(
562  self.vars["molecular_dynamics_steps"])
563  if sampler_mc is not None:
564  score = sampler_mc.optimize(
565  self.vars["monte_carlo_steps"])
566  if score is None:
568  self.model).evaluate(False)
569  elif (IMP.get_check_level() >= IMP.USAGE_AND_INTERNAL
570  and not self.use_jax):
571  # Final score from samplers should match the current
572  # score of the Model
573  check_score = IMP.pmi.tools.get_restraint_set(
574  self.model).evaluate(False)
575  assert abs(score - check_score) < 1e-4
576  mpivs.set_value("score", score)
577  if not self.nest:
578  output.set_output_entry("score", score)
579 
580  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
581 
582  if self.vars["save_coordinates_mode"] == "lowest_temperature":
583  save_frame = (min_temp_index == my_temp_index)
584  elif self.vars["save_coordinates_mode"] == "25th_score":
585  score_perc = mpivs.get_percentile("score")
586  save_frame = (score_perc*100.0 <= 25.0)
587  elif self.vars["save_coordinates_mode"] == "50th_score":
588  score_perc = mpivs.get_percentile("score")
589  save_frame = (score_perc*100.0 <= 50.0)
590  elif self.vars["save_coordinates_mode"] == "75th_score":
591  score_perc = mpivs.get_percentile("score")
592  save_frame = (score_perc*100.0 <= 75.0)
593 
594  # Ensure model is updated before saving output files
595  if save_frame and not self.test_mode:
596  self.model.update()
597 
598  if save_frame:
599  print("--- frame %s score %s " % (str(i), str(score)))
600 
601  if self.nest:
602  if math.isnan(score):
603  sampled_likelihoods.append(math.nan)
604  else:
605  likelihood_for_sample = 1
606  for rstrnt in self.nestor_restraints:
607  likelihood_for_sample *= rstrnt.get_likelihood()
608  sampled_likelihoods.append(likelihood_for_sample)
609  output.write_rmf(nestor_rmf_fname)
610 
611  if not self.test_mode and not self.nest:
612  if i % self.vars["nframes_write_coordinates"] == 0:
613  print('--- writing coordinates')
614  if self.vars["number_of_best_scoring_models"] > 0:
615  output.write_pdb_best_scoring(score)
616  output.write_rmf(rmfname)
617  output.set_output_entry("rmf_file", rmfname)
618  output.set_output_entry("rmf_frame_index",
619  ntimes_at_low_temp)
620  else:
621  output.set_output_entry("rmf_file", rmfname)
622  output.set_output_entry("rmf_frame_index", '-1')
623  if self.output_objects is not None:
624  output.write_stat2(
625  low_temp_stat_file,
626  jax_model=self._get_jax_model(sampler_mc))
627  ntimes_at_low_temp += 1
628 
629  if not self.test_mode and not self.nest:
630  output.write_stat2(
631  replica_stat_file,
632  jax_model=self._get_jax_model(sampler_mc))
633  if self.vars["replica_exchange_swap"]:
634  rex.swap_temp(i, score)
635 
636  if self.nest and len(sampled_likelihoods) > 0:
637  with open("likelihoods_"
638  + str(self.replica_exchange_object.get_my_index()),
639  "wb") as lif:
640  pickle.dump(sampled_likelihoods, lif)
641 
642  output.close_rmf(nestor_rmf_fname)
643 
644  for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
645  p.add_replica_exchange(state, self)
646 
647  if not self.test_mode and not self.nest:
648  print("closing production rmf files")
649  output.close_rmf(rmfname)
650 
651 
653  """A macro to build a IMP::pmi::topology::System based on a
654  TopologyReader object.
655 
656  Easily create multi-state systems by calling this macro
657  repeatedly with different TopologyReader objects!
658  A useful function is get_molecules() which returns the PMI Molecules
659  grouped by state as a dictionary with key = (molecule name),
660  value = IMP.pmi.topology.Molecule
661  Quick multi-state system:
662  @code{.python}
663  model = IMP.Model()
664  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
665  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
666  bs = IMP.pmi.macros.BuildSystem(model)
667  bs.add_state(reader1)
668  bs.add_state(reader2)
669  bs.execute_macro() # build everything including degrees of freedom
670  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
671  ### now you have a two state system, you add restraints etc
672  @endcode
673  @note The "domain name" entry of the topology reader is not used.
674  All molecules are set up by the component name, but split into rigid bodies
675  as requested.
676  """
677 
678  _alphabets = {'DNA': IMP.pmi.alphabets.dna,
679  'RNA': IMP.pmi.alphabets.rna}
680 
681  def __init__(self, model, sequence_connectivity_scale=4.0,
682  force_create_gmm_files=False, resolutions=[1, 10],
683  name='System'):
684  """Constructor
685  @param model An IMP Model
686  @param sequence_connectivity_scale For scaling the connectivity
687  restraint
688  @param force_create_gmm_files If True, will sample and create GMMs
689  no matter what. If False, will only sample if the
690  files don't exist. If number of Gaussians is zero, won't
691  do anything.
692  @param resolutions The resolutions to build for structured regions
693  @param name The name of the top-level hierarchy node.
694  """
695  self.model = model
696  self.system = IMP.pmi.topology.System(self.model, name=name)
697  self._readers = [] # the TopologyReaders (one per state)
698  # TempResidues for each domain key=unique name,
699  # value=(atomic_res,non_atomic_res).
700  self._domain_res = []
701  self._domains = [] # key = domain unique name, value = Component
702  self.force_create_gmm_files = force_create_gmm_files
703  self.resolutions = resolutions
704 
705  def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
706  chain_ids=None):
707  """Add a state using the topology info in a
708  IMP::pmi::topology::TopologyReader object.
709  When you are done adding states, call execute_macro()
710  @param reader The TopologyReader object
711  @param keep_chain_id If True, keep the chain IDs from the
712  original PDB files, if available
713  @param fasta_name_map dictionary for converting protein names
714  found in the fasta file
715  @param chain_ids A list or string of chain IDs for assigning to
716  newly-created molecules, e.g.
717  `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
718  If not specified, chain IDs A through Z are assigned, then
719  AA through AZ, then BA through BZ, and so on, in the same
720  fashion as PDB.
721  """
722  state = self.system.create_state()
723  self._readers.append(reader)
724  # key is unique name, value is (atomic res, nonatomicres)
725  these_domain_res = {}
726  these_domains = {} # key is unique name, value is _Component
727  if chain_ids is None:
728  chain_ids = IMP.pmi.output._ChainIDs()
729  numchain = 0
730 
731  # setup representation
732  # loop over molecules, copies, then domains
733  for molname in reader.get_molecules():
734  copies = reader.get_molecules()[molname].domains
735  for nc, copyname in enumerate(copies):
736  print("BuildSystem.add_state: setting up molecule %s copy "
737  "number %s" % (molname, str(nc)))
738  copy = copies[copyname]
739  # option to not rename chains
740  if keep_chain_id:
741  all_chains = [c for c in copy if c.chain is not None]
742  if all_chains:
743  chain_id = all_chains[0].chain
744  else:
745  chain_id = chain_ids[numchain]
746  warnings.warn(
747  "No PDBs specified for %s, so keep_chain_id has "
748  "no effect; using default chain ID '%s'"
749  % (molname, chain_id), IMP.pmi.ParameterWarning)
750  else:
751  chain_id = chain_ids[numchain]
752  if nc == 0:
753  alphabet = IMP.pmi.alphabets.amino_acid
754  fasta_flag = copy[0].fasta_flag
755  if fasta_flag in self._alphabets:
756  alphabet = self._alphabets[fasta_flag]
758  copy[0].fasta_file, fasta_name_map)
759  seq = seqs[copy[0].fasta_id]
760  print("BuildSystem.add_state: molecule %s sequence has "
761  "%s residues" % (molname, len(seq)))
762  orig_mol = state.create_molecule(
763  molname, seq, chain_id, alphabet=alphabet,
764  uniprot=seqs.uniprot.get(copy[0].fasta_id))
765  mol = orig_mol
766  numchain += 1
767  else:
768  print("BuildSystem.add_state: creating a copy for "
769  "molecule %s" % molname)
770  mol = orig_mol.create_copy(chain_id)
771  numchain += 1
772 
773  for domainnumber, domain in enumerate(copy):
774  print("BuildSystem.add_state: ---- setting up domain %s "
775  "of molecule %s" % (domainnumber, molname))
776  # we build everything in the residue range, even if it
777  # extends beyond what's in the actual PDB file
778  these_domains[domain.get_unique_name()] = domain
779  if domain.residue_range == [] or \
780  domain.residue_range is None:
781  domain_res = mol.get_residues()
782  else:
783  start = domain.residue_range[0]+domain.pdb_offset
784  if domain.residue_range[1] == 'END':
785  end = len(mol.sequence)
786  else:
787  end = domain.residue_range[1]+domain.pdb_offset
788  domain_res = mol.residue_range(start-1, end-1)
789  print("BuildSystem.add_state: -------- domain %s of "
790  "molecule %s extends from residue %s to "
791  "residue %s "
792  % (domainnumber, molname, start, end))
793  if domain.pdb_file == "BEADS":
794  print("BuildSystem.add_state: -------- domain %s of "
795  "molecule %s represented by BEADS "
796  % (domainnumber, molname))
797  mol.add_representation(
798  domain_res,
799  resolutions=[domain.bead_size],
800  setup_particles_as_densities=(
801  domain.em_residues_per_gaussian != 0),
802  color=domain.color)
803  these_domain_res[domain.get_unique_name()] = \
804  (set(), domain_res)
805  elif domain.pdb_file == "IDEAL_HELIX":
806  print("BuildSystem.add_state: -------- domain %s of "
807  "molecule %s represented by IDEAL_HELIX "
808  % (domainnumber, molname))
809  emper = domain.em_residues_per_gaussian
810  mol.add_representation(
811  domain_res,
812  resolutions=self.resolutions,
813  ideal_helix=True,
814  density_residues_per_component=emper,
815  density_prefix=domain.density_prefix,
816  density_force_compute=self.force_create_gmm_files,
817  color=domain.color)
818  these_domain_res[domain.get_unique_name()] = \
819  (domain_res, set())
820  else:
821  print("BuildSystem.add_state: -------- domain %s of "
822  "molecule %s represented by pdb file %s "
823  % (domainnumber, molname, domain.pdb_file))
824  domain_atomic = mol.add_structure(domain.pdb_file,
825  domain.chain,
826  domain.residue_range,
827  domain.pdb_offset,
828  soft_check=True)
829  domain_non_atomic = domain_res - domain_atomic
830  if not domain.em_residues_per_gaussian:
831  mol.add_representation(
832  domain_atomic, resolutions=self.resolutions,
833  color=domain.color)
834  if len(domain_non_atomic) > 0:
835  mol.add_representation(
836  domain_non_atomic,
837  resolutions=[domain.bead_size],
838  color=domain.color)
839  else:
840  print("BuildSystem.add_state: -------- domain %s "
841  "of molecule %s represented by gaussians "
842  % (domainnumber, molname))
843  emper = domain.em_residues_per_gaussian
844  creategmm = self.force_create_gmm_files
845  mol.add_representation(
846  domain_atomic,
847  resolutions=self.resolutions,
848  density_residues_per_component=emper,
849  density_prefix=domain.density_prefix,
850  density_force_compute=creategmm,
851  color=domain.color)
852  if len(domain_non_atomic) > 0:
853  mol.add_representation(
854  domain_non_atomic,
855  resolutions=[domain.bead_size],
856  setup_particles_as_densities=True,
857  color=domain.color)
858  these_domain_res[domain.get_unique_name()] = (
859  domain_atomic, domain_non_atomic)
860  self._domain_res.append(these_domain_res)
861  self._domains.append(these_domains)
862  print('BuildSystem.add_state: State', len(self.system.states), 'added')
863  return state
864 
865  def get_molecules(self):
866  """Return list of all molecules grouped by state.
867  For each state, it's a dictionary of Molecules where key is the
868  molecule name
869  """
870  return [s.get_molecules() for s in self.system.get_states()]
871 
872  def get_molecule(self, molname, copy_index=0, state_index=0):
873  return self.system.get_states()[state_index].get_molecules()[
874  molname][copy_index]
875 
876  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04,
877  max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
878  """Builds representations and sets up degrees of freedom"""
879  print("BuildSystem.execute_macro: building representations")
880  self.root_hier = self.system.build()
881 
882  print("BuildSystem.execute_macro: setting up degrees of freedom")
883  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
884  for nstate, reader in enumerate(self._readers):
885  rbs = reader.get_rigid_bodies()
886  srbs = reader.get_super_rigid_bodies()
887  csrbs = reader.get_chains_of_super_rigid_bodies()
888 
889  # add rigid bodies
890  domains_in_rbs = set()
891  for rblist in rbs:
892  print("BuildSystem.execute_macro: -------- building rigid "
893  "body %s" % (str(rblist)))
894  all_res = IMP.pmi.tools.OrderedSet()
895  bead_res = IMP.pmi.tools.OrderedSet()
896  for dname in rblist:
897  domain = self._domains[nstate][dname]
898  print("BuildSystem.execute_macro: -------- adding %s"
899  % (str(dname)))
900  all_res |= self._domain_res[nstate][dname][0]
901  bead_res |= self._domain_res[nstate][dname][1]
902  domains_in_rbs.add(dname)
903  all_res |= bead_res
904  print("BuildSystem.execute_macro: -------- creating rigid "
905  "body with max_trans %s max_rot %s "
906  "non_rigid_max_trans %s"
907  % (str(max_rb_trans), str(max_rb_rot),
908  str(max_bead_trans)))
909  self.dof.create_rigid_body(all_res,
910  nonrigid_parts=bead_res,
911  max_trans=max_rb_trans,
912  max_rot=max_rb_rot,
913  nonrigid_max_trans=max_bead_trans,
914  name="RigidBody %s" % dname)
915 
916  # if you have any domains not in an RB, set them as flexible beads
917  for dname, domain in self._domains[nstate].items():
918  if dname not in domains_in_rbs:
919  if domain.pdb_file != "BEADS":
920  warnings.warn(
921  "No rigid bodies set for %s. Residues read from "
922  "the PDB file will not be sampled - only regions "
923  "missing from the PDB will be treated flexibly. "
924  "To sample the entire sequence, use BEADS instead "
925  "of a PDB file name" % dname,
927  self.dof.create_flexible_beads(
928  self._domain_res[nstate][dname][1],
929  max_trans=max_bead_trans)
930 
931  # add super rigid bodies
932  for srblist in srbs:
933  print("BuildSystem.execute_macro: -------- building "
934  "super rigid body %s" % (str(srblist)))
935  all_res = IMP.pmi.tools.OrderedSet()
936  for dname in srblist:
937  print("BuildSystem.execute_macro: -------- adding %s"
938  % (str(dname)))
939  all_res |= self._domain_res[nstate][dname][0]
940  all_res |= self._domain_res[nstate][dname][1]
941 
942  print("BuildSystem.execute_macro: -------- creating super "
943  "rigid body with max_trans %s max_rot %s "
944  % (str(max_srb_trans), str(max_srb_rot)))
945  self.dof.create_super_rigid_body(
946  all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
947 
948  # add chains of super rigid bodies
949  for csrblist in csrbs:
950  all_res = IMP.pmi.tools.OrderedSet()
951  for dname in csrblist:
952  all_res |= self._domain_res[nstate][dname][0]
953  all_res |= self._domain_res[nstate][dname][1]
954  all_res = list(all_res)
955  all_res.sort(key=lambda r: r.get_index())
956  self.dof.create_main_chain_mover(all_res)
957  return self.root_hier, self.dof
958 
959 
960 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
962  """A macro for running all the basic operations of analysis.
963  Includes clustering, precision analysis, and making ensemble density maps.
964  A number of plots are also supported.
965  """
966  def __init__(self, model,
967  merge_directories=["./"],
968  stat_file_name_suffix="stat",
969  best_pdb_name_suffix="model",
970  do_clean_first=True,
971  do_create_directories=True,
972  global_output_directory="output/",
973  replica_stat_file_suffix="stat_replica",
974  global_analysis_result_directory="./analysis/",
975  test_mode=False):
976  """Constructor.
977  @param model The IMP model
978  @param stat_file_name_suffix
979  @param merge_directories The directories containing output files
980  @param best_pdb_name_suffix
981  @param do_clean_first
982  @param do_create_directories
983  @param global_output_directory Where everything is
984  @param replica_stat_file_suffix
985  @param global_analysis_result_directory
986  @param test_mode If True, nothing is changed on disk
987  """
988 
989  try:
990  from mpi4py import MPI
991  self.comm = MPI.COMM_WORLD
992  self.rank = self.comm.Get_rank()
993  self.number_of_processes = self.comm.size
994  except ImportError:
995  self.rank = 0
996  self.number_of_processes = 1
997 
998  self.test_mode = test_mode
999  self._protocol_output = []
1000  self.cluster_obj = None
1001  self.model = model
1002  stat_dir = global_output_directory
1003  self.stat_files = []
1004  # it contains the position of the root directories
1005  for rd in merge_directories:
1006  stat_files = glob.glob(os.path.join(rd, stat_dir, "stat.*.out"))
1007  if len(stat_files) == 0:
1008  warnings.warn("no stat files found in %s"
1009  % os.path.join(rd, stat_dir),
1011  self.stat_files += stat_files
1012 
1013  def add_protocol_output(self, p):
1014  """Capture details of the modeling protocol.
1015  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
1016  """
1017  # Assume last state is the one we're interested in
1018  self._protocol_output.append((p, p._last_state))
1019 
1020  def get_modeling_trajectory(self,
1021  score_key="Total_Score",
1022  rmf_file_key="rmf_file",
1023  rmf_file_frame_key="rmf_frame_index",
1024  outputdir="./",
1025  get_every=1,
1026  nframes_trajectory=10000):
1027  """ Get a trajectory of the modeling run, for generating
1028  demonstrative movies
1029 
1030  @param score_key The score for ranking models
1031  @param rmf_file_key Key pointing to RMF filename
1032  @param rmf_file_frame_key Key pointing to RMF frame number
1033  @param outputdir The local output directory used in the run
1034  @param get_every Extract every nth frame
1035  @param nframes_trajectory Total number of frames of the trajectory
1036  """
1037  import math
1038 
1039  trajectory_models = IMP.pmi.io.get_trajectory_models(
1040  self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
1041  get_every)
1042  score_list = list(map(float, trajectory_models[2]))
1043 
1044  max_score = max(score_list)
1045  min_score = min(score_list)
1046 
1047  bins = [(max_score-min_score)*math.exp(-float(i))+min_score
1048  for i in range(nframes_trajectory)]
1049  binned_scores = [None]*nframes_trajectory
1050  binned_model_indexes = [-1]*nframes_trajectory
1051 
1052  for model_index, s in enumerate(score_list):
1053  bins_score_diffs = [abs(s-b) for b in bins]
1054  bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1055  if binned_scores[bin_index] is None:
1056  binned_scores[bin_index] = s
1057  binned_model_indexes[bin_index] = model_index
1058  else:
1059  old_diff = abs(binned_scores[bin_index]-bins[bin_index])
1060  new_diff = abs(s-bins[bin_index])
1061  if new_diff < old_diff:
1062  binned_scores[bin_index] = s
1063  binned_model_indexes[bin_index] = model_index
1064 
1065  print(binned_scores)
1066  print(binned_model_indexes)
1067 
1068  def _expand_ambiguity(self, prot, d):
1069  """If using PMI2, expand the dictionary to include copies as
1070  ambiguous options
1071 
1072  This also keeps the states separate.
1073  """
1074  newdict = {}
1075  for key in d:
1076  val = d[key]
1077  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
1078  newdict[key] = val
1079  continue
1080  states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1081  if isinstance(val, tuple):
1082  start = val[0]
1083  stop = val[1]
1084  name = val[2]
1085  else:
1086  start = 1
1087  stop = -1
1088  name = val
1089  for nst in range(len(states)):
1090  sel = IMP.atom.Selection(prot, molecule=name, state_index=nst)
1091  copies = sel.get_selected_particles(with_representation=False)
1092  if len(copies) > 1:
1093  for nc in range(len(copies)):
1094  if len(states) > 1:
1095  newdict['%s.%i..%i' % (name, nst, nc)] = \
1096  (start, stop, name, nc, nst)
1097  else:
1098  newdict['%s..%i' % (name, nc)] = \
1099  (start, stop, name, nc, nst)
1100  else:
1101  newdict[key] = val
1102  return newdict
1103 
1104  def clustering(self,
1105  score_key="Total_Score",
1106  rmf_file_key="rmf_file",
1107  rmf_file_frame_key="rmf_frame_index",
1108  state_number=0,
1109  prefiltervalue=None,
1110  feature_keys=[],
1111  outputdir="./",
1112  alignment_components=None,
1113  number_of_best_scoring_models=10,
1114  rmsd_calculation_components=None,
1115  distance_matrix_file='distances.mat',
1116  load_distance_matrix_file=False,
1117  skip_clustering=False,
1118  number_of_clusters=1,
1119  display_plot=False,
1120  exit_after_display=True,
1121  get_every=1,
1122  first_and_last_frames=None,
1123  density_custom_ranges=None,
1124  write_pdb_with_centered_coordinates=False,
1125  voxel_size=5.0):
1126  """Get the best scoring models, compute a distance matrix,
1127  cluster them, and create density maps.
1128 
1129  Tuple format: "molname" just the molecule,
1130  or (start,stop,molname,copy_num(optional),state_num(optional)
1131  Can pass None for copy or state to ignore that field.
1132  If you don't pass a specific copy number
1133 
1134  @param score_key The score for ranking models.
1135  @param rmf_file_key Key pointing to RMF filename
1136  @param rmf_file_frame_key Key pointing to RMF frame number
1137  @param state_number State number to analyze
1138  @param prefiltervalue Only include frames where the
1139  score key is below this value
1140  @param feature_keys Keywords for which you want to
1141  calculate average, medians, etc.
1142  If you pass "Keyname" it'll include everything that matches
1143  "*Keyname*"
1144  @param outputdir The local output directory used in
1145  the run
1146  @param alignment_components Dictionary with keys=groupname,
1147  values are tuples for aligning the structures
1148  e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1149  @param number_of_best_scoring_models Num models to keep per run
1150  @param rmsd_calculation_components For calculating RMSD
1151  (same format as alignment_components)
1152  @param distance_matrix_file Where to store/read the
1153  distance matrix
1154  @param load_distance_matrix_file Try to load the distance
1155  matrix file
1156  @param skip_clustering Just extract the best scoring
1157  models and save the pdbs
1158  @param number_of_clusters Number of k-means clusters
1159  @param display_plot Display the distance matrix
1160  @param exit_after_display Exit after displaying distance
1161  matrix
1162  @param get_every Extract every nth frame
1163  @param first_and_last_frames A tuple with the first and last
1164  frames to be analyzed. Values are percentages!
1165  Default: get all frames
1166  @param density_custom_ranges For density calculation
1167  (same format as alignment_components)
1168  @param write_pdb_with_centered_coordinates
1169  @param voxel_size Used for the density output
1170  """
1171  # Track provenance information to be added to each output model
1172  prov = []
1173  self._outputdir = Path(outputdir).absolute()
1174  self._number_of_clusters = number_of_clusters
1175  for p, state in self._protocol_output:
1176  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1177 
1178  if self.test_mode:
1179  return
1180 
1181  if self.rank == 0:
1182  try:
1183  os.mkdir(outputdir)
1184  except: # noqa: E722
1185  pass
1186 
1187  if not load_distance_matrix_file:
1188  if len(self.stat_files) == 0:
1189  print("ERROR: no stat file found in the given path")
1190  return
1191  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1192  self.stat_files, self.number_of_processes)[self.rank]
1193 
1194  # read ahead to check if you need the PMI2 score key instead
1195  for k in (score_key, rmf_file_key, rmf_file_frame_key):
1196  if k in feature_keys:
1197  warnings.warn(
1198  "no need to pass " + k + " to feature_keys.",
1200  feature_keys.remove(k)
1201 
1202  best_models = IMP.pmi.io.get_best_models(
1203  my_stat_files, score_key, feature_keys, rmf_file_key,
1204  rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1205  rmf_file_list = best_models[0]
1206  rmf_file_frame_list = best_models[1]
1207  score_list = best_models[2]
1208  feature_keyword_list_dict = best_models[3]
1209 
1210 # ------------------------------------------------------------------------
1211 # collect all the files and scores
1212 # ------------------------------------------------------------------------
1213 
1214  if self.number_of_processes > 1:
1215  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1216  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1217  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1218  rmf_file_frame_list)
1219  for k in feature_keyword_list_dict:
1220  feature_keyword_list_dict[k] = \
1222  feature_keyword_list_dict[k])
1223 
1224  # sort by score and get the best scoring ones
1225  score_rmf_tuples = list(zip(score_list,
1226  rmf_file_list,
1227  rmf_file_frame_list,
1228  list(range(len(score_list)))))
1229 
1230  if density_custom_ranges:
1231  for k in density_custom_ranges:
1232  if not isinstance(density_custom_ranges[k], list):
1233  raise Exception("Density custom ranges: values must "
1234  "be lists of tuples")
1235 
1236  # keep subset of frames if requested
1237  if first_and_last_frames is not None:
1238  nframes = len(score_rmf_tuples)
1239  first_frame = int(first_and_last_frames[0] * nframes)
1240  last_frame = int(first_and_last_frames[1] * nframes)
1241  if last_frame > len(score_rmf_tuples):
1242  last_frame = -1
1243  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1244 
1245  # sort RMFs by the score_key in ascending order, and store the rank
1246  best_score_rmf_tuples = sorted(
1247  score_rmf_tuples,
1248  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1249  best_score_rmf_tuples = [t+(n,) for n, t in
1250  enumerate(best_score_rmf_tuples)]
1251  # Note in the provenance info that we only kept best-scoring models
1252  prov.append(IMP.pmi.io.FilterProvenance(
1253  "Best scoring", 0, number_of_best_scoring_models))
1254  # sort the feature scores in the same way
1255  best_score_feature_keyword_list_dict = defaultdict(list)
1256  for tpl in best_score_rmf_tuples:
1257  index = tpl[3]
1258  for f in feature_keyword_list_dict:
1259  best_score_feature_keyword_list_dict[f].append(
1260  feature_keyword_list_dict[f][index])
1261  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1262  best_score_rmf_tuples,
1263  self.number_of_processes)[self.rank]
1264 
1265  # expand the dictionaries to include ambiguous copies
1266  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1267  self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1268  if rmsd_calculation_components is not None:
1269  tmp = self._expand_ambiguity(
1270  prot_ahead, rmsd_calculation_components)
1271  if tmp != rmsd_calculation_components:
1272  print('Detected ambiguity, expand rmsd components to',
1273  tmp)
1274  rmsd_calculation_components = tmp
1275  if alignment_components is not None:
1276  tmp = self._expand_ambiguity(prot_ahead,
1277  alignment_components)
1278  if tmp != alignment_components:
1279  print('Detected ambiguity, expand alignment '
1280  'components to', tmp)
1281  alignment_components = tmp
1282 
1283 # -------------------------------------------------------------
1284 # read the coordinates
1285 # ------------------------------------------------------------
1286  rmsd_weights = IMP.pmi.io.get_bead_sizes(
1287  self.model, my_best_score_rmf_tuples[0],
1288  rmsd_calculation_components, state_number=state_number)
1290  self.model, my_best_score_rmf_tuples, alignment_components,
1291  rmsd_calculation_components, state_number=state_number)
1292 
1293  # note! the coordinates are simply float tuples, NOT decorators,
1294  # NOT Vector3D, NOR particles, because these object cannot be
1295  # serialized. We need serialization
1296  # for the parallel computation based on mpi.
1297 
1298  # dict:key=component name,val=coords per hit
1299  all_coordinates = got_coords[0]
1300 
1301  # same as above, limited to alignment bits
1302  alignment_coordinates = got_coords[1]
1303 
1304  # same as above, limited to RMSD bits
1305  rmsd_coordinates = got_coords[2]
1306 
1307  # dictionary with key=RMF, value=score rank
1308  rmf_file_name_index_dict = got_coords[3]
1309 
1310  # RMF file per hit
1311  all_rmf_file_names = got_coords[4]
1312 
1313 # ------------------------------------------------------------------------
1314 # optionally don't compute distance matrix or cluster, just write top files
1315 # ------------------------------------------------------------------------
1316  if skip_clustering:
1317  if density_custom_ranges:
1318  DensModule = IMP.pmi.analysis.GetModelDensity(
1319  density_custom_ranges, voxel=voxel_size)
1320 
1321  dircluster = os.path.join(outputdir,
1322  "all_models."+str(self.rank))
1323  try:
1324  os.mkdir(outputdir)
1325  except: # noqa: E722
1326  pass
1327  try:
1328  os.mkdir(dircluster)
1329  except: # noqa: E722
1330  pass
1331  clusstat = open(os.path.join(
1332  dircluster, "stat."+str(self.rank)+".out"), "w")
1333  for cnt, tpl in enumerate(my_best_score_rmf_tuples):
1334  rmf_name = tpl[1]
1335  rmf_frame_number = tpl[2]
1336  tmp_dict = {}
1337  index = tpl[4]
1338  for key in best_score_feature_keyword_list_dict:
1339  tmp_dict[key] = \
1340  best_score_feature_keyword_list_dict[key][index]
1341 
1342  if cnt == 0:
1343  prots, rs = \
1344  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1345  self.model, rmf_frame_number, rmf_name)
1346  else:
1347  linking_successful = \
1348  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1349  self.model, prots, rs, rmf_frame_number,
1350  rmf_name)
1351  if not linking_successful:
1352  continue
1353 
1354  if not prots:
1355  continue
1356 
1357  states = IMP.atom.get_by_type(
1358  prots[0], IMP.atom.STATE_TYPE)
1359  prot = states[state_number]
1360 
1361  # get transformation aligning coordinates of
1362  # requested tuples to the first RMF file
1363  if cnt == 0:
1364  coords_f1 = alignment_coordinates[cnt]
1365  if cnt > 0:
1366  coords_f2 = alignment_coordinates[cnt]
1367  if coords_f2:
1369  coords_f1, coords_f2)
1370  transformation = Ali.align()[1]
1371  else:
1372  transformation = \
1374 
1375  rbs = set()
1376  for p in IMP.atom.get_leaves(prot):
1377  if not IMP.core.XYZR.get_is_setup(p):
1379  IMP.core.XYZR(p).set_radius(0.0001)
1380  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1381 
1383  rbm = IMP.core.RigidBodyMember(p)
1384  rb = rbm.get_rigid_body()
1385  rbs.add(rb)
1386  else:
1388  transformation)
1389  for rb in rbs:
1390  IMP.core.transform(rb, transformation)
1391 
1392  o = IMP.pmi.output.Output()
1393  self.model.update()
1394  out_pdb_fn = os.path.join(
1395  dircluster, str(cnt)+"."+str(self.rank)+".pdb")
1396  out_rmf_fn = os.path.join(
1397  dircluster, str(cnt)+"."+str(self.rank)+".rmf3")
1398  o.init_pdb(out_pdb_fn, prot)
1399  tc = write_pdb_with_centered_coordinates
1400  o.write_pdb(out_pdb_fn,
1401  translate_to_geometric_center=tc)
1402 
1403  tmp_dict["local_pdb_file_name"] = \
1404  os.path.basename(out_pdb_fn)
1405  tmp_dict["rmf_file_full_path"] = rmf_name
1406  tmp_dict["local_rmf_file_name"] = \
1407  os.path.basename(out_rmf_fn)
1408  tmp_dict["local_rmf_frame_number"] = 0
1409 
1410  clusstat.write(str(tmp_dict)+"\n")
1411 
1412  # create a single-state System and write that
1414  IMP.Particle(self.model))
1415  h.set_name("System")
1416  h.add_child(prot)
1417  o.init_rmf(out_rmf_fn, [h], rs)
1418 
1419  o.write_rmf(out_rmf_fn)
1420  o.close_rmf(out_rmf_fn)
1421  # add the density
1422  if density_custom_ranges:
1423  DensModule.add_subunits_density(prot)
1424 
1425  if density_custom_ranges:
1426  DensModule.write_mrc(path=dircluster)
1427  del DensModule
1428  return
1429 
1430  # broadcast the coordinates
1431  if self.number_of_processes > 1:
1432  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1433  all_coordinates)
1434  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1435  all_rmf_file_names)
1436  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1437  rmf_file_name_index_dict)
1438  alignment_coordinates = IMP.pmi.tools.scatter_and_gather(
1439  alignment_coordinates)
1440  rmsd_coordinates = IMP.pmi.tools.scatter_and_gather(
1441  rmsd_coordinates)
1442 
1443  if self.rank == 0:
1444  # save needed information in external files
1445  self.save_objects(
1446  [best_score_feature_keyword_list_dict,
1447  rmf_file_name_index_dict],
1448  ".macro.pkl")
1449 
1450 # ------------------------------------------------------------------------
1451 # Calculate distance matrix and cluster
1452 # ------------------------------------------------------------------------
1453  print("setup clustering class")
1454  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1455 
1456  for n, model_coordinate_dict in enumerate(all_coordinates):
1457  # let's try to align
1458  if (alignment_components is not None
1459  and len(self.cluster_obj.all_coords) == 0):
1460  # set the first model as template coordinates
1461  self.cluster_obj.set_template(alignment_coordinates[n])
1462  self.cluster_obj.fill(all_rmf_file_names[n],
1463  rmsd_coordinates[n])
1464  print("Global calculating the distance matrix")
1465 
1466  # calculate distance matrix, all against all
1467  self.cluster_obj.dist_matrix()
1468 
1469  # perform clustering and optionally display
1470  if self.rank == 0:
1471  self.cluster_obj.do_cluster(number_of_clusters)
1472  if display_plot:
1473  if self.rank == 0:
1474  self.cluster_obj.plot_matrix(
1475  figurename=os.path.join(outputdir,
1476  'dist_matrix.pdf'))
1477  if exit_after_display:
1478  exit()
1479  self.cluster_obj.save_distance_matrix_file(
1480  file_name=distance_matrix_file)
1481 
1482 # ------------------------------------------------------------------------
1483 # Alternatively, load the distance matrix from file and cluster that
1484 # ------------------------------------------------------------------------
1485  else:
1486  if self.rank == 0:
1487  print("setup clustering class")
1488  self.cluster_obj = IMP.pmi.analysis.Clustering()
1489  self.cluster_obj.load_distance_matrix_file(
1490  file_name=distance_matrix_file)
1491  print("clustering with %s clusters" % str(number_of_clusters))
1492  self.cluster_obj.do_cluster(number_of_clusters)
1493  [best_score_feature_keyword_list_dict,
1494  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1495  if display_plot:
1496  if self.rank == 0:
1497  self.cluster_obj.plot_matrix(figurename=os.path.join(
1498  outputdir, 'dist_matrix.pdf'))
1499  if exit_after_display:
1500  exit()
1501  if self.number_of_processes > 1:
1502  self.comm.Barrier()
1503 
1504 # ------------------------------------------------------------------------
1505 # now save all information about the clusters
1506 # ------------------------------------------------------------------------
1507 
1508  if self.rank == 0:
1509  print(self.cluster_obj.get_cluster_labels())
1510  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1511  print("rank %s " % str(self.rank))
1512  print("cluster %s " % str(n))
1513  print("cluster label %s " % str(cl))
1514  print(self.cluster_obj.get_cluster_label_names(cl))
1515  cluster_size = \
1516  len(self.cluster_obj.get_cluster_label_names(cl))
1517  cluster_prov = \
1518  prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1519 
1520  # first initialize the Density class if requested
1521  if density_custom_ranges:
1522  DensModule = IMP.pmi.analysis.GetModelDensity(
1523  density_custom_ranges,
1524  voxel=voxel_size)
1525 
1526  dircluster = outputdir + "/cluster." + str(n) + "/"
1527  try:
1528  os.mkdir(dircluster)
1529  except: # noqa: E722
1530  pass
1531 
1532  rmsd_dict = {
1533  "AVERAGE_RMSD":
1534  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1535  clusstat = open(dircluster + "stat.out", "w")
1536  for k, structure_name in enumerate(
1537  self.cluster_obj.get_cluster_label_names(cl)):
1538  # extract the features
1539  tmp_dict = {}
1540  tmp_dict.update(rmsd_dict)
1541  index = rmf_file_name_index_dict[structure_name]
1542  for key in best_score_feature_keyword_list_dict:
1543  tmp_dict[
1544  key] = best_score_feature_keyword_list_dict[
1545  key][
1546  index]
1547 
1548  # get the rmf name and the frame number from the list of
1549  # frame names
1550  rmf_name = structure_name.split("|")[0]
1551  rmf_frame_number = int(structure_name.split("|")[1])
1552  clusstat.write(str(tmp_dict) + "\n")
1553 
1554  # extract frame (open or link to existing)
1555  if k == 0:
1556  prots, rs = \
1557  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1558  self.model, rmf_frame_number, rmf_name)
1559  else:
1560  linking_successful = \
1561  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1562  self.model, prots, rs, rmf_frame_number,
1563  rmf_name)
1564  if not linking_successful:
1565  continue
1566  if not prots:
1567  continue
1568 
1569  states = IMP.atom.get_by_type(
1570  prots[0], IMP.atom.STATE_TYPE)
1571  prot = states[state_number]
1572  if k == 0:
1573  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1574 
1575  # transform clusters onto first
1576  if k > 0:
1577  co = self.cluster_obj
1578  model_index = co.get_model_index_from_name(
1579  structure_name)
1580  transformation = co.get_transformation_to_first_member(
1581  cl, model_index)
1582  rbs = set()
1583  for p in IMP.atom.get_leaves(prot):
1584  if not IMP.core.XYZR.get_is_setup(p):
1586  IMP.core.XYZR(p).set_radius(0.0001)
1587  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1588 
1590  rbm = IMP.core.RigidBodyMember(p)
1591  rb = rbm.get_rigid_body()
1592  rbs.add(rb)
1593  else:
1595  transformation)
1596  for rb in rbs:
1597  IMP.core.transform(rb, transformation)
1598 
1599  # add the density
1600  if density_custom_ranges:
1601  DensModule.add_subunits_density(prot)
1602 
1603  # pdb writing should be optimized!
1604  o = IMP.pmi.output.Output()
1605  self.model.update()
1606  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1607  o.write_pdb(dircluster + str(k) + ".pdb")
1608 
1609  # create a single-state System and write that
1611  IMP.Particle(self.model))
1612  h.set_name("System")
1613  h.add_child(prot)
1614  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1615  o.write_rmf(dircluster + str(k) + ".rmf3")
1616  o.close_rmf(dircluster + str(k) + ".rmf3")
1617 
1618  del o
1619  # IMP.atom.destroy(prot)
1620 
1621  if density_custom_ranges:
1622  DensModule.write_mrc(path=dircluster)
1623  del DensModule
1624 
1625  if self.number_of_processes > 1:
1626  self.comm.Barrier()
1627 
1628  def get_cluster_rmsd(self, cluster_num):
1629  if self.cluster_obj is None:
1630  raise Exception("Run clustering first")
1631  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1632 
1633  def save_objects(self, objects, file_name):
1634  import pickle
1635  with open(file_name, 'wb') as outf:
1636  pickle.dump(objects, outf)
1637 
1638  def load_objects(self, file_name):
1639  import pickle
1640  with open(file_name, 'rb') as inputf:
1641  objects = pickle.load(inputf)
1642  return objects
1643 
1644 
1646 
1647  """
1648  This class contains analysis utilities to investigate ReplicaExchange
1649  results.
1650  """
1651 
1652  ########################
1653  # Construction and Setup
1654  ########################
1655 
1656  def __init__(self, model, stat_files, best_models=None, score_key=None,
1657  alignment=True):
1658  """
1659  Construction of the Class.
1660  @param model IMP.Model()
1661  @param stat_files list of string. Can be ascii stat files,
1662  rmf files names
1663  @param best_models Integer. Number of best scoring models,
1664  if None: all models will be read
1665  @param score_key Use the provided stat key keyword as the score
1666  (by default, the total score is used)
1667  @param alignment boolean (Default=True). Align before computing
1668  the rmsd.
1669  """
1670 
1671  self.model = model
1672  self.best_models = best_models
1674  model, stat_files, self.best_models, score_key, cache=True)
1676  StatHierarchyHandler=self.stath0)
1677 
1678  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(
1680  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(
1682  self.sel0_rmsd = IMP.atom.Selection(self.stath0)
1683  self.sel1_rmsd = IMP.atom.Selection(self.stath1)
1684  self.sel0_alignment = IMP.atom.Selection(self.stath0)
1685  self.sel1_alignment = IMP.atom.Selection(self.stath1)
1686  self.clusters = []
1687  # fill the cluster list with a single cluster containing all models
1688  c = IMP.pmi.output.Cluster(0)
1689  self.clusters.append(c)
1690  for n0 in range(len(self.stath0)):
1691  c.add_member(n0)
1692  self.pairwise_rmsd = {}
1693  self.pairwise_molecular_assignment = {}
1694  self.alignment = alignment
1695  self.symmetric_molecules = {}
1696  self.issymmetricsel = {}
1697  self.update_seldicts()
1698  self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1699  IMP.atom.get_leaves(self.stath0))
1700  self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1701  IMP.atom.get_leaves(self.stath1))
1702 
1703  def set_rmsd_selection(self, **kwargs):
1704  """
1705  Setup the selection onto which the rmsd is computed
1706  @param kwargs use IMP.atom.Selection keywords
1707  """
1708  self.sel0_rmsd = IMP.atom.Selection(self.stath0, **kwargs)
1709  self.sel1_rmsd = IMP.atom.Selection(self.stath1, **kwargs)
1710  self.update_seldicts()
1711 
1712  def set_symmetric(self, molecule_name):
1713  """
1714  Store names of symmetric molecules
1715  """
1716  self.symmetric_molecules[molecule_name] = 0
1717  self.update_seldicts()
1718 
1719  def set_alignment_selection(self, **kwargs):
1720  """
1721  Setup the selection onto which the alignment is computed
1722  @param kwargs use IMP.atom.Selection keywords
1723  """
1724  self.sel0_alignment = IMP.atom.Selection(self.stath0, **kwargs)
1725  self.sel1_alignment = IMP.atom.Selection(self.stath1, **kwargs)
1726 
1727  ######################
1728  # Clustering functions
1729  ######################
1730  def clean_clusters(self):
1731  for c in self.clusters:
1732  del c
1733  self.clusters = []
1734 
1735  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1736  """
1737  Cluster the models based on RMSD.
1738  @param rmsd_cutoff Float the distance cutoff in Angstrom
1739  @param metric (Default=IMP.atom.get_rmsd) the metric that will
1740  be used to compute rmsds
1741  """
1742  self.clean_clusters()
1743  not_clustered = set(range(len(self.stath1)))
1744  while len(not_clustered) > 0:
1745  self.aggregate(not_clustered, rmsd_cutoff, metric)
1746  self.update_clusters()
1747 
1748  def refine(self, rmsd_cutoff=10):
1749  """
1750  Refine the clusters by merging the ones whose centers are close
1751  @param rmsd_cutoff cutoff distance in Angstorms
1752  """
1753  clusters_copy = self.clusters
1754  for c0, c1 in itertools.combinations(self.clusters, 2):
1755  if c0.center_index is None:
1756  self.compute_cluster_center(c0)
1757  if c1.center_index is None:
1758  self.compute_cluster_center(c1)
1759  _ = self.stath0[c0.center_index]
1760  _ = self.stath1[c1.center_index]
1761  rmsd, molecular_assignment = self.rmsd()
1762  if rmsd <= rmsd_cutoff:
1763  if c1 in self.clusters:
1764  clusters_copy.remove(c1)
1765  c0 += c1
1766  self.clusters = clusters_copy
1767  self.update_clusters()
1768 
1769  ####################
1770  # Input Output
1771  ####################
1772 
1773  def set_cluster_assignments(self, cluster_ids):
1774  if len(cluster_ids) != len(self.stath0):
1775  raise ValueError('cluster ids has to be same length as '
1776  'number of frames')
1777 
1778  self.clusters = []
1779  for i in sorted(list(set(cluster_ids))):
1780  self.clusters.append(IMP.pmi.output.Cluster(i))
1781  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1782  self.clusters[idx].add_member(i, d)
1783 
1784  def get_cluster_data(self, cluster):
1785  """
1786  Return the model data from a cluster
1787  @param cluster IMP.pmi.output.Cluster object
1788  """
1789  data = []
1790  for m in cluster:
1791  data.append(m)
1792  return data
1793 
1794  def save_data(self, filename='data.pkl'):
1795  """
1796  Save the data for the whole models into a pickle file
1797  @param filename string
1798  """
1799  self.stath0.save_data(filename)
1800 
1801  def set_data(self, data):
1802  """
1803  Set the data from an external IMP.pmi.output.Data
1804  @param data IMP.pmi.output.Data
1805  """
1806  self.stath0.data = data
1807  self.stath1.data = data
1808 
1809  def load_data(self, filename='data.pkl'):
1810  """
1811  Load the data from an external pickled file
1812  @param filename string
1813  """
1814  self.stath0.load_data(filename)
1815  self.stath1.load_data(filename)
1816  self.best_models = len(self.stath0)
1817 
1818  def add_cluster(self, rmf_name_list):
1819  c = IMP.pmi.output.Cluster(len(self.clusters))
1820  print("creating cluster index "+str(len(self.clusters)))
1821  self.clusters.append(c)
1822  current_len = len(self.stath0)
1823 
1824  for rmf in rmf_name_list:
1825  print("adding rmf "+rmf)
1826  self.stath0.add_stat_file(rmf)
1827  self.stath1.add_stat_file(rmf)
1828 
1829  for n0 in range(current_len, len(self.stath0)):
1830  d0 = self.stath0[n0]
1831  c.add_member(n0, d0)
1832  self.update_clusters()
1833 
1834  def save_clusters(self, filename='clusters.pkl'):
1835  """
1836  Save the clusters into a pickle file
1837  @param filename string
1838  """
1839  import pickle
1840  with open(filename, 'wb') as fl:
1841  pickle.dump(self.clusters, fl)
1842 
1843  def load_clusters(self, filename='clusters.pkl', append=False):
1844  """
1845  Load the clusters from a pickle file
1846  @param filename string
1847  @param append bool (Default=False), if True. append the clusters
1848  to the ones currently present
1849  """
1850  import pickle
1851  self.clean_clusters()
1852  with open(filename, 'rb') as fl:
1853  if append:
1854  self.clusters += pickle.load(fl)
1855  else:
1856  self.clusters = pickle.load(fl)
1857  self.update_clusters()
1858 
1859  ####################
1860  # Analysis Functions
1861  ####################
1862 
1863  def compute_cluster_center(self, cluster):
1864  """
1865  Compute the cluster center for a given cluster
1866  """
1867  member_distance = defaultdict(float)
1868 
1869  for n0, n1 in itertools.combinations(cluster.members, 2):
1870  _ = self.stath0[n0]
1871  _ = self.stath1[n1]
1872  rmsd, _ = self.rmsd()
1873  member_distance[n0] += rmsd
1874 
1875  if len(member_distance) > 0:
1876  cluster.center_index = min(member_distance,
1877  key=member_distance.get)
1878  else:
1879  cluster.center_index = cluster.members[0]
1880 
1881  def save_coordinates(self, cluster, rmf_name=None, reference="Absolute",
1882  prefix="./"):
1883  """
1884  Save the coordinates of the current cluster a single rmf file
1885  """
1886  print("saving coordinates", cluster)
1887  if self.alignment:
1888  self.set_reference(reference, cluster)
1889  o = IMP.pmi.output.Output()
1890  if rmf_name is None:
1891  rmf_name = prefix+'/'+str(cluster.cluster_id)+".rmf3"
1892 
1893  _ = self.stath1[cluster.members[0]]
1894  self.model.update()
1895  o.init_rmf(rmf_name, [self.stath1])
1896  for n1 in cluster.members:
1897  _ = self.stath1[n1]
1898  self.model.update()
1900  if self.alignment:
1901  self.align()
1902  o.write_rmf(rmf_name)
1904  o.close_rmf(rmf_name)
1905 
1906  def prune_redundant_structures(self, rmsd_cutoff=10):
1907  """
1908  remove structures that are similar
1909  append it to a new cluster
1910  """
1911  print("pruning models")
1912  selected = 0
1913  filtered = [selected]
1914  remaining = range(1, len(self.stath1), 10)
1915 
1916  while len(remaining) > 0:
1917  d0 = self.stath0[selected]
1918  rm = []
1919  for n1 in remaining:
1920  _ = self.stath1[n1]
1921  if self.alignment:
1922  self.align()
1923  d, _ = self.rmsd()
1924  if d <= rmsd_cutoff:
1925  rm.append(n1)
1926  print("pruning model %s, similar to model %s, rmsd %s"
1927  % (str(n1), str(selected), str(d)))
1928  remaining = [x for x in remaining if x not in rm]
1929  if len(remaining) == 0:
1930  break
1931  selected = remaining[0]
1932  filtered.append(selected)
1933  remaining.pop(0)
1934  c = IMP.pmi.output.Cluster(len(self.clusters))
1935  self.clusters.append(c)
1936  for n0 in filtered:
1937  d0 = self.stath0[n0]
1938  c.add_member(n0, d0)
1939  self.update_clusters()
1940 
1941  def precision(self, cluster):
1942  """
1943  Compute the precision of a cluster
1944  """
1945  npairs = 0
1946  rmsd = 0.0
1947  precision = None
1948 
1949  if cluster.center_index is not None:
1950  members1 = [cluster.center_index]
1951  else:
1952  members1 = cluster.members
1953 
1954  for n0 in members1:
1955  _ = self.stath0[n0]
1956  for n1 in cluster.members:
1957  if n0 != n1:
1958  npairs += 1
1959  _ = self.stath1[n1]
1961  tmp_rmsd, _ = self.rmsd()
1962  rmsd += tmp_rmsd
1964 
1965  if npairs > 0:
1966  precision = rmsd/npairs
1967  cluster.precision = precision
1968  return precision
1969 
1970  def bipartite_precision(self, cluster1, cluster2, verbose=False):
1971  """
1972  Compute the bipartite precision (ie the cross-precision)
1973  between two clusters
1974  """
1975  npairs = 0
1976  rmsd = 0.0
1977  for cn0, n0 in enumerate(cluster1.members):
1978  _ = self.stath0[n0]
1979  for cn1, n1 in enumerate(cluster2.members):
1980  _ = self.stath1[n1]
1981  tmp_rmsd, _ = self.rmsd()
1982  if verbose:
1983  print("--- rmsd between structure %s and structure "
1984  "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1985  rmsd += tmp_rmsd
1986  npairs += 1
1987  precision = rmsd/npairs
1988  return precision
1989 
1990  def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1991  cluster_ref=None, step=1):
1992  """
1993  Compute the Root mean square fluctuations
1994  of a molecule in a cluster
1995  Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1996  residue indexes and the value is the rmsf
1997  """
1998  rmsf = IMP.pmi.tools.OrderedDict()
1999 
2000  # assumes that residue indexes are identical for stath0 and stath1
2001  if cluster_ref is not None:
2002  if cluster_ref.center_index is not None:
2003  members0 = [cluster_ref.center_index]
2004  else:
2005  members0 = cluster_ref.members
2006  else:
2007  if cluster.center_index is not None:
2008  members0 = [cluster.center_index]
2009  else:
2010  members0 = cluster.members
2011 
2012  s0 = IMP.atom.Selection(self.stath0, molecule=molecule, resolution=1,
2013  copy_index=copy_index, state_index=state_index)
2014  ps0 = s0.get_selected_particles()
2015  # get the residue indexes
2016  residue_indexes = list(IMP.pmi.tools.OrderedSet(
2017  [IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
2018 
2019  # get the corresponding particles
2020  npairs = 0
2021  for n0 in members0:
2022  d0 = self.stath0[n0]
2023  for n1 in cluster.members[::step]:
2024  if n0 != n1:
2025  print("--- rmsf %s %s" % (str(n0), str(n1)))
2027 
2028  s1 = IMP.atom.Selection(
2029  self.stath1, molecule=molecule,
2030  residue_indexes=residue_indexes, resolution=1,
2031  copy_index=copy_index, state_index=state_index)
2032  ps1 = s1.get_selected_particles()
2033 
2034  d1 = self.stath1[n1]
2035  if self.alignment:
2036  self.align()
2037  for n, (p0, p1) in enumerate(zip(ps0, ps1)):
2038  r = residue_indexes[n]
2039  d0 = IMP.core.XYZ(p0)
2040  d1 = IMP.core.XYZ(p1)
2041  if r in rmsf:
2042  rmsf[r] += IMP.core.get_distance(d0, d1)
2043  else:
2044  rmsf[r] = IMP.core.get_distance(d0, d1)
2045  npairs += 1
2047  for r in rmsf:
2048  rmsf[r] /= npairs
2049 
2050  for stath in [self.stath0, self.stath1]:
2051  if molecule not in self.symmetric_molecules:
2052  s = IMP.atom.Selection(
2053  stath, molecule=molecule, residue_index=r,
2054  resolution=1, copy_index=copy_index,
2055  state_index=state_index)
2056  else:
2057  s = IMP.atom.Selection(
2058  stath, molecule=molecule, residue_index=r,
2059  resolution=1, state_index=state_index)
2060 
2061  ps = s.get_selected_particles()
2062  for p in ps:
2064  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
2065  else:
2067 
2068  return rmsf
2069 
2070  def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2071  reference="Absolute", prefix="./", step=1):
2072  if self.alignment:
2073  self.set_reference(reference, cluster)
2074  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
2075  voxel=voxel_size)
2076 
2077  for n1 in cluster.members[::step]:
2078  print("density "+str(n1))
2079  _ = self.stath1[n1]
2081  if self.alignment:
2082  self.align()
2083  dens.add_subunits_density(self.stath1)
2085  dens.write_mrc(path=prefix+'/', suffix=str(cluster.cluster_id))
2086  del dens
2087 
2088  def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2089  consolidate=False, molecules=None, prefix='./',
2090  reference="Absolute"):
2091  if self.alignment:
2092  self.set_reference(reference, cluster)
2093  import numpy as np
2094  import matplotlib.pyplot as plt
2095  import matplotlib.cm as cm
2096  from scipy.spatial.distance import cdist
2097  import IMP.pmi.topology
2098  if molecules is None:
2100  for mol in IMP.pmi.tools.get_molecules(
2101  IMP.atom.get_leaves(self.stath1))]
2102  else:
2104  for mol in IMP.pmi.tools.get_molecules(
2106  self.stath1,
2107  molecules=molecules).get_selected_particles())]
2108  unique_copies = [mol for mol in mols if mol.get_copy_index() == 0]
2109  mol_names_unique = dict((mol.get_name(), mol) for mol in unique_copies)
2110  total_len_unique = sum(max(mol.get_residue_indexes())
2111  for mol in unique_copies)
2112 
2113  index_dict = {}
2114  prev_stop = 0
2115 
2116  if not consolidate:
2117  for mol in mols:
2118  seqlen = max(mol.get_residue_indexes())
2119  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2120  prev_stop += seqlen
2121 
2122  else:
2123  for mol in unique_copies:
2124  seqlen = max(mol.get_residue_indexes())
2125  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2126  prev_stop += seqlen
2127 
2128  for ncl, n1 in enumerate(cluster.members):
2129  print(ncl)
2130  _ = self.stath1[n1]
2131  coord_dict = IMP.pmi.tools.OrderedDict()
2132  for mol in mols:
2133  rindexes = mol.get_residue_indexes()
2134  coords = np.ones((max(rindexes), 3))
2135  for rnum in rindexes:
2136  sel = IMP.atom.Selection(mol, residue_index=rnum,
2137  resolution=1)
2138  selpart = sel.get_selected_particles()
2139  if len(selpart) == 0:
2140  continue
2141  selpart = selpart[0]
2142  coords[rnum - 1, :] = \
2143  IMP.core.XYZ(selpart).get_coordinates()
2144  coord_dict[mol] = coords
2145 
2146  if not consolidate:
2147  coords = np.concatenate(list(coord_dict.values()))
2148  dists = cdist(coords, coords)
2149  binary_dists = np.where((dists <= contact_threshold)
2150  & (dists >= 1.0), 1.0, 0.0)
2151  else:
2152  binary_dists_dict = {}
2153  for mol1 in mols:
2154  len1 = max(mol1.get_residue_indexes())
2155  for mol2 in mols:
2156  name1 = mol1.get_name()
2157  name2 = mol2.get_name()
2158  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2159  if (name1, name2) not in binary_dists_dict:
2160  binary_dists_dict[(name1, name2)] = \
2161  np.zeros((len1, len1))
2162  binary_dists_dict[(name1, name2)] += \
2163  np.where((dists <= contact_threshold)
2164  & (dists >= 1.0), 1.0, 0.0)
2165  binary_dists = np.zeros((total_len_unique, total_len_unique))
2166 
2167  for name1, name2 in binary_dists_dict:
2168  r1 = index_dict[mol_names_unique[name1]]
2169  r2 = index_dict[mol_names_unique[name2]]
2170  binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2171  np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2172  1.0, 0.0)
2173 
2174  if ncl == 0:
2175  dist_maps = [dists]
2176  av_dist_map = dists
2177  contact_freqs = binary_dists
2178  else:
2179  dist_maps.append(dists)
2180  av_dist_map += dists
2181  contact_freqs += binary_dists
2182 
2183  if log_scale:
2184  contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2185  else:
2186  contact_freqs = 1.0/len(cluster)*contact_freqs
2187  av_dist_map = 1.0/len(cluster)*contact_freqs
2188 
2189  fig = plt.figure(figsize=(100, 100))
2190  ax = fig.add_subplot(111)
2191  ax.set_xticks([])
2192  ax.set_yticks([])
2193  gap_between_components = 50
2194  colormap = cm.Blues
2195  colornorm = None
2196 
2197  if not consolidate:
2198  sorted_tuple = sorted(
2200  mol).get_extended_name(), mol) for mol in mols)
2201  prot_list = list(zip(*sorted_tuple))[1]
2202  else:
2203  sorted_tuple = sorted(
2204  (IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(), mol)
2205  for mol in unique_copies)
2206  prot_list = list(zip(*sorted_tuple))[1]
2207 
2208  prot_listx = prot_list
2209  nresx = gap_between_components + \
2210  sum([max(mol.get_residue_indexes())
2211  + gap_between_components for mol in prot_listx])
2212 
2213  # set the list of proteins on the y axis
2214  prot_listy = prot_list
2215  nresy = gap_between_components + \
2216  sum([max(mol.get_residue_indexes())
2217  + gap_between_components for mol in prot_listy])
2218 
2219  # this is the residue offset for each protein
2220  resoffsetx = {}
2221  resendx = {}
2222  res = gap_between_components
2223  for mol in prot_listx:
2224  resoffsetx[mol] = res
2225  res += max(mol.get_residue_indexes())
2226  resendx[mol] = res
2227  res += gap_between_components
2228 
2229  resoffsety = {}
2230  resendy = {}
2231  res = gap_between_components
2232  for mol in prot_listy:
2233  resoffsety[mol] = res
2234  res += max(mol.get_residue_indexes())
2235  resendy[mol] = res
2236  res += gap_between_components
2237 
2238  resoffsetdiagonal = {}
2239  res = gap_between_components
2240  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2241  resoffsetdiagonal[mol] = res
2242  res += max(mol.get_residue_indexes())
2243  res += gap_between_components
2244 
2245  # plot protein boundaries
2246  xticks = []
2247  xlabels = []
2248  for n, prot in enumerate(prot_listx):
2249  res = resoffsetx[prot]
2250  end = resendx[prot]
2251  for proty in prot_listy:
2252  resy = resoffsety[proty]
2253  endy = resendy[proty]
2254  ax.plot([res, res], [resy, endy], linestyle='-',
2255  color='gray', lw=0.4)
2256  ax.plot([end, end], [resy, endy], linestyle='-',
2257  color='gray', lw=0.4)
2258  xticks.append((float(res) + float(end)) / 2)
2260  prot).get_extended_name())
2261 
2262  yticks = []
2263  ylabels = []
2264  for n, prot in enumerate(prot_listy):
2265  res = resoffsety[prot]
2266  end = resendy[prot]
2267  for protx in prot_listx:
2268  resx = resoffsetx[protx]
2269  endx = resendx[protx]
2270  ax.plot([resx, endx], [res, res], linestyle='-',
2271  color='gray', lw=0.4)
2272  ax.plot([resx, endx], [end, end], linestyle='-',
2273  color='gray', lw=0.4)
2274  yticks.append((float(res) + float(end)) / 2)
2276  prot).get_extended_name())
2277 
2278  # plot the contact map
2279 
2280  tmp_array = np.zeros((nresx, nresy))
2281  ret = {}
2282  for px in prot_listx:
2283  for py in prot_listy:
2284  resx = resoffsetx[px]
2285  lengx = resendx[px] - 1
2286  resy = resoffsety[py]
2287  lengy = resendy[py] - 1
2288  indexes_x = index_dict[px]
2289  minx = min(indexes_x)
2290  maxx = max(indexes_x)
2291  indexes_y = index_dict[py]
2292  miny = min(indexes_y)
2293  maxy = max(indexes_y)
2294  tmp_array[resx:lengx, resy:lengy] = \
2295  contact_freqs[minx:maxx, miny:maxy]
2296  ret[(px, py)] = np.argwhere(
2297  contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2298 
2299  ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2300  origin='lower', alpha=0.6, interpolation='nearest')
2301 
2302  ax.set_xticks(xticks)
2303  ax.set_xticklabels(xlabels, rotation=90)
2304  ax.set_yticks(yticks)
2305  ax.set_yticklabels(ylabels)
2306  plt.setp(ax.get_xticklabels(), fontsize=6)
2307  plt.setp(ax.get_yticklabels(), fontsize=6)
2308 
2309  # display and write to file
2310  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2311  [i.set_linewidth(2.0) for i in ax.spines.values()]
2312 
2313  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf",
2314  dpi=300, transparent="False")
2315  return ret
2316 
2317  def plot_rmsd_matrix(self, filename):
2318  self.compute_all_pairwise_rmsd()
2319  distance_matrix = np.zeros(
2320  (len(self.stath0), len(self.stath1)))
2321  for (n0, n1) in self.pairwise_rmsd:
2322  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2323 
2324  import matplotlib as mpl
2325  mpl.use('Agg')
2326  import matplotlib.pylab as pl
2327  from scipy.cluster import hierarchy as hrc
2328 
2329  fig = pl.figure(figsize=(10, 8))
2330  ax = fig.add_subplot(212)
2331  dendrogram = hrc.dendrogram(
2332  hrc.linkage(distance_matrix),
2333  color_threshold=7,
2334  no_labels=True)
2335  leaves_order = dendrogram['leaves']
2336  ax.set_xlabel('Model')
2337  ax.set_ylabel('RMSD [Angstroms]')
2338 
2339  ax2 = fig.add_subplot(221)
2340  cax = ax2.imshow(
2341  distance_matrix[leaves_order, :][:, leaves_order],
2342  interpolation='nearest')
2343  cb = fig.colorbar(cax)
2344  cb.set_label('RMSD [Angstroms]')
2345  ax2.set_xlabel('Model')
2346  ax2.set_ylabel('Model')
2347 
2348  pl.savefig(filename, dpi=300)
2349  pl.close(fig)
2350 
2351  ####################
2352  # Internal Functions
2353  ####################
2354 
2355  def update_clusters(self):
2356  """
2357  Update the cluster id numbers
2358  """
2359  for n, c in enumerate(self.clusters):
2360  c.cluster_id = n
2361 
2362  def get_molecule(self, hier, name, copy):
2363  s = IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2364  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2365 
2366  def update_seldicts(self):
2367  """
2368  Update the seldicts
2369  """
2370  self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2371  self.sel0_rmsd.get_selected_particles())
2372  self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2373  self.sel1_rmsd.get_selected_particles())
2374  for mol in self.seldict0:
2375  for sel in self.seldict0[mol]:
2376  self.issymmetricsel[sel] = False
2377  for mol in self.symmetric_molecules:
2378  self.symmetric_molecules[mol] = len(self.seldict0[mol])
2379  for sel in self.seldict0[mol]:
2380  self.issymmetricsel[sel] = True
2381 
2382  def align(self):
2384  self.sel1_alignment, self.sel0_alignment)
2385 
2386  for rb in self.rbs1:
2387  IMP.core.transform(rb, tr)
2388 
2389  for bead in self.beads1:
2390  try:
2391  IMP.core.transform(IMP.core.XYZ(bead), tr)
2392  except: # noqa: E722
2393  continue
2394 
2395  self.model.update()
2396 
2397  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2398  '''
2399  initial filling of the clusters.
2400  '''
2401  n0 = idxs.pop()
2402  print("clustering model "+str(n0))
2403  d0 = self.stath0[n0]
2404  c = IMP.pmi.output.Cluster(len(self.clusters))
2405  print("creating cluster index "+str(len(self.clusters)))
2406  self.clusters.append(c)
2407  c.add_member(n0, d0)
2408  clustered = set([n0])
2409  for n1 in idxs:
2410  print("--- trying to add model " + str(n1) + " to cluster "
2411  + str(len(self.clusters)))
2412  d1 = self.stath1[n1]
2413  if self.alignment:
2414  self.align()
2415  rmsd, _ = self.rmsd(metric=metric)
2416  if rmsd < rmsd_cutoff:
2417  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2418  c.add_member(n1, d1)
2419  clustered.add(n1)
2420  else:
2421  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2422  idxs -= clustered
2423 
2424  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2425  """
2426  merge the clusters that have close members
2427 
2428  @param rmsd_cutoff cutoff distance in Angstorms
2429  @param metric Function to calculate distance between two Selections
2430  (by default, IMP.atom.get_rmsd is used)
2431  """
2432  # before merging, clusters are spheres of radius rmsd_cutoff
2433  # centered on the 1st element
2434  # here we only try to merge clusters whose centers are closer
2435  # than 2*rmsd_cutoff
2436  to_merge = []
2437  print("merging...")
2438  for c0, c1 in filter(lambda x: len(x[0].members) > 1,
2439  itertools.combinations(self.clusters, 2)):
2440  n0, n1 = [c.members[0] for c in (c0, c1)]
2441  _ = self.stath0[n0]
2442  _ = self.stath1[n1]
2443  rmsd, _ = self.rmsd()
2444  if (rmsd < 2*rmsd_cutoff and
2445  self.have_close_members(c0, c1, rmsd_cutoff, metric)):
2446  to_merge.append((c0, c1))
2447 
2448  for c0, c in reversed(to_merge):
2449  self.merge(c0, c)
2450 
2451  # keep only full clusters
2452  self.clusters = [c for c in
2453  filter(lambda x: len(x.members) > 0, self.clusters)]
2454 
2455  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2456  '''
2457  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2458  '''
2459  print("check close members for clusters " + str(c0.cluster_id) +
2460  " and " + str(c1.cluster_id))
2461  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2462  _ = self.stath0[n0]
2463  _ = self.stath1[n1]
2464  rmsd, _ = self.rmsd(metric=metric)
2465  if rmsd < rmsd_cutoff:
2466  return True
2467 
2468  return False
2469 
2470  def merge(self, c0, c1):
2471  '''
2472  merge two clusters
2473  '''
2474  c0 += c1
2475  c1.members = []
2476  c1.data = {}
2477 
2478  def rmsd_helper(self, sels0, sels1, metric):
2479  '''
2480  a function that returns the permutation best_sel of sels0 that
2481  minimizes metric
2482  '''
2483  best_rmsd2 = float('inf')
2484  best_sel = None
2485  if self.issymmetricsel[sels0[0]]:
2486  # this cases happens when symmetries were defined
2487  N = len(sels0)
2488  for offset in range(N):
2489  sels = [sels0[(offset+i) % N] for i in range(N)]
2490  sel0 = sels[0]
2491  sel1 = sels1[0]
2492  r = metric(sel0, sel1)
2493  rmsd2 = r*r*N
2494  if rmsd2 < best_rmsd2:
2495  best_rmsd2 = rmsd2
2496  best_sel = sels
2497  else:
2498  for sels in itertools.permutations(sels0):
2499  rmsd2 = 0.0
2500  for sel0, sel1 in itertools.takewhile(
2501  lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2502  r = metric(sel0, sel1)
2503  rmsd2 += r*r
2504  if rmsd2 < best_rmsd2:
2505  best_rmsd2 = rmsd2
2506  best_sel = sels
2507  return best_sel, best_rmsd2
2508 
2509  def compute_all_pairwise_rmsd(self):
2510  for d0 in self.stath0:
2511  for d1 in self.stath1:
2512  rmsd, _ = self.rmsd()
2513 
2514  def rmsd(self, metric=IMP.atom.get_rmsd):
2515  '''
2516  Computes the RMSD. Resolves ambiguous pairs assignments
2517  '''
2518  # here we memoize the rmsd and molecular assignment so that it's
2519  # not done multiple times
2520  n0 = self.stath0.current_index
2521  n1 = self.stath1.current_index
2522  if ((n0, n1) in self.pairwise_rmsd) \
2523  and ((n0, n1) in self.pairwise_molecular_assignment):
2524  return (self.pairwise_rmsd[(n0, n1)],
2525  self.pairwise_molecular_assignment[(n0, n1)])
2526 
2527  if self.alignment:
2528  self.align()
2529  # if it's not yet memoized
2530  total_rmsd = 0.0
2531  total_N = 0
2532  # this is a dictionary which keys are the molecule names, and values
2533  # are the list of IMP.atom.Selection for all molecules that share
2534  # the molecule name
2535  molecular_assignment = {}
2536  for molname, sels0 in self.seldict0.items():
2537  sels_best_order, best_rmsd2 = \
2538  self.rmsd_helper(sels0, self.seldict1[molname], metric)
2539 
2540  Ncoords = len(sels_best_order[0].get_selected_particles())
2541  Ncopies = len(self.seldict1[molname])
2542  total_rmsd += Ncoords*best_rmsd2
2543  total_N += Ncoords*Ncopies
2544 
2545  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2546  p0 = sel0.get_selected_particles()[0]
2547  p1 = sel1.get_selected_particles()[0]
2548  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2549  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2550  c0 = IMP.atom.Copy(m0).get_copy_index()
2551  c1 = IMP.atom.Copy(m1).get_copy_index()
2552  molecular_assignment[(molname, c0)] = (molname, c1)
2553 
2554  total_rmsd = math.sqrt(total_rmsd/total_N)
2555 
2556  self.pairwise_rmsd[(n0, n1)] = total_rmsd
2557  self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2558  self.pairwise_rmsd[(n1, n0)] = total_rmsd
2559  self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2560  return total_rmsd, molecular_assignment
2561 
2562  def set_reference(self, reference, cluster):
2563  """
2564  Fix the reference structure for structural alignment, rmsd and
2565  chain assignment
2566 
2567  @param reference can be either "Absolute" (cluster center of the
2568  first cluster) or Relative (cluster center of the current
2569  cluster)
2570  #param cluster the reference IMP.pmi.output.Cluster object
2571  """
2572  if reference == "Absolute":
2573  _ = self.stath0[0]
2574  elif reference == "Relative":
2575  if cluster.center_index:
2576  n0 = cluster.center_index
2577  else:
2578  n0 = cluster.members[0]
2579  _ = self.stath0[n0]
2580 
2582  """
2583  compute the molecular assignments between multiple copies
2584  of the same sequence. It changes the Copy index of Molecules
2585  """
2586  _ = self.stath1[n1]
2587  _, molecular_assignment = self.rmsd()
2588  for (m0, c0), (m1, c1) in molecular_assignment.items():
2589  mol0 = self.molcopydict0[m0][c0]
2590  mol1 = self.molcopydict1[m1][c1]
2591  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2592  p1 = IMP.atom.Copy(mol1).get_particle()
2593  p1.set_value(cik0, c0)
2594 
2596  """
2597  Undo the Copy index assignment
2598  """
2599  _ = self.stath1[n1]
2600  _, molecular_assignment = self.rmsd()
2601  for (m0, c0), (m1, c1) in molecular_assignment.items():
2602  mol0 = self.molcopydict0[m0][c0]
2603  mol1 = self.molcopydict1[m1][c1]
2604  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2605  p1 = IMP.atom.Copy(mol1).get_particle()
2606  p1.set_value(cik0, c1)
2607 
2608  ####################
2609  # Container Functions
2610  ####################
2611 
2612  def __repr__(self):
2613  s = "AnalysisReplicaExchange\n"
2614  s += "---- number of clusters %s \n" % str(len(self.clusters))
2615  s += "---- number of models %s \n" % str(len(self.stath0))
2616  return s
2617 
2618  def __getitem__(self, int_slice_adaptor):
2619  if isinstance(int_slice_adaptor, int):
2620  return self.clusters[int_slice_adaptor]
2621  elif isinstance(int_slice_adaptor, slice):
2622  return self.__iter__(int_slice_adaptor)
2623  else:
2624  raise TypeError("Unknown Type")
2625 
2626  def __len__(self):
2627  return len(self.clusters)
2628 
2629  def __iter__(self, slice_key=None):
2630  if slice_key is None:
2631  for i in range(len(self)):
2632  yield self[i]
2633  else:
2634  for i in range(len(self))[slice_key]:
2635  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2514
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignment.
Definition: macros.py:2562
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1843
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: pmi/tools.py:1062
def precision
Compute the precision of a cluster.
Definition: macros.py:1941
CheckLevel get_check_level()
Get the current audit mode.
Definition: exception.h:80
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:961
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: pmi/tools.py:104
A container for models organized into clusters.
Definition: output.py:1556
Sample using molecular dynamics.
Definition: samplers.py:255
def aggregate
initial filling of the clusters.
Definition: macros.py:2397
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:632
A macro to help setup and run replica exchange.
Definition: macros.py:65
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:633
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1906
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1990
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: pmi/tools.py:1159
A helper output for model evaluation.
Miscellaneous utilities.
Definition: pmi/tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1703
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1784
def __init__
Construction of the Class.
Definition: macros.py:1656
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:865
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1801
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2595
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1719
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2478
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1881
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1104
def apply_molecular_assignments
compute the molecular assignments between multiple copies of the same sequence.
Definition: macros.py:2581
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1645
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:652
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2424
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:1013
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1863
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:1024
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def __init__
Constructor.
Definition: macros.py:140
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:21
def update_seldicts
Update the seldicts.
Definition: macros.py:2366
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2355
def scatter_and_gather
Synchronize data over a parallel run.
Definition: pmi/tools.py:542
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1748
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:199
Collect timing information.
Definition: pmi/tools.py:119
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1712
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9886
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:70
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2470
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:705
The general base class for IMP exceptions.
Definition: exception.h:48
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1304
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1794
Class to handle individual particles of a Model object.
Definition: Particle.h:45
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:876
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1970
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:681
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1735
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1834
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2455
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: pmi/tools.py:1135
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1809
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: pmi/tools.py:499
Sample using replica exchange.
Definition: samplers.py:347
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27