IMP logo
IMP Reference Guide  2.17.0
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
7 import IMP.pmi.tools
8 import IMP.pmi.samplers
9 import IMP.pmi.output
10 import IMP.pmi.analysis
11 import IMP.pmi.io
12 import IMP.pmi.alphabets
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import os
17 try:
18  from pathlib import Path
19 except ImportError: # Use bundled pathlib on Python 2 without pathlib
20  from IMP._compat_pathlib import Path
21 import glob
22 from operator import itemgetter
23 from collections import defaultdict
24 import numpy as np
25 import itertools
26 import warnings
27 import math
28 
29 
30 class _RMFRestraints(object):
31  """All restraints that are written out to the RMF file"""
32  def __init__(self, model, user_restraints):
33  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
34  self._user_restraints = user_restraints if user_restraints else []
35 
36  def __len__(self):
37  return (len(self._user_restraints)
38  + self._rmf_rs.get_number_of_restraints())
39 
40  def __bool__(self):
41  return len(self) > 0
42  __nonzero__ = __bool__ # Python 2 compatibility
43 
44  def __getitem__(self, i):
45  class FakePMIWrapper(object):
46  def __init__(self, r):
47  self.r = IMP.RestraintSet.get_from(r)
48 
49  def get_restraint(self):
50  return self.r
51 
52  lenuser = len(self._user_restraints)
53  if 0 <= i < lenuser:
54  return self._user_restraints[i]
55  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
56  r = self._rmf_rs.get_restraint(i - lenuser)
57  return FakePMIWrapper(r)
58  else:
59  raise IndexError("Out of range")
60 
61 
62 class ReplicaExchange0(object):
63  """A macro to help setup and run replica exchange.
64  Supports Monte Carlo and molecular dynamics.
65  Produces trajectory RMF files, best PDB structures,
66  and output stat files.
67  """
68  def __init__(self, model, root_hier,
69  sample_objects=None, # DEPRECATED
70  monte_carlo_sample_objects=None,
71  molecular_dynamics_sample_objects=None,
72  output_objects=[],
73  rmf_output_objects=None,
74  crosslink_restraints=None, # DEPRECATED
75  monte_carlo_temperature=1.0,
76  simulated_annealing=False,
77  simulated_annealing_minimum_temperature=1.0,
78  simulated_annealing_maximum_temperature=2.5,
79  simulated_annealing_minimum_temperature_nframes=100,
80  simulated_annealing_maximum_temperature_nframes=100,
81  replica_exchange_minimum_temperature=1.0,
82  replica_exchange_maximum_temperature=2.5,
83  replica_exchange_swap=True,
84  num_sample_rounds=1,
85  number_of_best_scoring_models=500,
86  monte_carlo_steps=10,
87  self_adaptive=False,
88  molecular_dynamics_steps=10,
89  molecular_dynamics_max_time_step=1.0,
90  number_of_frames=1000,
91  save_coordinates_mode="lowest_temperature",
92  nframes_write_coordinates=1,
93  write_initial_rmf=True,
94  initial_rmf_name_suffix="initial",
95  stat_file_name_suffix="stat",
96  best_pdb_name_suffix="model",
97  mmcif=False,
98  do_clean_first=True,
99  do_create_directories=True,
100  global_output_directory="./",
101  rmf_dir="rmfs/",
102  best_pdb_dir="pdbs/",
103  replica_stat_file_suffix="stat_replica",
104  em_object_for_rmf=None,
105  atomistic=False,
106  replica_exchange_object=None,
107  test_mode=False,
108  score_moved=False):
109  """Constructor.
110  @param model The IMP model
111  @param root_hier Top-level (System)hierarchy
112  @param monte_carlo_sample_objects Objects for MC sampling; a list of
113  structural components (generally the representation) that
114  will be moved and restraints with parameters that need to
115  be sampled.
116  For PMI2: just pass flat list of movers
117  @param molecular_dynamics_sample_objects Objects for MD sampling
118  For PMI2: just pass flat list of particles
119  @param output_objects A list of structural objects and restraints
120  that will be included in output (ie, statistics "stat"
121  files). Any object that provides a get_output() method
122  can be used here. If None is passed
123  the macro will not write stat files.
124  @param rmf_output_objects A list of structural objects and
125  restraints that will be included in rmf. Any object
126  that provides a get_output() method can be used here.
127  @param monte_carlo_temperature MC temp (may need to be optimized
128  based on post-sampling analysis)
129  @param simulated_annealing If True, perform simulated annealing
130  @param simulated_annealing_minimum_temperature Should generally be
131  the same as monte_carlo_temperature.
132  @param simulated_annealing_minimum_temperature_nframes Number of
133  frames to compute at minimum temperature.
134  @param simulated_annealing_maximum_temperature_nframes Number of
135  frames to compute at
136  temps > simulated_annealing_maximum_temperature.
137  @param replica_exchange_minimum_temperature Low temp for REX; should
138  generally be the same as monte_carlo_temperature.
139  @param replica_exchange_maximum_temperature High temp for REX
140  @param replica_exchange_swap Boolean, enable disable temperature
141  swap (Default=True)
142  @param num_sample_rounds Number of rounds of MC/MD per cycle
143  @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
144  models to keep around for analysis.
145  @param mmcif If True, write best scoring models in mmCIF format;
146  if False (the default), write in legacy PDB format.
147  @param best_pdb_dir The directory under `global_output_directory`
148  where best-scoring PDB/mmCIF files are written.
149  @param best_pdb_name_suffix Part of the file name for best-scoring
150  PDB/mmCIF files.
151  @param monte_carlo_steps Number of MC steps per round
152  @param self_adaptive self adaptive scheme for Monte Carlo movers
153  @param molecular_dynamics_steps Number of MD steps per round
154  @param molecular_dynamics_max_time_step Max time step for MD
155  @param number_of_frames Number of REX frames to run
156  @param save_coordinates_mode string: how to save coordinates.
157  "lowest_temperature" (default) only the lowest temperatures
158  is saved
159  "25th_score" all replicas whose score is below the 25th
160  percentile
161  "50th_score" all replicas whose score is below the 50th
162  percentile
163  "75th_score" all replicas whose score is below the 75th
164  percentile
165  @param nframes_write_coordinates How often to write the coordinates
166  of a frame
167  @param write_initial_rmf Write the initial configuration
168  @param global_output_directory Folder that will be created to house
169  output.
170  @param test_mode Set to True to avoid writing any files, just test
171  one frame.
172  @param score_moved If True, attempt to speed up Monte Carlo
173  sampling by caching scoring function terms on particles
174  that didn't move.
175  """
176  self.model = model
177  self.vars = {}
178 
179  # add check hierarchy is multistate
180  self.output_objects = output_objects
181  self.rmf_output_objects = rmf_output_objects
182  if (isinstance(root_hier, IMP.atom.Hierarchy)
183  and root_hier.get_name() == 'System'):
184  if self.output_objects is not None:
185  self.output_objects.append(
186  IMP.pmi.io.TotalScoreOutput(self.model))
187  if self.rmf_output_objects is not None:
188  self.rmf_output_objects.append(
189  IMP.pmi.io.TotalScoreOutput(self.model))
190  self.root_hier = root_hier
191  states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
192  self.vars["number_of_states"] = len(states)
193  if len(states) > 1:
194  self.root_hiers = states
195  self.is_multi_state = True
196  else:
197  self.root_hier = root_hier
198  self.is_multi_state = False
199  else:
200  raise TypeError("Must provide System hierarchy (root_hier)")
201 
202  if crosslink_restraints:
204  "crosslink_restraints is deprecated and is ignored; "
205  "all cross-link restraints should be automatically "
206  "added to output RMF files")
207  self._rmf_restraints = _RMFRestraints(model, None)
208  self.em_object_for_rmf = em_object_for_rmf
209  self.monte_carlo_sample_objects = monte_carlo_sample_objects
210  self.vars["self_adaptive"] = self_adaptive
211  if sample_objects is not None:
213  "sample_objects is deprecated; use monte_carlo_sample_objects "
214  "(or molecular_dynamics_sample_objects) instead")
215  self.monte_carlo_sample_objects += sample_objects
216  self.molecular_dynamics_sample_objects = \
217  molecular_dynamics_sample_objects
218  self.replica_exchange_object = replica_exchange_object
219  self.molecular_dynamics_max_time_step = \
220  molecular_dynamics_max_time_step
221  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
222  self.vars["replica_exchange_minimum_temperature"] = \
223  replica_exchange_minimum_temperature
224  self.vars["replica_exchange_maximum_temperature"] = \
225  replica_exchange_maximum_temperature
226  self.vars["replica_exchange_swap"] = replica_exchange_swap
227  self.vars["simulated_annealing"] = simulated_annealing
228  self.vars["simulated_annealing_minimum_temperature"] = \
229  simulated_annealing_minimum_temperature
230  self.vars["simulated_annealing_maximum_temperature"] = \
231  simulated_annealing_maximum_temperature
232  self.vars["simulated_annealing_minimum_temperature_nframes"] = \
233  simulated_annealing_minimum_temperature_nframes
234  self.vars["simulated_annealing_maximum_temperature_nframes"] = \
235  simulated_annealing_maximum_temperature_nframes
236 
237  self.vars["num_sample_rounds"] = num_sample_rounds
238  self.vars[
239  "number_of_best_scoring_models"] = number_of_best_scoring_models
240  self.vars["monte_carlo_steps"] = monte_carlo_steps
241  self.vars["molecular_dynamics_steps"] = molecular_dynamics_steps
242  self.vars["number_of_frames"] = number_of_frames
243  if save_coordinates_mode not in ("lowest_temperature", "25th_score",
244  "50th_score", "75th_score"):
245  raise Exception("save_coordinates_mode has unrecognized value")
246  else:
247  self.vars["save_coordinates_mode"] = save_coordinates_mode
248  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
249  self.vars["write_initial_rmf"] = write_initial_rmf
250  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
251  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
252  self.vars["mmcif"] = mmcif
253  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
254  self.vars["do_clean_first"] = do_clean_first
255  self.vars["do_create_directories"] = do_create_directories
256  self.vars["global_output_directory"] = global_output_directory
257  self.vars["rmf_dir"] = rmf_dir
258  self.vars["best_pdb_dir"] = best_pdb_dir
259  self.vars["atomistic"] = atomistic
260  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
261  self.vars["geometries"] = None
262  self.test_mode = test_mode
263  self.score_moved = score_moved
264 
265  def add_geometries(self, geometries):
266  if self.vars["geometries"] is None:
267  self.vars["geometries"] = list(geometries)
268  else:
269  self.vars["geometries"].extend(geometries)
270 
271  def show_info(self):
272  print("ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, "
273  "rmfs/*.rmf3 for each replica ")
274  print("--- it stores the best scoring pdb models in pdbs/")
275  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
276  "lowest temperature")
277  print("--- variables:")
278  keys = list(self.vars.keys())
279  keys.sort()
280  for v in keys:
281  print("------", v.ljust(30), self.vars[v])
282 
283  def get_replica_exchange_object(self):
284  return self.replica_exchange_object
285 
286  def _add_provenance(self, sampler_md, sampler_mc):
287  """Record details about the sampling in the IMP Hierarchies"""
288  iterations = 0
289  if sampler_md:
290  method = "Molecular Dynamics"
291  iterations += self.vars["molecular_dynamics_steps"]
292  if sampler_mc:
293  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
294  iterations += self.vars["monte_carlo_steps"]
295  # If no sampling is actually done, no provenance to write
296  if iterations == 0 or self.vars["number_of_frames"] == 0:
297  return
298  iterations *= self.vars["num_sample_rounds"]
299 
300  pi = self.model.add_particle("sampling")
302  self.model, pi, method, self.vars["number_of_frames"],
303  iterations)
304  p.set_number_of_replicas(
305  self.replica_exchange_object.get_number_of_replicas())
306  IMP.pmi.tools._add_pmi_provenance(self.root_hier)
307  IMP.core.add_provenance(self.model, self.root_hier, p)
308 
309  def execute_macro(self):
310  temp_index_factor = 100000.0
311  samplers = []
312  sampler_mc = None
313  sampler_md = None
314  if self.monte_carlo_sample_objects is not None:
315  print("Setting up MonteCarlo")
316  sampler_mc = IMP.pmi.samplers.MonteCarlo(
317  self.model, self.monte_carlo_sample_objects,
318  self.vars["monte_carlo_temperature"],
319  score_moved=self.score_moved)
320  if self.vars["simulated_annealing"]:
321  tmin = self.vars["simulated_annealing_minimum_temperature"]
322  tmax = self.vars["simulated_annealing_maximum_temperature"]
323  nfmin = self.vars[
324  "simulated_annealing_minimum_temperature_nframes"]
325  nfmax = self.vars[
326  "simulated_annealing_maximum_temperature_nframes"]
327  sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
328  if self.vars["self_adaptive"]:
329  sampler_mc.set_self_adaptive(
330  isselfadaptive=self.vars["self_adaptive"])
331  if self.output_objects is not None:
332  self.output_objects.append(sampler_mc)
333  if self.rmf_output_objects is not None:
334  self.rmf_output_objects.append(sampler_mc)
335  samplers.append(sampler_mc)
336 
337  if self.molecular_dynamics_sample_objects is not None:
338  print("Setting up MolecularDynamics")
340  self.model, self.molecular_dynamics_sample_objects,
341  self.vars["monte_carlo_temperature"],
342  maximum_time_step=self.molecular_dynamics_max_time_step)
343  if self.vars["simulated_annealing"]:
344  tmin = self.vars["simulated_annealing_minimum_temperature"]
345  tmax = self.vars["simulated_annealing_maximum_temperature"]
346  nfmin = self.vars[
347  "simulated_annealing_minimum_temperature_nframes"]
348  nfmax = self.vars[
349  "simulated_annealing_maximum_temperature_nframes"]
350  sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
351  if self.output_objects is not None:
352  self.output_objects.append(sampler_md)
353  if self.rmf_output_objects is not None:
354  self.rmf_output_objects.append(sampler_md)
355  samplers.append(sampler_md)
356 # -------------------------------------------------------------------------
357 
358  print("Setting up ReplicaExchange")
360  self.model, self.vars["replica_exchange_minimum_temperature"],
361  self.vars["replica_exchange_maximum_temperature"], samplers,
362  replica_exchange_object=self.replica_exchange_object)
363  self.replica_exchange_object = rex.rem
364 
365  myindex = rex.get_my_index()
366  if self.output_objects is not None:
367  self.output_objects.append(rex)
368  if self.rmf_output_objects is not None:
369  self.rmf_output_objects.append(rex)
370  # must reset the minimum temperature due to the
371  # different binary length of rem.get_my_parameter double and python
372  # float
373  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
374 
375 # -------------------------------------------------------------------------
376 
377  globaldir = self.vars["global_output_directory"] + "/"
378  rmf_dir = globaldir + self.vars["rmf_dir"]
379  pdb_dir = globaldir + self.vars["best_pdb_dir"]
380 
381  if not self.test_mode:
382  if self.vars["do_clean_first"]:
383  pass
384 
385  if self.vars["do_create_directories"]:
386 
387  try:
388  os.makedirs(globaldir)
389  except: # noqa: E722
390  pass
391  try:
392  os.makedirs(rmf_dir)
393  except: # noqa: E722
394  pass
395 
396  if not self.is_multi_state:
397  try:
398  os.makedirs(pdb_dir)
399  except: # noqa: E722
400  pass
401  else:
402  for n in range(self.vars["number_of_states"]):
403  try:
404  os.makedirs(pdb_dir + "/" + str(n))
405  except: # noqa: E722
406  pass
407 
408 # -------------------------------------------------------------------------
409 
411  if self.output_objects is not None:
412  self.output_objects.append(sw)
413  if self.rmf_output_objects is not None:
414  self.rmf_output_objects.append(sw)
415 
416  print("Setting up stat file")
417  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
418  low_temp_stat_file = globaldir + \
419  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
420 
421  # Ensure model is updated before saving init files
422  if not self.test_mode:
423  self.model.update()
424 
425  if not self.test_mode:
426  if self.output_objects is not None:
427  output.init_stat2(low_temp_stat_file,
428  self.output_objects,
429  extralabels=["rmf_file", "rmf_frame_index"])
430  else:
431  print("Stat file writing is disabled")
432 
433  if self.rmf_output_objects is not None:
434  print("Stat info being written in the rmf file")
435 
436  print("Setting up replica stat file")
437  replica_stat_file = globaldir + \
438  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
439  if not self.test_mode:
440  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
441 
442  if not self.test_mode:
443  print("Setting up best pdb files")
444  if not self.is_multi_state:
445  if self.vars["number_of_best_scoring_models"] > 0:
446  output.init_pdb_best_scoring(
447  pdb_dir + "/" + self.vars["best_pdb_name_suffix"],
448  self.root_hier,
449  self.vars["number_of_best_scoring_models"],
450  replica_exchange=True,
451  mmcif=self.vars['mmcif'],
452  best_score_file=globaldir + "best.scores.rex.py")
453  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
454  output.write_psf(
455  pdb_dir + "/" + "model.psf",
456  pdb_dir + "/" +
457  self.vars["best_pdb_name_suffix"] + pdbext)
458  else:
459  if self.vars["number_of_best_scoring_models"] > 0:
460  for n in range(self.vars["number_of_states"]):
461  output.init_pdb_best_scoring(
462  pdb_dir + "/" + str(n) + "/" +
463  self.vars["best_pdb_name_suffix"],
464  self.root_hiers[n],
465  self.vars["number_of_best_scoring_models"],
466  replica_exchange=True,
467  mmcif=self.vars['mmcif'],
468  best_score_file=globaldir + "best.scores.rex.py")
469  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
470  output.write_psf(
471  pdb_dir + "/" + str(n) + "/" + "model.psf",
472  pdb_dir + "/" + str(n) + "/" +
473  self.vars["best_pdb_name_suffix"] + pdbext)
474 # ---------------------------------------------
475 
476  if self.em_object_for_rmf is not None:
477  output_hierarchies = [
478  self.root_hier,
479  self.em_object_for_rmf.get_density_as_hierarchy(
480  )]
481  else:
482  output_hierarchies = [self.root_hier]
483 
484  if not self.test_mode:
485  print("Setting up and writing initial rmf coordinate file")
486  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
487  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
488  output_hierarchies,
489  listofobjects=self.rmf_output_objects)
490  if self._rmf_restraints:
491  output.add_restraints_to_rmf(
492  init_suffix + "." + str(myindex) + ".rmf3",
493  self._rmf_restraints)
494  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
495  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
496 
497  if not self.test_mode:
498  mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
499 
500  self._add_provenance(sampler_md, sampler_mc)
501 
502  if not self.test_mode:
503  print("Setting up production rmf files")
504  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
505  output.init_rmf(rmfname, output_hierarchies,
506  geometries=self.vars["geometries"],
507  listofobjects=self.rmf_output_objects)
508 
509  if self._rmf_restraints:
510  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
511 
512  ntimes_at_low_temp = 0
513 
514  if myindex == 0:
515  self.show_info()
516  self.replica_exchange_object.set_was_used(True)
517  nframes = self.vars["number_of_frames"]
518  if self.test_mode:
519  nframes = 1
520  for i in range(nframes):
521  if self.test_mode:
522  score = 0.
523  else:
524  for nr in range(self.vars["num_sample_rounds"]):
525  if sampler_md is not None:
526  sampler_md.optimize(
527  self.vars["molecular_dynamics_steps"])
528  if sampler_mc is not None:
529  sampler_mc.optimize(self.vars["monte_carlo_steps"])
531  self.model).evaluate(False)
532  mpivs.set_value("score", score)
533  output.set_output_entry("score", score)
534 
535  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
536 
537  if self.vars["save_coordinates_mode"] == "lowest_temperature":
538  save_frame = (min_temp_index == my_temp_index)
539  elif self.vars["save_coordinates_mode"] == "25th_score":
540  score_perc = mpivs.get_percentile("score")
541  save_frame = (score_perc*100.0 <= 25.0)
542  elif self.vars["save_coordinates_mode"] == "50th_score":
543  score_perc = mpivs.get_percentile("score")
544  save_frame = (score_perc*100.0 <= 50.0)
545  elif self.vars["save_coordinates_mode"] == "75th_score":
546  score_perc = mpivs.get_percentile("score")
547  save_frame = (score_perc*100.0 <= 75.0)
548 
549  # Ensure model is updated before saving output files
550  if save_frame and not self.test_mode:
551  self.model.update()
552 
553  if save_frame:
554  print("--- frame %s score %s " % (str(i), str(score)))
555 
556  if not self.test_mode:
557  if i % self.vars["nframes_write_coordinates"] == 0:
558  print('--- writing coordinates')
559  if self.vars["number_of_best_scoring_models"] > 0:
560  output.write_pdb_best_scoring(score)
561  output.write_rmf(rmfname)
562  output.set_output_entry("rmf_file", rmfname)
563  output.set_output_entry("rmf_frame_index",
564  ntimes_at_low_temp)
565  else:
566  output.set_output_entry("rmf_file", rmfname)
567  output.set_output_entry("rmf_frame_index", '-1')
568  if self.output_objects is not None:
569  output.write_stat2(low_temp_stat_file)
570  ntimes_at_low_temp += 1
571 
572  if not self.test_mode:
573  output.write_stat2(replica_stat_file)
574  if self.vars["replica_exchange_swap"]:
575  rex.swap_temp(i, score)
576  for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
577  p.add_replica_exchange(state, self)
578 
579  if not self.test_mode:
580  print("closing production rmf files")
581  output.close_rmf(rmfname)
582 
583 
584 class BuildSystem(object):
585  """A macro to build a IMP::pmi::topology::System based on a
586  TopologyReader object.
587 
588  Easily create multi-state systems by calling this macro
589  repeatedly with different TopologyReader objects!
590  A useful function is get_molecules() which returns the PMI Molecules
591  grouped by state as a dictionary with key = (molecule name),
592  value = IMP.pmi.topology.Molecule
593  Quick multi-state system:
594  @code{.python}
595  model = IMP.Model()
596  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
597  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
598  bs = IMP.pmi.macros.BuildSystem(model)
599  bs.add_state(reader1)
600  bs.add_state(reader2)
601  bs.execute_macro() # build everything including degrees of freedom
602  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
603  ### now you have a two state system, you add restraints etc
604  @endcode
605  @note The "domain name" entry of the topology reader is not used.
606  All molecules are set up by the component name, but split into rigid bodies
607  as requested.
608  """
609 
610  _alphabets = {'DNA': IMP.pmi.alphabets.dna,
611  'RNA': IMP.pmi.alphabets.rna}
612 
613  def __init__(self, model, sequence_connectivity_scale=4.0,
614  force_create_gmm_files=False, resolutions=[1, 10]):
615  """Constructor
616  @param model An IMP Model
617  @param sequence_connectivity_scale For scaling the connectivity
618  restraint
619  @param force_create_gmm_files If True, will sample and create GMMs
620  no matter what. If False, will only sample if the
621  files don't exist. If number of Gaussians is zero, won't
622  do anything.
623  @param resolutions The resolutions to build for structured regions
624  """
625  self.model = model
626  self.system = IMP.pmi.topology.System(self.model)
627  self._readers = [] # the TopologyReaders (one per state)
628  # TempResidues for each domain key=unique name,
629  # value=(atomic_res,non_atomic_res).
630  self._domain_res = []
631  self._domains = [] # key = domain unique name, value = Component
632  self.force_create_gmm_files = force_create_gmm_files
633  self.resolutions = resolutions
634 
635  def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
636  chain_ids=None):
637  """Add a state using the topology info in a
638  IMP::pmi::topology::TopologyReader object.
639  When you are done adding states, call execute_macro()
640  @param reader The TopologyReader object
641  @param fasta_name_map dictionary for converting protein names
642  found in the fasta file
643  @param chain_ids A list or string of chain IDs for assigning to
644  newly-created molecules, e.g.
645  `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
646  If not specified, chain IDs A through Z are assigned, then
647  AA through AZ, then BA through BZ, and so on, in the same
648  fashion as PDB.
649  """
650  state = self.system.create_state()
651  self._readers.append(reader)
652  # key is unique name, value is (atomic res, nonatomicres)
653  these_domain_res = {}
654  these_domains = {} # key is unique name, value is _Component
655  if chain_ids is None:
656  chain_ids = IMP.pmi.output._ChainIDs()
657  numchain = 0
658 
659  # setup representation
660  # loop over molecules, copies, then domains
661  for molname in reader.get_molecules():
662  copies = reader.get_molecules()[molname].domains
663  for nc, copyname in enumerate(copies):
664  print("BuildSystem.add_state: setting up molecule %s copy "
665  "number %s" % (molname, str(nc)))
666  copy = copies[copyname]
667  # option to not rename chains
668  if keep_chain_id:
669  all_chains = [c for c in copy if c.chain is not None]
670  if all_chains:
671  chain_id = all_chains[0].chain
672  else:
673  chain_id = chain_ids[numchain]
674  warnings.warn(
675  "No PDBs specified for %s, so keep_chain_id has "
676  "no effect; using default chain ID '%s'"
677  % (molname, chain_id), IMP.pmi.ParameterWarning)
678  else:
679  chain_id = chain_ids[numchain]
680  if nc == 0:
681  alphabet = IMP.pmi.alphabets.amino_acid
682  fasta_flag = copy[0].fasta_flag
683  if fasta_flag in self._alphabets:
684  alphabet = self._alphabets[fasta_flag]
686  copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
687  print("BuildSystem.add_state: molecule %s sequence has "
688  "%s residues" % (molname, len(seq)))
689  orig_mol = state.create_molecule(molname, seq, chain_id,
690  alphabet=alphabet)
691  mol = orig_mol
692  numchain += 1
693  else:
694  print("BuildSystem.add_state: creating a copy for "
695  "molecule %s" % molname)
696  mol = orig_mol.create_copy(chain_id)
697  numchain += 1
698 
699  for domainnumber, domain in enumerate(copy):
700  print("BuildSystem.add_state: ---- setting up domain %s "
701  "of molecule %s" % (domainnumber, molname))
702  # we build everything in the residue range, even if it
703  # extends beyond what's in the actual PDB file
704  these_domains[domain.get_unique_name()] = domain
705  if domain.residue_range == [] or \
706  domain.residue_range is None:
707  domain_res = mol.get_residues()
708  else:
709  start = domain.residue_range[0]+domain.pdb_offset
710  if domain.residue_range[1] == 'END':
711  end = len(mol.sequence)
712  else:
713  end = domain.residue_range[1]+domain.pdb_offset
714  domain_res = mol.residue_range(start-1, end-1)
715  print("BuildSystem.add_state: -------- domain %s of "
716  "molecule %s extends from residue %s to "
717  "residue %s "
718  % (domainnumber, molname, start, end))
719  if domain.pdb_file == "BEADS":
720  print("BuildSystem.add_state: -------- domain %s of "
721  "molecule %s represented by BEADS "
722  % (domainnumber, molname))
723  mol.add_representation(
724  domain_res,
725  resolutions=[domain.bead_size],
726  setup_particles_as_densities=(
727  domain.em_residues_per_gaussian != 0),
728  color=domain.color)
729  these_domain_res[domain.get_unique_name()] = \
730  (set(), domain_res)
731  elif domain.pdb_file == "IDEAL_HELIX":
732  print("BuildSystem.add_state: -------- domain %s of "
733  "molecule %s represented by IDEAL_HELIX "
734  % (domainnumber, molname))
735  emper = domain.em_residues_per_gaussian
736  mol.add_representation(
737  domain_res,
738  resolutions=self.resolutions,
739  ideal_helix=True,
740  density_residues_per_component=emper,
741  density_prefix=domain.density_prefix,
742  density_force_compute=self.force_create_gmm_files,
743  color=domain.color)
744  these_domain_res[domain.get_unique_name()] = \
745  (domain_res, set())
746  else:
747  print("BuildSystem.add_state: -------- domain %s of "
748  "molecule %s represented by pdb file %s "
749  % (domainnumber, molname, domain.pdb_file))
750  domain_atomic = mol.add_structure(domain.pdb_file,
751  domain.chain,
752  domain.residue_range,
753  domain.pdb_offset,
754  soft_check=True)
755  domain_non_atomic = domain_res - domain_atomic
756  if not domain.em_residues_per_gaussian:
757  mol.add_representation(
758  domain_atomic, resolutions=self.resolutions,
759  color=domain.color)
760  if len(domain_non_atomic) > 0:
761  mol.add_representation(
762  domain_non_atomic,
763  resolutions=[domain.bead_size],
764  color=domain.color)
765  else:
766  print("BuildSystem.add_state: -------- domain %s "
767  "of molecule %s represented by gaussians "
768  % (domainnumber, molname))
769  emper = domain.em_residues_per_gaussian
770  creategmm = self.force_create_gmm_files
771  mol.add_representation(
772  domain_atomic,
773  resolutions=self.resolutions,
774  density_residues_per_component=emper,
775  density_prefix=domain.density_prefix,
776  density_force_compute=creategmm,
777  color=domain.color)
778  if len(domain_non_atomic) > 0:
779  mol.add_representation(
780  domain_non_atomic,
781  resolutions=[domain.bead_size],
782  setup_particles_as_densities=True,
783  color=domain.color)
784  these_domain_res[domain.get_unique_name()] = (
785  domain_atomic, domain_non_atomic)
786  self._domain_res.append(these_domain_res)
787  self._domains.append(these_domains)
788  print('BuildSystem.add_state: State', len(self.system.states), 'added')
789 
790  def get_molecules(self):
791  """Return list of all molecules grouped by state.
792  For each state, it's a dictionary of Molecules where key is the
793  molecule name
794  """
795  return [s.get_molecules() for s in self.system.get_states()]
796 
797  def get_molecule(self, molname, copy_index=0, state_index=0):
798  return self.system.get_states()[state_index].get_molecules()[
799  molname][copy_index]
800 
801  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04,
802  max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
803  """Builds representations and sets up degrees of freedom"""
804  print("BuildSystem.execute_macro: building representations")
805  self.root_hier = self.system.build()
806 
807  print("BuildSystem.execute_macro: setting up degrees of freedom")
808  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
809  for nstate, reader in enumerate(self._readers):
810  rbs = reader.get_rigid_bodies()
811  srbs = reader.get_super_rigid_bodies()
812  csrbs = reader.get_chains_of_super_rigid_bodies()
813 
814  # add rigid bodies
815  domains_in_rbs = set()
816  for rblist in rbs:
817  print("BuildSystem.execute_macro: -------- building rigid "
818  "body %s" % (str(rblist)))
819  all_res = IMP.pmi.tools.OrderedSet()
820  bead_res = IMP.pmi.tools.OrderedSet()
821  for dname in rblist:
822  domain = self._domains[nstate][dname]
823  print("BuildSystem.execute_macro: -------- adding %s"
824  % (str(dname)))
825  all_res |= self._domain_res[nstate][dname][0]
826  bead_res |= self._domain_res[nstate][dname][1]
827  domains_in_rbs.add(dname)
828  all_res |= bead_res
829  print("BuildSystem.execute_macro: -------- creating rigid "
830  "body with max_trans %s max_rot %s "
831  "non_rigid_max_trans %s"
832  % (str(max_rb_trans), str(max_rb_rot),
833  str(max_bead_trans)))
834  self.dof.create_rigid_body(all_res,
835  nonrigid_parts=bead_res,
836  max_trans=max_rb_trans,
837  max_rot=max_rb_rot,
838  nonrigid_max_trans=max_bead_trans,
839  name="RigidBody %s" % dname)
840 
841  # if you have any domains not in an RB, set them as flexible beads
842  for dname, domain in self._domains[nstate].items():
843  if dname not in domains_in_rbs:
844  if domain.pdb_file != "BEADS":
845  warnings.warn(
846  "No rigid bodies set for %s. Residues read from "
847  "the PDB file will not be sampled - only regions "
848  "missing from the PDB will be treated flexibly. "
849  "To sample the entire sequence, use BEADS instead "
850  "of a PDB file name" % dname,
852  self.dof.create_flexible_beads(
853  self._domain_res[nstate][dname][1],
854  max_trans=max_bead_trans)
855 
856  # add super rigid bodies
857  for srblist in srbs:
858  print("BuildSystem.execute_macro: -------- building "
859  "super rigid body %s" % (str(srblist)))
860  all_res = IMP.pmi.tools.OrderedSet()
861  for dname in srblist:
862  print("BuildSystem.execute_macro: -------- adding %s"
863  % (str(dname)))
864  all_res |= self._domain_res[nstate][dname][0]
865  all_res |= self._domain_res[nstate][dname][1]
866 
867  print("BuildSystem.execute_macro: -------- creating super "
868  "rigid body with max_trans %s max_rot %s "
869  % (str(max_srb_trans), str(max_srb_rot)))
870  self.dof.create_super_rigid_body(
871  all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
872 
873  # add chains of super rigid bodies
874  for csrblist in csrbs:
875  all_res = IMP.pmi.tools.OrderedSet()
876  for dname in csrblist:
877  all_res |= self._domain_res[nstate][dname][0]
878  all_res |= self._domain_res[nstate][dname][1]
879  all_res = list(all_res)
880  all_res.sort(key=lambda r: r.get_index())
881  self.dof.create_main_chain_mover(all_res)
882  return self.root_hier, self.dof
883 
884 
885 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
886 class AnalysisReplicaExchange0(object):
887  """A macro for running all the basic operations of analysis.
888  Includes clustering, precision analysis, and making ensemble density maps.
889  A number of plots are also supported.
890  """
891  def __init__(self, model,
892  merge_directories=["./"],
893  stat_file_name_suffix="stat",
894  best_pdb_name_suffix="model",
895  do_clean_first=True,
896  do_create_directories=True,
897  global_output_directory="output/",
898  replica_stat_file_suffix="stat_replica",
899  global_analysis_result_directory="./analysis/",
900  test_mode=False):
901  """Constructor.
902  @param model The IMP model
903  @param stat_file_name_suffix
904  @param merge_directories The directories containing output files
905  @param best_pdb_name_suffix
906  @param do_clean_first
907  @param do_create_directories
908  @param global_output_directory Where everything is
909  @param replica_stat_file_suffix
910  @param global_analysis_result_directory
911  @param test_mode If True, nothing is changed on disk
912  """
913 
914  try:
915  from mpi4py import MPI
916  self.comm = MPI.COMM_WORLD
917  self.rank = self.comm.Get_rank()
918  self.number_of_processes = self.comm.size
919  except ImportError:
920  self.rank = 0
921  self.number_of_processes = 1
922 
923  self.test_mode = test_mode
924  self._protocol_output = []
925  self.cluster_obj = None
926  self.model = model
927  stat_dir = global_output_directory
928  self.stat_files = []
929  # it contains the position of the root directories
930  for rd in merge_directories:
931  stat_files = glob.glob(os.path.join(rd, stat_dir, "stat.*.out"))
932  if len(stat_files) == 0:
933  warnings.warn("no stat files found in %s"
934  % os.path.join(rd, stat_dir),
936  self.stat_files += stat_files
937 
938  def add_protocol_output(self, p):
939  """Capture details of the modeling protocol.
940  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
941  """
942  # Assume last state is the one we're interested in
943  self._protocol_output.append((p, p._last_state))
944 
945  def get_modeling_trajectory(self,
946  score_key="SimplifiedModel_Total_Score_None",
947  rmf_file_key="rmf_file",
948  rmf_file_frame_key="rmf_frame_index",
949  outputdir="./",
950  get_every=1,
951  nframes_trajectory=10000):
952  """ Get a trajectory of the modeling run, for generating
953  demonstrative movies
954 
955  @param score_key The score for ranking models
956  @param rmf_file_key Key pointing to RMF filename
957  @param rmf_file_frame_key Key pointing to RMF frame number
958  @param outputdir The local output directory used in the run
959  @param get_every Extract every nth frame
960  @param nframes_trajectory Total number of frames of the trajectory
961  """
962  import math
963 
964  trajectory_models = IMP.pmi.io.get_trajectory_models(
965  self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
966  get_every)
967  score_list = list(map(float, trajectory_models[2]))
968 
969  max_score = max(score_list)
970  min_score = min(score_list)
971 
972  bins = [(max_score-min_score)*math.exp(-float(i))+min_score
973  for i in range(nframes_trajectory)]
974  binned_scores = [None]*nframes_trajectory
975  binned_model_indexes = [-1]*nframes_trajectory
976 
977  for model_index, s in enumerate(score_list):
978  bins_score_diffs = [abs(s-b) for b in bins]
979  bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
980  if binned_scores[bin_index] is None:
981  binned_scores[bin_index] = s
982  binned_model_indexes[bin_index] = model_index
983  else:
984  old_diff = abs(binned_scores[bin_index]-bins[bin_index])
985  new_diff = abs(s-bins[bin_index])
986  if new_diff < old_diff:
987  binned_scores[bin_index] = s
988  binned_model_indexes[bin_index] = model_index
989 
990  print(binned_scores)
991  print(binned_model_indexes)
992 
993  def _expand_ambiguity(self, prot, d):
994  """If using PMI2, expand the dictionary to include copies as
995  ambiguous options
996 
997  This also keeps the states separate.
998  """
999  newdict = {}
1000  for key in d:
1001  val = d[key]
1002  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
1003  newdict[key] = val
1004  continue
1005  states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1006  if isinstance(val, tuple):
1007  start = val[0]
1008  stop = val[1]
1009  name = val[2]
1010  else:
1011  start = 1
1012  stop = -1
1013  name = val
1014  for nst in range(len(states)):
1015  sel = IMP.atom.Selection(prot, molecule=name, state_index=nst)
1016  copies = sel.get_selected_particles(with_representation=False)
1017  if len(copies) > 1:
1018  for nc in range(len(copies)):
1019  if len(states) > 1:
1020  newdict['%s.%i..%i' % (name, nst, nc)] = \
1021  (start, stop, name, nc, nst)
1022  else:
1023  newdict['%s..%i' % (name, nc)] = \
1024  (start, stop, name, nc, nst)
1025  else:
1026  newdict[key] = val
1027  return newdict
1028 
1029  def clustering(self,
1030  score_key="SimplifiedModel_Total_Score_None",
1031  rmf_file_key="rmf_file",
1032  rmf_file_frame_key="rmf_frame_index",
1033  state_number=0,
1034  prefiltervalue=None,
1035  feature_keys=[],
1036  outputdir="./",
1037  alignment_components=None,
1038  number_of_best_scoring_models=10,
1039  rmsd_calculation_components=None,
1040  distance_matrix_file='distances.mat',
1041  load_distance_matrix_file=False,
1042  skip_clustering=False,
1043  number_of_clusters=1,
1044  display_plot=False,
1045  exit_after_display=True,
1046  get_every=1,
1047  first_and_last_frames=None,
1048  density_custom_ranges=None,
1049  write_pdb_with_centered_coordinates=False,
1050  voxel_size=5.0):
1051  """Get the best scoring models, compute a distance matrix,
1052  cluster them, and create density maps.
1053 
1054  Tuple format: "molname" just the molecule,
1055  or (start,stop,molname,copy_num(optional),state_num(optional)
1056  Can pass None for copy or state to ignore that field.
1057  If you don't pass a specific copy number
1058 
1059  @param score_key The score for ranking models.
1060  Use "Total_Score" instead of default for PMI2.
1061  @param rmf_file_key Key pointing to RMF filename
1062  @param rmf_file_frame_key Key pointing to RMF frame number
1063  @param state_number State number to analyze
1064  @param prefiltervalue Only include frames where the
1065  score key is below this value
1066  @param feature_keys Keywords for which you want to
1067  calculate average, medians, etc.
1068  If you pass "Keyname" it'll include everything that matches
1069  "*Keyname*"
1070  @param outputdir The local output directory used in
1071  the run
1072  @param alignment_components Dictionary with keys=groupname,
1073  values are tuples for aligning the structures
1074  e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1075  @param number_of_best_scoring_models Num models to keep per run
1076  @param rmsd_calculation_components For calculating RMSD
1077  (same format as alignment_components)
1078  @param distance_matrix_file Where to store/read the
1079  distance matrix
1080  @param load_distance_matrix_file Try to load the distance
1081  matrix file
1082  @param skip_clustering Just extract the best scoring
1083  models and save the pdbs
1084  @param number_of_clusters Number of k-means clusters
1085  @param display_plot Display the distance matrix
1086  @param exit_after_display Exit after displaying distance
1087  matrix
1088  @param get_every Extract every nth frame
1089  @param first_and_last_frames A tuple with the first and last
1090  frames to be analyzed. Values are percentages!
1091  Default: get all frames
1092  @param density_custom_ranges For density calculation
1093  (same format as alignment_components)
1094  @param write_pdb_with_centered_coordinates
1095  @param voxel_size Used for the density output
1096  """
1097  # Track provenance information to be added to each output model
1098  prov = []
1099  self._outputdir = Path(outputdir).absolute()
1100  self._number_of_clusters = number_of_clusters
1101  for p, state in self._protocol_output:
1102  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1103 
1104  if self.test_mode:
1105  return
1106 
1107  if self.rank == 0:
1108  try:
1109  os.mkdir(outputdir)
1110  except: # noqa: E722
1111  pass
1112 
1113  if not load_distance_matrix_file:
1114  if len(self.stat_files) == 0:
1115  print("ERROR: no stat file found in the given path")
1116  return
1117  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1118  self.stat_files, self.number_of_processes)[self.rank]
1119 
1120  # read ahead to check if you need the PMI2 score key instead
1121  po = IMP.pmi.output.ProcessOutput(my_stat_files[0])
1122  orig_score_key = score_key
1123  if score_key not in po.get_keys():
1124  if 'Total_Score' in po.get_keys():
1125  score_key = 'Total_Score'
1126  warnings.warn(
1127  "Using 'Total_Score' instead of "
1128  "'SimplifiedModel_Total_Score_None' for the score key",
1130  for k in [orig_score_key, score_key, rmf_file_key,
1131  rmf_file_frame_key]:
1132  if k in feature_keys:
1133  warnings.warn(
1134  "no need to pass " + k + " to feature_keys.",
1136  feature_keys.remove(k)
1137 
1138  best_models = IMP.pmi.io.get_best_models(
1139  my_stat_files, score_key, feature_keys, rmf_file_key,
1140  rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1141  rmf_file_list = best_models[0]
1142  rmf_file_frame_list = best_models[1]
1143  score_list = best_models[2]
1144  feature_keyword_list_dict = best_models[3]
1145 
1146 # ------------------------------------------------------------------------
1147 # collect all the files and scores
1148 # ------------------------------------------------------------------------
1149 
1150  if self.number_of_processes > 1:
1151  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1152  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1153  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1154  rmf_file_frame_list)
1155  for k in feature_keyword_list_dict:
1156  feature_keyword_list_dict[k] = \
1158  feature_keyword_list_dict[k])
1159 
1160  # sort by score and get the best scoring ones
1161  score_rmf_tuples = list(zip(score_list,
1162  rmf_file_list,
1163  rmf_file_frame_list,
1164  list(range(len(score_list)))))
1165 
1166  if density_custom_ranges:
1167  for k in density_custom_ranges:
1168  if not isinstance(density_custom_ranges[k], list):
1169  raise Exception("Density custom ranges: values must "
1170  "be lists of tuples")
1171 
1172  # keep subset of frames if requested
1173  if first_and_last_frames is not None:
1174  nframes = len(score_rmf_tuples)
1175  first_frame = int(first_and_last_frames[0] * nframes)
1176  last_frame = int(first_and_last_frames[1] * nframes)
1177  if last_frame > len(score_rmf_tuples):
1178  last_frame = -1
1179  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1180 
1181  # sort RMFs by the score_key in ascending order, and store the rank
1182  best_score_rmf_tuples = sorted(
1183  score_rmf_tuples,
1184  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1185  best_score_rmf_tuples = [t+(n,) for n, t in
1186  enumerate(best_score_rmf_tuples)]
1187  # Note in the provenance info that we only kept best-scoring models
1188  prov.append(IMP.pmi.io.FilterProvenance(
1189  "Best scoring", 0, number_of_best_scoring_models))
1190  # sort the feature scores in the same way
1191  best_score_feature_keyword_list_dict = defaultdict(list)
1192  for tpl in best_score_rmf_tuples:
1193  index = tpl[3]
1194  for f in feature_keyword_list_dict:
1195  best_score_feature_keyword_list_dict[f].append(
1196  feature_keyword_list_dict[f][index])
1197  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1198  best_score_rmf_tuples,
1199  self.number_of_processes)[self.rank]
1200 
1201  # expand the dictionaries to include ambiguous copies
1202  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1203  self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1204  if IMP.pmi.get_is_canonical(prot_ahead):
1205  if rmsd_calculation_components is not None:
1206  tmp = self._expand_ambiguity(
1207  prot_ahead, rmsd_calculation_components)
1208  if tmp != rmsd_calculation_components:
1209  print('Detected ambiguity, expand rmsd components to',
1210  tmp)
1211  rmsd_calculation_components = tmp
1212  if alignment_components is not None:
1213  tmp = self._expand_ambiguity(prot_ahead,
1214  alignment_components)
1215  if tmp != alignment_components:
1216  print('Detected ambiguity, expand alignment '
1217  'components to', tmp)
1218  alignment_components = tmp
1219 
1220 # -------------------------------------------------------------
1221 # read the coordinates
1222 # ------------------------------------------------------------
1223  rmsd_weights = IMP.pmi.io.get_bead_sizes(
1224  self.model, my_best_score_rmf_tuples[0],
1225  rmsd_calculation_components, state_number=state_number)
1227  self.model, my_best_score_rmf_tuples, alignment_components,
1228  rmsd_calculation_components, state_number=state_number)
1229 
1230  # note! the coordinates are simply float tuples, NOT decorators,
1231  # NOT Vector3D, NOR particles, because these object cannot be
1232  # serialized. We need serialization
1233  # for the parallel computation based on mpi.
1234 
1235  # dict:key=component name,val=coords per hit
1236  all_coordinates = got_coords[0]
1237 
1238  # same as above, limited to alignment bits
1239  alignment_coordinates = got_coords[1]
1240 
1241  # same as above, limited to RMSD bits
1242  rmsd_coordinates = got_coords[2]
1243 
1244  # dictionary with key=RMF, value=score rank
1245  rmf_file_name_index_dict = got_coords[3]
1246 
1247  # RMF file per hit
1248  all_rmf_file_names = got_coords[4]
1249 
1250 # ------------------------------------------------------------------------
1251 # optionally don't compute distance matrix or cluster, just write top files
1252 # ------------------------------------------------------------------------
1253  if skip_clustering:
1254  if density_custom_ranges:
1255  DensModule = IMP.pmi.analysis.GetModelDensity(
1256  density_custom_ranges, voxel=voxel_size)
1257 
1258  dircluster = os.path.join(outputdir,
1259  "all_models."+str(self.rank))
1260  try:
1261  os.mkdir(outputdir)
1262  except: # noqa: E722
1263  pass
1264  try:
1265  os.mkdir(dircluster)
1266  except: # noqa: E722
1267  pass
1268  clusstat = open(os.path.join(
1269  dircluster, "stat."+str(self.rank)+".out"), "w")
1270  for cnt, tpl in enumerate(my_best_score_rmf_tuples):
1271  rmf_name = tpl[1]
1272  rmf_frame_number = tpl[2]
1273  tmp_dict = {}
1274  index = tpl[4]
1275  for key in best_score_feature_keyword_list_dict:
1276  tmp_dict[key] = \
1277  best_score_feature_keyword_list_dict[key][index]
1278 
1279  if cnt == 0:
1280  prots, rs = \
1281  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1282  self.model, rmf_frame_number, rmf_name)
1283  else:
1284  linking_successful = \
1285  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1286  self.model, prots, rs, rmf_frame_number,
1287  rmf_name)
1288  if not linking_successful:
1289  continue
1290 
1291  if not prots:
1292  continue
1293 
1294  if IMP.pmi.get_is_canonical(prots[0]):
1295  states = IMP.atom.get_by_type(
1296  prots[0], IMP.atom.STATE_TYPE)
1297  prot = states[state_number]
1298  else:
1299  prot = prots[state_number]
1300 
1301  # get transformation aligning coordinates of
1302  # requested tuples to the first RMF file
1303  if cnt == 0:
1304  coords_f1 = alignment_coordinates[cnt]
1305  if cnt > 0:
1306  coords_f2 = alignment_coordinates[cnt]
1307  if coords_f2:
1309  coords_f1, coords_f2)
1310  transformation = Ali.align()[1]
1311  else:
1312  transformation = \
1314 
1315  rbs = set()
1316  for p in IMP.atom.get_leaves(prot):
1317  if not IMP.core.XYZR.get_is_setup(p):
1319  IMP.core.XYZR(p).set_radius(0.0001)
1320  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1321 
1323  rbm = IMP.core.RigidBodyMember(p)
1324  rb = rbm.get_rigid_body()
1325  rbs.add(rb)
1326  else:
1328  transformation)
1329  for rb in rbs:
1330  IMP.core.transform(rb, transformation)
1331 
1332  o = IMP.pmi.output.Output()
1333  self.model.update()
1334  out_pdb_fn = os.path.join(
1335  dircluster, str(cnt)+"."+str(self.rank)+".pdb")
1336  out_rmf_fn = os.path.join(
1337  dircluster, str(cnt)+"."+str(self.rank)+".rmf3")
1338  o.init_pdb(out_pdb_fn, prot)
1339  tc = write_pdb_with_centered_coordinates
1340  o.write_pdb(out_pdb_fn,
1341  translate_to_geometric_center=tc)
1342 
1343  tmp_dict["local_pdb_file_name"] = \
1344  os.path.basename(out_pdb_fn)
1345  tmp_dict["rmf_file_full_path"] = rmf_name
1346  tmp_dict["local_rmf_file_name"] = \
1347  os.path.basename(out_rmf_fn)
1348  tmp_dict["local_rmf_frame_number"] = 0
1349 
1350  clusstat.write(str(tmp_dict)+"\n")
1351 
1352  if IMP.pmi.get_is_canonical(prot):
1353  # create a single-state System and write that
1355  IMP.Particle(self.model))
1356  h.set_name("System")
1357  h.add_child(prot)
1358  o.init_rmf(out_rmf_fn, [h], rs)
1359  else:
1360  o.init_rmf(out_rmf_fn, [prot], rs)
1361 
1362  o.write_rmf(out_rmf_fn)
1363  o.close_rmf(out_rmf_fn)
1364  # add the density
1365  if density_custom_ranges:
1366  DensModule.add_subunits_density(prot)
1367 
1368  if density_custom_ranges:
1369  DensModule.write_mrc(path=dircluster)
1370  del DensModule
1371  return
1372 
1373  # broadcast the coordinates
1374  if self.number_of_processes > 1:
1375  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1376  all_coordinates)
1377  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1378  all_rmf_file_names)
1379  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1380  rmf_file_name_index_dict)
1381  alignment_coordinates = IMP.pmi.tools.scatter_and_gather(
1382  alignment_coordinates)
1383  rmsd_coordinates = IMP.pmi.tools.scatter_and_gather(
1384  rmsd_coordinates)
1385 
1386  if self.rank == 0:
1387  # save needed informations in external files
1388  self.save_objects(
1389  [best_score_feature_keyword_list_dict,
1390  rmf_file_name_index_dict],
1391  ".macro.pkl")
1392 
1393 # ------------------------------------------------------------------------
1394 # Calculate distance matrix and cluster
1395 # ------------------------------------------------------------------------
1396  print("setup clustering class")
1397  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1398 
1399  for n, model_coordinate_dict in enumerate(all_coordinates):
1400  # let's try to align
1401  if (alignment_components is not None
1402  and len(self.cluster_obj.all_coords) == 0):
1403  # set the first model as template coordinates
1404  self.cluster_obj.set_template(alignment_coordinates[n])
1405  self.cluster_obj.fill(all_rmf_file_names[n],
1406  rmsd_coordinates[n])
1407  print("Global calculating the distance matrix")
1408 
1409  # calculate distance matrix, all against all
1410  self.cluster_obj.dist_matrix()
1411 
1412  # perform clustering and optionally display
1413  if self.rank == 0:
1414  self.cluster_obj.do_cluster(number_of_clusters)
1415  if display_plot:
1416  if self.rank == 0:
1417  self.cluster_obj.plot_matrix(
1418  figurename=os.path.join(outputdir,
1419  'dist_matrix.pdf'))
1420  if exit_after_display:
1421  exit()
1422  self.cluster_obj.save_distance_matrix_file(
1423  file_name=distance_matrix_file)
1424 
1425 # ------------------------------------------------------------------------
1426 # Alteratively, load the distance matrix from file and cluster that
1427 # ------------------------------------------------------------------------
1428  else:
1429  if self.rank == 0:
1430  print("setup clustering class")
1431  self.cluster_obj = IMP.pmi.analysis.Clustering()
1432  self.cluster_obj.load_distance_matrix_file(
1433  file_name=distance_matrix_file)
1434  print("clustering with %s clusters" % str(number_of_clusters))
1435  self.cluster_obj.do_cluster(number_of_clusters)
1436  [best_score_feature_keyword_list_dict,
1437  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1438  if display_plot:
1439  if self.rank == 0:
1440  self.cluster_obj.plot_matrix(figurename=os.path.join(
1441  outputdir, 'dist_matrix.pdf'))
1442  if exit_after_display:
1443  exit()
1444  if self.number_of_processes > 1:
1445  self.comm.Barrier()
1446 
1447 # ------------------------------------------------------------------------
1448 # now save all informations about the clusters
1449 # ------------------------------------------------------------------------
1450 
1451  if self.rank == 0:
1452  print(self.cluster_obj.get_cluster_labels())
1453  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1454  print("rank %s " % str(self.rank))
1455  print("cluster %s " % str(n))
1456  print("cluster label %s " % str(cl))
1457  print(self.cluster_obj.get_cluster_label_names(cl))
1458  cluster_size = \
1459  len(self.cluster_obj.get_cluster_label_names(cl))
1460  cluster_prov = \
1461  prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1462 
1463  # first initialize the Density class if requested
1464  if density_custom_ranges:
1465  DensModule = IMP.pmi.analysis.GetModelDensity(
1466  density_custom_ranges,
1467  voxel=voxel_size)
1468 
1469  dircluster = outputdir + "/cluster." + str(n) + "/"
1470  try:
1471  os.mkdir(dircluster)
1472  except: # noqa: E722
1473  pass
1474 
1475  rmsd_dict = {
1476  "AVERAGE_RMSD":
1477  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1478  clusstat = open(dircluster + "stat.out", "w")
1479  for k, structure_name in enumerate(
1480  self.cluster_obj.get_cluster_label_names(cl)):
1481  # extract the features
1482  tmp_dict = {}
1483  tmp_dict.update(rmsd_dict)
1484  index = rmf_file_name_index_dict[structure_name]
1485  for key in best_score_feature_keyword_list_dict:
1486  tmp_dict[
1487  key] = best_score_feature_keyword_list_dict[
1488  key][
1489  index]
1490 
1491  # get the rmf name and the frame number from the list of
1492  # frame names
1493  rmf_name = structure_name.split("|")[0]
1494  rmf_frame_number = int(structure_name.split("|")[1])
1495  clusstat.write(str(tmp_dict) + "\n")
1496 
1497  # extract frame (open or link to existing)
1498  if k == 0:
1499  prots, rs = \
1500  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1501  self.model, rmf_frame_number, rmf_name)
1502  else:
1503  linking_successful = \
1504  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1505  self.model, prots, rs, rmf_frame_number,
1506  rmf_name)
1507  if not linking_successful:
1508  continue
1509  if not prots:
1510  continue
1511 
1512  if IMP.pmi.get_is_canonical(prots[0]):
1513  states = IMP.atom.get_by_type(
1514  prots[0], IMP.atom.STATE_TYPE)
1515  prot = states[state_number]
1516  else:
1517  prot = prots[state_number]
1518  if k == 0:
1519  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1520 
1521  # transform clusters onto first
1522  if k > 0:
1523  co = self.cluster_obj
1524  model_index = co.get_model_index_from_name(
1525  structure_name)
1526  transformation = co.get_transformation_to_first_member(
1527  cl, model_index)
1528  rbs = set()
1529  for p in IMP.atom.get_leaves(prot):
1530  if not IMP.core.XYZR.get_is_setup(p):
1532  IMP.core.XYZR(p).set_radius(0.0001)
1533  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1534 
1536  rbm = IMP.core.RigidBodyMember(p)
1537  rb = rbm.get_rigid_body()
1538  rbs.add(rb)
1539  else:
1541  transformation)
1542  for rb in rbs:
1543  IMP.core.transform(rb, transformation)
1544 
1545  # add the density
1546  if density_custom_ranges:
1547  DensModule.add_subunits_density(prot)
1548 
1549  # pdb writing should be optimized!
1550  o = IMP.pmi.output.Output()
1551  self.model.update()
1552  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1553  o.write_pdb(dircluster + str(k) + ".pdb")
1554 
1555  if IMP.pmi.get_is_canonical(prot):
1556  # create a single-state System and write that
1558  IMP.Particle(self.model))
1559  h.set_name("System")
1560  h.add_child(prot)
1561  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1562  else:
1563  o.init_rmf(dircluster + str(k) + ".rmf3", [prot], rs)
1564  o.write_rmf(dircluster + str(k) + ".rmf3")
1565  o.close_rmf(dircluster + str(k) + ".rmf3")
1566 
1567  del o
1568  # IMP.atom.destroy(prot)
1569 
1570  if density_custom_ranges:
1571  DensModule.write_mrc(path=dircluster)
1572  del DensModule
1573 
1574  if self.number_of_processes > 1:
1575  self.comm.Barrier()
1576 
1577  def get_cluster_rmsd(self, cluster_num):
1578  if self.cluster_obj is None:
1579  raise Exception("Run clustering first")
1580  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1581 
1582  def save_objects(self, objects, file_name):
1583  import pickle
1584  outf = open(file_name, 'wb')
1585  pickle.dump(objects, outf)
1586  outf.close()
1587 
1588  def load_objects(self, file_name):
1589  import pickle
1590  inputf = open(file_name, 'rb')
1591  objects = pickle.load(inputf)
1592  inputf.close()
1593  return objects
1594 
1595 
1597 
1598  """
1599  This class contains analysis utilities to investigate ReplicaExchange
1600  results.
1601  """
1602 
1603  ########################
1604  # Construction and Setup
1605  ########################
1606 
1607  def __init__(self, model, stat_files, best_models=None, score_key=None,
1608  alignment=True):
1609  """
1610  Construction of the Class.
1611  @param model IMP.Model()
1612  @param stat_files list of string. Can be ascii stat files,
1613  rmf files names
1614  @param best_models Integer. Number of best scoring models,
1615  if None: all models will be read
1616  @param alignment boolean (Default=True). Align before computing
1617  the rmsd.
1618  """
1619 
1620  self.model = model
1621  self.best_models = best_models
1623  model, stat_files, self.best_models, score_key, cache=True)
1625  StatHierarchyHandler=self.stath0)
1626 
1627  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(
1629  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(
1631  self.sel0_rmsd = IMP.atom.Selection(self.stath0)
1632  self.sel1_rmsd = IMP.atom.Selection(self.stath1)
1633  self.sel0_alignment = IMP.atom.Selection(self.stath0)
1634  self.sel1_alignment = IMP.atom.Selection(self.stath1)
1635  self.clusters = []
1636  # fill the cluster list with a single cluster containing all models
1637  c = IMP.pmi.output.Cluster(0)
1638  self.clusters.append(c)
1639  for n0 in range(len(self.stath0)):
1640  c.add_member(n0)
1641  self.pairwise_rmsd = {}
1642  self.pairwise_molecular_assignment = {}
1643  self.alignment = alignment
1644  self.symmetric_molecules = {}
1645  self.issymmetricsel = {}
1646  self.update_seldicts()
1647  self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1648  IMP.atom.get_leaves(self.stath0))
1649  self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1650  IMP.atom.get_leaves(self.stath1))
1651 
1652  def set_rmsd_selection(self, **kwargs):
1653  """
1654  Setup the selection onto which the rmsd is computed
1655  @param kwargs use IMP.atom.Selection keywords
1656  """
1657  self.sel0_rmsd = IMP.atom.Selection(self.stath0, **kwargs)
1658  self.sel1_rmsd = IMP.atom.Selection(self.stath1, **kwargs)
1659  self.update_seldicts()
1660 
1661  def set_symmetric(self, molecule_name):
1662  """
1663  Store names of symmetric molecules
1664  """
1665  self.symmetric_molecules[molecule_name] = 0
1666  self.update_seldicts()
1667 
1668  def set_alignment_selection(self, **kwargs):
1669  """
1670  Setup the selection onto which the alignment is computed
1671  @param kwargs use IMP.atom.Selection keywords
1672  """
1673  self.sel0_alignment = IMP.atom.Selection(self.stath0, **kwargs)
1674  self.sel1_alignment = IMP.atom.Selection(self.stath1, **kwargs)
1675 
1676  ######################
1677  # Clustering functions
1678  ######################
1679  def clean_clusters(self):
1680  for c in self.clusters:
1681  del c
1682  self.clusters = []
1683 
1684  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1685  """
1686  Cluster the models based on RMSD.
1687  @param rmsd_cutoff Float the distance cutoff in Angstrom
1688  @param metric (Default=IMP.atom.get_rmsd) the metric that will
1689  be used to compute rmsds
1690  """
1691  self.clean_clusters()
1692  not_clustered = set(range(len(self.stath1)))
1693  while len(not_clustered) > 0:
1694  self.aggregate(not_clustered, rmsd_cutoff, metric)
1695  self.update_clusters()
1696 
1697  def refine(self, rmsd_cutoff=10):
1698  """
1699  Refine the clusters by merging the ones whose centers are close
1700  @param rmsd_cutoff cutoff distance in Angstorms
1701  """
1702  clusters_copy = self.clusters
1703  for c0, c1 in itertools.combinations(self.clusters, 2):
1704  if c0.center_index is None:
1705  self.compute_cluster_center(c0)
1706  if c1.center_index is None:
1707  self.compute_cluster_center(c1)
1708  _ = self.stath0[c0.center_index]
1709  _ = self.stath1[c1.center_index]
1710  rmsd, molecular_assignment = self.rmsd()
1711  if rmsd <= rmsd_cutoff:
1712  if c1 in self.clusters:
1713  clusters_copy.remove(c1)
1714  c0 += c1
1715  self.clusters = clusters_copy
1716  self.update_clusters()
1717 
1718  ####################
1719  # Input Output
1720  ####################
1721 
1722  def set_cluster_assignments(self, cluster_ids):
1723  if len(cluster_ids) != len(self.stath0):
1724  raise ValueError('cluster ids has to be same length as '
1725  'number of frames')
1726 
1727  self.clusters = []
1728  for i in sorted(list(set(cluster_ids))):
1729  self.clusters.append(IMP.pmi.output.Cluster(i))
1730  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1731  self.clusters[idx].add_member(i, d)
1732 
1733  def get_cluster_data(self, cluster):
1734  """
1735  Return the model data from a cluster
1736  @param cluster IMP.pmi.output.Cluster object
1737  """
1738  data = []
1739  for m in cluster:
1740  data.append(m)
1741  return data
1742 
1743  def save_data(self, filename='data.pkl'):
1744  """
1745  Save the data for the whole models into a pickle file
1746  @param filename string
1747  """
1748  self.stath0.save_data(filename)
1749 
1750  def set_data(self, data):
1751  """
1752  Set the data from an external IMP.pmi.output.Data
1753  @param data IMP.pmi.output.Data
1754  """
1755  self.stath0.data = data
1756  self.stath1.data = data
1757 
1758  def load_data(self, filename='data.pkl'):
1759  """
1760  Load the data from an external pickled file
1761  @param filename string
1762  """
1763  self.stath0.load_data(filename)
1764  self.stath1.load_data(filename)
1765  self.best_models = len(self.stath0)
1766 
1767  def add_cluster(self, rmf_name_list):
1768  c = IMP.pmi.output.Cluster(len(self.clusters))
1769  print("creating cluster index "+str(len(self.clusters)))
1770  self.clusters.append(c)
1771  current_len = len(self.stath0)
1772 
1773  for rmf in rmf_name_list:
1774  print("adding rmf "+rmf)
1775  self.stath0.add_stat_file(rmf)
1776  self.stath1.add_stat_file(rmf)
1777 
1778  for n0 in range(current_len, len(self.stath0)):
1779  d0 = self.stath0[n0]
1780  c.add_member(n0, d0)
1781  self.update_clusters()
1782 
1783  def save_clusters(self, filename='clusters.pkl'):
1784  """
1785  Save the clusters into a pickle file
1786  @param filename string
1787  """
1788  try:
1789  import cPickle as pickle
1790  except ImportError:
1791  import pickle
1792  fl = open(filename, 'wb')
1793  pickle.dump(self.clusters, fl)
1794 
1795  def load_clusters(self, filename='clusters.pkl', append=False):
1796  """
1797  Load the clusters from a pickle file
1798  @param filename string
1799  @param append bool (Default=False), if True. append the clusters
1800  to the ones currently present
1801  """
1802  try:
1803  import cPickle as pickle
1804  except ImportError:
1805  import pickle
1806  fl = open(filename, 'rb')
1807  self.clean_clusters()
1808  if append:
1809  self.clusters += pickle.load(fl)
1810  else:
1811  self.clusters = pickle.load(fl)
1812  self.update_clusters()
1813 
1814  ####################
1815  # Analysis Functions
1816  ####################
1817 
1818  def compute_cluster_center(self, cluster):
1819  """
1820  Compute the cluster center for a given cluster
1821  """
1822  member_distance = defaultdict(float)
1823 
1824  for n0, n1 in itertools.combinations(cluster.members, 2):
1825  _ = self.stath0[n0]
1826  _ = self.stath1[n1]
1827  rmsd, _ = self.rmsd()
1828  member_distance[n0] += rmsd
1829 
1830  if len(member_distance) > 0:
1831  cluster.center_index = min(member_distance,
1832  key=member_distance.get)
1833  else:
1834  cluster.center_index = cluster.members[0]
1835 
1836  def save_coordinates(self, cluster, rmf_name=None, reference="Absolute",
1837  prefix="./"):
1838  """
1839  Save the coordinates of the current cluster a single rmf file
1840  """
1841  print("saving coordinates", cluster)
1842  if self.alignment:
1843  self.set_reference(reference, cluster)
1844  o = IMP.pmi.output.Output()
1845  if rmf_name is None:
1846  rmf_name = prefix+'/'+str(cluster.cluster_id)+".rmf3"
1847 
1848  _ = self.stath1[cluster.members[0]]
1849  self.model.update()
1850  o.init_rmf(rmf_name, [self.stath1])
1851  for n1 in cluster.members:
1852  _ = self.stath1[n1]
1853  self.model.update()
1855  if self.alignment:
1856  self.align()
1857  o.write_rmf(rmf_name)
1859  o.close_rmf(rmf_name)
1860 
1861  def prune_redundant_structures(self, rmsd_cutoff=10):
1862  """
1863  remove structures that are similar
1864  append it to a new cluster
1865  """
1866  print("pruning models")
1867  selected = 0
1868  filtered = [selected]
1869  remaining = range(1, len(self.stath1), 10)
1870 
1871  while len(remaining) > 0:
1872  d0 = self.stath0[selected]
1873  rm = []
1874  for n1 in remaining:
1875  _ = self.stath1[n1]
1876  if self.alignment:
1877  self.align()
1878  d, _ = self.rmsd()
1879  if d <= rmsd_cutoff:
1880  rm.append(n1)
1881  print("pruning model %s, similar to model %s, rmsd %s"
1882  % (str(n1), str(selected), str(d)))
1883  remaining = [x for x in remaining if x not in rm]
1884  if len(remaining) == 0:
1885  break
1886  selected = remaining[0]
1887  filtered.append(selected)
1888  remaining.pop(0)
1889  c = IMP.pmi.output.Cluster(len(self.clusters))
1890  self.clusters.append(c)
1891  for n0 in filtered:
1892  d0 = self.stath0[n0]
1893  c.add_member(n0, d0)
1894  self.update_clusters()
1895 
1896  def precision(self, cluster):
1897  """
1898  Compute the precision of a cluster
1899  """
1900  npairs = 0
1901  rmsd = 0.0
1902  precision = None
1903 
1904  if cluster.center_index is not None:
1905  members1 = [cluster.center_index]
1906  else:
1907  members1 = cluster.members
1908 
1909  for n0 in members1:
1910  _ = self.stath0[n0]
1911  for n1 in cluster.members:
1912  if n0 != n1:
1913  npairs += 1
1914  _ = self.stath1[n1]
1916  tmp_rmsd, _ = self.rmsd()
1917  rmsd += tmp_rmsd
1919 
1920  if npairs > 0:
1921  precision = rmsd/npairs
1922  cluster.precision = precision
1923  return precision
1924 
1925  def bipartite_precision(self, cluster1, cluster2, verbose=False):
1926  """
1927  Compute the bipartite precision (ie the cross-precision)
1928  between two clusters
1929  """
1930  npairs = 0
1931  rmsd = 0.0
1932  for cn0, n0 in enumerate(cluster1.members):
1933  _ = self.stath0[n0]
1934  for cn1, n1 in enumerate(cluster2.members):
1935  _ = self.stath1[n1]
1936  tmp_rmsd, _ = self.rmsd()
1937  if verbose:
1938  print("--- rmsd between structure %s and structure "
1939  "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1940  rmsd += tmp_rmsd
1941  npairs += 1
1942  precision = rmsd/npairs
1943  return precision
1944 
1945  def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1946  cluster_ref=None, step=1):
1947  """
1948  Compute the Root mean square fluctuations
1949  of a molecule in a cluster
1950  Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1951  residue indexes and the value is the rmsf
1952  """
1953  rmsf = IMP.pmi.tools.OrderedDict()
1954 
1955  # assumes that residue indexes are identical for stath0 and stath1
1956  if cluster_ref is not None:
1957  if cluster_ref.center_index is not None:
1958  members0 = [cluster_ref.center_index]
1959  else:
1960  members0 = cluster_ref.members
1961  else:
1962  if cluster.center_index is not None:
1963  members0 = [cluster.center_index]
1964  else:
1965  members0 = cluster.members
1966 
1967  s0 = IMP.atom.Selection(self.stath0, molecule=molecule, resolution=1,
1968  copy_index=copy_index, state_index=state_index)
1969  ps0 = s0.get_selected_particles()
1970  # get the residue indexes
1971  residue_indexes = list(IMP.pmi.tools.OrderedSet(
1972  [IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
1973 
1974  # get the corresponding particles
1975  npairs = 0
1976  for n0 in members0:
1977  d0 = self.stath0[n0]
1978  for n1 in cluster.members[::step]:
1979  if n0 != n1:
1980  print("--- rmsf %s %s" % (str(n0), str(n1)))
1982 
1983  s1 = IMP.atom.Selection(
1984  self.stath1, molecule=molecule,
1985  residue_indexes=residue_indexes, resolution=1,
1986  copy_index=copy_index, state_index=state_index)
1987  ps1 = s1.get_selected_particles()
1988 
1989  d1 = self.stath1[n1]
1990  if self.alignment:
1991  self.align()
1992  for n, (p0, p1) in enumerate(zip(ps0, ps1)):
1993  r = residue_indexes[n]
1994  d0 = IMP.core.XYZ(p0)
1995  d1 = IMP.core.XYZ(p1)
1996  if r in rmsf:
1997  rmsf[r] += IMP.core.get_distance(d0, d1)
1998  else:
1999  rmsf[r] = IMP.core.get_distance(d0, d1)
2000  npairs += 1
2002  for r in rmsf:
2003  rmsf[r] /= npairs
2004 
2005  for stath in [self.stath0, self.stath1]:
2006  if molecule not in self.symmetric_molecules:
2007  s = IMP.atom.Selection(
2008  stath, molecule=molecule, residue_index=r,
2009  resolution=1, copy_index=copy_index,
2010  state_index=state_index)
2011  else:
2012  s = IMP.atom.Selection(
2013  stath, molecule=molecule, residue_index=r,
2014  resolution=1, state_index=state_index)
2015 
2016  ps = s.get_selected_particles()
2017  for p in ps:
2019  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
2020  else:
2022 
2023  return rmsf
2024 
2025  def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2026  reference="Absolute", prefix="./", step=1):
2027  if self.alignment:
2028  self.set_reference(reference, cluster)
2029  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
2030  voxel=voxel_size)
2031 
2032  for n1 in cluster.members[::step]:
2033  print("density "+str(n1))
2034  _ = self.stath1[n1]
2036  if self.alignment:
2037  self.align()
2038  dens.add_subunits_density(self.stath1)
2040  dens.write_mrc(path=prefix+'/', suffix=str(cluster.cluster_id))
2041  del dens
2042 
2043  def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2044  consolidate=False, molecules=None, prefix='./',
2045  reference="Absolute"):
2046  if self.alignment:
2047  self.set_reference(reference, cluster)
2048  import numpy as np
2049  import matplotlib.pyplot as plt
2050  import matplotlib.cm as cm
2051  from scipy.spatial.distance import cdist
2052  import IMP.pmi.topology
2053  if molecules is None:
2055  for mol in IMP.pmi.tools.get_molecules(
2056  IMP.atom.get_leaves(self.stath1))]
2057  else:
2059  for mol in IMP.pmi.tools.get_molecules(
2061  self.stath1,
2062  molecules=molecules).get_selected_particles())]
2063  unique_copies = [mol for mol in mols if mol.get_copy_index() == 0]
2064  mol_names_unique = dict((mol.get_name(), mol) for mol in unique_copies)
2065  total_len_unique = sum(max(mol.get_residue_indexes())
2066  for mol in unique_copies)
2067 
2068  index_dict = {}
2069  prev_stop = 0
2070 
2071  if not consolidate:
2072  for mol in mols:
2073  seqlen = max(mol.get_residue_indexes())
2074  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2075  prev_stop += seqlen
2076 
2077  else:
2078  for mol in unique_copies:
2079  seqlen = max(mol.get_residue_indexes())
2080  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2081  prev_stop += seqlen
2082 
2083  for ncl, n1 in enumerate(cluster.members):
2084  print(ncl)
2085  _ = self.stath1[n1]
2086  coord_dict = IMP.pmi.tools.OrderedDict()
2087  for mol in mols:
2088  rindexes = mol.get_residue_indexes()
2089  coords = np.ones((max(rindexes), 3))
2090  for rnum in rindexes:
2091  sel = IMP.atom.Selection(mol, residue_index=rnum,
2092  resolution=1)
2093  selpart = sel.get_selected_particles()
2094  if len(selpart) == 0:
2095  continue
2096  selpart = selpart[0]
2097  coords[rnum - 1, :] = \
2098  IMP.core.XYZ(selpart).get_coordinates()
2099  coord_dict[mol] = coords
2100 
2101  if not consolidate:
2102  coords = np.concatenate(list(coord_dict.values()))
2103  dists = cdist(coords, coords)
2104  binary_dists = np.where((dists <= contact_threshold)
2105  & (dists >= 1.0), 1.0, 0.0)
2106  else:
2107  binary_dists_dict = {}
2108  for mol1 in mols:
2109  len1 = max(mol1.get_residue_indexes())
2110  for mol2 in mols:
2111  name1 = mol1.get_name()
2112  name2 = mol2.get_name()
2113  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2114  if (name1, name2) not in binary_dists_dict:
2115  binary_dists_dict[(name1, name2)] = \
2116  np.zeros((len1, len1))
2117  binary_dists_dict[(name1, name2)] += \
2118  np.where((dists <= contact_threshold)
2119  & (dists >= 1.0), 1.0, 0.0)
2120  binary_dists = np.zeros((total_len_unique, total_len_unique))
2121 
2122  for name1, name2 in binary_dists_dict:
2123  r1 = index_dict[mol_names_unique[name1]]
2124  r2 = index_dict[mol_names_unique[name2]]
2125  binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2126  np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2127  1.0, 0.0)
2128 
2129  if ncl == 0:
2130  dist_maps = [dists]
2131  av_dist_map = dists
2132  contact_freqs = binary_dists
2133  else:
2134  dist_maps.append(dists)
2135  av_dist_map += dists
2136  contact_freqs += binary_dists
2137 
2138  if log_scale:
2139  contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2140  else:
2141  contact_freqs = 1.0/len(cluster)*contact_freqs
2142  av_dist_map = 1.0/len(cluster)*contact_freqs
2143 
2144  fig = plt.figure(figsize=(100, 100))
2145  ax = fig.add_subplot(111)
2146  ax.set_xticks([])
2147  ax.set_yticks([])
2148  gap_between_components = 50
2149  colormap = cm.Blues
2150  colornorm = None
2151 
2152  if not consolidate:
2153  sorted_tuple = sorted(
2155  mol).get_extended_name(), mol) for mol in mols)
2156  prot_list = list(zip(*sorted_tuple))[1]
2157  else:
2158  sorted_tuple = sorted(
2159  (IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(), mol)
2160  for mol in unique_copies)
2161  prot_list = list(zip(*sorted_tuple))[1]
2162 
2163  prot_listx = prot_list
2164  nresx = gap_between_components + \
2165  sum([max(mol.get_residue_indexes())
2166  + gap_between_components for mol in prot_listx])
2167 
2168  # set the list of proteins on the y axis
2169  prot_listy = prot_list
2170  nresy = gap_between_components + \
2171  sum([max(mol.get_residue_indexes())
2172  + gap_between_components for mol in prot_listy])
2173 
2174  # this is the residue offset for each protein
2175  resoffsetx = {}
2176  resendx = {}
2177  res = gap_between_components
2178  for mol in prot_listx:
2179  resoffsetx[mol] = res
2180  res += max(mol.get_residue_indexes())
2181  resendx[mol] = res
2182  res += gap_between_components
2183 
2184  resoffsety = {}
2185  resendy = {}
2186  res = gap_between_components
2187  for mol in prot_listy:
2188  resoffsety[mol] = res
2189  res += max(mol.get_residue_indexes())
2190  resendy[mol] = res
2191  res += gap_between_components
2192 
2193  resoffsetdiagonal = {}
2194  res = gap_between_components
2195  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2196  resoffsetdiagonal[mol] = res
2197  res += max(mol.get_residue_indexes())
2198  res += gap_between_components
2199 
2200  # plot protein boundaries
2201  xticks = []
2202  xlabels = []
2203  for n, prot in enumerate(prot_listx):
2204  res = resoffsetx[prot]
2205  end = resendx[prot]
2206  for proty in prot_listy:
2207  resy = resoffsety[proty]
2208  endy = resendy[proty]
2209  ax.plot([res, res], [resy, endy], linestyle='-',
2210  color='gray', lw=0.4)
2211  ax.plot([end, end], [resy, endy], linestyle='-',
2212  color='gray', lw=0.4)
2213  xticks.append((float(res) + float(end)) / 2)
2215  prot).get_extended_name())
2216 
2217  yticks = []
2218  ylabels = []
2219  for n, prot in enumerate(prot_listy):
2220  res = resoffsety[prot]
2221  end = resendy[prot]
2222  for protx in prot_listx:
2223  resx = resoffsetx[protx]
2224  endx = resendx[protx]
2225  ax.plot([resx, endx], [res, res], linestyle='-',
2226  color='gray', lw=0.4)
2227  ax.plot([resx, endx], [end, end], linestyle='-',
2228  color='gray', lw=0.4)
2229  yticks.append((float(res) + float(end)) / 2)
2231  prot).get_extended_name())
2232 
2233  # plot the contact map
2234 
2235  tmp_array = np.zeros((nresx, nresy))
2236  ret = {}
2237  for px in prot_listx:
2238  for py in prot_listy:
2239  resx = resoffsetx[px]
2240  lengx = resendx[px] - 1
2241  resy = resoffsety[py]
2242  lengy = resendy[py] - 1
2243  indexes_x = index_dict[px]
2244  minx = min(indexes_x)
2245  maxx = max(indexes_x)
2246  indexes_y = index_dict[py]
2247  miny = min(indexes_y)
2248  maxy = max(indexes_y)
2249  tmp_array[resx:lengx, resy:lengy] = \
2250  contact_freqs[minx:maxx, miny:maxy]
2251  ret[(px, py)] = np.argwhere(
2252  contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2253 
2254  ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2255  origin='lower', alpha=0.6, interpolation='nearest')
2256 
2257  ax.set_xticks(xticks)
2258  ax.set_xticklabels(xlabels, rotation=90)
2259  ax.set_yticks(yticks)
2260  ax.set_yticklabels(ylabels)
2261  plt.setp(ax.get_xticklabels(), fontsize=6)
2262  plt.setp(ax.get_yticklabels(), fontsize=6)
2263 
2264  # display and write to file
2265  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2266  [i.set_linewidth(2.0) for i in ax.spines.values()]
2267 
2268  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf",
2269  dpi=300, transparent="False")
2270  return ret
2271 
2272  def plot_rmsd_matrix(self, filename):
2273  self.compute_all_pairwise_rmsd()
2274  distance_matrix = np.zeros(
2275  (len(self.stath0), len(self.stath1)))
2276  for (n0, n1) in self.pairwise_rmsd:
2277  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2278 
2279  import matplotlib as mpl
2280  mpl.use('Agg')
2281  import matplotlib.pylab as pl
2282  from scipy.cluster import hierarchy as hrc
2283 
2284  fig = pl.figure(figsize=(10, 8))
2285  ax = fig.add_subplot(212)
2286  dendrogram = hrc.dendrogram(
2287  hrc.linkage(distance_matrix),
2288  color_threshold=7,
2289  no_labels=True)
2290  leaves_order = dendrogram['leaves']
2291  ax.set_xlabel('Model')
2292  ax.set_ylabel('RMSD [Angstroms]')
2293 
2294  ax2 = fig.add_subplot(221)
2295  cax = ax2.imshow(
2296  distance_matrix[leaves_order, :][:, leaves_order],
2297  interpolation='nearest')
2298  cb = fig.colorbar(cax)
2299  cb.set_label('RMSD [Angstroms]')
2300  ax2.set_xlabel('Model')
2301  ax2.set_ylabel('Model')
2302 
2303  pl.savefig(filename, dpi=300)
2304  pl.close(fig)
2305 
2306  ####################
2307  # Internal Functions
2308  ####################
2309 
2310  def update_clusters(self):
2311  """
2312  Update the cluster id numbers
2313  """
2314  for n, c in enumerate(self.clusters):
2315  c.cluster_id = n
2316 
2317  def get_molecule(self, hier, name, copy):
2318  s = IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2319  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2320 
2321  def update_seldicts(self):
2322  """
2323  Update the seldicts
2324  """
2325  self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2326  self.sel0_rmsd.get_selected_particles())
2327  self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2328  self.sel1_rmsd.get_selected_particles())
2329  for mol in self.seldict0:
2330  for sel in self.seldict0[mol]:
2331  self.issymmetricsel[sel] = False
2332  for mol in self.symmetric_molecules:
2333  self.symmetric_molecules[mol] = len(self.seldict0[mol])
2334  for sel in self.seldict0[mol]:
2335  self.issymmetricsel[sel] = True
2336 
2337  def align(self):
2339  self.sel1_alignment, self.sel0_alignment)
2340 
2341  for rb in self.rbs1:
2342  IMP.core.transform(rb, tr)
2343 
2344  for bead in self.beads1:
2345  try:
2346  IMP.core.transform(IMP.core.XYZ(bead), tr)
2347  except: # noqa: E722
2348  continue
2349 
2350  self.model.update()
2351 
2352  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2353  '''
2354  initial filling of the clusters.
2355  '''
2356  n0 = idxs.pop()
2357  print("clustering model "+str(n0))
2358  d0 = self.stath0[n0]
2359  c = IMP.pmi.output.Cluster(len(self.clusters))
2360  print("creating cluster index "+str(len(self.clusters)))
2361  self.clusters.append(c)
2362  c.add_member(n0, d0)
2363  clustered = set([n0])
2364  for n1 in idxs:
2365  print("--- trying to add model " + str(n1) + " to cluster "
2366  + str(len(self.clusters)))
2367  d1 = self.stath1[n1]
2368  if self.alignment:
2369  self.align()
2370  rmsd, _ = self.rmsd(metric=metric)
2371  if rmsd < rmsd_cutoff:
2372  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2373  c.add_member(n1, d1)
2374  clustered.add(n1)
2375  else:
2376  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2377  idxs -= clustered
2378 
2379  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2380  """
2381  merge the clusters that have close members
2382  @param rmsd_cutoff cutoff distance in Angstorms
2383  """
2384  # before merging, clusters are spheres of radius rmsd_cutoff
2385  # centered on the 1st element
2386  # here we only try to merge clusters whose centers are closer
2387  # than 2*rmsd_cutoff
2388  to_merge = []
2389  print("merging...")
2390  for c0, c1 in filter(lambda x: len(x[0].members) > 1,
2391  itertools.combinations(self.clusters, 2)):
2392  n0, n1 = [c.members[0] for c in (c0, c1)]
2393  _ = self.stath0[n0]
2394  _ = self.stath1[n1]
2395  rmsd, _ = self.rmsd()
2396  if (rmsd < 2*rmsd_cutoff and
2397  self.have_close_members(c0, c1, rmsd_cutoff, metric)):
2398  to_merge.append((c0, c1))
2399 
2400  for c0, c in reversed(to_merge):
2401  self.merge(c0, c)
2402 
2403  # keep only full clusters
2404  self.clusters = [c for c in
2405  filter(lambda x: len(x.members) > 0, self.clusters)]
2406 
2407  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2408  '''
2409  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2410  '''
2411  print("check close members for clusters " + str(c0.cluster_id) +
2412  " and " + str(c1.cluster_id))
2413  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2414  _ = self.stath0[n0]
2415  _ = self.stath1[n1]
2416  rmsd, _ = self.rmsd(metric=metric)
2417  if rmsd < rmsd_cutoff:
2418  return True
2419 
2420  return False
2421 
2422  def merge(self, c0, c1):
2423  '''
2424  merge two clusters
2425  '''
2426  c0 += c1
2427  c1.members = []
2428  c1.data = {}
2429 
2430  def rmsd_helper(self, sels0, sels1, metric):
2431  '''
2432  a function that returns the permutation best_sel of sels0 that
2433  minimizes metric
2434  '''
2435  best_rmsd2 = float('inf')
2436  best_sel = None
2437  if self.issymmetricsel[sels0[0]]:
2438  # this cases happens when symmetries were defined
2439  N = len(sels0)
2440  for offset in range(N):
2441  sels = [sels0[(offset+i) % N] for i in range(N)]
2442  sel0 = sels[0]
2443  sel1 = sels1[0]
2444  r = metric(sel0, sel1)
2445  rmsd2 = r*r*N
2446  if rmsd2 < best_rmsd2:
2447  best_rmsd2 = rmsd2
2448  best_sel = sels
2449  else:
2450  for sels in itertools.permutations(sels0):
2451  rmsd2 = 0.0
2452  for sel0, sel1 in itertools.takewhile(
2453  lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2454  r = metric(sel0, sel1)
2455  rmsd2 += r*r
2456  if rmsd2 < best_rmsd2:
2457  best_rmsd2 = rmsd2
2458  best_sel = sels
2459  return best_sel, best_rmsd2
2460 
2461  def compute_all_pairwise_rmsd(self):
2462  for d0 in self.stath0:
2463  for d1 in self.stath1:
2464  rmsd, _ = self.rmsd()
2465 
2466  def rmsd(self, metric=IMP.atom.get_rmsd):
2467  '''
2468  Computes the RMSD. Resolves ambiguous pairs assignments
2469  '''
2470  # here we memoize the rmsd and molecular assignment so that it's
2471  # not done multiple times
2472  n0 = self.stath0.current_index
2473  n1 = self.stath1.current_index
2474  if ((n0, n1) in self.pairwise_rmsd) \
2475  and ((n0, n1) in self.pairwise_molecular_assignment):
2476  return (self.pairwise_rmsd[(n0, n1)],
2477  self.pairwise_molecular_assignment[(n0, n1)])
2478 
2479  if self.alignment:
2480  self.align()
2481  # if it's not yet memoized
2482  total_rmsd = 0.0
2483  total_N = 0
2484  # this is a dictionary which keys are the molecule names, and values
2485  # are the list of IMP.atom.Selection for all molecules that share
2486  # the molecule name
2487  molecular_assignment = {}
2488  for molname, sels0 in self.seldict0.items():
2489  sels_best_order, best_rmsd2 = \
2490  self.rmsd_helper(sels0, self.seldict1[molname], metric)
2491 
2492  Ncoords = len(sels_best_order[0].get_selected_particles())
2493  Ncopies = len(self.seldict1[molname])
2494  total_rmsd += Ncoords*best_rmsd2
2495  total_N += Ncoords*Ncopies
2496 
2497  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2498  p0 = sel0.get_selected_particles()[0]
2499  p1 = sel1.get_selected_particles()[0]
2500  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2501  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2502  c0 = IMP.atom.Copy(m0).get_copy_index()
2503  c1 = IMP.atom.Copy(m1).get_copy_index()
2504  molecular_assignment[(molname, c0)] = (molname, c1)
2505 
2506  total_rmsd = math.sqrt(total_rmsd/total_N)
2507 
2508  self.pairwise_rmsd[(n0, n1)] = total_rmsd
2509  self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2510  self.pairwise_rmsd[(n1, n0)] = total_rmsd
2511  self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2512  return total_rmsd, molecular_assignment
2513 
2514  def set_reference(self, reference, cluster):
2515  """
2516  Fix the reference structure for structural alignment, rmsd and
2517  chain assignemnt
2518 
2519  @param reference can be either "Absolute" (cluster center of the
2520  first cluster) or Relative (cluster center of the current
2521  cluster)
2522  """
2523  if reference == "Absolute":
2524  _ = self.stath0[0]
2525  elif reference == "Relative":
2526  if cluster.center_index:
2527  n0 = cluster.center_index
2528  else:
2529  n0 = cluster.members[0]
2530  _ = self.stath0[n0]
2531 
2533  """
2534  compute the molecular assignemnts between multiple copies
2535  of the same sequence. It changes the Copy index of Molecules
2536  """
2537  _ = self.stath1[n1]
2538  _, molecular_assignment = self.rmsd()
2539  for (m0, c0), (m1, c1) in molecular_assignment.items():
2540  mol0 = self.molcopydict0[m0][c0]
2541  mol1 = self.molcopydict1[m1][c1]
2542  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2543  p1 = IMP.atom.Copy(mol1).get_particle()
2544  p1.set_value(cik0, c0)
2545 
2547  """
2548  Undo the Copy index assignment
2549  """
2550  _ = self.stath1[n1]
2551  _, molecular_assignment = self.rmsd()
2552  for (m0, c0), (m1, c1) in molecular_assignment.items():
2553  mol0 = self.molcopydict0[m0][c0]
2554  mol1 = self.molcopydict1[m1][c1]
2555  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2556  p1 = IMP.atom.Copy(mol1).get_particle()
2557  p1.set_value(cik0, c1)
2558 
2559  ####################
2560  # Container Functions
2561  ####################
2562 
2563  def __repr__(self):
2564  s = "AnalysisReplicaExchange\n"
2565  s += "---- number of clusters %s \n" % str(len(self.clusters))
2566  s += "---- number of models %s \n" % str(len(self.stath0))
2567  return s
2568 
2569  def __getitem__(self, int_slice_adaptor):
2570  if isinstance(int_slice_adaptor, int):
2571  return self.clusters[int_slice_adaptor]
2572  elif isinstance(int_slice_adaptor, slice):
2573  return self.__iter__(int_slice_adaptor)
2574  else:
2575  raise TypeError("Unknown Type")
2576 
2577  def __len__(self):
2578  return len(self.clusters)
2579 
2580  def __iter__(self, slice_key=None):
2581  if slice_key is None:
2582  for i in range(len(self)):
2583  yield self[i]
2584  else:
2585  for i in range(len(self))[slice_key]:
2586  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2466
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
Definition: macros.py:2514
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1795
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1135
def precision
Compute the precision of a cluster.
Definition: macros.py:1896
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:885
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:111
A container for models organized into clusters.
Definition: output.py:1522
Sample using molecular dynamics.
Definition: samplers.py:397
def aggregate
initial filling of the clusters.
Definition: macros.py:2352
A class for reading stat files (either rmf or ascii v1 and v2)
Definition: output.py:947
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:616
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:617
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1861
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1945
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1232
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1652
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1733
def __init__
Construction of the Class.
Definition: macros.py:1607
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:790
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1750
def __init__
Constructor.
Definition: macros.py:129
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2546
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1668
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2430
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1836
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1029
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
Definition: macros.py:2532
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1596
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:584
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2379
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:938
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1818
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:949
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:22
def update_seldicts
Update the seldicts.
Definition: macros.py:2321
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2310
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:612
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1697
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:203
Collect timing information.
Definition: tools.py:126
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1661
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: pmi/utilities.h:91
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:10594
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:47
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2422
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:635
The general base class for IMP exceptions.
Definition: exception.h:48
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1269
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1743
Class to handle individual particles of a Model object.
Definition: Particle.h:41
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:801
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1925
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:613
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1684
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1783
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2407
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
A macro to help setup and run replica exchange.
Definition: macros.py:62
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1208
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1758
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:569
Sample using replica exchange.
Definition: samplers.py:502
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27