IMP logo
IMP Reference Guide  2.19.0
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
7 import IMP.pmi.tools
8 import IMP.pmi.samplers
9 import IMP.pmi.output
10 import IMP.pmi.analysis
11 import IMP.pmi.io
12 import IMP.pmi.alphabets
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import os
17 try:
18  from pathlib import Path
19 except ImportError: # Use bundled pathlib on Python 2 without pathlib
20  from IMP._compat_pathlib import Path
21 import glob
22 from operator import itemgetter
23 from collections import defaultdict
24 import numpy as np
25 import itertools
26 import warnings
27 import math
28 
29 
30 class _MockMPIValues(object):
31  """Replace samplers.MPI_values when in test mode"""
32  def get_percentile(self, name):
33  return 0.
34 
35 
36 class _RMFRestraints(object):
37  """All restraints that are written out to the RMF file"""
38  def __init__(self, model, user_restraints):
39  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
40  self._user_restraints = user_restraints if user_restraints else []
41 
42  def __len__(self):
43  return (len(self._user_restraints)
44  + self._rmf_rs.get_number_of_restraints())
45 
46  def __bool__(self):
47  return len(self) > 0
48  __nonzero__ = __bool__ # Python 2 compatibility
49 
50  def __getitem__(self, i):
51  class FakePMIWrapper(object):
52  def __init__(self, r):
53  self.r = IMP.RestraintSet.get_from(r)
54 
55  def get_restraint(self):
56  return self.r
57 
58  lenuser = len(self._user_restraints)
59  if 0 <= i < lenuser:
60  return self._user_restraints[i]
61  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
62  r = self._rmf_rs.get_restraint(i - lenuser)
63  return FakePMIWrapper(r)
64  else:
65  raise IndexError("Out of range")
66 
67 
68 class ReplicaExchange(object):
69  """A macro to help setup and run replica exchange.
70  Supports Monte Carlo and molecular dynamics.
71  Produces trajectory RMF files, best PDB structures,
72  and output stat files.
73  """
74  def __init__(self, model, root_hier,
75  monte_carlo_sample_objects=None,
76  molecular_dynamics_sample_objects=None,
77  output_objects=[],
78  rmf_output_objects=None,
79  monte_carlo_temperature=1.0,
80  simulated_annealing=False,
81  simulated_annealing_minimum_temperature=1.0,
82  simulated_annealing_maximum_temperature=2.5,
83  simulated_annealing_minimum_temperature_nframes=100,
84  simulated_annealing_maximum_temperature_nframes=100,
85  replica_exchange_minimum_temperature=1.0,
86  replica_exchange_maximum_temperature=2.5,
87  replica_exchange_swap=True,
88  num_sample_rounds=1,
89  number_of_best_scoring_models=500,
90  monte_carlo_steps=10,
91  self_adaptive=False,
92  molecular_dynamics_steps=10,
93  molecular_dynamics_max_time_step=1.0,
94  number_of_frames=1000,
95  save_coordinates_mode="lowest_temperature",
96  nframes_write_coordinates=1,
97  write_initial_rmf=True,
98  initial_rmf_name_suffix="initial",
99  stat_file_name_suffix="stat",
100  best_pdb_name_suffix="model",
101  mmcif=False,
102  do_clean_first=True,
103  do_create_directories=True,
104  global_output_directory="./",
105  rmf_dir="rmfs/",
106  best_pdb_dir="pdbs/",
107  replica_stat_file_suffix="stat_replica",
108  em_object_for_rmf=None,
109  atomistic=False,
110  replica_exchange_object=None,
111  test_mode=False,
112  score_moved=False):
113  """Constructor.
114  @param model The IMP model
115  @param root_hier Top-level (System)hierarchy
116  @param monte_carlo_sample_objects Objects for MC sampling, which
117  should generally be a simple list of Mover objects, e.g.
118  from DegreesOfFreedom.get_movers().
119  @param molecular_dynamics_sample_objects Objects for MD sampling,
120  which should generally be a simple list of particles.
121  @param output_objects A list of structural objects and restraints
122  that will be included in output (ie, statistics "stat"
123  files). Any object that provides a get_output() method
124  can be used here. If None is passed
125  the macro will not write stat files.
126  @param rmf_output_objects A list of structural objects and
127  restraints that will be included in rmf. Any object
128  that provides a get_output() method can be used here.
129  @param monte_carlo_temperature MC temp (may need to be optimized
130  based on post-sampling analysis)
131  @param simulated_annealing If True, perform simulated annealing
132  @param simulated_annealing_minimum_temperature Should generally be
133  the same as monte_carlo_temperature.
134  @param simulated_annealing_minimum_temperature_nframes Number of
135  frames to compute at minimum temperature.
136  @param simulated_annealing_maximum_temperature_nframes Number of
137  frames to compute at
138  temps > simulated_annealing_maximum_temperature.
139  @param replica_exchange_minimum_temperature Low temp for REX; should
140  generally be the same as monte_carlo_temperature.
141  @param replica_exchange_maximum_temperature High temp for REX
142  @param replica_exchange_swap Boolean, enable disable temperature
143  swap (Default=True)
144  @param num_sample_rounds Number of rounds of MC/MD per cycle
145  @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
146  models to keep around for analysis.
147  @param mmcif If True, write best scoring models in mmCIF format;
148  if False (the default), write in legacy PDB format.
149  @param best_pdb_dir The directory under `global_output_directory`
150  where best-scoring PDB/mmCIF files are written.
151  @param best_pdb_name_suffix Part of the file name for best-scoring
152  PDB/mmCIF files.
153  @param monte_carlo_steps Number of MC steps per round
154  @param self_adaptive self adaptive scheme for Monte Carlo movers
155  @param molecular_dynamics_steps Number of MD steps per round
156  @param molecular_dynamics_max_time_step Max time step for MD
157  @param number_of_frames Number of REX frames to run
158  @param save_coordinates_mode string: how to save coordinates.
159  "lowest_temperature" (default) only the lowest temperatures
160  is saved
161  "25th_score" all replicas whose score is below the 25th
162  percentile
163  "50th_score" all replicas whose score is below the 50th
164  percentile
165  "75th_score" all replicas whose score is below the 75th
166  percentile
167  @param nframes_write_coordinates How often to write the coordinates
168  of a frame
169  @param write_initial_rmf Write the initial configuration
170  @param global_output_directory Folder that will be created to house
171  output.
172  @param test_mode Set to True to avoid writing any files, just test
173  one frame.
174  @param score_moved If True, attempt to speed up Monte Carlo
175  sampling by caching scoring function terms on particles
176  that didn't move.
177  """
178  self.model = model
179  self.vars = {}
180 
181  # add check hierarchy is multistate
182  if output_objects == []:
183  # The "[]" in the default parameters is a global object, so make
184  # our own copy here
185  self.output_objects = []
186  else:
187  self.output_objects = output_objects
188  self.rmf_output_objects = rmf_output_objects
189  if (isinstance(root_hier, IMP.atom.Hierarchy)
190  and not root_hier.get_parent()):
191  if self.output_objects is not None:
192  self.output_objects.append(
193  IMP.pmi.io.TotalScoreOutput(self.model))
194  if self.rmf_output_objects is not None:
195  self.rmf_output_objects.append(
196  IMP.pmi.io.TotalScoreOutput(self.model))
197  self.root_hier = root_hier
198  states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
199  self.vars["number_of_states"] = len(states)
200  if len(states) > 1:
201  self.root_hiers = states
202  self.is_multi_state = True
203  else:
204  self.root_hier = root_hier
205  self.is_multi_state = False
206  else:
207  raise TypeError("Must provide System hierarchy (root_hier)")
208 
209  self._rmf_restraints = _RMFRestraints(model, None)
210  self.em_object_for_rmf = em_object_for_rmf
211  self.monte_carlo_sample_objects = monte_carlo_sample_objects
212  self.vars["self_adaptive"] = self_adaptive
213  self.molecular_dynamics_sample_objects = \
214  molecular_dynamics_sample_objects
215  self.replica_exchange_object = replica_exchange_object
216  self.molecular_dynamics_max_time_step = \
217  molecular_dynamics_max_time_step
218  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
219  self.vars["replica_exchange_minimum_temperature"] = \
220  replica_exchange_minimum_temperature
221  self.vars["replica_exchange_maximum_temperature"] = \
222  replica_exchange_maximum_temperature
223  self.vars["replica_exchange_swap"] = replica_exchange_swap
224  self.vars["simulated_annealing"] = simulated_annealing
225  self.vars["simulated_annealing_minimum_temperature"] = \
226  simulated_annealing_minimum_temperature
227  self.vars["simulated_annealing_maximum_temperature"] = \
228  simulated_annealing_maximum_temperature
229  self.vars["simulated_annealing_minimum_temperature_nframes"] = \
230  simulated_annealing_minimum_temperature_nframes
231  self.vars["simulated_annealing_maximum_temperature_nframes"] = \
232  simulated_annealing_maximum_temperature_nframes
233 
234  self.vars["num_sample_rounds"] = num_sample_rounds
235  self.vars[
236  "number_of_best_scoring_models"] = number_of_best_scoring_models
237  self.vars["monte_carlo_steps"] = monte_carlo_steps
238  self.vars["molecular_dynamics_steps"] = molecular_dynamics_steps
239  self.vars["number_of_frames"] = number_of_frames
240  if save_coordinates_mode not in ("lowest_temperature", "25th_score",
241  "50th_score", "75th_score"):
242  raise Exception("save_coordinates_mode has unrecognized value")
243  else:
244  self.vars["save_coordinates_mode"] = save_coordinates_mode
245  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
246  self.vars["write_initial_rmf"] = write_initial_rmf
247  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
248  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
249  self.vars["mmcif"] = mmcif
250  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
251  self.vars["do_clean_first"] = do_clean_first
252  self.vars["do_create_directories"] = do_create_directories
253  self.vars["global_output_directory"] = global_output_directory
254  self.vars["rmf_dir"] = rmf_dir
255  self.vars["best_pdb_dir"] = best_pdb_dir
256  self.vars["atomistic"] = atomistic
257  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
258  self.vars["geometries"] = None
259  self.test_mode = test_mode
260  self.score_moved = score_moved
261 
262  def add_geometries(self, geometries):
263  if self.vars["geometries"] is None:
264  self.vars["geometries"] = list(geometries)
265  else:
266  self.vars["geometries"].extend(geometries)
267 
268  def show_info(self):
269  print("ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
270  "rmfs/*.rmf3 for each replica ")
271  print("--- it stores the best scoring pdb models in pdbs/")
272  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
273  "lowest temperature")
274  print("--- variables:")
275  keys = list(self.vars.keys())
276  keys.sort()
277  for v in keys:
278  print("------", v.ljust(30), self.vars[v])
279 
280  def get_replica_exchange_object(self):
281  return self.replica_exchange_object
282 
283  def _add_provenance(self, sampler_md, sampler_mc):
284  """Record details about the sampling in the IMP Hierarchies"""
285  iterations = 0
286  if sampler_md:
287  method = "Molecular Dynamics"
288  iterations += self.vars["molecular_dynamics_steps"]
289  if sampler_mc:
290  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
291  iterations += self.vars["monte_carlo_steps"]
292  # If no sampling is actually done, no provenance to write
293  if iterations == 0 or self.vars["number_of_frames"] == 0:
294  return
295  iterations *= self.vars["num_sample_rounds"]
296 
297  pi = self.model.add_particle("sampling")
299  self.model, pi, method, self.vars["number_of_frames"],
300  iterations)
301  p.set_number_of_replicas(
302  self.replica_exchange_object.get_number_of_replicas())
303  IMP.pmi.tools._add_pmi_provenance(self.root_hier)
304  IMP.core.add_provenance(self.model, self.root_hier, p)
305 
306  def execute_macro(self):
307  temp_index_factor = 100000.0
308  samplers = []
309  sampler_mc = None
310  sampler_md = None
311  if self.monte_carlo_sample_objects is not None:
312  print("Setting up MonteCarlo")
313  sampler_mc = IMP.pmi.samplers.MonteCarlo(
314  self.model, self.monte_carlo_sample_objects,
315  self.vars["monte_carlo_temperature"],
316  score_moved=self.score_moved)
317  if self.vars["simulated_annealing"]:
318  tmin = self.vars["simulated_annealing_minimum_temperature"]
319  tmax = self.vars["simulated_annealing_maximum_temperature"]
320  nfmin = self.vars[
321  "simulated_annealing_minimum_temperature_nframes"]
322  nfmax = self.vars[
323  "simulated_annealing_maximum_temperature_nframes"]
324  sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
325  if self.vars["self_adaptive"]:
326  sampler_mc.set_self_adaptive(
327  isselfadaptive=self.vars["self_adaptive"])
328  if self.output_objects is not None:
329  self.output_objects.append(sampler_mc)
330  if self.rmf_output_objects is not None:
331  self.rmf_output_objects.append(sampler_mc)
332  samplers.append(sampler_mc)
333 
334  if self.molecular_dynamics_sample_objects is not None:
335  print("Setting up MolecularDynamics")
337  self.model, self.molecular_dynamics_sample_objects,
338  self.vars["monte_carlo_temperature"],
339  maximum_time_step=self.molecular_dynamics_max_time_step)
340  if self.vars["simulated_annealing"]:
341  tmin = self.vars["simulated_annealing_minimum_temperature"]
342  tmax = self.vars["simulated_annealing_maximum_temperature"]
343  nfmin = self.vars[
344  "simulated_annealing_minimum_temperature_nframes"]
345  nfmax = self.vars[
346  "simulated_annealing_maximum_temperature_nframes"]
347  sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
348  if self.output_objects is not None:
349  self.output_objects.append(sampler_md)
350  if self.rmf_output_objects is not None:
351  self.rmf_output_objects.append(sampler_md)
352  samplers.append(sampler_md)
353 # -------------------------------------------------------------------------
354 
355  print("Setting up ReplicaExchange")
357  self.model, self.vars["replica_exchange_minimum_temperature"],
358  self.vars["replica_exchange_maximum_temperature"], samplers,
359  replica_exchange_object=self.replica_exchange_object)
360  self.replica_exchange_object = rex.rem
361 
362  myindex = rex.get_my_index()
363  if self.output_objects is not None:
364  self.output_objects.append(rex)
365  if self.rmf_output_objects is not None:
366  self.rmf_output_objects.append(rex)
367  # must reset the minimum temperature due to the
368  # different binary length of rem.get_my_parameter double and python
369  # float
370  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
371 
372 # -------------------------------------------------------------------------
373 
374  globaldir = self.vars["global_output_directory"] + "/"
375  rmf_dir = globaldir + self.vars["rmf_dir"]
376  pdb_dir = globaldir + self.vars["best_pdb_dir"]
377 
378  if not self.test_mode:
379  if self.vars["do_clean_first"]:
380  pass
381 
382  if self.vars["do_create_directories"]:
383 
384  try:
385  os.makedirs(globaldir)
386  except: # noqa: E722
387  pass
388  try:
389  os.makedirs(rmf_dir)
390  except: # noqa: E722
391  pass
392 
393  if not self.is_multi_state:
394  try:
395  os.makedirs(pdb_dir)
396  except: # noqa: E722
397  pass
398  else:
399  for n in range(self.vars["number_of_states"]):
400  try:
401  os.makedirs(pdb_dir + "/" + str(n))
402  except: # noqa: E722
403  pass
404 
405 # -------------------------------------------------------------------------
406 
408  if self.output_objects is not None:
409  self.output_objects.append(sw)
410  if self.rmf_output_objects is not None:
411  self.rmf_output_objects.append(sw)
412 
413  print("Setting up stat file")
414  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
415  low_temp_stat_file = globaldir + \
416  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
417 
418  # Ensure model is updated before saving init files
419  if not self.test_mode:
420  self.model.update()
421 
422  if not self.test_mode:
423  if self.output_objects is not None:
424  output.init_stat2(low_temp_stat_file,
425  self.output_objects,
426  extralabels=["rmf_file", "rmf_frame_index"])
427  else:
428  print("Stat file writing is disabled")
429 
430  if self.rmf_output_objects is not None:
431  print("Stat info being written in the rmf file")
432 
433  print("Setting up replica stat file")
434  replica_stat_file = globaldir + \
435  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
436  if not self.test_mode:
437  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
438 
439  if not self.test_mode:
440  print("Setting up best pdb files")
441  if not self.is_multi_state:
442  if self.vars["number_of_best_scoring_models"] > 0:
443  output.init_pdb_best_scoring(
444  pdb_dir + "/" + self.vars["best_pdb_name_suffix"],
445  self.root_hier,
446  self.vars["number_of_best_scoring_models"],
447  replica_exchange=True,
448  mmcif=self.vars['mmcif'],
449  best_score_file=globaldir + "best.scores.rex.py")
450  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
451  output.write_psf(
452  pdb_dir + "/" + "model.psf",
453  pdb_dir + "/" +
454  self.vars["best_pdb_name_suffix"] + pdbext)
455  else:
456  if self.vars["number_of_best_scoring_models"] > 0:
457  for n in range(self.vars["number_of_states"]):
458  output.init_pdb_best_scoring(
459  pdb_dir + "/" + str(n) + "/" +
460  self.vars["best_pdb_name_suffix"],
461  self.root_hiers[n],
462  self.vars["number_of_best_scoring_models"],
463  replica_exchange=True,
464  mmcif=self.vars['mmcif'],
465  best_score_file=globaldir + "best.scores.rex.py")
466  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
467  output.write_psf(
468  pdb_dir + "/" + str(n) + "/" + "model.psf",
469  pdb_dir + "/" + str(n) + "/" +
470  self.vars["best_pdb_name_suffix"] + pdbext)
471 # ---------------------------------------------
472 
473  if self.em_object_for_rmf is not None:
474  output_hierarchies = [
475  self.root_hier,
476  self.em_object_for_rmf.get_density_as_hierarchy(
477  )]
478  else:
479  output_hierarchies = [self.root_hier]
480 
481  if not self.test_mode:
482  print("Setting up and writing initial rmf coordinate file")
483  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
484  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
485  output_hierarchies,
486  listofobjects=self.rmf_output_objects)
487  if self._rmf_restraints:
488  output.add_restraints_to_rmf(
489  init_suffix + "." + str(myindex) + ".rmf3",
490  self._rmf_restraints)
491  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
492  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
493 
494  if not self.test_mode:
495  mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
496  else:
497  mpivs = _MockMPIValues()
498 
499  self._add_provenance(sampler_md, sampler_mc)
500 
501  if not self.test_mode:
502  print("Setting up production rmf files")
503  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
504  output.init_rmf(rmfname, output_hierarchies,
505  geometries=self.vars["geometries"],
506  listofobjects=self.rmf_output_objects)
507 
508  if self._rmf_restraints:
509  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
510 
511  ntimes_at_low_temp = 0
512 
513  if myindex == 0:
514  self.show_info()
515  self.replica_exchange_object.set_was_used(True)
516  nframes = self.vars["number_of_frames"]
517  if self.test_mode:
518  nframes = 1
519  for i in range(nframes):
520  if self.test_mode:
521  score = 0.
522  else:
523  for nr in range(self.vars["num_sample_rounds"]):
524  if sampler_md is not None:
525  sampler_md.optimize(
526  self.vars["molecular_dynamics_steps"])
527  if sampler_mc is not None:
528  sampler_mc.optimize(self.vars["monte_carlo_steps"])
530  self.model).evaluate(False)
531  mpivs.set_value("score", score)
532  output.set_output_entry("score", score)
533 
534  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
535 
536  if self.vars["save_coordinates_mode"] == "lowest_temperature":
537  save_frame = (min_temp_index == my_temp_index)
538  elif self.vars["save_coordinates_mode"] == "25th_score":
539  score_perc = mpivs.get_percentile("score")
540  save_frame = (score_perc*100.0 <= 25.0)
541  elif self.vars["save_coordinates_mode"] == "50th_score":
542  score_perc = mpivs.get_percentile("score")
543  save_frame = (score_perc*100.0 <= 50.0)
544  elif self.vars["save_coordinates_mode"] == "75th_score":
545  score_perc = mpivs.get_percentile("score")
546  save_frame = (score_perc*100.0 <= 75.0)
547 
548  # Ensure model is updated before saving output files
549  if save_frame and not self.test_mode:
550  self.model.update()
551 
552  if save_frame:
553  print("--- frame %s score %s " % (str(i), str(score)))
554 
555  if not self.test_mode:
556  if i % self.vars["nframes_write_coordinates"] == 0:
557  print('--- writing coordinates')
558  if self.vars["number_of_best_scoring_models"] > 0:
559  output.write_pdb_best_scoring(score)
560  output.write_rmf(rmfname)
561  output.set_output_entry("rmf_file", rmfname)
562  output.set_output_entry("rmf_frame_index",
563  ntimes_at_low_temp)
564  else:
565  output.set_output_entry("rmf_file", rmfname)
566  output.set_output_entry("rmf_frame_index", '-1')
567  if self.output_objects is not None:
568  output.write_stat2(low_temp_stat_file)
569  ntimes_at_low_temp += 1
570 
571  if not self.test_mode:
572  output.write_stat2(replica_stat_file)
573  if self.vars["replica_exchange_swap"]:
574  rex.swap_temp(i, score)
575  for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
576  p.add_replica_exchange(state, self)
577 
578  if not self.test_mode:
579  print("closing production rmf files")
580  output.close_rmf(rmfname)
581 
582 
583 class BuildSystem(object):
584  """A macro to build a IMP::pmi::topology::System based on a
585  TopologyReader object.
586 
587  Easily create multi-state systems by calling this macro
588  repeatedly with different TopologyReader objects!
589  A useful function is get_molecules() which returns the PMI Molecules
590  grouped by state as a dictionary with key = (molecule name),
591  value = IMP.pmi.topology.Molecule
592  Quick multi-state system:
593  @code{.python}
594  model = IMP.Model()
595  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
596  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
597  bs = IMP.pmi.macros.BuildSystem(model)
598  bs.add_state(reader1)
599  bs.add_state(reader2)
600  bs.execute_macro() # build everything including degrees of freedom
601  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
602  ### now you have a two state system, you add restraints etc
603  @endcode
604  @note The "domain name" entry of the topology reader is not used.
605  All molecules are set up by the component name, but split into rigid bodies
606  as requested.
607  """
608 
609  _alphabets = {'DNA': IMP.pmi.alphabets.dna,
610  'RNA': IMP.pmi.alphabets.rna}
611 
612  def __init__(self, model, sequence_connectivity_scale=4.0,
613  force_create_gmm_files=False, resolutions=[1, 10],
614  name='System'):
615  """Constructor
616  @param model An IMP Model
617  @param sequence_connectivity_scale For scaling the connectivity
618  restraint
619  @param force_create_gmm_files If True, will sample and create GMMs
620  no matter what. If False, will only sample if the
621  files don't exist. If number of Gaussians is zero, won't
622  do anything.
623  @param resolutions The resolutions to build for structured regions
624  @param name The name of the top-level hierarchy node.
625  """
626  self.model = model
627  self.system = IMP.pmi.topology.System(self.model, name=name)
628  self._readers = [] # the TopologyReaders (one per state)
629  # TempResidues for each domain key=unique name,
630  # value=(atomic_res,non_atomic_res).
631  self._domain_res = []
632  self._domains = [] # key = domain unique name, value = Component
633  self.force_create_gmm_files = force_create_gmm_files
634  self.resolutions = resolutions
635 
636  def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
637  chain_ids=None):
638  """Add a state using the topology info in a
639  IMP::pmi::topology::TopologyReader object.
640  When you are done adding states, call execute_macro()
641  @param reader The TopologyReader object
642  @param fasta_name_map dictionary for converting protein names
643  found in the fasta file
644  @param chain_ids A list or string of chain IDs for assigning to
645  newly-created molecules, e.g.
646  `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
647  If not specified, chain IDs A through Z are assigned, then
648  AA through AZ, then BA through BZ, and so on, in the same
649  fashion as PDB.
650  """
651  state = self.system.create_state()
652  self._readers.append(reader)
653  # key is unique name, value is (atomic res, nonatomicres)
654  these_domain_res = {}
655  these_domains = {} # key is unique name, value is _Component
656  if chain_ids is None:
657  chain_ids = IMP.pmi.output._ChainIDs()
658  numchain = 0
659 
660  # setup representation
661  # loop over molecules, copies, then domains
662  for molname in reader.get_molecules():
663  copies = reader.get_molecules()[molname].domains
664  for nc, copyname in enumerate(copies):
665  print("BuildSystem.add_state: setting up molecule %s copy "
666  "number %s" % (molname, str(nc)))
667  copy = copies[copyname]
668  # option to not rename chains
669  if keep_chain_id:
670  all_chains = [c for c in copy if c.chain is not None]
671  if all_chains:
672  chain_id = all_chains[0].chain
673  else:
674  chain_id = chain_ids[numchain]
675  warnings.warn(
676  "No PDBs specified for %s, so keep_chain_id has "
677  "no effect; using default chain ID '%s'"
678  % (molname, chain_id), IMP.pmi.ParameterWarning)
679  else:
680  chain_id = chain_ids[numchain]
681  if nc == 0:
682  alphabet = IMP.pmi.alphabets.amino_acid
683  fasta_flag = copy[0].fasta_flag
684  if fasta_flag in self._alphabets:
685  alphabet = self._alphabets[fasta_flag]
687  copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
688  print("BuildSystem.add_state: molecule %s sequence has "
689  "%s residues" % (molname, len(seq)))
690  orig_mol = state.create_molecule(molname, seq, chain_id,
691  alphabet=alphabet)
692  mol = orig_mol
693  numchain += 1
694  else:
695  print("BuildSystem.add_state: creating a copy for "
696  "molecule %s" % molname)
697  mol = orig_mol.create_copy(chain_id)
698  numchain += 1
699 
700  for domainnumber, domain in enumerate(copy):
701  print("BuildSystem.add_state: ---- setting up domain %s "
702  "of molecule %s" % (domainnumber, molname))
703  # we build everything in the residue range, even if it
704  # extends beyond what's in the actual PDB file
705  these_domains[domain.get_unique_name()] = domain
706  if domain.residue_range == [] or \
707  domain.residue_range is None:
708  domain_res = mol.get_residues()
709  else:
710  start = domain.residue_range[0]+domain.pdb_offset
711  if domain.residue_range[1] == 'END':
712  end = len(mol.sequence)
713  else:
714  end = domain.residue_range[1]+domain.pdb_offset
715  domain_res = mol.residue_range(start-1, end-1)
716  print("BuildSystem.add_state: -------- domain %s of "
717  "molecule %s extends from residue %s to "
718  "residue %s "
719  % (domainnumber, molname, start, end))
720  if domain.pdb_file == "BEADS":
721  print("BuildSystem.add_state: -------- domain %s of "
722  "molecule %s represented by BEADS "
723  % (domainnumber, molname))
724  mol.add_representation(
725  domain_res,
726  resolutions=[domain.bead_size],
727  setup_particles_as_densities=(
728  domain.em_residues_per_gaussian != 0),
729  color=domain.color)
730  these_domain_res[domain.get_unique_name()] = \
731  (set(), domain_res)
732  elif domain.pdb_file == "IDEAL_HELIX":
733  print("BuildSystem.add_state: -------- domain %s of "
734  "molecule %s represented by IDEAL_HELIX "
735  % (domainnumber, molname))
736  emper = domain.em_residues_per_gaussian
737  mol.add_representation(
738  domain_res,
739  resolutions=self.resolutions,
740  ideal_helix=True,
741  density_residues_per_component=emper,
742  density_prefix=domain.density_prefix,
743  density_force_compute=self.force_create_gmm_files,
744  color=domain.color)
745  these_domain_res[domain.get_unique_name()] = \
746  (domain_res, set())
747  else:
748  print("BuildSystem.add_state: -------- domain %s of "
749  "molecule %s represented by pdb file %s "
750  % (domainnumber, molname, domain.pdb_file))
751  domain_atomic = mol.add_structure(domain.pdb_file,
752  domain.chain,
753  domain.residue_range,
754  domain.pdb_offset,
755  soft_check=True)
756  domain_non_atomic = domain_res - domain_atomic
757  if not domain.em_residues_per_gaussian:
758  mol.add_representation(
759  domain_atomic, resolutions=self.resolutions,
760  color=domain.color)
761  if len(domain_non_atomic) > 0:
762  mol.add_representation(
763  domain_non_atomic,
764  resolutions=[domain.bead_size],
765  color=domain.color)
766  else:
767  print("BuildSystem.add_state: -------- domain %s "
768  "of molecule %s represented by gaussians "
769  % (domainnumber, molname))
770  emper = domain.em_residues_per_gaussian
771  creategmm = self.force_create_gmm_files
772  mol.add_representation(
773  domain_atomic,
774  resolutions=self.resolutions,
775  density_residues_per_component=emper,
776  density_prefix=domain.density_prefix,
777  density_force_compute=creategmm,
778  color=domain.color)
779  if len(domain_non_atomic) > 0:
780  mol.add_representation(
781  domain_non_atomic,
782  resolutions=[domain.bead_size],
783  setup_particles_as_densities=True,
784  color=domain.color)
785  these_domain_res[domain.get_unique_name()] = (
786  domain_atomic, domain_non_atomic)
787  self._domain_res.append(these_domain_res)
788  self._domains.append(these_domains)
789  print('BuildSystem.add_state: State', len(self.system.states), 'added')
790  return state
791 
792  def get_molecules(self):
793  """Return list of all molecules grouped by state.
794  For each state, it's a dictionary of Molecules where key is the
795  molecule name
796  """
797  return [s.get_molecules() for s in self.system.get_states()]
798 
799  def get_molecule(self, molname, copy_index=0, state_index=0):
800  return self.system.get_states()[state_index].get_molecules()[
801  molname][copy_index]
802 
803  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04,
804  max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
805  """Builds representations and sets up degrees of freedom"""
806  print("BuildSystem.execute_macro: building representations")
807  self.root_hier = self.system.build()
808 
809  print("BuildSystem.execute_macro: setting up degrees of freedom")
810  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
811  for nstate, reader in enumerate(self._readers):
812  rbs = reader.get_rigid_bodies()
813  srbs = reader.get_super_rigid_bodies()
814  csrbs = reader.get_chains_of_super_rigid_bodies()
815 
816  # add rigid bodies
817  domains_in_rbs = set()
818  for rblist in rbs:
819  print("BuildSystem.execute_macro: -------- building rigid "
820  "body %s" % (str(rblist)))
821  all_res = IMP.pmi.tools.OrderedSet()
822  bead_res = IMP.pmi.tools.OrderedSet()
823  for dname in rblist:
824  domain = self._domains[nstate][dname]
825  print("BuildSystem.execute_macro: -------- adding %s"
826  % (str(dname)))
827  all_res |= self._domain_res[nstate][dname][0]
828  bead_res |= self._domain_res[nstate][dname][1]
829  domains_in_rbs.add(dname)
830  all_res |= bead_res
831  print("BuildSystem.execute_macro: -------- creating rigid "
832  "body with max_trans %s max_rot %s "
833  "non_rigid_max_trans %s"
834  % (str(max_rb_trans), str(max_rb_rot),
835  str(max_bead_trans)))
836  self.dof.create_rigid_body(all_res,
837  nonrigid_parts=bead_res,
838  max_trans=max_rb_trans,
839  max_rot=max_rb_rot,
840  nonrigid_max_trans=max_bead_trans,
841  name="RigidBody %s" % dname)
842 
843  # if you have any domains not in an RB, set them as flexible beads
844  for dname, domain in self._domains[nstate].items():
845  if dname not in domains_in_rbs:
846  if domain.pdb_file != "BEADS":
847  warnings.warn(
848  "No rigid bodies set for %s. Residues read from "
849  "the PDB file will not be sampled - only regions "
850  "missing from the PDB will be treated flexibly. "
851  "To sample the entire sequence, use BEADS instead "
852  "of a PDB file name" % dname,
854  self.dof.create_flexible_beads(
855  self._domain_res[nstate][dname][1],
856  max_trans=max_bead_trans)
857 
858  # add super rigid bodies
859  for srblist in srbs:
860  print("BuildSystem.execute_macro: -------- building "
861  "super rigid body %s" % (str(srblist)))
862  all_res = IMP.pmi.tools.OrderedSet()
863  for dname in srblist:
864  print("BuildSystem.execute_macro: -------- adding %s"
865  % (str(dname)))
866  all_res |= self._domain_res[nstate][dname][0]
867  all_res |= self._domain_res[nstate][dname][1]
868 
869  print("BuildSystem.execute_macro: -------- creating super "
870  "rigid body with max_trans %s max_rot %s "
871  % (str(max_srb_trans), str(max_srb_rot)))
872  self.dof.create_super_rigid_body(
873  all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
874 
875  # add chains of super rigid bodies
876  for csrblist in csrbs:
877  all_res = IMP.pmi.tools.OrderedSet()
878  for dname in csrblist:
879  all_res |= self._domain_res[nstate][dname][0]
880  all_res |= self._domain_res[nstate][dname][1]
881  all_res = list(all_res)
882  all_res.sort(key=lambda r: r.get_index())
883  self.dof.create_main_chain_mover(all_res)
884  return self.root_hier, self.dof
885 
886 
887 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
888 class AnalysisReplicaExchange0(object):
889  """A macro for running all the basic operations of analysis.
890  Includes clustering, precision analysis, and making ensemble density maps.
891  A number of plots are also supported.
892  """
893  def __init__(self, model,
894  merge_directories=["./"],
895  stat_file_name_suffix="stat",
896  best_pdb_name_suffix="model",
897  do_clean_first=True,
898  do_create_directories=True,
899  global_output_directory="output/",
900  replica_stat_file_suffix="stat_replica",
901  global_analysis_result_directory="./analysis/",
902  test_mode=False):
903  """Constructor.
904  @param model The IMP model
905  @param stat_file_name_suffix
906  @param merge_directories The directories containing output files
907  @param best_pdb_name_suffix
908  @param do_clean_first
909  @param do_create_directories
910  @param global_output_directory Where everything is
911  @param replica_stat_file_suffix
912  @param global_analysis_result_directory
913  @param test_mode If True, nothing is changed on disk
914  """
915 
916  try:
917  from mpi4py import MPI
918  self.comm = MPI.COMM_WORLD
919  self.rank = self.comm.Get_rank()
920  self.number_of_processes = self.comm.size
921  except ImportError:
922  self.rank = 0
923  self.number_of_processes = 1
924 
925  self.test_mode = test_mode
926  self._protocol_output = []
927  self.cluster_obj = None
928  self.model = model
929  stat_dir = global_output_directory
930  self.stat_files = []
931  # it contains the position of the root directories
932  for rd in merge_directories:
933  stat_files = glob.glob(os.path.join(rd, stat_dir, "stat.*.out"))
934  if len(stat_files) == 0:
935  warnings.warn("no stat files found in %s"
936  % os.path.join(rd, stat_dir),
938  self.stat_files += stat_files
939 
940  def add_protocol_output(self, p):
941  """Capture details of the modeling protocol.
942  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
943  """
944  # Assume last state is the one we're interested in
945  self._protocol_output.append((p, p._last_state))
946 
947  def get_modeling_trajectory(self,
948  score_key="Total_Score",
949  rmf_file_key="rmf_file",
950  rmf_file_frame_key="rmf_frame_index",
951  outputdir="./",
952  get_every=1,
953  nframes_trajectory=10000):
954  """ Get a trajectory of the modeling run, for generating
955  demonstrative movies
956 
957  @param score_key The score for ranking models
958  @param rmf_file_key Key pointing to RMF filename
959  @param rmf_file_frame_key Key pointing to RMF frame number
960  @param outputdir The local output directory used in the run
961  @param get_every Extract every nth frame
962  @param nframes_trajectory Total number of frames of the trajectory
963  """
964  import math
965 
966  trajectory_models = IMP.pmi.io.get_trajectory_models(
967  self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
968  get_every)
969  score_list = list(map(float, trajectory_models[2]))
970 
971  max_score = max(score_list)
972  min_score = min(score_list)
973 
974  bins = [(max_score-min_score)*math.exp(-float(i))+min_score
975  for i in range(nframes_trajectory)]
976  binned_scores = [None]*nframes_trajectory
977  binned_model_indexes = [-1]*nframes_trajectory
978 
979  for model_index, s in enumerate(score_list):
980  bins_score_diffs = [abs(s-b) for b in bins]
981  bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
982  if binned_scores[bin_index] is None:
983  binned_scores[bin_index] = s
984  binned_model_indexes[bin_index] = model_index
985  else:
986  old_diff = abs(binned_scores[bin_index]-bins[bin_index])
987  new_diff = abs(s-bins[bin_index])
988  if new_diff < old_diff:
989  binned_scores[bin_index] = s
990  binned_model_indexes[bin_index] = model_index
991 
992  print(binned_scores)
993  print(binned_model_indexes)
994 
995  def _expand_ambiguity(self, prot, d):
996  """If using PMI2, expand the dictionary to include copies as
997  ambiguous options
998 
999  This also keeps the states separate.
1000  """
1001  newdict = {}
1002  for key in d:
1003  val = d[key]
1004  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
1005  newdict[key] = val
1006  continue
1007  states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1008  if isinstance(val, tuple):
1009  start = val[0]
1010  stop = val[1]
1011  name = val[2]
1012  else:
1013  start = 1
1014  stop = -1
1015  name = val
1016  for nst in range(len(states)):
1017  sel = IMP.atom.Selection(prot, molecule=name, state_index=nst)
1018  copies = sel.get_selected_particles(with_representation=False)
1019  if len(copies) > 1:
1020  for nc in range(len(copies)):
1021  if len(states) > 1:
1022  newdict['%s.%i..%i' % (name, nst, nc)] = \
1023  (start, stop, name, nc, nst)
1024  else:
1025  newdict['%s..%i' % (name, nc)] = \
1026  (start, stop, name, nc, nst)
1027  else:
1028  newdict[key] = val
1029  return newdict
1030 
1031  def clustering(self,
1032  score_key="Total_Score",
1033  rmf_file_key="rmf_file",
1034  rmf_file_frame_key="rmf_frame_index",
1035  state_number=0,
1036  prefiltervalue=None,
1037  feature_keys=[],
1038  outputdir="./",
1039  alignment_components=None,
1040  number_of_best_scoring_models=10,
1041  rmsd_calculation_components=None,
1042  distance_matrix_file='distances.mat',
1043  load_distance_matrix_file=False,
1044  skip_clustering=False,
1045  number_of_clusters=1,
1046  display_plot=False,
1047  exit_after_display=True,
1048  get_every=1,
1049  first_and_last_frames=None,
1050  density_custom_ranges=None,
1051  write_pdb_with_centered_coordinates=False,
1052  voxel_size=5.0):
1053  """Get the best scoring models, compute a distance matrix,
1054  cluster them, and create density maps.
1055 
1056  Tuple format: "molname" just the molecule,
1057  or (start,stop,molname,copy_num(optional),state_num(optional)
1058  Can pass None for copy or state to ignore that field.
1059  If you don't pass a specific copy number
1060 
1061  @param score_key The score for ranking models.
1062  @param rmf_file_key Key pointing to RMF filename
1063  @param rmf_file_frame_key Key pointing to RMF frame number
1064  @param state_number State number to analyze
1065  @param prefiltervalue Only include frames where the
1066  score key is below this value
1067  @param feature_keys Keywords for which you want to
1068  calculate average, medians, etc.
1069  If you pass "Keyname" it'll include everything that matches
1070  "*Keyname*"
1071  @param outputdir The local output directory used in
1072  the run
1073  @param alignment_components Dictionary with keys=groupname,
1074  values are tuples for aligning the structures
1075  e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1076  @param number_of_best_scoring_models Num models to keep per run
1077  @param rmsd_calculation_components For calculating RMSD
1078  (same format as alignment_components)
1079  @param distance_matrix_file Where to store/read the
1080  distance matrix
1081  @param load_distance_matrix_file Try to load the distance
1082  matrix file
1083  @param skip_clustering Just extract the best scoring
1084  models and save the pdbs
1085  @param number_of_clusters Number of k-means clusters
1086  @param display_plot Display the distance matrix
1087  @param exit_after_display Exit after displaying distance
1088  matrix
1089  @param get_every Extract every nth frame
1090  @param first_and_last_frames A tuple with the first and last
1091  frames to be analyzed. Values are percentages!
1092  Default: get all frames
1093  @param density_custom_ranges For density calculation
1094  (same format as alignment_components)
1095  @param write_pdb_with_centered_coordinates
1096  @param voxel_size Used for the density output
1097  """
1098  # Track provenance information to be added to each output model
1099  prov = []
1100  self._outputdir = Path(outputdir).absolute()
1101  self._number_of_clusters = number_of_clusters
1102  for p, state in self._protocol_output:
1103  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1104 
1105  if self.test_mode:
1106  return
1107 
1108  if self.rank == 0:
1109  try:
1110  os.mkdir(outputdir)
1111  except: # noqa: E722
1112  pass
1113 
1114  if not load_distance_matrix_file:
1115  if len(self.stat_files) == 0:
1116  print("ERROR: no stat file found in the given path")
1117  return
1118  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1119  self.stat_files, self.number_of_processes)[self.rank]
1120 
1121  # read ahead to check if you need the PMI2 score key instead
1122  for k in (score_key, rmf_file_key, rmf_file_frame_key):
1123  if k in feature_keys:
1124  warnings.warn(
1125  "no need to pass " + k + " to feature_keys.",
1127  feature_keys.remove(k)
1128 
1129  best_models = IMP.pmi.io.get_best_models(
1130  my_stat_files, score_key, feature_keys, rmf_file_key,
1131  rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1132  rmf_file_list = best_models[0]
1133  rmf_file_frame_list = best_models[1]
1134  score_list = best_models[2]
1135  feature_keyword_list_dict = best_models[3]
1136 
1137 # ------------------------------------------------------------------------
1138 # collect all the files and scores
1139 # ------------------------------------------------------------------------
1140 
1141  if self.number_of_processes > 1:
1142  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1143  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1144  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1145  rmf_file_frame_list)
1146  for k in feature_keyword_list_dict:
1147  feature_keyword_list_dict[k] = \
1149  feature_keyword_list_dict[k])
1150 
1151  # sort by score and get the best scoring ones
1152  score_rmf_tuples = list(zip(score_list,
1153  rmf_file_list,
1154  rmf_file_frame_list,
1155  list(range(len(score_list)))))
1156 
1157  if density_custom_ranges:
1158  for k in density_custom_ranges:
1159  if not isinstance(density_custom_ranges[k], list):
1160  raise Exception("Density custom ranges: values must "
1161  "be lists of tuples")
1162 
1163  # keep subset of frames if requested
1164  if first_and_last_frames is not None:
1165  nframes = len(score_rmf_tuples)
1166  first_frame = int(first_and_last_frames[0] * nframes)
1167  last_frame = int(first_and_last_frames[1] * nframes)
1168  if last_frame > len(score_rmf_tuples):
1169  last_frame = -1
1170  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1171 
1172  # sort RMFs by the score_key in ascending order, and store the rank
1173  best_score_rmf_tuples = sorted(
1174  score_rmf_tuples,
1175  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1176  best_score_rmf_tuples = [t+(n,) for n, t in
1177  enumerate(best_score_rmf_tuples)]
1178  # Note in the provenance info that we only kept best-scoring models
1179  prov.append(IMP.pmi.io.FilterProvenance(
1180  "Best scoring", 0, number_of_best_scoring_models))
1181  # sort the feature scores in the same way
1182  best_score_feature_keyword_list_dict = defaultdict(list)
1183  for tpl in best_score_rmf_tuples:
1184  index = tpl[3]
1185  for f in feature_keyword_list_dict:
1186  best_score_feature_keyword_list_dict[f].append(
1187  feature_keyword_list_dict[f][index])
1188  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1189  best_score_rmf_tuples,
1190  self.number_of_processes)[self.rank]
1191 
1192  # expand the dictionaries to include ambiguous copies
1193  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1194  self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1195  if IMP.pmi.get_is_canonical(prot_ahead):
1196  if rmsd_calculation_components is not None:
1197  tmp = self._expand_ambiguity(
1198  prot_ahead, rmsd_calculation_components)
1199  if tmp != rmsd_calculation_components:
1200  print('Detected ambiguity, expand rmsd components to',
1201  tmp)
1202  rmsd_calculation_components = tmp
1203  if alignment_components is not None:
1204  tmp = self._expand_ambiguity(prot_ahead,
1205  alignment_components)
1206  if tmp != alignment_components:
1207  print('Detected ambiguity, expand alignment '
1208  'components to', tmp)
1209  alignment_components = tmp
1210 
1211 # -------------------------------------------------------------
1212 # read the coordinates
1213 # ------------------------------------------------------------
1214  rmsd_weights = IMP.pmi.io.get_bead_sizes(
1215  self.model, my_best_score_rmf_tuples[0],
1216  rmsd_calculation_components, state_number=state_number)
1218  self.model, my_best_score_rmf_tuples, alignment_components,
1219  rmsd_calculation_components, state_number=state_number)
1220 
1221  # note! the coordinates are simply float tuples, NOT decorators,
1222  # NOT Vector3D, NOR particles, because these object cannot be
1223  # serialized. We need serialization
1224  # for the parallel computation based on mpi.
1225 
1226  # dict:key=component name,val=coords per hit
1227  all_coordinates = got_coords[0]
1228 
1229  # same as above, limited to alignment bits
1230  alignment_coordinates = got_coords[1]
1231 
1232  # same as above, limited to RMSD bits
1233  rmsd_coordinates = got_coords[2]
1234 
1235  # dictionary with key=RMF, value=score rank
1236  rmf_file_name_index_dict = got_coords[3]
1237 
1238  # RMF file per hit
1239  all_rmf_file_names = got_coords[4]
1240 
1241 # ------------------------------------------------------------------------
1242 # optionally don't compute distance matrix or cluster, just write top files
1243 # ------------------------------------------------------------------------
1244  if skip_clustering:
1245  if density_custom_ranges:
1246  DensModule = IMP.pmi.analysis.GetModelDensity(
1247  density_custom_ranges, voxel=voxel_size)
1248 
1249  dircluster = os.path.join(outputdir,
1250  "all_models."+str(self.rank))
1251  try:
1252  os.mkdir(outputdir)
1253  except: # noqa: E722
1254  pass
1255  try:
1256  os.mkdir(dircluster)
1257  except: # noqa: E722
1258  pass
1259  clusstat = open(os.path.join(
1260  dircluster, "stat."+str(self.rank)+".out"), "w")
1261  for cnt, tpl in enumerate(my_best_score_rmf_tuples):
1262  rmf_name = tpl[1]
1263  rmf_frame_number = tpl[2]
1264  tmp_dict = {}
1265  index = tpl[4]
1266  for key in best_score_feature_keyword_list_dict:
1267  tmp_dict[key] = \
1268  best_score_feature_keyword_list_dict[key][index]
1269 
1270  if cnt == 0:
1271  prots, rs = \
1272  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1273  self.model, rmf_frame_number, rmf_name)
1274  else:
1275  linking_successful = \
1276  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1277  self.model, prots, rs, rmf_frame_number,
1278  rmf_name)
1279  if not linking_successful:
1280  continue
1281 
1282  if not prots:
1283  continue
1284 
1285  if IMP.pmi.get_is_canonical(prots[0]):
1286  states = IMP.atom.get_by_type(
1287  prots[0], IMP.atom.STATE_TYPE)
1288  prot = states[state_number]
1289  else:
1290  prot = prots[state_number]
1291 
1292  # get transformation aligning coordinates of
1293  # requested tuples to the first RMF file
1294  if cnt == 0:
1295  coords_f1 = alignment_coordinates[cnt]
1296  if cnt > 0:
1297  coords_f2 = alignment_coordinates[cnt]
1298  if coords_f2:
1300  coords_f1, coords_f2)
1301  transformation = Ali.align()[1]
1302  else:
1303  transformation = \
1305 
1306  rbs = set()
1307  for p in IMP.atom.get_leaves(prot):
1308  if not IMP.core.XYZR.get_is_setup(p):
1310  IMP.core.XYZR(p).set_radius(0.0001)
1311  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1312 
1314  rbm = IMP.core.RigidBodyMember(p)
1315  rb = rbm.get_rigid_body()
1316  rbs.add(rb)
1317  else:
1319  transformation)
1320  for rb in rbs:
1321  IMP.core.transform(rb, transformation)
1322 
1323  o = IMP.pmi.output.Output()
1324  self.model.update()
1325  out_pdb_fn = os.path.join(
1326  dircluster, str(cnt)+"."+str(self.rank)+".pdb")
1327  out_rmf_fn = os.path.join(
1328  dircluster, str(cnt)+"."+str(self.rank)+".rmf3")
1329  o.init_pdb(out_pdb_fn, prot)
1330  tc = write_pdb_with_centered_coordinates
1331  o.write_pdb(out_pdb_fn,
1332  translate_to_geometric_center=tc)
1333 
1334  tmp_dict["local_pdb_file_name"] = \
1335  os.path.basename(out_pdb_fn)
1336  tmp_dict["rmf_file_full_path"] = rmf_name
1337  tmp_dict["local_rmf_file_name"] = \
1338  os.path.basename(out_rmf_fn)
1339  tmp_dict["local_rmf_frame_number"] = 0
1340 
1341  clusstat.write(str(tmp_dict)+"\n")
1342 
1343  if IMP.pmi.get_is_canonical(prot):
1344  # create a single-state System and write that
1346  IMP.Particle(self.model))
1347  h.set_name("System")
1348  h.add_child(prot)
1349  o.init_rmf(out_rmf_fn, [h], rs)
1350  else:
1351  o.init_rmf(out_rmf_fn, [prot], rs)
1352 
1353  o.write_rmf(out_rmf_fn)
1354  o.close_rmf(out_rmf_fn)
1355  # add the density
1356  if density_custom_ranges:
1357  DensModule.add_subunits_density(prot)
1358 
1359  if density_custom_ranges:
1360  DensModule.write_mrc(path=dircluster)
1361  del DensModule
1362  return
1363 
1364  # broadcast the coordinates
1365  if self.number_of_processes > 1:
1366  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1367  all_coordinates)
1368  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1369  all_rmf_file_names)
1370  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1371  rmf_file_name_index_dict)
1372  alignment_coordinates = IMP.pmi.tools.scatter_and_gather(
1373  alignment_coordinates)
1374  rmsd_coordinates = IMP.pmi.tools.scatter_and_gather(
1375  rmsd_coordinates)
1376 
1377  if self.rank == 0:
1378  # save needed informations in external files
1379  self.save_objects(
1380  [best_score_feature_keyword_list_dict,
1381  rmf_file_name_index_dict],
1382  ".macro.pkl")
1383 
1384 # ------------------------------------------------------------------------
1385 # Calculate distance matrix and cluster
1386 # ------------------------------------------------------------------------
1387  print("setup clustering class")
1388  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1389 
1390  for n, model_coordinate_dict in enumerate(all_coordinates):
1391  # let's try to align
1392  if (alignment_components is not None
1393  and len(self.cluster_obj.all_coords) == 0):
1394  # set the first model as template coordinates
1395  self.cluster_obj.set_template(alignment_coordinates[n])
1396  self.cluster_obj.fill(all_rmf_file_names[n],
1397  rmsd_coordinates[n])
1398  print("Global calculating the distance matrix")
1399 
1400  # calculate distance matrix, all against all
1401  self.cluster_obj.dist_matrix()
1402 
1403  # perform clustering and optionally display
1404  if self.rank == 0:
1405  self.cluster_obj.do_cluster(number_of_clusters)
1406  if display_plot:
1407  if self.rank == 0:
1408  self.cluster_obj.plot_matrix(
1409  figurename=os.path.join(outputdir,
1410  'dist_matrix.pdf'))
1411  if exit_after_display:
1412  exit()
1413  self.cluster_obj.save_distance_matrix_file(
1414  file_name=distance_matrix_file)
1415 
1416 # ------------------------------------------------------------------------
1417 # Alteratively, load the distance matrix from file and cluster that
1418 # ------------------------------------------------------------------------
1419  else:
1420  if self.rank == 0:
1421  print("setup clustering class")
1422  self.cluster_obj = IMP.pmi.analysis.Clustering()
1423  self.cluster_obj.load_distance_matrix_file(
1424  file_name=distance_matrix_file)
1425  print("clustering with %s clusters" % str(number_of_clusters))
1426  self.cluster_obj.do_cluster(number_of_clusters)
1427  [best_score_feature_keyword_list_dict,
1428  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1429  if display_plot:
1430  if self.rank == 0:
1431  self.cluster_obj.plot_matrix(figurename=os.path.join(
1432  outputdir, 'dist_matrix.pdf'))
1433  if exit_after_display:
1434  exit()
1435  if self.number_of_processes > 1:
1436  self.comm.Barrier()
1437 
1438 # ------------------------------------------------------------------------
1439 # now save all informations about the clusters
1440 # ------------------------------------------------------------------------
1441 
1442  if self.rank == 0:
1443  print(self.cluster_obj.get_cluster_labels())
1444  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1445  print("rank %s " % str(self.rank))
1446  print("cluster %s " % str(n))
1447  print("cluster label %s " % str(cl))
1448  print(self.cluster_obj.get_cluster_label_names(cl))
1449  cluster_size = \
1450  len(self.cluster_obj.get_cluster_label_names(cl))
1451  cluster_prov = \
1452  prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1453 
1454  # first initialize the Density class if requested
1455  if density_custom_ranges:
1456  DensModule = IMP.pmi.analysis.GetModelDensity(
1457  density_custom_ranges,
1458  voxel=voxel_size)
1459 
1460  dircluster = outputdir + "/cluster." + str(n) + "/"
1461  try:
1462  os.mkdir(dircluster)
1463  except: # noqa: E722
1464  pass
1465 
1466  rmsd_dict = {
1467  "AVERAGE_RMSD":
1468  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1469  clusstat = open(dircluster + "stat.out", "w")
1470  for k, structure_name in enumerate(
1471  self.cluster_obj.get_cluster_label_names(cl)):
1472  # extract the features
1473  tmp_dict = {}
1474  tmp_dict.update(rmsd_dict)
1475  index = rmf_file_name_index_dict[structure_name]
1476  for key in best_score_feature_keyword_list_dict:
1477  tmp_dict[
1478  key] = best_score_feature_keyword_list_dict[
1479  key][
1480  index]
1481 
1482  # get the rmf name and the frame number from the list of
1483  # frame names
1484  rmf_name = structure_name.split("|")[0]
1485  rmf_frame_number = int(structure_name.split("|")[1])
1486  clusstat.write(str(tmp_dict) + "\n")
1487 
1488  # extract frame (open or link to existing)
1489  if k == 0:
1490  prots, rs = \
1491  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1492  self.model, rmf_frame_number, rmf_name)
1493  else:
1494  linking_successful = \
1495  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1496  self.model, prots, rs, rmf_frame_number,
1497  rmf_name)
1498  if not linking_successful:
1499  continue
1500  if not prots:
1501  continue
1502 
1503  if IMP.pmi.get_is_canonical(prots[0]):
1504  states = IMP.atom.get_by_type(
1505  prots[0], IMP.atom.STATE_TYPE)
1506  prot = states[state_number]
1507  else:
1508  prot = prots[state_number]
1509  if k == 0:
1510  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1511 
1512  # transform clusters onto first
1513  if k > 0:
1514  co = self.cluster_obj
1515  model_index = co.get_model_index_from_name(
1516  structure_name)
1517  transformation = co.get_transformation_to_first_member(
1518  cl, model_index)
1519  rbs = set()
1520  for p in IMP.atom.get_leaves(prot):
1521  if not IMP.core.XYZR.get_is_setup(p):
1523  IMP.core.XYZR(p).set_radius(0.0001)
1524  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1525 
1527  rbm = IMP.core.RigidBodyMember(p)
1528  rb = rbm.get_rigid_body()
1529  rbs.add(rb)
1530  else:
1532  transformation)
1533  for rb in rbs:
1534  IMP.core.transform(rb, transformation)
1535 
1536  # add the density
1537  if density_custom_ranges:
1538  DensModule.add_subunits_density(prot)
1539 
1540  # pdb writing should be optimized!
1541  o = IMP.pmi.output.Output()
1542  self.model.update()
1543  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1544  o.write_pdb(dircluster + str(k) + ".pdb")
1545 
1546  if IMP.pmi.get_is_canonical(prot):
1547  # create a single-state System and write that
1549  IMP.Particle(self.model))
1550  h.set_name("System")
1551  h.add_child(prot)
1552  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1553  else:
1554  o.init_rmf(dircluster + str(k) + ".rmf3", [prot], rs)
1555  o.write_rmf(dircluster + str(k) + ".rmf3")
1556  o.close_rmf(dircluster + str(k) + ".rmf3")
1557 
1558  del o
1559  # IMP.atom.destroy(prot)
1560 
1561  if density_custom_ranges:
1562  DensModule.write_mrc(path=dircluster)
1563  del DensModule
1564 
1565  if self.number_of_processes > 1:
1566  self.comm.Barrier()
1567 
1568  def get_cluster_rmsd(self, cluster_num):
1569  if self.cluster_obj is None:
1570  raise Exception("Run clustering first")
1571  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1572 
1573  def save_objects(self, objects, file_name):
1574  import pickle
1575  outf = open(file_name, 'wb')
1576  pickle.dump(objects, outf)
1577  outf.close()
1578 
1579  def load_objects(self, file_name):
1580  import pickle
1581  inputf = open(file_name, 'rb')
1582  objects = pickle.load(inputf)
1583  inputf.close()
1584  return objects
1585 
1586 
1588 
1589  """
1590  This class contains analysis utilities to investigate ReplicaExchange
1591  results.
1592  """
1593 
1594  ########################
1595  # Construction and Setup
1596  ########################
1597 
1598  def __init__(self, model, stat_files, best_models=None, score_key=None,
1599  alignment=True):
1600  """
1601  Construction of the Class.
1602  @param model IMP.Model()
1603  @param stat_files list of string. Can be ascii stat files,
1604  rmf files names
1605  @param best_models Integer. Number of best scoring models,
1606  if None: all models will be read
1607  @param alignment boolean (Default=True). Align before computing
1608  the rmsd.
1609  """
1610 
1611  self.model = model
1612  self.best_models = best_models
1614  model, stat_files, self.best_models, score_key, cache=True)
1616  StatHierarchyHandler=self.stath0)
1617 
1618  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(
1620  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(
1622  self.sel0_rmsd = IMP.atom.Selection(self.stath0)
1623  self.sel1_rmsd = IMP.atom.Selection(self.stath1)
1624  self.sel0_alignment = IMP.atom.Selection(self.stath0)
1625  self.sel1_alignment = IMP.atom.Selection(self.stath1)
1626  self.clusters = []
1627  # fill the cluster list with a single cluster containing all models
1628  c = IMP.pmi.output.Cluster(0)
1629  self.clusters.append(c)
1630  for n0 in range(len(self.stath0)):
1631  c.add_member(n0)
1632  self.pairwise_rmsd = {}
1633  self.pairwise_molecular_assignment = {}
1634  self.alignment = alignment
1635  self.symmetric_molecules = {}
1636  self.issymmetricsel = {}
1637  self.update_seldicts()
1638  self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1639  IMP.atom.get_leaves(self.stath0))
1640  self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1641  IMP.atom.get_leaves(self.stath1))
1642 
1643  def set_rmsd_selection(self, **kwargs):
1644  """
1645  Setup the selection onto which the rmsd is computed
1646  @param kwargs use IMP.atom.Selection keywords
1647  """
1648  self.sel0_rmsd = IMP.atom.Selection(self.stath0, **kwargs)
1649  self.sel1_rmsd = IMP.atom.Selection(self.stath1, **kwargs)
1650  self.update_seldicts()
1651 
1652  def set_symmetric(self, molecule_name):
1653  """
1654  Store names of symmetric molecules
1655  """
1656  self.symmetric_molecules[molecule_name] = 0
1657  self.update_seldicts()
1658 
1659  def set_alignment_selection(self, **kwargs):
1660  """
1661  Setup the selection onto which the alignment is computed
1662  @param kwargs use IMP.atom.Selection keywords
1663  """
1664  self.sel0_alignment = IMP.atom.Selection(self.stath0, **kwargs)
1665  self.sel1_alignment = IMP.atom.Selection(self.stath1, **kwargs)
1666 
1667  ######################
1668  # Clustering functions
1669  ######################
1670  def clean_clusters(self):
1671  for c in self.clusters:
1672  del c
1673  self.clusters = []
1674 
1675  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1676  """
1677  Cluster the models based on RMSD.
1678  @param rmsd_cutoff Float the distance cutoff in Angstrom
1679  @param metric (Default=IMP.atom.get_rmsd) the metric that will
1680  be used to compute rmsds
1681  """
1682  self.clean_clusters()
1683  not_clustered = set(range(len(self.stath1)))
1684  while len(not_clustered) > 0:
1685  self.aggregate(not_clustered, rmsd_cutoff, metric)
1686  self.update_clusters()
1687 
1688  def refine(self, rmsd_cutoff=10):
1689  """
1690  Refine the clusters by merging the ones whose centers are close
1691  @param rmsd_cutoff cutoff distance in Angstorms
1692  """
1693  clusters_copy = self.clusters
1694  for c0, c1 in itertools.combinations(self.clusters, 2):
1695  if c0.center_index is None:
1696  self.compute_cluster_center(c0)
1697  if c1.center_index is None:
1698  self.compute_cluster_center(c1)
1699  _ = self.stath0[c0.center_index]
1700  _ = self.stath1[c1.center_index]
1701  rmsd, molecular_assignment = self.rmsd()
1702  if rmsd <= rmsd_cutoff:
1703  if c1 in self.clusters:
1704  clusters_copy.remove(c1)
1705  c0 += c1
1706  self.clusters = clusters_copy
1707  self.update_clusters()
1708 
1709  ####################
1710  # Input Output
1711  ####################
1712 
1713  def set_cluster_assignments(self, cluster_ids):
1714  if len(cluster_ids) != len(self.stath0):
1715  raise ValueError('cluster ids has to be same length as '
1716  'number of frames')
1717 
1718  self.clusters = []
1719  for i in sorted(list(set(cluster_ids))):
1720  self.clusters.append(IMP.pmi.output.Cluster(i))
1721  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1722  self.clusters[idx].add_member(i, d)
1723 
1724  def get_cluster_data(self, cluster):
1725  """
1726  Return the model data from a cluster
1727  @param cluster IMP.pmi.output.Cluster object
1728  """
1729  data = []
1730  for m in cluster:
1731  data.append(m)
1732  return data
1733 
1734  def save_data(self, filename='data.pkl'):
1735  """
1736  Save the data for the whole models into a pickle file
1737  @param filename string
1738  """
1739  self.stath0.save_data(filename)
1740 
1741  def set_data(self, data):
1742  """
1743  Set the data from an external IMP.pmi.output.Data
1744  @param data IMP.pmi.output.Data
1745  """
1746  self.stath0.data = data
1747  self.stath1.data = data
1748 
1749  def load_data(self, filename='data.pkl'):
1750  """
1751  Load the data from an external pickled file
1752  @param filename string
1753  """
1754  self.stath0.load_data(filename)
1755  self.stath1.load_data(filename)
1756  self.best_models = len(self.stath0)
1757 
1758  def add_cluster(self, rmf_name_list):
1759  c = IMP.pmi.output.Cluster(len(self.clusters))
1760  print("creating cluster index "+str(len(self.clusters)))
1761  self.clusters.append(c)
1762  current_len = len(self.stath0)
1763 
1764  for rmf in rmf_name_list:
1765  print("adding rmf "+rmf)
1766  self.stath0.add_stat_file(rmf)
1767  self.stath1.add_stat_file(rmf)
1768 
1769  for n0 in range(current_len, len(self.stath0)):
1770  d0 = self.stath0[n0]
1771  c.add_member(n0, d0)
1772  self.update_clusters()
1773 
1774  def save_clusters(self, filename='clusters.pkl'):
1775  """
1776  Save the clusters into a pickle file
1777  @param filename string
1778  """
1779  try:
1780  import cPickle as pickle
1781  except ImportError:
1782  import pickle
1783  fl = open(filename, 'wb')
1784  pickle.dump(self.clusters, fl)
1785 
1786  def load_clusters(self, filename='clusters.pkl', append=False):
1787  """
1788  Load the clusters from a pickle file
1789  @param filename string
1790  @param append bool (Default=False), if True. append the clusters
1791  to the ones currently present
1792  """
1793  try:
1794  import cPickle as pickle
1795  except ImportError:
1796  import pickle
1797  fl = open(filename, 'rb')
1798  self.clean_clusters()
1799  if append:
1800  self.clusters += pickle.load(fl)
1801  else:
1802  self.clusters = pickle.load(fl)
1803  self.update_clusters()
1804 
1805  ####################
1806  # Analysis Functions
1807  ####################
1808 
1809  def compute_cluster_center(self, cluster):
1810  """
1811  Compute the cluster center for a given cluster
1812  """
1813  member_distance = defaultdict(float)
1814 
1815  for n0, n1 in itertools.combinations(cluster.members, 2):
1816  _ = self.stath0[n0]
1817  _ = self.stath1[n1]
1818  rmsd, _ = self.rmsd()
1819  member_distance[n0] += rmsd
1820 
1821  if len(member_distance) > 0:
1822  cluster.center_index = min(member_distance,
1823  key=member_distance.get)
1824  else:
1825  cluster.center_index = cluster.members[0]
1826 
1827  def save_coordinates(self, cluster, rmf_name=None, reference="Absolute",
1828  prefix="./"):
1829  """
1830  Save the coordinates of the current cluster a single rmf file
1831  """
1832  print("saving coordinates", cluster)
1833  if self.alignment:
1834  self.set_reference(reference, cluster)
1835  o = IMP.pmi.output.Output()
1836  if rmf_name is None:
1837  rmf_name = prefix+'/'+str(cluster.cluster_id)+".rmf3"
1838 
1839  _ = self.stath1[cluster.members[0]]
1840  self.model.update()
1841  o.init_rmf(rmf_name, [self.stath1])
1842  for n1 in cluster.members:
1843  _ = self.stath1[n1]
1844  self.model.update()
1846  if self.alignment:
1847  self.align()
1848  o.write_rmf(rmf_name)
1850  o.close_rmf(rmf_name)
1851 
1852  def prune_redundant_structures(self, rmsd_cutoff=10):
1853  """
1854  remove structures that are similar
1855  append it to a new cluster
1856  """
1857  print("pruning models")
1858  selected = 0
1859  filtered = [selected]
1860  remaining = range(1, len(self.stath1), 10)
1861 
1862  while len(remaining) > 0:
1863  d0 = self.stath0[selected]
1864  rm = []
1865  for n1 in remaining:
1866  _ = self.stath1[n1]
1867  if self.alignment:
1868  self.align()
1869  d, _ = self.rmsd()
1870  if d <= rmsd_cutoff:
1871  rm.append(n1)
1872  print("pruning model %s, similar to model %s, rmsd %s"
1873  % (str(n1), str(selected), str(d)))
1874  remaining = [x for x in remaining if x not in rm]
1875  if len(remaining) == 0:
1876  break
1877  selected = remaining[0]
1878  filtered.append(selected)
1879  remaining.pop(0)
1880  c = IMP.pmi.output.Cluster(len(self.clusters))
1881  self.clusters.append(c)
1882  for n0 in filtered:
1883  d0 = self.stath0[n0]
1884  c.add_member(n0, d0)
1885  self.update_clusters()
1886 
1887  def precision(self, cluster):
1888  """
1889  Compute the precision of a cluster
1890  """
1891  npairs = 0
1892  rmsd = 0.0
1893  precision = None
1894 
1895  if cluster.center_index is not None:
1896  members1 = [cluster.center_index]
1897  else:
1898  members1 = cluster.members
1899 
1900  for n0 in members1:
1901  _ = self.stath0[n0]
1902  for n1 in cluster.members:
1903  if n0 != n1:
1904  npairs += 1
1905  _ = self.stath1[n1]
1907  tmp_rmsd, _ = self.rmsd()
1908  rmsd += tmp_rmsd
1910 
1911  if npairs > 0:
1912  precision = rmsd/npairs
1913  cluster.precision = precision
1914  return precision
1915 
1916  def bipartite_precision(self, cluster1, cluster2, verbose=False):
1917  """
1918  Compute the bipartite precision (ie the cross-precision)
1919  between two clusters
1920  """
1921  npairs = 0
1922  rmsd = 0.0
1923  for cn0, n0 in enumerate(cluster1.members):
1924  _ = self.stath0[n0]
1925  for cn1, n1 in enumerate(cluster2.members):
1926  _ = self.stath1[n1]
1927  tmp_rmsd, _ = self.rmsd()
1928  if verbose:
1929  print("--- rmsd between structure %s and structure "
1930  "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1931  rmsd += tmp_rmsd
1932  npairs += 1
1933  precision = rmsd/npairs
1934  return precision
1935 
1936  def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1937  cluster_ref=None, step=1):
1938  """
1939  Compute the Root mean square fluctuations
1940  of a molecule in a cluster
1941  Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1942  residue indexes and the value is the rmsf
1943  """
1944  rmsf = IMP.pmi.tools.OrderedDict()
1945 
1946  # assumes that residue indexes are identical for stath0 and stath1
1947  if cluster_ref is not None:
1948  if cluster_ref.center_index is not None:
1949  members0 = [cluster_ref.center_index]
1950  else:
1951  members0 = cluster_ref.members
1952  else:
1953  if cluster.center_index is not None:
1954  members0 = [cluster.center_index]
1955  else:
1956  members0 = cluster.members
1957 
1958  s0 = IMP.atom.Selection(self.stath0, molecule=molecule, resolution=1,
1959  copy_index=copy_index, state_index=state_index)
1960  ps0 = s0.get_selected_particles()
1961  # get the residue indexes
1962  residue_indexes = list(IMP.pmi.tools.OrderedSet(
1963  [IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
1964 
1965  # get the corresponding particles
1966  npairs = 0
1967  for n0 in members0:
1968  d0 = self.stath0[n0]
1969  for n1 in cluster.members[::step]:
1970  if n0 != n1:
1971  print("--- rmsf %s %s" % (str(n0), str(n1)))
1973 
1974  s1 = IMP.atom.Selection(
1975  self.stath1, molecule=molecule,
1976  residue_indexes=residue_indexes, resolution=1,
1977  copy_index=copy_index, state_index=state_index)
1978  ps1 = s1.get_selected_particles()
1979 
1980  d1 = self.stath1[n1]
1981  if self.alignment:
1982  self.align()
1983  for n, (p0, p1) in enumerate(zip(ps0, ps1)):
1984  r = residue_indexes[n]
1985  d0 = IMP.core.XYZ(p0)
1986  d1 = IMP.core.XYZ(p1)
1987  if r in rmsf:
1988  rmsf[r] += IMP.core.get_distance(d0, d1)
1989  else:
1990  rmsf[r] = IMP.core.get_distance(d0, d1)
1991  npairs += 1
1993  for r in rmsf:
1994  rmsf[r] /= npairs
1995 
1996  for stath in [self.stath0, self.stath1]:
1997  if molecule not in self.symmetric_molecules:
1998  s = IMP.atom.Selection(
1999  stath, molecule=molecule, residue_index=r,
2000  resolution=1, copy_index=copy_index,
2001  state_index=state_index)
2002  else:
2003  s = IMP.atom.Selection(
2004  stath, molecule=molecule, residue_index=r,
2005  resolution=1, state_index=state_index)
2006 
2007  ps = s.get_selected_particles()
2008  for p in ps:
2010  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
2011  else:
2013 
2014  return rmsf
2015 
2016  def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2017  reference="Absolute", prefix="./", step=1):
2018  if self.alignment:
2019  self.set_reference(reference, cluster)
2020  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
2021  voxel=voxel_size)
2022 
2023  for n1 in cluster.members[::step]:
2024  print("density "+str(n1))
2025  _ = self.stath1[n1]
2027  if self.alignment:
2028  self.align()
2029  dens.add_subunits_density(self.stath1)
2031  dens.write_mrc(path=prefix+'/', suffix=str(cluster.cluster_id))
2032  del dens
2033 
2034  def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2035  consolidate=False, molecules=None, prefix='./',
2036  reference="Absolute"):
2037  if self.alignment:
2038  self.set_reference(reference, cluster)
2039  import numpy as np
2040  import matplotlib.pyplot as plt
2041  import matplotlib.cm as cm
2042  from scipy.spatial.distance import cdist
2043  import IMP.pmi.topology
2044  if molecules is None:
2046  for mol in IMP.pmi.tools.get_molecules(
2047  IMP.atom.get_leaves(self.stath1))]
2048  else:
2050  for mol in IMP.pmi.tools.get_molecules(
2052  self.stath1,
2053  molecules=molecules).get_selected_particles())]
2054  unique_copies = [mol for mol in mols if mol.get_copy_index() == 0]
2055  mol_names_unique = dict((mol.get_name(), mol) for mol in unique_copies)
2056  total_len_unique = sum(max(mol.get_residue_indexes())
2057  for mol in unique_copies)
2058 
2059  index_dict = {}
2060  prev_stop = 0
2061 
2062  if not consolidate:
2063  for mol in mols:
2064  seqlen = max(mol.get_residue_indexes())
2065  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2066  prev_stop += seqlen
2067 
2068  else:
2069  for mol in unique_copies:
2070  seqlen = max(mol.get_residue_indexes())
2071  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2072  prev_stop += seqlen
2073 
2074  for ncl, n1 in enumerate(cluster.members):
2075  print(ncl)
2076  _ = self.stath1[n1]
2077  coord_dict = IMP.pmi.tools.OrderedDict()
2078  for mol in mols:
2079  rindexes = mol.get_residue_indexes()
2080  coords = np.ones((max(rindexes), 3))
2081  for rnum in rindexes:
2082  sel = IMP.atom.Selection(mol, residue_index=rnum,
2083  resolution=1)
2084  selpart = sel.get_selected_particles()
2085  if len(selpart) == 0:
2086  continue
2087  selpart = selpart[0]
2088  coords[rnum - 1, :] = \
2089  IMP.core.XYZ(selpart).get_coordinates()
2090  coord_dict[mol] = coords
2091 
2092  if not consolidate:
2093  coords = np.concatenate(list(coord_dict.values()))
2094  dists = cdist(coords, coords)
2095  binary_dists = np.where((dists <= contact_threshold)
2096  & (dists >= 1.0), 1.0, 0.0)
2097  else:
2098  binary_dists_dict = {}
2099  for mol1 in mols:
2100  len1 = max(mol1.get_residue_indexes())
2101  for mol2 in mols:
2102  name1 = mol1.get_name()
2103  name2 = mol2.get_name()
2104  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2105  if (name1, name2) not in binary_dists_dict:
2106  binary_dists_dict[(name1, name2)] = \
2107  np.zeros((len1, len1))
2108  binary_dists_dict[(name1, name2)] += \
2109  np.where((dists <= contact_threshold)
2110  & (dists >= 1.0), 1.0, 0.0)
2111  binary_dists = np.zeros((total_len_unique, total_len_unique))
2112 
2113  for name1, name2 in binary_dists_dict:
2114  r1 = index_dict[mol_names_unique[name1]]
2115  r2 = index_dict[mol_names_unique[name2]]
2116  binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2117  np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2118  1.0, 0.0)
2119 
2120  if ncl == 0:
2121  dist_maps = [dists]
2122  av_dist_map = dists
2123  contact_freqs = binary_dists
2124  else:
2125  dist_maps.append(dists)
2126  av_dist_map += dists
2127  contact_freqs += binary_dists
2128 
2129  if log_scale:
2130  contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2131  else:
2132  contact_freqs = 1.0/len(cluster)*contact_freqs
2133  av_dist_map = 1.0/len(cluster)*contact_freqs
2134 
2135  fig = plt.figure(figsize=(100, 100))
2136  ax = fig.add_subplot(111)
2137  ax.set_xticks([])
2138  ax.set_yticks([])
2139  gap_between_components = 50
2140  colormap = cm.Blues
2141  colornorm = None
2142 
2143  if not consolidate:
2144  sorted_tuple = sorted(
2146  mol).get_extended_name(), mol) for mol in mols)
2147  prot_list = list(zip(*sorted_tuple))[1]
2148  else:
2149  sorted_tuple = sorted(
2150  (IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(), mol)
2151  for mol in unique_copies)
2152  prot_list = list(zip(*sorted_tuple))[1]
2153 
2154  prot_listx = prot_list
2155  nresx = gap_between_components + \
2156  sum([max(mol.get_residue_indexes())
2157  + gap_between_components for mol in prot_listx])
2158 
2159  # set the list of proteins on the y axis
2160  prot_listy = prot_list
2161  nresy = gap_between_components + \
2162  sum([max(mol.get_residue_indexes())
2163  + gap_between_components for mol in prot_listy])
2164 
2165  # this is the residue offset for each protein
2166  resoffsetx = {}
2167  resendx = {}
2168  res = gap_between_components
2169  for mol in prot_listx:
2170  resoffsetx[mol] = res
2171  res += max(mol.get_residue_indexes())
2172  resendx[mol] = res
2173  res += gap_between_components
2174 
2175  resoffsety = {}
2176  resendy = {}
2177  res = gap_between_components
2178  for mol in prot_listy:
2179  resoffsety[mol] = res
2180  res += max(mol.get_residue_indexes())
2181  resendy[mol] = res
2182  res += gap_between_components
2183 
2184  resoffsetdiagonal = {}
2185  res = gap_between_components
2186  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2187  resoffsetdiagonal[mol] = res
2188  res += max(mol.get_residue_indexes())
2189  res += gap_between_components
2190 
2191  # plot protein boundaries
2192  xticks = []
2193  xlabels = []
2194  for n, prot in enumerate(prot_listx):
2195  res = resoffsetx[prot]
2196  end = resendx[prot]
2197  for proty in prot_listy:
2198  resy = resoffsety[proty]
2199  endy = resendy[proty]
2200  ax.plot([res, res], [resy, endy], linestyle='-',
2201  color='gray', lw=0.4)
2202  ax.plot([end, end], [resy, endy], linestyle='-',
2203  color='gray', lw=0.4)
2204  xticks.append((float(res) + float(end)) / 2)
2206  prot).get_extended_name())
2207 
2208  yticks = []
2209  ylabels = []
2210  for n, prot in enumerate(prot_listy):
2211  res = resoffsety[prot]
2212  end = resendy[prot]
2213  for protx in prot_listx:
2214  resx = resoffsetx[protx]
2215  endx = resendx[protx]
2216  ax.plot([resx, endx], [res, res], linestyle='-',
2217  color='gray', lw=0.4)
2218  ax.plot([resx, endx], [end, end], linestyle='-',
2219  color='gray', lw=0.4)
2220  yticks.append((float(res) + float(end)) / 2)
2222  prot).get_extended_name())
2223 
2224  # plot the contact map
2225 
2226  tmp_array = np.zeros((nresx, nresy))
2227  ret = {}
2228  for px in prot_listx:
2229  for py in prot_listy:
2230  resx = resoffsetx[px]
2231  lengx = resendx[px] - 1
2232  resy = resoffsety[py]
2233  lengy = resendy[py] - 1
2234  indexes_x = index_dict[px]
2235  minx = min(indexes_x)
2236  maxx = max(indexes_x)
2237  indexes_y = index_dict[py]
2238  miny = min(indexes_y)
2239  maxy = max(indexes_y)
2240  tmp_array[resx:lengx, resy:lengy] = \
2241  contact_freqs[minx:maxx, miny:maxy]
2242  ret[(px, py)] = np.argwhere(
2243  contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2244 
2245  ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2246  origin='lower', alpha=0.6, interpolation='nearest')
2247 
2248  ax.set_xticks(xticks)
2249  ax.set_xticklabels(xlabels, rotation=90)
2250  ax.set_yticks(yticks)
2251  ax.set_yticklabels(ylabels)
2252  plt.setp(ax.get_xticklabels(), fontsize=6)
2253  plt.setp(ax.get_yticklabels(), fontsize=6)
2254 
2255  # display and write to file
2256  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2257  [i.set_linewidth(2.0) for i in ax.spines.values()]
2258 
2259  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf",
2260  dpi=300, transparent="False")
2261  return ret
2262 
2263  def plot_rmsd_matrix(self, filename):
2264  self.compute_all_pairwise_rmsd()
2265  distance_matrix = np.zeros(
2266  (len(self.stath0), len(self.stath1)))
2267  for (n0, n1) in self.pairwise_rmsd:
2268  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2269 
2270  import matplotlib as mpl
2271  mpl.use('Agg')
2272  import matplotlib.pylab as pl
2273  from scipy.cluster import hierarchy as hrc
2274 
2275  fig = pl.figure(figsize=(10, 8))
2276  ax = fig.add_subplot(212)
2277  dendrogram = hrc.dendrogram(
2278  hrc.linkage(distance_matrix),
2279  color_threshold=7,
2280  no_labels=True)
2281  leaves_order = dendrogram['leaves']
2282  ax.set_xlabel('Model')
2283  ax.set_ylabel('RMSD [Angstroms]')
2284 
2285  ax2 = fig.add_subplot(221)
2286  cax = ax2.imshow(
2287  distance_matrix[leaves_order, :][:, leaves_order],
2288  interpolation='nearest')
2289  cb = fig.colorbar(cax)
2290  cb.set_label('RMSD [Angstroms]')
2291  ax2.set_xlabel('Model')
2292  ax2.set_ylabel('Model')
2293 
2294  pl.savefig(filename, dpi=300)
2295  pl.close(fig)
2296 
2297  ####################
2298  # Internal Functions
2299  ####################
2300 
2301  def update_clusters(self):
2302  """
2303  Update the cluster id numbers
2304  """
2305  for n, c in enumerate(self.clusters):
2306  c.cluster_id = n
2307 
2308  def get_molecule(self, hier, name, copy):
2309  s = IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2310  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2311 
2312  def update_seldicts(self):
2313  """
2314  Update the seldicts
2315  """
2316  self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2317  self.sel0_rmsd.get_selected_particles())
2318  self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2319  self.sel1_rmsd.get_selected_particles())
2320  for mol in self.seldict0:
2321  for sel in self.seldict0[mol]:
2322  self.issymmetricsel[sel] = False
2323  for mol in self.symmetric_molecules:
2324  self.symmetric_molecules[mol] = len(self.seldict0[mol])
2325  for sel in self.seldict0[mol]:
2326  self.issymmetricsel[sel] = True
2327 
2328  def align(self):
2330  self.sel1_alignment, self.sel0_alignment)
2331 
2332  for rb in self.rbs1:
2333  IMP.core.transform(rb, tr)
2334 
2335  for bead in self.beads1:
2336  try:
2337  IMP.core.transform(IMP.core.XYZ(bead), tr)
2338  except: # noqa: E722
2339  continue
2340 
2341  self.model.update()
2342 
2343  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2344  '''
2345  initial filling of the clusters.
2346  '''
2347  n0 = idxs.pop()
2348  print("clustering model "+str(n0))
2349  d0 = self.stath0[n0]
2350  c = IMP.pmi.output.Cluster(len(self.clusters))
2351  print("creating cluster index "+str(len(self.clusters)))
2352  self.clusters.append(c)
2353  c.add_member(n0, d0)
2354  clustered = set([n0])
2355  for n1 in idxs:
2356  print("--- trying to add model " + str(n1) + " to cluster "
2357  + str(len(self.clusters)))
2358  d1 = self.stath1[n1]
2359  if self.alignment:
2360  self.align()
2361  rmsd, _ = self.rmsd(metric=metric)
2362  if rmsd < rmsd_cutoff:
2363  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2364  c.add_member(n1, d1)
2365  clustered.add(n1)
2366  else:
2367  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2368  idxs -= clustered
2369 
2370  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2371  """
2372  merge the clusters that have close members
2373  @param rmsd_cutoff cutoff distance in Angstorms
2374  """
2375  # before merging, clusters are spheres of radius rmsd_cutoff
2376  # centered on the 1st element
2377  # here we only try to merge clusters whose centers are closer
2378  # than 2*rmsd_cutoff
2379  to_merge = []
2380  print("merging...")
2381  for c0, c1 in filter(lambda x: len(x[0].members) > 1,
2382  itertools.combinations(self.clusters, 2)):
2383  n0, n1 = [c.members[0] for c in (c0, c1)]
2384  _ = self.stath0[n0]
2385  _ = self.stath1[n1]
2386  rmsd, _ = self.rmsd()
2387  if (rmsd < 2*rmsd_cutoff and
2388  self.have_close_members(c0, c1, rmsd_cutoff, metric)):
2389  to_merge.append((c0, c1))
2390 
2391  for c0, c in reversed(to_merge):
2392  self.merge(c0, c)
2393 
2394  # keep only full clusters
2395  self.clusters = [c for c in
2396  filter(lambda x: len(x.members) > 0, self.clusters)]
2397 
2398  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2399  '''
2400  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2401  '''
2402  print("check close members for clusters " + str(c0.cluster_id) +
2403  " and " + str(c1.cluster_id))
2404  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2405  _ = self.stath0[n0]
2406  _ = self.stath1[n1]
2407  rmsd, _ = self.rmsd(metric=metric)
2408  if rmsd < rmsd_cutoff:
2409  return True
2410 
2411  return False
2412 
2413  def merge(self, c0, c1):
2414  '''
2415  merge two clusters
2416  '''
2417  c0 += c1
2418  c1.members = []
2419  c1.data = {}
2420 
2421  def rmsd_helper(self, sels0, sels1, metric):
2422  '''
2423  a function that returns the permutation best_sel of sels0 that
2424  minimizes metric
2425  '''
2426  best_rmsd2 = float('inf')
2427  best_sel = None
2428  if self.issymmetricsel[sels0[0]]:
2429  # this cases happens when symmetries were defined
2430  N = len(sels0)
2431  for offset in range(N):
2432  sels = [sels0[(offset+i) % N] for i in range(N)]
2433  sel0 = sels[0]
2434  sel1 = sels1[0]
2435  r = metric(sel0, sel1)
2436  rmsd2 = r*r*N
2437  if rmsd2 < best_rmsd2:
2438  best_rmsd2 = rmsd2
2439  best_sel = sels
2440  else:
2441  for sels in itertools.permutations(sels0):
2442  rmsd2 = 0.0
2443  for sel0, sel1 in itertools.takewhile(
2444  lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2445  r = metric(sel0, sel1)
2446  rmsd2 += r*r
2447  if rmsd2 < best_rmsd2:
2448  best_rmsd2 = rmsd2
2449  best_sel = sels
2450  return best_sel, best_rmsd2
2451 
2452  def compute_all_pairwise_rmsd(self):
2453  for d0 in self.stath0:
2454  for d1 in self.stath1:
2455  rmsd, _ = self.rmsd()
2456 
2457  def rmsd(self, metric=IMP.atom.get_rmsd):
2458  '''
2459  Computes the RMSD. Resolves ambiguous pairs assignments
2460  '''
2461  # here we memoize the rmsd and molecular assignment so that it's
2462  # not done multiple times
2463  n0 = self.stath0.current_index
2464  n1 = self.stath1.current_index
2465  if ((n0, n1) in self.pairwise_rmsd) \
2466  and ((n0, n1) in self.pairwise_molecular_assignment):
2467  return (self.pairwise_rmsd[(n0, n1)],
2468  self.pairwise_molecular_assignment[(n0, n1)])
2469 
2470  if self.alignment:
2471  self.align()
2472  # if it's not yet memoized
2473  total_rmsd = 0.0
2474  total_N = 0
2475  # this is a dictionary which keys are the molecule names, and values
2476  # are the list of IMP.atom.Selection for all molecules that share
2477  # the molecule name
2478  molecular_assignment = {}
2479  for molname, sels0 in self.seldict0.items():
2480  sels_best_order, best_rmsd2 = \
2481  self.rmsd_helper(sels0, self.seldict1[molname], metric)
2482 
2483  Ncoords = len(sels_best_order[0].get_selected_particles())
2484  Ncopies = len(self.seldict1[molname])
2485  total_rmsd += Ncoords*best_rmsd2
2486  total_N += Ncoords*Ncopies
2487 
2488  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2489  p0 = sel0.get_selected_particles()[0]
2490  p1 = sel1.get_selected_particles()[0]
2491  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2492  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2493  c0 = IMP.atom.Copy(m0).get_copy_index()
2494  c1 = IMP.atom.Copy(m1).get_copy_index()
2495  molecular_assignment[(molname, c0)] = (molname, c1)
2496 
2497  total_rmsd = math.sqrt(total_rmsd/total_N)
2498 
2499  self.pairwise_rmsd[(n0, n1)] = total_rmsd
2500  self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2501  self.pairwise_rmsd[(n1, n0)] = total_rmsd
2502  self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2503  return total_rmsd, molecular_assignment
2504 
2505  def set_reference(self, reference, cluster):
2506  """
2507  Fix the reference structure for structural alignment, rmsd and
2508  chain assignemnt
2509 
2510  @param reference can be either "Absolute" (cluster center of the
2511  first cluster) or Relative (cluster center of the current
2512  cluster)
2513  """
2514  if reference == "Absolute":
2515  _ = self.stath0[0]
2516  elif reference == "Relative":
2517  if cluster.center_index:
2518  n0 = cluster.center_index
2519  else:
2520  n0 = cluster.members[0]
2521  _ = self.stath0[n0]
2522 
2524  """
2525  compute the molecular assignemnts between multiple copies
2526  of the same sequence. It changes the Copy index of Molecules
2527  """
2528  _ = self.stath1[n1]
2529  _, molecular_assignment = self.rmsd()
2530  for (m0, c0), (m1, c1) in molecular_assignment.items():
2531  mol0 = self.molcopydict0[m0][c0]
2532  mol1 = self.molcopydict1[m1][c1]
2533  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2534  p1 = IMP.atom.Copy(mol1).get_particle()
2535  p1.set_value(cik0, c0)
2536 
2538  """
2539  Undo the Copy index assignment
2540  """
2541  _ = self.stath1[n1]
2542  _, molecular_assignment = self.rmsd()
2543  for (m0, c0), (m1, c1) in molecular_assignment.items():
2544  mol0 = self.molcopydict0[m0][c0]
2545  mol1 = self.molcopydict1[m1][c1]
2546  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2547  p1 = IMP.atom.Copy(mol1).get_particle()
2548  p1.set_value(cik0, c1)
2549 
2550  ####################
2551  # Container Functions
2552  ####################
2553 
2554  def __repr__(self):
2555  s = "AnalysisReplicaExchange\n"
2556  s += "---- number of clusters %s \n" % str(len(self.clusters))
2557  s += "---- number of models %s \n" % str(len(self.stath0))
2558  return s
2559 
2560  def __getitem__(self, int_slice_adaptor):
2561  if isinstance(int_slice_adaptor, int):
2562  return self.clusters[int_slice_adaptor]
2563  elif isinstance(int_slice_adaptor, slice):
2564  return self.__iter__(int_slice_adaptor)
2565  else:
2566  raise TypeError("Unknown Type")
2567 
2568  def __len__(self):
2569  return len(self.clusters)
2570 
2571  def __iter__(self, slice_key=None):
2572  if slice_key is None:
2573  for i in range(len(self)):
2574  yield self[i]
2575  else:
2576  for i in range(len(self))[slice_key]:
2577  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2457
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
Definition: macros.py:2505
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1786
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1078
def precision
Compute the precision of a cluster.
Definition: macros.py:1887
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:887
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:112
A container for models organized into clusters.
Definition: output.py:1522
Sample using molecular dynamics.
Definition: samplers.py:327
def aggregate
initial filling of the clusters.
Definition: macros.py:2343
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:634
A macro to help setup and run replica exchange.
Definition: macros.py:68
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:635
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1852
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1936
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1175
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1643
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1724
def __init__
Construction of the Class.
Definition: macros.py:1598
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:792
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1741
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2537
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1659
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2421
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1827
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1031
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
Definition: macros.py:2523
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1587
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:583
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2370
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:940
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1809
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:951
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def __init__
Constructor.
Definition: macros.py:133
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:22
def update_seldicts
Update the seldicts.
Definition: macros.py:2312
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2301
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:555
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1688
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:203
Collect timing information.
Definition: tools.py:127
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1652
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: pmi/utilities.h:91
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:8986
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:47
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2413
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:636
The general base class for IMP exceptions.
Definition: exception.h:48
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1269
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1734
Class to handle individual particles of a Model object.
Definition: Particle.h:43
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:803
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1916
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:612
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1675
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1774
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2398
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1151
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1749
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:512
Sample using replica exchange.
Definition: samplers.py:432
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27