IMP logo
IMP Reference Guide  2.16.0
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
7 import IMP.pmi.tools
8 import IMP.pmi.samplers
9 import IMP.pmi.output
10 import IMP.pmi.analysis
11 import IMP.pmi.io
12 import IMP.pmi.alphabets
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import os
17 import glob
18 from operator import itemgetter
19 from collections import defaultdict
20 import numpy as np
21 import itertools
22 import warnings
23 import math
24 
25 
26 class _RMFRestraints(object):
27  """All restraints that are written out to the RMF file"""
28  def __init__(self, model, user_restraints):
29  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
30  self._user_restraints = user_restraints if user_restraints else []
31 
32  def __len__(self):
33  return (len(self._user_restraints)
34  + self._rmf_rs.get_number_of_restraints())
35 
36  def __bool__(self):
37  return len(self) > 0
38  __nonzero__ = __bool__ # Python 2 compatibility
39 
40  def __getitem__(self, i):
41  class FakePMIWrapper(object):
42  def __init__(self, r):
43  self.r = IMP.RestraintSet.get_from(r)
44 
45  def get_restraint(self):
46  return self.r
47 
48  lenuser = len(self._user_restraints)
49  if 0 <= i < lenuser:
50  return self._user_restraints[i]
51  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
52  r = self._rmf_rs.get_restraint(i - lenuser)
53  return FakePMIWrapper(r)
54  else:
55  raise IndexError("Out of range")
56 
57 
58 class ReplicaExchange0(object):
59  """A macro to help setup and run replica exchange.
60  Supports Monte Carlo and molecular dynamics.
61  Produces trajectory RMF files, best PDB structures,
62  and output stat files.
63  """
64  def __init__(self, model, root_hier,
65  sample_objects=None, # DEPRECATED
66  monte_carlo_sample_objects=None,
67  molecular_dynamics_sample_objects=None,
68  output_objects=[],
69  rmf_output_objects=None,
70  crosslink_restraints=None, # DEPRECATED
71  monte_carlo_temperature=1.0,
72  simulated_annealing=False,
73  simulated_annealing_minimum_temperature=1.0,
74  simulated_annealing_maximum_temperature=2.5,
75  simulated_annealing_minimum_temperature_nframes=100,
76  simulated_annealing_maximum_temperature_nframes=100,
77  replica_exchange_minimum_temperature=1.0,
78  replica_exchange_maximum_temperature=2.5,
79  replica_exchange_swap=True,
80  num_sample_rounds=1,
81  number_of_best_scoring_models=500,
82  monte_carlo_steps=10,
83  self_adaptive=False,
84  molecular_dynamics_steps=10,
85  molecular_dynamics_max_time_step=1.0,
86  number_of_frames=1000,
87  save_coordinates_mode="lowest_temperature",
88  nframes_write_coordinates=1,
89  write_initial_rmf=True,
90  initial_rmf_name_suffix="initial",
91  stat_file_name_suffix="stat",
92  best_pdb_name_suffix="model",
93  do_clean_first=True,
94  do_create_directories=True,
95  global_output_directory="./",
96  rmf_dir="rmfs/",
97  best_pdb_dir="pdbs/",
98  replica_stat_file_suffix="stat_replica",
99  em_object_for_rmf=None,
100  atomistic=False,
101  replica_exchange_object=None,
102  test_mode=False,
103  score_moved=False):
104  """Constructor.
105  @param model The IMP model
106  @param root_hier Top-level (System)hierarchy
107  @param monte_carlo_sample_objects Objects for MC sampling; a list of
108  structural components (generally the representation) that
109  will be moved and restraints with parameters that need to
110  be sampled.
111  For PMI2: just pass flat list of movers
112  @param molecular_dynamics_sample_objects Objects for MD sampling
113  For PMI2: just pass flat list of particles
114  @param output_objects A list of structural objects and restraints
115  that will be included in output (ie, statistics "stat"
116  files). Any object that provides a get_output() method
117  can be used here. If None is passed
118  the macro will not write stat files.
119  @param rmf_output_objects A list of structural objects and
120  restraints that will be included in rmf. Any object
121  that provides a get_output() method can be used here.
122  @param monte_carlo_temperature MC temp (may need to be optimized
123  based on post-sampling analysis)
124  @param simulated_annealing If True, perform simulated annealing
125  @param simulated_annealing_minimum_temperature Should generally be
126  the same as monte_carlo_temperature.
127  @param simulated_annealing_minimum_temperature_nframes Number of
128  frames to compute at minimum temperature.
129  @param simulated_annealing_maximum_temperature_nframes Number of
130  frames to compute at
131  temps > simulated_annealing_maximum_temperature.
132  @param replica_exchange_minimum_temperature Low temp for REX; should
133  generally be the same as monte_carlo_temperature.
134  @param replica_exchange_maximum_temperature High temp for REX
135  @param replica_exchange_swap Boolean, enable disable temperature
136  swap (Default=True)
137  @param num_sample_rounds Number of rounds of MC/MD per cycle
138  @param number_of_best_scoring_models Number of top-scoring PDB
139  models to keep around for analysis
140  @param monte_carlo_steps Number of MC steps per round
141  @param self_adaptive self adaptive scheme for Monte Carlo movers
142  @param molecular_dynamics_steps Number of MD steps per round
143  @param molecular_dynamics_max_time_step Max time step for MD
144  @param number_of_frames Number of REX frames to run
145  @param save_coordinates_mode string: how to save coordinates.
146  "lowest_temperature" (default) only the lowest temperatures
147  is saved
148  "25th_score" all replicas whose score is below the 25th
149  percentile
150  "50th_score" all replicas whose score is below the 50th
151  percentile
152  "75th_score" all replicas whose score is below the 75th
153  percentile
154  @param nframes_write_coordinates How often to write the coordinates
155  of a frame
156  @param write_initial_rmf Write the initial configuration
157  @param global_output_directory Folder that will be created to house
158  output.
159  @param test_mode Set to True to avoid writing any files, just test
160  one frame.
161  @param score_moved If True, attempt to speed up Monte Carlo
162  sampling by caching scoring function terms on particles
163  that didn't move.
164  """
165  self.model = model
166  self.vars = {}
167 
168  # add check hierarchy is multistate
169  self.output_objects = output_objects
170  self.rmf_output_objects = rmf_output_objects
171  if (isinstance(root_hier, IMP.atom.Hierarchy)
172  and root_hier.get_name() == 'System'):
173  if self.output_objects is not None:
174  self.output_objects.append(
175  IMP.pmi.io.TotalScoreOutput(self.model))
176  if self.rmf_output_objects is not None:
177  self.rmf_output_objects.append(
178  IMP.pmi.io.TotalScoreOutput(self.model))
179  self.root_hier = root_hier
180  states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
181  self.vars["number_of_states"] = len(states)
182  if len(states) > 1:
183  self.root_hiers = states
184  self.is_multi_state = True
185  else:
186  self.root_hier = root_hier
187  self.is_multi_state = False
188  else:
189  raise TypeError("Must provide System hierarchy (root_hier)")
190 
191  if crosslink_restraints:
193  "crosslink_restraints is deprecated and is ignored; "
194  "all cross-link restraints should be automatically "
195  "added to output RMF files")
196  self._rmf_restraints = _RMFRestraints(model, None)
197  self.em_object_for_rmf = em_object_for_rmf
198  self.monte_carlo_sample_objects = monte_carlo_sample_objects
199  self.vars["self_adaptive"] = self_adaptive
200  if sample_objects is not None:
202  "sample_objects is deprecated; use monte_carlo_sample_objects "
203  "(or molecular_dynamics_sample_objects) instead")
204  self.monte_carlo_sample_objects += sample_objects
205  self.molecular_dynamics_sample_objects = \
206  molecular_dynamics_sample_objects
207  self.replica_exchange_object = replica_exchange_object
208  self.molecular_dynamics_max_time_step = \
209  molecular_dynamics_max_time_step
210  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
211  self.vars["replica_exchange_minimum_temperature"] = \
212  replica_exchange_minimum_temperature
213  self.vars["replica_exchange_maximum_temperature"] = \
214  replica_exchange_maximum_temperature
215  self.vars["replica_exchange_swap"] = replica_exchange_swap
216  self.vars["simulated_annealing"] = simulated_annealing
217  self.vars["simulated_annealing_minimum_temperature"] = \
218  simulated_annealing_minimum_temperature
219  self.vars["simulated_annealing_maximum_temperature"] = \
220  simulated_annealing_maximum_temperature
221  self.vars["simulated_annealing_minimum_temperature_nframes"] = \
222  simulated_annealing_minimum_temperature_nframes
223  self.vars["simulated_annealing_maximum_temperature_nframes"] = \
224  simulated_annealing_maximum_temperature_nframes
225 
226  self.vars["num_sample_rounds"] = num_sample_rounds
227  self.vars[
228  "number_of_best_scoring_models"] = number_of_best_scoring_models
229  self.vars["monte_carlo_steps"] = monte_carlo_steps
230  self.vars["molecular_dynamics_steps"] = molecular_dynamics_steps
231  self.vars["number_of_frames"] = number_of_frames
232  if save_coordinates_mode not in ("lowest_temperature", "25th_score",
233  "50th_score", "75th_score"):
234  raise Exception("save_coordinates_mode has unrecognized value")
235  else:
236  self.vars["save_coordinates_mode"] = save_coordinates_mode
237  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
238  self.vars["write_initial_rmf"] = write_initial_rmf
239  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
240  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
241  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
242  self.vars["do_clean_first"] = do_clean_first
243  self.vars["do_create_directories"] = do_create_directories
244  self.vars["global_output_directory"] = global_output_directory
245  self.vars["rmf_dir"] = rmf_dir
246  self.vars["best_pdb_dir"] = best_pdb_dir
247  self.vars["atomistic"] = atomistic
248  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
249  self.vars["geometries"] = None
250  self.test_mode = test_mode
251  self.score_moved = score_moved
252 
253  def add_geometries(self, geometries):
254  if self.vars["geometries"] is None:
255  self.vars["geometries"] = list(geometries)
256  else:
257  self.vars["geometries"].extend(geometries)
258 
259  def show_info(self):
260  print("ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, "
261  "rmfs/*.rmf3 for each replica ")
262  print("--- it stores the best scoring pdb models in pdbs/")
263  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
264  "lowest temperature")
265  print("--- variables:")
266  keys = list(self.vars.keys())
267  keys.sort()
268  for v in keys:
269  print("------", v.ljust(30), self.vars[v])
270 
271  def get_replica_exchange_object(self):
272  return self.replica_exchange_object
273 
274  def _add_provenance(self, sampler_md, sampler_mc):
275  """Record details about the sampling in the IMP Hierarchies"""
276  iterations = 0
277  if sampler_md:
278  method = "Molecular Dynamics"
279  iterations += self.vars["molecular_dynamics_steps"]
280  if sampler_mc:
281  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
282  iterations += self.vars["monte_carlo_steps"]
283  # If no sampling is actually done, no provenance to write
284  if iterations == 0 or self.vars["number_of_frames"] == 0:
285  return
286  iterations *= self.vars["num_sample_rounds"]
287 
288  pi = self.model.add_particle("sampling")
290  self.model, pi, method, self.vars["number_of_frames"],
291  iterations)
292  p.set_number_of_replicas(
293  self.replica_exchange_object.get_number_of_replicas())
294  IMP.pmi.tools._add_pmi_provenance(self.root_hier)
295  IMP.core.add_provenance(self.model, self.root_hier, p)
296 
297  def execute_macro(self):
298  temp_index_factor = 100000.0
299  samplers = []
300  sampler_mc = None
301  sampler_md = None
302  if self.monte_carlo_sample_objects is not None:
303  print("Setting up MonteCarlo")
304  sampler_mc = IMP.pmi.samplers.MonteCarlo(
305  self.model, self.monte_carlo_sample_objects,
306  self.vars["monte_carlo_temperature"],
307  score_moved=self.score_moved)
308  if self.vars["simulated_annealing"]:
309  tmin = self.vars["simulated_annealing_minimum_temperature"]
310  tmax = self.vars["simulated_annealing_maximum_temperature"]
311  nfmin = self.vars[
312  "simulated_annealing_minimum_temperature_nframes"]
313  nfmax = self.vars[
314  "simulated_annealing_maximum_temperature_nframes"]
315  sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
316  if self.vars["self_adaptive"]:
317  sampler_mc.set_self_adaptive(
318  isselfadaptive=self.vars["self_adaptive"])
319  if self.output_objects is not None:
320  self.output_objects.append(sampler_mc)
321  if self.rmf_output_objects is not None:
322  self.rmf_output_objects.append(sampler_mc)
323  samplers.append(sampler_mc)
324 
325  if self.molecular_dynamics_sample_objects is not None:
326  print("Setting up MolecularDynamics")
328  self.model, self.molecular_dynamics_sample_objects,
329  self.vars["monte_carlo_temperature"],
330  maximum_time_step=self.molecular_dynamics_max_time_step)
331  if self.vars["simulated_annealing"]:
332  tmin = self.vars["simulated_annealing_minimum_temperature"]
333  tmax = self.vars["simulated_annealing_maximum_temperature"]
334  nfmin = self.vars[
335  "simulated_annealing_minimum_temperature_nframes"]
336  nfmax = self.vars[
337  "simulated_annealing_maximum_temperature_nframes"]
338  sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
339  if self.output_objects is not None:
340  self.output_objects.append(sampler_md)
341  if self.rmf_output_objects is not None:
342  self.rmf_output_objects.append(sampler_md)
343  samplers.append(sampler_md)
344 # -------------------------------------------------------------------------
345 
346  print("Setting up ReplicaExchange")
348  self.model, self.vars["replica_exchange_minimum_temperature"],
349  self.vars["replica_exchange_maximum_temperature"], samplers,
350  replica_exchange_object=self.replica_exchange_object)
351  self.replica_exchange_object = rex.rem
352 
353  myindex = rex.get_my_index()
354  if self.output_objects is not None:
355  self.output_objects.append(rex)
356  if self.rmf_output_objects is not None:
357  self.rmf_output_objects.append(rex)
358  # must reset the minimum temperature due to the
359  # different binary length of rem.get_my_parameter double and python
360  # float
361  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
362 
363 # -------------------------------------------------------------------------
364 
365  globaldir = self.vars["global_output_directory"] + "/"
366  rmf_dir = globaldir + self.vars["rmf_dir"]
367  pdb_dir = globaldir + self.vars["best_pdb_dir"]
368 
369  if not self.test_mode:
370  if self.vars["do_clean_first"]:
371  pass
372 
373  if self.vars["do_create_directories"]:
374 
375  try:
376  os.makedirs(globaldir)
377  except: # noqa: E722
378  pass
379  try:
380  os.makedirs(rmf_dir)
381  except: # noqa: E722
382  pass
383 
384  if not self.is_multi_state:
385  try:
386  os.makedirs(pdb_dir)
387  except: # noqa: E722
388  pass
389  else:
390  for n in range(self.vars["number_of_states"]):
391  try:
392  os.makedirs(pdb_dir + "/" + str(n))
393  except: # noqa: E722
394  pass
395 
396 # -------------------------------------------------------------------------
397 
399  if self.output_objects is not None:
400  self.output_objects.append(sw)
401  if self.rmf_output_objects is not None:
402  self.rmf_output_objects.append(sw)
403 
404  print("Setting up stat file")
405  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
406  low_temp_stat_file = globaldir + \
407  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
408 
409  # Ensure model is updated before saving init files
410  if not self.test_mode:
411  self.model.update()
412 
413  if not self.test_mode:
414  if self.output_objects is not None:
415  output.init_stat2(low_temp_stat_file,
416  self.output_objects,
417  extralabels=["rmf_file", "rmf_frame_index"])
418  else:
419  print("Stat file writing is disabled")
420 
421  if self.rmf_output_objects is not None:
422  print("Stat info being written in the rmf file")
423 
424  print("Setting up replica stat file")
425  replica_stat_file = globaldir + \
426  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
427  if not self.test_mode:
428  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
429 
430  if not self.test_mode:
431  print("Setting up best pdb files")
432  if not self.is_multi_state:
433  if self.vars["number_of_best_scoring_models"] > 0:
434  output.init_pdb_best_scoring(
435  pdb_dir + "/" + self.vars["best_pdb_name_suffix"],
436  self.root_hier,
437  self.vars["number_of_best_scoring_models"],
438  replica_exchange=True)
439  output.write_psf(
440  pdb_dir + "/" + "model.psf",
441  pdb_dir + "/" +
442  self.vars["best_pdb_name_suffix"] + ".0.pdb")
443  else:
444  if self.vars["number_of_best_scoring_models"] > 0:
445  for n in range(self.vars["number_of_states"]):
446  output.init_pdb_best_scoring(
447  pdb_dir + "/" + str(n) + "/" +
448  self.vars["best_pdb_name_suffix"],
449  self.root_hiers[n],
450  self.vars["number_of_best_scoring_models"],
451  replica_exchange=True)
452  output.write_psf(
453  pdb_dir + "/" + str(n) + "/" + "model.psf",
454  pdb_dir + "/" + str(n) + "/" +
455  self.vars["best_pdb_name_suffix"] + ".0.pdb")
456 # ---------------------------------------------
457 
458  if self.em_object_for_rmf is not None:
459  output_hierarchies = [
460  self.root_hier,
461  self.em_object_for_rmf.get_density_as_hierarchy(
462  )]
463  else:
464  output_hierarchies = [self.root_hier]
465 
466  if not self.test_mode:
467  print("Setting up and writing initial rmf coordinate file")
468  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
469  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
470  output_hierarchies,
471  listofobjects=self.rmf_output_objects)
472  if self._rmf_restraints:
473  output.add_restraints_to_rmf(
474  init_suffix + "." + str(myindex) + ".rmf3",
475  self._rmf_restraints)
476  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
477  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
478 
479  if not self.test_mode:
480  mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
481 
482  self._add_provenance(sampler_md, sampler_mc)
483 
484  if not self.test_mode:
485  print("Setting up production rmf files")
486  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
487  output.init_rmf(rmfname, output_hierarchies,
488  geometries=self.vars["geometries"],
489  listofobjects=self.rmf_output_objects)
490 
491  if self._rmf_restraints:
492  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
493 
494  ntimes_at_low_temp = 0
495 
496  if myindex == 0:
497  self.show_info()
498  self.replica_exchange_object.set_was_used(True)
499  nframes = self.vars["number_of_frames"]
500  if self.test_mode:
501  nframes = 1
502  for i in range(nframes):
503  if self.test_mode:
504  score = 0.
505  else:
506  for nr in range(self.vars["num_sample_rounds"]):
507  if sampler_md is not None:
508  sampler_md.optimize(
509  self.vars["molecular_dynamics_steps"])
510  if sampler_mc is not None:
511  sampler_mc.optimize(self.vars["monte_carlo_steps"])
513  self.model).evaluate(False)
514  mpivs.set_value("score", score)
515  output.set_output_entry("score", score)
516 
517  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
518 
519  if self.vars["save_coordinates_mode"] == "lowest_temperature":
520  save_frame = (min_temp_index == my_temp_index)
521  elif self.vars["save_coordinates_mode"] == "25th_score":
522  score_perc = mpivs.get_percentile("score")
523  save_frame = (score_perc*100.0 <= 25.0)
524  elif self.vars["save_coordinates_mode"] == "50th_score":
525  score_perc = mpivs.get_percentile("score")
526  save_frame = (score_perc*100.0 <= 50.0)
527  elif self.vars["save_coordinates_mode"] == "75th_score":
528  score_perc = mpivs.get_percentile("score")
529  save_frame = (score_perc*100.0 <= 75.0)
530 
531  # Ensure model is updated before saving output files
532  if save_frame and not self.test_mode:
533  self.model.update()
534 
535  if save_frame:
536  print("--- frame %s score %s " % (str(i), str(score)))
537 
538  if not self.test_mode:
539  if i % self.vars["nframes_write_coordinates"] == 0:
540  print('--- writing coordinates')
541  if self.vars["number_of_best_scoring_models"] > 0:
542  output.write_pdb_best_scoring(score)
543  output.write_rmf(rmfname)
544  output.set_output_entry("rmf_file", rmfname)
545  output.set_output_entry("rmf_frame_index",
546  ntimes_at_low_temp)
547  else:
548  output.set_output_entry("rmf_file", rmfname)
549  output.set_output_entry("rmf_frame_index", '-1')
550  if self.output_objects is not None:
551  output.write_stat2(low_temp_stat_file)
552  ntimes_at_low_temp += 1
553 
554  if not self.test_mode:
555  output.write_stat2(replica_stat_file)
556  if self.vars["replica_exchange_swap"]:
557  rex.swap_temp(i, score)
558  for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
559  p.add_replica_exchange(state, self)
560 
561  if not self.test_mode:
562  print("closing production rmf files")
563  output.close_rmf(rmfname)
564 
565 
566 class BuildSystem(object):
567  """A macro to build a IMP::pmi::topology::System based on a
568  TopologyReader object.
569 
570  Easily create multi-state systems by calling this macro
571  repeatedly with different TopologyReader objects!
572  A useful function is get_molecules() which returns the PMI Molecules
573  grouped by state as a dictionary with key = (molecule name),
574  value = IMP.pmi.topology.Molecule
575  Quick multi-state system:
576  @code{.python}
577  model = IMP.Model()
578  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
579  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
580  bs = IMP.pmi.macros.BuildSystem(model)
581  bs.add_state(reader1)
582  bs.add_state(reader2)
583  bs.execute_macro() # build everything including degrees of freedom
584  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
585  ### now you have a two state system, you add restraints etc
586  @endcode
587  @note The "domain name" entry of the topology reader is not used.
588  All molecules are set up by the component name, but split into rigid bodies
589  as requested.
590  """
591 
592  _alphabets = {'DNA': IMP.pmi.alphabets.dna,
593  'RNA': IMP.pmi.alphabets.rna}
594 
595  def __init__(self, model, sequence_connectivity_scale=4.0,
596  force_create_gmm_files=False, resolutions=[1, 10]):
597  """Constructor
598  @param model An IMP Model
599  @param sequence_connectivity_scale For scaling the connectivity
600  restraint
601  @param force_create_gmm_files If True, will sample and create GMMs
602  no matter what. If False, will only sample if the
603  files don't exist. If number of Gaussians is zero, won't
604  do anything.
605  @param resolutions The resolutions to build for structured regions
606  """
607  self.model = model
608  self.system = IMP.pmi.topology.System(self.model)
609  self._readers = [] # the TopologyReaders (one per state)
610  # TempResidues for each domain key=unique name,
611  # value=(atomic_res,non_atomic_res).
612  self._domain_res = []
613  self._domains = [] # key = domain unique name, value = Component
614  self.force_create_gmm_files = force_create_gmm_files
615  self.resolutions = resolutions
616 
617  def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
618  chain_ids=None):
619  """Add a state using the topology info in a
620  IMP::pmi::topology::TopologyReader object.
621  When you are done adding states, call execute_macro()
622  @param reader The TopologyReader object
623  @param fasta_name_map dictionary for converting protein names
624  found in the fasta file
625  @param chain_ids A list or string of chain IDs for assigning to
626  newly-created molecules, e.g.
627  `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
628  If not specified, chain IDs A through Z are assigned, then
629  AA through AZ, then BA through BZ, and so on, in the same
630  fashion as PDB.
631  """
632  state = self.system.create_state()
633  self._readers.append(reader)
634  # key is unique name, value is (atomic res, nonatomicres)
635  these_domain_res = {}
636  these_domains = {} # key is unique name, value is _Component
637  if chain_ids is None:
638  chain_ids = IMP.pmi.output._ChainIDs()
639  numchain = 0
640 
641  # setup representation
642  # loop over molecules, copies, then domains
643  for molname in reader.get_molecules():
644  copies = reader.get_molecules()[molname].domains
645  for nc, copyname in enumerate(copies):
646  print("BuildSystem.add_state: setting up molecule %s copy "
647  "number %s" % (molname, str(nc)))
648  copy = copies[copyname]
649  # option to not rename chains
650  if keep_chain_id:
651  all_chains = [c for c in copy if c.chain is not None]
652  if all_chains:
653  chain_id = all_chains[0].chain
654  else:
655  chain_id = chain_ids[numchain]
656  warnings.warn(
657  "No PDBs specified for %s, so keep_chain_id has "
658  "no effect; using default chain ID '%s'"
659  % (molname, chain_id), IMP.pmi.ParameterWarning)
660  else:
661  chain_id = chain_ids[numchain]
662  if nc == 0:
663  alphabet = IMP.pmi.alphabets.amino_acid
664  fasta_flag = copy[0].fasta_flag
665  if fasta_flag in self._alphabets:
666  alphabet = self._alphabets[fasta_flag]
668  copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
669  print("BuildSystem.add_state: molecule %s sequence has "
670  "%s residues" % (molname, len(seq)))
671  orig_mol = state.create_molecule(molname, seq, chain_id,
672  alphabet=alphabet)
673  mol = orig_mol
674  numchain += 1
675  else:
676  print("BuildSystem.add_state: creating a copy for "
677  "molecule %s" % molname)
678  mol = orig_mol.create_copy(chain_id)
679  numchain += 1
680 
681  for domainnumber, domain in enumerate(copy):
682  print("BuildSystem.add_state: ---- setting up domain %s "
683  "of molecule %s" % (domainnumber, molname))
684  # we build everything in the residue range, even if it
685  # extends beyond what's in the actual PDB file
686  these_domains[domain.get_unique_name()] = domain
687  if domain.residue_range == [] or \
688  domain.residue_range is None:
689  domain_res = mol.get_residues()
690  else:
691  start = domain.residue_range[0]+domain.pdb_offset
692  if domain.residue_range[1] == 'END':
693  end = len(mol.sequence)
694  else:
695  end = domain.residue_range[1]+domain.pdb_offset
696  domain_res = mol.residue_range(start-1, end-1)
697  print("BuildSystem.add_state: -------- domain %s of "
698  "molecule %s extends from residue %s to "
699  "residue %s "
700  % (domainnumber, molname, start, end))
701  if domain.pdb_file == "BEADS":
702  print("BuildSystem.add_state: -------- domain %s of "
703  "molecule %s represented by BEADS "
704  % (domainnumber, molname))
705  mol.add_representation(
706  domain_res,
707  resolutions=[domain.bead_size],
708  setup_particles_as_densities=(
709  domain.em_residues_per_gaussian != 0),
710  color=domain.color)
711  these_domain_res[domain.get_unique_name()] = \
712  (set(), domain_res)
713  elif domain.pdb_file == "IDEAL_HELIX":
714  print("BuildSystem.add_state: -------- domain %s of "
715  "molecule %s represented by IDEAL_HELIX "
716  % (domainnumber, molname))
717  emper = domain.em_residues_per_gaussian
718  mol.add_representation(
719  domain_res,
720  resolutions=self.resolutions,
721  ideal_helix=True,
722  density_residues_per_component=emper,
723  density_prefix=domain.density_prefix,
724  density_force_compute=self.force_create_gmm_files,
725  color=domain.color)
726  these_domain_res[domain.get_unique_name()] = \
727  (domain_res, set())
728  else:
729  print("BuildSystem.add_state: -------- domain %s of "
730  "molecule %s represented by pdb file %s "
731  % (domainnumber, molname, domain.pdb_file))
732  domain_atomic = mol.add_structure(domain.pdb_file,
733  domain.chain,
734  domain.residue_range,
735  domain.pdb_offset,
736  soft_check=True)
737  domain_non_atomic = domain_res - domain_atomic
738  if not domain.em_residues_per_gaussian:
739  mol.add_representation(
740  domain_atomic, resolutions=self.resolutions,
741  color=domain.color)
742  if len(domain_non_atomic) > 0:
743  mol.add_representation(
744  domain_non_atomic,
745  resolutions=[domain.bead_size],
746  color=domain.color)
747  else:
748  print("BuildSystem.add_state: -------- domain %s "
749  "of molecule %s represented by gaussians "
750  % (domainnumber, molname))
751  emper = domain.em_residues_per_gaussian
752  creategmm = self.force_create_gmm_files
753  mol.add_representation(
754  domain_atomic,
755  resolutions=self.resolutions,
756  density_residues_per_component=emper,
757  density_prefix=domain.density_prefix,
758  density_force_compute=creategmm,
759  color=domain.color)
760  if len(domain_non_atomic) > 0:
761  mol.add_representation(
762  domain_non_atomic,
763  resolutions=[domain.bead_size],
764  setup_particles_as_densities=True,
765  color=domain.color)
766  these_domain_res[domain.get_unique_name()] = (
767  domain_atomic, domain_non_atomic)
768  self._domain_res.append(these_domain_res)
769  self._domains.append(these_domains)
770  print('BuildSystem.add_state: State', len(self.system.states), 'added')
771 
772  def get_molecules(self):
773  """Return list of all molecules grouped by state.
774  For each state, it's a dictionary of Molecules where key is the
775  molecule name
776  """
777  return [s.get_molecules() for s in self.system.get_states()]
778 
779  def get_molecule(self, molname, copy_index=0, state_index=0):
780  return self.system.get_states()[state_index].get_molecules()[
781  molname][copy_index]
782 
783  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04,
784  max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
785  """Builds representations and sets up degrees of freedom"""
786  print("BuildSystem.execute_macro: building representations")
787  self.root_hier = self.system.build()
788 
789  print("BuildSystem.execute_macro: setting up degrees of freedom")
790  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
791  for nstate, reader in enumerate(self._readers):
792  rbs = reader.get_rigid_bodies()
793  srbs = reader.get_super_rigid_bodies()
794  csrbs = reader.get_chains_of_super_rigid_bodies()
795 
796  # add rigid bodies
797  domains_in_rbs = set()
798  for rblist in rbs:
799  print("BuildSystem.execute_macro: -------- building rigid "
800  "body %s" % (str(rblist)))
801  all_res = IMP.pmi.tools.OrderedSet()
802  bead_res = IMP.pmi.tools.OrderedSet()
803  for dname in rblist:
804  domain = self._domains[nstate][dname]
805  print("BuildSystem.execute_macro: -------- adding %s"
806  % (str(dname)))
807  all_res |= self._domain_res[nstate][dname][0]
808  bead_res |= self._domain_res[nstate][dname][1]
809  domains_in_rbs.add(dname)
810  all_res |= bead_res
811  print("BuildSystem.execute_macro: -------- creating rigid "
812  "body with max_trans %s max_rot %s "
813  "non_rigid_max_trans %s"
814  % (str(max_rb_trans), str(max_rb_rot),
815  str(max_bead_trans)))
816  self.dof.create_rigid_body(all_res,
817  nonrigid_parts=bead_res,
818  max_trans=max_rb_trans,
819  max_rot=max_rb_rot,
820  nonrigid_max_trans=max_bead_trans,
821  name="RigidBody %s" % dname)
822 
823  # if you have any domains not in an RB, set them as flexible beads
824  for dname, domain in self._domains[nstate].items():
825  if dname not in domains_in_rbs:
826  if domain.pdb_file != "BEADS":
827  warnings.warn(
828  "No rigid bodies set for %s. Residues read from "
829  "the PDB file will not be sampled - only regions "
830  "missing from the PDB will be treated flexibly. "
831  "To sample the entire sequence, use BEADS instead "
832  "of a PDB file name" % dname,
834  self.dof.create_flexible_beads(
835  self._domain_res[nstate][dname][1],
836  max_trans=max_bead_trans)
837 
838  # add super rigid bodies
839  for srblist in srbs:
840  print("BuildSystem.execute_macro: -------- building "
841  "super rigid body %s" % (str(srblist)))
842  all_res = IMP.pmi.tools.OrderedSet()
843  for dname in srblist:
844  print("BuildSystem.execute_macro: -------- adding %s"
845  % (str(dname)))
846  all_res |= self._domain_res[nstate][dname][0]
847  all_res |= self._domain_res[nstate][dname][1]
848 
849  print("BuildSystem.execute_macro: -------- creating super "
850  "rigid body with max_trans %s max_rot %s "
851  % (str(max_srb_trans), str(max_srb_rot)))
852  self.dof.create_super_rigid_body(
853  all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
854 
855  # add chains of super rigid bodies
856  for csrblist in csrbs:
857  all_res = IMP.pmi.tools.OrderedSet()
858  for dname in csrblist:
859  all_res |= self._domain_res[nstate][dname][0]
860  all_res |= self._domain_res[nstate][dname][1]
861  all_res = list(all_res)
862  all_res.sort(key=lambda r: r.get_index())
863  self.dof.create_main_chain_mover(all_res)
864  return self.root_hier, self.dof
865 
866 
867 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
868 class AnalysisReplicaExchange0(object):
869  """A macro for running all the basic operations of analysis.
870  Includes clustering, precision analysis, and making ensemble density maps.
871  A number of plots are also supported.
872  """
873  def __init__(self, model,
874  merge_directories=["./"],
875  stat_file_name_suffix="stat",
876  best_pdb_name_suffix="model",
877  do_clean_first=True,
878  do_create_directories=True,
879  global_output_directory="output/",
880  replica_stat_file_suffix="stat_replica",
881  global_analysis_result_directory="./analysis/",
882  test_mode=False):
883  """Constructor.
884  @param model The IMP model
885  @param stat_file_name_suffix
886  @param merge_directories The directories containing output files
887  @param best_pdb_name_suffix
888  @param do_clean_first
889  @param do_create_directories
890  @param global_output_directory Where everything is
891  @param replica_stat_file_suffix
892  @param global_analysis_result_directory
893  @param test_mode If True, nothing is changed on disk
894  """
895 
896  try:
897  from mpi4py import MPI
898  self.comm = MPI.COMM_WORLD
899  self.rank = self.comm.Get_rank()
900  self.number_of_processes = self.comm.size
901  except ImportError:
902  self.rank = 0
903  self.number_of_processes = 1
904 
905  self.test_mode = test_mode
906  self._protocol_output = []
907  self.cluster_obj = None
908  self.model = model
909  stat_dir = global_output_directory
910  self.stat_files = []
911  # it contains the position of the root directories
912  for rd in merge_directories:
913  stat_files = glob.glob(os.path.join(rd, stat_dir, "stat.*.out"))
914  if len(stat_files) == 0:
915  warnings.warn("no stat files found in %s"
916  % os.path.join(rd, stat_dir),
918  self.stat_files += stat_files
919 
920  def add_protocol_output(self, p):
921  """Capture details of the modeling protocol.
922  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
923  """
924  # Assume last state is the one we're interested in
925  self._protocol_output.append((p, p._last_state))
926 
927  def get_modeling_trajectory(self,
928  score_key="SimplifiedModel_Total_Score_None",
929  rmf_file_key="rmf_file",
930  rmf_file_frame_key="rmf_frame_index",
931  outputdir="./",
932  get_every=1,
933  nframes_trajectory=10000):
934  """ Get a trajectory of the modeling run, for generating
935  demonstrative movies
936 
937  @param score_key The score for ranking models
938  @param rmf_file_key Key pointing to RMF filename
939  @param rmf_file_frame_key Key pointing to RMF frame number
940  @param outputdir The local output directory used in the run
941  @param get_every Extract every nth frame
942  @param nframes_trajectory Total number of frames of the trajectory
943  """
944  import math
945 
946  trajectory_models = IMP.pmi.io.get_trajectory_models(
947  self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
948  get_every)
949  score_list = list(map(float, trajectory_models[2]))
950 
951  max_score = max(score_list)
952  min_score = min(score_list)
953 
954  bins = [(max_score-min_score)*math.exp(-float(i))+min_score
955  for i in range(nframes_trajectory)]
956  binned_scores = [None]*nframes_trajectory
957  binned_model_indexes = [-1]*nframes_trajectory
958 
959  for model_index, s in enumerate(score_list):
960  bins_score_diffs = [abs(s-b) for b in bins]
961  bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
962  if binned_scores[bin_index] is None:
963  binned_scores[bin_index] = s
964  binned_model_indexes[bin_index] = model_index
965  else:
966  old_diff = abs(binned_scores[bin_index]-bins[bin_index])
967  new_diff = abs(s-bins[bin_index])
968  if new_diff < old_diff:
969  binned_scores[bin_index] = s
970  binned_model_indexes[bin_index] = model_index
971 
972  print(binned_scores)
973  print(binned_model_indexes)
974 
975  def _expand_ambiguity(self, prot, d):
976  """If using PMI2, expand the dictionary to include copies as
977  ambiguous options
978 
979  This also keeps the states separate.
980  """
981  newdict = {}
982  for key in d:
983  val = d[key]
984  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
985  newdict[key] = val
986  continue
987  states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
988  if isinstance(val, tuple):
989  start = val[0]
990  stop = val[1]
991  name = val[2]
992  else:
993  start = 1
994  stop = -1
995  name = val
996  for nst in range(len(states)):
997  sel = IMP.atom.Selection(prot, molecule=name, state_index=nst)
998  copies = sel.get_selected_particles(with_representation=False)
999  if len(copies) > 1:
1000  for nc in range(len(copies)):
1001  if len(states) > 1:
1002  newdict['%s.%i..%i' % (name, nst, nc)] = \
1003  (start, stop, name, nc, nst)
1004  else:
1005  newdict['%s..%i' % (name, nc)] = \
1006  (start, stop, name, nc, nst)
1007  else:
1008  newdict[key] = val
1009  return newdict
1010 
1011  def clustering(self,
1012  score_key="SimplifiedModel_Total_Score_None",
1013  rmf_file_key="rmf_file",
1014  rmf_file_frame_key="rmf_frame_index",
1015  state_number=0,
1016  prefiltervalue=None,
1017  feature_keys=[],
1018  outputdir="./",
1019  alignment_components=None,
1020  number_of_best_scoring_models=10,
1021  rmsd_calculation_components=None,
1022  distance_matrix_file='distances.mat',
1023  load_distance_matrix_file=False,
1024  skip_clustering=False,
1025  number_of_clusters=1,
1026  display_plot=False,
1027  exit_after_display=True,
1028  get_every=1,
1029  first_and_last_frames=None,
1030  density_custom_ranges=None,
1031  write_pdb_with_centered_coordinates=False,
1032  voxel_size=5.0):
1033  """Get the best scoring models, compute a distance matrix,
1034  cluster them, and create density maps.
1035 
1036  Tuple format: "molname" just the molecule,
1037  or (start,stop,molname,copy_num(optional),state_num(optional)
1038  Can pass None for copy or state to ignore that field.
1039  If you don't pass a specific copy number
1040 
1041  @param score_key The score for ranking models.
1042  Use "Total_Score" instead of default for PMI2.
1043  @param rmf_file_key Key pointing to RMF filename
1044  @param rmf_file_frame_key Key pointing to RMF frame number
1045  @param state_number State number to analyze
1046  @param prefiltervalue Only include frames where the
1047  score key is below this value
1048  @param feature_keys Keywords for which you want to
1049  calculate average, medians, etc.
1050  If you pass "Keyname" it'll include everything that matches
1051  "*Keyname*"
1052  @param outputdir The local output directory used in
1053  the run
1054  @param alignment_components Dictionary with keys=groupname,
1055  values are tuples for aligning the structures
1056  e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1057  @param number_of_best_scoring_models Num models to keep per run
1058  @param rmsd_calculation_components For calculating RMSD
1059  (same format as alignment_components)
1060  @param distance_matrix_file Where to store/read the
1061  distance matrix
1062  @param load_distance_matrix_file Try to load the distance
1063  matrix file
1064  @param skip_clustering Just extract the best scoring
1065  models and save the pdbs
1066  @param number_of_clusters Number of k-means clusters
1067  @param display_plot Display the distance matrix
1068  @param exit_after_display Exit after displaying distance
1069  matrix
1070  @param get_every Extract every nth frame
1071  @param first_and_last_frames A tuple with the first and last
1072  frames to be analyzed. Values are percentages!
1073  Default: get all frames
1074  @param density_custom_ranges For density calculation
1075  (same format as alignment_components)
1076  @param write_pdb_with_centered_coordinates
1077  @param voxel_size Used for the density output
1078  """
1079  # Track provenance information to be added to each output model
1080  prov = []
1081  self._outputdir = os.path.abspath(outputdir)
1082  self._number_of_clusters = number_of_clusters
1083  for p, state in self._protocol_output:
1084  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1085 
1086  if self.test_mode:
1087  return
1088 
1089  if self.rank == 0:
1090  try:
1091  os.mkdir(outputdir)
1092  except: # noqa: E722
1093  pass
1094 
1095  if not load_distance_matrix_file:
1096  if len(self.stat_files) == 0:
1097  print("ERROR: no stat file found in the given path")
1098  return
1099  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1100  self.stat_files, self.number_of_processes)[self.rank]
1101 
1102  # read ahead to check if you need the PMI2 score key instead
1103  po = IMP.pmi.output.ProcessOutput(my_stat_files[0])
1104  orig_score_key = score_key
1105  if score_key not in po.get_keys():
1106  if 'Total_Score' in po.get_keys():
1107  score_key = 'Total_Score'
1108  warnings.warn(
1109  "Using 'Total_Score' instead of "
1110  "'SimplifiedModel_Total_Score_None' for the score key",
1112  for k in [orig_score_key, score_key, rmf_file_key,
1113  rmf_file_frame_key]:
1114  if k in feature_keys:
1115  warnings.warn(
1116  "no need to pass " + k + " to feature_keys.",
1118  feature_keys.remove(k)
1119 
1120  best_models = IMP.pmi.io.get_best_models(
1121  my_stat_files, score_key, feature_keys, rmf_file_key,
1122  rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1123  rmf_file_list = best_models[0]
1124  rmf_file_frame_list = best_models[1]
1125  score_list = best_models[2]
1126  feature_keyword_list_dict = best_models[3]
1127 
1128 # ------------------------------------------------------------------------
1129 # collect all the files and scores
1130 # ------------------------------------------------------------------------
1131 
1132  if self.number_of_processes > 1:
1133  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1134  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1135  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1136  rmf_file_frame_list)
1137  for k in feature_keyword_list_dict:
1138  feature_keyword_list_dict[k] = \
1140  feature_keyword_list_dict[k])
1141 
1142  # sort by score and get the best scoring ones
1143  score_rmf_tuples = list(zip(score_list,
1144  rmf_file_list,
1145  rmf_file_frame_list,
1146  list(range(len(score_list)))))
1147 
1148  if density_custom_ranges:
1149  for k in density_custom_ranges:
1150  if not isinstance(density_custom_ranges[k], list):
1151  raise Exception("Density custom ranges: values must "
1152  "be lists of tuples")
1153 
1154  # keep subset of frames if requested
1155  if first_and_last_frames is not None:
1156  nframes = len(score_rmf_tuples)
1157  first_frame = int(first_and_last_frames[0] * nframes)
1158  last_frame = int(first_and_last_frames[1] * nframes)
1159  if last_frame > len(score_rmf_tuples):
1160  last_frame = -1
1161  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1162 
1163  # sort RMFs by the score_key in ascending order, and store the rank
1164  best_score_rmf_tuples = sorted(
1165  score_rmf_tuples,
1166  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1167  best_score_rmf_tuples = [t+(n,) for n, t in
1168  enumerate(best_score_rmf_tuples)]
1169  # Note in the provenance info that we only kept best-scoring models
1170  prov.append(IMP.pmi.io.FilterProvenance(
1171  "Best scoring", 0, number_of_best_scoring_models))
1172  # sort the feature scores in the same way
1173  best_score_feature_keyword_list_dict = defaultdict(list)
1174  for tpl in best_score_rmf_tuples:
1175  index = tpl[3]
1176  for f in feature_keyword_list_dict:
1177  best_score_feature_keyword_list_dict[f].append(
1178  feature_keyword_list_dict[f][index])
1179  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1180  best_score_rmf_tuples,
1181  self.number_of_processes)[self.rank]
1182 
1183  # expand the dictionaries to include ambiguous copies
1184  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1185  self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1186  if IMP.pmi.get_is_canonical(prot_ahead):
1187  if rmsd_calculation_components is not None:
1188  tmp = self._expand_ambiguity(
1189  prot_ahead, rmsd_calculation_components)
1190  if tmp != rmsd_calculation_components:
1191  print('Detected ambiguity, expand rmsd components to',
1192  tmp)
1193  rmsd_calculation_components = tmp
1194  if alignment_components is not None:
1195  tmp = self._expand_ambiguity(prot_ahead,
1196  alignment_components)
1197  if tmp != alignment_components:
1198  print('Detected ambiguity, expand alignment '
1199  'components to', tmp)
1200  alignment_components = tmp
1201 
1202 # -------------------------------------------------------------
1203 # read the coordinates
1204 # ------------------------------------------------------------
1205  rmsd_weights = IMP.pmi.io.get_bead_sizes(
1206  self.model, my_best_score_rmf_tuples[0],
1207  rmsd_calculation_components, state_number=state_number)
1209  self.model, my_best_score_rmf_tuples, alignment_components,
1210  rmsd_calculation_components, state_number=state_number)
1211 
1212  # note! the coordinates are simply float tuples, NOT decorators,
1213  # NOT Vector3D, NOR particles, because these object cannot be
1214  # serialized. We need serialization
1215  # for the parallel computation based on mpi.
1216 
1217  # dict:key=component name,val=coords per hit
1218  all_coordinates = got_coords[0]
1219 
1220  # same as above, limited to alignment bits
1221  alignment_coordinates = got_coords[1]
1222 
1223  # same as above, limited to RMSD bits
1224  rmsd_coordinates = got_coords[2]
1225 
1226  # dictionary with key=RMF, value=score rank
1227  rmf_file_name_index_dict = got_coords[3]
1228 
1229  # RMF file per hit
1230  all_rmf_file_names = got_coords[4]
1231 
1232 # ------------------------------------------------------------------------
1233 # optionally don't compute distance matrix or cluster, just write top files
1234 # ------------------------------------------------------------------------
1235  if skip_clustering:
1236  if density_custom_ranges:
1237  DensModule = IMP.pmi.analysis.GetModelDensity(
1238  density_custom_ranges, voxel=voxel_size)
1239 
1240  dircluster = os.path.join(outputdir,
1241  "all_models."+str(self.rank))
1242  try:
1243  os.mkdir(outputdir)
1244  except: # noqa: E722
1245  pass
1246  try:
1247  os.mkdir(dircluster)
1248  except: # noqa: E722
1249  pass
1250  clusstat = open(os.path.join(
1251  dircluster, "stat."+str(self.rank)+".out"), "w")
1252  for cnt, tpl in enumerate(my_best_score_rmf_tuples):
1253  rmf_name = tpl[1]
1254  rmf_frame_number = tpl[2]
1255  tmp_dict = {}
1256  index = tpl[4]
1257  for key in best_score_feature_keyword_list_dict:
1258  tmp_dict[key] = \
1259  best_score_feature_keyword_list_dict[key][index]
1260 
1261  if cnt == 0:
1262  prots, rs = \
1263  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1264  self.model, rmf_frame_number, rmf_name)
1265  else:
1266  linking_successful = \
1267  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1268  self.model, prots, rs, rmf_frame_number,
1269  rmf_name)
1270  if not linking_successful:
1271  continue
1272 
1273  if not prots:
1274  continue
1275 
1276  if IMP.pmi.get_is_canonical(prots[0]):
1277  states = IMP.atom.get_by_type(
1278  prots[0], IMP.atom.STATE_TYPE)
1279  prot = states[state_number]
1280  else:
1281  prot = prots[state_number]
1282 
1283  # get transformation aligning coordinates of
1284  # requested tuples to the first RMF file
1285  if cnt == 0:
1286  coords_f1 = alignment_coordinates[cnt]
1287  if cnt > 0:
1288  coords_f2 = alignment_coordinates[cnt]
1289  if coords_f2:
1291  coords_f1, coords_f2)
1292  transformation = Ali.align()[1]
1293  else:
1294  transformation = \
1296 
1297  rbs = set()
1298  for p in IMP.atom.get_leaves(prot):
1299  if not IMP.core.XYZR.get_is_setup(p):
1301  IMP.core.XYZR(p).set_radius(0.0001)
1302  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1303 
1305  rbm = IMP.core.RigidBodyMember(p)
1306  rb = rbm.get_rigid_body()
1307  rbs.add(rb)
1308  else:
1310  transformation)
1311  for rb in rbs:
1312  IMP.core.transform(rb, transformation)
1313 
1314  o = IMP.pmi.output.Output()
1315  self.model.update()
1316  out_pdb_fn = os.path.join(
1317  dircluster, str(cnt)+"."+str(self.rank)+".pdb")
1318  out_rmf_fn = os.path.join(
1319  dircluster, str(cnt)+"."+str(self.rank)+".rmf3")
1320  o.init_pdb(out_pdb_fn, prot)
1321  tc = write_pdb_with_centered_coordinates
1322  o.write_pdb(out_pdb_fn,
1323  translate_to_geometric_center=tc)
1324 
1325  tmp_dict["local_pdb_file_name"] = \
1326  os.path.basename(out_pdb_fn)
1327  tmp_dict["rmf_file_full_path"] = rmf_name
1328  tmp_dict["local_rmf_file_name"] = \
1329  os.path.basename(out_rmf_fn)
1330  tmp_dict["local_rmf_frame_number"] = 0
1331 
1332  clusstat.write(str(tmp_dict)+"\n")
1333 
1334  if IMP.pmi.get_is_canonical(prot):
1335  # create a single-state System and write that
1337  IMP.Particle(self.model))
1338  h.set_name("System")
1339  h.add_child(prot)
1340  o.init_rmf(out_rmf_fn, [h], rs)
1341  else:
1342  o.init_rmf(out_rmf_fn, [prot], rs)
1343 
1344  o.write_rmf(out_rmf_fn)
1345  o.close_rmf(out_rmf_fn)
1346  # add the density
1347  if density_custom_ranges:
1348  DensModule.add_subunits_density(prot)
1349 
1350  if density_custom_ranges:
1351  DensModule.write_mrc(path=dircluster)
1352  del DensModule
1353  return
1354 
1355  # broadcast the coordinates
1356  if self.number_of_processes > 1:
1357  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1358  all_coordinates)
1359  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1360  all_rmf_file_names)
1361  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1362  rmf_file_name_index_dict)
1363  alignment_coordinates = IMP.pmi.tools.scatter_and_gather(
1364  alignment_coordinates)
1365  rmsd_coordinates = IMP.pmi.tools.scatter_and_gather(
1366  rmsd_coordinates)
1367 
1368  if self.rank == 0:
1369  # save needed informations in external files
1370  self.save_objects(
1371  [best_score_feature_keyword_list_dict,
1372  rmf_file_name_index_dict],
1373  ".macro.pkl")
1374 
1375 # ------------------------------------------------------------------------
1376 # Calculate distance matrix and cluster
1377 # ------------------------------------------------------------------------
1378  print("setup clustering class")
1379  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1380 
1381  for n, model_coordinate_dict in enumerate(all_coordinates):
1382  # let's try to align
1383  if (alignment_components is not None
1384  and len(self.cluster_obj.all_coords) == 0):
1385  # set the first model as template coordinates
1386  self.cluster_obj.set_template(alignment_coordinates[n])
1387  self.cluster_obj.fill(all_rmf_file_names[n],
1388  rmsd_coordinates[n])
1389  print("Global calculating the distance matrix")
1390 
1391  # calculate distance matrix, all against all
1392  self.cluster_obj.dist_matrix()
1393 
1394  # perform clustering and optionally display
1395  if self.rank == 0:
1396  self.cluster_obj.do_cluster(number_of_clusters)
1397  if display_plot:
1398  if self.rank == 0:
1399  self.cluster_obj.plot_matrix(
1400  figurename=os.path.join(outputdir,
1401  'dist_matrix.pdf'))
1402  if exit_after_display:
1403  exit()
1404  self.cluster_obj.save_distance_matrix_file(
1405  file_name=distance_matrix_file)
1406 
1407 # ------------------------------------------------------------------------
1408 # Alteratively, load the distance matrix from file and cluster that
1409 # ------------------------------------------------------------------------
1410  else:
1411  if self.rank == 0:
1412  print("setup clustering class")
1413  self.cluster_obj = IMP.pmi.analysis.Clustering()
1414  self.cluster_obj.load_distance_matrix_file(
1415  file_name=distance_matrix_file)
1416  print("clustering with %s clusters" % str(number_of_clusters))
1417  self.cluster_obj.do_cluster(number_of_clusters)
1418  [best_score_feature_keyword_list_dict,
1419  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1420  if display_plot:
1421  if self.rank == 0:
1422  self.cluster_obj.plot_matrix(figurename=os.path.join(
1423  outputdir, 'dist_matrix.pdf'))
1424  if exit_after_display:
1425  exit()
1426  if self.number_of_processes > 1:
1427  self.comm.Barrier()
1428 
1429 # ------------------------------------------------------------------------
1430 # now save all informations about the clusters
1431 # ------------------------------------------------------------------------
1432 
1433  if self.rank == 0:
1434  print(self.cluster_obj.get_cluster_labels())
1435  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1436  print("rank %s " % str(self.rank))
1437  print("cluster %s " % str(n))
1438  print("cluster label %s " % str(cl))
1439  print(self.cluster_obj.get_cluster_label_names(cl))
1440  cluster_size = \
1441  len(self.cluster_obj.get_cluster_label_names(cl))
1442  cluster_prov = \
1443  prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1444 
1445  # first initialize the Density class if requested
1446  if density_custom_ranges:
1447  DensModule = IMP.pmi.analysis.GetModelDensity(
1448  density_custom_ranges,
1449  voxel=voxel_size)
1450 
1451  dircluster = outputdir + "/cluster." + str(n) + "/"
1452  try:
1453  os.mkdir(dircluster)
1454  except: # noqa: E722
1455  pass
1456 
1457  rmsd_dict = {
1458  "AVERAGE_RMSD":
1459  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1460  clusstat = open(dircluster + "stat.out", "w")
1461  for k, structure_name in enumerate(
1462  self.cluster_obj.get_cluster_label_names(cl)):
1463  # extract the features
1464  tmp_dict = {}
1465  tmp_dict.update(rmsd_dict)
1466  index = rmf_file_name_index_dict[structure_name]
1467  for key in best_score_feature_keyword_list_dict:
1468  tmp_dict[
1469  key] = best_score_feature_keyword_list_dict[
1470  key][
1471  index]
1472 
1473  # get the rmf name and the frame number from the list of
1474  # frame names
1475  rmf_name = structure_name.split("|")[0]
1476  rmf_frame_number = int(structure_name.split("|")[1])
1477  clusstat.write(str(tmp_dict) + "\n")
1478 
1479  # extract frame (open or link to existing)
1480  if k == 0:
1481  prots, rs = \
1482  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1483  self.model, rmf_frame_number, rmf_name)
1484  else:
1485  linking_successful = \
1486  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1487  self.model, prots, rs, rmf_frame_number,
1488  rmf_name)
1489  if not linking_successful:
1490  continue
1491  if not prots:
1492  continue
1493 
1494  if IMP.pmi.get_is_canonical(prots[0]):
1495  states = IMP.atom.get_by_type(
1496  prots[0], IMP.atom.STATE_TYPE)
1497  prot = states[state_number]
1498  else:
1499  prot = prots[state_number]
1500  if k == 0:
1501  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1502 
1503  # transform clusters onto first
1504  if k > 0:
1505  co = self.cluster_obj
1506  model_index = co.get_model_index_from_name(
1507  structure_name)
1508  transformation = co.get_transformation_to_first_member(
1509  cl, model_index)
1510  rbs = set()
1511  for p in IMP.atom.get_leaves(prot):
1512  if not IMP.core.XYZR.get_is_setup(p):
1514  IMP.core.XYZR(p).set_radius(0.0001)
1515  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1516 
1518  rbm = IMP.core.RigidBodyMember(p)
1519  rb = rbm.get_rigid_body()
1520  rbs.add(rb)
1521  else:
1523  transformation)
1524  for rb in rbs:
1525  IMP.core.transform(rb, transformation)
1526 
1527  # add the density
1528  if density_custom_ranges:
1529  DensModule.add_subunits_density(prot)
1530 
1531  # pdb writing should be optimized!
1532  o = IMP.pmi.output.Output()
1533  self.model.update()
1534  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1535  o.write_pdb(dircluster + str(k) + ".pdb")
1536 
1537  if IMP.pmi.get_is_canonical(prot):
1538  # create a single-state System and write that
1540  IMP.Particle(self.model))
1541  h.set_name("System")
1542  h.add_child(prot)
1543  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1544  else:
1545  o.init_rmf(dircluster + str(k) + ".rmf3", [prot], rs)
1546  o.write_rmf(dircluster + str(k) + ".rmf3")
1547  o.close_rmf(dircluster + str(k) + ".rmf3")
1548 
1549  del o
1550  # IMP.atom.destroy(prot)
1551 
1552  if density_custom_ranges:
1553  DensModule.write_mrc(path=dircluster)
1554  del DensModule
1555 
1556  if self.number_of_processes > 1:
1557  self.comm.Barrier()
1558 
1559  def get_cluster_rmsd(self, cluster_num):
1560  if self.cluster_obj is None:
1561  raise Exception("Run clustering first")
1562  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1563 
1564  def save_objects(self, objects, file_name):
1565  import pickle
1566  outf = open(file_name, 'wb')
1567  pickle.dump(objects, outf)
1568  outf.close()
1569 
1570  def load_objects(self, file_name):
1571  import pickle
1572  inputf = open(file_name, 'rb')
1573  objects = pickle.load(inputf)
1574  inputf.close()
1575  return objects
1576 
1577 
1579 
1580  """
1581  This class contains analysis utilities to investigate ReplicaExchange
1582  results.
1583  """
1584 
1585  ########################
1586  # Construction and Setup
1587  ########################
1588 
1589  def __init__(self, model, stat_files, best_models=None, score_key=None,
1590  alignment=True):
1591  """
1592  Construction of the Class.
1593  @param model IMP.Model()
1594  @param stat_files list of string. Can be ascii stat files,
1595  rmf files names
1596  @param best_models Integer. Number of best scoring models,
1597  if None: all models will be read
1598  @param alignment boolean (Default=True). Align before computing
1599  the rmsd.
1600  """
1601 
1602  self.model = model
1603  self.best_models = best_models
1605  model, stat_files, self.best_models, score_key, cache=True)
1607  StatHierarchyHandler=self.stath0)
1608 
1609  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(
1611  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(
1613  self.sel0_rmsd = IMP.atom.Selection(self.stath0)
1614  self.sel1_rmsd = IMP.atom.Selection(self.stath1)
1615  self.sel0_alignment = IMP.atom.Selection(self.stath0)
1616  self.sel1_alignment = IMP.atom.Selection(self.stath1)
1617  self.clusters = []
1618  # fill the cluster list with a single cluster containing all models
1619  c = IMP.pmi.output.Cluster(0)
1620  self.clusters.append(c)
1621  for n0 in range(len(self.stath0)):
1622  c.add_member(n0)
1623  self.pairwise_rmsd = {}
1624  self.pairwise_molecular_assignment = {}
1625  self.alignment = alignment
1626  self.symmetric_molecules = {}
1627  self.issymmetricsel = {}
1628  self.update_seldicts()
1629  self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1630  IMP.atom.get_leaves(self.stath0))
1631  self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1632  IMP.atom.get_leaves(self.stath1))
1633 
1634  def set_rmsd_selection(self, **kwargs):
1635  """
1636  Setup the selection onto which the rmsd is computed
1637  @param kwargs use IMP.atom.Selection keywords
1638  """
1639  self.sel0_rmsd = IMP.atom.Selection(self.stath0, **kwargs)
1640  self.sel1_rmsd = IMP.atom.Selection(self.stath1, **kwargs)
1641  self.update_seldicts()
1642 
1643  def set_symmetric(self, molecule_name):
1644  """
1645  Store names of symmetric molecules
1646  """
1647  self.symmetric_molecules[molecule_name] = 0
1648  self.update_seldicts()
1649 
1650  def set_alignment_selection(self, **kwargs):
1651  """
1652  Setup the selection onto which the alignment is computed
1653  @param kwargs use IMP.atom.Selection keywords
1654  """
1655  self.sel0_alignment = IMP.atom.Selection(self.stath0, **kwargs)
1656  self.sel1_alignment = IMP.atom.Selection(self.stath1, **kwargs)
1657 
1658  ######################
1659  # Clustering functions
1660  ######################
1661  def clean_clusters(self):
1662  for c in self.clusters:
1663  del c
1664  self.clusters = []
1665 
1666  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1667  """
1668  Cluster the models based on RMSD.
1669  @param rmsd_cutoff Float the distance cutoff in Angstrom
1670  @param metric (Default=IMP.atom.get_rmsd) the metric that will
1671  be used to compute rmsds
1672  """
1673  self.clean_clusters()
1674  not_clustered = set(range(len(self.stath1)))
1675  while len(not_clustered) > 0:
1676  self.aggregate(not_clustered, rmsd_cutoff, metric)
1677  self.update_clusters()
1678 
1679  def refine(self, rmsd_cutoff=10):
1680  """
1681  Refine the clusters by merging the ones whose centers are close
1682  @param rmsd_cutoff cutoff distance in Angstorms
1683  """
1684  clusters_copy = self.clusters
1685  for c0, c1 in itertools.combinations(self.clusters, 2):
1686  if c0.center_index is None:
1687  self.compute_cluster_center(c0)
1688  if c1.center_index is None:
1689  self.compute_cluster_center(c1)
1690  _ = self.stath0[c0.center_index]
1691  _ = self.stath1[c1.center_index]
1692  rmsd, molecular_assignment = self.rmsd()
1693  if rmsd <= rmsd_cutoff:
1694  if c1 in self.clusters:
1695  clusters_copy.remove(c1)
1696  c0 += c1
1697  self.clusters = clusters_copy
1698  self.update_clusters()
1699 
1700  ####################
1701  # Input Output
1702  ####################
1703 
1704  def set_cluster_assignments(self, cluster_ids):
1705  if len(cluster_ids) != len(self.stath0):
1706  raise ValueError('cluster ids has to be same length as '
1707  'number of frames')
1708 
1709  self.clusters = []
1710  for i in sorted(list(set(cluster_ids))):
1711  self.clusters.append(IMP.pmi.output.Cluster(i))
1712  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1713  self.clusters[idx].add_member(i, d)
1714 
1715  def get_cluster_data(self, cluster):
1716  """
1717  Return the model data from a cluster
1718  @param cluster IMP.pmi.output.Cluster object
1719  """
1720  data = []
1721  for m in cluster:
1722  data.append(m)
1723  return data
1724 
1725  def save_data(self, filename='data.pkl'):
1726  """
1727  Save the data for the whole models into a pickle file
1728  @param filename string
1729  """
1730  self.stath0.save_data(filename)
1731 
1732  def set_data(self, data):
1733  """
1734  Set the data from an external IMP.pmi.output.Data
1735  @param data IMP.pmi.output.Data
1736  """
1737  self.stath0.data = data
1738  self.stath1.data = data
1739 
1740  def load_data(self, filename='data.pkl'):
1741  """
1742  Load the data from an external pickled file
1743  @param filename string
1744  """
1745  self.stath0.load_data(filename)
1746  self.stath1.load_data(filename)
1747  self.best_models = len(self.stath0)
1748 
1749  def add_cluster(self, rmf_name_list):
1750  c = IMP.pmi.output.Cluster(len(self.clusters))
1751  print("creating cluster index "+str(len(self.clusters)))
1752  self.clusters.append(c)
1753  current_len = len(self.stath0)
1754 
1755  for rmf in rmf_name_list:
1756  print("adding rmf "+rmf)
1757  self.stath0.add_stat_file(rmf)
1758  self.stath1.add_stat_file(rmf)
1759 
1760  for n0 in range(current_len, len(self.stath0)):
1761  d0 = self.stath0[n0]
1762  c.add_member(n0, d0)
1763  self.update_clusters()
1764 
1765  def save_clusters(self, filename='clusters.pkl'):
1766  """
1767  Save the clusters into a pickle file
1768  @param filename string
1769  """
1770  try:
1771  import cPickle as pickle
1772  except ImportError:
1773  import pickle
1774  fl = open(filename, 'wb')
1775  pickle.dump(self.clusters, fl)
1776 
1777  def load_clusters(self, filename='clusters.pkl', append=False):
1778  """
1779  Load the clusters from a pickle file
1780  @param filename string
1781  @param append bool (Default=False), if True. append the clusters
1782  to the ones currently present
1783  """
1784  try:
1785  import cPickle as pickle
1786  except ImportError:
1787  import pickle
1788  fl = open(filename, 'rb')
1789  self.clean_clusters()
1790  if append:
1791  self.clusters += pickle.load(fl)
1792  else:
1793  self.clusters = pickle.load(fl)
1794  self.update_clusters()
1795 
1796  ####################
1797  # Analysis Functions
1798  ####################
1799 
1800  def compute_cluster_center(self, cluster):
1801  """
1802  Compute the cluster center for a given cluster
1803  """
1804  member_distance = defaultdict(float)
1805 
1806  for n0, n1 in itertools.combinations(cluster.members, 2):
1807  _ = self.stath0[n0]
1808  _ = self.stath1[n1]
1809  rmsd, _ = self.rmsd()
1810  member_distance[n0] += rmsd
1811 
1812  if len(member_distance) > 0:
1813  cluster.center_index = min(member_distance,
1814  key=member_distance.get)
1815  else:
1816  cluster.center_index = cluster.members[0]
1817 
1818  def save_coordinates(self, cluster, rmf_name=None, reference="Absolute",
1819  prefix="./"):
1820  """
1821  Save the coordinates of the current cluster a single rmf file
1822  """
1823  print("saving coordinates", cluster)
1824  if self.alignment:
1825  self.set_reference(reference, cluster)
1826  o = IMP.pmi.output.Output()
1827  if rmf_name is None:
1828  rmf_name = prefix+'/'+str(cluster.cluster_id)+".rmf3"
1829 
1830  _ = self.stath1[cluster.members[0]]
1831  self.model.update()
1832  o.init_rmf(rmf_name, [self.stath1])
1833  for n1 in cluster.members:
1834  _ = self.stath1[n1]
1835  self.model.update()
1837  if self.alignment:
1838  self.align()
1839  o.write_rmf(rmf_name)
1841  o.close_rmf(rmf_name)
1842 
1843  def prune_redundant_structures(self, rmsd_cutoff=10):
1844  """
1845  remove structures that are similar
1846  append it to a new cluster
1847  """
1848  print("pruning models")
1849  selected = 0
1850  filtered = [selected]
1851  remaining = range(1, len(self.stath1), 10)
1852 
1853  while len(remaining) > 0:
1854  d0 = self.stath0[selected]
1855  rm = []
1856  for n1 in remaining:
1857  _ = self.stath1[n1]
1858  if self.alignment:
1859  self.align()
1860  d, _ = self.rmsd()
1861  if d <= rmsd_cutoff:
1862  rm.append(n1)
1863  print("pruning model %s, similar to model %s, rmsd %s"
1864  % (str(n1), str(selected), str(d)))
1865  remaining = [x for x in remaining if x not in rm]
1866  if len(remaining) == 0:
1867  break
1868  selected = remaining[0]
1869  filtered.append(selected)
1870  remaining.pop(0)
1871  c = IMP.pmi.output.Cluster(len(self.clusters))
1872  self.clusters.append(c)
1873  for n0 in filtered:
1874  d0 = self.stath0[n0]
1875  c.add_member(n0, d0)
1876  self.update_clusters()
1877 
1878  def precision(self, cluster):
1879  """
1880  Compute the precision of a cluster
1881  """
1882  npairs = 0
1883  rmsd = 0.0
1884  precision = None
1885 
1886  if cluster.center_index is not None:
1887  members1 = [cluster.center_index]
1888  else:
1889  members1 = cluster.members
1890 
1891  for n0 in members1:
1892  _ = self.stath0[n0]
1893  for n1 in cluster.members:
1894  if n0 != n1:
1895  npairs += 1
1896  _ = self.stath1[n1]
1898  tmp_rmsd, _ = self.rmsd()
1899  rmsd += tmp_rmsd
1901 
1902  if npairs > 0:
1903  precision = rmsd/npairs
1904  cluster.precision = precision
1905  return precision
1906 
1907  def bipartite_precision(self, cluster1, cluster2, verbose=False):
1908  """
1909  Compute the bipartite precision (ie the cross-precision)
1910  between two clusters
1911  """
1912  npairs = 0
1913  rmsd = 0.0
1914  for cn0, n0 in enumerate(cluster1.members):
1915  _ = self.stath0[n0]
1916  for cn1, n1 in enumerate(cluster2.members):
1917  _ = self.stath1[n1]
1918  tmp_rmsd, _ = self.rmsd()
1919  if verbose:
1920  print("--- rmsd between structure %s and structure "
1921  "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1922  rmsd += tmp_rmsd
1923  npairs += 1
1924  precision = rmsd/npairs
1925  return precision
1926 
1927  def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1928  cluster_ref=None, step=1):
1929  """
1930  Compute the Root mean square fluctuations
1931  of a molecule in a cluster
1932  Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1933  residue indexes and the value is the rmsf
1934  """
1935  rmsf = IMP.pmi.tools.OrderedDict()
1936 
1937  # assumes that residue indexes are identical for stath0 and stath1
1938  if cluster_ref is not None:
1939  if cluster_ref.center_index is not None:
1940  members0 = [cluster_ref.center_index]
1941  else:
1942  members0 = cluster_ref.members
1943  else:
1944  if cluster.center_index is not None:
1945  members0 = [cluster.center_index]
1946  else:
1947  members0 = cluster.members
1948 
1949  s0 = IMP.atom.Selection(self.stath0, molecule=molecule, resolution=1,
1950  copy_index=copy_index, state_index=state_index)
1951  ps0 = s0.get_selected_particles()
1952  # get the residue indexes
1953  residue_indexes = list(IMP.pmi.tools.OrderedSet(
1954  [IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
1955 
1956  # get the corresponding particles
1957  npairs = 0
1958  for n0 in members0:
1959  d0 = self.stath0[n0]
1960  for n1 in cluster.members[::step]:
1961  if n0 != n1:
1962  print("--- rmsf %s %s" % (str(n0), str(n1)))
1964 
1965  s1 = IMP.atom.Selection(
1966  self.stath1, molecule=molecule,
1967  residue_indexes=residue_indexes, resolution=1,
1968  copy_index=copy_index, state_index=state_index)
1969  ps1 = s1.get_selected_particles()
1970 
1971  d1 = self.stath1[n1]
1972  if self.alignment:
1973  self.align()
1974  for n, (p0, p1) in enumerate(zip(ps0, ps1)):
1975  r = residue_indexes[n]
1976  d0 = IMP.core.XYZ(p0)
1977  d1 = IMP.core.XYZ(p1)
1978  if r in rmsf:
1979  rmsf[r] += IMP.core.get_distance(d0, d1)
1980  else:
1981  rmsf[r] = IMP.core.get_distance(d0, d1)
1982  npairs += 1
1984  for r in rmsf:
1985  rmsf[r] /= npairs
1986 
1987  for stath in [self.stath0, self.stath1]:
1988  if molecule not in self.symmetric_molecules:
1989  s = IMP.atom.Selection(
1990  stath, molecule=molecule, residue_index=r,
1991  resolution=1, copy_index=copy_index,
1992  state_index=state_index)
1993  else:
1994  s = IMP.atom.Selection(
1995  stath, molecule=molecule, residue_index=r,
1996  resolution=1, state_index=state_index)
1997 
1998  ps = s.get_selected_particles()
1999  for p in ps:
2001  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
2002  else:
2004 
2005  return rmsf
2006 
2007  def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2008  reference="Absolute", prefix="./", step=1):
2009  if self.alignment:
2010  self.set_reference(reference, cluster)
2011  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
2012  voxel=voxel_size)
2013 
2014  for n1 in cluster.members[::step]:
2015  print("density "+str(n1))
2016  _ = self.stath1[n1]
2018  if self.alignment:
2019  self.align()
2020  dens.add_subunits_density(self.stath1)
2022  dens.write_mrc(path=prefix+'/', suffix=str(cluster.cluster_id))
2023  del dens
2024 
2025  def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2026  consolidate=False, molecules=None, prefix='./',
2027  reference="Absolute"):
2028  if self.alignment:
2029  self.set_reference(reference, cluster)
2030  import numpy as np
2031  import matplotlib.pyplot as plt
2032  import matplotlib.cm as cm
2033  from scipy.spatial.distance import cdist
2034  import IMP.pmi.topology
2035  if molecules is None:
2037  for mol in IMP.pmi.tools.get_molecules(
2038  IMP.atom.get_leaves(self.stath1))]
2039  else:
2041  for mol in IMP.pmi.tools.get_molecules(
2043  self.stath1,
2044  molecules=molecules).get_selected_particles())]
2045  unique_copies = [mol for mol in mols if mol.get_copy_index() == 0]
2046  mol_names_unique = dict((mol.get_name(), mol) for mol in unique_copies)
2047  total_len_unique = sum(max(mol.get_residue_indexes())
2048  for mol in unique_copies)
2049 
2050  index_dict = {}
2051  prev_stop = 0
2052 
2053  if not consolidate:
2054  for mol in mols:
2055  seqlen = max(mol.get_residue_indexes())
2056  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2057  prev_stop += seqlen
2058 
2059  else:
2060  for mol in unique_copies:
2061  seqlen = max(mol.get_residue_indexes())
2062  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2063  prev_stop += seqlen
2064 
2065  for ncl, n1 in enumerate(cluster.members):
2066  print(ncl)
2067  _ = self.stath1[n1]
2068  coord_dict = IMP.pmi.tools.OrderedDict()
2069  for mol in mols:
2070  rindexes = mol.get_residue_indexes()
2071  coords = np.ones((max(rindexes), 3))
2072  for rnum in rindexes:
2073  sel = IMP.atom.Selection(mol, residue_index=rnum,
2074  resolution=1)
2075  selpart = sel.get_selected_particles()
2076  if len(selpart) == 0:
2077  continue
2078  selpart = selpart[0]
2079  coords[rnum - 1, :] = \
2080  IMP.core.XYZ(selpart).get_coordinates()
2081  coord_dict[mol] = coords
2082 
2083  if not consolidate:
2084  coords = np.concatenate(list(coord_dict.values()))
2085  dists = cdist(coords, coords)
2086  binary_dists = np.where((dists <= contact_threshold)
2087  & (dists >= 1.0), 1.0, 0.0)
2088  else:
2089  binary_dists_dict = {}
2090  for mol1 in mols:
2091  len1 = max(mol1.get_residue_indexes())
2092  for mol2 in mols:
2093  name1 = mol1.get_name()
2094  name2 = mol2.get_name()
2095  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2096  if (name1, name2) not in binary_dists_dict:
2097  binary_dists_dict[(name1, name2)] = \
2098  np.zeros((len1, len1))
2099  binary_dists_dict[(name1, name2)] += \
2100  np.where((dists <= contact_threshold)
2101  & (dists >= 1.0), 1.0, 0.0)
2102  binary_dists = np.zeros((total_len_unique, total_len_unique))
2103 
2104  for name1, name2 in binary_dists_dict:
2105  r1 = index_dict[mol_names_unique[name1]]
2106  r2 = index_dict[mol_names_unique[name2]]
2107  binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2108  np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2109  1.0, 0.0)
2110 
2111  if ncl == 0:
2112  dist_maps = [dists]
2113  av_dist_map = dists
2114  contact_freqs = binary_dists
2115  else:
2116  dist_maps.append(dists)
2117  av_dist_map += dists
2118  contact_freqs += binary_dists
2119 
2120  if log_scale:
2121  contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2122  else:
2123  contact_freqs = 1.0/len(cluster)*contact_freqs
2124  av_dist_map = 1.0/len(cluster)*contact_freqs
2125 
2126  fig = plt.figure(figsize=(100, 100))
2127  ax = fig.add_subplot(111)
2128  ax.set_xticks([])
2129  ax.set_yticks([])
2130  gap_between_components = 50
2131  colormap = cm.Blues
2132  colornorm = None
2133 
2134  if not consolidate:
2135  sorted_tuple = sorted(
2137  mol).get_extended_name(), mol) for mol in mols)
2138  prot_list = list(zip(*sorted_tuple))[1]
2139  else:
2140  sorted_tuple = sorted(
2141  (IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(), mol)
2142  for mol in unique_copies)
2143  prot_list = list(zip(*sorted_tuple))[1]
2144 
2145  prot_listx = prot_list
2146  nresx = gap_between_components + \
2147  sum([max(mol.get_residue_indexes())
2148  + gap_between_components for mol in prot_listx])
2149 
2150  # set the list of proteins on the y axis
2151  prot_listy = prot_list
2152  nresy = gap_between_components + \
2153  sum([max(mol.get_residue_indexes())
2154  + gap_between_components for mol in prot_listy])
2155 
2156  # this is the residue offset for each protein
2157  resoffsetx = {}
2158  resendx = {}
2159  res = gap_between_components
2160  for mol in prot_listx:
2161  resoffsetx[mol] = res
2162  res += max(mol.get_residue_indexes())
2163  resendx[mol] = res
2164  res += gap_between_components
2165 
2166  resoffsety = {}
2167  resendy = {}
2168  res = gap_between_components
2169  for mol in prot_listy:
2170  resoffsety[mol] = res
2171  res += max(mol.get_residue_indexes())
2172  resendy[mol] = res
2173  res += gap_between_components
2174 
2175  resoffsetdiagonal = {}
2176  res = gap_between_components
2177  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2178  resoffsetdiagonal[mol] = res
2179  res += max(mol.get_residue_indexes())
2180  res += gap_between_components
2181 
2182  # plot protein boundaries
2183  xticks = []
2184  xlabels = []
2185  for n, prot in enumerate(prot_listx):
2186  res = resoffsetx[prot]
2187  end = resendx[prot]
2188  for proty in prot_listy:
2189  resy = resoffsety[proty]
2190  endy = resendy[proty]
2191  ax.plot([res, res], [resy, endy], linestyle='-',
2192  color='gray', lw=0.4)
2193  ax.plot([end, end], [resy, endy], linestyle='-',
2194  color='gray', lw=0.4)
2195  xticks.append((float(res) + float(end)) / 2)
2197  prot).get_extended_name())
2198 
2199  yticks = []
2200  ylabels = []
2201  for n, prot in enumerate(prot_listy):
2202  res = resoffsety[prot]
2203  end = resendy[prot]
2204  for protx in prot_listx:
2205  resx = resoffsetx[protx]
2206  endx = resendx[protx]
2207  ax.plot([resx, endx], [res, res], linestyle='-',
2208  color='gray', lw=0.4)
2209  ax.plot([resx, endx], [end, end], linestyle='-',
2210  color='gray', lw=0.4)
2211  yticks.append((float(res) + float(end)) / 2)
2213  prot).get_extended_name())
2214 
2215  # plot the contact map
2216 
2217  tmp_array = np.zeros((nresx, nresy))
2218  ret = {}
2219  for px in prot_listx:
2220  for py in prot_listy:
2221  resx = resoffsetx[px]
2222  lengx = resendx[px] - 1
2223  resy = resoffsety[py]
2224  lengy = resendy[py] - 1
2225  indexes_x = index_dict[px]
2226  minx = min(indexes_x)
2227  maxx = max(indexes_x)
2228  indexes_y = index_dict[py]
2229  miny = min(indexes_y)
2230  maxy = max(indexes_y)
2231  tmp_array[resx:lengx, resy:lengy] = \
2232  contact_freqs[minx:maxx, miny:maxy]
2233  ret[(px, py)] = np.argwhere(
2234  contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2235 
2236  ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2237  origin='lower', alpha=0.6, interpolation='nearest')
2238 
2239  ax.set_xticks(xticks)
2240  ax.set_xticklabels(xlabels, rotation=90)
2241  ax.set_yticks(yticks)
2242  ax.set_yticklabels(ylabels)
2243  plt.setp(ax.get_xticklabels(), fontsize=6)
2244  plt.setp(ax.get_yticklabels(), fontsize=6)
2245 
2246  # display and write to file
2247  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2248  [i.set_linewidth(2.0) for i in ax.spines.values()]
2249 
2250  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf",
2251  dpi=300, transparent="False")
2252  return ret
2253 
2254  def plot_rmsd_matrix(self, filename):
2255  self.compute_all_pairwise_rmsd()
2256  distance_matrix = np.zeros(
2257  (len(self.stath0), len(self.stath1)))
2258  for (n0, n1) in self.pairwise_rmsd:
2259  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2260 
2261  import matplotlib as mpl
2262  mpl.use('Agg')
2263  import matplotlib.pylab as pl
2264  from scipy.cluster import hierarchy as hrc
2265 
2266  fig = pl.figure(figsize=(10, 8))
2267  ax = fig.add_subplot(212)
2268  dendrogram = hrc.dendrogram(
2269  hrc.linkage(distance_matrix),
2270  color_threshold=7,
2271  no_labels=True)
2272  leaves_order = dendrogram['leaves']
2273  ax.set_xlabel('Model')
2274  ax.set_ylabel('RMSD [Angstroms]')
2275 
2276  ax2 = fig.add_subplot(221)
2277  cax = ax2.imshow(
2278  distance_matrix[leaves_order, :][:, leaves_order],
2279  interpolation='nearest')
2280  cb = fig.colorbar(cax)
2281  cb.set_label('RMSD [Angstroms]')
2282  ax2.set_xlabel('Model')
2283  ax2.set_ylabel('Model')
2284 
2285  pl.savefig(filename, dpi=300)
2286  pl.close(fig)
2287 
2288  ####################
2289  # Internal Functions
2290  ####################
2291 
2292  def update_clusters(self):
2293  """
2294  Update the cluster id numbers
2295  """
2296  for n, c in enumerate(self.clusters):
2297  c.cluster_id = n
2298 
2299  def get_molecule(self, hier, name, copy):
2300  s = IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2301  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2302 
2303  def update_seldicts(self):
2304  """
2305  Update the seldicts
2306  """
2307  self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2308  self.sel0_rmsd.get_selected_particles())
2309  self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2310  self.sel1_rmsd.get_selected_particles())
2311  for mol in self.seldict0:
2312  for sel in self.seldict0[mol]:
2313  self.issymmetricsel[sel] = False
2314  for mol in self.symmetric_molecules:
2315  self.symmetric_molecules[mol] = len(self.seldict0[mol])
2316  for sel in self.seldict0[mol]:
2317  self.issymmetricsel[sel] = True
2318 
2319  def align(self):
2321  self.sel1_alignment, self.sel0_alignment)
2322 
2323  for rb in self.rbs1:
2324  IMP.core.transform(rb, tr)
2325 
2326  for bead in self.beads1:
2327  try:
2328  IMP.core.transform(IMP.core.XYZ(bead), tr)
2329  except: # noqa: E722
2330  continue
2331 
2332  self.model.update()
2333 
2334  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2335  '''
2336  initial filling of the clusters.
2337  '''
2338  n0 = idxs.pop()
2339  print("clustering model "+str(n0))
2340  d0 = self.stath0[n0]
2341  c = IMP.pmi.output.Cluster(len(self.clusters))
2342  print("creating cluster index "+str(len(self.clusters)))
2343  self.clusters.append(c)
2344  c.add_member(n0, d0)
2345  clustered = set([n0])
2346  for n1 in idxs:
2347  print("--- trying to add model " + str(n1) + " to cluster "
2348  + str(len(self.clusters)))
2349  d1 = self.stath1[n1]
2350  if self.alignment:
2351  self.align()
2352  rmsd, _ = self.rmsd(metric=metric)
2353  if rmsd < rmsd_cutoff:
2354  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2355  c.add_member(n1, d1)
2356  clustered.add(n1)
2357  else:
2358  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2359  idxs -= clustered
2360 
2361  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2362  """
2363  merge the clusters that have close members
2364  @param rmsd_cutoff cutoff distance in Angstorms
2365  """
2366  # before merging, clusters are spheres of radius rmsd_cutoff
2367  # centered on the 1st element
2368  # here we only try to merge clusters whose centers are closer
2369  # than 2*rmsd_cutoff
2370  to_merge = []
2371  print("merging...")
2372  for c0, c1 in filter(lambda x: len(x[0].members) > 1,
2373  itertools.combinations(self.clusters, 2)):
2374  n0, n1 = [c.members[0] for c in (c0, c1)]
2375  _ = self.stath0[n0]
2376  _ = self.stath1[n1]
2377  rmsd, _ = self.rmsd()
2378  if (rmsd < 2*rmsd_cutoff and
2379  self.have_close_members(c0, c1, rmsd_cutoff, metric)):
2380  to_merge.append((c0, c1))
2381 
2382  for c0, c in reversed(to_merge):
2383  self.merge(c0, c)
2384 
2385  # keep only full clusters
2386  self.clusters = [c for c in
2387  filter(lambda x: len(x.members) > 0, self.clusters)]
2388 
2389  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2390  '''
2391  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2392  '''
2393  print("check close members for clusters " + str(c0.cluster_id) +
2394  " and " + str(c1.cluster_id))
2395  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2396  _ = self.stath0[n0]
2397  _ = self.stath1[n1]
2398  rmsd, _ = self.rmsd(metric=metric)
2399  if rmsd < rmsd_cutoff:
2400  return True
2401 
2402  return False
2403 
2404  def merge(self, c0, c1):
2405  '''
2406  merge two clusters
2407  '''
2408  c0 += c1
2409  c1.members = []
2410  c1.data = {}
2411 
2412  def rmsd_helper(self, sels0, sels1, metric):
2413  '''
2414  a function that returns the permutation best_sel of sels0 that
2415  minimizes metric
2416  '''
2417  best_rmsd2 = float('inf')
2418  best_sel = None
2419  if self.issymmetricsel[sels0[0]]:
2420  # this cases happens when symmetries were defined
2421  N = len(sels0)
2422  for offset in range(N):
2423  sels = [sels0[(offset+i) % N] for i in range(N)]
2424  sel0 = sels[0]
2425  sel1 = sels1[0]
2426  r = metric(sel0, sel1)
2427  rmsd2 = r*r*N
2428  if rmsd2 < best_rmsd2:
2429  best_rmsd2 = rmsd2
2430  best_sel = sels
2431  else:
2432  for sels in itertools.permutations(sels0):
2433  rmsd2 = 0.0
2434  for sel0, sel1 in itertools.takewhile(
2435  lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2436  r = metric(sel0, sel1)
2437  rmsd2 += r*r
2438  if rmsd2 < best_rmsd2:
2439  best_rmsd2 = rmsd2
2440  best_sel = sels
2441  return best_sel, best_rmsd2
2442 
2443  def compute_all_pairwise_rmsd(self):
2444  for d0 in self.stath0:
2445  for d1 in self.stath1:
2446  rmsd, _ = self.rmsd()
2447 
2448  def rmsd(self, metric=IMP.atom.get_rmsd):
2449  '''
2450  Computes the RMSD. Resolves ambiguous pairs assignments
2451  '''
2452  # here we memoize the rmsd and molecular assignment so that it's
2453  # not done multiple times
2454  n0 = self.stath0.current_index
2455  n1 = self.stath1.current_index
2456  if ((n0, n1) in self.pairwise_rmsd) \
2457  and ((n0, n1) in self.pairwise_molecular_assignment):
2458  return (self.pairwise_rmsd[(n0, n1)],
2459  self.pairwise_molecular_assignment[(n0, n1)])
2460 
2461  if self.alignment:
2462  self.align()
2463  # if it's not yet memoized
2464  total_rmsd = 0.0
2465  total_N = 0
2466  # this is a dictionary which keys are the molecule names, and values
2467  # are the list of IMP.atom.Selection for all molecules that share
2468  # the molecule name
2469  molecular_assignment = {}
2470  for molname, sels0 in self.seldict0.items():
2471  sels_best_order, best_rmsd2 = \
2472  self.rmsd_helper(sels0, self.seldict1[molname], metric)
2473 
2474  Ncoords = len(sels_best_order[0].get_selected_particles())
2475  Ncopies = len(self.seldict1[molname])
2476  total_rmsd += Ncoords*best_rmsd2
2477  total_N += Ncoords*Ncopies
2478 
2479  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2480  p0 = sel0.get_selected_particles()[0]
2481  p1 = sel1.get_selected_particles()[0]
2482  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2483  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2484  c0 = IMP.atom.Copy(m0).get_copy_index()
2485  c1 = IMP.atom.Copy(m1).get_copy_index()
2486  molecular_assignment[(molname, c0)] = (molname, c1)
2487 
2488  total_rmsd = math.sqrt(total_rmsd/total_N)
2489 
2490  self.pairwise_rmsd[(n0, n1)] = total_rmsd
2491  self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2492  self.pairwise_rmsd[(n1, n0)] = total_rmsd
2493  self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2494  return total_rmsd, molecular_assignment
2495 
2496  def set_reference(self, reference, cluster):
2497  """
2498  Fix the reference structure for structural alignment, rmsd and
2499  chain assignemnt
2500 
2501  @param reference can be either "Absolute" (cluster center of the
2502  first cluster) or Relative (cluster center of the current
2503  cluster)
2504  """
2505  if reference == "Absolute":
2506  _ = self.stath0[0]
2507  elif reference == "Relative":
2508  if cluster.center_index:
2509  n0 = cluster.center_index
2510  else:
2511  n0 = cluster.members[0]
2512  _ = self.stath0[n0]
2513 
2515  """
2516  compute the molecular assignemnts between multiple copies
2517  of the same sequence. It changes the Copy index of Molecules
2518  """
2519  _ = self.stath1[n1]
2520  _, molecular_assignment = self.rmsd()
2521  for (m0, c0), (m1, c1) in molecular_assignment.items():
2522  mol0 = self.molcopydict0[m0][c0]
2523  mol1 = self.molcopydict1[m1][c1]
2524  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2525  p1 = IMP.atom.Copy(mol1).get_particle()
2526  p1.set_value(cik0, c0)
2527 
2529  """
2530  Undo the Copy index assignment
2531  """
2532  _ = self.stath1[n1]
2533  _, molecular_assignment = self.rmsd()
2534  for (m0, c0), (m1, c1) in molecular_assignment.items():
2535  mol0 = self.molcopydict0[m0][c0]
2536  mol1 = self.molcopydict1[m1][c1]
2537  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2538  p1 = IMP.atom.Copy(mol1).get_particle()
2539  p1.set_value(cik0, c1)
2540 
2541  ####################
2542  # Container Functions
2543  ####################
2544 
2545  def __repr__(self):
2546  s = "AnalysisReplicaExchange\n"
2547  s += "---- number of clusters %s \n" % str(len(self.clusters))
2548  s += "---- number of models %s \n" % str(len(self.stath0))
2549  return s
2550 
2551  def __getitem__(self, int_slice_adaptor):
2552  if isinstance(int_slice_adaptor, int):
2553  return self.clusters[int_slice_adaptor]
2554  elif isinstance(int_slice_adaptor, slice):
2555  return self.__iter__(int_slice_adaptor)
2556  else:
2557  raise TypeError("Unknown Type")
2558 
2559  def __len__(self):
2560  return len(self.clusters)
2561 
2562  def __iter__(self, slice_key=None):
2563  if slice_key is None:
2564  for i in range(len(self)):
2565  yield self[i]
2566  else:
2567  for i in range(len(self))[slice_key]:
2568  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2448
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
Definition: macros.py:2496
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1777
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1138
def precision
Compute the precision of a cluster.
Definition: macros.py:1878
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:867
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:111
A container for models organized into clusters.
Definition: output.py:1510
Sample using molecular dynamics.
Definition: samplers.py:397
def aggregate
initial filling of the clusters.
Definition: macros.py:2334
A class for reading stat files (either rmf or ascii v1 and v2)
Definition: output.py:935
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:616
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:617
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1843
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1927
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1235
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1634
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1715
def __init__
Construction of the Class.
Definition: macros.py:1589
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:772
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1732
def __init__
Constructor.
Definition: macros.py:119
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2528
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1650
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2412
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1818
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1011
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
Definition: macros.py:2514
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1578
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:566
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2361
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:920
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1800
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:931
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:22
def update_seldicts
Update the seldicts.
Definition: macros.py:2303
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2292
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:612
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1679
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:201
Collect timing information.
Definition: tools.py:126
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1643
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: pmi/utilities.h:91
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:10568
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:47
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2404
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:617
The general base class for IMP exceptions.
Definition: exception.h:49
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1257
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1725
Class to handle individual particles of a Model object.
Definition: Particle.h:41
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:783
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1907
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:595
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1666
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1765
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2389
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
A macro to help setup and run replica exchange.
Definition: macros.py:58
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1211
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1740
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:569
Sample using replica exchange.
Definition: samplers.py:502
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27