IMP logo
IMP Reference Guide  develop.7400db2aee,2024/11/23
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 import IMP
6 import IMP.pmi.tools
7 import IMP.pmi.samplers
8 import IMP.pmi.output
9 import IMP.pmi.analysis
10 import IMP.pmi.io
11 import IMP.pmi.alphabets
12 import IMP.rmf
13 import IMP.isd
14 import IMP.pmi.dof
15 import os
16 from pathlib import Path
17 import glob
18 from operator import itemgetter
19 from collections import defaultdict
20 import numpy as np
21 import itertools
22 import warnings
23 import math
24 
25 import pickle
26 
27 
28 class _MockMPIValues:
29  """Replace samplers.MPI_values when in test mode"""
30  def get_percentile(self, name):
31  return 0.
32 
33 
34 class _RMFRestraints:
35  """All restraints that are written out to the RMF file"""
36  def __init__(self, model, user_restraints):
37  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
38  self._user_restraints = user_restraints if user_restraints else []
39 
40  def __len__(self):
41  return (len(self._user_restraints)
42  + self._rmf_rs.get_number_of_restraints())
43 
44  def __bool__(self):
45  return len(self) > 0
46 
47  def __getitem__(self, i):
48  class FakePMIWrapper:
49  def __init__(self, r):
50  self.r = IMP.RestraintSet.get_from(r)
51 
52  def get_restraint(self):
53  return self.r
54 
55  lenuser = len(self._user_restraints)
56  if 0 <= i < lenuser:
57  return self._user_restraints[i]
58  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
59  r = self._rmf_rs.get_restraint(i - lenuser)
60  return FakePMIWrapper(r)
61  else:
62  raise IndexError("Out of range")
63 
64 
66  """A macro to help setup and run replica exchange.
67  Supports Monte Carlo and molecular dynamics.
68  Produces trajectory RMF files, best PDB structures,
69  and output stat files.
70  """
71  def __init__(self, model, root_hier,
72  monte_carlo_sample_objects=None,
73  molecular_dynamics_sample_objects=None,
74  output_objects=[],
75  rmf_output_objects=None,
76  monte_carlo_temperature=1.0,
77  simulated_annealing=False,
78  simulated_annealing_minimum_temperature=1.0,
79  simulated_annealing_maximum_temperature=2.5,
80  simulated_annealing_minimum_temperature_nframes=100,
81  simulated_annealing_maximum_temperature_nframes=100,
82  replica_exchange_minimum_temperature=1.0,
83  replica_exchange_maximum_temperature=2.5,
84  replica_exchange_swap=True,
85  num_sample_rounds=1,
86  number_of_best_scoring_models=500,
87  monte_carlo_steps=10,
88  self_adaptive=False,
89  molecular_dynamics_steps=10,
90  molecular_dynamics_max_time_step=1.0,
91  number_of_frames=1000,
92  save_coordinates_mode="lowest_temperature",
93  nframes_write_coordinates=1,
94  write_initial_rmf=True,
95  initial_rmf_name_suffix="initial",
96  stat_file_name_suffix="stat",
97  best_pdb_name_suffix="model",
98  mmcif=False,
99  do_clean_first=True,
100  do_create_directories=True,
101  global_output_directory="./",
102  rmf_dir="rmfs/",
103  best_pdb_dir="pdbs/",
104  replica_stat_file_suffix="stat_replica",
105  em_object_for_rmf=None,
106  atomistic=False,
107  replica_exchange_object=None,
108  test_mode=False,
109  score_moved=False,
110  use_nestor=False,
111  nestor_restraints=None,
112  nestor_rmf_fname_prefix="nested",):
113  """Constructor.
114  @param model The IMP model
115  @param root_hier Top-level (System)hierarchy
116  @param monte_carlo_sample_objects Objects for MC sampling, which
117  should generally be a simple list of Mover objects, e.g.
118  from DegreesOfFreedom.get_movers().
119  @param molecular_dynamics_sample_objects Objects for MD sampling,
120  which should generally be a simple list of particles.
121  @param output_objects A list of structural objects and restraints
122  that will be included in output (ie, statistics "stat"
123  files). Any object that provides a get_output() method
124  can be used here. If None is passed
125  the macro will not write stat files.
126  @param rmf_output_objects A list of structural objects and
127  restraints that will be included in rmf. Any object
128  that provides a get_output() method can be used here.
129  @param monte_carlo_temperature MC temp (may need to be optimized
130  based on post-sampling analysis)
131  @param simulated_annealing If True, perform simulated annealing
132  @param simulated_annealing_minimum_temperature Should generally be
133  the same as monte_carlo_temperature.
134  @param simulated_annealing_minimum_temperature_nframes Number of
135  frames to compute at minimum temperature.
136  @param simulated_annealing_maximum_temperature_nframes Number of
137  frames to compute at
138  temps > simulated_annealing_maximum_temperature.
139  @param replica_exchange_minimum_temperature Low temp for REX; should
140  generally be the same as monte_carlo_temperature.
141  @param replica_exchange_maximum_temperature High temp for REX
142  @param replica_exchange_swap Boolean, enable disable temperature
143  swap (Default=True)
144  @param num_sample_rounds Number of rounds of MC/MD per cycle
145  @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
146  models to keep around for analysis.
147  @param mmcif If True, write best scoring models in mmCIF format;
148  if False (the default), write in legacy PDB format.
149  @param best_pdb_dir The directory under `global_output_directory`
150  where best-scoring PDB/mmCIF files are written.
151  @param best_pdb_name_suffix Part of the file name for best-scoring
152  PDB/mmCIF files.
153  @param monte_carlo_steps Number of MC steps per round
154  @param self_adaptive self adaptive scheme for Monte Carlo movers
155  @param molecular_dynamics_steps Number of MD steps per round
156  @param molecular_dynamics_max_time_step Max time step for MD
157  @param number_of_frames Number of REX frames to run
158  @param save_coordinates_mode string: how to save coordinates.
159  "lowest_temperature" (default) only the lowest temperatures
160  is saved
161  "25th_score" all replicas whose score is below the 25th
162  percentile
163  "50th_score" all replicas whose score is below the 50th
164  percentile
165  "75th_score" all replicas whose score is below the 75th
166  percentile
167  @param nframes_write_coordinates How often to write the coordinates
168  of a frame
169  @param write_initial_rmf Write the initial configuration
170  @param global_output_directory Folder that will be created to house
171  output.
172  @param test_mode Set to True to avoid writing any files, just test
173  one frame.
174  @param score_moved If True, attempt to speed up Monte Carlo
175  sampling by caching scoring function terms on particles
176  that didn't move.
177  @param use_nestor If True, follows the Nested Sampling workflow
178  of the NestOR module and skips writing stat files and
179  replica stat files.
180  @param nestor_restraints A list of restraints for which
181  likelihoods are to be computed for use by NestOR module.
182  @param nestor_rmf_fname_prefix Prefix to be used for storing .rmf3
183  files generated by NestOR .
184  """
185  self.model = model
186  self.vars = {}
187 
188  # add check hierarchy is multistate
189  if output_objects == []:
190  # The "[]" in the default parameters is a global object, so make
191  # our own copy here
192  self.output_objects = []
193  else:
194  self.output_objects = output_objects
195  self.rmf_output_objects = rmf_output_objects
196  if (isinstance(root_hier, IMP.atom.Hierarchy)
197  and not root_hier.get_parent()):
198  if self.output_objects is not None:
199  self.output_objects.append(
200  IMP.pmi.io.TotalScoreOutput(self.model))
201  if self.rmf_output_objects is not None:
202  self.rmf_output_objects.append(
203  IMP.pmi.io.TotalScoreOutput(self.model))
204  self.root_hier = root_hier
205  states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
206  self.vars["number_of_states"] = len(states)
207  if len(states) > 1:
208  self.root_hiers = states
209  self.is_multi_state = True
210  else:
211  self.root_hier = root_hier
212  self.is_multi_state = False
213  else:
214  raise TypeError("Must provide System hierarchy (root_hier)")
215 
216  self._rmf_restraints = _RMFRestraints(model, None)
217  self.em_object_for_rmf = em_object_for_rmf
218  self.monte_carlo_sample_objects = monte_carlo_sample_objects
219  self.vars["self_adaptive"] = self_adaptive
220  self.molecular_dynamics_sample_objects = \
221  molecular_dynamics_sample_objects
222  self.replica_exchange_object = replica_exchange_object
223  self.molecular_dynamics_max_time_step = \
224  molecular_dynamics_max_time_step
225  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
226  self.vars["replica_exchange_minimum_temperature"] = \
227  replica_exchange_minimum_temperature
228  self.vars["replica_exchange_maximum_temperature"] = \
229  replica_exchange_maximum_temperature
230  self.vars["replica_exchange_swap"] = replica_exchange_swap
231  self.vars["simulated_annealing"] = simulated_annealing
232  self.vars["simulated_annealing_minimum_temperature"] = \
233  simulated_annealing_minimum_temperature
234  self.vars["simulated_annealing_maximum_temperature"] = \
235  simulated_annealing_maximum_temperature
236  self.vars["simulated_annealing_minimum_temperature_nframes"] = \
237  simulated_annealing_minimum_temperature_nframes
238  self.vars["simulated_annealing_maximum_temperature_nframes"] = \
239  simulated_annealing_maximum_temperature_nframes
240 
241  self.vars["num_sample_rounds"] = num_sample_rounds
242  self.vars[
243  "number_of_best_scoring_models"] = number_of_best_scoring_models
244  self.vars["monte_carlo_steps"] = monte_carlo_steps
245  self.vars["molecular_dynamics_steps"] = molecular_dynamics_steps
246  self.vars["number_of_frames"] = number_of_frames
247  if save_coordinates_mode not in ("lowest_temperature", "25th_score",
248  "50th_score", "75th_score"):
249  raise Exception("save_coordinates_mode has unrecognized value")
250  else:
251  self.vars["save_coordinates_mode"] = save_coordinates_mode
252  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
253  self.vars["write_initial_rmf"] = write_initial_rmf
254  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
255  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
256  self.vars["mmcif"] = mmcif
257  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
258  self.vars["do_clean_first"] = do_clean_first
259  self.vars["do_create_directories"] = do_create_directories
260  self.vars["global_output_directory"] = global_output_directory
261  self.vars["rmf_dir"] = rmf_dir
262  self.vars["best_pdb_dir"] = best_pdb_dir
263  self.vars["atomistic"] = atomistic
264  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
265  self.vars["geometries"] = None
266  self.test_mode = test_mode
267  self.score_moved = score_moved
268  self.nest = use_nestor
269  self.nestor_restraints = nestor_restraints
270  self.nestor_rmf_fname = nestor_rmf_fname_prefix
271 
272  def add_geometries(self, geometries):
273  if self.vars["geometries"] is None:
274  self.vars["geometries"] = list(geometries)
275  else:
276  self.vars["geometries"].extend(geometries)
277 
278  def show_info(self):
279  print("ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
280  "rmfs/*.rmf3 for each replica ")
281  print("--- it stores the best scoring pdb models in pdbs/")
282  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
283  "lowest temperature")
284  print("--- variables:")
285  keys = list(self.vars.keys())
286  keys.sort()
287  for v in keys:
288  print("------", v.ljust(30), self.vars[v])
289  print("Use nestor: ", self.nest)
290 
291  def get_replica_exchange_object(self):
292  return self.replica_exchange_object
293 
294  def _add_provenance(self, sampler_md, sampler_mc):
295  """Record details about the sampling in the IMP Hierarchies"""
296  iterations = 0
297  if sampler_md:
298  method = "Molecular Dynamics"
299  iterations += self.vars["molecular_dynamics_steps"]
300  if sampler_mc:
301  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
302  iterations += self.vars["monte_carlo_steps"]
303  # If no sampling is actually done, no provenance to write
304  if iterations == 0 or self.vars["number_of_frames"] == 0:
305  return
306  iterations *= self.vars["num_sample_rounds"]
307 
308  pi = self.model.add_particle("sampling")
310  self.model, pi, method, self.vars["number_of_frames"],
311  iterations)
312  p.set_number_of_replicas(
313  self.replica_exchange_object.get_number_of_replicas())
314  IMP.pmi.tools._add_pmi_provenance(self.root_hier)
315  IMP.core.add_provenance(self.model, self.root_hier, p)
316 
317  def execute_macro(self):
318  temp_index_factor = 100000.0
319  samplers = []
320  sampler_mc = None
321  sampler_md = None
322  if self.monte_carlo_sample_objects is not None:
323  print("Setting up MonteCarlo")
324  sampler_mc = IMP.pmi.samplers.MonteCarlo(
325  self.model, self.monte_carlo_sample_objects,
326  self.vars["monte_carlo_temperature"],
327  score_moved=self.score_moved)
328  if self.vars["simulated_annealing"]:
329  tmin = self.vars["simulated_annealing_minimum_temperature"]
330  tmax = self.vars["simulated_annealing_maximum_temperature"]
331  nfmin = self.vars[
332  "simulated_annealing_minimum_temperature_nframes"]
333  nfmax = self.vars[
334  "simulated_annealing_maximum_temperature_nframes"]
335  sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
336  if self.vars["self_adaptive"]:
337  sampler_mc.set_self_adaptive(
338  isselfadaptive=self.vars["self_adaptive"])
339  if self.output_objects is not None:
340  self.output_objects.append(sampler_mc)
341  if self.rmf_output_objects is not None:
342  self.rmf_output_objects.append(sampler_mc)
343  samplers.append(sampler_mc)
344 
345  if self.molecular_dynamics_sample_objects is not None:
346  print("Setting up MolecularDynamics")
348  self.model, self.molecular_dynamics_sample_objects,
349  self.vars["monte_carlo_temperature"],
350  maximum_time_step=self.molecular_dynamics_max_time_step)
351  if self.vars["simulated_annealing"]:
352  tmin = self.vars["simulated_annealing_minimum_temperature"]
353  tmax = self.vars["simulated_annealing_maximum_temperature"]
354  nfmin = self.vars[
355  "simulated_annealing_minimum_temperature_nframes"]
356  nfmax = self.vars[
357  "simulated_annealing_maximum_temperature_nframes"]
358  sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
359  if self.output_objects is not None:
360  self.output_objects.append(sampler_md)
361  if self.rmf_output_objects is not None:
362  self.rmf_output_objects.append(sampler_md)
363  samplers.append(sampler_md)
364 # -------------------------------------------------------------------------
365 
366  print("Setting up ReplicaExchange")
368  self.model, self.vars["replica_exchange_minimum_temperature"],
369  self.vars["replica_exchange_maximum_temperature"], samplers,
370  replica_exchange_object=self.replica_exchange_object)
371  self.replica_exchange_object = rex.rem
372 
373  myindex = rex.get_my_index()
374  if self.output_objects is not None:
375  self.output_objects.append(rex)
376  if self.rmf_output_objects is not None:
377  self.rmf_output_objects.append(rex)
378  # must reset the minimum temperature due to the
379  # different binary length of rem.get_my_parameter double and python
380  # float
381  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
382 
383 # -------------------------------------------------------------------------
384 
385  globaldir = self.vars["global_output_directory"] + "/"
386  rmf_dir = globaldir + self.vars["rmf_dir"]
387  pdb_dir = globaldir + self.vars["best_pdb_dir"]
388 
389  if not self.test_mode and not self.nest:
390  if self.vars["do_clean_first"]:
391  pass
392 
393  if self.vars["do_create_directories"]:
394 
395  try:
396  os.makedirs(globaldir)
397  except: # noqa: E722
398  pass
399  try:
400  os.makedirs(rmf_dir)
401  except: # noqa: E722
402  pass
403 
404  if not self.is_multi_state:
405  try:
406  os.makedirs(pdb_dir)
407  except: # noqa: E722
408  pass
409  else:
410  for n in range(self.vars["number_of_states"]):
411  try:
412  os.makedirs(pdb_dir + "/" + str(n))
413  except: # noqa: E722
414  pass
415 
416 # -------------------------------------------------------------------------
417 
419  if self.output_objects is not None:
420  self.output_objects.append(sw)
421  if self.rmf_output_objects is not None:
422  self.rmf_output_objects.append(sw)
423 
424  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
425 
426  if not self.nest:
427  print("Setting up stat file")
428  low_temp_stat_file = globaldir + \
429  self.vars["stat_file_name_suffix"] + "." + \
430  str(myindex) + ".out"
431 
432  # Ensure model is updated before saving init files
433  if not self.test_mode:
434  self.model.update()
435 
436  if not self.test_mode and not self.nest:
437  if self.output_objects is not None:
438  output.init_stat2(low_temp_stat_file,
439  self.output_objects,
440  extralabels=["rmf_file", "rmf_frame_index"])
441  else:
442  print("Stat file writing is disabled")
443 
444  if self.rmf_output_objects is not None and not self.nest:
445  print("Stat info being written in the rmf file")
446 
447  if not self.test_mode and not self.nest:
448  print("Setting up replica stat file")
449  replica_stat_file = globaldir + \
450  self.vars["replica_stat_file_suffix"] + "." + \
451  str(myindex) + ".out"
452  if not self.test_mode:
453  output.init_stat2(replica_stat_file, [rex],
454  extralabels=["score"])
455 
456  print("Setting up best pdb files")
457  if not self.is_multi_state:
458  if self.vars["number_of_best_scoring_models"] > 0:
459  output.init_pdb_best_scoring(
460  pdb_dir + "/" + self.vars["best_pdb_name_suffix"],
461  self.root_hier,
462  self.vars["number_of_best_scoring_models"],
463  replica_exchange=True,
464  mmcif=self.vars['mmcif'],
465  best_score_file=globaldir + "best.scores.rex.py")
466  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
467  output.write_psf(
468  pdb_dir + "/" + "model.psf",
469  pdb_dir + "/" +
470  self.vars["best_pdb_name_suffix"] + pdbext)
471  else:
472  if self.vars["number_of_best_scoring_models"] > 0:
473  for n in range(self.vars["number_of_states"]):
474  output.init_pdb_best_scoring(
475  pdb_dir + "/" + str(n) + "/" +
476  self.vars["best_pdb_name_suffix"],
477  self.root_hiers[n],
478  self.vars["number_of_best_scoring_models"],
479  replica_exchange=True,
480  mmcif=self.vars['mmcif'],
481  best_score_file=globaldir + "best.scores.rex.py")
482  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
483  output.write_psf(
484  pdb_dir + "/" + str(n) + "/" + "model.psf",
485  pdb_dir + "/" + str(n) + "/" +
486  self.vars["best_pdb_name_suffix"] + pdbext)
487 # ---------------------------------------------
488 
489  if self.em_object_for_rmf is not None:
490  output_hierarchies = [
491  self.root_hier,
492  self.em_object_for_rmf.get_density_as_hierarchy(
493  )]
494  else:
495  output_hierarchies = [self.root_hier]
496 
497  if not self.test_mode and not self.nest:
498  print("Setting up and writing initial rmf coordinate file")
499  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
500  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
501  output_hierarchies,
502  listofobjects=self.rmf_output_objects)
503  if self._rmf_restraints:
504  output.add_restraints_to_rmf(
505  init_suffix + "." + str(myindex) + ".rmf3",
506  self._rmf_restraints)
507  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
508  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
509 
510  if not self.test_mode:
511  mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
512  else:
513  mpivs = _MockMPIValues()
514 
515  self._add_provenance(sampler_md, sampler_mc)
516 
517  if not self.test_mode and not self.nest:
518  print("Setting up production rmf files")
519  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
520  output.init_rmf(rmfname, output_hierarchies,
521  geometries=self.vars["geometries"],
522  listofobjects=self.rmf_output_objects)
523 
524  if self._rmf_restraints:
525  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
526 
527  if not self.test_mode and self.nest:
528  print("Setting up NestOR rmf files")
529  nestor_rmf_fname = str(self.nestor_rmf_fname) + '_' + \
530  str(self.replica_exchange_object.get_my_index()) + '.rmf3'
531 
532  output.init_rmf(nestor_rmf_fname, output_hierarchies,
533  geometries=self.vars["geometries"],
534  listofobjects=self.rmf_output_objects)
535 
536  ntimes_at_low_temp = 0
537 
538  if myindex == 0 and not self.nest:
539  self.show_info()
540  self.replica_exchange_object.set_was_used(True)
541  nframes = self.vars["number_of_frames"]
542  if self.test_mode:
543  nframes = 1
544 
545  sampled_likelihoods = []
546  for i in range(nframes):
547  if self.test_mode:
548  score = 0.
549  else:
550  for nr in range(self.vars["num_sample_rounds"]):
551  if sampler_md is not None:
552  sampler_md.optimize(
553  self.vars["molecular_dynamics_steps"])
554  if sampler_mc is not None:
555  sampler_mc.optimize(self.vars["monte_carlo_steps"])
557  self.model).evaluate(False)
558  mpivs.set_value("score", score)
559  if not self.nest:
560  output.set_output_entry("score", score)
561 
562  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
563 
564  if self.vars["save_coordinates_mode"] == "lowest_temperature":
565  save_frame = (min_temp_index == my_temp_index)
566  elif self.vars["save_coordinates_mode"] == "25th_score":
567  score_perc = mpivs.get_percentile("score")
568  save_frame = (score_perc*100.0 <= 25.0)
569  elif self.vars["save_coordinates_mode"] == "50th_score":
570  score_perc = mpivs.get_percentile("score")
571  save_frame = (score_perc*100.0 <= 50.0)
572  elif self.vars["save_coordinates_mode"] == "75th_score":
573  score_perc = mpivs.get_percentile("score")
574  save_frame = (score_perc*100.0 <= 75.0)
575 
576  # Ensure model is updated before saving output files
577  if save_frame and not self.test_mode:
578  self.model.update()
579 
580  if save_frame:
581  print("--- frame %s score %s " % (str(i), str(score)))
582 
583  if self.nest:
584  if math.isnan(score):
585  sampled_likelihoods.append(math.nan)
586  else:
587  likelihood_for_sample = 1
588  for rstrnt in self.nestor_restraints:
589  likelihood_for_sample *= rstrnt.get_likelihood()
590  sampled_likelihoods.append(likelihood_for_sample)
591  output.write_rmf(nestor_rmf_fname)
592 
593  if not self.test_mode and not self.nest:
594  if i % self.vars["nframes_write_coordinates"] == 0:
595  print('--- writing coordinates')
596  if self.vars["number_of_best_scoring_models"] > 0:
597  output.write_pdb_best_scoring(score)
598  output.write_rmf(rmfname)
599  output.set_output_entry("rmf_file", rmfname)
600  output.set_output_entry("rmf_frame_index",
601  ntimes_at_low_temp)
602  else:
603  output.set_output_entry("rmf_file", rmfname)
604  output.set_output_entry("rmf_frame_index", '-1')
605  if self.output_objects is not None:
606  output.write_stat2(low_temp_stat_file)
607  ntimes_at_low_temp += 1
608 
609  if not self.test_mode and not self.nest:
610  output.write_stat2(replica_stat_file)
611  if self.vars["replica_exchange_swap"]:
612  rex.swap_temp(i, score)
613 
614  if self.nest and len(sampled_likelihoods) > 0:
615  with open("likelihoods_"
616  + str(self.replica_exchange_object.get_my_index()),
617  "wb") as lif:
618  pickle.dump(sampled_likelihoods, lif)
619 
620  output.close_rmf(nestor_rmf_fname)
621 
622  for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
623  p.add_replica_exchange(state, self)
624 
625  if not self.test_mode and not self.nest:
626  print("closing production rmf files")
627  output.close_rmf(rmfname)
628 
629 
631  """A macro to build a IMP::pmi::topology::System based on a
632  TopologyReader object.
633 
634  Easily create multi-state systems by calling this macro
635  repeatedly with different TopologyReader objects!
636  A useful function is get_molecules() which returns the PMI Molecules
637  grouped by state as a dictionary with key = (molecule name),
638  value = IMP.pmi.topology.Molecule
639  Quick multi-state system:
640  @code{.python}
641  model = IMP.Model()
642  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
643  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
644  bs = IMP.pmi.macros.BuildSystem(model)
645  bs.add_state(reader1)
646  bs.add_state(reader2)
647  bs.execute_macro() # build everything including degrees of freedom
648  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
649  ### now you have a two state system, you add restraints etc
650  @endcode
651  @note The "domain name" entry of the topology reader is not used.
652  All molecules are set up by the component name, but split into rigid bodies
653  as requested.
654  """
655 
656  _alphabets = {'DNA': IMP.pmi.alphabets.dna,
657  'RNA': IMP.pmi.alphabets.rna}
658 
659  def __init__(self, model, sequence_connectivity_scale=4.0,
660  force_create_gmm_files=False, resolutions=[1, 10],
661  name='System'):
662  """Constructor
663  @param model An IMP Model
664  @param sequence_connectivity_scale For scaling the connectivity
665  restraint
666  @param force_create_gmm_files If True, will sample and create GMMs
667  no matter what. If False, will only sample if the
668  files don't exist. If number of Gaussians is zero, won't
669  do anything.
670  @param resolutions The resolutions to build for structured regions
671  @param name The name of the top-level hierarchy node.
672  """
673  self.model = model
674  self.system = IMP.pmi.topology.System(self.model, name=name)
675  self._readers = [] # the TopologyReaders (one per state)
676  # TempResidues for each domain key=unique name,
677  # value=(atomic_res,non_atomic_res).
678  self._domain_res = []
679  self._domains = [] # key = domain unique name, value = Component
680  self.force_create_gmm_files = force_create_gmm_files
681  self.resolutions = resolutions
682 
683  def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
684  chain_ids=None):
685  """Add a state using the topology info in a
686  IMP::pmi::topology::TopologyReader object.
687  When you are done adding states, call execute_macro()
688  @param reader The TopologyReader object
689  @param keep_chain_id If True, keep the chain IDs from the
690  original PDB files, if available
691  @param fasta_name_map dictionary for converting protein names
692  found in the fasta file
693  @param chain_ids A list or string of chain IDs for assigning to
694  newly-created molecules, e.g.
695  `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
696  If not specified, chain IDs A through Z are assigned, then
697  AA through AZ, then BA through BZ, and so on, in the same
698  fashion as PDB.
699  """
700  state = self.system.create_state()
701  self._readers.append(reader)
702  # key is unique name, value is (atomic res, nonatomicres)
703  these_domain_res = {}
704  these_domains = {} # key is unique name, value is _Component
705  if chain_ids is None:
706  chain_ids = IMP.pmi.output._ChainIDs()
707  numchain = 0
708 
709  # setup representation
710  # loop over molecules, copies, then domains
711  for molname in reader.get_molecules():
712  copies = reader.get_molecules()[molname].domains
713  for nc, copyname in enumerate(copies):
714  print("BuildSystem.add_state: setting up molecule %s copy "
715  "number %s" % (molname, str(nc)))
716  copy = copies[copyname]
717  # option to not rename chains
718  if keep_chain_id:
719  all_chains = [c for c in copy if c.chain is not None]
720  if all_chains:
721  chain_id = all_chains[0].chain
722  else:
723  chain_id = chain_ids[numchain]
724  warnings.warn(
725  "No PDBs specified for %s, so keep_chain_id has "
726  "no effect; using default chain ID '%s'"
727  % (molname, chain_id), IMP.pmi.ParameterWarning)
728  else:
729  chain_id = chain_ids[numchain]
730  if nc == 0:
731  alphabet = IMP.pmi.alphabets.amino_acid
732  fasta_flag = copy[0].fasta_flag
733  if fasta_flag in self._alphabets:
734  alphabet = self._alphabets[fasta_flag]
736  copy[0].fasta_file, fasta_name_map)
737  seq = seqs[copy[0].fasta_id]
738  print("BuildSystem.add_state: molecule %s sequence has "
739  "%s residues" % (molname, len(seq)))
740  orig_mol = state.create_molecule(
741  molname, seq, chain_id, alphabet=alphabet,
742  uniprot=seqs.uniprot.get(copy[0].fasta_id))
743  mol = orig_mol
744  numchain += 1
745  else:
746  print("BuildSystem.add_state: creating a copy for "
747  "molecule %s" % molname)
748  mol = orig_mol.create_copy(chain_id)
749  numchain += 1
750 
751  for domainnumber, domain in enumerate(copy):
752  print("BuildSystem.add_state: ---- setting up domain %s "
753  "of molecule %s" % (domainnumber, molname))
754  # we build everything in the residue range, even if it
755  # extends beyond what's in the actual PDB file
756  these_domains[domain.get_unique_name()] = domain
757  if domain.residue_range == [] or \
758  domain.residue_range is None:
759  domain_res = mol.get_residues()
760  else:
761  start = domain.residue_range[0]+domain.pdb_offset
762  if domain.residue_range[1] == 'END':
763  end = len(mol.sequence)
764  else:
765  end = domain.residue_range[1]+domain.pdb_offset
766  domain_res = mol.residue_range(start-1, end-1)
767  print("BuildSystem.add_state: -------- domain %s of "
768  "molecule %s extends from residue %s to "
769  "residue %s "
770  % (domainnumber, molname, start, end))
771  if domain.pdb_file == "BEADS":
772  print("BuildSystem.add_state: -------- domain %s of "
773  "molecule %s represented by BEADS "
774  % (domainnumber, molname))
775  mol.add_representation(
776  domain_res,
777  resolutions=[domain.bead_size],
778  setup_particles_as_densities=(
779  domain.em_residues_per_gaussian != 0),
780  color=domain.color)
781  these_domain_res[domain.get_unique_name()] = \
782  (set(), domain_res)
783  elif domain.pdb_file == "IDEAL_HELIX":
784  print("BuildSystem.add_state: -------- domain %s of "
785  "molecule %s represented by IDEAL_HELIX "
786  % (domainnumber, molname))
787  emper = domain.em_residues_per_gaussian
788  mol.add_representation(
789  domain_res,
790  resolutions=self.resolutions,
791  ideal_helix=True,
792  density_residues_per_component=emper,
793  density_prefix=domain.density_prefix,
794  density_force_compute=self.force_create_gmm_files,
795  color=domain.color)
796  these_domain_res[domain.get_unique_name()] = \
797  (domain_res, set())
798  else:
799  print("BuildSystem.add_state: -------- domain %s of "
800  "molecule %s represented by pdb file %s "
801  % (domainnumber, molname, domain.pdb_file))
802  domain_atomic = mol.add_structure(domain.pdb_file,
803  domain.chain,
804  domain.residue_range,
805  domain.pdb_offset,
806  soft_check=True)
807  domain_non_atomic = domain_res - domain_atomic
808  if not domain.em_residues_per_gaussian:
809  mol.add_representation(
810  domain_atomic, resolutions=self.resolutions,
811  color=domain.color)
812  if len(domain_non_atomic) > 0:
813  mol.add_representation(
814  domain_non_atomic,
815  resolutions=[domain.bead_size],
816  color=domain.color)
817  else:
818  print("BuildSystem.add_state: -------- domain %s "
819  "of molecule %s represented by gaussians "
820  % (domainnumber, molname))
821  emper = domain.em_residues_per_gaussian
822  creategmm = self.force_create_gmm_files
823  mol.add_representation(
824  domain_atomic,
825  resolutions=self.resolutions,
826  density_residues_per_component=emper,
827  density_prefix=domain.density_prefix,
828  density_force_compute=creategmm,
829  color=domain.color)
830  if len(domain_non_atomic) > 0:
831  mol.add_representation(
832  domain_non_atomic,
833  resolutions=[domain.bead_size],
834  setup_particles_as_densities=True,
835  color=domain.color)
836  these_domain_res[domain.get_unique_name()] = (
837  domain_atomic, domain_non_atomic)
838  self._domain_res.append(these_domain_res)
839  self._domains.append(these_domains)
840  print('BuildSystem.add_state: State', len(self.system.states), 'added')
841  return state
842 
843  def get_molecules(self):
844  """Return list of all molecules grouped by state.
845  For each state, it's a dictionary of Molecules where key is the
846  molecule name
847  """
848  return [s.get_molecules() for s in self.system.get_states()]
849 
850  def get_molecule(self, molname, copy_index=0, state_index=0):
851  return self.system.get_states()[state_index].get_molecules()[
852  molname][copy_index]
853 
854  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04,
855  max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
856  """Builds representations and sets up degrees of freedom"""
857  print("BuildSystem.execute_macro: building representations")
858  self.root_hier = self.system.build()
859 
860  print("BuildSystem.execute_macro: setting up degrees of freedom")
861  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
862  for nstate, reader in enumerate(self._readers):
863  rbs = reader.get_rigid_bodies()
864  srbs = reader.get_super_rigid_bodies()
865  csrbs = reader.get_chains_of_super_rigid_bodies()
866 
867  # add rigid bodies
868  domains_in_rbs = set()
869  for rblist in rbs:
870  print("BuildSystem.execute_macro: -------- building rigid "
871  "body %s" % (str(rblist)))
872  all_res = IMP.pmi.tools.OrderedSet()
873  bead_res = IMP.pmi.tools.OrderedSet()
874  for dname in rblist:
875  domain = self._domains[nstate][dname]
876  print("BuildSystem.execute_macro: -------- adding %s"
877  % (str(dname)))
878  all_res |= self._domain_res[nstate][dname][0]
879  bead_res |= self._domain_res[nstate][dname][1]
880  domains_in_rbs.add(dname)
881  all_res |= bead_res
882  print("BuildSystem.execute_macro: -------- creating rigid "
883  "body with max_trans %s max_rot %s "
884  "non_rigid_max_trans %s"
885  % (str(max_rb_trans), str(max_rb_rot),
886  str(max_bead_trans)))
887  self.dof.create_rigid_body(all_res,
888  nonrigid_parts=bead_res,
889  max_trans=max_rb_trans,
890  max_rot=max_rb_rot,
891  nonrigid_max_trans=max_bead_trans,
892  name="RigidBody %s" % dname)
893 
894  # if you have any domains not in an RB, set them as flexible beads
895  for dname, domain in self._domains[nstate].items():
896  if dname not in domains_in_rbs:
897  if domain.pdb_file != "BEADS":
898  warnings.warn(
899  "No rigid bodies set for %s. Residues read from "
900  "the PDB file will not be sampled - only regions "
901  "missing from the PDB will be treated flexibly. "
902  "To sample the entire sequence, use BEADS instead "
903  "of a PDB file name" % dname,
905  self.dof.create_flexible_beads(
906  self._domain_res[nstate][dname][1],
907  max_trans=max_bead_trans)
908 
909  # add super rigid bodies
910  for srblist in srbs:
911  print("BuildSystem.execute_macro: -------- building "
912  "super rigid body %s" % (str(srblist)))
913  all_res = IMP.pmi.tools.OrderedSet()
914  for dname in srblist:
915  print("BuildSystem.execute_macro: -------- adding %s"
916  % (str(dname)))
917  all_res |= self._domain_res[nstate][dname][0]
918  all_res |= self._domain_res[nstate][dname][1]
919 
920  print("BuildSystem.execute_macro: -------- creating super "
921  "rigid body with max_trans %s max_rot %s "
922  % (str(max_srb_trans), str(max_srb_rot)))
923  self.dof.create_super_rigid_body(
924  all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
925 
926  # add chains of super rigid bodies
927  for csrblist in csrbs:
928  all_res = IMP.pmi.tools.OrderedSet()
929  for dname in csrblist:
930  all_res |= self._domain_res[nstate][dname][0]
931  all_res |= self._domain_res[nstate][dname][1]
932  all_res = list(all_res)
933  all_res.sort(key=lambda r: r.get_index())
934  self.dof.create_main_chain_mover(all_res)
935  return self.root_hier, self.dof
936 
937 
938 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
940  """A macro for running all the basic operations of analysis.
941  Includes clustering, precision analysis, and making ensemble density maps.
942  A number of plots are also supported.
943  """
944  def __init__(self, model,
945  merge_directories=["./"],
946  stat_file_name_suffix="stat",
947  best_pdb_name_suffix="model",
948  do_clean_first=True,
949  do_create_directories=True,
950  global_output_directory="output/",
951  replica_stat_file_suffix="stat_replica",
952  global_analysis_result_directory="./analysis/",
953  test_mode=False):
954  """Constructor.
955  @param model The IMP model
956  @param stat_file_name_suffix
957  @param merge_directories The directories containing output files
958  @param best_pdb_name_suffix
959  @param do_clean_first
960  @param do_create_directories
961  @param global_output_directory Where everything is
962  @param replica_stat_file_suffix
963  @param global_analysis_result_directory
964  @param test_mode If True, nothing is changed on disk
965  """
966 
967  try:
968  from mpi4py import MPI
969  self.comm = MPI.COMM_WORLD
970  self.rank = self.comm.Get_rank()
971  self.number_of_processes = self.comm.size
972  except ImportError:
973  self.rank = 0
974  self.number_of_processes = 1
975 
976  self.test_mode = test_mode
977  self._protocol_output = []
978  self.cluster_obj = None
979  self.model = model
980  stat_dir = global_output_directory
981  self.stat_files = []
982  # it contains the position of the root directories
983  for rd in merge_directories:
984  stat_files = glob.glob(os.path.join(rd, stat_dir, "stat.*.out"))
985  if len(stat_files) == 0:
986  warnings.warn("no stat files found in %s"
987  % os.path.join(rd, stat_dir),
989  self.stat_files += stat_files
990 
991  def add_protocol_output(self, p):
992  """Capture details of the modeling protocol.
993  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
994  """
995  # Assume last state is the one we're interested in
996  self._protocol_output.append((p, p._last_state))
997 
998  def get_modeling_trajectory(self,
999  score_key="Total_Score",
1000  rmf_file_key="rmf_file",
1001  rmf_file_frame_key="rmf_frame_index",
1002  outputdir="./",
1003  get_every=1,
1004  nframes_trajectory=10000):
1005  """ Get a trajectory of the modeling run, for generating
1006  demonstrative movies
1007 
1008  @param score_key The score for ranking models
1009  @param rmf_file_key Key pointing to RMF filename
1010  @param rmf_file_frame_key Key pointing to RMF frame number
1011  @param outputdir The local output directory used in the run
1012  @param get_every Extract every nth frame
1013  @param nframes_trajectory Total number of frames of the trajectory
1014  """
1015  import math
1016 
1017  trajectory_models = IMP.pmi.io.get_trajectory_models(
1018  self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
1019  get_every)
1020  score_list = list(map(float, trajectory_models[2]))
1021 
1022  max_score = max(score_list)
1023  min_score = min(score_list)
1024 
1025  bins = [(max_score-min_score)*math.exp(-float(i))+min_score
1026  for i in range(nframes_trajectory)]
1027  binned_scores = [None]*nframes_trajectory
1028  binned_model_indexes = [-1]*nframes_trajectory
1029 
1030  for model_index, s in enumerate(score_list):
1031  bins_score_diffs = [abs(s-b) for b in bins]
1032  bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1033  if binned_scores[bin_index] is None:
1034  binned_scores[bin_index] = s
1035  binned_model_indexes[bin_index] = model_index
1036  else:
1037  old_diff = abs(binned_scores[bin_index]-bins[bin_index])
1038  new_diff = abs(s-bins[bin_index])
1039  if new_diff < old_diff:
1040  binned_scores[bin_index] = s
1041  binned_model_indexes[bin_index] = model_index
1042 
1043  print(binned_scores)
1044  print(binned_model_indexes)
1045 
1046  def _expand_ambiguity(self, prot, d):
1047  """If using PMI2, expand the dictionary to include copies as
1048  ambiguous options
1049 
1050  This also keeps the states separate.
1051  """
1052  newdict = {}
1053  for key in d:
1054  val = d[key]
1055  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
1056  newdict[key] = val
1057  continue
1058  states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1059  if isinstance(val, tuple):
1060  start = val[0]
1061  stop = val[1]
1062  name = val[2]
1063  else:
1064  start = 1
1065  stop = -1
1066  name = val
1067  for nst in range(len(states)):
1068  sel = IMP.atom.Selection(prot, molecule=name, state_index=nst)
1069  copies = sel.get_selected_particles(with_representation=False)
1070  if len(copies) > 1:
1071  for nc in range(len(copies)):
1072  if len(states) > 1:
1073  newdict['%s.%i..%i' % (name, nst, nc)] = \
1074  (start, stop, name, nc, nst)
1075  else:
1076  newdict['%s..%i' % (name, nc)] = \
1077  (start, stop, name, nc, nst)
1078  else:
1079  newdict[key] = val
1080  return newdict
1081 
1082  def clustering(self,
1083  score_key="Total_Score",
1084  rmf_file_key="rmf_file",
1085  rmf_file_frame_key="rmf_frame_index",
1086  state_number=0,
1087  prefiltervalue=None,
1088  feature_keys=[],
1089  outputdir="./",
1090  alignment_components=None,
1091  number_of_best_scoring_models=10,
1092  rmsd_calculation_components=None,
1093  distance_matrix_file='distances.mat',
1094  load_distance_matrix_file=False,
1095  skip_clustering=False,
1096  number_of_clusters=1,
1097  display_plot=False,
1098  exit_after_display=True,
1099  get_every=1,
1100  first_and_last_frames=None,
1101  density_custom_ranges=None,
1102  write_pdb_with_centered_coordinates=False,
1103  voxel_size=5.0):
1104  """Get the best scoring models, compute a distance matrix,
1105  cluster them, and create density maps.
1106 
1107  Tuple format: "molname" just the molecule,
1108  or (start,stop,molname,copy_num(optional),state_num(optional)
1109  Can pass None for copy or state to ignore that field.
1110  If you don't pass a specific copy number
1111 
1112  @param score_key The score for ranking models.
1113  @param rmf_file_key Key pointing to RMF filename
1114  @param rmf_file_frame_key Key pointing to RMF frame number
1115  @param state_number State number to analyze
1116  @param prefiltervalue Only include frames where the
1117  score key is below this value
1118  @param feature_keys Keywords for which you want to
1119  calculate average, medians, etc.
1120  If you pass "Keyname" it'll include everything that matches
1121  "*Keyname*"
1122  @param outputdir The local output directory used in
1123  the run
1124  @param alignment_components Dictionary with keys=groupname,
1125  values are tuples for aligning the structures
1126  e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1127  @param number_of_best_scoring_models Num models to keep per run
1128  @param rmsd_calculation_components For calculating RMSD
1129  (same format as alignment_components)
1130  @param distance_matrix_file Where to store/read the
1131  distance matrix
1132  @param load_distance_matrix_file Try to load the distance
1133  matrix file
1134  @param skip_clustering Just extract the best scoring
1135  models and save the pdbs
1136  @param number_of_clusters Number of k-means clusters
1137  @param display_plot Display the distance matrix
1138  @param exit_after_display Exit after displaying distance
1139  matrix
1140  @param get_every Extract every nth frame
1141  @param first_and_last_frames A tuple with the first and last
1142  frames to be analyzed. Values are percentages!
1143  Default: get all frames
1144  @param density_custom_ranges For density calculation
1145  (same format as alignment_components)
1146  @param write_pdb_with_centered_coordinates
1147  @param voxel_size Used for the density output
1148  """
1149  # Track provenance information to be added to each output model
1150  prov = []
1151  self._outputdir = Path(outputdir).absolute()
1152  self._number_of_clusters = number_of_clusters
1153  for p, state in self._protocol_output:
1154  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1155 
1156  if self.test_mode:
1157  return
1158 
1159  if self.rank == 0:
1160  try:
1161  os.mkdir(outputdir)
1162  except: # noqa: E722
1163  pass
1164 
1165  if not load_distance_matrix_file:
1166  if len(self.stat_files) == 0:
1167  print("ERROR: no stat file found in the given path")
1168  return
1169  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1170  self.stat_files, self.number_of_processes)[self.rank]
1171 
1172  # read ahead to check if you need the PMI2 score key instead
1173  for k in (score_key, rmf_file_key, rmf_file_frame_key):
1174  if k in feature_keys:
1175  warnings.warn(
1176  "no need to pass " + k + " to feature_keys.",
1178  feature_keys.remove(k)
1179 
1180  best_models = IMP.pmi.io.get_best_models(
1181  my_stat_files, score_key, feature_keys, rmf_file_key,
1182  rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1183  rmf_file_list = best_models[0]
1184  rmf_file_frame_list = best_models[1]
1185  score_list = best_models[2]
1186  feature_keyword_list_dict = best_models[3]
1187 
1188 # ------------------------------------------------------------------------
1189 # collect all the files and scores
1190 # ------------------------------------------------------------------------
1191 
1192  if self.number_of_processes > 1:
1193  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1194  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1195  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1196  rmf_file_frame_list)
1197  for k in feature_keyword_list_dict:
1198  feature_keyword_list_dict[k] = \
1200  feature_keyword_list_dict[k])
1201 
1202  # sort by score and get the best scoring ones
1203  score_rmf_tuples = list(zip(score_list,
1204  rmf_file_list,
1205  rmf_file_frame_list,
1206  list(range(len(score_list)))))
1207 
1208  if density_custom_ranges:
1209  for k in density_custom_ranges:
1210  if not isinstance(density_custom_ranges[k], list):
1211  raise Exception("Density custom ranges: values must "
1212  "be lists of tuples")
1213 
1214  # keep subset of frames if requested
1215  if first_and_last_frames is not None:
1216  nframes = len(score_rmf_tuples)
1217  first_frame = int(first_and_last_frames[0] * nframes)
1218  last_frame = int(first_and_last_frames[1] * nframes)
1219  if last_frame > len(score_rmf_tuples):
1220  last_frame = -1
1221  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1222 
1223  # sort RMFs by the score_key in ascending order, and store the rank
1224  best_score_rmf_tuples = sorted(
1225  score_rmf_tuples,
1226  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1227  best_score_rmf_tuples = [t+(n,) for n, t in
1228  enumerate(best_score_rmf_tuples)]
1229  # Note in the provenance info that we only kept best-scoring models
1230  prov.append(IMP.pmi.io.FilterProvenance(
1231  "Best scoring", 0, number_of_best_scoring_models))
1232  # sort the feature scores in the same way
1233  best_score_feature_keyword_list_dict = defaultdict(list)
1234  for tpl in best_score_rmf_tuples:
1235  index = tpl[3]
1236  for f in feature_keyword_list_dict:
1237  best_score_feature_keyword_list_dict[f].append(
1238  feature_keyword_list_dict[f][index])
1239  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1240  best_score_rmf_tuples,
1241  self.number_of_processes)[self.rank]
1242 
1243  # expand the dictionaries to include ambiguous copies
1244  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1245  self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1246  if rmsd_calculation_components is not None:
1247  tmp = self._expand_ambiguity(
1248  prot_ahead, rmsd_calculation_components)
1249  if tmp != rmsd_calculation_components:
1250  print('Detected ambiguity, expand rmsd components to',
1251  tmp)
1252  rmsd_calculation_components = tmp
1253  if alignment_components is not None:
1254  tmp = self._expand_ambiguity(prot_ahead,
1255  alignment_components)
1256  if tmp != alignment_components:
1257  print('Detected ambiguity, expand alignment '
1258  'components to', tmp)
1259  alignment_components = tmp
1260 
1261 # -------------------------------------------------------------
1262 # read the coordinates
1263 # ------------------------------------------------------------
1264  rmsd_weights = IMP.pmi.io.get_bead_sizes(
1265  self.model, my_best_score_rmf_tuples[0],
1266  rmsd_calculation_components, state_number=state_number)
1268  self.model, my_best_score_rmf_tuples, alignment_components,
1269  rmsd_calculation_components, state_number=state_number)
1270 
1271  # note! the coordinates are simply float tuples, NOT decorators,
1272  # NOT Vector3D, NOR particles, because these object cannot be
1273  # serialized. We need serialization
1274  # for the parallel computation based on mpi.
1275 
1276  # dict:key=component name,val=coords per hit
1277  all_coordinates = got_coords[0]
1278 
1279  # same as above, limited to alignment bits
1280  alignment_coordinates = got_coords[1]
1281 
1282  # same as above, limited to RMSD bits
1283  rmsd_coordinates = got_coords[2]
1284 
1285  # dictionary with key=RMF, value=score rank
1286  rmf_file_name_index_dict = got_coords[3]
1287 
1288  # RMF file per hit
1289  all_rmf_file_names = got_coords[4]
1290 
1291 # ------------------------------------------------------------------------
1292 # optionally don't compute distance matrix or cluster, just write top files
1293 # ------------------------------------------------------------------------
1294  if skip_clustering:
1295  if density_custom_ranges:
1296  DensModule = IMP.pmi.analysis.GetModelDensity(
1297  density_custom_ranges, voxel=voxel_size)
1298 
1299  dircluster = os.path.join(outputdir,
1300  "all_models."+str(self.rank))
1301  try:
1302  os.mkdir(outputdir)
1303  except: # noqa: E722
1304  pass
1305  try:
1306  os.mkdir(dircluster)
1307  except: # noqa: E722
1308  pass
1309  clusstat = open(os.path.join(
1310  dircluster, "stat."+str(self.rank)+".out"), "w")
1311  for cnt, tpl in enumerate(my_best_score_rmf_tuples):
1312  rmf_name = tpl[1]
1313  rmf_frame_number = tpl[2]
1314  tmp_dict = {}
1315  index = tpl[4]
1316  for key in best_score_feature_keyword_list_dict:
1317  tmp_dict[key] = \
1318  best_score_feature_keyword_list_dict[key][index]
1319 
1320  if cnt == 0:
1321  prots, rs = \
1322  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1323  self.model, rmf_frame_number, rmf_name)
1324  else:
1325  linking_successful = \
1326  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1327  self.model, prots, rs, rmf_frame_number,
1328  rmf_name)
1329  if not linking_successful:
1330  continue
1331 
1332  if not prots:
1333  continue
1334 
1335  states = IMP.atom.get_by_type(
1336  prots[0], IMP.atom.STATE_TYPE)
1337  prot = states[state_number]
1338 
1339  # get transformation aligning coordinates of
1340  # requested tuples to the first RMF file
1341  if cnt == 0:
1342  coords_f1 = alignment_coordinates[cnt]
1343  if cnt > 0:
1344  coords_f2 = alignment_coordinates[cnt]
1345  if coords_f2:
1347  coords_f1, coords_f2)
1348  transformation = Ali.align()[1]
1349  else:
1350  transformation = \
1352 
1353  rbs = set()
1354  for p in IMP.atom.get_leaves(prot):
1355  if not IMP.core.XYZR.get_is_setup(p):
1357  IMP.core.XYZR(p).set_radius(0.0001)
1358  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1359 
1361  rbm = IMP.core.RigidBodyMember(p)
1362  rb = rbm.get_rigid_body()
1363  rbs.add(rb)
1364  else:
1366  transformation)
1367  for rb in rbs:
1368  IMP.core.transform(rb, transformation)
1369 
1370  o = IMP.pmi.output.Output()
1371  self.model.update()
1372  out_pdb_fn = os.path.join(
1373  dircluster, str(cnt)+"."+str(self.rank)+".pdb")
1374  out_rmf_fn = os.path.join(
1375  dircluster, str(cnt)+"."+str(self.rank)+".rmf3")
1376  o.init_pdb(out_pdb_fn, prot)
1377  tc = write_pdb_with_centered_coordinates
1378  o.write_pdb(out_pdb_fn,
1379  translate_to_geometric_center=tc)
1380 
1381  tmp_dict["local_pdb_file_name"] = \
1382  os.path.basename(out_pdb_fn)
1383  tmp_dict["rmf_file_full_path"] = rmf_name
1384  tmp_dict["local_rmf_file_name"] = \
1385  os.path.basename(out_rmf_fn)
1386  tmp_dict["local_rmf_frame_number"] = 0
1387 
1388  clusstat.write(str(tmp_dict)+"\n")
1389 
1390  # create a single-state System and write that
1392  IMP.Particle(self.model))
1393  h.set_name("System")
1394  h.add_child(prot)
1395  o.init_rmf(out_rmf_fn, [h], rs)
1396 
1397  o.write_rmf(out_rmf_fn)
1398  o.close_rmf(out_rmf_fn)
1399  # add the density
1400  if density_custom_ranges:
1401  DensModule.add_subunits_density(prot)
1402 
1403  if density_custom_ranges:
1404  DensModule.write_mrc(path=dircluster)
1405  del DensModule
1406  return
1407 
1408  # broadcast the coordinates
1409  if self.number_of_processes > 1:
1410  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1411  all_coordinates)
1412  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1413  all_rmf_file_names)
1414  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1415  rmf_file_name_index_dict)
1416  alignment_coordinates = IMP.pmi.tools.scatter_and_gather(
1417  alignment_coordinates)
1418  rmsd_coordinates = IMP.pmi.tools.scatter_and_gather(
1419  rmsd_coordinates)
1420 
1421  if self.rank == 0:
1422  # save needed information in external files
1423  self.save_objects(
1424  [best_score_feature_keyword_list_dict,
1425  rmf_file_name_index_dict],
1426  ".macro.pkl")
1427 
1428 # ------------------------------------------------------------------------
1429 # Calculate distance matrix and cluster
1430 # ------------------------------------------------------------------------
1431  print("setup clustering class")
1432  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1433 
1434  for n, model_coordinate_dict in enumerate(all_coordinates):
1435  # let's try to align
1436  if (alignment_components is not None
1437  and len(self.cluster_obj.all_coords) == 0):
1438  # set the first model as template coordinates
1439  self.cluster_obj.set_template(alignment_coordinates[n])
1440  self.cluster_obj.fill(all_rmf_file_names[n],
1441  rmsd_coordinates[n])
1442  print("Global calculating the distance matrix")
1443 
1444  # calculate distance matrix, all against all
1445  self.cluster_obj.dist_matrix()
1446 
1447  # perform clustering and optionally display
1448  if self.rank == 0:
1449  self.cluster_obj.do_cluster(number_of_clusters)
1450  if display_plot:
1451  if self.rank == 0:
1452  self.cluster_obj.plot_matrix(
1453  figurename=os.path.join(outputdir,
1454  'dist_matrix.pdf'))
1455  if exit_after_display:
1456  exit()
1457  self.cluster_obj.save_distance_matrix_file(
1458  file_name=distance_matrix_file)
1459 
1460 # ------------------------------------------------------------------------
1461 # Alternatively, load the distance matrix from file and cluster that
1462 # ------------------------------------------------------------------------
1463  else:
1464  if self.rank == 0:
1465  print("setup clustering class")
1466  self.cluster_obj = IMP.pmi.analysis.Clustering()
1467  self.cluster_obj.load_distance_matrix_file(
1468  file_name=distance_matrix_file)
1469  print("clustering with %s clusters" % str(number_of_clusters))
1470  self.cluster_obj.do_cluster(number_of_clusters)
1471  [best_score_feature_keyword_list_dict,
1472  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1473  if display_plot:
1474  if self.rank == 0:
1475  self.cluster_obj.plot_matrix(figurename=os.path.join(
1476  outputdir, 'dist_matrix.pdf'))
1477  if exit_after_display:
1478  exit()
1479  if self.number_of_processes > 1:
1480  self.comm.Barrier()
1481 
1482 # ------------------------------------------------------------------------
1483 # now save all information about the clusters
1484 # ------------------------------------------------------------------------
1485 
1486  if self.rank == 0:
1487  print(self.cluster_obj.get_cluster_labels())
1488  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1489  print("rank %s " % str(self.rank))
1490  print("cluster %s " % str(n))
1491  print("cluster label %s " % str(cl))
1492  print(self.cluster_obj.get_cluster_label_names(cl))
1493  cluster_size = \
1494  len(self.cluster_obj.get_cluster_label_names(cl))
1495  cluster_prov = \
1496  prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1497 
1498  # first initialize the Density class if requested
1499  if density_custom_ranges:
1500  DensModule = IMP.pmi.analysis.GetModelDensity(
1501  density_custom_ranges,
1502  voxel=voxel_size)
1503 
1504  dircluster = outputdir + "/cluster." + str(n) + "/"
1505  try:
1506  os.mkdir(dircluster)
1507  except: # noqa: E722
1508  pass
1509 
1510  rmsd_dict = {
1511  "AVERAGE_RMSD":
1512  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1513  clusstat = open(dircluster + "stat.out", "w")
1514  for k, structure_name in enumerate(
1515  self.cluster_obj.get_cluster_label_names(cl)):
1516  # extract the features
1517  tmp_dict = {}
1518  tmp_dict.update(rmsd_dict)
1519  index = rmf_file_name_index_dict[structure_name]
1520  for key in best_score_feature_keyword_list_dict:
1521  tmp_dict[
1522  key] = best_score_feature_keyword_list_dict[
1523  key][
1524  index]
1525 
1526  # get the rmf name and the frame number from the list of
1527  # frame names
1528  rmf_name = structure_name.split("|")[0]
1529  rmf_frame_number = int(structure_name.split("|")[1])
1530  clusstat.write(str(tmp_dict) + "\n")
1531 
1532  # extract frame (open or link to existing)
1533  if k == 0:
1534  prots, rs = \
1535  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1536  self.model, rmf_frame_number, rmf_name)
1537  else:
1538  linking_successful = \
1539  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1540  self.model, prots, rs, rmf_frame_number,
1541  rmf_name)
1542  if not linking_successful:
1543  continue
1544  if not prots:
1545  continue
1546 
1547  states = IMP.atom.get_by_type(
1548  prots[0], IMP.atom.STATE_TYPE)
1549  prot = states[state_number]
1550  if k == 0:
1551  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1552 
1553  # transform clusters onto first
1554  if k > 0:
1555  co = self.cluster_obj
1556  model_index = co.get_model_index_from_name(
1557  structure_name)
1558  transformation = co.get_transformation_to_first_member(
1559  cl, model_index)
1560  rbs = set()
1561  for p in IMP.atom.get_leaves(prot):
1562  if not IMP.core.XYZR.get_is_setup(p):
1564  IMP.core.XYZR(p).set_radius(0.0001)
1565  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1566 
1568  rbm = IMP.core.RigidBodyMember(p)
1569  rb = rbm.get_rigid_body()
1570  rbs.add(rb)
1571  else:
1573  transformation)
1574  for rb in rbs:
1575  IMP.core.transform(rb, transformation)
1576 
1577  # add the density
1578  if density_custom_ranges:
1579  DensModule.add_subunits_density(prot)
1580 
1581  # pdb writing should be optimized!
1582  o = IMP.pmi.output.Output()
1583  self.model.update()
1584  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1585  o.write_pdb(dircluster + str(k) + ".pdb")
1586 
1587  # create a single-state System and write that
1589  IMP.Particle(self.model))
1590  h.set_name("System")
1591  h.add_child(prot)
1592  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1593  o.write_rmf(dircluster + str(k) + ".rmf3")
1594  o.close_rmf(dircluster + str(k) + ".rmf3")
1595 
1596  del o
1597  # IMP.atom.destroy(prot)
1598 
1599  if density_custom_ranges:
1600  DensModule.write_mrc(path=dircluster)
1601  del DensModule
1602 
1603  if self.number_of_processes > 1:
1604  self.comm.Barrier()
1605 
1606  def get_cluster_rmsd(self, cluster_num):
1607  if self.cluster_obj is None:
1608  raise Exception("Run clustering first")
1609  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1610 
1611  def save_objects(self, objects, file_name):
1612  import pickle
1613  outf = open(file_name, 'wb')
1614  pickle.dump(objects, outf)
1615  outf.close()
1616 
1617  def load_objects(self, file_name):
1618  import pickle
1619  inputf = open(file_name, 'rb')
1620  objects = pickle.load(inputf)
1621  inputf.close()
1622  return objects
1623 
1624 
1626 
1627  """
1628  This class contains analysis utilities to investigate ReplicaExchange
1629  results.
1630  """
1631 
1632  ########################
1633  # Construction and Setup
1634  ########################
1635 
1636  def __init__(self, model, stat_files, best_models=None, score_key=None,
1637  alignment=True):
1638  """
1639  Construction of the Class.
1640  @param model IMP.Model()
1641  @param stat_files list of string. Can be ascii stat files,
1642  rmf files names
1643  @param best_models Integer. Number of best scoring models,
1644  if None: all models will be read
1645  @param score_key Use the provided stat key keyword as the score
1646  (by default, the total score is used)
1647  @param alignment boolean (Default=True). Align before computing
1648  the rmsd.
1649  """
1650 
1651  self.model = model
1652  self.best_models = best_models
1654  model, stat_files, self.best_models, score_key, cache=True)
1656  StatHierarchyHandler=self.stath0)
1657 
1658  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(
1660  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(
1662  self.sel0_rmsd = IMP.atom.Selection(self.stath0)
1663  self.sel1_rmsd = IMP.atom.Selection(self.stath1)
1664  self.sel0_alignment = IMP.atom.Selection(self.stath0)
1665  self.sel1_alignment = IMP.atom.Selection(self.stath1)
1666  self.clusters = []
1667  # fill the cluster list with a single cluster containing all models
1668  c = IMP.pmi.output.Cluster(0)
1669  self.clusters.append(c)
1670  for n0 in range(len(self.stath0)):
1671  c.add_member(n0)
1672  self.pairwise_rmsd = {}
1673  self.pairwise_molecular_assignment = {}
1674  self.alignment = alignment
1675  self.symmetric_molecules = {}
1676  self.issymmetricsel = {}
1677  self.update_seldicts()
1678  self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1679  IMP.atom.get_leaves(self.stath0))
1680  self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1681  IMP.atom.get_leaves(self.stath1))
1682 
1683  def set_rmsd_selection(self, **kwargs):
1684  """
1685  Setup the selection onto which the rmsd is computed
1686  @param kwargs use IMP.atom.Selection keywords
1687  """
1688  self.sel0_rmsd = IMP.atom.Selection(self.stath0, **kwargs)
1689  self.sel1_rmsd = IMP.atom.Selection(self.stath1, **kwargs)
1690  self.update_seldicts()
1691 
1692  def set_symmetric(self, molecule_name):
1693  """
1694  Store names of symmetric molecules
1695  """
1696  self.symmetric_molecules[molecule_name] = 0
1697  self.update_seldicts()
1698 
1699  def set_alignment_selection(self, **kwargs):
1700  """
1701  Setup the selection onto which the alignment is computed
1702  @param kwargs use IMP.atom.Selection keywords
1703  """
1704  self.sel0_alignment = IMP.atom.Selection(self.stath0, **kwargs)
1705  self.sel1_alignment = IMP.atom.Selection(self.stath1, **kwargs)
1706 
1707  ######################
1708  # Clustering functions
1709  ######################
1710  def clean_clusters(self):
1711  for c in self.clusters:
1712  del c
1713  self.clusters = []
1714 
1715  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1716  """
1717  Cluster the models based on RMSD.
1718  @param rmsd_cutoff Float the distance cutoff in Angstrom
1719  @param metric (Default=IMP.atom.get_rmsd) the metric that will
1720  be used to compute rmsds
1721  """
1722  self.clean_clusters()
1723  not_clustered = set(range(len(self.stath1)))
1724  while len(not_clustered) > 0:
1725  self.aggregate(not_clustered, rmsd_cutoff, metric)
1726  self.update_clusters()
1727 
1728  def refine(self, rmsd_cutoff=10):
1729  """
1730  Refine the clusters by merging the ones whose centers are close
1731  @param rmsd_cutoff cutoff distance in Angstorms
1732  """
1733  clusters_copy = self.clusters
1734  for c0, c1 in itertools.combinations(self.clusters, 2):
1735  if c0.center_index is None:
1736  self.compute_cluster_center(c0)
1737  if c1.center_index is None:
1738  self.compute_cluster_center(c1)
1739  _ = self.stath0[c0.center_index]
1740  _ = self.stath1[c1.center_index]
1741  rmsd, molecular_assignment = self.rmsd()
1742  if rmsd <= rmsd_cutoff:
1743  if c1 in self.clusters:
1744  clusters_copy.remove(c1)
1745  c0 += c1
1746  self.clusters = clusters_copy
1747  self.update_clusters()
1748 
1749  ####################
1750  # Input Output
1751  ####################
1752 
1753  def set_cluster_assignments(self, cluster_ids):
1754  if len(cluster_ids) != len(self.stath0):
1755  raise ValueError('cluster ids has to be same length as '
1756  'number of frames')
1757 
1758  self.clusters = []
1759  for i in sorted(list(set(cluster_ids))):
1760  self.clusters.append(IMP.pmi.output.Cluster(i))
1761  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1762  self.clusters[idx].add_member(i, d)
1763 
1764  def get_cluster_data(self, cluster):
1765  """
1766  Return the model data from a cluster
1767  @param cluster IMP.pmi.output.Cluster object
1768  """
1769  data = []
1770  for m in cluster:
1771  data.append(m)
1772  return data
1773 
1774  def save_data(self, filename='data.pkl'):
1775  """
1776  Save the data for the whole models into a pickle file
1777  @param filename string
1778  """
1779  self.stath0.save_data(filename)
1780 
1781  def set_data(self, data):
1782  """
1783  Set the data from an external IMP.pmi.output.Data
1784  @param data IMP.pmi.output.Data
1785  """
1786  self.stath0.data = data
1787  self.stath1.data = data
1788 
1789  def load_data(self, filename='data.pkl'):
1790  """
1791  Load the data from an external pickled file
1792  @param filename string
1793  """
1794  self.stath0.load_data(filename)
1795  self.stath1.load_data(filename)
1796  self.best_models = len(self.stath0)
1797 
1798  def add_cluster(self, rmf_name_list):
1799  c = IMP.pmi.output.Cluster(len(self.clusters))
1800  print("creating cluster index "+str(len(self.clusters)))
1801  self.clusters.append(c)
1802  current_len = len(self.stath0)
1803 
1804  for rmf in rmf_name_list:
1805  print("adding rmf "+rmf)
1806  self.stath0.add_stat_file(rmf)
1807  self.stath1.add_stat_file(rmf)
1808 
1809  for n0 in range(current_len, len(self.stath0)):
1810  d0 = self.stath0[n0]
1811  c.add_member(n0, d0)
1812  self.update_clusters()
1813 
1814  def save_clusters(self, filename='clusters.pkl'):
1815  """
1816  Save the clusters into a pickle file
1817  @param filename string
1818  """
1819  import pickle
1820  fl = open(filename, 'wb')
1821  pickle.dump(self.clusters, fl)
1822 
1823  def load_clusters(self, filename='clusters.pkl', append=False):
1824  """
1825  Load the clusters from a pickle file
1826  @param filename string
1827  @param append bool (Default=False), if True. append the clusters
1828  to the ones currently present
1829  """
1830  import pickle
1831  fl = open(filename, 'rb')
1832  self.clean_clusters()
1833  if append:
1834  self.clusters += pickle.load(fl)
1835  else:
1836  self.clusters = pickle.load(fl)
1837  self.update_clusters()
1838 
1839  ####################
1840  # Analysis Functions
1841  ####################
1842 
1843  def compute_cluster_center(self, cluster):
1844  """
1845  Compute the cluster center for a given cluster
1846  """
1847  member_distance = defaultdict(float)
1848 
1849  for n0, n1 in itertools.combinations(cluster.members, 2):
1850  _ = self.stath0[n0]
1851  _ = self.stath1[n1]
1852  rmsd, _ = self.rmsd()
1853  member_distance[n0] += rmsd
1854 
1855  if len(member_distance) > 0:
1856  cluster.center_index = min(member_distance,
1857  key=member_distance.get)
1858  else:
1859  cluster.center_index = cluster.members[0]
1860 
1861  def save_coordinates(self, cluster, rmf_name=None, reference="Absolute",
1862  prefix="./"):
1863  """
1864  Save the coordinates of the current cluster a single rmf file
1865  """
1866  print("saving coordinates", cluster)
1867  if self.alignment:
1868  self.set_reference(reference, cluster)
1869  o = IMP.pmi.output.Output()
1870  if rmf_name is None:
1871  rmf_name = prefix+'/'+str(cluster.cluster_id)+".rmf3"
1872 
1873  _ = self.stath1[cluster.members[0]]
1874  self.model.update()
1875  o.init_rmf(rmf_name, [self.stath1])
1876  for n1 in cluster.members:
1877  _ = self.stath1[n1]
1878  self.model.update()
1880  if self.alignment:
1881  self.align()
1882  o.write_rmf(rmf_name)
1884  o.close_rmf(rmf_name)
1885 
1886  def prune_redundant_structures(self, rmsd_cutoff=10):
1887  """
1888  remove structures that are similar
1889  append it to a new cluster
1890  """
1891  print("pruning models")
1892  selected = 0
1893  filtered = [selected]
1894  remaining = range(1, len(self.stath1), 10)
1895 
1896  while len(remaining) > 0:
1897  d0 = self.stath0[selected]
1898  rm = []
1899  for n1 in remaining:
1900  _ = self.stath1[n1]
1901  if self.alignment:
1902  self.align()
1903  d, _ = self.rmsd()
1904  if d <= rmsd_cutoff:
1905  rm.append(n1)
1906  print("pruning model %s, similar to model %s, rmsd %s"
1907  % (str(n1), str(selected), str(d)))
1908  remaining = [x for x in remaining if x not in rm]
1909  if len(remaining) == 0:
1910  break
1911  selected = remaining[0]
1912  filtered.append(selected)
1913  remaining.pop(0)
1914  c = IMP.pmi.output.Cluster(len(self.clusters))
1915  self.clusters.append(c)
1916  for n0 in filtered:
1917  d0 = self.stath0[n0]
1918  c.add_member(n0, d0)
1919  self.update_clusters()
1920 
1921  def precision(self, cluster):
1922  """
1923  Compute the precision of a cluster
1924  """
1925  npairs = 0
1926  rmsd = 0.0
1927  precision = None
1928 
1929  if cluster.center_index is not None:
1930  members1 = [cluster.center_index]
1931  else:
1932  members1 = cluster.members
1933 
1934  for n0 in members1:
1935  _ = self.stath0[n0]
1936  for n1 in cluster.members:
1937  if n0 != n1:
1938  npairs += 1
1939  _ = self.stath1[n1]
1941  tmp_rmsd, _ = self.rmsd()
1942  rmsd += tmp_rmsd
1944 
1945  if npairs > 0:
1946  precision = rmsd/npairs
1947  cluster.precision = precision
1948  return precision
1949 
1950  def bipartite_precision(self, cluster1, cluster2, verbose=False):
1951  """
1952  Compute the bipartite precision (ie the cross-precision)
1953  between two clusters
1954  """
1955  npairs = 0
1956  rmsd = 0.0
1957  for cn0, n0 in enumerate(cluster1.members):
1958  _ = self.stath0[n0]
1959  for cn1, n1 in enumerate(cluster2.members):
1960  _ = self.stath1[n1]
1961  tmp_rmsd, _ = self.rmsd()
1962  if verbose:
1963  print("--- rmsd between structure %s and structure "
1964  "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1965  rmsd += tmp_rmsd
1966  npairs += 1
1967  precision = rmsd/npairs
1968  return precision
1969 
1970  def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1971  cluster_ref=None, step=1):
1972  """
1973  Compute the Root mean square fluctuations
1974  of a molecule in a cluster
1975  Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1976  residue indexes and the value is the rmsf
1977  """
1978  rmsf = IMP.pmi.tools.OrderedDict()
1979 
1980  # assumes that residue indexes are identical for stath0 and stath1
1981  if cluster_ref is not None:
1982  if cluster_ref.center_index is not None:
1983  members0 = [cluster_ref.center_index]
1984  else:
1985  members0 = cluster_ref.members
1986  else:
1987  if cluster.center_index is not None:
1988  members0 = [cluster.center_index]
1989  else:
1990  members0 = cluster.members
1991 
1992  s0 = IMP.atom.Selection(self.stath0, molecule=molecule, resolution=1,
1993  copy_index=copy_index, state_index=state_index)
1994  ps0 = s0.get_selected_particles()
1995  # get the residue indexes
1996  residue_indexes = list(IMP.pmi.tools.OrderedSet(
1997  [IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
1998 
1999  # get the corresponding particles
2000  npairs = 0
2001  for n0 in members0:
2002  d0 = self.stath0[n0]
2003  for n1 in cluster.members[::step]:
2004  if n0 != n1:
2005  print("--- rmsf %s %s" % (str(n0), str(n1)))
2007 
2008  s1 = IMP.atom.Selection(
2009  self.stath1, molecule=molecule,
2010  residue_indexes=residue_indexes, resolution=1,
2011  copy_index=copy_index, state_index=state_index)
2012  ps1 = s1.get_selected_particles()
2013 
2014  d1 = self.stath1[n1]
2015  if self.alignment:
2016  self.align()
2017  for n, (p0, p1) in enumerate(zip(ps0, ps1)):
2018  r = residue_indexes[n]
2019  d0 = IMP.core.XYZ(p0)
2020  d1 = IMP.core.XYZ(p1)
2021  if r in rmsf:
2022  rmsf[r] += IMP.core.get_distance(d0, d1)
2023  else:
2024  rmsf[r] = IMP.core.get_distance(d0, d1)
2025  npairs += 1
2027  for r in rmsf:
2028  rmsf[r] /= npairs
2029 
2030  for stath in [self.stath0, self.stath1]:
2031  if molecule not in self.symmetric_molecules:
2032  s = IMP.atom.Selection(
2033  stath, molecule=molecule, residue_index=r,
2034  resolution=1, copy_index=copy_index,
2035  state_index=state_index)
2036  else:
2037  s = IMP.atom.Selection(
2038  stath, molecule=molecule, residue_index=r,
2039  resolution=1, state_index=state_index)
2040 
2041  ps = s.get_selected_particles()
2042  for p in ps:
2044  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
2045  else:
2047 
2048  return rmsf
2049 
2050  def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2051  reference="Absolute", prefix="./", step=1):
2052  if self.alignment:
2053  self.set_reference(reference, cluster)
2054  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
2055  voxel=voxel_size)
2056 
2057  for n1 in cluster.members[::step]:
2058  print("density "+str(n1))
2059  _ = self.stath1[n1]
2061  if self.alignment:
2062  self.align()
2063  dens.add_subunits_density(self.stath1)
2065  dens.write_mrc(path=prefix+'/', suffix=str(cluster.cluster_id))
2066  del dens
2067 
2068  def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2069  consolidate=False, molecules=None, prefix='./',
2070  reference="Absolute"):
2071  if self.alignment:
2072  self.set_reference(reference, cluster)
2073  import numpy as np
2074  import matplotlib.pyplot as plt
2075  import matplotlib.cm as cm
2076  from scipy.spatial.distance import cdist
2077  import IMP.pmi.topology
2078  if molecules is None:
2080  for mol in IMP.pmi.tools.get_molecules(
2081  IMP.atom.get_leaves(self.stath1))]
2082  else:
2084  for mol in IMP.pmi.tools.get_molecules(
2086  self.stath1,
2087  molecules=molecules).get_selected_particles())]
2088  unique_copies = [mol for mol in mols if mol.get_copy_index() == 0]
2089  mol_names_unique = dict((mol.get_name(), mol) for mol in unique_copies)
2090  total_len_unique = sum(max(mol.get_residue_indexes())
2091  for mol in unique_copies)
2092 
2093  index_dict = {}
2094  prev_stop = 0
2095 
2096  if not consolidate:
2097  for mol in mols:
2098  seqlen = max(mol.get_residue_indexes())
2099  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2100  prev_stop += seqlen
2101 
2102  else:
2103  for mol in unique_copies:
2104  seqlen = max(mol.get_residue_indexes())
2105  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2106  prev_stop += seqlen
2107 
2108  for ncl, n1 in enumerate(cluster.members):
2109  print(ncl)
2110  _ = self.stath1[n1]
2111  coord_dict = IMP.pmi.tools.OrderedDict()
2112  for mol in mols:
2113  rindexes = mol.get_residue_indexes()
2114  coords = np.ones((max(rindexes), 3))
2115  for rnum in rindexes:
2116  sel = IMP.atom.Selection(mol, residue_index=rnum,
2117  resolution=1)
2118  selpart = sel.get_selected_particles()
2119  if len(selpart) == 0:
2120  continue
2121  selpart = selpart[0]
2122  coords[rnum - 1, :] = \
2123  IMP.core.XYZ(selpart).get_coordinates()
2124  coord_dict[mol] = coords
2125 
2126  if not consolidate:
2127  coords = np.concatenate(list(coord_dict.values()))
2128  dists = cdist(coords, coords)
2129  binary_dists = np.where((dists <= contact_threshold)
2130  & (dists >= 1.0), 1.0, 0.0)
2131  else:
2132  binary_dists_dict = {}
2133  for mol1 in mols:
2134  len1 = max(mol1.get_residue_indexes())
2135  for mol2 in mols:
2136  name1 = mol1.get_name()
2137  name2 = mol2.get_name()
2138  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2139  if (name1, name2) not in binary_dists_dict:
2140  binary_dists_dict[(name1, name2)] = \
2141  np.zeros((len1, len1))
2142  binary_dists_dict[(name1, name2)] += \
2143  np.where((dists <= contact_threshold)
2144  & (dists >= 1.0), 1.0, 0.0)
2145  binary_dists = np.zeros((total_len_unique, total_len_unique))
2146 
2147  for name1, name2 in binary_dists_dict:
2148  r1 = index_dict[mol_names_unique[name1]]
2149  r2 = index_dict[mol_names_unique[name2]]
2150  binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2151  np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2152  1.0, 0.0)
2153 
2154  if ncl == 0:
2155  dist_maps = [dists]
2156  av_dist_map = dists
2157  contact_freqs = binary_dists
2158  else:
2159  dist_maps.append(dists)
2160  av_dist_map += dists
2161  contact_freqs += binary_dists
2162 
2163  if log_scale:
2164  contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2165  else:
2166  contact_freqs = 1.0/len(cluster)*contact_freqs
2167  av_dist_map = 1.0/len(cluster)*contact_freqs
2168 
2169  fig = plt.figure(figsize=(100, 100))
2170  ax = fig.add_subplot(111)
2171  ax.set_xticks([])
2172  ax.set_yticks([])
2173  gap_between_components = 50
2174  colormap = cm.Blues
2175  colornorm = None
2176 
2177  if not consolidate:
2178  sorted_tuple = sorted(
2180  mol).get_extended_name(), mol) for mol in mols)
2181  prot_list = list(zip(*sorted_tuple))[1]
2182  else:
2183  sorted_tuple = sorted(
2184  (IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(), mol)
2185  for mol in unique_copies)
2186  prot_list = list(zip(*sorted_tuple))[1]
2187 
2188  prot_listx = prot_list
2189  nresx = gap_between_components + \
2190  sum([max(mol.get_residue_indexes())
2191  + gap_between_components for mol in prot_listx])
2192 
2193  # set the list of proteins on the y axis
2194  prot_listy = prot_list
2195  nresy = gap_between_components + \
2196  sum([max(mol.get_residue_indexes())
2197  + gap_between_components for mol in prot_listy])
2198 
2199  # this is the residue offset for each protein
2200  resoffsetx = {}
2201  resendx = {}
2202  res = gap_between_components
2203  for mol in prot_listx:
2204  resoffsetx[mol] = res
2205  res += max(mol.get_residue_indexes())
2206  resendx[mol] = res
2207  res += gap_between_components
2208 
2209  resoffsety = {}
2210  resendy = {}
2211  res = gap_between_components
2212  for mol in prot_listy:
2213  resoffsety[mol] = res
2214  res += max(mol.get_residue_indexes())
2215  resendy[mol] = res
2216  res += gap_between_components
2217 
2218  resoffsetdiagonal = {}
2219  res = gap_between_components
2220  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2221  resoffsetdiagonal[mol] = res
2222  res += max(mol.get_residue_indexes())
2223  res += gap_between_components
2224 
2225  # plot protein boundaries
2226  xticks = []
2227  xlabels = []
2228  for n, prot in enumerate(prot_listx):
2229  res = resoffsetx[prot]
2230  end = resendx[prot]
2231  for proty in prot_listy:
2232  resy = resoffsety[proty]
2233  endy = resendy[proty]
2234  ax.plot([res, res], [resy, endy], linestyle='-',
2235  color='gray', lw=0.4)
2236  ax.plot([end, end], [resy, endy], linestyle='-',
2237  color='gray', lw=0.4)
2238  xticks.append((float(res) + float(end)) / 2)
2240  prot).get_extended_name())
2241 
2242  yticks = []
2243  ylabels = []
2244  for n, prot in enumerate(prot_listy):
2245  res = resoffsety[prot]
2246  end = resendy[prot]
2247  for protx in prot_listx:
2248  resx = resoffsetx[protx]
2249  endx = resendx[protx]
2250  ax.plot([resx, endx], [res, res], linestyle='-',
2251  color='gray', lw=0.4)
2252  ax.plot([resx, endx], [end, end], linestyle='-',
2253  color='gray', lw=0.4)
2254  yticks.append((float(res) + float(end)) / 2)
2256  prot).get_extended_name())
2257 
2258  # plot the contact map
2259 
2260  tmp_array = np.zeros((nresx, nresy))
2261  ret = {}
2262  for px in prot_listx:
2263  for py in prot_listy:
2264  resx = resoffsetx[px]
2265  lengx = resendx[px] - 1
2266  resy = resoffsety[py]
2267  lengy = resendy[py] - 1
2268  indexes_x = index_dict[px]
2269  minx = min(indexes_x)
2270  maxx = max(indexes_x)
2271  indexes_y = index_dict[py]
2272  miny = min(indexes_y)
2273  maxy = max(indexes_y)
2274  tmp_array[resx:lengx, resy:lengy] = \
2275  contact_freqs[minx:maxx, miny:maxy]
2276  ret[(px, py)] = np.argwhere(
2277  contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2278 
2279  ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2280  origin='lower', alpha=0.6, interpolation='nearest')
2281 
2282  ax.set_xticks(xticks)
2283  ax.set_xticklabels(xlabels, rotation=90)
2284  ax.set_yticks(yticks)
2285  ax.set_yticklabels(ylabels)
2286  plt.setp(ax.get_xticklabels(), fontsize=6)
2287  plt.setp(ax.get_yticklabels(), fontsize=6)
2288 
2289  # display and write to file
2290  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2291  [i.set_linewidth(2.0) for i in ax.spines.values()]
2292 
2293  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf",
2294  dpi=300, transparent="False")
2295  return ret
2296 
2297  def plot_rmsd_matrix(self, filename):
2298  self.compute_all_pairwise_rmsd()
2299  distance_matrix = np.zeros(
2300  (len(self.stath0), len(self.stath1)))
2301  for (n0, n1) in self.pairwise_rmsd:
2302  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2303 
2304  import matplotlib as mpl
2305  mpl.use('Agg')
2306  import matplotlib.pylab as pl
2307  from scipy.cluster import hierarchy as hrc
2308 
2309  fig = pl.figure(figsize=(10, 8))
2310  ax = fig.add_subplot(212)
2311  dendrogram = hrc.dendrogram(
2312  hrc.linkage(distance_matrix),
2313  color_threshold=7,
2314  no_labels=True)
2315  leaves_order = dendrogram['leaves']
2316  ax.set_xlabel('Model')
2317  ax.set_ylabel('RMSD [Angstroms]')
2318 
2319  ax2 = fig.add_subplot(221)
2320  cax = ax2.imshow(
2321  distance_matrix[leaves_order, :][:, leaves_order],
2322  interpolation='nearest')
2323  cb = fig.colorbar(cax)
2324  cb.set_label('RMSD [Angstroms]')
2325  ax2.set_xlabel('Model')
2326  ax2.set_ylabel('Model')
2327 
2328  pl.savefig(filename, dpi=300)
2329  pl.close(fig)
2330 
2331  ####################
2332  # Internal Functions
2333  ####################
2334 
2335  def update_clusters(self):
2336  """
2337  Update the cluster id numbers
2338  """
2339  for n, c in enumerate(self.clusters):
2340  c.cluster_id = n
2341 
2342  def get_molecule(self, hier, name, copy):
2343  s = IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2344  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2345 
2346  def update_seldicts(self):
2347  """
2348  Update the seldicts
2349  """
2350  self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2351  self.sel0_rmsd.get_selected_particles())
2352  self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2353  self.sel1_rmsd.get_selected_particles())
2354  for mol in self.seldict0:
2355  for sel in self.seldict0[mol]:
2356  self.issymmetricsel[sel] = False
2357  for mol in self.symmetric_molecules:
2358  self.symmetric_molecules[mol] = len(self.seldict0[mol])
2359  for sel in self.seldict0[mol]:
2360  self.issymmetricsel[sel] = True
2361 
2362  def align(self):
2364  self.sel1_alignment, self.sel0_alignment)
2365 
2366  for rb in self.rbs1:
2367  IMP.core.transform(rb, tr)
2368 
2369  for bead in self.beads1:
2370  try:
2371  IMP.core.transform(IMP.core.XYZ(bead), tr)
2372  except: # noqa: E722
2373  continue
2374 
2375  self.model.update()
2376 
2377  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2378  '''
2379  initial filling of the clusters.
2380  '''
2381  n0 = idxs.pop()
2382  print("clustering model "+str(n0))
2383  d0 = self.stath0[n0]
2384  c = IMP.pmi.output.Cluster(len(self.clusters))
2385  print("creating cluster index "+str(len(self.clusters)))
2386  self.clusters.append(c)
2387  c.add_member(n0, d0)
2388  clustered = set([n0])
2389  for n1 in idxs:
2390  print("--- trying to add model " + str(n1) + " to cluster "
2391  + str(len(self.clusters)))
2392  d1 = self.stath1[n1]
2393  if self.alignment:
2394  self.align()
2395  rmsd, _ = self.rmsd(metric=metric)
2396  if rmsd < rmsd_cutoff:
2397  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2398  c.add_member(n1, d1)
2399  clustered.add(n1)
2400  else:
2401  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2402  idxs -= clustered
2403 
2404  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2405  """
2406  merge the clusters that have close members
2407 
2408  @param rmsd_cutoff cutoff distance in Angstorms
2409  @param metric Function to calculate distance between two Selections
2410  (by default, IMP.atom.get_rmsd is used)
2411  """
2412  # before merging, clusters are spheres of radius rmsd_cutoff
2413  # centered on the 1st element
2414  # here we only try to merge clusters whose centers are closer
2415  # than 2*rmsd_cutoff
2416  to_merge = []
2417  print("merging...")
2418  for c0, c1 in filter(lambda x: len(x[0].members) > 1,
2419  itertools.combinations(self.clusters, 2)):
2420  n0, n1 = [c.members[0] for c in (c0, c1)]
2421  _ = self.stath0[n0]
2422  _ = self.stath1[n1]
2423  rmsd, _ = self.rmsd()
2424  if (rmsd < 2*rmsd_cutoff and
2425  self.have_close_members(c0, c1, rmsd_cutoff, metric)):
2426  to_merge.append((c0, c1))
2427 
2428  for c0, c in reversed(to_merge):
2429  self.merge(c0, c)
2430 
2431  # keep only full clusters
2432  self.clusters = [c for c in
2433  filter(lambda x: len(x.members) > 0, self.clusters)]
2434 
2435  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2436  '''
2437  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2438  '''
2439  print("check close members for clusters " + str(c0.cluster_id) +
2440  " and " + str(c1.cluster_id))
2441  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2442  _ = self.stath0[n0]
2443  _ = self.stath1[n1]
2444  rmsd, _ = self.rmsd(metric=metric)
2445  if rmsd < rmsd_cutoff:
2446  return True
2447 
2448  return False
2449 
2450  def merge(self, c0, c1):
2451  '''
2452  merge two clusters
2453  '''
2454  c0 += c1
2455  c1.members = []
2456  c1.data = {}
2457 
2458  def rmsd_helper(self, sels0, sels1, metric):
2459  '''
2460  a function that returns the permutation best_sel of sels0 that
2461  minimizes metric
2462  '''
2463  best_rmsd2 = float('inf')
2464  best_sel = None
2465  if self.issymmetricsel[sels0[0]]:
2466  # this cases happens when symmetries were defined
2467  N = len(sels0)
2468  for offset in range(N):
2469  sels = [sels0[(offset+i) % N] for i in range(N)]
2470  sel0 = sels[0]
2471  sel1 = sels1[0]
2472  r = metric(sel0, sel1)
2473  rmsd2 = r*r*N
2474  if rmsd2 < best_rmsd2:
2475  best_rmsd2 = rmsd2
2476  best_sel = sels
2477  else:
2478  for sels in itertools.permutations(sels0):
2479  rmsd2 = 0.0
2480  for sel0, sel1 in itertools.takewhile(
2481  lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2482  r = metric(sel0, sel1)
2483  rmsd2 += r*r
2484  if rmsd2 < best_rmsd2:
2485  best_rmsd2 = rmsd2
2486  best_sel = sels
2487  return best_sel, best_rmsd2
2488 
2489  def compute_all_pairwise_rmsd(self):
2490  for d0 in self.stath0:
2491  for d1 in self.stath1:
2492  rmsd, _ = self.rmsd()
2493 
2494  def rmsd(self, metric=IMP.atom.get_rmsd):
2495  '''
2496  Computes the RMSD. Resolves ambiguous pairs assignments
2497  '''
2498  # here we memoize the rmsd and molecular assignment so that it's
2499  # not done multiple times
2500  n0 = self.stath0.current_index
2501  n1 = self.stath1.current_index
2502  if ((n0, n1) in self.pairwise_rmsd) \
2503  and ((n0, n1) in self.pairwise_molecular_assignment):
2504  return (self.pairwise_rmsd[(n0, n1)],
2505  self.pairwise_molecular_assignment[(n0, n1)])
2506 
2507  if self.alignment:
2508  self.align()
2509  # if it's not yet memoized
2510  total_rmsd = 0.0
2511  total_N = 0
2512  # this is a dictionary which keys are the molecule names, and values
2513  # are the list of IMP.atom.Selection for all molecules that share
2514  # the molecule name
2515  molecular_assignment = {}
2516  for molname, sels0 in self.seldict0.items():
2517  sels_best_order, best_rmsd2 = \
2518  self.rmsd_helper(sels0, self.seldict1[molname], metric)
2519 
2520  Ncoords = len(sels_best_order[0].get_selected_particles())
2521  Ncopies = len(self.seldict1[molname])
2522  total_rmsd += Ncoords*best_rmsd2
2523  total_N += Ncoords*Ncopies
2524 
2525  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2526  p0 = sel0.get_selected_particles()[0]
2527  p1 = sel1.get_selected_particles()[0]
2528  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2529  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2530  c0 = IMP.atom.Copy(m0).get_copy_index()
2531  c1 = IMP.atom.Copy(m1).get_copy_index()
2532  molecular_assignment[(molname, c0)] = (molname, c1)
2533 
2534  total_rmsd = math.sqrt(total_rmsd/total_N)
2535 
2536  self.pairwise_rmsd[(n0, n1)] = total_rmsd
2537  self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2538  self.pairwise_rmsd[(n1, n0)] = total_rmsd
2539  self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2540  return total_rmsd, molecular_assignment
2541 
2542  def set_reference(self, reference, cluster):
2543  """
2544  Fix the reference structure for structural alignment, rmsd and
2545  chain assignment
2546 
2547  @param reference can be either "Absolute" (cluster center of the
2548  first cluster) or Relative (cluster center of the current
2549  cluster)
2550  #param cluster the reference IMP.pmi.output.Cluster object
2551  """
2552  if reference == "Absolute":
2553  _ = self.stath0[0]
2554  elif reference == "Relative":
2555  if cluster.center_index:
2556  n0 = cluster.center_index
2557  else:
2558  n0 = cluster.members[0]
2559  _ = self.stath0[n0]
2560 
2562  """
2563  compute the molecular assignments between multiple copies
2564  of the same sequence. It changes the Copy index of Molecules
2565  """
2566  _ = self.stath1[n1]
2567  _, molecular_assignment = self.rmsd()
2568  for (m0, c0), (m1, c1) in molecular_assignment.items():
2569  mol0 = self.molcopydict0[m0][c0]
2570  mol1 = self.molcopydict1[m1][c1]
2571  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2572  p1 = IMP.atom.Copy(mol1).get_particle()
2573  p1.set_value(cik0, c0)
2574 
2576  """
2577  Undo the Copy index assignment
2578  """
2579  _ = self.stath1[n1]
2580  _, molecular_assignment = self.rmsd()
2581  for (m0, c0), (m1, c1) in molecular_assignment.items():
2582  mol0 = self.molcopydict0[m0][c0]
2583  mol1 = self.molcopydict1[m1][c1]
2584  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2585  p1 = IMP.atom.Copy(mol1).get_particle()
2586  p1.set_value(cik0, c1)
2587 
2588  ####################
2589  # Container Functions
2590  ####################
2591 
2592  def __repr__(self):
2593  s = "AnalysisReplicaExchange\n"
2594  s += "---- number of clusters %s \n" % str(len(self.clusters))
2595  s += "---- number of models %s \n" % str(len(self.stath0))
2596  return s
2597 
2598  def __getitem__(self, int_slice_adaptor):
2599  if isinstance(int_slice_adaptor, int):
2600  return self.clusters[int_slice_adaptor]
2601  elif isinstance(int_slice_adaptor, slice):
2602  return self.__iter__(int_slice_adaptor)
2603  else:
2604  raise TypeError("Unknown Type")
2605 
2606  def __len__(self):
2607  return len(self.clusters)
2608 
2609  def __iter__(self, slice_key=None):
2610  if slice_key is None:
2611  for i in range(len(self)):
2612  yield self[i]
2613  else:
2614  for i in range(len(self))[slice_key]:
2615  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2494
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignment.
Definition: macros.py:2542
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1823
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1062
def precision
Compute the precision of a cluster.
Definition: macros.py:1921
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:939
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:104
A container for models organized into clusters.
Definition: output.py:1506
Sample using molecular dynamics.
Definition: samplers.py:326
def aggregate
initial filling of the clusters.
Definition: macros.py:2377
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:634
A macro to help setup and run replica exchange.
Definition: macros.py:65
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:635
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1886
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1970
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1159
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1683
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1764
def __init__
Construction of the Class.
Definition: macros.py:1636
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:843
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1781
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2575
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1699
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2458
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1861
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1082
def apply_molecular_assignments
compute the molecular assignments between multiple copies of the same sequence.
Definition: macros.py:2561
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1625
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:630
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2404
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:991
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1843
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:1002
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def __init__
Constructor.
Definition: macros.py:137
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:21
def update_seldicts
Update the seldicts.
Definition: macros.py:2346
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2335
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:542
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1728
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:199
Collect timing information.
Definition: tools.py:119
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1692
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9518
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:46
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2450
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:683
The general base class for IMP exceptions.
Definition: exception.h:48
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1253
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1774
Class to handle individual particles of a Model object.
Definition: Particle.h:43
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:854
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1950
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:659
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1715
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1814
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2435
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1135
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1789
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:499
Sample using replica exchange.
Definition: samplers.py:431
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27