IMP logo
IMP Reference Guide  develop.374319d7bb,2024/06/14
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 import IMP
6 import IMP.pmi.tools
7 import IMP.pmi.samplers
8 import IMP.pmi.output
9 import IMP.pmi.analysis
10 import IMP.pmi.io
11 import IMP.pmi.alphabets
12 import IMP.rmf
13 import IMP.isd
14 import IMP.pmi.dof
15 import os
16 from pathlib import Path
17 import glob
18 from operator import itemgetter
19 from collections import defaultdict
20 import numpy as np
21 import itertools
22 import warnings
23 import math
24 
25 import pickle
26 
27 
28 class _MockMPIValues:
29  """Replace samplers.MPI_values when in test mode"""
30  def get_percentile(self, name):
31  return 0.
32 
33 
34 class _RMFRestraints:
35  """All restraints that are written out to the RMF file"""
36  def __init__(self, model, user_restraints):
37  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
38  self._user_restraints = user_restraints if user_restraints else []
39 
40  def __len__(self):
41  return (len(self._user_restraints)
42  + self._rmf_rs.get_number_of_restraints())
43 
44  def __bool__(self):
45  return len(self) > 0
46 
47  def __getitem__(self, i):
48  class FakePMIWrapper:
49  def __init__(self, r):
50  self.r = IMP.RestraintSet.get_from(r)
51 
52  def get_restraint(self):
53  return self.r
54 
55  lenuser = len(self._user_restraints)
56  if 0 <= i < lenuser:
57  return self._user_restraints[i]
58  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
59  r = self._rmf_rs.get_restraint(i - lenuser)
60  return FakePMIWrapper(r)
61  else:
62  raise IndexError("Out of range")
63 
64 
66  """A macro to help setup and run replica exchange.
67  Supports Monte Carlo and molecular dynamics.
68  Produces trajectory RMF files, best PDB structures,
69  and output stat files.
70  """
71  def __init__(self, model, root_hier,
72  monte_carlo_sample_objects=None,
73  molecular_dynamics_sample_objects=None,
74  output_objects=[],
75  rmf_output_objects=None,
76  monte_carlo_temperature=1.0,
77  simulated_annealing=False,
78  simulated_annealing_minimum_temperature=1.0,
79  simulated_annealing_maximum_temperature=2.5,
80  simulated_annealing_minimum_temperature_nframes=100,
81  simulated_annealing_maximum_temperature_nframes=100,
82  replica_exchange_minimum_temperature=1.0,
83  replica_exchange_maximum_temperature=2.5,
84  replica_exchange_swap=True,
85  num_sample_rounds=1,
86  number_of_best_scoring_models=500,
87  monte_carlo_steps=10,
88  self_adaptive=False,
89  molecular_dynamics_steps=10,
90  molecular_dynamics_max_time_step=1.0,
91  number_of_frames=1000,
92  save_coordinates_mode="lowest_temperature",
93  nframes_write_coordinates=1,
94  write_initial_rmf=True,
95  initial_rmf_name_suffix="initial",
96  stat_file_name_suffix="stat",
97  best_pdb_name_suffix="model",
98  mmcif=False,
99  do_clean_first=True,
100  do_create_directories=True,
101  global_output_directory="./",
102  rmf_dir="rmfs/",
103  best_pdb_dir="pdbs/",
104  replica_stat_file_suffix="stat_replica",
105  em_object_for_rmf=None,
106  atomistic=False,
107  replica_exchange_object=None,
108  test_mode=False,
109  score_moved=False,
110  use_nestor=False,
111  nestor_restraints=None,
112  nestor_rmf_fname_prefix="nested",):
113  """Constructor.
114  @param model The IMP model
115  @param root_hier Top-level (System)hierarchy
116  @param monte_carlo_sample_objects Objects for MC sampling, which
117  should generally be a simple list of Mover objects, e.g.
118  from DegreesOfFreedom.get_movers().
119  @param molecular_dynamics_sample_objects Objects for MD sampling,
120  which should generally be a simple list of particles.
121  @param output_objects A list of structural objects and restraints
122  that will be included in output (ie, statistics "stat"
123  files). Any object that provides a get_output() method
124  can be used here. If None is passed
125  the macro will not write stat files.
126  @param rmf_output_objects A list of structural objects and
127  restraints that will be included in rmf. Any object
128  that provides a get_output() method can be used here.
129  @param monte_carlo_temperature MC temp (may need to be optimized
130  based on post-sampling analysis)
131  @param simulated_annealing If True, perform simulated annealing
132  @param simulated_annealing_minimum_temperature Should generally be
133  the same as monte_carlo_temperature.
134  @param simulated_annealing_minimum_temperature_nframes Number of
135  frames to compute at minimum temperature.
136  @param simulated_annealing_maximum_temperature_nframes Number of
137  frames to compute at
138  temps > simulated_annealing_maximum_temperature.
139  @param replica_exchange_minimum_temperature Low temp for REX; should
140  generally be the same as monte_carlo_temperature.
141  @param replica_exchange_maximum_temperature High temp for REX
142  @param replica_exchange_swap Boolean, enable disable temperature
143  swap (Default=True)
144  @param num_sample_rounds Number of rounds of MC/MD per cycle
145  @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
146  models to keep around for analysis.
147  @param mmcif If True, write best scoring models in mmCIF format;
148  if False (the default), write in legacy PDB format.
149  @param best_pdb_dir The directory under `global_output_directory`
150  where best-scoring PDB/mmCIF files are written.
151  @param best_pdb_name_suffix Part of the file name for best-scoring
152  PDB/mmCIF files.
153  @param monte_carlo_steps Number of MC steps per round
154  @param self_adaptive self adaptive scheme for Monte Carlo movers
155  @param molecular_dynamics_steps Number of MD steps per round
156  @param molecular_dynamics_max_time_step Max time step for MD
157  @param number_of_frames Number of REX frames to run
158  @param save_coordinates_mode string: how to save coordinates.
159  "lowest_temperature" (default) only the lowest temperatures
160  is saved
161  "25th_score" all replicas whose score is below the 25th
162  percentile
163  "50th_score" all replicas whose score is below the 50th
164  percentile
165  "75th_score" all replicas whose score is below the 75th
166  percentile
167  @param nframes_write_coordinates How often to write the coordinates
168  of a frame
169  @param write_initial_rmf Write the initial configuration
170  @param global_output_directory Folder that will be created to house
171  output.
172  @param test_mode Set to True to avoid writing any files, just test
173  one frame.
174  @param score_moved If True, attempt to speed up Monte Carlo
175  sampling by caching scoring function terms on particles
176  that didn't move.
177  @param use_nestor If True, follows the Nested Sampling workflow
178  of the NestOR module and skips writing stat files and
179  replica stat files.
180  @param nestor_restraints A list of restraints for which
181  likelihoods are to be computed for use by NestOR module.
182  @param nestor_rmf_fname_prefix Prefix to be used for storing .rmf3
183  files generated by NestOR .
184  """
185  self.model = model
186  self.vars = {}
187 
188  # add check hierarchy is multistate
189  if output_objects == []:
190  # The "[]" in the default parameters is a global object, so make
191  # our own copy here
192  self.output_objects = []
193  else:
194  self.output_objects = output_objects
195  self.rmf_output_objects = rmf_output_objects
196  if (isinstance(root_hier, IMP.atom.Hierarchy)
197  and not root_hier.get_parent()):
198  if self.output_objects is not None:
199  self.output_objects.append(
200  IMP.pmi.io.TotalScoreOutput(self.model))
201  if self.rmf_output_objects is not None:
202  self.rmf_output_objects.append(
203  IMP.pmi.io.TotalScoreOutput(self.model))
204  self.root_hier = root_hier
205  states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
206  self.vars["number_of_states"] = len(states)
207  if len(states) > 1:
208  self.root_hiers = states
209  self.is_multi_state = True
210  else:
211  self.root_hier = root_hier
212  self.is_multi_state = False
213  else:
214  raise TypeError("Must provide System hierarchy (root_hier)")
215 
216  self._rmf_restraints = _RMFRestraints(model, None)
217  self.em_object_for_rmf = em_object_for_rmf
218  self.monte_carlo_sample_objects = monte_carlo_sample_objects
219  self.vars["self_adaptive"] = self_adaptive
220  self.molecular_dynamics_sample_objects = \
221  molecular_dynamics_sample_objects
222  self.replica_exchange_object = replica_exchange_object
223  self.molecular_dynamics_max_time_step = \
224  molecular_dynamics_max_time_step
225  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
226  self.vars["replica_exchange_minimum_temperature"] = \
227  replica_exchange_minimum_temperature
228  self.vars["replica_exchange_maximum_temperature"] = \
229  replica_exchange_maximum_temperature
230  self.vars["replica_exchange_swap"] = replica_exchange_swap
231  self.vars["simulated_annealing"] = simulated_annealing
232  self.vars["simulated_annealing_minimum_temperature"] = \
233  simulated_annealing_minimum_temperature
234  self.vars["simulated_annealing_maximum_temperature"] = \
235  simulated_annealing_maximum_temperature
236  self.vars["simulated_annealing_minimum_temperature_nframes"] = \
237  simulated_annealing_minimum_temperature_nframes
238  self.vars["simulated_annealing_maximum_temperature_nframes"] = \
239  simulated_annealing_maximum_temperature_nframes
240 
241  self.vars["num_sample_rounds"] = num_sample_rounds
242  self.vars[
243  "number_of_best_scoring_models"] = number_of_best_scoring_models
244  self.vars["monte_carlo_steps"] = monte_carlo_steps
245  self.vars["molecular_dynamics_steps"] = molecular_dynamics_steps
246  self.vars["number_of_frames"] = number_of_frames
247  if save_coordinates_mode not in ("lowest_temperature", "25th_score",
248  "50th_score", "75th_score"):
249  raise Exception("save_coordinates_mode has unrecognized value")
250  else:
251  self.vars["save_coordinates_mode"] = save_coordinates_mode
252  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
253  self.vars["write_initial_rmf"] = write_initial_rmf
254  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
255  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
256  self.vars["mmcif"] = mmcif
257  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
258  self.vars["do_clean_first"] = do_clean_first
259  self.vars["do_create_directories"] = do_create_directories
260  self.vars["global_output_directory"] = global_output_directory
261  self.vars["rmf_dir"] = rmf_dir
262  self.vars["best_pdb_dir"] = best_pdb_dir
263  self.vars["atomistic"] = atomistic
264  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
265  self.vars["geometries"] = None
266  self.test_mode = test_mode
267  self.score_moved = score_moved
268  self.nest = use_nestor
269  self.nestor_restraints = nestor_restraints
270  self.nestor_rmf_fname = nestor_rmf_fname_prefix
271 
272  def add_geometries(self, geometries):
273  if self.vars["geometries"] is None:
274  self.vars["geometries"] = list(geometries)
275  else:
276  self.vars["geometries"].extend(geometries)
277 
278  def show_info(self):
279  print("ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
280  "rmfs/*.rmf3 for each replica ")
281  print("--- it stores the best scoring pdb models in pdbs/")
282  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
283  "lowest temperature")
284  print("--- variables:")
285  keys = list(self.vars.keys())
286  keys.sort()
287  for v in keys:
288  print("------", v.ljust(30), self.vars[v])
289  print("Use nestor: ", self.nest)
290 
291  def get_replica_exchange_object(self):
292  return self.replica_exchange_object
293 
294  def _add_provenance(self, sampler_md, sampler_mc):
295  """Record details about the sampling in the IMP Hierarchies"""
296  iterations = 0
297  if sampler_md:
298  method = "Molecular Dynamics"
299  iterations += self.vars["molecular_dynamics_steps"]
300  if sampler_mc:
301  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
302  iterations += self.vars["monte_carlo_steps"]
303  # If no sampling is actually done, no provenance to write
304  if iterations == 0 or self.vars["number_of_frames"] == 0:
305  return
306  iterations *= self.vars["num_sample_rounds"]
307 
308  pi = self.model.add_particle("sampling")
310  self.model, pi, method, self.vars["number_of_frames"],
311  iterations)
312  p.set_number_of_replicas(
313  self.replica_exchange_object.get_number_of_replicas())
314  IMP.pmi.tools._add_pmi_provenance(self.root_hier)
315  IMP.core.add_provenance(self.model, self.root_hier, p)
316 
317  def execute_macro(self):
318  temp_index_factor = 100000.0
319  samplers = []
320  sampler_mc = None
321  sampler_md = None
322  if self.monte_carlo_sample_objects is not None:
323  print("Setting up MonteCarlo")
324  sampler_mc = IMP.pmi.samplers.MonteCarlo(
325  self.model, self.monte_carlo_sample_objects,
326  self.vars["monte_carlo_temperature"],
327  score_moved=self.score_moved)
328  if self.vars["simulated_annealing"]:
329  tmin = self.vars["simulated_annealing_minimum_temperature"]
330  tmax = self.vars["simulated_annealing_maximum_temperature"]
331  nfmin = self.vars[
332  "simulated_annealing_minimum_temperature_nframes"]
333  nfmax = self.vars[
334  "simulated_annealing_maximum_temperature_nframes"]
335  sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
336  if self.vars["self_adaptive"]:
337  sampler_mc.set_self_adaptive(
338  isselfadaptive=self.vars["self_adaptive"])
339  if self.output_objects is not None:
340  self.output_objects.append(sampler_mc)
341  if self.rmf_output_objects is not None:
342  self.rmf_output_objects.append(sampler_mc)
343  samplers.append(sampler_mc)
344 
345  if self.molecular_dynamics_sample_objects is not None:
346  print("Setting up MolecularDynamics")
348  self.model, self.molecular_dynamics_sample_objects,
349  self.vars["monte_carlo_temperature"],
350  maximum_time_step=self.molecular_dynamics_max_time_step)
351  if self.vars["simulated_annealing"]:
352  tmin = self.vars["simulated_annealing_minimum_temperature"]
353  tmax = self.vars["simulated_annealing_maximum_temperature"]
354  nfmin = self.vars[
355  "simulated_annealing_minimum_temperature_nframes"]
356  nfmax = self.vars[
357  "simulated_annealing_maximum_temperature_nframes"]
358  sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
359  if self.output_objects is not None:
360  self.output_objects.append(sampler_md)
361  if self.rmf_output_objects is not None:
362  self.rmf_output_objects.append(sampler_md)
363  samplers.append(sampler_md)
364 # -------------------------------------------------------------------------
365 
366  print("Setting up ReplicaExchange")
368  self.model, self.vars["replica_exchange_minimum_temperature"],
369  self.vars["replica_exchange_maximum_temperature"], samplers,
370  replica_exchange_object=self.replica_exchange_object)
371  self.replica_exchange_object = rex.rem
372 
373  myindex = rex.get_my_index()
374  if self.output_objects is not None:
375  self.output_objects.append(rex)
376  if self.rmf_output_objects is not None:
377  self.rmf_output_objects.append(rex)
378  # must reset the minimum temperature due to the
379  # different binary length of rem.get_my_parameter double and python
380  # float
381  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
382 
383 # -------------------------------------------------------------------------
384 
385  globaldir = self.vars["global_output_directory"] + "/"
386  rmf_dir = globaldir + self.vars["rmf_dir"]
387  pdb_dir = globaldir + self.vars["best_pdb_dir"]
388 
389  if not self.test_mode and not self.nest:
390  if self.vars["do_clean_first"]:
391  pass
392 
393  if self.vars["do_create_directories"]:
394 
395  try:
396  os.makedirs(globaldir)
397  except: # noqa: E722
398  pass
399  try:
400  os.makedirs(rmf_dir)
401  except: # noqa: E722
402  pass
403 
404  if not self.is_multi_state:
405  try:
406  os.makedirs(pdb_dir)
407  except: # noqa: E722
408  pass
409  else:
410  for n in range(self.vars["number_of_states"]):
411  try:
412  os.makedirs(pdb_dir + "/" + str(n))
413  except: # noqa: E722
414  pass
415 
416 # -------------------------------------------------------------------------
417 
419  if self.output_objects is not None:
420  self.output_objects.append(sw)
421  if self.rmf_output_objects is not None:
422  self.rmf_output_objects.append(sw)
423 
424  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
425 
426  if not self.nest:
427  print("Setting up stat file")
428  low_temp_stat_file = globaldir + \
429  self.vars["stat_file_name_suffix"] + "." + \
430  str(myindex) + ".out"
431 
432  # Ensure model is updated before saving init files
433  if not self.test_mode:
434  self.model.update()
435 
436  if not self.test_mode and not self.nest:
437  if self.output_objects is not None:
438  output.init_stat2(low_temp_stat_file,
439  self.output_objects,
440  extralabels=["rmf_file", "rmf_frame_index"])
441  else:
442  print("Stat file writing is disabled")
443 
444  if self.rmf_output_objects is not None and not self.nest:
445  print("Stat info being written in the rmf file")
446 
447  if not self.test_mode and not self.nest:
448  print("Setting up replica stat file")
449  replica_stat_file = globaldir + \
450  self.vars["replica_stat_file_suffix"] + "." + \
451  str(myindex) + ".out"
452  if not self.test_mode:
453  output.init_stat2(replica_stat_file, [rex],
454  extralabels=["score"])
455 
456  print("Setting up best pdb files")
457  if not self.is_multi_state:
458  if self.vars["number_of_best_scoring_models"] > 0:
459  output.init_pdb_best_scoring(
460  pdb_dir + "/" + self.vars["best_pdb_name_suffix"],
461  self.root_hier,
462  self.vars["number_of_best_scoring_models"],
463  replica_exchange=True,
464  mmcif=self.vars['mmcif'],
465  best_score_file=globaldir + "best.scores.rex.py")
466  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
467  output.write_psf(
468  pdb_dir + "/" + "model.psf",
469  pdb_dir + "/" +
470  self.vars["best_pdb_name_suffix"] + pdbext)
471  else:
472  if self.vars["number_of_best_scoring_models"] > 0:
473  for n in range(self.vars["number_of_states"]):
474  output.init_pdb_best_scoring(
475  pdb_dir + "/" + str(n) + "/" +
476  self.vars["best_pdb_name_suffix"],
477  self.root_hiers[n],
478  self.vars["number_of_best_scoring_models"],
479  replica_exchange=True,
480  mmcif=self.vars['mmcif'],
481  best_score_file=globaldir + "best.scores.rex.py")
482  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
483  output.write_psf(
484  pdb_dir + "/" + str(n) + "/" + "model.psf",
485  pdb_dir + "/" + str(n) + "/" +
486  self.vars["best_pdb_name_suffix"] + pdbext)
487 # ---------------------------------------------
488 
489  if self.em_object_for_rmf is not None:
490  output_hierarchies = [
491  self.root_hier,
492  self.em_object_for_rmf.get_density_as_hierarchy(
493  )]
494  else:
495  output_hierarchies = [self.root_hier]
496 
497  if not self.test_mode and not self.nest:
498  print("Setting up and writing initial rmf coordinate file")
499  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
500  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
501  output_hierarchies,
502  listofobjects=self.rmf_output_objects)
503  if self._rmf_restraints:
504  output.add_restraints_to_rmf(
505  init_suffix + "." + str(myindex) + ".rmf3",
506  self._rmf_restraints)
507  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
508  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
509 
510  if not self.test_mode:
511  mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
512  else:
513  mpivs = _MockMPIValues()
514 
515  self._add_provenance(sampler_md, sampler_mc)
516 
517  if not self.test_mode and not self.nest:
518  print("Setting up production rmf files")
519  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
520  output.init_rmf(rmfname, output_hierarchies,
521  geometries=self.vars["geometries"],
522  listofobjects=self.rmf_output_objects)
523 
524  if self._rmf_restraints:
525  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
526 
527  if not self.test_mode and self.nest:
528  print("Setting up NestOR rmf files")
529  nestor_rmf_fname = str(self.nestor_rmf_fname) + '_' + \
530  str(self.replica_exchange_object.get_my_index()) + '.rmf3'
531 
532  output.init_rmf(nestor_rmf_fname, output_hierarchies,
533  geometries=self.vars["geometries"],
534  listofobjects=self.rmf_output_objects)
535 
536  ntimes_at_low_temp = 0
537 
538  if myindex == 0 and not self.nest:
539  self.show_info()
540  self.replica_exchange_object.set_was_used(True)
541  nframes = self.vars["number_of_frames"]
542  if self.test_mode:
543  nframes = 1
544 
545  sampled_likelihoods = []
546  for i in range(nframes):
547  if self.test_mode:
548  score = 0.
549  else:
550  for nr in range(self.vars["num_sample_rounds"]):
551  if sampler_md is not None:
552  sampler_md.optimize(
553  self.vars["molecular_dynamics_steps"])
554  if sampler_mc is not None:
555  sampler_mc.optimize(self.vars["monte_carlo_steps"])
557  self.model).evaluate(False)
558  mpivs.set_value("score", score)
559  if not self.nest:
560  output.set_output_entry("score", score)
561 
562  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
563 
564  if self.vars["save_coordinates_mode"] == "lowest_temperature":
565  save_frame = (min_temp_index == my_temp_index)
566  elif self.vars["save_coordinates_mode"] == "25th_score":
567  score_perc = mpivs.get_percentile("score")
568  save_frame = (score_perc*100.0 <= 25.0)
569  elif self.vars["save_coordinates_mode"] == "50th_score":
570  score_perc = mpivs.get_percentile("score")
571  save_frame = (score_perc*100.0 <= 50.0)
572  elif self.vars["save_coordinates_mode"] == "75th_score":
573  score_perc = mpivs.get_percentile("score")
574  save_frame = (score_perc*100.0 <= 75.0)
575 
576  # Ensure model is updated before saving output files
577  if save_frame and not self.test_mode:
578  self.model.update()
579 
580  if save_frame:
581  print("--- frame %s score %s " % (str(i), str(score)))
582 
583  if self.nest:
584  if math.isnan(score):
585  sampled_likelihoods.append(math.nan)
586  else:
587  likelihood_for_sample = 1
588  for rstrnt in self.nestor_restraints:
589  likelihood_for_sample *= rstrnt.get_likelihood()
590  sampled_likelihoods.append(likelihood_for_sample)
591  output.write_rmf(nestor_rmf_fname)
592 
593  if not self.test_mode and not self.nest:
594  if i % self.vars["nframes_write_coordinates"] == 0:
595  print('--- writing coordinates')
596  if self.vars["number_of_best_scoring_models"] > 0:
597  output.write_pdb_best_scoring(score)
598  output.write_rmf(rmfname)
599  output.set_output_entry("rmf_file", rmfname)
600  output.set_output_entry("rmf_frame_index",
601  ntimes_at_low_temp)
602  else:
603  output.set_output_entry("rmf_file", rmfname)
604  output.set_output_entry("rmf_frame_index", '-1')
605  if self.output_objects is not None:
606  output.write_stat2(low_temp_stat_file)
607  ntimes_at_low_temp += 1
608 
609  if not self.test_mode and not self.nest:
610  output.write_stat2(replica_stat_file)
611  if self.vars["replica_exchange_swap"]:
612  rex.swap_temp(i, score)
613 
614  if self.nest and len(sampled_likelihoods) > 0:
615  with open("likelihoods_"
616  + str(self.replica_exchange_object.get_my_index()),
617  "wb") as lif:
618  pickle.dump(sampled_likelihoods, lif)
619 
620  output.close_rmf(nestor_rmf_fname)
621 
622  for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
623  p.add_replica_exchange(state, self)
624 
625  if not self.test_mode and not self.nest:
626  print("closing production rmf files")
627  output.close_rmf(rmfname)
628 
629 
631  """A macro to build a IMP::pmi::topology::System based on a
632  TopologyReader object.
633 
634  Easily create multi-state systems by calling this macro
635  repeatedly with different TopologyReader objects!
636  A useful function is get_molecules() which returns the PMI Molecules
637  grouped by state as a dictionary with key = (molecule name),
638  value = IMP.pmi.topology.Molecule
639  Quick multi-state system:
640  @code{.python}
641  model = IMP.Model()
642  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
643  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
644  bs = IMP.pmi.macros.BuildSystem(model)
645  bs.add_state(reader1)
646  bs.add_state(reader2)
647  bs.execute_macro() # build everything including degrees of freedom
648  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
649  ### now you have a two state system, you add restraints etc
650  @endcode
651  @note The "domain name" entry of the topology reader is not used.
652  All molecules are set up by the component name, but split into rigid bodies
653  as requested.
654  """
655 
656  _alphabets = {'DNA': IMP.pmi.alphabets.dna,
657  'RNA': IMP.pmi.alphabets.rna}
658 
659  def __init__(self, model, sequence_connectivity_scale=4.0,
660  force_create_gmm_files=False, resolutions=[1, 10],
661  name='System'):
662  """Constructor
663  @param model An IMP Model
664  @param sequence_connectivity_scale For scaling the connectivity
665  restraint
666  @param force_create_gmm_files If True, will sample and create GMMs
667  no matter what. If False, will only sample if the
668  files don't exist. If number of Gaussians is zero, won't
669  do anything.
670  @param resolutions The resolutions to build for structured regions
671  @param name The name of the top-level hierarchy node.
672  """
673  self.model = model
674  self.system = IMP.pmi.topology.System(self.model, name=name)
675  self._readers = [] # the TopologyReaders (one per state)
676  # TempResidues for each domain key=unique name,
677  # value=(atomic_res,non_atomic_res).
678  self._domain_res = []
679  self._domains = [] # key = domain unique name, value = Component
680  self.force_create_gmm_files = force_create_gmm_files
681  self.resolutions = resolutions
682 
683  def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
684  chain_ids=None):
685  """Add a state using the topology info in a
686  IMP::pmi::topology::TopologyReader object.
687  When you are done adding states, call execute_macro()
688  @param reader The TopologyReader object
689  @param fasta_name_map dictionary for converting protein names
690  found in the fasta file
691  @param chain_ids A list or string of chain IDs for assigning to
692  newly-created molecules, e.g.
693  `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
694  If not specified, chain IDs A through Z are assigned, then
695  AA through AZ, then BA through BZ, and so on, in the same
696  fashion as PDB.
697  """
698  state = self.system.create_state()
699  self._readers.append(reader)
700  # key is unique name, value is (atomic res, nonatomicres)
701  these_domain_res = {}
702  these_domains = {} # key is unique name, value is _Component
703  if chain_ids is None:
704  chain_ids = IMP.pmi.output._ChainIDs()
705  numchain = 0
706 
707  # setup representation
708  # loop over molecules, copies, then domains
709  for molname in reader.get_molecules():
710  copies = reader.get_molecules()[molname].domains
711  for nc, copyname in enumerate(copies):
712  print("BuildSystem.add_state: setting up molecule %s copy "
713  "number %s" % (molname, str(nc)))
714  copy = copies[copyname]
715  # option to not rename chains
716  if keep_chain_id:
717  all_chains = [c for c in copy if c.chain is not None]
718  if all_chains:
719  chain_id = all_chains[0].chain
720  else:
721  chain_id = chain_ids[numchain]
722  warnings.warn(
723  "No PDBs specified for %s, so keep_chain_id has "
724  "no effect; using default chain ID '%s'"
725  % (molname, chain_id), IMP.pmi.ParameterWarning)
726  else:
727  chain_id = chain_ids[numchain]
728  if nc == 0:
729  alphabet = IMP.pmi.alphabets.amino_acid
730  fasta_flag = copy[0].fasta_flag
731  if fasta_flag in self._alphabets:
732  alphabet = self._alphabets[fasta_flag]
734  copy[0].fasta_file, fasta_name_map)
735  seq = seqs[copy[0].fasta_id]
736  print("BuildSystem.add_state: molecule %s sequence has "
737  "%s residues" % (molname, len(seq)))
738  orig_mol = state.create_molecule(
739  molname, seq, chain_id, alphabet=alphabet,
740  uniprot=seqs.uniprot.get(copy[0].fasta_id))
741  mol = orig_mol
742  numchain += 1
743  else:
744  print("BuildSystem.add_state: creating a copy for "
745  "molecule %s" % molname)
746  mol = orig_mol.create_copy(chain_id)
747  numchain += 1
748 
749  for domainnumber, domain in enumerate(copy):
750  print("BuildSystem.add_state: ---- setting up domain %s "
751  "of molecule %s" % (domainnumber, molname))
752  # we build everything in the residue range, even if it
753  # extends beyond what's in the actual PDB file
754  these_domains[domain.get_unique_name()] = domain
755  if domain.residue_range == [] or \
756  domain.residue_range is None:
757  domain_res = mol.get_residues()
758  else:
759  start = domain.residue_range[0]+domain.pdb_offset
760  if domain.residue_range[1] == 'END':
761  end = len(mol.sequence)
762  else:
763  end = domain.residue_range[1]+domain.pdb_offset
764  domain_res = mol.residue_range(start-1, end-1)
765  print("BuildSystem.add_state: -------- domain %s of "
766  "molecule %s extends from residue %s to "
767  "residue %s "
768  % (domainnumber, molname, start, end))
769  if domain.pdb_file == "BEADS":
770  print("BuildSystem.add_state: -------- domain %s of "
771  "molecule %s represented by BEADS "
772  % (domainnumber, molname))
773  mol.add_representation(
774  domain_res,
775  resolutions=[domain.bead_size],
776  setup_particles_as_densities=(
777  domain.em_residues_per_gaussian != 0),
778  color=domain.color)
779  these_domain_res[domain.get_unique_name()] = \
780  (set(), domain_res)
781  elif domain.pdb_file == "IDEAL_HELIX":
782  print("BuildSystem.add_state: -------- domain %s of "
783  "molecule %s represented by IDEAL_HELIX "
784  % (domainnumber, molname))
785  emper = domain.em_residues_per_gaussian
786  mol.add_representation(
787  domain_res,
788  resolutions=self.resolutions,
789  ideal_helix=True,
790  density_residues_per_component=emper,
791  density_prefix=domain.density_prefix,
792  density_force_compute=self.force_create_gmm_files,
793  color=domain.color)
794  these_domain_res[domain.get_unique_name()] = \
795  (domain_res, set())
796  else:
797  print("BuildSystem.add_state: -------- domain %s of "
798  "molecule %s represented by pdb file %s "
799  % (domainnumber, molname, domain.pdb_file))
800  domain_atomic = mol.add_structure(domain.pdb_file,
801  domain.chain,
802  domain.residue_range,
803  domain.pdb_offset,
804  soft_check=True)
805  domain_non_atomic = domain_res - domain_atomic
806  if not domain.em_residues_per_gaussian:
807  mol.add_representation(
808  domain_atomic, resolutions=self.resolutions,
809  color=domain.color)
810  if len(domain_non_atomic) > 0:
811  mol.add_representation(
812  domain_non_atomic,
813  resolutions=[domain.bead_size],
814  color=domain.color)
815  else:
816  print("BuildSystem.add_state: -------- domain %s "
817  "of molecule %s represented by gaussians "
818  % (domainnumber, molname))
819  emper = domain.em_residues_per_gaussian
820  creategmm = self.force_create_gmm_files
821  mol.add_representation(
822  domain_atomic,
823  resolutions=self.resolutions,
824  density_residues_per_component=emper,
825  density_prefix=domain.density_prefix,
826  density_force_compute=creategmm,
827  color=domain.color)
828  if len(domain_non_atomic) > 0:
829  mol.add_representation(
830  domain_non_atomic,
831  resolutions=[domain.bead_size],
832  setup_particles_as_densities=True,
833  color=domain.color)
834  these_domain_res[domain.get_unique_name()] = (
835  domain_atomic, domain_non_atomic)
836  self._domain_res.append(these_domain_res)
837  self._domains.append(these_domains)
838  print('BuildSystem.add_state: State', len(self.system.states), 'added')
839  return state
840 
841  def get_molecules(self):
842  """Return list of all molecules grouped by state.
843  For each state, it's a dictionary of Molecules where key is the
844  molecule name
845  """
846  return [s.get_molecules() for s in self.system.get_states()]
847 
848  def get_molecule(self, molname, copy_index=0, state_index=0):
849  return self.system.get_states()[state_index].get_molecules()[
850  molname][copy_index]
851 
852  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04,
853  max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
854  """Builds representations and sets up degrees of freedom"""
855  print("BuildSystem.execute_macro: building representations")
856  self.root_hier = self.system.build()
857 
858  print("BuildSystem.execute_macro: setting up degrees of freedom")
859  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
860  for nstate, reader in enumerate(self._readers):
861  rbs = reader.get_rigid_bodies()
862  srbs = reader.get_super_rigid_bodies()
863  csrbs = reader.get_chains_of_super_rigid_bodies()
864 
865  # add rigid bodies
866  domains_in_rbs = set()
867  for rblist in rbs:
868  print("BuildSystem.execute_macro: -------- building rigid "
869  "body %s" % (str(rblist)))
870  all_res = IMP.pmi.tools.OrderedSet()
871  bead_res = IMP.pmi.tools.OrderedSet()
872  for dname in rblist:
873  domain = self._domains[nstate][dname]
874  print("BuildSystem.execute_macro: -------- adding %s"
875  % (str(dname)))
876  all_res |= self._domain_res[nstate][dname][0]
877  bead_res |= self._domain_res[nstate][dname][1]
878  domains_in_rbs.add(dname)
879  all_res |= bead_res
880  print("BuildSystem.execute_macro: -------- creating rigid "
881  "body with max_trans %s max_rot %s "
882  "non_rigid_max_trans %s"
883  % (str(max_rb_trans), str(max_rb_rot),
884  str(max_bead_trans)))
885  self.dof.create_rigid_body(all_res,
886  nonrigid_parts=bead_res,
887  max_trans=max_rb_trans,
888  max_rot=max_rb_rot,
889  nonrigid_max_trans=max_bead_trans,
890  name="RigidBody %s" % dname)
891 
892  # if you have any domains not in an RB, set them as flexible beads
893  for dname, domain in self._domains[nstate].items():
894  if dname not in domains_in_rbs:
895  if domain.pdb_file != "BEADS":
896  warnings.warn(
897  "No rigid bodies set for %s. Residues read from "
898  "the PDB file will not be sampled - only regions "
899  "missing from the PDB will be treated flexibly. "
900  "To sample the entire sequence, use BEADS instead "
901  "of a PDB file name" % dname,
903  self.dof.create_flexible_beads(
904  self._domain_res[nstate][dname][1],
905  max_trans=max_bead_trans)
906 
907  # add super rigid bodies
908  for srblist in srbs:
909  print("BuildSystem.execute_macro: -------- building "
910  "super rigid body %s" % (str(srblist)))
911  all_res = IMP.pmi.tools.OrderedSet()
912  for dname in srblist:
913  print("BuildSystem.execute_macro: -------- adding %s"
914  % (str(dname)))
915  all_res |= self._domain_res[nstate][dname][0]
916  all_res |= self._domain_res[nstate][dname][1]
917 
918  print("BuildSystem.execute_macro: -------- creating super "
919  "rigid body with max_trans %s max_rot %s "
920  % (str(max_srb_trans), str(max_srb_rot)))
921  self.dof.create_super_rigid_body(
922  all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
923 
924  # add chains of super rigid bodies
925  for csrblist in csrbs:
926  all_res = IMP.pmi.tools.OrderedSet()
927  for dname in csrblist:
928  all_res |= self._domain_res[nstate][dname][0]
929  all_res |= self._domain_res[nstate][dname][1]
930  all_res = list(all_res)
931  all_res.sort(key=lambda r: r.get_index())
932  self.dof.create_main_chain_mover(all_res)
933  return self.root_hier, self.dof
934 
935 
936 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
938  """A macro for running all the basic operations of analysis.
939  Includes clustering, precision analysis, and making ensemble density maps.
940  A number of plots are also supported.
941  """
942  def __init__(self, model,
943  merge_directories=["./"],
944  stat_file_name_suffix="stat",
945  best_pdb_name_suffix="model",
946  do_clean_first=True,
947  do_create_directories=True,
948  global_output_directory="output/",
949  replica_stat_file_suffix="stat_replica",
950  global_analysis_result_directory="./analysis/",
951  test_mode=False):
952  """Constructor.
953  @param model The IMP model
954  @param stat_file_name_suffix
955  @param merge_directories The directories containing output files
956  @param best_pdb_name_suffix
957  @param do_clean_first
958  @param do_create_directories
959  @param global_output_directory Where everything is
960  @param replica_stat_file_suffix
961  @param global_analysis_result_directory
962  @param test_mode If True, nothing is changed on disk
963  """
964 
965  try:
966  from mpi4py import MPI
967  self.comm = MPI.COMM_WORLD
968  self.rank = self.comm.Get_rank()
969  self.number_of_processes = self.comm.size
970  except ImportError:
971  self.rank = 0
972  self.number_of_processes = 1
973 
974  self.test_mode = test_mode
975  self._protocol_output = []
976  self.cluster_obj = None
977  self.model = model
978  stat_dir = global_output_directory
979  self.stat_files = []
980  # it contains the position of the root directories
981  for rd in merge_directories:
982  stat_files = glob.glob(os.path.join(rd, stat_dir, "stat.*.out"))
983  if len(stat_files) == 0:
984  warnings.warn("no stat files found in %s"
985  % os.path.join(rd, stat_dir),
987  self.stat_files += stat_files
988 
989  def add_protocol_output(self, p):
990  """Capture details of the modeling protocol.
991  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
992  """
993  # Assume last state is the one we're interested in
994  self._protocol_output.append((p, p._last_state))
995 
996  def get_modeling_trajectory(self,
997  score_key="Total_Score",
998  rmf_file_key="rmf_file",
999  rmf_file_frame_key="rmf_frame_index",
1000  outputdir="./",
1001  get_every=1,
1002  nframes_trajectory=10000):
1003  """ Get a trajectory of the modeling run, for generating
1004  demonstrative movies
1005 
1006  @param score_key The score for ranking models
1007  @param rmf_file_key Key pointing to RMF filename
1008  @param rmf_file_frame_key Key pointing to RMF frame number
1009  @param outputdir The local output directory used in the run
1010  @param get_every Extract every nth frame
1011  @param nframes_trajectory Total number of frames of the trajectory
1012  """
1013  import math
1014 
1015  trajectory_models = IMP.pmi.io.get_trajectory_models(
1016  self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
1017  get_every)
1018  score_list = list(map(float, trajectory_models[2]))
1019 
1020  max_score = max(score_list)
1021  min_score = min(score_list)
1022 
1023  bins = [(max_score-min_score)*math.exp(-float(i))+min_score
1024  for i in range(nframes_trajectory)]
1025  binned_scores = [None]*nframes_trajectory
1026  binned_model_indexes = [-1]*nframes_trajectory
1027 
1028  for model_index, s in enumerate(score_list):
1029  bins_score_diffs = [abs(s-b) for b in bins]
1030  bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1031  if binned_scores[bin_index] is None:
1032  binned_scores[bin_index] = s
1033  binned_model_indexes[bin_index] = model_index
1034  else:
1035  old_diff = abs(binned_scores[bin_index]-bins[bin_index])
1036  new_diff = abs(s-bins[bin_index])
1037  if new_diff < old_diff:
1038  binned_scores[bin_index] = s
1039  binned_model_indexes[bin_index] = model_index
1040 
1041  print(binned_scores)
1042  print(binned_model_indexes)
1043 
1044  def _expand_ambiguity(self, prot, d):
1045  """If using PMI2, expand the dictionary to include copies as
1046  ambiguous options
1047 
1048  This also keeps the states separate.
1049  """
1050  newdict = {}
1051  for key in d:
1052  val = d[key]
1053  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
1054  newdict[key] = val
1055  continue
1056  states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1057  if isinstance(val, tuple):
1058  start = val[0]
1059  stop = val[1]
1060  name = val[2]
1061  else:
1062  start = 1
1063  stop = -1
1064  name = val
1065  for nst in range(len(states)):
1066  sel = IMP.atom.Selection(prot, molecule=name, state_index=nst)
1067  copies = sel.get_selected_particles(with_representation=False)
1068  if len(copies) > 1:
1069  for nc in range(len(copies)):
1070  if len(states) > 1:
1071  newdict['%s.%i..%i' % (name, nst, nc)] = \
1072  (start, stop, name, nc, nst)
1073  else:
1074  newdict['%s..%i' % (name, nc)] = \
1075  (start, stop, name, nc, nst)
1076  else:
1077  newdict[key] = val
1078  return newdict
1079 
1080  def clustering(self,
1081  score_key="Total_Score",
1082  rmf_file_key="rmf_file",
1083  rmf_file_frame_key="rmf_frame_index",
1084  state_number=0,
1085  prefiltervalue=None,
1086  feature_keys=[],
1087  outputdir="./",
1088  alignment_components=None,
1089  number_of_best_scoring_models=10,
1090  rmsd_calculation_components=None,
1091  distance_matrix_file='distances.mat',
1092  load_distance_matrix_file=False,
1093  skip_clustering=False,
1094  number_of_clusters=1,
1095  display_plot=False,
1096  exit_after_display=True,
1097  get_every=1,
1098  first_and_last_frames=None,
1099  density_custom_ranges=None,
1100  write_pdb_with_centered_coordinates=False,
1101  voxel_size=5.0):
1102  """Get the best scoring models, compute a distance matrix,
1103  cluster them, and create density maps.
1104 
1105  Tuple format: "molname" just the molecule,
1106  or (start,stop,molname,copy_num(optional),state_num(optional)
1107  Can pass None for copy or state to ignore that field.
1108  If you don't pass a specific copy number
1109 
1110  @param score_key The score for ranking models.
1111  @param rmf_file_key Key pointing to RMF filename
1112  @param rmf_file_frame_key Key pointing to RMF frame number
1113  @param state_number State number to analyze
1114  @param prefiltervalue Only include frames where the
1115  score key is below this value
1116  @param feature_keys Keywords for which you want to
1117  calculate average, medians, etc.
1118  If you pass "Keyname" it'll include everything that matches
1119  "*Keyname*"
1120  @param outputdir The local output directory used in
1121  the run
1122  @param alignment_components Dictionary with keys=groupname,
1123  values are tuples for aligning the structures
1124  e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1125  @param number_of_best_scoring_models Num models to keep per run
1126  @param rmsd_calculation_components For calculating RMSD
1127  (same format as alignment_components)
1128  @param distance_matrix_file Where to store/read the
1129  distance matrix
1130  @param load_distance_matrix_file Try to load the distance
1131  matrix file
1132  @param skip_clustering Just extract the best scoring
1133  models and save the pdbs
1134  @param number_of_clusters Number of k-means clusters
1135  @param display_plot Display the distance matrix
1136  @param exit_after_display Exit after displaying distance
1137  matrix
1138  @param get_every Extract every nth frame
1139  @param first_and_last_frames A tuple with the first and last
1140  frames to be analyzed. Values are percentages!
1141  Default: get all frames
1142  @param density_custom_ranges For density calculation
1143  (same format as alignment_components)
1144  @param write_pdb_with_centered_coordinates
1145  @param voxel_size Used for the density output
1146  """
1147  # Track provenance information to be added to each output model
1148  prov = []
1149  self._outputdir = Path(outputdir).absolute()
1150  self._number_of_clusters = number_of_clusters
1151  for p, state in self._protocol_output:
1152  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1153 
1154  if self.test_mode:
1155  return
1156 
1157  if self.rank == 0:
1158  try:
1159  os.mkdir(outputdir)
1160  except: # noqa: E722
1161  pass
1162 
1163  if not load_distance_matrix_file:
1164  if len(self.stat_files) == 0:
1165  print("ERROR: no stat file found in the given path")
1166  return
1167  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1168  self.stat_files, self.number_of_processes)[self.rank]
1169 
1170  # read ahead to check if you need the PMI2 score key instead
1171  for k in (score_key, rmf_file_key, rmf_file_frame_key):
1172  if k in feature_keys:
1173  warnings.warn(
1174  "no need to pass " + k + " to feature_keys.",
1176  feature_keys.remove(k)
1177 
1178  best_models = IMP.pmi.io.get_best_models(
1179  my_stat_files, score_key, feature_keys, rmf_file_key,
1180  rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1181  rmf_file_list = best_models[0]
1182  rmf_file_frame_list = best_models[1]
1183  score_list = best_models[2]
1184  feature_keyword_list_dict = best_models[3]
1185 
1186 # ------------------------------------------------------------------------
1187 # collect all the files and scores
1188 # ------------------------------------------------------------------------
1189 
1190  if self.number_of_processes > 1:
1191  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1192  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1193  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1194  rmf_file_frame_list)
1195  for k in feature_keyword_list_dict:
1196  feature_keyword_list_dict[k] = \
1198  feature_keyword_list_dict[k])
1199 
1200  # sort by score and get the best scoring ones
1201  score_rmf_tuples = list(zip(score_list,
1202  rmf_file_list,
1203  rmf_file_frame_list,
1204  list(range(len(score_list)))))
1205 
1206  if density_custom_ranges:
1207  for k in density_custom_ranges:
1208  if not isinstance(density_custom_ranges[k], list):
1209  raise Exception("Density custom ranges: values must "
1210  "be lists of tuples")
1211 
1212  # keep subset of frames if requested
1213  if first_and_last_frames is not None:
1214  nframes = len(score_rmf_tuples)
1215  first_frame = int(first_and_last_frames[0] * nframes)
1216  last_frame = int(first_and_last_frames[1] * nframes)
1217  if last_frame > len(score_rmf_tuples):
1218  last_frame = -1
1219  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1220 
1221  # sort RMFs by the score_key in ascending order, and store the rank
1222  best_score_rmf_tuples = sorted(
1223  score_rmf_tuples,
1224  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1225  best_score_rmf_tuples = [t+(n,) for n, t in
1226  enumerate(best_score_rmf_tuples)]
1227  # Note in the provenance info that we only kept best-scoring models
1228  prov.append(IMP.pmi.io.FilterProvenance(
1229  "Best scoring", 0, number_of_best_scoring_models))
1230  # sort the feature scores in the same way
1231  best_score_feature_keyword_list_dict = defaultdict(list)
1232  for tpl in best_score_rmf_tuples:
1233  index = tpl[3]
1234  for f in feature_keyword_list_dict:
1235  best_score_feature_keyword_list_dict[f].append(
1236  feature_keyword_list_dict[f][index])
1237  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1238  best_score_rmf_tuples,
1239  self.number_of_processes)[self.rank]
1240 
1241  # expand the dictionaries to include ambiguous copies
1242  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1243  self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1244  if rmsd_calculation_components is not None:
1245  tmp = self._expand_ambiguity(
1246  prot_ahead, rmsd_calculation_components)
1247  if tmp != rmsd_calculation_components:
1248  print('Detected ambiguity, expand rmsd components to',
1249  tmp)
1250  rmsd_calculation_components = tmp
1251  if alignment_components is not None:
1252  tmp = self._expand_ambiguity(prot_ahead,
1253  alignment_components)
1254  if tmp != alignment_components:
1255  print('Detected ambiguity, expand alignment '
1256  'components to', tmp)
1257  alignment_components = tmp
1258 
1259 # -------------------------------------------------------------
1260 # read the coordinates
1261 # ------------------------------------------------------------
1262  rmsd_weights = IMP.pmi.io.get_bead_sizes(
1263  self.model, my_best_score_rmf_tuples[0],
1264  rmsd_calculation_components, state_number=state_number)
1266  self.model, my_best_score_rmf_tuples, alignment_components,
1267  rmsd_calculation_components, state_number=state_number)
1268 
1269  # note! the coordinates are simply float tuples, NOT decorators,
1270  # NOT Vector3D, NOR particles, because these object cannot be
1271  # serialized. We need serialization
1272  # for the parallel computation based on mpi.
1273 
1274  # dict:key=component name,val=coords per hit
1275  all_coordinates = got_coords[0]
1276 
1277  # same as above, limited to alignment bits
1278  alignment_coordinates = got_coords[1]
1279 
1280  # same as above, limited to RMSD bits
1281  rmsd_coordinates = got_coords[2]
1282 
1283  # dictionary with key=RMF, value=score rank
1284  rmf_file_name_index_dict = got_coords[3]
1285 
1286  # RMF file per hit
1287  all_rmf_file_names = got_coords[4]
1288 
1289 # ------------------------------------------------------------------------
1290 # optionally don't compute distance matrix or cluster, just write top files
1291 # ------------------------------------------------------------------------
1292  if skip_clustering:
1293  if density_custom_ranges:
1294  DensModule = IMP.pmi.analysis.GetModelDensity(
1295  density_custom_ranges, voxel=voxel_size)
1296 
1297  dircluster = os.path.join(outputdir,
1298  "all_models."+str(self.rank))
1299  try:
1300  os.mkdir(outputdir)
1301  except: # noqa: E722
1302  pass
1303  try:
1304  os.mkdir(dircluster)
1305  except: # noqa: E722
1306  pass
1307  clusstat = open(os.path.join(
1308  dircluster, "stat."+str(self.rank)+".out"), "w")
1309  for cnt, tpl in enumerate(my_best_score_rmf_tuples):
1310  rmf_name = tpl[1]
1311  rmf_frame_number = tpl[2]
1312  tmp_dict = {}
1313  index = tpl[4]
1314  for key in best_score_feature_keyword_list_dict:
1315  tmp_dict[key] = \
1316  best_score_feature_keyword_list_dict[key][index]
1317 
1318  if cnt == 0:
1319  prots, rs = \
1320  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1321  self.model, rmf_frame_number, rmf_name)
1322  else:
1323  linking_successful = \
1324  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1325  self.model, prots, rs, rmf_frame_number,
1326  rmf_name)
1327  if not linking_successful:
1328  continue
1329 
1330  if not prots:
1331  continue
1332 
1333  states = IMP.atom.get_by_type(
1334  prots[0], IMP.atom.STATE_TYPE)
1335  prot = states[state_number]
1336 
1337  # get transformation aligning coordinates of
1338  # requested tuples to the first RMF file
1339  if cnt == 0:
1340  coords_f1 = alignment_coordinates[cnt]
1341  if cnt > 0:
1342  coords_f2 = alignment_coordinates[cnt]
1343  if coords_f2:
1345  coords_f1, coords_f2)
1346  transformation = Ali.align()[1]
1347  else:
1348  transformation = \
1350 
1351  rbs = set()
1352  for p in IMP.atom.get_leaves(prot):
1353  if not IMP.core.XYZR.get_is_setup(p):
1355  IMP.core.XYZR(p).set_radius(0.0001)
1356  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1357 
1359  rbm = IMP.core.RigidBodyMember(p)
1360  rb = rbm.get_rigid_body()
1361  rbs.add(rb)
1362  else:
1364  transformation)
1365  for rb in rbs:
1366  IMP.core.transform(rb, transformation)
1367 
1368  o = IMP.pmi.output.Output()
1369  self.model.update()
1370  out_pdb_fn = os.path.join(
1371  dircluster, str(cnt)+"."+str(self.rank)+".pdb")
1372  out_rmf_fn = os.path.join(
1373  dircluster, str(cnt)+"."+str(self.rank)+".rmf3")
1374  o.init_pdb(out_pdb_fn, prot)
1375  tc = write_pdb_with_centered_coordinates
1376  o.write_pdb(out_pdb_fn,
1377  translate_to_geometric_center=tc)
1378 
1379  tmp_dict["local_pdb_file_name"] = \
1380  os.path.basename(out_pdb_fn)
1381  tmp_dict["rmf_file_full_path"] = rmf_name
1382  tmp_dict["local_rmf_file_name"] = \
1383  os.path.basename(out_rmf_fn)
1384  tmp_dict["local_rmf_frame_number"] = 0
1385 
1386  clusstat.write(str(tmp_dict)+"\n")
1387 
1388  # create a single-state System and write that
1390  IMP.Particle(self.model))
1391  h.set_name("System")
1392  h.add_child(prot)
1393  o.init_rmf(out_rmf_fn, [h], rs)
1394 
1395  o.write_rmf(out_rmf_fn)
1396  o.close_rmf(out_rmf_fn)
1397  # add the density
1398  if density_custom_ranges:
1399  DensModule.add_subunits_density(prot)
1400 
1401  if density_custom_ranges:
1402  DensModule.write_mrc(path=dircluster)
1403  del DensModule
1404  return
1405 
1406  # broadcast the coordinates
1407  if self.number_of_processes > 1:
1408  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1409  all_coordinates)
1410  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1411  all_rmf_file_names)
1412  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1413  rmf_file_name_index_dict)
1414  alignment_coordinates = IMP.pmi.tools.scatter_and_gather(
1415  alignment_coordinates)
1416  rmsd_coordinates = IMP.pmi.tools.scatter_and_gather(
1417  rmsd_coordinates)
1418 
1419  if self.rank == 0:
1420  # save needed information in external files
1421  self.save_objects(
1422  [best_score_feature_keyword_list_dict,
1423  rmf_file_name_index_dict],
1424  ".macro.pkl")
1425 
1426 # ------------------------------------------------------------------------
1427 # Calculate distance matrix and cluster
1428 # ------------------------------------------------------------------------
1429  print("setup clustering class")
1430  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1431 
1432  for n, model_coordinate_dict in enumerate(all_coordinates):
1433  # let's try to align
1434  if (alignment_components is not None
1435  and len(self.cluster_obj.all_coords) == 0):
1436  # set the first model as template coordinates
1437  self.cluster_obj.set_template(alignment_coordinates[n])
1438  self.cluster_obj.fill(all_rmf_file_names[n],
1439  rmsd_coordinates[n])
1440  print("Global calculating the distance matrix")
1441 
1442  # calculate distance matrix, all against all
1443  self.cluster_obj.dist_matrix()
1444 
1445  # perform clustering and optionally display
1446  if self.rank == 0:
1447  self.cluster_obj.do_cluster(number_of_clusters)
1448  if display_plot:
1449  if self.rank == 0:
1450  self.cluster_obj.plot_matrix(
1451  figurename=os.path.join(outputdir,
1452  'dist_matrix.pdf'))
1453  if exit_after_display:
1454  exit()
1455  self.cluster_obj.save_distance_matrix_file(
1456  file_name=distance_matrix_file)
1457 
1458 # ------------------------------------------------------------------------
1459 # Alternatively, load the distance matrix from file and cluster that
1460 # ------------------------------------------------------------------------
1461  else:
1462  if self.rank == 0:
1463  print("setup clustering class")
1464  self.cluster_obj = IMP.pmi.analysis.Clustering()
1465  self.cluster_obj.load_distance_matrix_file(
1466  file_name=distance_matrix_file)
1467  print("clustering with %s clusters" % str(number_of_clusters))
1468  self.cluster_obj.do_cluster(number_of_clusters)
1469  [best_score_feature_keyword_list_dict,
1470  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1471  if display_plot:
1472  if self.rank == 0:
1473  self.cluster_obj.plot_matrix(figurename=os.path.join(
1474  outputdir, 'dist_matrix.pdf'))
1475  if exit_after_display:
1476  exit()
1477  if self.number_of_processes > 1:
1478  self.comm.Barrier()
1479 
1480 # ------------------------------------------------------------------------
1481 # now save all information about the clusters
1482 # ------------------------------------------------------------------------
1483 
1484  if self.rank == 0:
1485  print(self.cluster_obj.get_cluster_labels())
1486  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1487  print("rank %s " % str(self.rank))
1488  print("cluster %s " % str(n))
1489  print("cluster label %s " % str(cl))
1490  print(self.cluster_obj.get_cluster_label_names(cl))
1491  cluster_size = \
1492  len(self.cluster_obj.get_cluster_label_names(cl))
1493  cluster_prov = \
1494  prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1495 
1496  # first initialize the Density class if requested
1497  if density_custom_ranges:
1498  DensModule = IMP.pmi.analysis.GetModelDensity(
1499  density_custom_ranges,
1500  voxel=voxel_size)
1501 
1502  dircluster = outputdir + "/cluster." + str(n) + "/"
1503  try:
1504  os.mkdir(dircluster)
1505  except: # noqa: E722
1506  pass
1507 
1508  rmsd_dict = {
1509  "AVERAGE_RMSD":
1510  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1511  clusstat = open(dircluster + "stat.out", "w")
1512  for k, structure_name in enumerate(
1513  self.cluster_obj.get_cluster_label_names(cl)):
1514  # extract the features
1515  tmp_dict = {}
1516  tmp_dict.update(rmsd_dict)
1517  index = rmf_file_name_index_dict[structure_name]
1518  for key in best_score_feature_keyword_list_dict:
1519  tmp_dict[
1520  key] = best_score_feature_keyword_list_dict[
1521  key][
1522  index]
1523 
1524  # get the rmf name and the frame number from the list of
1525  # frame names
1526  rmf_name = structure_name.split("|")[0]
1527  rmf_frame_number = int(structure_name.split("|")[1])
1528  clusstat.write(str(tmp_dict) + "\n")
1529 
1530  # extract frame (open or link to existing)
1531  if k == 0:
1532  prots, rs = \
1533  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1534  self.model, rmf_frame_number, rmf_name)
1535  else:
1536  linking_successful = \
1537  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1538  self.model, prots, rs, rmf_frame_number,
1539  rmf_name)
1540  if not linking_successful:
1541  continue
1542  if not prots:
1543  continue
1544 
1545  states = IMP.atom.get_by_type(
1546  prots[0], IMP.atom.STATE_TYPE)
1547  prot = states[state_number]
1548  if k == 0:
1549  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1550 
1551  # transform clusters onto first
1552  if k > 0:
1553  co = self.cluster_obj
1554  model_index = co.get_model_index_from_name(
1555  structure_name)
1556  transformation = co.get_transformation_to_first_member(
1557  cl, model_index)
1558  rbs = set()
1559  for p in IMP.atom.get_leaves(prot):
1560  if not IMP.core.XYZR.get_is_setup(p):
1562  IMP.core.XYZR(p).set_radius(0.0001)
1563  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1564 
1566  rbm = IMP.core.RigidBodyMember(p)
1567  rb = rbm.get_rigid_body()
1568  rbs.add(rb)
1569  else:
1571  transformation)
1572  for rb in rbs:
1573  IMP.core.transform(rb, transformation)
1574 
1575  # add the density
1576  if density_custom_ranges:
1577  DensModule.add_subunits_density(prot)
1578 
1579  # pdb writing should be optimized!
1580  o = IMP.pmi.output.Output()
1581  self.model.update()
1582  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1583  o.write_pdb(dircluster + str(k) + ".pdb")
1584 
1585  # create a single-state System and write that
1587  IMP.Particle(self.model))
1588  h.set_name("System")
1589  h.add_child(prot)
1590  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1591  o.write_rmf(dircluster + str(k) + ".rmf3")
1592  o.close_rmf(dircluster + str(k) + ".rmf3")
1593 
1594  del o
1595  # IMP.atom.destroy(prot)
1596 
1597  if density_custom_ranges:
1598  DensModule.write_mrc(path=dircluster)
1599  del DensModule
1600 
1601  if self.number_of_processes > 1:
1602  self.comm.Barrier()
1603 
1604  def get_cluster_rmsd(self, cluster_num):
1605  if self.cluster_obj is None:
1606  raise Exception("Run clustering first")
1607  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1608 
1609  def save_objects(self, objects, file_name):
1610  import pickle
1611  outf = open(file_name, 'wb')
1612  pickle.dump(objects, outf)
1613  outf.close()
1614 
1615  def load_objects(self, file_name):
1616  import pickle
1617  inputf = open(file_name, 'rb')
1618  objects = pickle.load(inputf)
1619  inputf.close()
1620  return objects
1621 
1622 
1624 
1625  """
1626  This class contains analysis utilities to investigate ReplicaExchange
1627  results.
1628  """
1629 
1630  ########################
1631  # Construction and Setup
1632  ########################
1633 
1634  def __init__(self, model, stat_files, best_models=None, score_key=None,
1635  alignment=True):
1636  """
1637  Construction of the Class.
1638  @param model IMP.Model()
1639  @param stat_files list of string. Can be ascii stat files,
1640  rmf files names
1641  @param best_models Integer. Number of best scoring models,
1642  if None: all models will be read
1643  @param alignment boolean (Default=True). Align before computing
1644  the rmsd.
1645  """
1646 
1647  self.model = model
1648  self.best_models = best_models
1650  model, stat_files, self.best_models, score_key, cache=True)
1652  StatHierarchyHandler=self.stath0)
1653 
1654  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(
1656  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(
1658  self.sel0_rmsd = IMP.atom.Selection(self.stath0)
1659  self.sel1_rmsd = IMP.atom.Selection(self.stath1)
1660  self.sel0_alignment = IMP.atom.Selection(self.stath0)
1661  self.sel1_alignment = IMP.atom.Selection(self.stath1)
1662  self.clusters = []
1663  # fill the cluster list with a single cluster containing all models
1664  c = IMP.pmi.output.Cluster(0)
1665  self.clusters.append(c)
1666  for n0 in range(len(self.stath0)):
1667  c.add_member(n0)
1668  self.pairwise_rmsd = {}
1669  self.pairwise_molecular_assignment = {}
1670  self.alignment = alignment
1671  self.symmetric_molecules = {}
1672  self.issymmetricsel = {}
1673  self.update_seldicts()
1674  self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1675  IMP.atom.get_leaves(self.stath0))
1676  self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1677  IMP.atom.get_leaves(self.stath1))
1678 
1679  def set_rmsd_selection(self, **kwargs):
1680  """
1681  Setup the selection onto which the rmsd is computed
1682  @param kwargs use IMP.atom.Selection keywords
1683  """
1684  self.sel0_rmsd = IMP.atom.Selection(self.stath0, **kwargs)
1685  self.sel1_rmsd = IMP.atom.Selection(self.stath1, **kwargs)
1686  self.update_seldicts()
1687 
1688  def set_symmetric(self, molecule_name):
1689  """
1690  Store names of symmetric molecules
1691  """
1692  self.symmetric_molecules[molecule_name] = 0
1693  self.update_seldicts()
1694 
1695  def set_alignment_selection(self, **kwargs):
1696  """
1697  Setup the selection onto which the alignment is computed
1698  @param kwargs use IMP.atom.Selection keywords
1699  """
1700  self.sel0_alignment = IMP.atom.Selection(self.stath0, **kwargs)
1701  self.sel1_alignment = IMP.atom.Selection(self.stath1, **kwargs)
1702 
1703  ######################
1704  # Clustering functions
1705  ######################
1706  def clean_clusters(self):
1707  for c in self.clusters:
1708  del c
1709  self.clusters = []
1710 
1711  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1712  """
1713  Cluster the models based on RMSD.
1714  @param rmsd_cutoff Float the distance cutoff in Angstrom
1715  @param metric (Default=IMP.atom.get_rmsd) the metric that will
1716  be used to compute rmsds
1717  """
1718  self.clean_clusters()
1719  not_clustered = set(range(len(self.stath1)))
1720  while len(not_clustered) > 0:
1721  self.aggregate(not_clustered, rmsd_cutoff, metric)
1722  self.update_clusters()
1723 
1724  def refine(self, rmsd_cutoff=10):
1725  """
1726  Refine the clusters by merging the ones whose centers are close
1727  @param rmsd_cutoff cutoff distance in Angstorms
1728  """
1729  clusters_copy = self.clusters
1730  for c0, c1 in itertools.combinations(self.clusters, 2):
1731  if c0.center_index is None:
1732  self.compute_cluster_center(c0)
1733  if c1.center_index is None:
1734  self.compute_cluster_center(c1)
1735  _ = self.stath0[c0.center_index]
1736  _ = self.stath1[c1.center_index]
1737  rmsd, molecular_assignment = self.rmsd()
1738  if rmsd <= rmsd_cutoff:
1739  if c1 in self.clusters:
1740  clusters_copy.remove(c1)
1741  c0 += c1
1742  self.clusters = clusters_copy
1743  self.update_clusters()
1744 
1745  ####################
1746  # Input Output
1747  ####################
1748 
1749  def set_cluster_assignments(self, cluster_ids):
1750  if len(cluster_ids) != len(self.stath0):
1751  raise ValueError('cluster ids has to be same length as '
1752  'number of frames')
1753 
1754  self.clusters = []
1755  for i in sorted(list(set(cluster_ids))):
1756  self.clusters.append(IMP.pmi.output.Cluster(i))
1757  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1758  self.clusters[idx].add_member(i, d)
1759 
1760  def get_cluster_data(self, cluster):
1761  """
1762  Return the model data from a cluster
1763  @param cluster IMP.pmi.output.Cluster object
1764  """
1765  data = []
1766  for m in cluster:
1767  data.append(m)
1768  return data
1769 
1770  def save_data(self, filename='data.pkl'):
1771  """
1772  Save the data for the whole models into a pickle file
1773  @param filename string
1774  """
1775  self.stath0.save_data(filename)
1776 
1777  def set_data(self, data):
1778  """
1779  Set the data from an external IMP.pmi.output.Data
1780  @param data IMP.pmi.output.Data
1781  """
1782  self.stath0.data = data
1783  self.stath1.data = data
1784 
1785  def load_data(self, filename='data.pkl'):
1786  """
1787  Load the data from an external pickled file
1788  @param filename string
1789  """
1790  self.stath0.load_data(filename)
1791  self.stath1.load_data(filename)
1792  self.best_models = len(self.stath0)
1793 
1794  def add_cluster(self, rmf_name_list):
1795  c = IMP.pmi.output.Cluster(len(self.clusters))
1796  print("creating cluster index "+str(len(self.clusters)))
1797  self.clusters.append(c)
1798  current_len = len(self.stath0)
1799 
1800  for rmf in rmf_name_list:
1801  print("adding rmf "+rmf)
1802  self.stath0.add_stat_file(rmf)
1803  self.stath1.add_stat_file(rmf)
1804 
1805  for n0 in range(current_len, len(self.stath0)):
1806  d0 = self.stath0[n0]
1807  c.add_member(n0, d0)
1808  self.update_clusters()
1809 
1810  def save_clusters(self, filename='clusters.pkl'):
1811  """
1812  Save the clusters into a pickle file
1813  @param filename string
1814  """
1815  import pickle
1816  fl = open(filename, 'wb')
1817  pickle.dump(self.clusters, fl)
1818 
1819  def load_clusters(self, filename='clusters.pkl', append=False):
1820  """
1821  Load the clusters from a pickle file
1822  @param filename string
1823  @param append bool (Default=False), if True. append the clusters
1824  to the ones currently present
1825  """
1826  import pickle
1827  fl = open(filename, 'rb')
1828  self.clean_clusters()
1829  if append:
1830  self.clusters += pickle.load(fl)
1831  else:
1832  self.clusters = pickle.load(fl)
1833  self.update_clusters()
1834 
1835  ####################
1836  # Analysis Functions
1837  ####################
1838 
1839  def compute_cluster_center(self, cluster):
1840  """
1841  Compute the cluster center for a given cluster
1842  """
1843  member_distance = defaultdict(float)
1844 
1845  for n0, n1 in itertools.combinations(cluster.members, 2):
1846  _ = self.stath0[n0]
1847  _ = self.stath1[n1]
1848  rmsd, _ = self.rmsd()
1849  member_distance[n0] += rmsd
1850 
1851  if len(member_distance) > 0:
1852  cluster.center_index = min(member_distance,
1853  key=member_distance.get)
1854  else:
1855  cluster.center_index = cluster.members[0]
1856 
1857  def save_coordinates(self, cluster, rmf_name=None, reference="Absolute",
1858  prefix="./"):
1859  """
1860  Save the coordinates of the current cluster a single rmf file
1861  """
1862  print("saving coordinates", cluster)
1863  if self.alignment:
1864  self.set_reference(reference, cluster)
1865  o = IMP.pmi.output.Output()
1866  if rmf_name is None:
1867  rmf_name = prefix+'/'+str(cluster.cluster_id)+".rmf3"
1868 
1869  _ = self.stath1[cluster.members[0]]
1870  self.model.update()
1871  o.init_rmf(rmf_name, [self.stath1])
1872  for n1 in cluster.members:
1873  _ = self.stath1[n1]
1874  self.model.update()
1876  if self.alignment:
1877  self.align()
1878  o.write_rmf(rmf_name)
1880  o.close_rmf(rmf_name)
1881 
1882  def prune_redundant_structures(self, rmsd_cutoff=10):
1883  """
1884  remove structures that are similar
1885  append it to a new cluster
1886  """
1887  print("pruning models")
1888  selected = 0
1889  filtered = [selected]
1890  remaining = range(1, len(self.stath1), 10)
1891 
1892  while len(remaining) > 0:
1893  d0 = self.stath0[selected]
1894  rm = []
1895  for n1 in remaining:
1896  _ = self.stath1[n1]
1897  if self.alignment:
1898  self.align()
1899  d, _ = self.rmsd()
1900  if d <= rmsd_cutoff:
1901  rm.append(n1)
1902  print("pruning model %s, similar to model %s, rmsd %s"
1903  % (str(n1), str(selected), str(d)))
1904  remaining = [x for x in remaining if x not in rm]
1905  if len(remaining) == 0:
1906  break
1907  selected = remaining[0]
1908  filtered.append(selected)
1909  remaining.pop(0)
1910  c = IMP.pmi.output.Cluster(len(self.clusters))
1911  self.clusters.append(c)
1912  for n0 in filtered:
1913  d0 = self.stath0[n0]
1914  c.add_member(n0, d0)
1915  self.update_clusters()
1916 
1917  def precision(self, cluster):
1918  """
1919  Compute the precision of a cluster
1920  """
1921  npairs = 0
1922  rmsd = 0.0
1923  precision = None
1924 
1925  if cluster.center_index is not None:
1926  members1 = [cluster.center_index]
1927  else:
1928  members1 = cluster.members
1929 
1930  for n0 in members1:
1931  _ = self.stath0[n0]
1932  for n1 in cluster.members:
1933  if n0 != n1:
1934  npairs += 1
1935  _ = self.stath1[n1]
1937  tmp_rmsd, _ = self.rmsd()
1938  rmsd += tmp_rmsd
1940 
1941  if npairs > 0:
1942  precision = rmsd/npairs
1943  cluster.precision = precision
1944  return precision
1945 
1946  def bipartite_precision(self, cluster1, cluster2, verbose=False):
1947  """
1948  Compute the bipartite precision (ie the cross-precision)
1949  between two clusters
1950  """
1951  npairs = 0
1952  rmsd = 0.0
1953  for cn0, n0 in enumerate(cluster1.members):
1954  _ = self.stath0[n0]
1955  for cn1, n1 in enumerate(cluster2.members):
1956  _ = self.stath1[n1]
1957  tmp_rmsd, _ = self.rmsd()
1958  if verbose:
1959  print("--- rmsd between structure %s and structure "
1960  "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1961  rmsd += tmp_rmsd
1962  npairs += 1
1963  precision = rmsd/npairs
1964  return precision
1965 
1966  def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1967  cluster_ref=None, step=1):
1968  """
1969  Compute the Root mean square fluctuations
1970  of a molecule in a cluster
1971  Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1972  residue indexes and the value is the rmsf
1973  """
1974  rmsf = IMP.pmi.tools.OrderedDict()
1975 
1976  # assumes that residue indexes are identical for stath0 and stath1
1977  if cluster_ref is not None:
1978  if cluster_ref.center_index is not None:
1979  members0 = [cluster_ref.center_index]
1980  else:
1981  members0 = cluster_ref.members
1982  else:
1983  if cluster.center_index is not None:
1984  members0 = [cluster.center_index]
1985  else:
1986  members0 = cluster.members
1987 
1988  s0 = IMP.atom.Selection(self.stath0, molecule=molecule, resolution=1,
1989  copy_index=copy_index, state_index=state_index)
1990  ps0 = s0.get_selected_particles()
1991  # get the residue indexes
1992  residue_indexes = list(IMP.pmi.tools.OrderedSet(
1993  [IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
1994 
1995  # get the corresponding particles
1996  npairs = 0
1997  for n0 in members0:
1998  d0 = self.stath0[n0]
1999  for n1 in cluster.members[::step]:
2000  if n0 != n1:
2001  print("--- rmsf %s %s" % (str(n0), str(n1)))
2003 
2004  s1 = IMP.atom.Selection(
2005  self.stath1, molecule=molecule,
2006  residue_indexes=residue_indexes, resolution=1,
2007  copy_index=copy_index, state_index=state_index)
2008  ps1 = s1.get_selected_particles()
2009 
2010  d1 = self.stath1[n1]
2011  if self.alignment:
2012  self.align()
2013  for n, (p0, p1) in enumerate(zip(ps0, ps1)):
2014  r = residue_indexes[n]
2015  d0 = IMP.core.XYZ(p0)
2016  d1 = IMP.core.XYZ(p1)
2017  if r in rmsf:
2018  rmsf[r] += IMP.core.get_distance(d0, d1)
2019  else:
2020  rmsf[r] = IMP.core.get_distance(d0, d1)
2021  npairs += 1
2023  for r in rmsf:
2024  rmsf[r] /= npairs
2025 
2026  for stath in [self.stath0, self.stath1]:
2027  if molecule not in self.symmetric_molecules:
2028  s = IMP.atom.Selection(
2029  stath, molecule=molecule, residue_index=r,
2030  resolution=1, copy_index=copy_index,
2031  state_index=state_index)
2032  else:
2033  s = IMP.atom.Selection(
2034  stath, molecule=molecule, residue_index=r,
2035  resolution=1, state_index=state_index)
2036 
2037  ps = s.get_selected_particles()
2038  for p in ps:
2040  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
2041  else:
2043 
2044  return rmsf
2045 
2046  def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2047  reference="Absolute", prefix="./", step=1):
2048  if self.alignment:
2049  self.set_reference(reference, cluster)
2050  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
2051  voxel=voxel_size)
2052 
2053  for n1 in cluster.members[::step]:
2054  print("density "+str(n1))
2055  _ = self.stath1[n1]
2057  if self.alignment:
2058  self.align()
2059  dens.add_subunits_density(self.stath1)
2061  dens.write_mrc(path=prefix+'/', suffix=str(cluster.cluster_id))
2062  del dens
2063 
2064  def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2065  consolidate=False, molecules=None, prefix='./',
2066  reference="Absolute"):
2067  if self.alignment:
2068  self.set_reference(reference, cluster)
2069  import numpy as np
2070  import matplotlib.pyplot as plt
2071  import matplotlib.cm as cm
2072  from scipy.spatial.distance import cdist
2073  import IMP.pmi.topology
2074  if molecules is None:
2076  for mol in IMP.pmi.tools.get_molecules(
2077  IMP.atom.get_leaves(self.stath1))]
2078  else:
2080  for mol in IMP.pmi.tools.get_molecules(
2082  self.stath1,
2083  molecules=molecules).get_selected_particles())]
2084  unique_copies = [mol for mol in mols if mol.get_copy_index() == 0]
2085  mol_names_unique = dict((mol.get_name(), mol) for mol in unique_copies)
2086  total_len_unique = sum(max(mol.get_residue_indexes())
2087  for mol in unique_copies)
2088 
2089  index_dict = {}
2090  prev_stop = 0
2091 
2092  if not consolidate:
2093  for mol in mols:
2094  seqlen = max(mol.get_residue_indexes())
2095  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2096  prev_stop += seqlen
2097 
2098  else:
2099  for mol in unique_copies:
2100  seqlen = max(mol.get_residue_indexes())
2101  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2102  prev_stop += seqlen
2103 
2104  for ncl, n1 in enumerate(cluster.members):
2105  print(ncl)
2106  _ = self.stath1[n1]
2107  coord_dict = IMP.pmi.tools.OrderedDict()
2108  for mol in mols:
2109  rindexes = mol.get_residue_indexes()
2110  coords = np.ones((max(rindexes), 3))
2111  for rnum in rindexes:
2112  sel = IMP.atom.Selection(mol, residue_index=rnum,
2113  resolution=1)
2114  selpart = sel.get_selected_particles()
2115  if len(selpart) == 0:
2116  continue
2117  selpart = selpart[0]
2118  coords[rnum - 1, :] = \
2119  IMP.core.XYZ(selpart).get_coordinates()
2120  coord_dict[mol] = coords
2121 
2122  if not consolidate:
2123  coords = np.concatenate(list(coord_dict.values()))
2124  dists = cdist(coords, coords)
2125  binary_dists = np.where((dists <= contact_threshold)
2126  & (dists >= 1.0), 1.0, 0.0)
2127  else:
2128  binary_dists_dict = {}
2129  for mol1 in mols:
2130  len1 = max(mol1.get_residue_indexes())
2131  for mol2 in mols:
2132  name1 = mol1.get_name()
2133  name2 = mol2.get_name()
2134  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2135  if (name1, name2) not in binary_dists_dict:
2136  binary_dists_dict[(name1, name2)] = \
2137  np.zeros((len1, len1))
2138  binary_dists_dict[(name1, name2)] += \
2139  np.where((dists <= contact_threshold)
2140  & (dists >= 1.0), 1.0, 0.0)
2141  binary_dists = np.zeros((total_len_unique, total_len_unique))
2142 
2143  for name1, name2 in binary_dists_dict:
2144  r1 = index_dict[mol_names_unique[name1]]
2145  r2 = index_dict[mol_names_unique[name2]]
2146  binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2147  np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2148  1.0, 0.0)
2149 
2150  if ncl == 0:
2151  dist_maps = [dists]
2152  av_dist_map = dists
2153  contact_freqs = binary_dists
2154  else:
2155  dist_maps.append(dists)
2156  av_dist_map += dists
2157  contact_freqs += binary_dists
2158 
2159  if log_scale:
2160  contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2161  else:
2162  contact_freqs = 1.0/len(cluster)*contact_freqs
2163  av_dist_map = 1.0/len(cluster)*contact_freqs
2164 
2165  fig = plt.figure(figsize=(100, 100))
2166  ax = fig.add_subplot(111)
2167  ax.set_xticks([])
2168  ax.set_yticks([])
2169  gap_between_components = 50
2170  colormap = cm.Blues
2171  colornorm = None
2172 
2173  if not consolidate:
2174  sorted_tuple = sorted(
2176  mol).get_extended_name(), mol) for mol in mols)
2177  prot_list = list(zip(*sorted_tuple))[1]
2178  else:
2179  sorted_tuple = sorted(
2180  (IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(), mol)
2181  for mol in unique_copies)
2182  prot_list = list(zip(*sorted_tuple))[1]
2183 
2184  prot_listx = prot_list
2185  nresx = gap_between_components + \
2186  sum([max(mol.get_residue_indexes())
2187  + gap_between_components for mol in prot_listx])
2188 
2189  # set the list of proteins on the y axis
2190  prot_listy = prot_list
2191  nresy = gap_between_components + \
2192  sum([max(mol.get_residue_indexes())
2193  + gap_between_components for mol in prot_listy])
2194 
2195  # this is the residue offset for each protein
2196  resoffsetx = {}
2197  resendx = {}
2198  res = gap_between_components
2199  for mol in prot_listx:
2200  resoffsetx[mol] = res
2201  res += max(mol.get_residue_indexes())
2202  resendx[mol] = res
2203  res += gap_between_components
2204 
2205  resoffsety = {}
2206  resendy = {}
2207  res = gap_between_components
2208  for mol in prot_listy:
2209  resoffsety[mol] = res
2210  res += max(mol.get_residue_indexes())
2211  resendy[mol] = res
2212  res += gap_between_components
2213 
2214  resoffsetdiagonal = {}
2215  res = gap_between_components
2216  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2217  resoffsetdiagonal[mol] = res
2218  res += max(mol.get_residue_indexes())
2219  res += gap_between_components
2220 
2221  # plot protein boundaries
2222  xticks = []
2223  xlabels = []
2224  for n, prot in enumerate(prot_listx):
2225  res = resoffsetx[prot]
2226  end = resendx[prot]
2227  for proty in prot_listy:
2228  resy = resoffsety[proty]
2229  endy = resendy[proty]
2230  ax.plot([res, res], [resy, endy], linestyle='-',
2231  color='gray', lw=0.4)
2232  ax.plot([end, end], [resy, endy], linestyle='-',
2233  color='gray', lw=0.4)
2234  xticks.append((float(res) + float(end)) / 2)
2236  prot).get_extended_name())
2237 
2238  yticks = []
2239  ylabels = []
2240  for n, prot in enumerate(prot_listy):
2241  res = resoffsety[prot]
2242  end = resendy[prot]
2243  for protx in prot_listx:
2244  resx = resoffsetx[protx]
2245  endx = resendx[protx]
2246  ax.plot([resx, endx], [res, res], linestyle='-',
2247  color='gray', lw=0.4)
2248  ax.plot([resx, endx], [end, end], linestyle='-',
2249  color='gray', lw=0.4)
2250  yticks.append((float(res) + float(end)) / 2)
2252  prot).get_extended_name())
2253 
2254  # plot the contact map
2255 
2256  tmp_array = np.zeros((nresx, nresy))
2257  ret = {}
2258  for px in prot_listx:
2259  for py in prot_listy:
2260  resx = resoffsetx[px]
2261  lengx = resendx[px] - 1
2262  resy = resoffsety[py]
2263  lengy = resendy[py] - 1
2264  indexes_x = index_dict[px]
2265  minx = min(indexes_x)
2266  maxx = max(indexes_x)
2267  indexes_y = index_dict[py]
2268  miny = min(indexes_y)
2269  maxy = max(indexes_y)
2270  tmp_array[resx:lengx, resy:lengy] = \
2271  contact_freqs[minx:maxx, miny:maxy]
2272  ret[(px, py)] = np.argwhere(
2273  contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2274 
2275  ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2276  origin='lower', alpha=0.6, interpolation='nearest')
2277 
2278  ax.set_xticks(xticks)
2279  ax.set_xticklabels(xlabels, rotation=90)
2280  ax.set_yticks(yticks)
2281  ax.set_yticklabels(ylabels)
2282  plt.setp(ax.get_xticklabels(), fontsize=6)
2283  plt.setp(ax.get_yticklabels(), fontsize=6)
2284 
2285  # display and write to file
2286  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2287  [i.set_linewidth(2.0) for i in ax.spines.values()]
2288 
2289  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf",
2290  dpi=300, transparent="False")
2291  return ret
2292 
2293  def plot_rmsd_matrix(self, filename):
2294  self.compute_all_pairwise_rmsd()
2295  distance_matrix = np.zeros(
2296  (len(self.stath0), len(self.stath1)))
2297  for (n0, n1) in self.pairwise_rmsd:
2298  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2299 
2300  import matplotlib as mpl
2301  mpl.use('Agg')
2302  import matplotlib.pylab as pl
2303  from scipy.cluster import hierarchy as hrc
2304 
2305  fig = pl.figure(figsize=(10, 8))
2306  ax = fig.add_subplot(212)
2307  dendrogram = hrc.dendrogram(
2308  hrc.linkage(distance_matrix),
2309  color_threshold=7,
2310  no_labels=True)
2311  leaves_order = dendrogram['leaves']
2312  ax.set_xlabel('Model')
2313  ax.set_ylabel('RMSD [Angstroms]')
2314 
2315  ax2 = fig.add_subplot(221)
2316  cax = ax2.imshow(
2317  distance_matrix[leaves_order, :][:, leaves_order],
2318  interpolation='nearest')
2319  cb = fig.colorbar(cax)
2320  cb.set_label('RMSD [Angstroms]')
2321  ax2.set_xlabel('Model')
2322  ax2.set_ylabel('Model')
2323 
2324  pl.savefig(filename, dpi=300)
2325  pl.close(fig)
2326 
2327  ####################
2328  # Internal Functions
2329  ####################
2330 
2331  def update_clusters(self):
2332  """
2333  Update the cluster id numbers
2334  """
2335  for n, c in enumerate(self.clusters):
2336  c.cluster_id = n
2337 
2338  def get_molecule(self, hier, name, copy):
2339  s = IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2340  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2341 
2342  def update_seldicts(self):
2343  """
2344  Update the seldicts
2345  """
2346  self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2347  self.sel0_rmsd.get_selected_particles())
2348  self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2349  self.sel1_rmsd.get_selected_particles())
2350  for mol in self.seldict0:
2351  for sel in self.seldict0[mol]:
2352  self.issymmetricsel[sel] = False
2353  for mol in self.symmetric_molecules:
2354  self.symmetric_molecules[mol] = len(self.seldict0[mol])
2355  for sel in self.seldict0[mol]:
2356  self.issymmetricsel[sel] = True
2357 
2358  def align(self):
2360  self.sel1_alignment, self.sel0_alignment)
2361 
2362  for rb in self.rbs1:
2363  IMP.core.transform(rb, tr)
2364 
2365  for bead in self.beads1:
2366  try:
2367  IMP.core.transform(IMP.core.XYZ(bead), tr)
2368  except: # noqa: E722
2369  continue
2370 
2371  self.model.update()
2372 
2373  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2374  '''
2375  initial filling of the clusters.
2376  '''
2377  n0 = idxs.pop()
2378  print("clustering model "+str(n0))
2379  d0 = self.stath0[n0]
2380  c = IMP.pmi.output.Cluster(len(self.clusters))
2381  print("creating cluster index "+str(len(self.clusters)))
2382  self.clusters.append(c)
2383  c.add_member(n0, d0)
2384  clustered = set([n0])
2385  for n1 in idxs:
2386  print("--- trying to add model " + str(n1) + " to cluster "
2387  + str(len(self.clusters)))
2388  d1 = self.stath1[n1]
2389  if self.alignment:
2390  self.align()
2391  rmsd, _ = self.rmsd(metric=metric)
2392  if rmsd < rmsd_cutoff:
2393  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2394  c.add_member(n1, d1)
2395  clustered.add(n1)
2396  else:
2397  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2398  idxs -= clustered
2399 
2400  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2401  """
2402  merge the clusters that have close members
2403  @param rmsd_cutoff cutoff distance in Angstorms
2404  """
2405  # before merging, clusters are spheres of radius rmsd_cutoff
2406  # centered on the 1st element
2407  # here we only try to merge clusters whose centers are closer
2408  # than 2*rmsd_cutoff
2409  to_merge = []
2410  print("merging...")
2411  for c0, c1 in filter(lambda x: len(x[0].members) > 1,
2412  itertools.combinations(self.clusters, 2)):
2413  n0, n1 = [c.members[0] for c in (c0, c1)]
2414  _ = self.stath0[n0]
2415  _ = self.stath1[n1]
2416  rmsd, _ = self.rmsd()
2417  if (rmsd < 2*rmsd_cutoff and
2418  self.have_close_members(c0, c1, rmsd_cutoff, metric)):
2419  to_merge.append((c0, c1))
2420 
2421  for c0, c in reversed(to_merge):
2422  self.merge(c0, c)
2423 
2424  # keep only full clusters
2425  self.clusters = [c for c in
2426  filter(lambda x: len(x.members) > 0, self.clusters)]
2427 
2428  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2429  '''
2430  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2431  '''
2432  print("check close members for clusters " + str(c0.cluster_id) +
2433  " and " + str(c1.cluster_id))
2434  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2435  _ = self.stath0[n0]
2436  _ = self.stath1[n1]
2437  rmsd, _ = self.rmsd(metric=metric)
2438  if rmsd < rmsd_cutoff:
2439  return True
2440 
2441  return False
2442 
2443  def merge(self, c0, c1):
2444  '''
2445  merge two clusters
2446  '''
2447  c0 += c1
2448  c1.members = []
2449  c1.data = {}
2450 
2451  def rmsd_helper(self, sels0, sels1, metric):
2452  '''
2453  a function that returns the permutation best_sel of sels0 that
2454  minimizes metric
2455  '''
2456  best_rmsd2 = float('inf')
2457  best_sel = None
2458  if self.issymmetricsel[sels0[0]]:
2459  # this cases happens when symmetries were defined
2460  N = len(sels0)
2461  for offset in range(N):
2462  sels = [sels0[(offset+i) % N] for i in range(N)]
2463  sel0 = sels[0]
2464  sel1 = sels1[0]
2465  r = metric(sel0, sel1)
2466  rmsd2 = r*r*N
2467  if rmsd2 < best_rmsd2:
2468  best_rmsd2 = rmsd2
2469  best_sel = sels
2470  else:
2471  for sels in itertools.permutations(sels0):
2472  rmsd2 = 0.0
2473  for sel0, sel1 in itertools.takewhile(
2474  lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2475  r = metric(sel0, sel1)
2476  rmsd2 += r*r
2477  if rmsd2 < best_rmsd2:
2478  best_rmsd2 = rmsd2
2479  best_sel = sels
2480  return best_sel, best_rmsd2
2481 
2482  def compute_all_pairwise_rmsd(self):
2483  for d0 in self.stath0:
2484  for d1 in self.stath1:
2485  rmsd, _ = self.rmsd()
2486 
2487  def rmsd(self, metric=IMP.atom.get_rmsd):
2488  '''
2489  Computes the RMSD. Resolves ambiguous pairs assignments
2490  '''
2491  # here we memoize the rmsd and molecular assignment so that it's
2492  # not done multiple times
2493  n0 = self.stath0.current_index
2494  n1 = self.stath1.current_index
2495  if ((n0, n1) in self.pairwise_rmsd) \
2496  and ((n0, n1) in self.pairwise_molecular_assignment):
2497  return (self.pairwise_rmsd[(n0, n1)],
2498  self.pairwise_molecular_assignment[(n0, n1)])
2499 
2500  if self.alignment:
2501  self.align()
2502  # if it's not yet memoized
2503  total_rmsd = 0.0
2504  total_N = 0
2505  # this is a dictionary which keys are the molecule names, and values
2506  # are the list of IMP.atom.Selection for all molecules that share
2507  # the molecule name
2508  molecular_assignment = {}
2509  for molname, sels0 in self.seldict0.items():
2510  sels_best_order, best_rmsd2 = \
2511  self.rmsd_helper(sels0, self.seldict1[molname], metric)
2512 
2513  Ncoords = len(sels_best_order[0].get_selected_particles())
2514  Ncopies = len(self.seldict1[molname])
2515  total_rmsd += Ncoords*best_rmsd2
2516  total_N += Ncoords*Ncopies
2517 
2518  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2519  p0 = sel0.get_selected_particles()[0]
2520  p1 = sel1.get_selected_particles()[0]
2521  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2522  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2523  c0 = IMP.atom.Copy(m0).get_copy_index()
2524  c1 = IMP.atom.Copy(m1).get_copy_index()
2525  molecular_assignment[(molname, c0)] = (molname, c1)
2526 
2527  total_rmsd = math.sqrt(total_rmsd/total_N)
2528 
2529  self.pairwise_rmsd[(n0, n1)] = total_rmsd
2530  self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2531  self.pairwise_rmsd[(n1, n0)] = total_rmsd
2532  self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2533  return total_rmsd, molecular_assignment
2534 
2535  def set_reference(self, reference, cluster):
2536  """
2537  Fix the reference structure for structural alignment, rmsd and
2538  chain assignment
2539 
2540  @param reference can be either "Absolute" (cluster center of the
2541  first cluster) or Relative (cluster center of the current
2542  cluster)
2543  """
2544  if reference == "Absolute":
2545  _ = self.stath0[0]
2546  elif reference == "Relative":
2547  if cluster.center_index:
2548  n0 = cluster.center_index
2549  else:
2550  n0 = cluster.members[0]
2551  _ = self.stath0[n0]
2552 
2554  """
2555  compute the molecular assignments between multiple copies
2556  of the same sequence. It changes the Copy index of Molecules
2557  """
2558  _ = self.stath1[n1]
2559  _, molecular_assignment = self.rmsd()
2560  for (m0, c0), (m1, c1) in molecular_assignment.items():
2561  mol0 = self.molcopydict0[m0][c0]
2562  mol1 = self.molcopydict1[m1][c1]
2563  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2564  p1 = IMP.atom.Copy(mol1).get_particle()
2565  p1.set_value(cik0, c0)
2566 
2568  """
2569  Undo the Copy index assignment
2570  """
2571  _ = self.stath1[n1]
2572  _, molecular_assignment = self.rmsd()
2573  for (m0, c0), (m1, c1) in molecular_assignment.items():
2574  mol0 = self.molcopydict0[m0][c0]
2575  mol1 = self.molcopydict1[m1][c1]
2576  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2577  p1 = IMP.atom.Copy(mol1).get_particle()
2578  p1.set_value(cik0, c1)
2579 
2580  ####################
2581  # Container Functions
2582  ####################
2583 
2584  def __repr__(self):
2585  s = "AnalysisReplicaExchange\n"
2586  s += "---- number of clusters %s \n" % str(len(self.clusters))
2587  s += "---- number of models %s \n" % str(len(self.stath0))
2588  return s
2589 
2590  def __getitem__(self, int_slice_adaptor):
2591  if isinstance(int_slice_adaptor, int):
2592  return self.clusters[int_slice_adaptor]
2593  elif isinstance(int_slice_adaptor, slice):
2594  return self.__iter__(int_slice_adaptor)
2595  else:
2596  raise TypeError("Unknown Type")
2597 
2598  def __len__(self):
2599  return len(self.clusters)
2600 
2601  def __iter__(self, slice_key=None):
2602  if slice_key is None:
2603  for i in range(len(self)):
2604  yield self[i]
2605  else:
2606  for i in range(len(self))[slice_key]:
2607  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2487
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignment.
Definition: macros.py:2535
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1819
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1062
def precision
Compute the precision of a cluster.
Definition: macros.py:1917
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:937
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:104
A container for models organized into clusters.
Definition: output.py:1506
Sample using molecular dynamics.
Definition: samplers.py:326
def aggregate
initial filling of the clusters.
Definition: macros.py:2373
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:634
A macro to help setup and run replica exchange.
Definition: macros.py:65
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:635
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1882
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1966
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1159
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1679
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1760
def __init__
Construction of the Class.
Definition: macros.py:1634
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:841
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1777
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2567
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1695
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2451
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1857
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1080
def apply_molecular_assignments
compute the molecular assignments between multiple copies of the same sequence.
Definition: macros.py:2553
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1623
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:630
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2400
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:989
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1839
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:1000
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def __init__
Constructor.
Definition: macros.py:137
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:21
def update_seldicts
Update the seldicts.
Definition: macros.py:2342
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2331
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:542
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1724
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:199
Collect timing information.
Definition: tools.py:119
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1688
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9518
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:46
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2443
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:683
The general base class for IMP exceptions.
Definition: exception.h:48
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1253
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1770
Class to handle individual particles of a Model object.
Definition: Particle.h:43
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:852
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1946
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:659
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1711
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1810
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2428
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1135
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1785
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:499
Sample using replica exchange.
Definition: samplers.py:431
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27