IMP logo
IMP Reference Guide  2.21.0
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
7 import IMP.pmi.tools
8 import IMP.pmi.samplers
9 import IMP.pmi.output
10 import IMP.pmi.analysis
11 import IMP.pmi.io
12 import IMP.pmi.alphabets
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import os
17 try:
18  from pathlib import Path
19 except ImportError: # Use bundled pathlib on Python 2 without pathlib
20  from IMP._compat_pathlib import Path
21 import glob
22 from operator import itemgetter
23 from collections import defaultdict
24 import numpy as np
25 import itertools
26 import warnings
27 import math
28 
29 import pickle
30 
31 
32 class _MockMPIValues(object):
33  """Replace samplers.MPI_values when in test mode"""
34  def get_percentile(self, name):
35  return 0.
36 
37 
38 class _RMFRestraints(object):
39  """All restraints that are written out to the RMF file"""
40  def __init__(self, model, user_restraints):
41  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
42  self._user_restraints = user_restraints if user_restraints else []
43 
44  def __len__(self):
45  return (len(self._user_restraints)
46  + self._rmf_rs.get_number_of_restraints())
47 
48  def __bool__(self):
49  return len(self) > 0
50  __nonzero__ = __bool__ # Python 2 compatibility
51 
52  def __getitem__(self, i):
53  class FakePMIWrapper(object):
54  def __init__(self, r):
55  self.r = IMP.RestraintSet.get_from(r)
56 
57  def get_restraint(self):
58  return self.r
59 
60  lenuser = len(self._user_restraints)
61  if 0 <= i < lenuser:
62  return self._user_restraints[i]
63  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
64  r = self._rmf_rs.get_restraint(i - lenuser)
65  return FakePMIWrapper(r)
66  else:
67  raise IndexError("Out of range")
68 
69 
70 class ReplicaExchange(object):
71  """A macro to help setup and run replica exchange.
72  Supports Monte Carlo and molecular dynamics.
73  Produces trajectory RMF files, best PDB structures,
74  and output stat files.
75  """
76  def __init__(self, model, root_hier,
77  monte_carlo_sample_objects=None,
78  molecular_dynamics_sample_objects=None,
79  output_objects=[],
80  rmf_output_objects=None,
81  monte_carlo_temperature=1.0,
82  simulated_annealing=False,
83  simulated_annealing_minimum_temperature=1.0,
84  simulated_annealing_maximum_temperature=2.5,
85  simulated_annealing_minimum_temperature_nframes=100,
86  simulated_annealing_maximum_temperature_nframes=100,
87  replica_exchange_minimum_temperature=1.0,
88  replica_exchange_maximum_temperature=2.5,
89  replica_exchange_swap=True,
90  num_sample_rounds=1,
91  number_of_best_scoring_models=500,
92  monte_carlo_steps=10,
93  self_adaptive=False,
94  molecular_dynamics_steps=10,
95  molecular_dynamics_max_time_step=1.0,
96  number_of_frames=1000,
97  save_coordinates_mode="lowest_temperature",
98  nframes_write_coordinates=1,
99  write_initial_rmf=True,
100  initial_rmf_name_suffix="initial",
101  stat_file_name_suffix="stat",
102  best_pdb_name_suffix="model",
103  mmcif=False,
104  do_clean_first=True,
105  do_create_directories=True,
106  global_output_directory="./",
107  rmf_dir="rmfs/",
108  best_pdb_dir="pdbs/",
109  replica_stat_file_suffix="stat_replica",
110  em_object_for_rmf=None,
111  atomistic=False,
112  replica_exchange_object=None,
113  test_mode=False,
114  score_moved=False,
115  use_nestor=False,
116  nestor_restraints=None,
117  nestor_rmf_fname_prefix="nested",):
118  """Constructor.
119  @param model The IMP model
120  @param root_hier Top-level (System)hierarchy
121  @param monte_carlo_sample_objects Objects for MC sampling, which
122  should generally be a simple list of Mover objects, e.g.
123  from DegreesOfFreedom.get_movers().
124  @param molecular_dynamics_sample_objects Objects for MD sampling,
125  which should generally be a simple list of particles.
126  @param output_objects A list of structural objects and restraints
127  that will be included in output (ie, statistics "stat"
128  files). Any object that provides a get_output() method
129  can be used here. If None is passed
130  the macro will not write stat files.
131  @param rmf_output_objects A list of structural objects and
132  restraints that will be included in rmf. Any object
133  that provides a get_output() method can be used here.
134  @param monte_carlo_temperature MC temp (may need to be optimized
135  based on post-sampling analysis)
136  @param simulated_annealing If True, perform simulated annealing
137  @param simulated_annealing_minimum_temperature Should generally be
138  the same as monte_carlo_temperature.
139  @param simulated_annealing_minimum_temperature_nframes Number of
140  frames to compute at minimum temperature.
141  @param simulated_annealing_maximum_temperature_nframes Number of
142  frames to compute at
143  temps > simulated_annealing_maximum_temperature.
144  @param replica_exchange_minimum_temperature Low temp for REX; should
145  generally be the same as monte_carlo_temperature.
146  @param replica_exchange_maximum_temperature High temp for REX
147  @param replica_exchange_swap Boolean, enable disable temperature
148  swap (Default=True)
149  @param num_sample_rounds Number of rounds of MC/MD per cycle
150  @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
151  models to keep around for analysis.
152  @param mmcif If True, write best scoring models in mmCIF format;
153  if False (the default), write in legacy PDB format.
154  @param best_pdb_dir The directory under `global_output_directory`
155  where best-scoring PDB/mmCIF files are written.
156  @param best_pdb_name_suffix Part of the file name for best-scoring
157  PDB/mmCIF files.
158  @param monte_carlo_steps Number of MC steps per round
159  @param self_adaptive self adaptive scheme for Monte Carlo movers
160  @param molecular_dynamics_steps Number of MD steps per round
161  @param molecular_dynamics_max_time_step Max time step for MD
162  @param number_of_frames Number of REX frames to run
163  @param save_coordinates_mode string: how to save coordinates.
164  "lowest_temperature" (default) only the lowest temperatures
165  is saved
166  "25th_score" all replicas whose score is below the 25th
167  percentile
168  "50th_score" all replicas whose score is below the 50th
169  percentile
170  "75th_score" all replicas whose score is below the 75th
171  percentile
172  @param nframes_write_coordinates How often to write the coordinates
173  of a frame
174  @param write_initial_rmf Write the initial configuration
175  @param global_output_directory Folder that will be created to house
176  output.
177  @param test_mode Set to True to avoid writing any files, just test
178  one frame.
179  @param score_moved If True, attempt to speed up Monte Carlo
180  sampling by caching scoring function terms on particles
181  that didn't move.
182  @param use_nestor If True, follows the Nested Sampling workflow
183  of the NestOR module and skips writing stat files and
184  replica stat files.
185  @param nestor_restraints A list of restraints for which
186  likelihoods are to be computed for use by NestOR module.
187  @param nestor_rmf_fname_prefix Prefix to be used for storing .rmf3
188  files generated by NestOR .
189  """
190  self.model = model
191  self.vars = {}
192 
193  # add check hierarchy is multistate
194  if output_objects == []:
195  # The "[]" in the default parameters is a global object, so make
196  # our own copy here
197  self.output_objects = []
198  else:
199  self.output_objects = output_objects
200  self.rmf_output_objects = rmf_output_objects
201  if (isinstance(root_hier, IMP.atom.Hierarchy)
202  and not root_hier.get_parent()):
203  if self.output_objects is not None:
204  self.output_objects.append(
205  IMP.pmi.io.TotalScoreOutput(self.model))
206  if self.rmf_output_objects is not None:
207  self.rmf_output_objects.append(
208  IMP.pmi.io.TotalScoreOutput(self.model))
209  self.root_hier = root_hier
210  states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
211  self.vars["number_of_states"] = len(states)
212  if len(states) > 1:
213  self.root_hiers = states
214  self.is_multi_state = True
215  else:
216  self.root_hier = root_hier
217  self.is_multi_state = False
218  else:
219  raise TypeError("Must provide System hierarchy (root_hier)")
220 
221  self._rmf_restraints = _RMFRestraints(model, None)
222  self.em_object_for_rmf = em_object_for_rmf
223  self.monte_carlo_sample_objects = monte_carlo_sample_objects
224  self.vars["self_adaptive"] = self_adaptive
225  self.molecular_dynamics_sample_objects = \
226  molecular_dynamics_sample_objects
227  self.replica_exchange_object = replica_exchange_object
228  self.molecular_dynamics_max_time_step = \
229  molecular_dynamics_max_time_step
230  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
231  self.vars["replica_exchange_minimum_temperature"] = \
232  replica_exchange_minimum_temperature
233  self.vars["replica_exchange_maximum_temperature"] = \
234  replica_exchange_maximum_temperature
235  self.vars["replica_exchange_swap"] = replica_exchange_swap
236  self.vars["simulated_annealing"] = simulated_annealing
237  self.vars["simulated_annealing_minimum_temperature"] = \
238  simulated_annealing_minimum_temperature
239  self.vars["simulated_annealing_maximum_temperature"] = \
240  simulated_annealing_maximum_temperature
241  self.vars["simulated_annealing_minimum_temperature_nframes"] = \
242  simulated_annealing_minimum_temperature_nframes
243  self.vars["simulated_annealing_maximum_temperature_nframes"] = \
244  simulated_annealing_maximum_temperature_nframes
245 
246  self.vars["num_sample_rounds"] = num_sample_rounds
247  self.vars[
248  "number_of_best_scoring_models"] = number_of_best_scoring_models
249  self.vars["monte_carlo_steps"] = monte_carlo_steps
250  self.vars["molecular_dynamics_steps"] = molecular_dynamics_steps
251  self.vars["number_of_frames"] = number_of_frames
252  if save_coordinates_mode not in ("lowest_temperature", "25th_score",
253  "50th_score", "75th_score"):
254  raise Exception("save_coordinates_mode has unrecognized value")
255  else:
256  self.vars["save_coordinates_mode"] = save_coordinates_mode
257  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
258  self.vars["write_initial_rmf"] = write_initial_rmf
259  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
260  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
261  self.vars["mmcif"] = mmcif
262  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
263  self.vars["do_clean_first"] = do_clean_first
264  self.vars["do_create_directories"] = do_create_directories
265  self.vars["global_output_directory"] = global_output_directory
266  self.vars["rmf_dir"] = rmf_dir
267  self.vars["best_pdb_dir"] = best_pdb_dir
268  self.vars["atomistic"] = atomistic
269  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
270  self.vars["geometries"] = None
271  self.test_mode = test_mode
272  self.score_moved = score_moved
273  self.nest = use_nestor
274  self.nestor_restraints = nestor_restraints
275  self.nestor_rmf_fname = nestor_rmf_fname_prefix
276 
277  def add_geometries(self, geometries):
278  if self.vars["geometries"] is None:
279  self.vars["geometries"] = list(geometries)
280  else:
281  self.vars["geometries"].extend(geometries)
282 
283  def show_info(self):
284  print("ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
285  "rmfs/*.rmf3 for each replica ")
286  print("--- it stores the best scoring pdb models in pdbs/")
287  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
288  "lowest temperature")
289  print("--- variables:")
290  keys = list(self.vars.keys())
291  keys.sort()
292  for v in keys:
293  print("------", v.ljust(30), self.vars[v])
294  print("Use nestor: ", self.nest)
295 
296  def get_replica_exchange_object(self):
297  return self.replica_exchange_object
298 
299  def _add_provenance(self, sampler_md, sampler_mc):
300  """Record details about the sampling in the IMP Hierarchies"""
301  iterations = 0
302  if sampler_md:
303  method = "Molecular Dynamics"
304  iterations += self.vars["molecular_dynamics_steps"]
305  if sampler_mc:
306  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
307  iterations += self.vars["monte_carlo_steps"]
308  # If no sampling is actually done, no provenance to write
309  if iterations == 0 or self.vars["number_of_frames"] == 0:
310  return
311  iterations *= self.vars["num_sample_rounds"]
312 
313  pi = self.model.add_particle("sampling")
315  self.model, pi, method, self.vars["number_of_frames"],
316  iterations)
317  p.set_number_of_replicas(
318  self.replica_exchange_object.get_number_of_replicas())
319  IMP.pmi.tools._add_pmi_provenance(self.root_hier)
320  IMP.core.add_provenance(self.model, self.root_hier, p)
321 
322  def execute_macro(self):
323  temp_index_factor = 100000.0
324  samplers = []
325  sampler_mc = None
326  sampler_md = None
327  if self.monte_carlo_sample_objects is not None:
328  print("Setting up MonteCarlo")
329  sampler_mc = IMP.pmi.samplers.MonteCarlo(
330  self.model, self.monte_carlo_sample_objects,
331  self.vars["monte_carlo_temperature"],
332  score_moved=self.score_moved)
333  if self.vars["simulated_annealing"]:
334  tmin = self.vars["simulated_annealing_minimum_temperature"]
335  tmax = self.vars["simulated_annealing_maximum_temperature"]
336  nfmin = self.vars[
337  "simulated_annealing_minimum_temperature_nframes"]
338  nfmax = self.vars[
339  "simulated_annealing_maximum_temperature_nframes"]
340  sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
341  if self.vars["self_adaptive"]:
342  sampler_mc.set_self_adaptive(
343  isselfadaptive=self.vars["self_adaptive"])
344  if self.output_objects is not None:
345  self.output_objects.append(sampler_mc)
346  if self.rmf_output_objects is not None:
347  self.rmf_output_objects.append(sampler_mc)
348  samplers.append(sampler_mc)
349 
350  if self.molecular_dynamics_sample_objects is not None:
351  print("Setting up MolecularDynamics")
353  self.model, self.molecular_dynamics_sample_objects,
354  self.vars["monte_carlo_temperature"],
355  maximum_time_step=self.molecular_dynamics_max_time_step)
356  if self.vars["simulated_annealing"]:
357  tmin = self.vars["simulated_annealing_minimum_temperature"]
358  tmax = self.vars["simulated_annealing_maximum_temperature"]
359  nfmin = self.vars[
360  "simulated_annealing_minimum_temperature_nframes"]
361  nfmax = self.vars[
362  "simulated_annealing_maximum_temperature_nframes"]
363  sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
364  if self.output_objects is not None:
365  self.output_objects.append(sampler_md)
366  if self.rmf_output_objects is not None:
367  self.rmf_output_objects.append(sampler_md)
368  samplers.append(sampler_md)
369 # -------------------------------------------------------------------------
370 
371  print("Setting up ReplicaExchange")
373  self.model, self.vars["replica_exchange_minimum_temperature"],
374  self.vars["replica_exchange_maximum_temperature"], samplers,
375  replica_exchange_object=self.replica_exchange_object)
376  self.replica_exchange_object = rex.rem
377 
378  myindex = rex.get_my_index()
379  if self.output_objects is not None:
380  self.output_objects.append(rex)
381  if self.rmf_output_objects is not None:
382  self.rmf_output_objects.append(rex)
383  # must reset the minimum temperature due to the
384  # different binary length of rem.get_my_parameter double and python
385  # float
386  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
387 
388 # -------------------------------------------------------------------------
389 
390  globaldir = self.vars["global_output_directory"] + "/"
391  rmf_dir = globaldir + self.vars["rmf_dir"]
392  pdb_dir = globaldir + self.vars["best_pdb_dir"]
393 
394  if not self.test_mode and not self.nest:
395  if self.vars["do_clean_first"]:
396  pass
397 
398  if self.vars["do_create_directories"]:
399 
400  try:
401  os.makedirs(globaldir)
402  except: # noqa: E722
403  pass
404  try:
405  os.makedirs(rmf_dir)
406  except: # noqa: E722
407  pass
408 
409  if not self.is_multi_state:
410  try:
411  os.makedirs(pdb_dir)
412  except: # noqa: E722
413  pass
414  else:
415  for n in range(self.vars["number_of_states"]):
416  try:
417  os.makedirs(pdb_dir + "/" + str(n))
418  except: # noqa: E722
419  pass
420 
421 # -------------------------------------------------------------------------
422 
424  if self.output_objects is not None:
425  self.output_objects.append(sw)
426  if self.rmf_output_objects is not None:
427  self.rmf_output_objects.append(sw)
428 
429  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
430 
431  if not self.nest:
432  print("Setting up stat file")
433  low_temp_stat_file = globaldir + \
434  self.vars["stat_file_name_suffix"] + "." + \
435  str(myindex) + ".out"
436 
437  # Ensure model is updated before saving init files
438  if not self.test_mode:
439  self.model.update()
440 
441  if not self.test_mode and not self.nest:
442  if self.output_objects is not None:
443  output.init_stat2(low_temp_stat_file,
444  self.output_objects,
445  extralabels=["rmf_file", "rmf_frame_index"])
446  else:
447  print("Stat file writing is disabled")
448 
449  if self.rmf_output_objects is not None and not self.nest:
450  print("Stat info being written in the rmf file")
451 
452  if not self.test_mode and not self.nest:
453  print("Setting up replica stat file")
454  replica_stat_file = globaldir + \
455  self.vars["replica_stat_file_suffix"] + "." + \
456  str(myindex) + ".out"
457  if not self.test_mode:
458  output.init_stat2(replica_stat_file, [rex],
459  extralabels=["score"])
460 
461  print("Setting up best pdb files")
462  if not self.is_multi_state:
463  if self.vars["number_of_best_scoring_models"] > 0:
464  output.init_pdb_best_scoring(
465  pdb_dir + "/" + self.vars["best_pdb_name_suffix"],
466  self.root_hier,
467  self.vars["number_of_best_scoring_models"],
468  replica_exchange=True,
469  mmcif=self.vars['mmcif'],
470  best_score_file=globaldir + "best.scores.rex.py")
471  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
472  output.write_psf(
473  pdb_dir + "/" + "model.psf",
474  pdb_dir + "/" +
475  self.vars["best_pdb_name_suffix"] + pdbext)
476  else:
477  if self.vars["number_of_best_scoring_models"] > 0:
478  for n in range(self.vars["number_of_states"]):
479  output.init_pdb_best_scoring(
480  pdb_dir + "/" + str(n) + "/" +
481  self.vars["best_pdb_name_suffix"],
482  self.root_hiers[n],
483  self.vars["number_of_best_scoring_models"],
484  replica_exchange=True,
485  mmcif=self.vars['mmcif'],
486  best_score_file=globaldir + "best.scores.rex.py")
487  pdbext = ".0.cif" if self.vars['mmcif'] else ".0.pdb"
488  output.write_psf(
489  pdb_dir + "/" + str(n) + "/" + "model.psf",
490  pdb_dir + "/" + str(n) + "/" +
491  self.vars["best_pdb_name_suffix"] + pdbext)
492 # ---------------------------------------------
493 
494  if self.em_object_for_rmf is not None:
495  output_hierarchies = [
496  self.root_hier,
497  self.em_object_for_rmf.get_density_as_hierarchy(
498  )]
499  else:
500  output_hierarchies = [self.root_hier]
501 
502  if not self.test_mode and not self.nest:
503  print("Setting up and writing initial rmf coordinate file")
504  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
505  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
506  output_hierarchies,
507  listofobjects=self.rmf_output_objects)
508  if self._rmf_restraints:
509  output.add_restraints_to_rmf(
510  init_suffix + "." + str(myindex) + ".rmf3",
511  self._rmf_restraints)
512  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
513  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
514 
515  if not self.test_mode:
516  mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
517  else:
518  mpivs = _MockMPIValues()
519 
520  self._add_provenance(sampler_md, sampler_mc)
521 
522  if not self.test_mode and not self.nest:
523  print("Setting up production rmf files")
524  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
525  output.init_rmf(rmfname, output_hierarchies,
526  geometries=self.vars["geometries"],
527  listofobjects=self.rmf_output_objects)
528 
529  if self._rmf_restraints:
530  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
531 
532  if not self.test_mode and self.nest:
533  print("Setting up NestOR rmf files")
534  nestor_rmf_fname = str(self.nestor_rmf_fname) + '_' + \
535  str(self.replica_exchange_object.get_my_index()) + '.rmf3'
536 
537  output.init_rmf(nestor_rmf_fname, output_hierarchies,
538  geometries=self.vars["geometries"],
539  listofobjects=self.rmf_output_objects)
540 
541  ntimes_at_low_temp = 0
542 
543  if myindex == 0 and not self.nest:
544  self.show_info()
545  self.replica_exchange_object.set_was_used(True)
546  nframes = self.vars["number_of_frames"]
547  if self.test_mode:
548  nframes = 1
549 
550  sampled_likelihoods = []
551  for i in range(nframes):
552  if self.test_mode:
553  score = 0.
554  else:
555  for nr in range(self.vars["num_sample_rounds"]):
556  if sampler_md is not None:
557  sampler_md.optimize(
558  self.vars["molecular_dynamics_steps"])
559  if sampler_mc is not None:
560  sampler_mc.optimize(self.vars["monte_carlo_steps"])
562  self.model).evaluate(False)
563  mpivs.set_value("score", score)
564  if not self.nest:
565  output.set_output_entry("score", score)
566 
567  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
568 
569  if self.vars["save_coordinates_mode"] == "lowest_temperature":
570  save_frame = (min_temp_index == my_temp_index)
571  elif self.vars["save_coordinates_mode"] == "25th_score":
572  score_perc = mpivs.get_percentile("score")
573  save_frame = (score_perc*100.0 <= 25.0)
574  elif self.vars["save_coordinates_mode"] == "50th_score":
575  score_perc = mpivs.get_percentile("score")
576  save_frame = (score_perc*100.0 <= 50.0)
577  elif self.vars["save_coordinates_mode"] == "75th_score":
578  score_perc = mpivs.get_percentile("score")
579  save_frame = (score_perc*100.0 <= 75.0)
580 
581  # Ensure model is updated before saving output files
582  if save_frame and not self.test_mode:
583  self.model.update()
584 
585  if save_frame:
586  print("--- frame %s score %s " % (str(i), str(score)))
587 
588  if self.nest:
589  if math.isnan(score):
590  sampled_likelihoods.append(math.nan)
591  else:
592  likelihood_for_sample = 1
593  for rstrnt in self.nestor_restraints:
594  likelihood_for_sample *= rstrnt.get_likelihood()
595  sampled_likelihoods.append(likelihood_for_sample)
596  output.write_rmf(nestor_rmf_fname)
597 
598  if not self.test_mode and not self.nest:
599  if i % self.vars["nframes_write_coordinates"] == 0:
600  print('--- writing coordinates')
601  if self.vars["number_of_best_scoring_models"] > 0:
602  output.write_pdb_best_scoring(score)
603  output.write_rmf(rmfname)
604  output.set_output_entry("rmf_file", rmfname)
605  output.set_output_entry("rmf_frame_index",
606  ntimes_at_low_temp)
607  else:
608  output.set_output_entry("rmf_file", rmfname)
609  output.set_output_entry("rmf_frame_index", '-1')
610  if self.output_objects is not None:
611  output.write_stat2(low_temp_stat_file)
612  ntimes_at_low_temp += 1
613 
614  if not self.test_mode and not self.nest:
615  output.write_stat2(replica_stat_file)
616  if self.vars["replica_exchange_swap"]:
617  rex.swap_temp(i, score)
618 
619  if self.nest and len(sampled_likelihoods) > 0:
620  with open("likelihoods_"
621  + str(self.replica_exchange_object.get_my_index()),
622  "wb") as lif:
623  pickle.dump(sampled_likelihoods, lif)
624 
625  output.close_rmf(nestor_rmf_fname)
626 
627  for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
628  p.add_replica_exchange(state, self)
629 
630  if not self.test_mode and not self.nest:
631  print("closing production rmf files")
632  output.close_rmf(rmfname)
633 
634 
635 class BuildSystem(object):
636  """A macro to build a IMP::pmi::topology::System based on a
637  TopologyReader object.
638 
639  Easily create multi-state systems by calling this macro
640  repeatedly with different TopologyReader objects!
641  A useful function is get_molecules() which returns the PMI Molecules
642  grouped by state as a dictionary with key = (molecule name),
643  value = IMP.pmi.topology.Molecule
644  Quick multi-state system:
645  @code{.python}
646  model = IMP.Model()
647  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
648  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
649  bs = IMP.pmi.macros.BuildSystem(model)
650  bs.add_state(reader1)
651  bs.add_state(reader2)
652  bs.execute_macro() # build everything including degrees of freedom
653  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
654  ### now you have a two state system, you add restraints etc
655  @endcode
656  @note The "domain name" entry of the topology reader is not used.
657  All molecules are set up by the component name, but split into rigid bodies
658  as requested.
659  """
660 
661  _alphabets = {'DNA': IMP.pmi.alphabets.dna,
662  'RNA': IMP.pmi.alphabets.rna}
663 
664  def __init__(self, model, sequence_connectivity_scale=4.0,
665  force_create_gmm_files=False, resolutions=[1, 10],
666  name='System'):
667  """Constructor
668  @param model An IMP Model
669  @param sequence_connectivity_scale For scaling the connectivity
670  restraint
671  @param force_create_gmm_files If True, will sample and create GMMs
672  no matter what. If False, will only sample if the
673  files don't exist. If number of Gaussians is zero, won't
674  do anything.
675  @param resolutions The resolutions to build for structured regions
676  @param name The name of the top-level hierarchy node.
677  """
678  self.model = model
679  self.system = IMP.pmi.topology.System(self.model, name=name)
680  self._readers = [] # the TopologyReaders (one per state)
681  # TempResidues for each domain key=unique name,
682  # value=(atomic_res,non_atomic_res).
683  self._domain_res = []
684  self._domains = [] # key = domain unique name, value = Component
685  self.force_create_gmm_files = force_create_gmm_files
686  self.resolutions = resolutions
687 
688  def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
689  chain_ids=None):
690  """Add a state using the topology info in a
691  IMP::pmi::topology::TopologyReader object.
692  When you are done adding states, call execute_macro()
693  @param reader The TopologyReader object
694  @param fasta_name_map dictionary for converting protein names
695  found in the fasta file
696  @param chain_ids A list or string of chain IDs for assigning to
697  newly-created molecules, e.g.
698  `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
699  If not specified, chain IDs A through Z are assigned, then
700  AA through AZ, then BA through BZ, and so on, in the same
701  fashion as PDB.
702  """
703  state = self.system.create_state()
704  self._readers.append(reader)
705  # key is unique name, value is (atomic res, nonatomicres)
706  these_domain_res = {}
707  these_domains = {} # key is unique name, value is _Component
708  if chain_ids is None:
709  chain_ids = IMP.pmi.output._ChainIDs()
710  numchain = 0
711 
712  # setup representation
713  # loop over molecules, copies, then domains
714  for molname in reader.get_molecules():
715  copies = reader.get_molecules()[molname].domains
716  for nc, copyname in enumerate(copies):
717  print("BuildSystem.add_state: setting up molecule %s copy "
718  "number %s" % (molname, str(nc)))
719  copy = copies[copyname]
720  # option to not rename chains
721  if keep_chain_id:
722  all_chains = [c for c in copy if c.chain is not None]
723  if all_chains:
724  chain_id = all_chains[0].chain
725  else:
726  chain_id = chain_ids[numchain]
727  warnings.warn(
728  "No PDBs specified for %s, so keep_chain_id has "
729  "no effect; using default chain ID '%s'"
730  % (molname, chain_id), IMP.pmi.ParameterWarning)
731  else:
732  chain_id = chain_ids[numchain]
733  if nc == 0:
734  alphabet = IMP.pmi.alphabets.amino_acid
735  fasta_flag = copy[0].fasta_flag
736  if fasta_flag in self._alphabets:
737  alphabet = self._alphabets[fasta_flag]
739  copy[0].fasta_file, fasta_name_map)
740  seq = seqs[copy[0].fasta_id]
741  print("BuildSystem.add_state: molecule %s sequence has "
742  "%s residues" % (molname, len(seq)))
743  orig_mol = state.create_molecule(
744  molname, seq, chain_id, alphabet=alphabet,
745  uniprot=seqs.uniprot.get(copy[0].fasta_id))
746  mol = orig_mol
747  numchain += 1
748  else:
749  print("BuildSystem.add_state: creating a copy for "
750  "molecule %s" % molname)
751  mol = orig_mol.create_copy(chain_id)
752  numchain += 1
753 
754  for domainnumber, domain in enumerate(copy):
755  print("BuildSystem.add_state: ---- setting up domain %s "
756  "of molecule %s" % (domainnumber, molname))
757  # we build everything in the residue range, even if it
758  # extends beyond what's in the actual PDB file
759  these_domains[domain.get_unique_name()] = domain
760  if domain.residue_range == [] or \
761  domain.residue_range is None:
762  domain_res = mol.get_residues()
763  else:
764  start = domain.residue_range[0]+domain.pdb_offset
765  if domain.residue_range[1] == 'END':
766  end = len(mol.sequence)
767  else:
768  end = domain.residue_range[1]+domain.pdb_offset
769  domain_res = mol.residue_range(start-1, end-1)
770  print("BuildSystem.add_state: -------- domain %s of "
771  "molecule %s extends from residue %s to "
772  "residue %s "
773  % (domainnumber, molname, start, end))
774  if domain.pdb_file == "BEADS":
775  print("BuildSystem.add_state: -------- domain %s of "
776  "molecule %s represented by BEADS "
777  % (domainnumber, molname))
778  mol.add_representation(
779  domain_res,
780  resolutions=[domain.bead_size],
781  setup_particles_as_densities=(
782  domain.em_residues_per_gaussian != 0),
783  color=domain.color)
784  these_domain_res[domain.get_unique_name()] = \
785  (set(), domain_res)
786  elif domain.pdb_file == "IDEAL_HELIX":
787  print("BuildSystem.add_state: -------- domain %s of "
788  "molecule %s represented by IDEAL_HELIX "
789  % (domainnumber, molname))
790  emper = domain.em_residues_per_gaussian
791  mol.add_representation(
792  domain_res,
793  resolutions=self.resolutions,
794  ideal_helix=True,
795  density_residues_per_component=emper,
796  density_prefix=domain.density_prefix,
797  density_force_compute=self.force_create_gmm_files,
798  color=domain.color)
799  these_domain_res[domain.get_unique_name()] = \
800  (domain_res, set())
801  else:
802  print("BuildSystem.add_state: -------- domain %s of "
803  "molecule %s represented by pdb file %s "
804  % (domainnumber, molname, domain.pdb_file))
805  domain_atomic = mol.add_structure(domain.pdb_file,
806  domain.chain,
807  domain.residue_range,
808  domain.pdb_offset,
809  soft_check=True)
810  domain_non_atomic = domain_res - domain_atomic
811  if not domain.em_residues_per_gaussian:
812  mol.add_representation(
813  domain_atomic, resolutions=self.resolutions,
814  color=domain.color)
815  if len(domain_non_atomic) > 0:
816  mol.add_representation(
817  domain_non_atomic,
818  resolutions=[domain.bead_size],
819  color=domain.color)
820  else:
821  print("BuildSystem.add_state: -------- domain %s "
822  "of molecule %s represented by gaussians "
823  % (domainnumber, molname))
824  emper = domain.em_residues_per_gaussian
825  creategmm = self.force_create_gmm_files
826  mol.add_representation(
827  domain_atomic,
828  resolutions=self.resolutions,
829  density_residues_per_component=emper,
830  density_prefix=domain.density_prefix,
831  density_force_compute=creategmm,
832  color=domain.color)
833  if len(domain_non_atomic) > 0:
834  mol.add_representation(
835  domain_non_atomic,
836  resolutions=[domain.bead_size],
837  setup_particles_as_densities=True,
838  color=domain.color)
839  these_domain_res[domain.get_unique_name()] = (
840  domain_atomic, domain_non_atomic)
841  self._domain_res.append(these_domain_res)
842  self._domains.append(these_domains)
843  print('BuildSystem.add_state: State', len(self.system.states), 'added')
844  return state
845 
846  def get_molecules(self):
847  """Return list of all molecules grouped by state.
848  For each state, it's a dictionary of Molecules where key is the
849  molecule name
850  """
851  return [s.get_molecules() for s in self.system.get_states()]
852 
853  def get_molecule(self, molname, copy_index=0, state_index=0):
854  return self.system.get_states()[state_index].get_molecules()[
855  molname][copy_index]
856 
857  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04,
858  max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
859  """Builds representations and sets up degrees of freedom"""
860  print("BuildSystem.execute_macro: building representations")
861  self.root_hier = self.system.build()
862 
863  print("BuildSystem.execute_macro: setting up degrees of freedom")
864  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
865  for nstate, reader in enumerate(self._readers):
866  rbs = reader.get_rigid_bodies()
867  srbs = reader.get_super_rigid_bodies()
868  csrbs = reader.get_chains_of_super_rigid_bodies()
869 
870  # add rigid bodies
871  domains_in_rbs = set()
872  for rblist in rbs:
873  print("BuildSystem.execute_macro: -------- building rigid "
874  "body %s" % (str(rblist)))
875  all_res = IMP.pmi.tools.OrderedSet()
876  bead_res = IMP.pmi.tools.OrderedSet()
877  for dname in rblist:
878  domain = self._domains[nstate][dname]
879  print("BuildSystem.execute_macro: -------- adding %s"
880  % (str(dname)))
881  all_res |= self._domain_res[nstate][dname][0]
882  bead_res |= self._domain_res[nstate][dname][1]
883  domains_in_rbs.add(dname)
884  all_res |= bead_res
885  print("BuildSystem.execute_macro: -------- creating rigid "
886  "body with max_trans %s max_rot %s "
887  "non_rigid_max_trans %s"
888  % (str(max_rb_trans), str(max_rb_rot),
889  str(max_bead_trans)))
890  self.dof.create_rigid_body(all_res,
891  nonrigid_parts=bead_res,
892  max_trans=max_rb_trans,
893  max_rot=max_rb_rot,
894  nonrigid_max_trans=max_bead_trans,
895  name="RigidBody %s" % dname)
896 
897  # if you have any domains not in an RB, set them as flexible beads
898  for dname, domain in self._domains[nstate].items():
899  if dname not in domains_in_rbs:
900  if domain.pdb_file != "BEADS":
901  warnings.warn(
902  "No rigid bodies set for %s. Residues read from "
903  "the PDB file will not be sampled - only regions "
904  "missing from the PDB will be treated flexibly. "
905  "To sample the entire sequence, use BEADS instead "
906  "of a PDB file name" % dname,
908  self.dof.create_flexible_beads(
909  self._domain_res[nstate][dname][1],
910  max_trans=max_bead_trans)
911 
912  # add super rigid bodies
913  for srblist in srbs:
914  print("BuildSystem.execute_macro: -------- building "
915  "super rigid body %s" % (str(srblist)))
916  all_res = IMP.pmi.tools.OrderedSet()
917  for dname in srblist:
918  print("BuildSystem.execute_macro: -------- adding %s"
919  % (str(dname)))
920  all_res |= self._domain_res[nstate][dname][0]
921  all_res |= self._domain_res[nstate][dname][1]
922 
923  print("BuildSystem.execute_macro: -------- creating super "
924  "rigid body with max_trans %s max_rot %s "
925  % (str(max_srb_trans), str(max_srb_rot)))
926  self.dof.create_super_rigid_body(
927  all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
928 
929  # add chains of super rigid bodies
930  for csrblist in csrbs:
931  all_res = IMP.pmi.tools.OrderedSet()
932  for dname in csrblist:
933  all_res |= self._domain_res[nstate][dname][0]
934  all_res |= self._domain_res[nstate][dname][1]
935  all_res = list(all_res)
936  all_res.sort(key=lambda r: r.get_index())
937  self.dof.create_main_chain_mover(all_res)
938  return self.root_hier, self.dof
939 
940 
941 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
942 class AnalysisReplicaExchange0(object):
943  """A macro for running all the basic operations of analysis.
944  Includes clustering, precision analysis, and making ensemble density maps.
945  A number of plots are also supported.
946  """
947  def __init__(self, model,
948  merge_directories=["./"],
949  stat_file_name_suffix="stat",
950  best_pdb_name_suffix="model",
951  do_clean_first=True,
952  do_create_directories=True,
953  global_output_directory="output/",
954  replica_stat_file_suffix="stat_replica",
955  global_analysis_result_directory="./analysis/",
956  test_mode=False):
957  """Constructor.
958  @param model The IMP model
959  @param stat_file_name_suffix
960  @param merge_directories The directories containing output files
961  @param best_pdb_name_suffix
962  @param do_clean_first
963  @param do_create_directories
964  @param global_output_directory Where everything is
965  @param replica_stat_file_suffix
966  @param global_analysis_result_directory
967  @param test_mode If True, nothing is changed on disk
968  """
969 
970  try:
971  from mpi4py import MPI
972  self.comm = MPI.COMM_WORLD
973  self.rank = self.comm.Get_rank()
974  self.number_of_processes = self.comm.size
975  except ImportError:
976  self.rank = 0
977  self.number_of_processes = 1
978 
979  self.test_mode = test_mode
980  self._protocol_output = []
981  self.cluster_obj = None
982  self.model = model
983  stat_dir = global_output_directory
984  self.stat_files = []
985  # it contains the position of the root directories
986  for rd in merge_directories:
987  stat_files = glob.glob(os.path.join(rd, stat_dir, "stat.*.out"))
988  if len(stat_files) == 0:
989  warnings.warn("no stat files found in %s"
990  % os.path.join(rd, stat_dir),
992  self.stat_files += stat_files
993 
994  def add_protocol_output(self, p):
995  """Capture details of the modeling protocol.
996  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
997  """
998  # Assume last state is the one we're interested in
999  self._protocol_output.append((p, p._last_state))
1000 
1001  def get_modeling_trajectory(self,
1002  score_key="Total_Score",
1003  rmf_file_key="rmf_file",
1004  rmf_file_frame_key="rmf_frame_index",
1005  outputdir="./",
1006  get_every=1,
1007  nframes_trajectory=10000):
1008  """ Get a trajectory of the modeling run, for generating
1009  demonstrative movies
1010 
1011  @param score_key The score for ranking models
1012  @param rmf_file_key Key pointing to RMF filename
1013  @param rmf_file_frame_key Key pointing to RMF frame number
1014  @param outputdir The local output directory used in the run
1015  @param get_every Extract every nth frame
1016  @param nframes_trajectory Total number of frames of the trajectory
1017  """
1018  import math
1019 
1020  trajectory_models = IMP.pmi.io.get_trajectory_models(
1021  self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
1022  get_every)
1023  score_list = list(map(float, trajectory_models[2]))
1024 
1025  max_score = max(score_list)
1026  min_score = min(score_list)
1027 
1028  bins = [(max_score-min_score)*math.exp(-float(i))+min_score
1029  for i in range(nframes_trajectory)]
1030  binned_scores = [None]*nframes_trajectory
1031  binned_model_indexes = [-1]*nframes_trajectory
1032 
1033  for model_index, s in enumerate(score_list):
1034  bins_score_diffs = [abs(s-b) for b in bins]
1035  bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1036  if binned_scores[bin_index] is None:
1037  binned_scores[bin_index] = s
1038  binned_model_indexes[bin_index] = model_index
1039  else:
1040  old_diff = abs(binned_scores[bin_index]-bins[bin_index])
1041  new_diff = abs(s-bins[bin_index])
1042  if new_diff < old_diff:
1043  binned_scores[bin_index] = s
1044  binned_model_indexes[bin_index] = model_index
1045 
1046  print(binned_scores)
1047  print(binned_model_indexes)
1048 
1049  def _expand_ambiguity(self, prot, d):
1050  """If using PMI2, expand the dictionary to include copies as
1051  ambiguous options
1052 
1053  This also keeps the states separate.
1054  """
1055  newdict = {}
1056  for key in d:
1057  val = d[key]
1058  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
1059  newdict[key] = val
1060  continue
1061  states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1062  if isinstance(val, tuple):
1063  start = val[0]
1064  stop = val[1]
1065  name = val[2]
1066  else:
1067  start = 1
1068  stop = -1
1069  name = val
1070  for nst in range(len(states)):
1071  sel = IMP.atom.Selection(prot, molecule=name, state_index=nst)
1072  copies = sel.get_selected_particles(with_representation=False)
1073  if len(copies) > 1:
1074  for nc in range(len(copies)):
1075  if len(states) > 1:
1076  newdict['%s.%i..%i' % (name, nst, nc)] = \
1077  (start, stop, name, nc, nst)
1078  else:
1079  newdict['%s..%i' % (name, nc)] = \
1080  (start, stop, name, nc, nst)
1081  else:
1082  newdict[key] = val
1083  return newdict
1084 
1085  def clustering(self,
1086  score_key="Total_Score",
1087  rmf_file_key="rmf_file",
1088  rmf_file_frame_key="rmf_frame_index",
1089  state_number=0,
1090  prefiltervalue=None,
1091  feature_keys=[],
1092  outputdir="./",
1093  alignment_components=None,
1094  number_of_best_scoring_models=10,
1095  rmsd_calculation_components=None,
1096  distance_matrix_file='distances.mat',
1097  load_distance_matrix_file=False,
1098  skip_clustering=False,
1099  number_of_clusters=1,
1100  display_plot=False,
1101  exit_after_display=True,
1102  get_every=1,
1103  first_and_last_frames=None,
1104  density_custom_ranges=None,
1105  write_pdb_with_centered_coordinates=False,
1106  voxel_size=5.0):
1107  """Get the best scoring models, compute a distance matrix,
1108  cluster them, and create density maps.
1109 
1110  Tuple format: "molname" just the molecule,
1111  or (start,stop,molname,copy_num(optional),state_num(optional)
1112  Can pass None for copy or state to ignore that field.
1113  If you don't pass a specific copy number
1114 
1115  @param score_key The score for ranking models.
1116  @param rmf_file_key Key pointing to RMF filename
1117  @param rmf_file_frame_key Key pointing to RMF frame number
1118  @param state_number State number to analyze
1119  @param prefiltervalue Only include frames where the
1120  score key is below this value
1121  @param feature_keys Keywords for which you want to
1122  calculate average, medians, etc.
1123  If you pass "Keyname" it'll include everything that matches
1124  "*Keyname*"
1125  @param outputdir The local output directory used in
1126  the run
1127  @param alignment_components Dictionary with keys=groupname,
1128  values are tuples for aligning the structures
1129  e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1130  @param number_of_best_scoring_models Num models to keep per run
1131  @param rmsd_calculation_components For calculating RMSD
1132  (same format as alignment_components)
1133  @param distance_matrix_file Where to store/read the
1134  distance matrix
1135  @param load_distance_matrix_file Try to load the distance
1136  matrix file
1137  @param skip_clustering Just extract the best scoring
1138  models and save the pdbs
1139  @param number_of_clusters Number of k-means clusters
1140  @param display_plot Display the distance matrix
1141  @param exit_after_display Exit after displaying distance
1142  matrix
1143  @param get_every Extract every nth frame
1144  @param first_and_last_frames A tuple with the first and last
1145  frames to be analyzed. Values are percentages!
1146  Default: get all frames
1147  @param density_custom_ranges For density calculation
1148  (same format as alignment_components)
1149  @param write_pdb_with_centered_coordinates
1150  @param voxel_size Used for the density output
1151  """
1152  # Track provenance information to be added to each output model
1153  prov = []
1154  self._outputdir = Path(outputdir).absolute()
1155  self._number_of_clusters = number_of_clusters
1156  for p, state in self._protocol_output:
1157  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1158 
1159  if self.test_mode:
1160  return
1161 
1162  if self.rank == 0:
1163  try:
1164  os.mkdir(outputdir)
1165  except: # noqa: E722
1166  pass
1167 
1168  if not load_distance_matrix_file:
1169  if len(self.stat_files) == 0:
1170  print("ERROR: no stat file found in the given path")
1171  return
1172  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1173  self.stat_files, self.number_of_processes)[self.rank]
1174 
1175  # read ahead to check if you need the PMI2 score key instead
1176  for k in (score_key, rmf_file_key, rmf_file_frame_key):
1177  if k in feature_keys:
1178  warnings.warn(
1179  "no need to pass " + k + " to feature_keys.",
1181  feature_keys.remove(k)
1182 
1183  best_models = IMP.pmi.io.get_best_models(
1184  my_stat_files, score_key, feature_keys, rmf_file_key,
1185  rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1186  rmf_file_list = best_models[0]
1187  rmf_file_frame_list = best_models[1]
1188  score_list = best_models[2]
1189  feature_keyword_list_dict = best_models[3]
1190 
1191 # ------------------------------------------------------------------------
1192 # collect all the files and scores
1193 # ------------------------------------------------------------------------
1194 
1195  if self.number_of_processes > 1:
1196  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1197  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1198  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1199  rmf_file_frame_list)
1200  for k in feature_keyword_list_dict:
1201  feature_keyword_list_dict[k] = \
1203  feature_keyword_list_dict[k])
1204 
1205  # sort by score and get the best scoring ones
1206  score_rmf_tuples = list(zip(score_list,
1207  rmf_file_list,
1208  rmf_file_frame_list,
1209  list(range(len(score_list)))))
1210 
1211  if density_custom_ranges:
1212  for k in density_custom_ranges:
1213  if not isinstance(density_custom_ranges[k], list):
1214  raise Exception("Density custom ranges: values must "
1215  "be lists of tuples")
1216 
1217  # keep subset of frames if requested
1218  if first_and_last_frames is not None:
1219  nframes = len(score_rmf_tuples)
1220  first_frame = int(first_and_last_frames[0] * nframes)
1221  last_frame = int(first_and_last_frames[1] * nframes)
1222  if last_frame > len(score_rmf_tuples):
1223  last_frame = -1
1224  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1225 
1226  # sort RMFs by the score_key in ascending order, and store the rank
1227  best_score_rmf_tuples = sorted(
1228  score_rmf_tuples,
1229  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1230  best_score_rmf_tuples = [t+(n,) for n, t in
1231  enumerate(best_score_rmf_tuples)]
1232  # Note in the provenance info that we only kept best-scoring models
1233  prov.append(IMP.pmi.io.FilterProvenance(
1234  "Best scoring", 0, number_of_best_scoring_models))
1235  # sort the feature scores in the same way
1236  best_score_feature_keyword_list_dict = defaultdict(list)
1237  for tpl in best_score_rmf_tuples:
1238  index = tpl[3]
1239  for f in feature_keyword_list_dict:
1240  best_score_feature_keyword_list_dict[f].append(
1241  feature_keyword_list_dict[f][index])
1242  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1243  best_score_rmf_tuples,
1244  self.number_of_processes)[self.rank]
1245 
1246  # expand the dictionaries to include ambiguous copies
1247  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1248  self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1249  if rmsd_calculation_components is not None:
1250  tmp = self._expand_ambiguity(
1251  prot_ahead, rmsd_calculation_components)
1252  if tmp != rmsd_calculation_components:
1253  print('Detected ambiguity, expand rmsd components to',
1254  tmp)
1255  rmsd_calculation_components = tmp
1256  if alignment_components is not None:
1257  tmp = self._expand_ambiguity(prot_ahead,
1258  alignment_components)
1259  if tmp != alignment_components:
1260  print('Detected ambiguity, expand alignment '
1261  'components to', tmp)
1262  alignment_components = tmp
1263 
1264 # -------------------------------------------------------------
1265 # read the coordinates
1266 # ------------------------------------------------------------
1267  rmsd_weights = IMP.pmi.io.get_bead_sizes(
1268  self.model, my_best_score_rmf_tuples[0],
1269  rmsd_calculation_components, state_number=state_number)
1271  self.model, my_best_score_rmf_tuples, alignment_components,
1272  rmsd_calculation_components, state_number=state_number)
1273 
1274  # note! the coordinates are simply float tuples, NOT decorators,
1275  # NOT Vector3D, NOR particles, because these object cannot be
1276  # serialized. We need serialization
1277  # for the parallel computation based on mpi.
1278 
1279  # dict:key=component name,val=coords per hit
1280  all_coordinates = got_coords[0]
1281 
1282  # same as above, limited to alignment bits
1283  alignment_coordinates = got_coords[1]
1284 
1285  # same as above, limited to RMSD bits
1286  rmsd_coordinates = got_coords[2]
1287 
1288  # dictionary with key=RMF, value=score rank
1289  rmf_file_name_index_dict = got_coords[3]
1290 
1291  # RMF file per hit
1292  all_rmf_file_names = got_coords[4]
1293 
1294 # ------------------------------------------------------------------------
1295 # optionally don't compute distance matrix or cluster, just write top files
1296 # ------------------------------------------------------------------------
1297  if skip_clustering:
1298  if density_custom_ranges:
1299  DensModule = IMP.pmi.analysis.GetModelDensity(
1300  density_custom_ranges, voxel=voxel_size)
1301 
1302  dircluster = os.path.join(outputdir,
1303  "all_models."+str(self.rank))
1304  try:
1305  os.mkdir(outputdir)
1306  except: # noqa: E722
1307  pass
1308  try:
1309  os.mkdir(dircluster)
1310  except: # noqa: E722
1311  pass
1312  clusstat = open(os.path.join(
1313  dircluster, "stat."+str(self.rank)+".out"), "w")
1314  for cnt, tpl in enumerate(my_best_score_rmf_tuples):
1315  rmf_name = tpl[1]
1316  rmf_frame_number = tpl[2]
1317  tmp_dict = {}
1318  index = tpl[4]
1319  for key in best_score_feature_keyword_list_dict:
1320  tmp_dict[key] = \
1321  best_score_feature_keyword_list_dict[key][index]
1322 
1323  if cnt == 0:
1324  prots, rs = \
1325  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1326  self.model, rmf_frame_number, rmf_name)
1327  else:
1328  linking_successful = \
1329  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1330  self.model, prots, rs, rmf_frame_number,
1331  rmf_name)
1332  if not linking_successful:
1333  continue
1334 
1335  if not prots:
1336  continue
1337 
1338  states = IMP.atom.get_by_type(
1339  prots[0], IMP.atom.STATE_TYPE)
1340  prot = states[state_number]
1341 
1342  # get transformation aligning coordinates of
1343  # requested tuples to the first RMF file
1344  if cnt == 0:
1345  coords_f1 = alignment_coordinates[cnt]
1346  if cnt > 0:
1347  coords_f2 = alignment_coordinates[cnt]
1348  if coords_f2:
1350  coords_f1, coords_f2)
1351  transformation = Ali.align()[1]
1352  else:
1353  transformation = \
1355 
1356  rbs = set()
1357  for p in IMP.atom.get_leaves(prot):
1358  if not IMP.core.XYZR.get_is_setup(p):
1360  IMP.core.XYZR(p).set_radius(0.0001)
1361  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1362 
1364  rbm = IMP.core.RigidBodyMember(p)
1365  rb = rbm.get_rigid_body()
1366  rbs.add(rb)
1367  else:
1369  transformation)
1370  for rb in rbs:
1371  IMP.core.transform(rb, transformation)
1372 
1373  o = IMP.pmi.output.Output()
1374  self.model.update()
1375  out_pdb_fn = os.path.join(
1376  dircluster, str(cnt)+"."+str(self.rank)+".pdb")
1377  out_rmf_fn = os.path.join(
1378  dircluster, str(cnt)+"."+str(self.rank)+".rmf3")
1379  o.init_pdb(out_pdb_fn, prot)
1380  tc = write_pdb_with_centered_coordinates
1381  o.write_pdb(out_pdb_fn,
1382  translate_to_geometric_center=tc)
1383 
1384  tmp_dict["local_pdb_file_name"] = \
1385  os.path.basename(out_pdb_fn)
1386  tmp_dict["rmf_file_full_path"] = rmf_name
1387  tmp_dict["local_rmf_file_name"] = \
1388  os.path.basename(out_rmf_fn)
1389  tmp_dict["local_rmf_frame_number"] = 0
1390 
1391  clusstat.write(str(tmp_dict)+"\n")
1392 
1393  # create a single-state System and write that
1395  IMP.Particle(self.model))
1396  h.set_name("System")
1397  h.add_child(prot)
1398  o.init_rmf(out_rmf_fn, [h], rs)
1399 
1400  o.write_rmf(out_rmf_fn)
1401  o.close_rmf(out_rmf_fn)
1402  # add the density
1403  if density_custom_ranges:
1404  DensModule.add_subunits_density(prot)
1405 
1406  if density_custom_ranges:
1407  DensModule.write_mrc(path=dircluster)
1408  del DensModule
1409  return
1410 
1411  # broadcast the coordinates
1412  if self.number_of_processes > 1:
1413  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1414  all_coordinates)
1415  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1416  all_rmf_file_names)
1417  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1418  rmf_file_name_index_dict)
1419  alignment_coordinates = IMP.pmi.tools.scatter_and_gather(
1420  alignment_coordinates)
1421  rmsd_coordinates = IMP.pmi.tools.scatter_and_gather(
1422  rmsd_coordinates)
1423 
1424  if self.rank == 0:
1425  # save needed information in external files
1426  self.save_objects(
1427  [best_score_feature_keyword_list_dict,
1428  rmf_file_name_index_dict],
1429  ".macro.pkl")
1430 
1431 # ------------------------------------------------------------------------
1432 # Calculate distance matrix and cluster
1433 # ------------------------------------------------------------------------
1434  print("setup clustering class")
1435  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1436 
1437  for n, model_coordinate_dict in enumerate(all_coordinates):
1438  # let's try to align
1439  if (alignment_components is not None
1440  and len(self.cluster_obj.all_coords) == 0):
1441  # set the first model as template coordinates
1442  self.cluster_obj.set_template(alignment_coordinates[n])
1443  self.cluster_obj.fill(all_rmf_file_names[n],
1444  rmsd_coordinates[n])
1445  print("Global calculating the distance matrix")
1446 
1447  # calculate distance matrix, all against all
1448  self.cluster_obj.dist_matrix()
1449 
1450  # perform clustering and optionally display
1451  if self.rank == 0:
1452  self.cluster_obj.do_cluster(number_of_clusters)
1453  if display_plot:
1454  if self.rank == 0:
1455  self.cluster_obj.plot_matrix(
1456  figurename=os.path.join(outputdir,
1457  'dist_matrix.pdf'))
1458  if exit_after_display:
1459  exit()
1460  self.cluster_obj.save_distance_matrix_file(
1461  file_name=distance_matrix_file)
1462 
1463 # ------------------------------------------------------------------------
1464 # Alternatively, load the distance matrix from file and cluster that
1465 # ------------------------------------------------------------------------
1466  else:
1467  if self.rank == 0:
1468  print("setup clustering class")
1469  self.cluster_obj = IMP.pmi.analysis.Clustering()
1470  self.cluster_obj.load_distance_matrix_file(
1471  file_name=distance_matrix_file)
1472  print("clustering with %s clusters" % str(number_of_clusters))
1473  self.cluster_obj.do_cluster(number_of_clusters)
1474  [best_score_feature_keyword_list_dict,
1475  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1476  if display_plot:
1477  if self.rank == 0:
1478  self.cluster_obj.plot_matrix(figurename=os.path.join(
1479  outputdir, 'dist_matrix.pdf'))
1480  if exit_after_display:
1481  exit()
1482  if self.number_of_processes > 1:
1483  self.comm.Barrier()
1484 
1485 # ------------------------------------------------------------------------
1486 # now save all information about the clusters
1487 # ------------------------------------------------------------------------
1488 
1489  if self.rank == 0:
1490  print(self.cluster_obj.get_cluster_labels())
1491  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1492  print("rank %s " % str(self.rank))
1493  print("cluster %s " % str(n))
1494  print("cluster label %s " % str(cl))
1495  print(self.cluster_obj.get_cluster_label_names(cl))
1496  cluster_size = \
1497  len(self.cluster_obj.get_cluster_label_names(cl))
1498  cluster_prov = \
1499  prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1500 
1501  # first initialize the Density class if requested
1502  if density_custom_ranges:
1503  DensModule = IMP.pmi.analysis.GetModelDensity(
1504  density_custom_ranges,
1505  voxel=voxel_size)
1506 
1507  dircluster = outputdir + "/cluster." + str(n) + "/"
1508  try:
1509  os.mkdir(dircluster)
1510  except: # noqa: E722
1511  pass
1512 
1513  rmsd_dict = {
1514  "AVERAGE_RMSD":
1515  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1516  clusstat = open(dircluster + "stat.out", "w")
1517  for k, structure_name in enumerate(
1518  self.cluster_obj.get_cluster_label_names(cl)):
1519  # extract the features
1520  tmp_dict = {}
1521  tmp_dict.update(rmsd_dict)
1522  index = rmf_file_name_index_dict[structure_name]
1523  for key in best_score_feature_keyword_list_dict:
1524  tmp_dict[
1525  key] = best_score_feature_keyword_list_dict[
1526  key][
1527  index]
1528 
1529  # get the rmf name and the frame number from the list of
1530  # frame names
1531  rmf_name = structure_name.split("|")[0]
1532  rmf_frame_number = int(structure_name.split("|")[1])
1533  clusstat.write(str(tmp_dict) + "\n")
1534 
1535  # extract frame (open or link to existing)
1536  if k == 0:
1537  prots, rs = \
1538  IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1539  self.model, rmf_frame_number, rmf_name)
1540  else:
1541  linking_successful = \
1542  IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1543  self.model, prots, rs, rmf_frame_number,
1544  rmf_name)
1545  if not linking_successful:
1546  continue
1547  if not prots:
1548  continue
1549 
1550  states = IMP.atom.get_by_type(
1551  prots[0], IMP.atom.STATE_TYPE)
1552  prot = states[state_number]
1553  if k == 0:
1554  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1555 
1556  # transform clusters onto first
1557  if k > 0:
1558  co = self.cluster_obj
1559  model_index = co.get_model_index_from_name(
1560  structure_name)
1561  transformation = co.get_transformation_to_first_member(
1562  cl, model_index)
1563  rbs = set()
1564  for p in IMP.atom.get_leaves(prot):
1565  if not IMP.core.XYZR.get_is_setup(p):
1567  IMP.core.XYZR(p).set_radius(0.0001)
1568  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1569 
1571  rbm = IMP.core.RigidBodyMember(p)
1572  rb = rbm.get_rigid_body()
1573  rbs.add(rb)
1574  else:
1576  transformation)
1577  for rb in rbs:
1578  IMP.core.transform(rb, transformation)
1579 
1580  # add the density
1581  if density_custom_ranges:
1582  DensModule.add_subunits_density(prot)
1583 
1584  # pdb writing should be optimized!
1585  o = IMP.pmi.output.Output()
1586  self.model.update()
1587  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1588  o.write_pdb(dircluster + str(k) + ".pdb")
1589 
1590  # create a single-state System and write that
1592  IMP.Particle(self.model))
1593  h.set_name("System")
1594  h.add_child(prot)
1595  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1596  o.write_rmf(dircluster + str(k) + ".rmf3")
1597  o.close_rmf(dircluster + str(k) + ".rmf3")
1598 
1599  del o
1600  # IMP.atom.destroy(prot)
1601 
1602  if density_custom_ranges:
1603  DensModule.write_mrc(path=dircluster)
1604  del DensModule
1605 
1606  if self.number_of_processes > 1:
1607  self.comm.Barrier()
1608 
1609  def get_cluster_rmsd(self, cluster_num):
1610  if self.cluster_obj is None:
1611  raise Exception("Run clustering first")
1612  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1613 
1614  def save_objects(self, objects, file_name):
1615  import pickle
1616  outf = open(file_name, 'wb')
1617  pickle.dump(objects, outf)
1618  outf.close()
1619 
1620  def load_objects(self, file_name):
1621  import pickle
1622  inputf = open(file_name, 'rb')
1623  objects = pickle.load(inputf)
1624  inputf.close()
1625  return objects
1626 
1627 
1629 
1630  """
1631  This class contains analysis utilities to investigate ReplicaExchange
1632  results.
1633  """
1634 
1635  ########################
1636  # Construction and Setup
1637  ########################
1638 
1639  def __init__(self, model, stat_files, best_models=None, score_key=None,
1640  alignment=True):
1641  """
1642  Construction of the Class.
1643  @param model IMP.Model()
1644  @param stat_files list of string. Can be ascii stat files,
1645  rmf files names
1646  @param best_models Integer. Number of best scoring models,
1647  if None: all models will be read
1648  @param alignment boolean (Default=True). Align before computing
1649  the rmsd.
1650  """
1651 
1652  self.model = model
1653  self.best_models = best_models
1655  model, stat_files, self.best_models, score_key, cache=True)
1657  StatHierarchyHandler=self.stath0)
1658 
1659  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(
1661  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(
1663  self.sel0_rmsd = IMP.atom.Selection(self.stath0)
1664  self.sel1_rmsd = IMP.atom.Selection(self.stath1)
1665  self.sel0_alignment = IMP.atom.Selection(self.stath0)
1666  self.sel1_alignment = IMP.atom.Selection(self.stath1)
1667  self.clusters = []
1668  # fill the cluster list with a single cluster containing all models
1669  c = IMP.pmi.output.Cluster(0)
1670  self.clusters.append(c)
1671  for n0 in range(len(self.stath0)):
1672  c.add_member(n0)
1673  self.pairwise_rmsd = {}
1674  self.pairwise_molecular_assignment = {}
1675  self.alignment = alignment
1676  self.symmetric_molecules = {}
1677  self.issymmetricsel = {}
1678  self.update_seldicts()
1679  self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1680  IMP.atom.get_leaves(self.stath0))
1681  self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1682  IMP.atom.get_leaves(self.stath1))
1683 
1684  def set_rmsd_selection(self, **kwargs):
1685  """
1686  Setup the selection onto which the rmsd is computed
1687  @param kwargs use IMP.atom.Selection keywords
1688  """
1689  self.sel0_rmsd = IMP.atom.Selection(self.stath0, **kwargs)
1690  self.sel1_rmsd = IMP.atom.Selection(self.stath1, **kwargs)
1691  self.update_seldicts()
1692 
1693  def set_symmetric(self, molecule_name):
1694  """
1695  Store names of symmetric molecules
1696  """
1697  self.symmetric_molecules[molecule_name] = 0
1698  self.update_seldicts()
1699 
1700  def set_alignment_selection(self, **kwargs):
1701  """
1702  Setup the selection onto which the alignment is computed
1703  @param kwargs use IMP.atom.Selection keywords
1704  """
1705  self.sel0_alignment = IMP.atom.Selection(self.stath0, **kwargs)
1706  self.sel1_alignment = IMP.atom.Selection(self.stath1, **kwargs)
1707 
1708  ######################
1709  # Clustering functions
1710  ######################
1711  def clean_clusters(self):
1712  for c in self.clusters:
1713  del c
1714  self.clusters = []
1715 
1716  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1717  """
1718  Cluster the models based on RMSD.
1719  @param rmsd_cutoff Float the distance cutoff in Angstrom
1720  @param metric (Default=IMP.atom.get_rmsd) the metric that will
1721  be used to compute rmsds
1722  """
1723  self.clean_clusters()
1724  not_clustered = set(range(len(self.stath1)))
1725  while len(not_clustered) > 0:
1726  self.aggregate(not_clustered, rmsd_cutoff, metric)
1727  self.update_clusters()
1728 
1729  def refine(self, rmsd_cutoff=10):
1730  """
1731  Refine the clusters by merging the ones whose centers are close
1732  @param rmsd_cutoff cutoff distance in Angstorms
1733  """
1734  clusters_copy = self.clusters
1735  for c0, c1 in itertools.combinations(self.clusters, 2):
1736  if c0.center_index is None:
1737  self.compute_cluster_center(c0)
1738  if c1.center_index is None:
1739  self.compute_cluster_center(c1)
1740  _ = self.stath0[c0.center_index]
1741  _ = self.stath1[c1.center_index]
1742  rmsd, molecular_assignment = self.rmsd()
1743  if rmsd <= rmsd_cutoff:
1744  if c1 in self.clusters:
1745  clusters_copy.remove(c1)
1746  c0 += c1
1747  self.clusters = clusters_copy
1748  self.update_clusters()
1749 
1750  ####################
1751  # Input Output
1752  ####################
1753 
1754  def set_cluster_assignments(self, cluster_ids):
1755  if len(cluster_ids) != len(self.stath0):
1756  raise ValueError('cluster ids has to be same length as '
1757  'number of frames')
1758 
1759  self.clusters = []
1760  for i in sorted(list(set(cluster_ids))):
1761  self.clusters.append(IMP.pmi.output.Cluster(i))
1762  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1763  self.clusters[idx].add_member(i, d)
1764 
1765  def get_cluster_data(self, cluster):
1766  """
1767  Return the model data from a cluster
1768  @param cluster IMP.pmi.output.Cluster object
1769  """
1770  data = []
1771  for m in cluster:
1772  data.append(m)
1773  return data
1774 
1775  def save_data(self, filename='data.pkl'):
1776  """
1777  Save the data for the whole models into a pickle file
1778  @param filename string
1779  """
1780  self.stath0.save_data(filename)
1781 
1782  def set_data(self, data):
1783  """
1784  Set the data from an external IMP.pmi.output.Data
1785  @param data IMP.pmi.output.Data
1786  """
1787  self.stath0.data = data
1788  self.stath1.data = data
1789 
1790  def load_data(self, filename='data.pkl'):
1791  """
1792  Load the data from an external pickled file
1793  @param filename string
1794  """
1795  self.stath0.load_data(filename)
1796  self.stath1.load_data(filename)
1797  self.best_models = len(self.stath0)
1798 
1799  def add_cluster(self, rmf_name_list):
1800  c = IMP.pmi.output.Cluster(len(self.clusters))
1801  print("creating cluster index "+str(len(self.clusters)))
1802  self.clusters.append(c)
1803  current_len = len(self.stath0)
1804 
1805  for rmf in rmf_name_list:
1806  print("adding rmf "+rmf)
1807  self.stath0.add_stat_file(rmf)
1808  self.stath1.add_stat_file(rmf)
1809 
1810  for n0 in range(current_len, len(self.stath0)):
1811  d0 = self.stath0[n0]
1812  c.add_member(n0, d0)
1813  self.update_clusters()
1814 
1815  def save_clusters(self, filename='clusters.pkl'):
1816  """
1817  Save the clusters into a pickle file
1818  @param filename string
1819  """
1820  try:
1821  import cPickle as pickle
1822  except ImportError:
1823  import pickle
1824  fl = open(filename, 'wb')
1825  pickle.dump(self.clusters, fl)
1826 
1827  def load_clusters(self, filename='clusters.pkl', append=False):
1828  """
1829  Load the clusters from a pickle file
1830  @param filename string
1831  @param append bool (Default=False), if True. append the clusters
1832  to the ones currently present
1833  """
1834  try:
1835  import cPickle as pickle
1836  except ImportError:
1837  import pickle
1838  fl = open(filename, 'rb')
1839  self.clean_clusters()
1840  if append:
1841  self.clusters += pickle.load(fl)
1842  else:
1843  self.clusters = pickle.load(fl)
1844  self.update_clusters()
1845 
1846  ####################
1847  # Analysis Functions
1848  ####################
1849 
1850  def compute_cluster_center(self, cluster):
1851  """
1852  Compute the cluster center for a given cluster
1853  """
1854  member_distance = defaultdict(float)
1855 
1856  for n0, n1 in itertools.combinations(cluster.members, 2):
1857  _ = self.stath0[n0]
1858  _ = self.stath1[n1]
1859  rmsd, _ = self.rmsd()
1860  member_distance[n0] += rmsd
1861 
1862  if len(member_distance) > 0:
1863  cluster.center_index = min(member_distance,
1864  key=member_distance.get)
1865  else:
1866  cluster.center_index = cluster.members[0]
1867 
1868  def save_coordinates(self, cluster, rmf_name=None, reference="Absolute",
1869  prefix="./"):
1870  """
1871  Save the coordinates of the current cluster a single rmf file
1872  """
1873  print("saving coordinates", cluster)
1874  if self.alignment:
1875  self.set_reference(reference, cluster)
1876  o = IMP.pmi.output.Output()
1877  if rmf_name is None:
1878  rmf_name = prefix+'/'+str(cluster.cluster_id)+".rmf3"
1879 
1880  _ = self.stath1[cluster.members[0]]
1881  self.model.update()
1882  o.init_rmf(rmf_name, [self.stath1])
1883  for n1 in cluster.members:
1884  _ = self.stath1[n1]
1885  self.model.update()
1887  if self.alignment:
1888  self.align()
1889  o.write_rmf(rmf_name)
1891  o.close_rmf(rmf_name)
1892 
1893  def prune_redundant_structures(self, rmsd_cutoff=10):
1894  """
1895  remove structures that are similar
1896  append it to a new cluster
1897  """
1898  print("pruning models")
1899  selected = 0
1900  filtered = [selected]
1901  remaining = range(1, len(self.stath1), 10)
1902 
1903  while len(remaining) > 0:
1904  d0 = self.stath0[selected]
1905  rm = []
1906  for n1 in remaining:
1907  _ = self.stath1[n1]
1908  if self.alignment:
1909  self.align()
1910  d, _ = self.rmsd()
1911  if d <= rmsd_cutoff:
1912  rm.append(n1)
1913  print("pruning model %s, similar to model %s, rmsd %s"
1914  % (str(n1), str(selected), str(d)))
1915  remaining = [x for x in remaining if x not in rm]
1916  if len(remaining) == 0:
1917  break
1918  selected = remaining[0]
1919  filtered.append(selected)
1920  remaining.pop(0)
1921  c = IMP.pmi.output.Cluster(len(self.clusters))
1922  self.clusters.append(c)
1923  for n0 in filtered:
1924  d0 = self.stath0[n0]
1925  c.add_member(n0, d0)
1926  self.update_clusters()
1927 
1928  def precision(self, cluster):
1929  """
1930  Compute the precision of a cluster
1931  """
1932  npairs = 0
1933  rmsd = 0.0
1934  precision = None
1935 
1936  if cluster.center_index is not None:
1937  members1 = [cluster.center_index]
1938  else:
1939  members1 = cluster.members
1940 
1941  for n0 in members1:
1942  _ = self.stath0[n0]
1943  for n1 in cluster.members:
1944  if n0 != n1:
1945  npairs += 1
1946  _ = self.stath1[n1]
1948  tmp_rmsd, _ = self.rmsd()
1949  rmsd += tmp_rmsd
1951 
1952  if npairs > 0:
1953  precision = rmsd/npairs
1954  cluster.precision = precision
1955  return precision
1956 
1957  def bipartite_precision(self, cluster1, cluster2, verbose=False):
1958  """
1959  Compute the bipartite precision (ie the cross-precision)
1960  between two clusters
1961  """
1962  npairs = 0
1963  rmsd = 0.0
1964  for cn0, n0 in enumerate(cluster1.members):
1965  _ = self.stath0[n0]
1966  for cn1, n1 in enumerate(cluster2.members):
1967  _ = self.stath1[n1]
1968  tmp_rmsd, _ = self.rmsd()
1969  if verbose:
1970  print("--- rmsd between structure %s and structure "
1971  "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1972  rmsd += tmp_rmsd
1973  npairs += 1
1974  precision = rmsd/npairs
1975  return precision
1976 
1977  def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1978  cluster_ref=None, step=1):
1979  """
1980  Compute the Root mean square fluctuations
1981  of a molecule in a cluster
1982  Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1983  residue indexes and the value is the rmsf
1984  """
1985  rmsf = IMP.pmi.tools.OrderedDict()
1986 
1987  # assumes that residue indexes are identical for stath0 and stath1
1988  if cluster_ref is not None:
1989  if cluster_ref.center_index is not None:
1990  members0 = [cluster_ref.center_index]
1991  else:
1992  members0 = cluster_ref.members
1993  else:
1994  if cluster.center_index is not None:
1995  members0 = [cluster.center_index]
1996  else:
1997  members0 = cluster.members
1998 
1999  s0 = IMP.atom.Selection(self.stath0, molecule=molecule, resolution=1,
2000  copy_index=copy_index, state_index=state_index)
2001  ps0 = s0.get_selected_particles()
2002  # get the residue indexes
2003  residue_indexes = list(IMP.pmi.tools.OrderedSet(
2004  [IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
2005 
2006  # get the corresponding particles
2007  npairs = 0
2008  for n0 in members0:
2009  d0 = self.stath0[n0]
2010  for n1 in cluster.members[::step]:
2011  if n0 != n1:
2012  print("--- rmsf %s %s" % (str(n0), str(n1)))
2014 
2015  s1 = IMP.atom.Selection(
2016  self.stath1, molecule=molecule,
2017  residue_indexes=residue_indexes, resolution=1,
2018  copy_index=copy_index, state_index=state_index)
2019  ps1 = s1.get_selected_particles()
2020 
2021  d1 = self.stath1[n1]
2022  if self.alignment:
2023  self.align()
2024  for n, (p0, p1) in enumerate(zip(ps0, ps1)):
2025  r = residue_indexes[n]
2026  d0 = IMP.core.XYZ(p0)
2027  d1 = IMP.core.XYZ(p1)
2028  if r in rmsf:
2029  rmsf[r] += IMP.core.get_distance(d0, d1)
2030  else:
2031  rmsf[r] = IMP.core.get_distance(d0, d1)
2032  npairs += 1
2034  for r in rmsf:
2035  rmsf[r] /= npairs
2036 
2037  for stath in [self.stath0, self.stath1]:
2038  if molecule not in self.symmetric_molecules:
2039  s = IMP.atom.Selection(
2040  stath, molecule=molecule, residue_index=r,
2041  resolution=1, copy_index=copy_index,
2042  state_index=state_index)
2043  else:
2044  s = IMP.atom.Selection(
2045  stath, molecule=molecule, residue_index=r,
2046  resolution=1, state_index=state_index)
2047 
2048  ps = s.get_selected_particles()
2049  for p in ps:
2051  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
2052  else:
2054 
2055  return rmsf
2056 
2057  def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2058  reference="Absolute", prefix="./", step=1):
2059  if self.alignment:
2060  self.set_reference(reference, cluster)
2061  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
2062  voxel=voxel_size)
2063 
2064  for n1 in cluster.members[::step]:
2065  print("density "+str(n1))
2066  _ = self.stath1[n1]
2068  if self.alignment:
2069  self.align()
2070  dens.add_subunits_density(self.stath1)
2072  dens.write_mrc(path=prefix+'/', suffix=str(cluster.cluster_id))
2073  del dens
2074 
2075  def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2076  consolidate=False, molecules=None, prefix='./',
2077  reference="Absolute"):
2078  if self.alignment:
2079  self.set_reference(reference, cluster)
2080  import numpy as np
2081  import matplotlib.pyplot as plt
2082  import matplotlib.cm as cm
2083  from scipy.spatial.distance import cdist
2084  import IMP.pmi.topology
2085  if molecules is None:
2087  for mol in IMP.pmi.tools.get_molecules(
2088  IMP.atom.get_leaves(self.stath1))]
2089  else:
2091  for mol in IMP.pmi.tools.get_molecules(
2093  self.stath1,
2094  molecules=molecules).get_selected_particles())]
2095  unique_copies = [mol for mol in mols if mol.get_copy_index() == 0]
2096  mol_names_unique = dict((mol.get_name(), mol) for mol in unique_copies)
2097  total_len_unique = sum(max(mol.get_residue_indexes())
2098  for mol in unique_copies)
2099 
2100  index_dict = {}
2101  prev_stop = 0
2102 
2103  if not consolidate:
2104  for mol in mols:
2105  seqlen = max(mol.get_residue_indexes())
2106  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2107  prev_stop += seqlen
2108 
2109  else:
2110  for mol in unique_copies:
2111  seqlen = max(mol.get_residue_indexes())
2112  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2113  prev_stop += seqlen
2114 
2115  for ncl, n1 in enumerate(cluster.members):
2116  print(ncl)
2117  _ = self.stath1[n1]
2118  coord_dict = IMP.pmi.tools.OrderedDict()
2119  for mol in mols:
2120  rindexes = mol.get_residue_indexes()
2121  coords = np.ones((max(rindexes), 3))
2122  for rnum in rindexes:
2123  sel = IMP.atom.Selection(mol, residue_index=rnum,
2124  resolution=1)
2125  selpart = sel.get_selected_particles()
2126  if len(selpart) == 0:
2127  continue
2128  selpart = selpart[0]
2129  coords[rnum - 1, :] = \
2130  IMP.core.XYZ(selpart).get_coordinates()
2131  coord_dict[mol] = coords
2132 
2133  if not consolidate:
2134  coords = np.concatenate(list(coord_dict.values()))
2135  dists = cdist(coords, coords)
2136  binary_dists = np.where((dists <= contact_threshold)
2137  & (dists >= 1.0), 1.0, 0.0)
2138  else:
2139  binary_dists_dict = {}
2140  for mol1 in mols:
2141  len1 = max(mol1.get_residue_indexes())
2142  for mol2 in mols:
2143  name1 = mol1.get_name()
2144  name2 = mol2.get_name()
2145  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2146  if (name1, name2) not in binary_dists_dict:
2147  binary_dists_dict[(name1, name2)] = \
2148  np.zeros((len1, len1))
2149  binary_dists_dict[(name1, name2)] += \
2150  np.where((dists <= contact_threshold)
2151  & (dists >= 1.0), 1.0, 0.0)
2152  binary_dists = np.zeros((total_len_unique, total_len_unique))
2153 
2154  for name1, name2 in binary_dists_dict:
2155  r1 = index_dict[mol_names_unique[name1]]
2156  r2 = index_dict[mol_names_unique[name2]]
2157  binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2158  np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2159  1.0, 0.0)
2160 
2161  if ncl == 0:
2162  dist_maps = [dists]
2163  av_dist_map = dists
2164  contact_freqs = binary_dists
2165  else:
2166  dist_maps.append(dists)
2167  av_dist_map += dists
2168  contact_freqs += binary_dists
2169 
2170  if log_scale:
2171  contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2172  else:
2173  contact_freqs = 1.0/len(cluster)*contact_freqs
2174  av_dist_map = 1.0/len(cluster)*contact_freqs
2175 
2176  fig = plt.figure(figsize=(100, 100))
2177  ax = fig.add_subplot(111)
2178  ax.set_xticks([])
2179  ax.set_yticks([])
2180  gap_between_components = 50
2181  colormap = cm.Blues
2182  colornorm = None
2183 
2184  if not consolidate:
2185  sorted_tuple = sorted(
2187  mol).get_extended_name(), mol) for mol in mols)
2188  prot_list = list(zip(*sorted_tuple))[1]
2189  else:
2190  sorted_tuple = sorted(
2191  (IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(), mol)
2192  for mol in unique_copies)
2193  prot_list = list(zip(*sorted_tuple))[1]
2194 
2195  prot_listx = prot_list
2196  nresx = gap_between_components + \
2197  sum([max(mol.get_residue_indexes())
2198  + gap_between_components for mol in prot_listx])
2199 
2200  # set the list of proteins on the y axis
2201  prot_listy = prot_list
2202  nresy = gap_between_components + \
2203  sum([max(mol.get_residue_indexes())
2204  + gap_between_components for mol in prot_listy])
2205 
2206  # this is the residue offset for each protein
2207  resoffsetx = {}
2208  resendx = {}
2209  res = gap_between_components
2210  for mol in prot_listx:
2211  resoffsetx[mol] = res
2212  res += max(mol.get_residue_indexes())
2213  resendx[mol] = res
2214  res += gap_between_components
2215 
2216  resoffsety = {}
2217  resendy = {}
2218  res = gap_between_components
2219  for mol in prot_listy:
2220  resoffsety[mol] = res
2221  res += max(mol.get_residue_indexes())
2222  resendy[mol] = res
2223  res += gap_between_components
2224 
2225  resoffsetdiagonal = {}
2226  res = gap_between_components
2227  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2228  resoffsetdiagonal[mol] = res
2229  res += max(mol.get_residue_indexes())
2230  res += gap_between_components
2231 
2232  # plot protein boundaries
2233  xticks = []
2234  xlabels = []
2235  for n, prot in enumerate(prot_listx):
2236  res = resoffsetx[prot]
2237  end = resendx[prot]
2238  for proty in prot_listy:
2239  resy = resoffsety[proty]
2240  endy = resendy[proty]
2241  ax.plot([res, res], [resy, endy], linestyle='-',
2242  color='gray', lw=0.4)
2243  ax.plot([end, end], [resy, endy], linestyle='-',
2244  color='gray', lw=0.4)
2245  xticks.append((float(res) + float(end)) / 2)
2247  prot).get_extended_name())
2248 
2249  yticks = []
2250  ylabels = []
2251  for n, prot in enumerate(prot_listy):
2252  res = resoffsety[prot]
2253  end = resendy[prot]
2254  for protx in prot_listx:
2255  resx = resoffsetx[protx]
2256  endx = resendx[protx]
2257  ax.plot([resx, endx], [res, res], linestyle='-',
2258  color='gray', lw=0.4)
2259  ax.plot([resx, endx], [end, end], linestyle='-',
2260  color='gray', lw=0.4)
2261  yticks.append((float(res) + float(end)) / 2)
2263  prot).get_extended_name())
2264 
2265  # plot the contact map
2266 
2267  tmp_array = np.zeros((nresx, nresy))
2268  ret = {}
2269  for px in prot_listx:
2270  for py in prot_listy:
2271  resx = resoffsetx[px]
2272  lengx = resendx[px] - 1
2273  resy = resoffsety[py]
2274  lengy = resendy[py] - 1
2275  indexes_x = index_dict[px]
2276  minx = min(indexes_x)
2277  maxx = max(indexes_x)
2278  indexes_y = index_dict[py]
2279  miny = min(indexes_y)
2280  maxy = max(indexes_y)
2281  tmp_array[resx:lengx, resy:lengy] = \
2282  contact_freqs[minx:maxx, miny:maxy]
2283  ret[(px, py)] = np.argwhere(
2284  contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2285 
2286  ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2287  origin='lower', alpha=0.6, interpolation='nearest')
2288 
2289  ax.set_xticks(xticks)
2290  ax.set_xticklabels(xlabels, rotation=90)
2291  ax.set_yticks(yticks)
2292  ax.set_yticklabels(ylabels)
2293  plt.setp(ax.get_xticklabels(), fontsize=6)
2294  plt.setp(ax.get_yticklabels(), fontsize=6)
2295 
2296  # display and write to file
2297  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2298  [i.set_linewidth(2.0) for i in ax.spines.values()]
2299 
2300  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf",
2301  dpi=300, transparent="False")
2302  return ret
2303 
2304  def plot_rmsd_matrix(self, filename):
2305  self.compute_all_pairwise_rmsd()
2306  distance_matrix = np.zeros(
2307  (len(self.stath0), len(self.stath1)))
2308  for (n0, n1) in self.pairwise_rmsd:
2309  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2310 
2311  import matplotlib as mpl
2312  mpl.use('Agg')
2313  import matplotlib.pylab as pl
2314  from scipy.cluster import hierarchy as hrc
2315 
2316  fig = pl.figure(figsize=(10, 8))
2317  ax = fig.add_subplot(212)
2318  dendrogram = hrc.dendrogram(
2319  hrc.linkage(distance_matrix),
2320  color_threshold=7,
2321  no_labels=True)
2322  leaves_order = dendrogram['leaves']
2323  ax.set_xlabel('Model')
2324  ax.set_ylabel('RMSD [Angstroms]')
2325 
2326  ax2 = fig.add_subplot(221)
2327  cax = ax2.imshow(
2328  distance_matrix[leaves_order, :][:, leaves_order],
2329  interpolation='nearest')
2330  cb = fig.colorbar(cax)
2331  cb.set_label('RMSD [Angstroms]')
2332  ax2.set_xlabel('Model')
2333  ax2.set_ylabel('Model')
2334 
2335  pl.savefig(filename, dpi=300)
2336  pl.close(fig)
2337 
2338  ####################
2339  # Internal Functions
2340  ####################
2341 
2342  def update_clusters(self):
2343  """
2344  Update the cluster id numbers
2345  """
2346  for n, c in enumerate(self.clusters):
2347  c.cluster_id = n
2348 
2349  def get_molecule(self, hier, name, copy):
2350  s = IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2351  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2352 
2353  def update_seldicts(self):
2354  """
2355  Update the seldicts
2356  """
2357  self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2358  self.sel0_rmsd.get_selected_particles())
2359  self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2360  self.sel1_rmsd.get_selected_particles())
2361  for mol in self.seldict0:
2362  for sel in self.seldict0[mol]:
2363  self.issymmetricsel[sel] = False
2364  for mol in self.symmetric_molecules:
2365  self.symmetric_molecules[mol] = len(self.seldict0[mol])
2366  for sel in self.seldict0[mol]:
2367  self.issymmetricsel[sel] = True
2368 
2369  def align(self):
2371  self.sel1_alignment, self.sel0_alignment)
2372 
2373  for rb in self.rbs1:
2374  IMP.core.transform(rb, tr)
2375 
2376  for bead in self.beads1:
2377  try:
2378  IMP.core.transform(IMP.core.XYZ(bead), tr)
2379  except: # noqa: E722
2380  continue
2381 
2382  self.model.update()
2383 
2384  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2385  '''
2386  initial filling of the clusters.
2387  '''
2388  n0 = idxs.pop()
2389  print("clustering model "+str(n0))
2390  d0 = self.stath0[n0]
2391  c = IMP.pmi.output.Cluster(len(self.clusters))
2392  print("creating cluster index "+str(len(self.clusters)))
2393  self.clusters.append(c)
2394  c.add_member(n0, d0)
2395  clustered = set([n0])
2396  for n1 in idxs:
2397  print("--- trying to add model " + str(n1) + " to cluster "
2398  + str(len(self.clusters)))
2399  d1 = self.stath1[n1]
2400  if self.alignment:
2401  self.align()
2402  rmsd, _ = self.rmsd(metric=metric)
2403  if rmsd < rmsd_cutoff:
2404  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2405  c.add_member(n1, d1)
2406  clustered.add(n1)
2407  else:
2408  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2409  idxs -= clustered
2410 
2411  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2412  """
2413  merge the clusters that have close members
2414  @param rmsd_cutoff cutoff distance in Angstorms
2415  """
2416  # before merging, clusters are spheres of radius rmsd_cutoff
2417  # centered on the 1st element
2418  # here we only try to merge clusters whose centers are closer
2419  # than 2*rmsd_cutoff
2420  to_merge = []
2421  print("merging...")
2422  for c0, c1 in filter(lambda x: len(x[0].members) > 1,
2423  itertools.combinations(self.clusters, 2)):
2424  n0, n1 = [c.members[0] for c in (c0, c1)]
2425  _ = self.stath0[n0]
2426  _ = self.stath1[n1]
2427  rmsd, _ = self.rmsd()
2428  if (rmsd < 2*rmsd_cutoff and
2429  self.have_close_members(c0, c1, rmsd_cutoff, metric)):
2430  to_merge.append((c0, c1))
2431 
2432  for c0, c in reversed(to_merge):
2433  self.merge(c0, c)
2434 
2435  # keep only full clusters
2436  self.clusters = [c for c in
2437  filter(lambda x: len(x.members) > 0, self.clusters)]
2438 
2439  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2440  '''
2441  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2442  '''
2443  print("check close members for clusters " + str(c0.cluster_id) +
2444  " and " + str(c1.cluster_id))
2445  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2446  _ = self.stath0[n0]
2447  _ = self.stath1[n1]
2448  rmsd, _ = self.rmsd(metric=metric)
2449  if rmsd < rmsd_cutoff:
2450  return True
2451 
2452  return False
2453 
2454  def merge(self, c0, c1):
2455  '''
2456  merge two clusters
2457  '''
2458  c0 += c1
2459  c1.members = []
2460  c1.data = {}
2461 
2462  def rmsd_helper(self, sels0, sels1, metric):
2463  '''
2464  a function that returns the permutation best_sel of sels0 that
2465  minimizes metric
2466  '''
2467  best_rmsd2 = float('inf')
2468  best_sel = None
2469  if self.issymmetricsel[sels0[0]]:
2470  # this cases happens when symmetries were defined
2471  N = len(sels0)
2472  for offset in range(N):
2473  sels = [sels0[(offset+i) % N] for i in range(N)]
2474  sel0 = sels[0]
2475  sel1 = sels1[0]
2476  r = metric(sel0, sel1)
2477  rmsd2 = r*r*N
2478  if rmsd2 < best_rmsd2:
2479  best_rmsd2 = rmsd2
2480  best_sel = sels
2481  else:
2482  for sels in itertools.permutations(sels0):
2483  rmsd2 = 0.0
2484  for sel0, sel1 in itertools.takewhile(
2485  lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2486  r = metric(sel0, sel1)
2487  rmsd2 += r*r
2488  if rmsd2 < best_rmsd2:
2489  best_rmsd2 = rmsd2
2490  best_sel = sels
2491  return best_sel, best_rmsd2
2492 
2493  def compute_all_pairwise_rmsd(self):
2494  for d0 in self.stath0:
2495  for d1 in self.stath1:
2496  rmsd, _ = self.rmsd()
2497 
2498  def rmsd(self, metric=IMP.atom.get_rmsd):
2499  '''
2500  Computes the RMSD. Resolves ambiguous pairs assignments
2501  '''
2502  # here we memoize the rmsd and molecular assignment so that it's
2503  # not done multiple times
2504  n0 = self.stath0.current_index
2505  n1 = self.stath1.current_index
2506  if ((n0, n1) in self.pairwise_rmsd) \
2507  and ((n0, n1) in self.pairwise_molecular_assignment):
2508  return (self.pairwise_rmsd[(n0, n1)],
2509  self.pairwise_molecular_assignment[(n0, n1)])
2510 
2511  if self.alignment:
2512  self.align()
2513  # if it's not yet memoized
2514  total_rmsd = 0.0
2515  total_N = 0
2516  # this is a dictionary which keys are the molecule names, and values
2517  # are the list of IMP.atom.Selection for all molecules that share
2518  # the molecule name
2519  molecular_assignment = {}
2520  for molname, sels0 in self.seldict0.items():
2521  sels_best_order, best_rmsd2 = \
2522  self.rmsd_helper(sels0, self.seldict1[molname], metric)
2523 
2524  Ncoords = len(sels_best_order[0].get_selected_particles())
2525  Ncopies = len(self.seldict1[molname])
2526  total_rmsd += Ncoords*best_rmsd2
2527  total_N += Ncoords*Ncopies
2528 
2529  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2530  p0 = sel0.get_selected_particles()[0]
2531  p1 = sel1.get_selected_particles()[0]
2532  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2533  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2534  c0 = IMP.atom.Copy(m0).get_copy_index()
2535  c1 = IMP.atom.Copy(m1).get_copy_index()
2536  molecular_assignment[(molname, c0)] = (molname, c1)
2537 
2538  total_rmsd = math.sqrt(total_rmsd/total_N)
2539 
2540  self.pairwise_rmsd[(n0, n1)] = total_rmsd
2541  self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2542  self.pairwise_rmsd[(n1, n0)] = total_rmsd
2543  self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2544  return total_rmsd, molecular_assignment
2545 
2546  def set_reference(self, reference, cluster):
2547  """
2548  Fix the reference structure for structural alignment, rmsd and
2549  chain assignment
2550 
2551  @param reference can be either "Absolute" (cluster center of the
2552  first cluster) or Relative (cluster center of the current
2553  cluster)
2554  """
2555  if reference == "Absolute":
2556  _ = self.stath0[0]
2557  elif reference == "Relative":
2558  if cluster.center_index:
2559  n0 = cluster.center_index
2560  else:
2561  n0 = cluster.members[0]
2562  _ = self.stath0[n0]
2563 
2565  """
2566  compute the molecular assignments between multiple copies
2567  of the same sequence. It changes the Copy index of Molecules
2568  """
2569  _ = self.stath1[n1]
2570  _, molecular_assignment = self.rmsd()
2571  for (m0, c0), (m1, c1) in molecular_assignment.items():
2572  mol0 = self.molcopydict0[m0][c0]
2573  mol1 = self.molcopydict1[m1][c1]
2574  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2575  p1 = IMP.atom.Copy(mol1).get_particle()
2576  p1.set_value(cik0, c0)
2577 
2579  """
2580  Undo the Copy index assignment
2581  """
2582  _ = self.stath1[n1]
2583  _, molecular_assignment = self.rmsd()
2584  for (m0, c0), (m1, c1) in molecular_assignment.items():
2585  mol0 = self.molcopydict0[m0][c0]
2586  mol1 = self.molcopydict1[m1][c1]
2587  cik0 = IMP.atom.Copy(mol0).get_copy_index_key()
2588  p1 = IMP.atom.Copy(mol1).get_particle()
2589  p1.set_value(cik0, c1)
2590 
2591  ####################
2592  # Container Functions
2593  ####################
2594 
2595  def __repr__(self):
2596  s = "AnalysisReplicaExchange\n"
2597  s += "---- number of clusters %s \n" % str(len(self.clusters))
2598  s += "---- number of models %s \n" % str(len(self.stath0))
2599  return s
2600 
2601  def __getitem__(self, int_slice_adaptor):
2602  if isinstance(int_slice_adaptor, int):
2603  return self.clusters[int_slice_adaptor]
2604  elif isinstance(int_slice_adaptor, slice):
2605  return self.__iter__(int_slice_adaptor)
2606  else:
2607  raise TypeError("Unknown Type")
2608 
2609  def __len__(self):
2610  return len(self.clusters)
2611 
2612  def __iter__(self, slice_key=None):
2613  if slice_key is None:
2614  for i in range(len(self)):
2615  yield self[i]
2616  else:
2617  for i in range(len(self))[slice_key]:
2618  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2498
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignment.
Definition: macros.py:2546
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1827
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1078
def precision
Compute the precision of a cluster.
Definition: macros.py:1928
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:941
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:112
A container for models organized into clusters.
Definition: output.py:1510
Sample using molecular dynamics.
Definition: samplers.py:327
def aggregate
initial filling of the clusters.
Definition: macros.py:2384
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:634
A macro to help setup and run replica exchange.
Definition: macros.py:70
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:635
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1893
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1977
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1175
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1684
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1765
def __init__
Construction of the Class.
Definition: macros.py:1639
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:846
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1782
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2578
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1700
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2462
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1868
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1085
def apply_molecular_assignments
compute the molecular assignments between multiple copies of the same sequence.
Definition: macros.py:2564
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1628
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:635
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2411
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:994
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1850
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:1005
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def __init__
Constructor.
Definition: macros.py:142
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:22
def update_seldicts
Update the seldicts.
Definition: macros.py:2353
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2342
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:555
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1729
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:203
Collect timing information.
Definition: tools.py:127
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1693
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9522
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:47
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2454
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:688
The general base class for IMP exceptions.
Definition: exception.h:48
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1257
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1775
Class to handle individual particles of a Model object.
Definition: Particle.h:43
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:857
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1957
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:664
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1716
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1815
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2439
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1151
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1790
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:512
Sample using replica exchange.
Definition: samplers.py:432
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27