IMP logo
IMP Reference Guide  2.12.0
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
7 import IMP.pmi.tools
8 import IMP.pmi.samplers
9 import IMP.pmi.output
10 import IMP.pmi.analysis
11 import IMP.pmi.io
12 import IMP.rmf
13 import IMP.isd
14 import IMP.pmi.dof
15 import RMF
16 import os
17 import glob
18 from operator import itemgetter
19 from collections import defaultdict
20 import numpy as np
21 import string
22 import itertools
23 import warnings
24 import math
25 import warnings
26 
27 class _RMFRestraints(object):
28  """All restraints that are written out to the RMF file"""
29  def __init__(self, model, user_restraints):
30  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
31  self._user_restraints = user_restraints if user_restraints else []
32 
33  def __len__(self):
34  return (len(self._user_restraints)
35  + self._rmf_rs.get_number_of_restraints())
36 
37  def __bool__(self):
38  return len(self) > 0
39  __nonzero__ = __bool__ # Python 2 compatibility
40 
41  def __getitem__(self, i):
42  class FakePMIWrapper(object):
43  def __init__(self, r):
44  self.r = IMP.RestraintSet.get_from(r)
45  get_restraint = lambda self: self.r
46  lenuser = len(self._user_restraints)
47  if 0 <= i < lenuser:
48  return self._user_restraints[i]
49  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
50  r = self._rmf_rs.get_restraint(i - lenuser)
51  return FakePMIWrapper(r)
52  else:
53  raise IndexError("Out of range")
54 
55 
56 class ReplicaExchange0(object):
57  """A macro to help setup and run replica exchange.
58  Supports Monte Carlo and molecular dynamics.
59  Produces trajectory RMF files, best PDB structures,
60  and output stat files.
61  """
62  def __init__(self, model,
63  representation=None,
64  root_hier=None,
65  sample_objects=None, # DEPRECATED
66  monte_carlo_sample_objects=None,
67  molecular_dynamics_sample_objects=None,
68  output_objects=[],
69  rmf_output_objects=None,
70  crosslink_restraints=None,
71  monte_carlo_temperature=1.0,
72  simulated_annealing=False,
73  simulated_annealing_minimum_temperature=1.0,
74  simulated_annealing_maximum_temperature=2.5,
75  simulated_annealing_minimum_temperature_nframes=100,
76  simulated_annealing_maximum_temperature_nframes=100,
77  replica_exchange_minimum_temperature=1.0,
78  replica_exchange_maximum_temperature=2.5,
79  replica_exchange_swap=True,
80  num_sample_rounds=1,
81  number_of_best_scoring_models=500,
82  monte_carlo_steps=10,
83  self_adaptive=False,
84  molecular_dynamics_steps=10,
85  molecular_dynamics_max_time_step=1.0,
86  number_of_frames=1000,
87  save_coordinates_mode="lowest_temperature",
88  nframes_write_coordinates=1,
89  write_initial_rmf=True,
90  initial_rmf_name_suffix="initial",
91  stat_file_name_suffix="stat",
92  best_pdb_name_suffix="model",
93  do_clean_first=True,
94  do_create_directories=True,
95  global_output_directory="./",
96  rmf_dir="rmfs/",
97  best_pdb_dir="pdbs/",
98  replica_stat_file_suffix="stat_replica",
99  em_object_for_rmf=None,
100  atomistic=False,
101  replica_exchange_object=None,
102  test_mode=False):
103  """Constructor.
104  @param model The IMP model
105  @param representation PMI.representation.Representation object
106  (or list of them, for multi-state modeling)
107  @param root_hier Instead of passing Representation, pass the System hierarchy
108  @param monte_carlo_sample_objects Objects for MC sampling; a list of
109  structural components (generally the representation) that will
110  be moved and restraints with parameters that need to
111  be sampled.
112  For PMI2: just pass flat list of movers
113  @param molecular_dynamics_sample_objects Objects for MD sampling
114  For PMI2: just pass flat list of particles
115  @param output_objects A list of structural objects and restraints
116  that will be included in output (ie, statistics "stat" files). Any object
117  that provides a get_output() method can be used here. If None is passed
118  the macro will not write stat files.
119  @param rmf_output_objects A list of structural objects and restraints
120  that will be included in rmf. Any object
121  that provides a get_output() method can be used here.
122  @param crosslink_restraints List of cross-link restraints that will
123  be included in output RMF files (for visualization).
124  @param monte_carlo_temperature MC temp (may need to be optimized
125  based on post-sampling analysis)
126  @param simulated_annealing If True, perform simulated annealing
127  @param simulated_annealing_minimum_temperature Should generally be
128  the same as monte_carlo_temperature.
129  @param simulated_annealing_minimum_temperature_nframes Number of
130  frames to compute at minimum temperature.
131  @param simulated_annealing_maximum_temperature_nframes Number of
132  frames to compute at
133  temps > simulated_annealing_maximum_temperature.
134  @param replica_exchange_minimum_temperature Low temp for REX; should
135  generally be the same as monte_carlo_temperature.
136  @param replica_exchange_maximum_temperature High temp for REX
137  @param replica_exchange_swap Boolean, enable disable temperature
138  swap (Default=True)
139  @param num_sample_rounds Number of rounds of MC/MD per cycle
140  @param number_of_best_scoring_models Number of top-scoring PDB models
141  to keep around for analysis
142  @param monte_carlo_steps Number of MC steps per round
143  @param self_adaptive self adaptive scheme for monte carlo movers
144  @param molecular_dynamics_steps Number of MD steps per round
145  @param molecular_dynamics_max_time_step Max time step for MD
146  @param number_of_frames Number of REX frames to run
147  @param save_coordinates_mode string: how to save coordinates.
148  "lowest_temperature" (default) only the lowest temperatures is saved
149  "25th_score" all replicas whose score is below the 25th percentile
150  "50th_score" all replicas whose score is below the 50th percentile
151  "75th_score" all replicas whose score is below the 75th percentile
152  @param nframes_write_coordinates How often to write the coordinates
153  of a frame
154  @param write_initial_rmf Write the initial configuration
155  @param global_output_directory Folder that will be created to house
156  output.
157  @param test_mode Set to True to avoid writing any files, just test one frame.
158  """
159  self.model = model
160  self.vars = {}
161  self.pmi2 = False
162 
163  ### add check hierarchy is multistate
164  self.output_objects = output_objects
165  self.rmf_output_objects=rmf_output_objects
166  self.representation = representation
167  if representation:
168  if isinstance(representation, list):
169  self.is_multi_state = True
170  self.root_hiers = [r.prot for r in representation]
171  self.vars["number_of_states"] = len(representation)
172  else:
173  self.is_multi_state = False
174  self.root_hier = representation.prot
175  self.vars["number_of_states"] = 1
176  elif root_hier and isinstance(root_hier, IMP.atom.Hierarchy) \
177  and root_hier.get_name() == 'System':
178  self.pmi2 = True
179  if self.output_objects is not None:
180  self.output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
181  if self.rmf_output_objects is not None:
182  self.rmf_output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
183  self.root_hier = root_hier
184  states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
185  self.vars["number_of_states"] = len(states)
186  if len(states)>1:
187  self.root_hiers = states
188  self.is_multi_state = True
189  else:
190  self.root_hier = root_hier
191  self.is_multi_state = False
192  else:
193  raise Exception("Must provide representation or System hierarchy (root_hier)")
194 
195  self._rmf_restraints = _RMFRestraints(model, crosslink_restraints)
196  self.em_object_for_rmf = em_object_for_rmf
197  self.monte_carlo_sample_objects = monte_carlo_sample_objects
198  self.vars["self_adaptive"]=self_adaptive
199  if sample_objects is not None:
200  self.monte_carlo_sample_objects+=sample_objects
201  self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
202  self.replica_exchange_object = replica_exchange_object
203  self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
204  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
205  self.vars[
206  "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
207  self.vars[
208  "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
209  self.vars["replica_exchange_swap"] = replica_exchange_swap
210  self.vars["simulated_annealing"]=\
211  simulated_annealing
212  self.vars["simulated_annealing_minimum_temperature"]=\
213  simulated_annealing_minimum_temperature
214  self.vars["simulated_annealing_maximum_temperature"]=\
215  simulated_annealing_maximum_temperature
216  self.vars["simulated_annealing_minimum_temperature_nframes"]=\
217  simulated_annealing_minimum_temperature_nframes
218  self.vars["simulated_annealing_maximum_temperature_nframes"]=\
219  simulated_annealing_maximum_temperature_nframes
220 
221  self.vars["num_sample_rounds"] = num_sample_rounds
222  self.vars[
223  "number_of_best_scoring_models"] = number_of_best_scoring_models
224  self.vars["monte_carlo_steps"] = monte_carlo_steps
225  self.vars["molecular_dynamics_steps"]=molecular_dynamics_steps
226  self.vars["number_of_frames"] = number_of_frames
227  if not save_coordinates_mode in ["lowest_temperature","25th_score","50th_score","75th_score"]:
228  raise Exception("save_coordinates_mode has unrecognized value")
229  else:
230  self.vars["save_coordinates_mode"] = save_coordinates_mode
231  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
232  self.vars["write_initial_rmf"] = write_initial_rmf
233  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
234  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
235  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
236  self.vars["do_clean_first"] = do_clean_first
237  self.vars["do_create_directories"] = do_create_directories
238  self.vars["global_output_directory"] = global_output_directory
239  self.vars["rmf_dir"] = rmf_dir
240  self.vars["best_pdb_dir"] = best_pdb_dir
241  self.vars["atomistic"] = atomistic
242  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
243  self.vars["geometries"] = None
244  self.test_mode = test_mode
245 
246  def add_geometries(self, geometries):
247  if self.vars["geometries"] is None:
248  self.vars["geometries"] = list(geometries)
249  else:
250  self.vars["geometries"].extend(geometries)
251 
252  def show_info(self):
253  print("ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
254  print("--- it stores the best scoring pdb models in pdbs/")
255  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
256  print("--- variables:")
257  keys = list(self.vars.keys())
258  keys.sort()
259  for v in keys:
260  print("------", v.ljust(30), self.vars[v])
261 
262  def get_replica_exchange_object(self):
263  return self.replica_exchange_object
264 
265  def _add_provenance(self, sampler_md, sampler_mc):
266  """Record details about the sampling in the IMP Hierarchies"""
267  if not self.is_multi_state or self.pmi2:
268  output_hierarchies = [self.root_hier]
269  else:
270  output_hierarchies = self.root_hiers
271 
272  iterations = 0
273  if sampler_md:
274  method = "Molecular Dynamics"
275  iterations += self.vars["molecular_dynamics_steps"]
276  if sampler_mc:
277  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
278  iterations += self.vars["monte_carlo_steps"]
279  # If no sampling is actually done, no provenance to write
280  if iterations == 0 or self.vars["number_of_frames"] == 0:
281  return
282  iterations *= self.vars["num_sample_rounds"]
283 
284  for h in output_hierarchies:
285  pi = self.model.add_particle("sampling")
287  self.model, pi, method, self.vars["number_of_frames"],
288  iterations)
289  p.set_number_of_replicas(
290  self.replica_exchange_object.get_number_of_replicas())
291  IMP.pmi.tools._add_pmi_provenance(h)
292  IMP.core.add_provenance(self.model, h, p)
293 
294  def execute_macro(self):
295  temp_index_factor = 100000.0
296  samplers=[]
297  sampler_mc=None
298  sampler_md=None
299  if self.monte_carlo_sample_objects is not None:
300  print("Setting up MonteCarlo")
301  sampler_mc = IMP.pmi.samplers.MonteCarlo(self.model,
302  self.monte_carlo_sample_objects,
303  self.vars["monte_carlo_temperature"])
304  if self.vars["simulated_annealing"]:
305  tmin=self.vars["simulated_annealing_minimum_temperature"]
306  tmax=self.vars["simulated_annealing_maximum_temperature"]
307  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
308  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
309  sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
310  if self.vars["self_adaptive"]:
311  sampler_mc.set_self_adaptive(isselfadaptive=self.vars["self_adaptive"])
312  if self.output_objects is not None:
313  self.output_objects.append(sampler_mc)
314  if self.rmf_output_objects is not None:
315  self.rmf_output_objects.append(sampler_mc)
316  samplers.append(sampler_mc)
317 
318 
319  if self.molecular_dynamics_sample_objects is not None:
320  print("Setting up MolecularDynamics")
321  sampler_md = IMP.pmi.samplers.MolecularDynamics(self.model,
322  self.molecular_dynamics_sample_objects,
323  self.vars["monte_carlo_temperature"],
324  maximum_time_step=self.molecular_dynamics_max_time_step)
325  if self.vars["simulated_annealing"]:
326  tmin=self.vars["simulated_annealing_minimum_temperature"]
327  tmax=self.vars["simulated_annealing_maximum_temperature"]
328  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
329  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
330  sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
331  if self.output_objects is not None:
332  self.output_objects.append(sampler_md)
333  if self.rmf_output_objects is not None:
334  self.rmf_output_objects.append(sampler_md)
335  samplers.append(sampler_md)
336 # -------------------------------------------------------------------------
337 
338  print("Setting up ReplicaExchange")
339  rex = IMP.pmi.samplers.ReplicaExchange(self.model,
340  self.vars[
341  "replica_exchange_minimum_temperature"],
342  self.vars[
343  "replica_exchange_maximum_temperature"],
344  samplers,
345  replica_exchange_object=self.replica_exchange_object)
346  self.replica_exchange_object = rex.rem
347 
348  myindex = rex.get_my_index()
349  if self.output_objects is not None:
350  self.output_objects.append(rex)
351  if self.rmf_output_objects is not None:
352  self.rmf_output_objects.append(rex)
353  # must reset the minimum temperature due to the
354  # different binary length of rem.get_my_parameter double and python
355  # float
356  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
357 
358 # -------------------------------------------------------------------------
359 
360  globaldir = self.vars["global_output_directory"] + "/"
361  rmf_dir = globaldir + self.vars["rmf_dir"]
362  pdb_dir = globaldir + self.vars["best_pdb_dir"]
363 
364  if not self.test_mode:
365  if self.vars["do_clean_first"]:
366  pass
367 
368  if self.vars["do_create_directories"]:
369 
370  try:
371  os.makedirs(globaldir)
372  except:
373  pass
374  try:
375  os.makedirs(rmf_dir)
376  except:
377  pass
378 
379  if not self.is_multi_state:
380  try:
381  os.makedirs(pdb_dir)
382  except:
383  pass
384  else:
385  for n in range(self.vars["number_of_states"]):
386  try:
387  os.makedirs(pdb_dir + "/" + str(n))
388  except:
389  pass
390 
391 # -------------------------------------------------------------------------
392 
394  if self.output_objects is not None:
395  self.output_objects.append(sw)
396  if self.rmf_output_objects is not None:
397  self.rmf_output_objects.append(sw)
398 
399  print("Setting up stat file")
400  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
401  low_temp_stat_file = globaldir + \
402  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
403 
404  # Ensure model is updated before saving init files
405  if not self.test_mode:
406  self.model.update()
407 
408  if not self.test_mode:
409  if self.output_objects is not None:
410  output.init_stat2(low_temp_stat_file,
411  self.output_objects,
412  extralabels=["rmf_file", "rmf_frame_index"])
413  else:
414  print("Stat file writing is disabled")
415 
416  if self.rmf_output_objects is not None:
417  print("Stat info being written in the rmf file")
418 
419  print("Setting up replica stat file")
420  replica_stat_file = globaldir + \
421  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
422  if not self.test_mode:
423  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
424 
425  if not self.test_mode:
426  print("Setting up best pdb files")
427  if not self.is_multi_state:
428  if self.vars["number_of_best_scoring_models"] > 0:
429  output.init_pdb_best_scoring(pdb_dir + "/" +
430  self.vars["best_pdb_name_suffix"],
431  self.root_hier,
432  self.vars[
433  "number_of_best_scoring_models"],
434  replica_exchange=True)
435  output.write_psf(pdb_dir + "/" +"model.psf",pdb_dir + "/" +
436  self.vars["best_pdb_name_suffix"]+".0.pdb")
437  else:
438  if self.vars["number_of_best_scoring_models"] > 0:
439  for n in range(self.vars["number_of_states"]):
440  output.init_pdb_best_scoring(pdb_dir + "/" + str(n) + "/" +
441  self.vars["best_pdb_name_suffix"],
442  self.root_hiers[n],
443  self.vars[
444  "number_of_best_scoring_models"],
445  replica_exchange=True)
446  output.write_psf(pdb_dir + "/" + str(n) + "/" +"model.psf",pdb_dir + "/" + str(n) + "/" +
447  self.vars["best_pdb_name_suffix"]+".0.pdb")
448 # ---------------------------------------------
449 
450  if self.em_object_for_rmf is not None:
451  if not self.is_multi_state or self.pmi2:
452  output_hierarchies = [
453  self.root_hier,
454  self.em_object_for_rmf.get_density_as_hierarchy(
455  )]
456  else:
457  output_hierarchies = self.root_hiers
458  output_hierarchies.append(
459  self.em_object_for_rmf.get_density_as_hierarchy())
460  else:
461  if not self.is_multi_state or self.pmi2:
462  output_hierarchies = [self.root_hier]
463  else:
464  output_hierarchies = self.root_hiers
465 
466 #----------------------------------------------
467  if not self.test_mode:
468  print("Setting up and writing initial rmf coordinate file")
469  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
470  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
471  output_hierarchies,listofobjects=self.rmf_output_objects)
472  if self._rmf_restraints:
473  output.add_restraints_to_rmf(
474  init_suffix + "." + str(myindex) + ".rmf3",
475  self._rmf_restraints)
476  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
477  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
478 
479 #----------------------------------------------
480 
481  if not self.test_mode:
482  mpivs=IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
483 
484 #----------------------------------------------
485 
486  self._add_provenance(sampler_md, sampler_mc)
487 
488  if not self.test_mode:
489  print("Setting up production rmf files")
490  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
491  output.init_rmf(rmfname, output_hierarchies, geometries=self.vars["geometries"],
492  listofobjects = self.rmf_output_objects)
493 
494  if self._rmf_restraints:
495  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
496 
497  ntimes_at_low_temp = 0
498 
499  if myindex == 0:
500  self.show_info()
501  self.replica_exchange_object.set_was_used(True)
502  nframes = self.vars["number_of_frames"]
503  if self.test_mode:
504  nframes = 1
505  for i in range(nframes):
506  if self.test_mode:
507  score = 0.
508  else:
509  for nr in range(self.vars["num_sample_rounds"]):
510  if sampler_md is not None:
511  sampler_md.optimize(
512  self.vars["molecular_dynamics_steps"])
513  if sampler_mc is not None:
514  sampler_mc.optimize(self.vars["monte_carlo_steps"])
516  self.model).evaluate(False)
517  mpivs.set_value("score",score)
518  output.set_output_entry("score", score)
519 
520 
521 
522  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
523 
524  if self.vars["save_coordinates_mode"] == "lowest_temperature":
525  save_frame=(min_temp_index == my_temp_index)
526  elif self.vars["save_coordinates_mode"] == "25th_score":
527  score_perc=mpivs.get_percentile("score")
528  save_frame=(score_perc*100.0<=25.0)
529  elif self.vars["save_coordinates_mode"] == "50th_score":
530  score_perc=mpivs.get_percentile("score")
531  save_frame=(score_perc*100.0<=50.0)
532  elif self.vars["save_coordinates_mode"] == "75th_score":
533  score_perc=mpivs.get_percentile("score")
534  save_frame=(score_perc*100.0<=75.0)
535 
536  # Ensure model is updated before saving output files
537  if save_frame or not self.test_mode:
538  self.model.update()
539 
540  if save_frame:
541  print("--- frame %s score %s " % (str(i), str(score)))
542 
543  if not self.test_mode:
544  if i % self.vars["nframes_write_coordinates"]==0:
545  print('--- writing coordinates')
546  if self.vars["number_of_best_scoring_models"] > 0:
547  output.write_pdb_best_scoring(score)
548  output.write_rmf(rmfname)
549  output.set_output_entry("rmf_file", rmfname)
550  output.set_output_entry("rmf_frame_index", ntimes_at_low_temp)
551  else:
552  output.set_output_entry("rmf_file", rmfname)
553  output.set_output_entry("rmf_frame_index", '-1')
554  if self.output_objects is not None:
555  output.write_stat2(low_temp_stat_file)
556  ntimes_at_low_temp += 1
557 
558  if not self.test_mode:
559  output.write_stat2(replica_stat_file)
560  if self.vars["replica_exchange_swap"]:
561  rex.swap_temp(i, score)
562  for p, state in IMP.pmi.tools._all_protocol_outputs(
563  [self.representation],
564  self.root_hier if self.pmi2 else None):
565  p.add_replica_exchange(state, self)
566 
567  if not self.test_mode:
568  print("closing production rmf files")
569  output.close_rmf(rmfname)
570 
571 
572 
573 # ----------------------------------------------------------------------
574 class BuildSystem(object):
575  """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
576  Easily create multi-state systems by calling this macro
577  repeatedly with different TopologyReader objects!
578  A useful function is get_molecules() which returns the PMI Molecules grouped by state
579  as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
580  Quick multi-state system:
581  @code{.python}
582  model = IMP.Model()
583  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
584  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
585  bs = IMP.pmi.macros.BuildSystem(model)
586  bs.add_state(reader1)
587  bs.add_state(reader2)
588  bs.execute_macro() # build everything including degrees of freedom
589  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
590  ### now you have a two state system, you add restraints etc
591  @endcode
592  @note The "domain name" entry of the topology reader is not used.
593  All molecules are set up by the component name, but split into rigid bodies
594  as requested.
595  """
596  def __init__(self,
597  model,
598  sequence_connectivity_scale=4.0,
599  force_create_gmm_files=False,
600  resolutions=[1,10]):
601  """Constructor
602  @param model An IMP Model
603  @param sequence_connectivity_scale For scaling the connectivity restraint
604  @param force_create_gmm_files If True, will sample and create GMMs
605  no matter what. If False, will only sample if the
606  files don't exist. If number of Gaussians is zero, won't
607  do anything.
608  @param resolutions The resolutions to build for structured regions
609  """
610  self.model = model
611  self.system = IMP.pmi.topology.System(self.model)
612  self._readers = [] # the TopologyReaders (one per state)
613  self._domain_res = [] # TempResidues for each domain key=unique name, value=(atomic_res,non_atomic_res).
614  self._domains = [] # key = domain unique name, value = Component
615  self.force_create_gmm_files = force_create_gmm_files
616  self.resolutions = resolutions
617 
618  def add_state(self,
619  reader,
620  keep_chain_id=False, fasta_name_map=None):
621  """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
622  When you are done adding states, call execute_macro()
623  @param reader The TopologyReader object
624  @param fasta_name_map dictionary for converting protein names found in the fasta file
625  """
626  state = self.system.create_state()
627  self._readers.append(reader)
628  these_domain_res = {} # key is unique name, value is (atomic res, nonatomicres)
629  these_domains = {} # key is unique name, value is _Component
630  chain_ids = string.ascii_uppercase+string.ascii_lowercase+'0123456789'
631  numchain = 0
632 
633  ### setup representation
634  # loop over molecules, copies, then domains
635  for molname in reader.get_molecules():
636  copies = reader.get_molecules()[molname].domains
637  for nc,copyname in enumerate(copies):
638  print("BuildSystem.add_state: setting up molecule %s copy number %s" % (molname,str(nc)))
639  copy = copies[copyname]
640  # option to not rename chains
641  if keep_chain_id == True:
642  chain_id = copy[0].chain
643  else:
644  chain_id = chain_ids[numchain]
645  if nc==0:
646  is_nucleic=False
647  fasta_flag=copy[0].fasta_flag
648  if fasta_flag == "RNA" or fasta_flag == "DNA": is_nucleic=True
649  seq = IMP.pmi.topology.Sequences(copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
650  print("BuildSystem.add_state: molecule %s sequence has %s residues" % (molname,len(seq)))
651  orig_mol = state.create_molecule(molname,
652  seq,
653  chain_id,is_nucleic)
654  mol = orig_mol
655  numchain+=1
656  else:
657  print("BuildSystem.add_state: creating a copy for molecule %s" % molname)
658  mol = orig_mol.create_copy(chain_id)
659  numchain+=1
660 
661  for domainnumber,domain in enumerate(copy):
662  print("BuildSystem.add_state: ---- setting up domain %s of molecule %s" % (domainnumber,molname))
663  # we build everything in the residue range, even if it
664  # extends beyond what's in the actual PDB file
665  these_domains[domain.get_unique_name()] = domain
666  if domain.residue_range==[] or domain.residue_range is None:
667  domain_res = mol.get_residues()
668  else:
669  start = domain.residue_range[0]+domain.pdb_offset
670  if domain.residue_range[1]=='END':
671  end = len(mol.sequence)
672  else:
673  end = domain.residue_range[1]+domain.pdb_offset
674  domain_res = mol.residue_range(start-1,end-1)
675  print("BuildSystem.add_state: -------- domain %s of molecule %s extends from residue %s to residue %s " % (domainnumber,molname,start,end))
676  if domain.pdb_file=="BEADS":
677  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by BEADS " % (domainnumber,molname))
678  mol.add_representation(
679  domain_res,
680  resolutions=[domain.bead_size],
681  setup_particles_as_densities=(
682  domain.em_residues_per_gaussian!=0),
683  color = domain.color)
684  these_domain_res[domain.get_unique_name()] = (set(),domain_res)
685  elif domain.pdb_file=="IDEAL_HELIX":
686  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by IDEAL_HELIX " % (domainnumber,molname))
687  mol.add_representation(
688  domain_res,
689  resolutions=self.resolutions,
690  ideal_helix=True,
691  density_residues_per_component=domain.em_residues_per_gaussian,
692  density_prefix=domain.density_prefix,
693  density_force_compute=self.force_create_gmm_files,
694  color = domain.color)
695  these_domain_res[domain.get_unique_name()] = (domain_res,set())
696  else:
697  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by pdb file %s " % (domainnumber,molname,domain.pdb_file))
698  domain_atomic = mol.add_structure(domain.pdb_file,
699  domain.chain,
700  domain.residue_range,
701  domain.pdb_offset,
702  soft_check=True)
703  domain_non_atomic = domain_res - domain_atomic
704  if not domain.em_residues_per_gaussian:
705  mol.add_representation(domain_atomic,
706  resolutions=self.resolutions,
707  color = domain.color)
708  if len(domain_non_atomic)>0:
709  mol.add_representation(domain_non_atomic,
710  resolutions=[domain.bead_size],
711  color = domain.color)
712  else:
713  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by gaussians " % (domainnumber,molname))
714  mol.add_representation(
715  domain_atomic,
716  resolutions=self.resolutions,
717  density_residues_per_component=domain.em_residues_per_gaussian,
718  density_prefix=domain.density_prefix,
719  density_force_compute=self.force_create_gmm_files,
720  color = domain.color)
721  if len(domain_non_atomic)>0:
722  mol.add_representation(domain_non_atomic,
723  resolutions=[domain.bead_size],
724  setup_particles_as_densities=True,
725  color = domain.color)
726  these_domain_res[domain.get_unique_name()] = (
727  domain_atomic,domain_non_atomic)
728  self._domain_res.append(these_domain_res)
729  self._domains.append(these_domains)
730  print('BuildSystem.add_state: State',len(self.system.states),'added')
731 
732  def get_molecules(self):
733  """Return list of all molecules grouped by state.
734  For each state, it's a dictionary of Molecules where key is the molecule name
735  """
736  return [s.get_molecules() for s in self.system.get_states()]
737 
738  def get_molecule(self,molname,copy_index=0,state_index=0):
739  return self.system.get_states()[state_index].get_molecules()[molname][copy_index]
740 
741  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04, max_bead_trans=4.0, max_srb_trans=4.0,max_srb_rot=0.04):
742  """Builds representations and sets up degrees of freedom"""
743  print("BuildSystem.execute_macro: building representations")
744  self.root_hier = self.system.build()
745 
746  print("BuildSystem.execute_macro: setting up degrees of freedom")
747  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
748  for nstate,reader in enumerate(self._readers):
749  rbs = reader.get_rigid_bodies()
750  srbs = reader.get_super_rigid_bodies()
751  csrbs = reader.get_chains_of_super_rigid_bodies()
752 
753  # add rigid bodies
754  domains_in_rbs = set()
755  for rblist in rbs:
756  print("BuildSystem.execute_macro: -------- building rigid body %s" % (str(rblist)))
757  all_res = IMP.pmi.tools.OrderedSet()
758  bead_res = IMP.pmi.tools.OrderedSet()
759  for dname in rblist:
760  domain = self._domains[nstate][dname]
761  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
762  all_res|=self._domain_res[nstate][dname][0]
763  bead_res|=self._domain_res[nstate][dname][1]
764  domains_in_rbs.add(dname)
765  all_res|=bead_res
766  print("BuildSystem.execute_macro: -------- \
767 creating rigid body with max_trans %s \
768 max_rot %s non_rigid_max_trans %s" \
769  % (str(max_rb_trans),str(max_rb_rot),str(max_bead_trans)))
770  self.dof.create_rigid_body(all_res,
771  nonrigid_parts=bead_res,
772  max_trans=max_rb_trans,
773  max_rot=max_rb_rot,
774  nonrigid_max_trans=max_bead_trans)
775 
776  # if you have any domains not in an RB, set them as flexible beads
777  for dname, domain in self._domains[nstate].items():
778  if dname not in domains_in_rbs:
779  if domain.pdb_file != "BEADS":
780  warnings.warn(
781  "Making %s flexible. This may distort the "
782  "structure; consider making it rigid" % dname,
784  self.dof.create_flexible_beads(
785  self._domain_res[nstate][dname][1],
786  max_trans=max_bead_trans)
787 
788  # add super rigid bodies
789  for srblist in srbs:
790  print("BuildSystem.execute_macro: -------- building super rigid body %s" % (str(srblist)))
791  all_res = IMP.pmi.tools.OrderedSet()
792  for dname in srblist:
793  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
794  all_res|=self._domain_res[nstate][dname][0]
795  all_res|=self._domain_res[nstate][dname][1]
796 
797  print("BuildSystem.execute_macro: -------- \
798 creating super rigid body with max_trans %s max_rot %s " \
799  % (str(max_srb_trans),str(max_srb_rot)))
800  self.dof.create_super_rigid_body(all_res,max_trans=max_srb_trans,max_rot=max_srb_rot)
801 
802  # add chains of super rigid bodies
803  for csrblist in csrbs:
804  all_res = IMP.pmi.tools.OrderedSet()
805  for dname in csrblist:
806  all_res|=self._domain_res[nstate][dname][0]
807  all_res|=self._domain_res[nstate][dname][1]
808  all_res = list(all_res)
809  all_res.sort(key=lambda r:r.get_index())
810  #self.dof.create_super_rigid_body(all_res,chain_min_length=2,chain_max_length=3)
811  self.dof.create_main_chain_mover(all_res)
812  return self.root_hier,self.dof
813 
814 
815 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
816 class AnalysisReplicaExchange0(object):
817  """A macro for running all the basic operations of analysis.
818  Includes clustering, precision analysis, and making ensemble density maps.
819  A number of plots are also supported.
820  """
821  def __init__(self, model,
822  merge_directories=["./"],
823  stat_file_name_suffix="stat",
824  best_pdb_name_suffix="model",
825  do_clean_first=True,
826  do_create_directories=True,
827  global_output_directory="output/",
828  replica_stat_file_suffix="stat_replica",
829  global_analysis_result_directory="./analysis/",
830  test_mode=False):
831  """Constructor.
832  @param model The IMP model
833  @param stat_file_name_suffix
834  @param merge_directories The directories containing output files
835  @param best_pdb_name_suffix
836  @param do_clean_first
837  @param do_create_directories
838  @param global_output_directory Where everything is
839  @param replica_stat_file_suffix
840  @param global_analysis_result_directory
841  @param test_mode If True, nothing is changed on disk
842  """
843 
844  try:
845  from mpi4py import MPI
846  self.comm = MPI.COMM_WORLD
847  self.rank = self.comm.Get_rank()
848  self.number_of_processes = self.comm.size
849  except ImportError:
850  self.rank = 0
851  self.number_of_processes = 1
852 
853  self.test_mode = test_mode
854  self._protocol_output = []
855  self.cluster_obj = None
856  self.model = model
857  stat_dir = global_output_directory
858  self.stat_files = []
859  # it contains the position of the root directories
860  for rd in merge_directories:
861  stat_files = glob.glob(os.path.join(rd,stat_dir,"stat.*.out"))
862  if len(stat_files)==0:
863  warnings.warn("no stat files found in %s"
864  % os.path.join(rd, stat_dir),
866  self.stat_files += stat_files
867 
868  def add_protocol_output(self, p):
869  """Capture details of the modeling protocol.
870  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
871  """
872  # Assume last state is the one we're interested in
873  self._protocol_output.append((p, p._last_state))
874 
875  def get_modeling_trajectory(self,
876  score_key="SimplifiedModel_Total_Score_None",
877  rmf_file_key="rmf_file",
878  rmf_file_frame_key="rmf_frame_index",
879  outputdir="./",
880  get_every=1,
881  nframes_trajectory=10000):
882  """ Get a trajectory of the modeling run, for generating demonstrative movies
883  @param score_key The score for ranking models
884  @param rmf_file_key Key pointing to RMF filename
885  @param rmf_file_frame_key Key pointing to RMF frame number
886  @param outputdir The local output directory used in the run
887  @param get_every Extract every nth frame
888  @param nframes_trajectory Total number of frames of the trajectory
889  """
890  from operator import itemgetter
891  import math
892 
893  trajectory_models = IMP.pmi.io.get_trajectory_models(self.stat_files,
894  score_key,
895  rmf_file_key,
896  rmf_file_frame_key,
897  get_every)
898  rmf_file_list=trajectory_models[0]
899  rmf_file_frame_list=trajectory_models[1]
900  score_list=list(map(float, trajectory_models[2]))
901 
902  max_score=max(score_list)
903  min_score=min(score_list)
904 
905 
906  bins=[(max_score-min_score)*math.exp(-float(i))+min_score for i in range(nframes_trajectory)]
907  binned_scores=[None]*nframes_trajectory
908  binned_model_indexes=[-1]*nframes_trajectory
909 
910  for model_index,s in enumerate(score_list):
911  bins_score_diffs=[abs(s-b) for b in bins]
912  bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
913  if binned_scores[bin_index]==None:
914  binned_scores[bin_index]=s
915  binned_model_indexes[bin_index]=model_index
916  else:
917  old_diff=abs(binned_scores[bin_index]-bins[bin_index])
918  new_diff=abs(s-bins[bin_index])
919  if new_diff < old_diff:
920  binned_scores[bin_index]=s
921  binned_model_indexes[bin_index]=model_index
922 
923  print(binned_scores)
924  print(binned_model_indexes)
925 
926 
927  def _expand_ambiguity(self,prot,d):
928  """If using PMI2, expand the dictionary to include copies as ambiguous options
929  This also keeps the states separate.
930  """
931  newdict = {}
932  for key in d:
933  val = d[key]
934  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
935  newdict[key] = val
936  continue
937  states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
938  if isinstance(val, tuple):
939  start = val[0]
940  stop = val[1]
941  name = val[2]
942  else:
943  start = 1
944  stop = -1
945  name = val
946  for nst in range(len(states)):
947  sel = IMP.atom.Selection(prot,molecule=name,state_index=nst)
948  copies = sel.get_selected_particles(with_representation=False)
949  if len(copies)>1:
950  for nc in range(len(copies)):
951  if len(states)>1:
952  newdict['%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
953  else:
954  newdict['%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
955  else:
956  newdict[key] = val
957  return newdict
958 
959 
960  def clustering(self,
961  score_key="SimplifiedModel_Total_Score_None",
962  rmf_file_key="rmf_file",
963  rmf_file_frame_key="rmf_frame_index",
964  state_number=0,
965  prefiltervalue=None,
966  feature_keys=[],
967  outputdir="./",
968  alignment_components=None,
969  number_of_best_scoring_models=10,
970  rmsd_calculation_components=None,
971  distance_matrix_file='distances.mat',
972  load_distance_matrix_file=False,
973  skip_clustering=False,
974  number_of_clusters=1,
975  display_plot=False,
976  exit_after_display=True,
977  get_every=1,
978  first_and_last_frames=None,
979  density_custom_ranges=None,
980  write_pdb_with_centered_coordinates=False,
981  voxel_size=5.0):
982  """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
983  Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
984  Can pass None for copy or state to ignore that field.
985  If you don't pass a specific copy number
986  @param score_key The score for ranking models. Use "Total_Score"
987  instead of default for PMI2.
988  @param rmf_file_key Key pointing to RMF filename
989  @param rmf_file_frame_key Key pointing to RMF frame number
990  @param state_number State number to analyze
991  @param prefiltervalue Only include frames where the
992  score key is below this value
993  @param feature_keys Keywords for which you want to
994  calculate average, medians, etc.
995  If you pass "Keyname" it'll include everything that matches "*Keyname*"
996  @param outputdir The local output directory used in the run
997  @param alignment_components Dictionary with keys=groupname, values are tuples
998  for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
999  @param number_of_best_scoring_models Num models to keep per run
1000  @param rmsd_calculation_components For calculating RMSD
1001  (same format as alignment_components)
1002  @param distance_matrix_file Where to store/read the distance matrix
1003  @param load_distance_matrix_file Try to load the distance matrix file
1004  @param skip_clustering Just extract the best scoring models
1005  and save the pdbs
1006  @param number_of_clusters Number of k-means clusters
1007  @param display_plot Display the distance matrix
1008  @param exit_after_display Exit after displaying distance matrix
1009  @param get_every Extract every nth frame
1010  @param first_and_last_frames A tuple with the first and last frames to be
1011  analyzed. Values are percentages!
1012  Default: get all frames
1013  @param density_custom_ranges For density calculation
1014  (same format as alignment_components)
1015  @param write_pdb_with_centered_coordinates
1016  @param voxel_size Used for the density output
1017  """
1018  # Track provenance information to be added to each output model
1019  prov = []
1020  self._outputdir = os.path.abspath(outputdir)
1021  self._number_of_clusters = number_of_clusters
1022  for p, state in self._protocol_output:
1023  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1024 
1025  if self.test_mode:
1026  return
1027 
1028  if self.rank==0:
1029  try:
1030  os.mkdir(outputdir)
1031  except:
1032  pass
1033 
1034  if not load_distance_matrix_file:
1035  if len(self.stat_files)==0: print("ERROR: no stat file found in the given path"); return
1036  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1037  self.stat_files,self.number_of_processes)[self.rank]
1038 
1039  # read ahead to check if you need the PMI2 score key instead
1040  po = IMP.pmi.output.ProcessOutput(my_stat_files[0])
1041  orig_score_key = score_key
1042  if score_key not in po.get_keys():
1043  if 'Total_Score' in po.get_keys():
1044  score_key = 'Total_Score'
1045  warnings.warn(
1046  "Using 'Total_Score' instead of "
1047  "'SimplifiedModel_Total_Score_None' for the score key",
1049  for k in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1050  if k in feature_keys:
1051  warnings.warn(
1052  "no need to pass " + k + " to feature_keys.",
1054  feature_keys.remove(k)
1055 
1056  best_models = IMP.pmi.io.get_best_models(my_stat_files,
1057  score_key,
1058  feature_keys,
1059  rmf_file_key,
1060  rmf_file_frame_key,
1061  prefiltervalue,
1062  get_every, provenance=prov)
1063  rmf_file_list=best_models[0]
1064  rmf_file_frame_list=best_models[1]
1065  score_list=best_models[2]
1066  feature_keyword_list_dict=best_models[3]
1067 
1068 # ------------------------------------------------------------------------
1069 # collect all the files and scores
1070 # ------------------------------------------------------------------------
1071 
1072  if self.number_of_processes > 1:
1073  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1074  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1075  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1076  rmf_file_frame_list)
1077  for k in feature_keyword_list_dict:
1078  feature_keyword_list_dict[k] = IMP.pmi.tools.scatter_and_gather(
1079  feature_keyword_list_dict[k])
1080 
1081  # sort by score and get the best scoring ones
1082  score_rmf_tuples = list(zip(score_list,
1083  rmf_file_list,
1084  rmf_file_frame_list,
1085  list(range(len(score_list)))))
1086 
1087  if density_custom_ranges:
1088  for k in density_custom_ranges:
1089  if not isinstance(density_custom_ranges[k], list):
1090  raise Exception("Density custom ranges: values must be lists of tuples")
1091 
1092  # keep subset of frames if requested
1093  if first_and_last_frames is not None:
1094  nframes = len(score_rmf_tuples)
1095  first_frame = int(first_and_last_frames[0] * nframes)
1096  last_frame = int(first_and_last_frames[1] * nframes)
1097  if last_frame > len(score_rmf_tuples):
1098  last_frame = -1
1099  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1100 
1101  # sort RMFs by the score_key in ascending order, and store the rank
1102  best_score_rmf_tuples = sorted(score_rmf_tuples,
1103  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1104  best_score_rmf_tuples=[t+(n,) for n,t in enumerate(best_score_rmf_tuples)]
1105  # Note in the provenance info that we only kept best-scoring models
1106  prov.append(IMP.pmi.io.FilterProvenance("Best scoring",
1107  0, number_of_best_scoring_models))
1108  # sort the feature scores in the same way
1109  best_score_feature_keyword_list_dict = defaultdict(list)
1110  for tpl in best_score_rmf_tuples:
1111  index = tpl[3]
1112  for f in feature_keyword_list_dict:
1113  best_score_feature_keyword_list_dict[f].append(
1114  feature_keyword_list_dict[f][index])
1115  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1116  best_score_rmf_tuples,
1117  self.number_of_processes)[self.rank]
1118 
1119  # expand the dictionaries to include ambiguous copies
1120  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(self.model,
1121  0,
1122  my_best_score_rmf_tuples[0][1])[0]
1123  if IMP.pmi.get_is_canonical(prot_ahead):
1124  if rmsd_calculation_components is not None:
1125  tmp = self._expand_ambiguity(prot_ahead,rmsd_calculation_components)
1126  if tmp!=rmsd_calculation_components:
1127  print('Detected ambiguity, expand rmsd components to',tmp)
1128  rmsd_calculation_components = tmp
1129  if alignment_components is not None:
1130  tmp = self._expand_ambiguity(prot_ahead,alignment_components)
1131  if tmp!=alignment_components:
1132  print('Detected ambiguity, expand alignment components to',tmp)
1133  alignment_components = tmp
1134 
1135 #-------------------------------------------------------------
1136 # read the coordinates
1137 # ------------------------------------------------------------
1138  rmsd_weights = IMP.pmi.io.get_bead_sizes(self.model,
1139  my_best_score_rmf_tuples[0],
1140  rmsd_calculation_components,
1141  state_number=state_number)
1142  got_coords = IMP.pmi.io.read_coordinates_of_rmfs(self.model,
1143  my_best_score_rmf_tuples,
1144  alignment_components,
1145  rmsd_calculation_components,
1146  state_number=state_number)
1147 
1148  # note! the coordinates are simply float tuples, NOT decorators, NOT Vector3D,
1149  # NOR particles, because these object cannot be serialized. We need serialization
1150  # for the parallel computation based on mpi.
1151  all_coordinates=got_coords[0] # dict:key=component name,val=coords per hit
1152  alignment_coordinates=got_coords[1] # same as above, limited to alignment bits
1153  rmsd_coordinates=got_coords[2] # same as above, limited to RMSD bits
1154  rmf_file_name_index_dict=got_coords[3] # dictionary with key=RMF, value=score rank
1155  all_rmf_file_names=got_coords[4] # RMF file per hit
1156 
1157 # ------------------------------------------------------------------------
1158 # optionally don't compute distance matrix or cluster, just write top files
1159 # ------------------------------------------------------------------------
1160  if skip_clustering:
1161  if density_custom_ranges:
1162  DensModule = IMP.pmi.analysis.GetModelDensity(
1163  density_custom_ranges,
1164  voxel=voxel_size)
1165 
1166  dircluster=os.path.join(outputdir,"all_models."+str(n))
1167  try:
1168  os.mkdir(outputdir)
1169  except:
1170  pass
1171  try:
1172  os.mkdir(dircluster)
1173  except:
1174  pass
1175  clusstat=open(os.path.join(dircluster,"stat."+str(self.rank)+".out"),"w")
1176  for cnt,tpl in enumerate(my_best_score_rmf_tuples):
1177  rmf_name=tpl[1]
1178  rmf_frame_number=tpl[2]
1179  tmp_dict={}
1180  index=tpl[4]
1181  for key in best_score_feature_keyword_list_dict:
1182  tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1183 
1184  if cnt==0:
1185  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1186  self.model,
1187  rmf_frame_number,
1188  rmf_name)
1189  else:
1190  linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1191  self.model,
1192  prots,
1193  rs,
1194  rmf_frame_number,
1195  rmf_name)
1196  if not linking_successful:
1197  continue
1198 
1199  if not prots:
1200  continue
1201 
1202  if IMP.pmi.get_is_canonical(prots[0]):
1203  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1204  prot = states[state_number]
1205  else:
1206  prot = prots[state_number]
1207 
1208  # get transformation aligning coordinates of requested tuples
1209  # to the first RMF file
1210  if cnt==0:
1211  coords_f1=alignment_coordinates[cnt]
1212  if cnt > 0:
1213  coords_f2=alignment_coordinates[cnt]
1214  if coords_f2:
1215  Ali = IMP.pmi.analysis.Alignment(coords_f1, coords_f2)
1216  transformation = Ali.align()[1]
1217  else:
1219 
1220  rbs = set()
1221  for p in IMP.atom.get_leaves(prot):
1222  if not IMP.core.XYZR.get_is_setup(p):
1224  IMP.core.XYZR(p).set_radius(0.0001)
1225  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1226 
1228  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1229  rbs.add(rb)
1230  else:
1232  transformation)
1233  for rb in rbs:
1234  IMP.core.transform(rb,transformation)
1235 
1237  self.model.update()
1238  out_pdb_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".pdb")
1239  out_rmf_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".rmf3")
1240  o.init_pdb(out_pdb_fn,prot)
1241  o.write_pdb(out_pdb_fn,
1242  translate_to_geometric_center=write_pdb_with_centered_coordinates)
1243 
1244  tmp_dict["local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1245  tmp_dict["rmf_file_full_path"]=rmf_name
1246  tmp_dict["local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1247  tmp_dict["local_rmf_frame_number"]=0
1248 
1249  clusstat.write(str(tmp_dict)+"\n")
1250 
1251  if IMP.pmi.get_is_canonical(prot):
1252  # create a single-state System and write that
1254  h.set_name("System")
1255  h.add_child(prot)
1256  o.init_rmf(out_rmf_fn, [h], rs)
1257  else:
1258  o.init_rmf(out_rmf_fn, [prot],rs)
1259 
1260  o.write_rmf(out_rmf_fn)
1261  o.close_rmf(out_rmf_fn)
1262  # add the density
1263  if density_custom_ranges:
1264  DensModule.add_subunits_density(prot)
1265 
1266  if density_custom_ranges:
1267  DensModule.write_mrc(path=dircluster)
1268  del DensModule
1269  return
1270 
1271 
1272 
1273  # broadcast the coordinates
1274  if self.number_of_processes > 1:
1275  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1276  all_coordinates)
1277  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1278  all_rmf_file_names)
1279  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1280  rmf_file_name_index_dict)
1281  alignment_coordinates=IMP.pmi.tools.scatter_and_gather(
1282  alignment_coordinates)
1283  rmsd_coordinates=IMP.pmi.tools.scatter_and_gather(
1284  rmsd_coordinates)
1285 
1286  if self.rank == 0:
1287  # save needed informations in external files
1288  self.save_objects(
1289  [best_score_feature_keyword_list_dict,
1290  rmf_file_name_index_dict],
1291  ".macro.pkl")
1292 
1293 # ------------------------------------------------------------------------
1294 # Calculate distance matrix and cluster
1295 # ------------------------------------------------------------------------
1296  print("setup clustering class")
1297  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1298 
1299  for n, model_coordinate_dict in enumerate(all_coordinates):
1300  template_coordinate_dict = {}
1301  # let's try to align
1302  if alignment_components is not None and len(self.cluster_obj.all_coords) == 0:
1303  # set the first model as template coordinates
1304  self.cluster_obj.set_template(alignment_coordinates[n])
1305  self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1306  print("Global calculating the distance matrix")
1307 
1308  # calculate distance matrix, all against all
1309  self.cluster_obj.dist_matrix()
1310 
1311  # perform clustering and optionally display
1312  if self.rank == 0:
1313  self.cluster_obj.do_cluster(number_of_clusters)
1314  if display_plot:
1315  if self.rank == 0:
1316  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1317  if exit_after_display:
1318  exit()
1319  self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1320 
1321 # ------------------------------------------------------------------------
1322 # Alteratively, load the distance matrix from file and cluster that
1323 # ------------------------------------------------------------------------
1324  else:
1325  if self.rank==0:
1326  print("setup clustering class")
1327  self.cluster_obj = IMP.pmi.analysis.Clustering()
1328  self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1329  print("clustering with %s clusters" % str(number_of_clusters))
1330  self.cluster_obj.do_cluster(number_of_clusters)
1331  [best_score_feature_keyword_list_dict,
1332  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1333  if display_plot:
1334  if self.rank == 0:
1335  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1336  if exit_after_display:
1337  exit()
1338  if self.number_of_processes > 1:
1339  self.comm.Barrier()
1340 
1341 # ------------------------------------------------------------------------
1342 # now save all informations about the clusters
1343 # ------------------------------------------------------------------------
1344 
1345  if self.rank == 0:
1346  print(self.cluster_obj.get_cluster_labels())
1347  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1348  print("rank %s " % str(self.rank))
1349  print("cluster %s " % str(n))
1350  print("cluster label %s " % str(cl))
1351  print(self.cluster_obj.get_cluster_label_names(cl))
1352  cluster_size = len(self.cluster_obj.get_cluster_label_names(cl))
1353  cluster_prov = prov + \
1354  [IMP.pmi.io.ClusterProvenance(cluster_size)]
1355 
1356  # first initialize the Density class if requested
1357  if density_custom_ranges:
1358  DensModule = IMP.pmi.analysis.GetModelDensity(
1359  density_custom_ranges,
1360  voxel=voxel_size)
1361 
1362  dircluster = outputdir + "/cluster." + str(n) + "/"
1363  try:
1364  os.mkdir(dircluster)
1365  except:
1366  pass
1367 
1368  rmsd_dict = {"AVERAGE_RMSD":
1369  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1370  clusstat = open(dircluster + "stat.out", "w")
1371  for k, structure_name in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1372  # extract the features
1373  tmp_dict = {}
1374  tmp_dict.update(rmsd_dict)
1375  index = rmf_file_name_index_dict[structure_name]
1376  for key in best_score_feature_keyword_list_dict:
1377  tmp_dict[
1378  key] = best_score_feature_keyword_list_dict[
1379  key][
1380  index]
1381 
1382  # get the rmf name and the frame number from the list of
1383  # frame names
1384  rmf_name = structure_name.split("|")[0]
1385  rmf_frame_number = int(structure_name.split("|")[1])
1386  clusstat.write(str(tmp_dict) + "\n")
1387 
1388  # extract frame (open or link to existing)
1389  if k==0:
1390  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1391  self.model,
1392  rmf_frame_number,
1393  rmf_name)
1394  else:
1395  linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1396  self.model,
1397  prots,
1398  rs,
1399  rmf_frame_number,
1400  rmf_name)
1401  if not linking_successful:
1402  continue
1403  if not prots:
1404  continue
1405 
1406  if IMP.pmi.get_is_canonical(prots[0]):
1407  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1408  prot = states[state_number]
1409  else:
1410  prot = prots[state_number]
1411  if k==0:
1412  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1413 
1414  # transform clusters onto first
1415  if k > 0:
1416  model_index = self.cluster_obj.get_model_index_from_name(
1417  structure_name)
1418  transformation = self.cluster_obj.get_transformation_to_first_member(
1419  cl,
1420  model_index)
1421  rbs = set()
1422  for p in IMP.atom.get_leaves(prot):
1423  if not IMP.core.XYZR.get_is_setup(p):
1425  IMP.core.XYZR(p).set_radius(0.0001)
1426  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1427 
1429  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1430  rbs.add(rb)
1431  else:
1433  transformation)
1434  for rb in rbs:
1435  IMP.core.transform(rb,transformation)
1436 
1437  # add the density
1438  if density_custom_ranges:
1439  DensModule.add_subunits_density(prot)
1440 
1441  # pdb writing should be optimized!
1442  o = IMP.pmi.output.Output()
1443  self.model.update()
1444  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1445  o.write_pdb(dircluster + str(k) + ".pdb")
1446 
1447  if IMP.pmi.get_is_canonical(prot):
1448  # create a single-state System and write that
1450  h.set_name("System")
1451  h.add_child(prot)
1452  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1453  else:
1454  o.init_rmf(dircluster + str(k) + ".rmf3", [prot],rs)
1455  o.write_rmf(dircluster + str(k) + ".rmf3")
1456  o.close_rmf(dircluster + str(k) + ".rmf3")
1457 
1458  del o
1459  # IMP.atom.destroy(prot)
1460 
1461  if density_custom_ranges:
1462  DensModule.write_mrc(path=dircluster)
1463  del DensModule
1464 
1465  if self.number_of_processes>1:
1466  self.comm.Barrier()
1467 
1468  def get_cluster_rmsd(self,cluster_num):
1469  if self.cluster_obj is None:
1470  raise Exception("Run clustering first")
1471  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1472 
1473  def save_objects(self, objects, file_name):
1474  import pickle
1475  outf = open(file_name, 'wb')
1476  pickle.dump(objects, outf)
1477  outf.close()
1478 
1479  def load_objects(self, file_name):
1480  import pickle
1481  inputf = open(file_name, 'rb')
1482  objects = pickle.load(inputf)
1483  inputf.close()
1484  return objects
1485 
1487 
1488  """
1489  This class contains analysis utilities to investigate ReplicaExchange results.
1490  """
1491 
1492  ########################
1493  # Construction and Setup
1494  ########################
1495 
1496  def __init__(self,model,
1497  stat_files,
1498  best_models=None,
1499  score_key=None,
1500  alignment=True):
1501  """
1502  Construction of the Class.
1503  @param model IMP.Model()
1504  @param stat_files list of string. Can be ascii stat files, rmf files names
1505  @param best_models Integer. Number of best scoring models, if None: all models will be read
1506  @param alignment boolean (Default=True). Align before computing the rmsd.
1507  """
1508 
1509  self.model=model
1510  self.best_models=best_models
1511  self.stath0=IMP.pmi.output.StatHierarchyHandler(model,stat_files,self.best_models,score_key,cache=True)
1512  self.stath1=IMP.pmi.output.StatHierarchyHandler(StatHierarchyHandler=self.stath0)
1513 
1514  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(IMP.pmi.tools.select_at_all_resolutions(self.stath1))
1515  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(IMP.pmi.tools.select_at_all_resolutions(self.stath0))
1516  self.sel0_rmsd=IMP.atom.Selection(self.stath0)
1517  self.sel1_rmsd=IMP.atom.Selection(self.stath1)
1518  self.sel0_alignment=IMP.atom.Selection(self.stath0)
1519  self.sel1_alignment=IMP.atom.Selection(self.stath1)
1520  self.clusters=[]
1521  # fill the cluster list with a single cluster containing all models
1522  c = IMP.pmi.output.Cluster(0)
1523  self.clusters.append(c)
1524  for n0 in range(len(self.stath0)):
1525  c.add_member(n0)
1526  self.pairwise_rmsd={}
1527  self.pairwise_molecular_assignment={}
1528  self.alignment=alignment
1529  self.symmetric_molecules={}
1530  self.issymmetricsel={}
1531  self.update_seldicts()
1532  self.molcopydict0=IMP.pmi.tools.get_molecules_dictionary_by_copy(IMP.atom.get_leaves(self.stath0))
1533  self.molcopydict1=IMP.pmi.tools.get_molecules_dictionary_by_copy(IMP.atom.get_leaves(self.stath1))
1534 
1535  def set_rmsd_selection(self,**kwargs):
1536  """
1537  Setup the selection onto which the rmsd is computed
1538  @param kwargs use IMP.atom.Selection keywords
1539  """
1540  self.sel0_rmsd=IMP.atom.Selection(self.stath0,**kwargs)
1541  self.sel1_rmsd=IMP.atom.Selection(self.stath1,**kwargs)
1542  self.update_seldicts()
1543 
1544  def set_symmetric(self,molecule_name):
1545  """
1546  Store names of symmetric molecules
1547  """
1548  self.symmetric_molecules[molecule_name]=0
1549  self.update_seldicts()
1550 
1551  def set_alignment_selection(self,**kwargs):
1552  """
1553  Setup the selection onto which the alignment is computed
1554  @param kwargs use IMP.atom.Selection keywords
1555  """
1556  self.sel0_alignment=IMP.atom.Selection(self.stath0,**kwargs)
1557  self.sel1_alignment=IMP.atom.Selection(self.stath1,**kwargs)
1558 
1559  ######################
1560  # Clustering functions
1561  ######################
1562  def clean_clusters(self):
1563  for c in self.clusters: del c
1564  self.clusters=[]
1565 
1566 
1567  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1568  """
1569  Cluster the models based on RMSD.
1570  @param rmsd_cutoff Float the distance cutoff in Angstrom
1571  @param metric (Default=IMP.atom.get_rmsd) the metric that will be used to compute rmsds
1572  """
1573  self.clean_clusters()
1574  not_clustered=set(range(len(self.stath1)))
1575  while len(not_clustered)>0:
1576  self.aggregate(not_clustered, rmsd_cutoff, metric)
1577  #self.merge_aggregates(rmsd_cutoff, metric)
1578  self.update_clusters()
1579 
1580  def refine(self,rmsd_cutoff=10):
1581  """
1582  Refine the clusters by merging the ones whose centers are close
1583  @param rmsd_cutoff cutoff distance in Angstorms
1584  """
1585  merge_pairs=[]
1586  clusters_copy=self.clusters
1587  for c0,c1 in itertools.combinations(self.clusters,2):
1588  if c0.center_index is None:
1589  self.compute_cluster_center(c0)
1590  if c1.center_index is None:
1591  self.compute_cluster_center(c1)
1592  d0=self.stath0[c0.center_index]
1593  d1=self.stath1[c1.center_index]
1594  rmsd, molecular_assignment = self.rmsd()
1595  if rmsd <= rmsd_cutoff:
1596  if c1 in self.clusters:
1597  clusters_copy.remove(c1)
1598  c0+=c1
1599  self.clusters=clusters_copy
1600  self.update_clusters()
1601 
1602  ####################
1603  # Input Output
1604  ####################
1605 
1606  def set_cluster_assignments(self, cluster_ids):
1607  if len(cluster_ids)!=len(self.stath0):
1608  raise ValueError('cluster ids has to be same length as number of frames')
1609 
1610  self.clusters=[]
1611  for i in sorted(list(set(cluster_ids))):
1612  self.clusters.append(IMP.pmi.output.Cluster(i))
1613  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1614  self.clusters[idx].add_member(i,d)
1615 
1616  def get_cluster_data(self, cluster):
1617  """
1618  Return the model data from a cluster
1619  @param cluster IMP.pmi.output.Cluster object
1620  """
1621  data=[]
1622  for m in cluster:
1623  data.append(m)
1624  return data
1625 
1626  def save_data(self,filename='data.pkl'):
1627  """
1628  Save the data for the whole models into a pickle file
1629  @param filename string
1630  """
1631  self.stath0.save_data(filename)
1632 
1633  def set_data(self,data):
1634  """
1635  Set the data from an external IMP.pmi.output.Data
1636  @param data IMP.pmi.output.Data
1637  """
1638  self.stath0.data=data
1639  self.stath1.data=data
1640 
1641  def load_data(self,filename='data.pkl'):
1642  """
1643  Load the data from an external pickled file
1644  @param filename string
1645  """
1646  self.stath0.load_data(filename)
1647  self.stath1.load_data(filename)
1648  self.best_models=len(self.stath0)
1649 
1650  def add_cluster(self,rmf_name_list):
1651  c = IMP.pmi.output.Cluster(len(self.clusters))
1652  print("creating cluster index "+str(len(self.clusters)))
1653  self.clusters.append(c)
1654  current_len=len(self.stath0)
1655 
1656  for rmf in rmf_name_list:
1657  print("adding rmf "+rmf)
1658  self.stath0.add_stat_file(rmf)
1659  self.stath1.add_stat_file(rmf)
1660 
1661  for n0 in range(current_len,len(self.stath0)):
1662  d0=self.stath0[n0]
1663  c.add_member(n0,d0)
1664  self.update_clusters()
1665 
1666  def save_clusters(self,filename='clusters.pkl'):
1667  """
1668  Save the clusters into a pickle file
1669  @param filename string
1670  """
1671  try:
1672  import cPickle as pickle
1673  except ImportError:
1674  import pickle
1675  fl=open(filename,'wb')
1676  pickle.dump(self.clusters,fl)
1677 
1678  def load_clusters(self,filename='clusters.pkl',append=False):
1679  """
1680  Load the clusters from a pickle file
1681  @param filename string
1682  @param append bool (Default=False), if True. append the clusters to the ones currently present
1683  """
1684  try:
1685  import cPickle as pickle
1686  except ImportError:
1687  import pickle
1688  fl=open(filename,'rb')
1689  self.clean_clusters()
1690  if append:
1691  self.clusters+=pickle.load(fl)
1692  else:
1693  self.clusters=pickle.load(fl)
1694  self.update_clusters()
1695 
1696  ####################
1697  # Analysis Functions
1698  ####################
1699 
1700  def compute_cluster_center(self,cluster):
1701  """
1702  Compute the cluster center for a given cluster
1703  """
1704  member_distance=defaultdict(float)
1705 
1706  for n0,n1 in itertools.combinations(cluster.members,2):
1707  d0=self.stath0[n0]
1708  d1=self.stath1[n1]
1709  rmsd, _ = self.rmsd()
1710  member_distance[n0]+=rmsd
1711 
1712  if len(member_distance)>0:
1713  cluster.center_index=min(member_distance, key=member_distance.get)
1714  else:
1715  cluster.center_index=cluster.members[0]
1716 
1717  def save_coordinates(self,cluster,rmf_name=None,reference="Absolute", prefix="./"):
1718  """
1719  Save the coordinates of the current cluster a single rmf file
1720  """
1721  print("saving coordinates",cluster)
1722  if self.alignment: self.set_reference(reference,cluster)
1724  if rmf_name is None:
1725  rmf_name=prefix+'/'+str(cluster.cluster_id)+".rmf3"
1726 
1727  d1=self.stath1[cluster.members[0]]
1728  self.model.update()
1729  o.init_rmf(rmf_name, [self.stath1])
1730  for n1 in cluster.members:
1731  d1=self.stath1[n1]
1732  self.model.update()
1734  if self.alignment: self.align()
1735  o.write_rmf(rmf_name)
1737  o.close_rmf(rmf_name)
1738 
1739  def prune_redundant_structures(self,rmsd_cutoff=10):
1740  """
1741  remove structures that are similar
1742  append it to a new cluster
1743  """
1744  print("pruning models")
1745  selected=0
1746  filtered=[selected]
1747  remaining=range(1,len(self.stath1),10)
1748 
1749  while len(remaining)>0:
1750  d0=self.stath0[selected]
1751  rm=[]
1752  for n1 in remaining:
1753  d1=self.stath1[n1]
1754  if self.alignment: self.align()
1755  d, _ = self.rmsd()
1756  if d<=rmsd_cutoff:
1757  rm.append(n1)
1758  print("pruning model %s, similar to model %s, rmsd %s"%(str(n1),str(selected),str(d)))
1759  remaining=[x for x in remaining if x not in rm]
1760  if len(remaining)==0: break
1761  selected=remaining[0]
1762  filtered.append(selected)
1763  remaining.pop(0)
1764  c = IMP.pmi.output.Cluster(len(self.clusters))
1765  self.clusters.append(c)
1766  for n0 in filtered:
1767  d0=self.stath0[n0]
1768  c.add_member(n0,d0)
1769  self.update_clusters()
1770 
1771 
1772 
1773  def precision(self,cluster):
1774  """
1775  Compute the precision of a cluster
1776  """
1777  npairs=0
1778  rmsd=0.0
1779  precision=None
1780 
1781  if not cluster.center_index is None:
1782  members1=[cluster.center_index]
1783  else:
1784  members1=cluster.members
1785 
1786  for n0 in members1:
1787  d0=self.stath0[n0]
1788  for n1 in cluster.members:
1789  if n0!=n1:
1790  npairs+=1
1791  d1=self.stath1[n1]
1793  tmp_rmsd, _ = self.rmsd()
1794  rmsd+=tmp_rmsd
1796 
1797  if npairs>0:
1798  precision=rmsd/npairs
1799  cluster.precision=precision
1800  return precision
1801 
1802  def bipartite_precision(self,cluster1,cluster2,verbose=False):
1803  """
1804  Compute the bipartite precision (ie the cross-precision)
1805  between two clusters
1806  """
1807  npairs=0
1808  rmsd=0.0
1809  for cn0,n0 in enumerate(cluster1.members):
1810  d0=self.stath0[n0]
1811  for cn1,n1 in enumerate(cluster2.members):
1812  d1=self.stath1[n1]
1813  tmp_rmsd, _ =self.rmsd()
1814  if verbose: print("--- rmsd between structure %s and structure %s is %s"%(str(cn0),str(cn1),str(tmp_rmsd)))
1815  rmsd+=tmp_rmsd
1816  npairs+=1
1817  precision=rmsd/npairs
1818  return precision
1819 
1820  def rmsf(self,cluster,molecule,copy_index=0,state_index=0,cluster_ref=None,step=1):
1821  """
1822  Compute the Root mean square fluctuations
1823  of a molecule in a cluster
1824  Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf
1825  """
1826  rmsf=IMP.pmi.tools.OrderedDict()
1827 
1828  #assumes that residue indexes are identical for stath0 and stath1
1829  if cluster_ref is not None:
1830  if not cluster_ref.center_index is None:
1831  members0 = [cluster_ref.center_index]
1832  else:
1833  members0 = cluster_ref.members
1834  else:
1835  if not cluster.center_index is None:
1836  members0 = [cluster.center_index]
1837  else:
1838  members0 = cluster.members
1839 
1840  s0=IMP.atom.Selection(self.stath0,molecule=molecule,resolution=1,
1841  copy_index=copy_index,state_index=state_index)
1842  ps0=s0.get_selected_particles()
1843  #get the residue indexes
1844  residue_indexes=list(IMP.pmi.tools.OrderedSet([IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
1845 
1846  #get the corresponding particles
1847  #s0=IMP.atom.Selection(stat_ref,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
1848  # copy_index=copy_index,state_index=state_index)
1849  #ps0 = s0.get_selected_particles()
1850 
1851 
1852 
1853  npairs=0
1854  for n0 in members0:
1855  d0=self.stath0[n0]
1856  for n1 in cluster.members[::step]:
1857  if n0!=n1:
1858  print("--- rmsf %s %s"%(str(n0),str(n1)))
1860 
1861  s1=IMP.atom.Selection(self.stath1,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
1862  copy_index=copy_index,state_index=state_index)
1863  ps1 = s1.get_selected_particles()
1864 
1865  d1=self.stath1[n1]
1866  if self.alignment: self.align()
1867  for n,(p0,p1) in enumerate(zip(ps0,ps1)):
1868  r=residue_indexes[n]
1869  d0=IMP.core.XYZ(p0)
1870  d1=IMP.core.XYZ(p1)
1871  if r in rmsf:
1872  rmsf[r]+=IMP.core.get_distance(d0,d1)
1873  else:
1874  rmsf[r]=IMP.core.get_distance(d0,d1)
1875  npairs+=1
1877  for r in rmsf:
1878  rmsf[r]/=npairs
1879 
1880  for stath in [self.stath0,self.stath1]:
1881  if molecule not in self.symmetric_molecules:
1882  s=IMP.atom.Selection(stath,molecule=molecule,residue_index=r,resolution=1,
1883  copy_index=copy_index,state_index=state_index)
1884  else:
1885  s=IMP.atom.Selection(stath,molecule=molecule,residue_index=r,resolution=1,
1886  state_index=state_index)
1887 
1888  ps = s.get_selected_particles()
1889  for p in ps:
1891  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
1892  else:
1894 
1895  return rmsf
1896 
1897  def save_densities(self,cluster,density_custom_ranges,voxel_size=5,reference="Absolute", prefix="./",step=1):
1898  if self.alignment: self.set_reference(reference,cluster)
1899  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
1900  voxel=voxel_size)
1901 
1902  for n1 in cluster.members[::step]:
1903  print("density "+str(n1))
1904  d1=self.stath1[n1]
1906  if self.alignment: self.align()
1907  dens.add_subunits_density(self.stath1)
1909  dens.write_mrc(path=prefix+'/',suffix=str(cluster.cluster_id))
1910  del dens
1911 
1912  def contact_map(self,cluster,contact_threshold=15,log_scale=False,consolidate=False,molecules=None,prefix='./',reference="Absolute"):
1913  if self.alignment: self.set_reference(reference,cluster)
1914  import numpy as np
1915  import matplotlib.pyplot as plt
1916  import matplotlib.cm as cm
1917  from scipy.spatial.distance import cdist
1918  import IMP.pmi.topology
1919  if molecules is None:
1921  else:
1922  mols=[IMP.pmi.topology.PMIMoleculeHierarchy(mol) for mol in IMP.pmi.tools.get_molecules(IMP.atom.Selection(self.stath1,molecules=molecules).get_selected_particles())]
1923  unique_copies=[mol for mol in mols if mol.get_copy_index() == 0]
1924  mol_names_unique=dict((mol.get_name(),mol) for mol in unique_copies)
1925  total_len_unique=sum(max(mol.get_residue_indexes()) for mol in unique_copies)
1926 
1927 
1928  # coords = np.ones((total_len,3)) * 1e6 #default to coords "very far away"
1929  index_dict={}
1930  prev_stop=0
1931 
1932  if not consolidate:
1933  for mol in mols:
1934  seqlen=max(mol.get_residue_indexes())
1935  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
1936  prev_stop+=seqlen
1937 
1938  else:
1939  for mol in unique_copies:
1940  seqlen=max(mol.get_residue_indexes())
1941  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
1942  prev_stop+=seqlen
1943 
1944 
1945  for ncl,n1 in enumerate(cluster.members):
1946  print(ncl)
1947  d1=self.stath1[n1]
1948  #self.apply_molecular_assignments(n1)
1949  coord_dict=IMP.pmi.tools.OrderedDict()
1950  for mol in mols:
1951  mol_name=mol.get_name()
1952  copy_index=mol.get_copy_index()
1953  rindexes = mol.get_residue_indexes()
1954  coords = np.ones((max(rindexes),3))
1955  for rnum in rindexes:
1956  sel = IMP.atom.Selection(mol, residue_index=rnum, resolution=1)
1957  selpart = sel.get_selected_particles()
1958  if len(selpart) == 0: continue
1959  selpart = selpart[0]
1960  coords[rnum - 1, :] = IMP.core.XYZ(selpart).get_coordinates()
1961  coord_dict[mol]=coords
1962 
1963  if not consolidate:
1964  coords=np.concatenate(list(coord_dict.values()))
1965  dists = cdist(coords, coords)
1966  binary_dists = np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
1967  else:
1968  binary_dists_dict={}
1969  for mol1 in mols:
1970  len1 = max(mol1.get_residue_indexes())
1971  for mol2 in mols:
1972  name1=mol1.get_name()
1973  name2=mol2.get_name()
1974  dists = cdist(coord_dict[mol1], coord_dict[mol2])
1975  if (name1, name2) not in binary_dists_dict:
1976  binary_dists_dict[(name1, name2)] = np.zeros((len1,len1))
1977  binary_dists_dict[(name1, name2)] += np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
1978  binary_dists=np.zeros((total_len_unique,total_len_unique))
1979 
1980  for name1,name2 in binary_dists_dict:
1981  r1=index_dict[mol_names_unique[name1]]
1982  r2=index_dict[mol_names_unique[name2]]
1983  binary_dists[min(r1):max(r1)+1,min(r2):max(r2)+1] = np.where((binary_dists_dict[(name1, name2)]>=1.0),1.0,0.0)
1984 
1985  if ncl==0:
1986  dist_maps = [dists]
1987  av_dist_map = dists
1988  contact_freqs = binary_dists
1989  else:
1990  dist_maps.append(dists)
1991  av_dist_map += dists
1992  contact_freqs += binary_dists
1993 
1994  #self.undo_apply_molecular_assignments(n1)
1995 
1996  if log_scale:
1997  contact_freqs =-np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
1998  else:
1999  contact_freqs =1.0/len(cluster)*contact_freqs
2000  av_dist_map=1.0/len(cluster)*contact_freqs
2001 
2002  fig = plt.figure(figsize=(100, 100))
2003  ax = fig.add_subplot(111)
2004  ax.set_xticks([])
2005  ax.set_yticks([])
2006  gap_between_components=50
2007  colormap = cm.Blues
2008  colornorm=None
2009 
2010 
2011  #if cbar_labels is not None:
2012  # if len(cbar_labels)!=4:
2013  # print("to provide cbar labels, give 3 fields (first=first input file, last=last input) in oppose order of input contact maps")
2014  # exit()
2015  # set the list of proteins on the x axis
2016  if not consolidate:
2017  sorted_tuple=sorted([(IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_extended_name(),mol) for mol in mols])
2018  prot_list=list(zip(*sorted_tuple))[1]
2019  else:
2020  sorted_tuple=sorted([(IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(),mol) for mol in unique_copies])
2021  prot_list=list(zip(*sorted_tuple))[1]
2022 
2023  prot_listx = prot_list
2024  nresx = gap_between_components + \
2025  sum([max(mol.get_residue_indexes())
2026  + gap_between_components for mol in prot_listx])
2027 
2028  # set the list of proteins on the y axis
2029  prot_listy = prot_list
2030  nresy = gap_between_components + \
2031  sum([max(mol.get_residue_indexes())
2032  + gap_between_components for mol in prot_listy])
2033 
2034  # this is the residue offset for each protein
2035  resoffsetx = {}
2036  resendx = {}
2037  res = gap_between_components
2038  for mol in prot_listx:
2039  resoffsetx[mol] = res
2040  res += max(mol.get_residue_indexes())
2041  resendx[mol] = res
2042  res += gap_between_components
2043 
2044  resoffsety = {}
2045  resendy = {}
2046  res = gap_between_components
2047  for mol in prot_listy:
2048  resoffsety[mol] = res
2049  res += max(mol.get_residue_indexes())
2050  resendy[mol] = res
2051  res += gap_between_components
2052 
2053  resoffsetdiagonal = {}
2054  res = gap_between_components
2055  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2056  resoffsetdiagonal[mol] = res
2057  res += max(mol.get_residue_indexes())
2058  res += gap_between_components
2059 
2060  # plot protein boundaries
2061  xticks = []
2062  xlabels = []
2063  for n, prot in enumerate(prot_listx):
2064  res = resoffsetx[prot]
2065  end = resendx[prot]
2066  for proty in prot_listy:
2067  resy = resoffsety[proty]
2068  endy = resendy[proty]
2069  ax.plot([res, res], [resy, endy], linestyle='-',color='gray', lw=0.4)
2070  ax.plot([end, end], [resy, endy], linestyle='-',color='gray', lw=0.4)
2071  xticks.append((float(res) + float(end)) / 2)
2072  xlabels.append(IMP.pmi.topology.PMIMoleculeHierarchy(prot).get_extended_name())
2073 
2074  yticks = []
2075  ylabels = []
2076  for n, prot in enumerate(prot_listy):
2077  res = resoffsety[prot]
2078  end = resendy[prot]
2079  for protx in prot_listx:
2080  resx = resoffsetx[protx]
2081  endx = resendx[protx]
2082  ax.plot([resx, endx], [res, res], linestyle='-',color='gray', lw=0.4)
2083  ax.plot([resx, endx], [end, end], linestyle='-',color='gray', lw=0.4)
2084  yticks.append((float(res) + float(end)) / 2)
2085  ylabels.append(IMP.pmi.topology.PMIMoleculeHierarchy(prot).get_extended_name())
2086 
2087  # plot the contact map
2088 
2089  tmp_array = np.zeros((nresx, nresy))
2090  ret={}
2091  for px in prot_listx:
2092  for py in prot_listy:
2093  resx = resoffsetx[px]
2094  lengx = resendx[px] - 1
2095  resy = resoffsety[py]
2096  lengy = resendy[py] - 1
2097  indexes_x = index_dict[px]
2098  minx = min(indexes_x)
2099  maxx = max(indexes_x)
2100  indexes_y = index_dict[py]
2101  miny = min(indexes_y)
2102  maxy = max(indexes_y)
2103  tmp_array[resx:lengx,resy:lengy] = contact_freqs[minx:maxx,miny:maxy]
2104  ret[(px,py)]=np.argwhere(contact_freqs[minx:maxx,miny:maxy] == 1.0)+1
2105 
2106  cax = ax.imshow(tmp_array,
2107  cmap=colormap,
2108  norm=colornorm,
2109  origin='lower',
2110  alpha=0.6,
2111  interpolation='nearest')
2112 
2113  ax.set_xticks(xticks)
2114  ax.set_xticklabels(xlabels, rotation=90)
2115  ax.set_yticks(yticks)
2116  ax.set_yticklabels(ylabels)
2117  plt.setp(ax.get_xticklabels(), fontsize=6)
2118  plt.setp(ax.get_yticklabels(), fontsize=6)
2119 
2120  # display and write to file
2121  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2122  [i.set_linewidth(2.0) for i in ax.spines.values()]
2123  #if cbar_labels is not None:
2124  # cbar = fig.colorbar(cax, ticks=[0.5,1.5,2.5,3.5])
2125  # cbar.ax.set_yticklabels(cbar_labels)# vertically oriented colorbar
2126 
2127  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf", dpi=300,transparent="False")
2128  return ret
2129 
2130 
2131  def plot_rmsd_matrix(self,filename):
2132  import numpy as np
2133  self.compute_all_pairwise_rmsd()
2134  distance_matrix = np.zeros(
2135  (len(self.stath0), len(self.stath1)))
2136  for (n0,n1) in self.pairwise_rmsd:
2137  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0,n1)]
2138 
2139  import matplotlib as mpl
2140  mpl.use('Agg')
2141  import matplotlib.pylab as pl
2142  from scipy.cluster import hierarchy as hrc
2143 
2144  fig = pl.figure(figsize=(10,8))
2145  ax = fig.add_subplot(212)
2146  dendrogram = hrc.dendrogram(
2147  hrc.linkage(distance_matrix),
2148  color_threshold=7,
2149  no_labels=True)
2150  leaves_order = dendrogram['leaves']
2151  ax.set_xlabel('Model')
2152  ax.set_ylabel('RMSD [Angstroms]')
2153 
2154  ax2 = fig.add_subplot(221)
2155  cax = ax2.imshow(
2156  distance_matrix[leaves_order,
2157  :][:,
2158  leaves_order],
2159  interpolation='nearest')
2160  cb = fig.colorbar(cax)
2161  cb.set_label('RMSD [Angstroms]')
2162  ax2.set_xlabel('Model')
2163  ax2.set_ylabel('Model')
2164 
2165  pl.savefig(filename, dpi=300)
2166  pl.close(fig)
2167 
2168  ####################
2169  # Internal Functions
2170  ####################
2171 
2172  def update_clusters(self):
2173  """
2174  Update the cluster id numbers
2175  """
2176  for n,c in enumerate(self.clusters):
2177  c.cluster_id=n
2178 
2179  def get_molecule(self, hier, name, copy):
2180  s=IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2181  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2182 
2183  def update_seldicts(self):
2184  """
2185  Update the seldicts
2186  """
2187  self.seldict0=IMP.pmi.tools.get_selections_dictionary(self.sel0_rmsd.get_selected_particles())
2188  self.seldict1=IMP.pmi.tools.get_selections_dictionary(self.sel1_rmsd.get_selected_particles())
2189  for mol in self.seldict0:
2190  for sel in self.seldict0[mol]:
2191  self.issymmetricsel[sel]=False
2192  for mol in self.symmetric_molecules:
2193  self.symmetric_molecules[mol]=len(self.seldict0[mol])
2194  for sel in self.seldict0[mol]:
2195  self.issymmetricsel[sel]=True
2196 
2197 
2198  def align(self):
2199  tr = IMP.atom.get_transformation_aligning_first_to_second(self.sel1_alignment, self.sel0_alignment)
2200 
2201  for rb in self.rbs1:
2202  IMP.core.transform(rb, tr)
2203 
2204  for bead in self.beads1:
2205  try:
2206  IMP.core.transform(IMP.core.XYZ(bead), tr)
2207  except:
2208  continue
2209 
2210 
2211  self.model.update()
2212 
2213  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2214  '''
2215  initial filling of the clusters.
2216  '''
2217  n0 = idxs.pop()
2218  print("clustering model "+str(n0))
2219  d0 = self.stath0[n0]
2220  c = IMP.pmi.output.Cluster(len(self.clusters))
2221  print("creating cluster index "+str(len(self.clusters)))
2222  self.clusters.append(c)
2223  c.add_member(n0,d0)
2224  clustered = set([n0])
2225  for n1 in idxs:
2226  print("--- trying to add model "+str(n1)+" to cluster "+str(len(self.clusters)))
2227  d1 = self.stath1[n1]
2228  if self.alignment: self.align()
2229  rmsd, _ = self.rmsd(metric=metric)
2230  if rmsd<rmsd_cutoff:
2231  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2232  c.add_member(n1,d1)
2233  clustered.add(n1)
2234  else:
2235  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2236  idxs-=clustered
2237 
2238  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2239  """
2240  merge the clusters that have close members
2241  @param rmsd_cutoff cutoff distance in Angstorms
2242  """
2243  # before merging, clusters are spheres of radius rmsd_cutoff centered on the 1st element
2244  # here we only try to merge clusters whose centers are closer than 2*rmsd_cutoff
2245  to_merge = []
2246  print("merging...")
2247  for c0, c1 in filter(lambda x: len(x[0].members)>1, itertools.combinations(self.clusters, 2)):
2248  n0, n1 = [c.members[0] for c in (c0,c1)]
2249  d0 = self.stath0[n0]
2250  d1 = self.stath1[n1]
2251  rmsd, _ = self.rmsd()
2252  if rmsd<2*rmsd_cutoff and self.have_close_members(c0,c1,rmsd_cutoff,metric):
2253  to_merge.append((c0,c1))
2254 
2255  for c0, c in reversed(to_merge):
2256  self.merge(c0,c)
2257 
2258  #keep only full clusters
2259  self.clusters = [c for c in filter(lambda x: len(x.members)>0, self.clusters)]
2260 
2261 
2262  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2263  '''
2264  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2265  '''
2266  print("check close members for clusters "+str(c0.cluster_id)+" and "+str(c1.cluster_id))
2267  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2268  d0 = self.stath0[n0]
2269  d1 = self.stath1[n1]
2270  rmsd, _ = self.rmsd(metric=metric)
2271  if rmsd<rmsd_cutoff:
2272  return True
2273 
2274  return False
2275 
2276 
2277  def merge(self, c0, c1):
2278  '''
2279  merge two clusters
2280  '''
2281  c0+=c1
2282  c1.members=[]
2283  c1.data={}
2284 
2285  def rmsd_helper(self, sels0, sels1, metric):
2286  '''
2287  a function that returns the permutation best_sel of sels0 that minimizes metric
2288  '''
2289  best_rmsd2 = float('inf')
2290  best_sel = None
2291  if self.issymmetricsel[sels0[0]]:
2292  #this cases happens when symmetries were defined
2293  N = len(sels0)
2294  for offset in range(N):
2295  order=[(offset+i)%N for i in range(N)]
2296  sels = [sels0[(offset+i)%N] for i in range(N)]
2297  sel0 = sels[0]
2298  sel1 = sels1[0]
2299  r=metric(sel0, sel1)
2300  rmsd2=r*r*N
2301  ###print(order,rmsd2)
2302  if rmsd2 < best_rmsd2:
2303  best_rmsd2 = rmsd2
2304  best_sel = sels
2305  best_order=order
2306  else:
2307  for sels in itertools.permutations(sels0):
2308  rmsd2=0.0
2309  for sel0, sel1 in itertools.takewhile(lambda x: rmsd2<best_rmsd2, zip(sels, sels1)):
2310  r=metric(sel0, sel1)
2311  rmsd2+=r*r
2312  if rmsd2 < best_rmsd2:
2313  best_rmsd2 = rmsd2
2314  best_sel = sels
2315  ###for i,sel in enumerate(best_sel):
2316  ### p0 = sel.get_selected_particles()[0]
2317  ### p1 = sels1[i].get_selected_particles()[0]
2318  ### m0 = IMP.pmi.tools.get_molecules([p0])[0]
2319  ### m1 = IMP.pmi.tools.get_molecules([p1])[0]
2320  ### c0 = IMP.atom.Copy(m0).get_copy_index()
2321  ### c1 = IMP.atom.Copy(m1).get_copy_index()
2322  ### name0=m0.get_name()
2323  ### name1=m1.get_name()
2324  ### print("WWW",name0,name1,c0,c1)
2325  ###print(best_order,best_rmsd2,m0,m1)
2326  return best_sel, best_rmsd2
2327 
2328  def compute_all_pairwise_rmsd(self):
2329  for d0 in self.stath0:
2330  for d1 in self.stath1:
2331  rmsd, _ = self.rmsd()
2332 
2333  def rmsd(self,metric=IMP.atom.get_rmsd):
2334  '''
2335  Computes the RMSD. Resolves ambiguous pairs assignments
2336  '''
2337  # here we memoize the rmsd and molecular assignment so that it's not done multiple times
2338  n0=self.stath0.current_index
2339  n1=self.stath1.current_index
2340  if ((n0,n1) in self.pairwise_rmsd) and ((n0,n1) in self.pairwise_molecular_assignment):
2341  return self.pairwise_rmsd[(n0,n1)], self.pairwise_molecular_assignment[(n0,n1)]
2342 
2343  if self.alignment:
2344  self.align()
2345  #if it's not yet memoized
2346  total_rmsd=0.0
2347  total_N=0
2348  # this is a dictionary which keys are the molecule names, and values are the list of IMP.atom.Selection for all molecules that share the molecule name
2349  seldict_best_order={}
2350  molecular_assignment={}
2351  for molname, sels0 in self.seldict0.items():
2352  sels_best_order, best_rmsd2 = self.rmsd_helper(sels0, self.seldict1[molname], metric)
2353 
2354  Ncoords = len(sels_best_order[0].get_selected_particles())
2355  Ncopies = len(self.seldict1[molname])
2356  total_rmsd += Ncoords*best_rmsd2
2357  total_N += Ncoords*Ncopies
2358 
2359  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2360  p0 = sel0.get_selected_particles()[0]
2361  p1 = sel1.get_selected_particles()[0]
2362  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2363  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2364  c0 = IMP.atom.Copy(m0).get_copy_index()
2365  c1 = IMP.atom.Copy(m1).get_copy_index()
2366  ###print(molname,c0,c1)
2367  molecular_assignment[(molname,c0)]=(molname,c1)
2368 
2369  total_rmsd = math.sqrt(total_rmsd/total_N)
2370 
2371  self.pairwise_rmsd[(n0,n1)]=total_rmsd
2372  self.pairwise_molecular_assignment[(n0,n1)]=molecular_assignment
2373  self.pairwise_rmsd[(n1,n0)]=total_rmsd
2374  self.pairwise_molecular_assignment[(n1,n0)]=molecular_assignment
2375  ###print(n0,n1,molecular_assignment)
2376  return total_rmsd, molecular_assignment
2377 
2378  def set_reference(self,reference,cluster):
2379  """
2380  Fix the reference structure for structural alignment, rmsd and chain assignemnt
2381  @param reference can be either "Absolute" (cluster center of the first cluster)
2382  or Relative (cluster center of the current cluster)
2383  """
2384  if reference=="Absolute":
2385  d0=self.stath0[0]
2386  elif reference=="Relative":
2387  if cluster.center_index:
2388  n0=cluster.center_index
2389  else:
2390  n0=cluster.members[0]
2391  d0=self.stath0[n0]
2392 
2394  """
2395  compute the molecular assignemnts between multiple copies
2396  of the same sequence. It changes the Copy index of Molecules
2397  """
2398  d1=self.stath1[n1]
2399  _, molecular_assignment = self.rmsd()
2400  for (m0, c0), (m1,c1) in molecular_assignment.items():
2401  mol0 = self.molcopydict0[m0][c0]
2402  mol1 = self.molcopydict1[m1][c1]
2403  cik0=IMP.atom.Copy(mol0).get_copy_index_key()
2404  p1=IMP.atom.Copy(mol1).get_particle()
2405  p1.set_value(cik0,c0)
2406 
2408  """
2409  Undo the Copy index assignment
2410  """
2411  d1=self.stath1[n1]
2412  _, molecular_assignment = self.rmsd()
2413  mols_newcopys = []
2414  for (m0, c0), (m1,c1) in molecular_assignment.items():
2415  mol0 = self.molcopydict0[m0][c0]
2416  mol1 = self.molcopydict1[m1][c1]
2417  cik0=IMP.atom.Copy(mol0).get_copy_index_key()
2418  p1=IMP.atom.Copy(mol1).get_particle()
2419  p1.set_value(cik0,c1)
2420 
2421  ####################
2422  # Container Functions
2423  ####################
2424 
2425  def __repr__(self):
2426  s= "AnalysisReplicaExchange\n"
2427  s+="---- number of clusters %s \n"%str(len(self.clusters))
2428  s+="---- number of models %s \n"%str(len(self.stath0))
2429  return s
2430 
2431  def __getitem__(self,int_slice_adaptor):
2432  if isinstance(int_slice_adaptor, int):
2433  return self.clusters[int_slice_adaptor]
2434  elif isinstance(int_slice_adaptor, slice):
2435  return self.__iter__(int_slice_adaptor)
2436  else:
2437  raise TypeError("Unknown Type")
2438 
2439  def __len__(self):
2440  return len(self.clusters)
2441 
2442  def __iter__(self,slice_key=None):
2443  if slice_key is None:
2444  for i in range(len(self)):
2445  yield self[i]
2446  else:
2447  for i in range(len(self))[slice_key]:
2448  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2333
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
Definition: macros.py:2378
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1678
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1125
def precision
Compute the precision of a cluster.
Definition: macros.py:1773
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:815
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:108
A container for models organized into clusters.
Definition: output.py:1317
Sample using molecular dynamics.
Definition: samplers.py:389
def aggregate
initial filling of the clusters.
Definition: macros.py:2213
A class for reading stat files (either rmf or ascii v1 and v2)
Definition: output.py:769
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:616
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:617
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1739
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1820
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1216
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1535
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1616
def __init__
Construction of the Class.
Definition: macros.py:1496
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:732
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1633
def __init__
Constructor.
Definition: macros.py:112
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2407
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1551
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2285
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1717
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:960
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
Definition: macros.py:2393
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1486
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:574
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2238
This class initializes the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:868
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1700
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:877
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:23
def update_seldicts
Update the seldicts.
Definition: macros.py:2183
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2172
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:596
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1580
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:60
Collect timing information.
Definition: tools.py:122
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1544
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: pmi/utilities.h:91
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9704
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:36
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2277
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:618
The general base class for IMP exceptions.
Definition: exception.h:49
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1081
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1626
Class to handle individual particles of a Model object.
Definition: Particle.h:41
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:741
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1802
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:596
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1567
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1666
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2262
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
A macro to help setup and run replica exchange.
Definition: macros.py:56
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1193
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1641
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:554
Sample using replica exchange.
Definition: samplers.py:496
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27