IMP logo
IMP Reference Guide  2.14.0
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
7 import IMP.pmi.tools
8 import IMP.pmi.samplers
9 import IMP.pmi.output
10 import IMP.pmi.analysis
11 import IMP.pmi.io
12 import IMP.pmi.alphabets
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import RMF
17 import os
18 import glob
19 from operator import itemgetter
20 from collections import defaultdict
21 import numpy as np
22 import string
23 import itertools
24 import warnings
25 import math
26 import warnings
27 
28 class _RMFRestraints(object):
29  """All restraints that are written out to the RMF file"""
30  def __init__(self, model, user_restraints):
31  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
32  self._user_restraints = user_restraints if user_restraints else []
33 
34  def __len__(self):
35  return (len(self._user_restraints)
36  + self._rmf_rs.get_number_of_restraints())
37 
38  def __bool__(self):
39  return len(self) > 0
40  __nonzero__ = __bool__ # Python 2 compatibility
41 
42  def __getitem__(self, i):
43  class FakePMIWrapper(object):
44  def __init__(self, r):
45  self.r = IMP.RestraintSet.get_from(r)
46  get_restraint = lambda self: self.r
47  lenuser = len(self._user_restraints)
48  if 0 <= i < lenuser:
49  return self._user_restraints[i]
50  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
51  r = self._rmf_rs.get_restraint(i - lenuser)
52  return FakePMIWrapper(r)
53  else:
54  raise IndexError("Out of range")
55 
56 
57 class ReplicaExchange0(object):
58  """A macro to help setup and run replica exchange.
59  Supports Monte Carlo and molecular dynamics.
60  Produces trajectory RMF files, best PDB structures,
61  and output stat files.
62  """
63  def __init__(self, model, root_hier,
64  sample_objects=None, # DEPRECATED
65  monte_carlo_sample_objects=None,
66  molecular_dynamics_sample_objects=None,
67  output_objects=[],
68  rmf_output_objects=None,
69  crosslink_restraints=None, # DEPRECATED
70  monte_carlo_temperature=1.0,
71  simulated_annealing=False,
72  simulated_annealing_minimum_temperature=1.0,
73  simulated_annealing_maximum_temperature=2.5,
74  simulated_annealing_minimum_temperature_nframes=100,
75  simulated_annealing_maximum_temperature_nframes=100,
76  replica_exchange_minimum_temperature=1.0,
77  replica_exchange_maximum_temperature=2.5,
78  replica_exchange_swap=True,
79  num_sample_rounds=1,
80  number_of_best_scoring_models=500,
81  monte_carlo_steps=10,
82  self_adaptive=False,
83  molecular_dynamics_steps=10,
84  molecular_dynamics_max_time_step=1.0,
85  number_of_frames=1000,
86  save_coordinates_mode="lowest_temperature",
87  nframes_write_coordinates=1,
88  write_initial_rmf=True,
89  initial_rmf_name_suffix="initial",
90  stat_file_name_suffix="stat",
91  best_pdb_name_suffix="model",
92  do_clean_first=True,
93  do_create_directories=True,
94  global_output_directory="./",
95  rmf_dir="rmfs/",
96  best_pdb_dir="pdbs/",
97  replica_stat_file_suffix="stat_replica",
98  em_object_for_rmf=None,
99  atomistic=False,
100  replica_exchange_object=None,
101  test_mode=False):
102  """Constructor.
103  @param model The IMP model
104  @param root_hier Top-level (System)hierarchy
105  @param monte_carlo_sample_objects Objects for MC sampling; a list of
106  structural components (generally the representation) that will
107  be moved and restraints with parameters that need to
108  be sampled.
109  For PMI2: just pass flat list of movers
110  @param molecular_dynamics_sample_objects Objects for MD sampling
111  For PMI2: just pass flat list of particles
112  @param output_objects A list of structural objects and restraints
113  that will be included in output (ie, statistics "stat" files). Any object
114  that provides a get_output() method can be used here. If None is passed
115  the macro will not write stat files.
116  @param rmf_output_objects A list of structural objects and restraints
117  that will be included in rmf. Any object
118  that provides a get_output() method can be used here.
119  @param monte_carlo_temperature MC temp (may need to be optimized
120  based on post-sampling analysis)
121  @param simulated_annealing If True, perform simulated annealing
122  @param simulated_annealing_minimum_temperature Should generally be
123  the same as monte_carlo_temperature.
124  @param simulated_annealing_minimum_temperature_nframes Number of
125  frames to compute at minimum temperature.
126  @param simulated_annealing_maximum_temperature_nframes Number of
127  frames to compute at
128  temps > simulated_annealing_maximum_temperature.
129  @param replica_exchange_minimum_temperature Low temp for REX; should
130  generally be the same as monte_carlo_temperature.
131  @param replica_exchange_maximum_temperature High temp for REX
132  @param replica_exchange_swap Boolean, enable disable temperature
133  swap (Default=True)
134  @param num_sample_rounds Number of rounds of MC/MD per cycle
135  @param number_of_best_scoring_models Number of top-scoring PDB models
136  to keep around for analysis
137  @param monte_carlo_steps Number of MC steps per round
138  @param self_adaptive self adaptive scheme for monte carlo movers
139  @param molecular_dynamics_steps Number of MD steps per round
140  @param molecular_dynamics_max_time_step Max time step for MD
141  @param number_of_frames Number of REX frames to run
142  @param save_coordinates_mode string: how to save coordinates.
143  "lowest_temperature" (default) only the lowest temperatures is saved
144  "25th_score" all replicas whose score is below the 25th percentile
145  "50th_score" all replicas whose score is below the 50th percentile
146  "75th_score" all replicas whose score is below the 75th percentile
147  @param nframes_write_coordinates How often to write the coordinates
148  of a frame
149  @param write_initial_rmf Write the initial configuration
150  @param global_output_directory Folder that will be created to house
151  output.
152  @param test_mode Set to True to avoid writing any files, just test one frame.
153  """
154  self.model = model
155  self.vars = {}
156 
157  ### add check hierarchy is multistate
158  self.output_objects = output_objects
159  self.rmf_output_objects=rmf_output_objects
160  if (isinstance(root_hier, IMP.atom.Hierarchy)
161  and root_hier.get_name() == 'System'):
162  if self.output_objects is not None:
163  self.output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
164  if self.rmf_output_objects is not None:
165  self.rmf_output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
166  self.root_hier = root_hier
167  states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
168  self.vars["number_of_states"] = len(states)
169  if len(states)>1:
170  self.root_hiers = states
171  self.is_multi_state = True
172  else:
173  self.root_hier = root_hier
174  self.is_multi_state = False
175  else:
176  raise TypeError("Must provide System hierarchy (root_hier)")
177 
178  if crosslink_restraints:
180  "crosslink_restraints is deprecated and is ignored; "
181  "all cross-link restraints should be automatically "
182  "added to output RMF files")
183  self._rmf_restraints = _RMFRestraints(model, None)
184  self.em_object_for_rmf = em_object_for_rmf
185  self.monte_carlo_sample_objects = monte_carlo_sample_objects
186  self.vars["self_adaptive"]=self_adaptive
187  if sample_objects is not None:
189  "sample_objects is deprecated; use monte_carlo_sample_objects "
190  "(or molecular_dynamics_sample_objects) instead")
191  self.monte_carlo_sample_objects+=sample_objects
192  self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
193  self.replica_exchange_object = replica_exchange_object
194  self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
195  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
196  self.vars[
197  "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
198  self.vars[
199  "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
200  self.vars["replica_exchange_swap"] = replica_exchange_swap
201  self.vars["simulated_annealing"]=\
202  simulated_annealing
203  self.vars["simulated_annealing_minimum_temperature"]=\
204  simulated_annealing_minimum_temperature
205  self.vars["simulated_annealing_maximum_temperature"]=\
206  simulated_annealing_maximum_temperature
207  self.vars["simulated_annealing_minimum_temperature_nframes"]=\
208  simulated_annealing_minimum_temperature_nframes
209  self.vars["simulated_annealing_maximum_temperature_nframes"]=\
210  simulated_annealing_maximum_temperature_nframes
211 
212  self.vars["num_sample_rounds"] = num_sample_rounds
213  self.vars[
214  "number_of_best_scoring_models"] = number_of_best_scoring_models
215  self.vars["monte_carlo_steps"] = monte_carlo_steps
216  self.vars["molecular_dynamics_steps"]=molecular_dynamics_steps
217  self.vars["number_of_frames"] = number_of_frames
218  if not save_coordinates_mode in ["lowest_temperature","25th_score","50th_score","75th_score"]:
219  raise Exception("save_coordinates_mode has unrecognized value")
220  else:
221  self.vars["save_coordinates_mode"] = save_coordinates_mode
222  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
223  self.vars["write_initial_rmf"] = write_initial_rmf
224  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
225  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
226  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
227  self.vars["do_clean_first"] = do_clean_first
228  self.vars["do_create_directories"] = do_create_directories
229  self.vars["global_output_directory"] = global_output_directory
230  self.vars["rmf_dir"] = rmf_dir
231  self.vars["best_pdb_dir"] = best_pdb_dir
232  self.vars["atomistic"] = atomistic
233  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
234  self.vars["geometries"] = None
235  self.test_mode = test_mode
236 
237  def add_geometries(self, geometries):
238  if self.vars["geometries"] is None:
239  self.vars["geometries"] = list(geometries)
240  else:
241  self.vars["geometries"].extend(geometries)
242 
243  def show_info(self):
244  print("ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
245  print("--- it stores the best scoring pdb models in pdbs/")
246  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
247  print("--- variables:")
248  keys = list(self.vars.keys())
249  keys.sort()
250  for v in keys:
251  print("------", v.ljust(30), self.vars[v])
252 
253  def get_replica_exchange_object(self):
254  return self.replica_exchange_object
255 
256  def _add_provenance(self, sampler_md, sampler_mc):
257  """Record details about the sampling in the IMP Hierarchies"""
258  iterations = 0
259  if sampler_md:
260  method = "Molecular Dynamics"
261  iterations += self.vars["molecular_dynamics_steps"]
262  if sampler_mc:
263  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
264  iterations += self.vars["monte_carlo_steps"]
265  # If no sampling is actually done, no provenance to write
266  if iterations == 0 or self.vars["number_of_frames"] == 0:
267  return
268  iterations *= self.vars["num_sample_rounds"]
269 
270  pi = self.model.add_particle("sampling")
272  self.model, pi, method, self.vars["number_of_frames"],
273  iterations)
274  p.set_number_of_replicas(
275  self.replica_exchange_object.get_number_of_replicas())
276  IMP.pmi.tools._add_pmi_provenance(self.root_hier)
277  IMP.core.add_provenance(self.model, self.root_hier, p)
278 
279  def execute_macro(self):
280  temp_index_factor = 100000.0
281  samplers=[]
282  sampler_mc=None
283  sampler_md=None
284  if self.monte_carlo_sample_objects is not None:
285  print("Setting up MonteCarlo")
286  sampler_mc = IMP.pmi.samplers.MonteCarlo(self.model,
287  self.monte_carlo_sample_objects,
288  self.vars["monte_carlo_temperature"])
289  if self.vars["simulated_annealing"]:
290  tmin=self.vars["simulated_annealing_minimum_temperature"]
291  tmax=self.vars["simulated_annealing_maximum_temperature"]
292  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
293  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
294  sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
295  if self.vars["self_adaptive"]:
296  sampler_mc.set_self_adaptive(isselfadaptive=self.vars["self_adaptive"])
297  if self.output_objects is not None:
298  self.output_objects.append(sampler_mc)
299  if self.rmf_output_objects is not None:
300  self.rmf_output_objects.append(sampler_mc)
301  samplers.append(sampler_mc)
302 
303 
304  if self.molecular_dynamics_sample_objects is not None:
305  print("Setting up MolecularDynamics")
306  sampler_md = IMP.pmi.samplers.MolecularDynamics(self.model,
307  self.molecular_dynamics_sample_objects,
308  self.vars["monte_carlo_temperature"],
309  maximum_time_step=self.molecular_dynamics_max_time_step)
310  if self.vars["simulated_annealing"]:
311  tmin=self.vars["simulated_annealing_minimum_temperature"]
312  tmax=self.vars["simulated_annealing_maximum_temperature"]
313  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
314  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
315  sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
316  if self.output_objects is not None:
317  self.output_objects.append(sampler_md)
318  if self.rmf_output_objects is not None:
319  self.rmf_output_objects.append(sampler_md)
320  samplers.append(sampler_md)
321 # -------------------------------------------------------------------------
322 
323  print("Setting up ReplicaExchange")
324  rex = IMP.pmi.samplers.ReplicaExchange(self.model,
325  self.vars[
326  "replica_exchange_minimum_temperature"],
327  self.vars[
328  "replica_exchange_maximum_temperature"],
329  samplers,
330  replica_exchange_object=self.replica_exchange_object)
331  self.replica_exchange_object = rex.rem
332 
333  myindex = rex.get_my_index()
334  if self.output_objects is not None:
335  self.output_objects.append(rex)
336  if self.rmf_output_objects is not None:
337  self.rmf_output_objects.append(rex)
338  # must reset the minimum temperature due to the
339  # different binary length of rem.get_my_parameter double and python
340  # float
341  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
342 
343 # -------------------------------------------------------------------------
344 
345  globaldir = self.vars["global_output_directory"] + "/"
346  rmf_dir = globaldir + self.vars["rmf_dir"]
347  pdb_dir = globaldir + self.vars["best_pdb_dir"]
348 
349  if not self.test_mode:
350  if self.vars["do_clean_first"]:
351  pass
352 
353  if self.vars["do_create_directories"]:
354 
355  try:
356  os.makedirs(globaldir)
357  except:
358  pass
359  try:
360  os.makedirs(rmf_dir)
361  except:
362  pass
363 
364  if not self.is_multi_state:
365  try:
366  os.makedirs(pdb_dir)
367  except:
368  pass
369  else:
370  for n in range(self.vars["number_of_states"]):
371  try:
372  os.makedirs(pdb_dir + "/" + str(n))
373  except:
374  pass
375 
376 # -------------------------------------------------------------------------
377 
379  if self.output_objects is not None:
380  self.output_objects.append(sw)
381  if self.rmf_output_objects is not None:
382  self.rmf_output_objects.append(sw)
383 
384  print("Setting up stat file")
385  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
386  low_temp_stat_file = globaldir + \
387  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
388 
389  # Ensure model is updated before saving init files
390  if not self.test_mode:
391  self.model.update()
392 
393  if not self.test_mode:
394  if self.output_objects is not None:
395  output.init_stat2(low_temp_stat_file,
396  self.output_objects,
397  extralabels=["rmf_file", "rmf_frame_index"])
398  else:
399  print("Stat file writing is disabled")
400 
401  if self.rmf_output_objects is not None:
402  print("Stat info being written in the rmf file")
403 
404  print("Setting up replica stat file")
405  replica_stat_file = globaldir + \
406  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
407  if not self.test_mode:
408  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
409 
410  if not self.test_mode:
411  print("Setting up best pdb files")
412  if not self.is_multi_state:
413  if self.vars["number_of_best_scoring_models"] > 0:
414  output.init_pdb_best_scoring(pdb_dir + "/" +
415  self.vars["best_pdb_name_suffix"],
416  self.root_hier,
417  self.vars[
418  "number_of_best_scoring_models"],
419  replica_exchange=True)
420  output.write_psf(pdb_dir + "/" +"model.psf",pdb_dir + "/" +
421  self.vars["best_pdb_name_suffix"]+".0.pdb")
422  else:
423  if self.vars["number_of_best_scoring_models"] > 0:
424  for n in range(self.vars["number_of_states"]):
425  output.init_pdb_best_scoring(pdb_dir + "/" + str(n) + "/" +
426  self.vars["best_pdb_name_suffix"],
427  self.root_hiers[n],
428  self.vars[
429  "number_of_best_scoring_models"],
430  replica_exchange=True)
431  output.write_psf(pdb_dir + "/" + str(n) + "/" +"model.psf",pdb_dir + "/" + str(n) + "/" +
432  self.vars["best_pdb_name_suffix"]+".0.pdb")
433 # ---------------------------------------------
434 
435  if self.em_object_for_rmf is not None:
436  output_hierarchies = [
437  self.root_hier,
438  self.em_object_for_rmf.get_density_as_hierarchy(
439  )]
440  else:
441  output_hierarchies = [self.root_hier]
442 
443 #----------------------------------------------
444  if not self.test_mode:
445  print("Setting up and writing initial rmf coordinate file")
446  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
447  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
448  output_hierarchies,listofobjects=self.rmf_output_objects)
449  if self._rmf_restraints:
450  output.add_restraints_to_rmf(
451  init_suffix + "." + str(myindex) + ".rmf3",
452  self._rmf_restraints)
453  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
454  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
455 
456 #----------------------------------------------
457 
458  if not self.test_mode:
459  mpivs=IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
460 
461 #----------------------------------------------
462 
463  self._add_provenance(sampler_md, sampler_mc)
464 
465  if not self.test_mode:
466  print("Setting up production rmf files")
467  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
468  output.init_rmf(rmfname, output_hierarchies, geometries=self.vars["geometries"],
469  listofobjects = self.rmf_output_objects)
470 
471  if self._rmf_restraints:
472  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
473 
474  ntimes_at_low_temp = 0
475 
476  if myindex == 0:
477  self.show_info()
478  self.replica_exchange_object.set_was_used(True)
479  nframes = self.vars["number_of_frames"]
480  if self.test_mode:
481  nframes = 1
482  for i in range(nframes):
483  if self.test_mode:
484  score = 0.
485  else:
486  for nr in range(self.vars["num_sample_rounds"]):
487  if sampler_md is not None:
488  sampler_md.optimize(
489  self.vars["molecular_dynamics_steps"])
490  if sampler_mc is not None:
491  sampler_mc.optimize(self.vars["monte_carlo_steps"])
493  self.model).evaluate(False)
494  mpivs.set_value("score",score)
495  output.set_output_entry("score", score)
496 
497 
498 
499  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
500 
501  if self.vars["save_coordinates_mode"] == "lowest_temperature":
502  save_frame=(min_temp_index == my_temp_index)
503  elif self.vars["save_coordinates_mode"] == "25th_score":
504  score_perc=mpivs.get_percentile("score")
505  save_frame=(score_perc*100.0<=25.0)
506  elif self.vars["save_coordinates_mode"] == "50th_score":
507  score_perc=mpivs.get_percentile("score")
508  save_frame=(score_perc*100.0<=50.0)
509  elif self.vars["save_coordinates_mode"] == "75th_score":
510  score_perc=mpivs.get_percentile("score")
511  save_frame=(score_perc*100.0<=75.0)
512 
513  # Ensure model is updated before saving output files
514  if save_frame and not self.test_mode:
515  self.model.update()
516 
517  if save_frame:
518  print("--- frame %s score %s " % (str(i), str(score)))
519 
520  if not self.test_mode:
521  if i % self.vars["nframes_write_coordinates"]==0:
522  print('--- writing coordinates')
523  if self.vars["number_of_best_scoring_models"] > 0:
524  output.write_pdb_best_scoring(score)
525  output.write_rmf(rmfname)
526  output.set_output_entry("rmf_file", rmfname)
527  output.set_output_entry("rmf_frame_index", ntimes_at_low_temp)
528  else:
529  output.set_output_entry("rmf_file", rmfname)
530  output.set_output_entry("rmf_frame_index", '-1')
531  if self.output_objects is not None:
532  output.write_stat2(low_temp_stat_file)
533  ntimes_at_low_temp += 1
534 
535  if not self.test_mode:
536  output.write_stat2(replica_stat_file)
537  if self.vars["replica_exchange_swap"]:
538  rex.swap_temp(i, score)
539  for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
540  p.add_replica_exchange(state, self)
541 
542  if not self.test_mode:
543  print("closing production rmf files")
544  output.close_rmf(rmfname)
545 
546 
547 
548 # ----------------------------------------------------------------------
549 class BuildSystem(object):
550  """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
551  Easily create multi-state systems by calling this macro
552  repeatedly with different TopologyReader objects!
553  A useful function is get_molecules() which returns the PMI Molecules grouped by state
554  as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
555  Quick multi-state system:
556  @code{.python}
557  model = IMP.Model()
558  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
559  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
560  bs = IMP.pmi.macros.BuildSystem(model)
561  bs.add_state(reader1)
562  bs.add_state(reader2)
563  bs.execute_macro() # build everything including degrees of freedom
564  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
565  ### now you have a two state system, you add restraints etc
566  @endcode
567  @note The "domain name" entry of the topology reader is not used.
568  All molecules are set up by the component name, but split into rigid bodies
569  as requested.
570  """
571 
572  _alphabets = {'DNA': IMP.pmi.alphabets.dna,
573  'RNA': IMP.pmi.alphabets.rna}
574 
575  def __init__(self,
576  model,
577  sequence_connectivity_scale=4.0,
578  force_create_gmm_files=False,
579  resolutions=[1,10]):
580  """Constructor
581  @param model An IMP Model
582  @param sequence_connectivity_scale For scaling the connectivity restraint
583  @param force_create_gmm_files If True, will sample and create GMMs
584  no matter what. If False, will only sample if the
585  files don't exist. If number of Gaussians is zero, won't
586  do anything.
587  @param resolutions The resolutions to build for structured regions
588  """
589  self.model = model
590  self.system = IMP.pmi.topology.System(self.model)
591  self._readers = [] # the TopologyReaders (one per state)
592  self._domain_res = [] # TempResidues for each domain key=unique name, value=(atomic_res,non_atomic_res).
593  self._domains = [] # key = domain unique name, value = Component
594  self.force_create_gmm_files = force_create_gmm_files
595  self.resolutions = resolutions
596 
597  def add_state(self,
598  reader,
599  keep_chain_id=False, fasta_name_map=None):
600  """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
601  When you are done adding states, call execute_macro()
602  @param reader The TopologyReader object
603  @param fasta_name_map dictionary for converting protein names found in the fasta file
604  """
605  state = self.system.create_state()
606  self._readers.append(reader)
607  these_domain_res = {} # key is unique name, value is (atomic res, nonatomicres)
608  these_domains = {} # key is unique name, value is _Component
609  chain_ids = string.ascii_uppercase+string.ascii_lowercase+'0123456789'
610  numchain = 0
611 
612  ### setup representation
613  # loop over molecules, copies, then domains
614  for molname in reader.get_molecules():
615  copies = reader.get_molecules()[molname].domains
616  for nc,copyname in enumerate(copies):
617  print("BuildSystem.add_state: setting up molecule %s copy number %s" % (molname,str(nc)))
618  copy = copies[copyname]
619  # option to not rename chains
620  if keep_chain_id == True:
621  chain_id = copy[0].chain
622  else:
623  chain_id = chain_ids[numchain]
624  if nc==0:
625  alphabet = IMP.pmi.alphabets.amino_acid
626  fasta_flag=copy[0].fasta_flag
627  if fasta_flag in self._alphabets:
628  alphabet = self._alphabets[fasta_flag]
629  seq = IMP.pmi.topology.Sequences(copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
630  print("BuildSystem.add_state: molecule %s sequence has %s residues" % (molname,len(seq)))
631  orig_mol = state.create_molecule(molname, seq, chain_id,
632  alphabet=alphabet)
633  mol = orig_mol
634  numchain+=1
635  else:
636  print("BuildSystem.add_state: creating a copy for molecule %s" % molname)
637  mol = orig_mol.create_copy(chain_id)
638  numchain+=1
639 
640  for domainnumber,domain in enumerate(copy):
641  print("BuildSystem.add_state: ---- setting up domain %s of molecule %s" % (domainnumber,molname))
642  # we build everything in the residue range, even if it
643  # extends beyond what's in the actual PDB file
644  these_domains[domain.get_unique_name()] = domain
645  if domain.residue_range==[] or domain.residue_range is None:
646  domain_res = mol.get_residues()
647  else:
648  start = domain.residue_range[0]+domain.pdb_offset
649  if domain.residue_range[1]=='END':
650  end = len(mol.sequence)
651  else:
652  end = domain.residue_range[1]+domain.pdb_offset
653  domain_res = mol.residue_range(start-1,end-1)
654  print("BuildSystem.add_state: -------- domain %s of molecule %s extends from residue %s to residue %s " % (domainnumber,molname,start,end))
655  if domain.pdb_file=="BEADS":
656  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by BEADS " % (domainnumber,molname))
657  mol.add_representation(
658  domain_res,
659  resolutions=[domain.bead_size],
660  setup_particles_as_densities=(
661  domain.em_residues_per_gaussian!=0),
662  color = domain.color)
663  these_domain_res[domain.get_unique_name()] = (set(),domain_res)
664  elif domain.pdb_file=="IDEAL_HELIX":
665  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by IDEAL_HELIX " % (domainnumber,molname))
666  mol.add_representation(
667  domain_res,
668  resolutions=self.resolutions,
669  ideal_helix=True,
670  density_residues_per_component=domain.em_residues_per_gaussian,
671  density_prefix=domain.density_prefix,
672  density_force_compute=self.force_create_gmm_files,
673  color = domain.color)
674  these_domain_res[domain.get_unique_name()] = (domain_res,set())
675  else:
676  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by pdb file %s " % (domainnumber,molname,domain.pdb_file))
677  domain_atomic = mol.add_structure(domain.pdb_file,
678  domain.chain,
679  domain.residue_range,
680  domain.pdb_offset,
681  soft_check=True)
682  domain_non_atomic = domain_res - domain_atomic
683  if not domain.em_residues_per_gaussian:
684  mol.add_representation(domain_atomic,
685  resolutions=self.resolutions,
686  color = domain.color)
687  if len(domain_non_atomic)>0:
688  mol.add_representation(domain_non_atomic,
689  resolutions=[domain.bead_size],
690  color = domain.color)
691  else:
692  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by gaussians " % (domainnumber,molname))
693  mol.add_representation(
694  domain_atomic,
695  resolutions=self.resolutions,
696  density_residues_per_component=domain.em_residues_per_gaussian,
697  density_prefix=domain.density_prefix,
698  density_force_compute=self.force_create_gmm_files,
699  color = domain.color)
700  if len(domain_non_atomic)>0:
701  mol.add_representation(domain_non_atomic,
702  resolutions=[domain.bead_size],
703  setup_particles_as_densities=True,
704  color = domain.color)
705  these_domain_res[domain.get_unique_name()] = (
706  domain_atomic,domain_non_atomic)
707  self._domain_res.append(these_domain_res)
708  self._domains.append(these_domains)
709  print('BuildSystem.add_state: State',len(self.system.states),'added')
710 
711  def get_molecules(self):
712  """Return list of all molecules grouped by state.
713  For each state, it's a dictionary of Molecules where key is the molecule name
714  """
715  return [s.get_molecules() for s in self.system.get_states()]
716 
717  def get_molecule(self,molname,copy_index=0,state_index=0):
718  return self.system.get_states()[state_index].get_molecules()[molname][copy_index]
719 
720  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04, max_bead_trans=4.0, max_srb_trans=4.0,max_srb_rot=0.04):
721  """Builds representations and sets up degrees of freedom"""
722  print("BuildSystem.execute_macro: building representations")
723  self.root_hier = self.system.build()
724 
725  print("BuildSystem.execute_macro: setting up degrees of freedom")
726  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
727  for nstate,reader in enumerate(self._readers):
728  rbs = reader.get_rigid_bodies()
729  srbs = reader.get_super_rigid_bodies()
730  csrbs = reader.get_chains_of_super_rigid_bodies()
731 
732  # add rigid bodies
733  domains_in_rbs = set()
734  for rblist in rbs:
735  print("BuildSystem.execute_macro: -------- building rigid body %s" % (str(rblist)))
736  all_res = IMP.pmi.tools.OrderedSet()
737  bead_res = IMP.pmi.tools.OrderedSet()
738  for dname in rblist:
739  domain = self._domains[nstate][dname]
740  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
741  all_res|=self._domain_res[nstate][dname][0]
742  bead_res|=self._domain_res[nstate][dname][1]
743  domains_in_rbs.add(dname)
744  all_res|=bead_res
745  print("BuildSystem.execute_macro: -------- \
746 creating rigid body with max_trans %s \
747 max_rot %s non_rigid_max_trans %s" \
748  % (str(max_rb_trans),str(max_rb_rot),str(max_bead_trans)))
749  self.dof.create_rigid_body(all_res,
750  nonrigid_parts=bead_res,
751  max_trans=max_rb_trans,
752  max_rot=max_rb_rot,
753  nonrigid_max_trans=max_bead_trans)
754 
755  # if you have any domains not in an RB, set them as flexible beads
756  for dname, domain in self._domains[nstate].items():
757  if dname not in domains_in_rbs:
758  if domain.pdb_file != "BEADS":
759  warnings.warn(
760  "No rigid bodies set for %s. Residues read from "
761  "the PDB file will not be sampled - only regions "
762  "missing from the PDB will be treated flexibly. "
763  "To sample the entire sequence, use BEADS instead "
764  "of a PDB file name" % dname,
766  self.dof.create_flexible_beads(
767  self._domain_res[nstate][dname][1],
768  max_trans=max_bead_trans)
769 
770  # add super rigid bodies
771  for srblist in srbs:
772  print("BuildSystem.execute_macro: -------- building super rigid body %s" % (str(srblist)))
773  all_res = IMP.pmi.tools.OrderedSet()
774  for dname in srblist:
775  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
776  all_res|=self._domain_res[nstate][dname][0]
777  all_res|=self._domain_res[nstate][dname][1]
778 
779  print("BuildSystem.execute_macro: -------- \
780 creating super rigid body with max_trans %s max_rot %s " \
781  % (str(max_srb_trans),str(max_srb_rot)))
782  self.dof.create_super_rigid_body(all_res,max_trans=max_srb_trans,max_rot=max_srb_rot)
783 
784  # add chains of super rigid bodies
785  for csrblist in csrbs:
786  all_res = IMP.pmi.tools.OrderedSet()
787  for dname in csrblist:
788  all_res|=self._domain_res[nstate][dname][0]
789  all_res|=self._domain_res[nstate][dname][1]
790  all_res = list(all_res)
791  all_res.sort(key=lambda r:r.get_index())
792  #self.dof.create_super_rigid_body(all_res,chain_min_length=2,chain_max_length=3)
793  self.dof.create_main_chain_mover(all_res)
794  return self.root_hier,self.dof
795 
796 
797 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
798 class AnalysisReplicaExchange0(object):
799  """A macro for running all the basic operations of analysis.
800  Includes clustering, precision analysis, and making ensemble density maps.
801  A number of plots are also supported.
802  """
803  def __init__(self, model,
804  merge_directories=["./"],
805  stat_file_name_suffix="stat",
806  best_pdb_name_suffix="model",
807  do_clean_first=True,
808  do_create_directories=True,
809  global_output_directory="output/",
810  replica_stat_file_suffix="stat_replica",
811  global_analysis_result_directory="./analysis/",
812  test_mode=False):
813  """Constructor.
814  @param model The IMP model
815  @param stat_file_name_suffix
816  @param merge_directories The directories containing output files
817  @param best_pdb_name_suffix
818  @param do_clean_first
819  @param do_create_directories
820  @param global_output_directory Where everything is
821  @param replica_stat_file_suffix
822  @param global_analysis_result_directory
823  @param test_mode If True, nothing is changed on disk
824  """
825 
826  try:
827  from mpi4py import MPI
828  self.comm = MPI.COMM_WORLD
829  self.rank = self.comm.Get_rank()
830  self.number_of_processes = self.comm.size
831  except ImportError:
832  self.rank = 0
833  self.number_of_processes = 1
834 
835  self.test_mode = test_mode
836  self._protocol_output = []
837  self.cluster_obj = None
838  self.model = model
839  stat_dir = global_output_directory
840  self.stat_files = []
841  # it contains the position of the root directories
842  for rd in merge_directories:
843  stat_files = glob.glob(os.path.join(rd,stat_dir,"stat.*.out"))
844  if len(stat_files)==0:
845  warnings.warn("no stat files found in %s"
846  % os.path.join(rd, stat_dir),
848  self.stat_files += stat_files
849 
850  def add_protocol_output(self, p):
851  """Capture details of the modeling protocol.
852  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
853  """
854  # Assume last state is the one we're interested in
855  self._protocol_output.append((p, p._last_state))
856 
857  def get_modeling_trajectory(self,
858  score_key="SimplifiedModel_Total_Score_None",
859  rmf_file_key="rmf_file",
860  rmf_file_frame_key="rmf_frame_index",
861  outputdir="./",
862  get_every=1,
863  nframes_trajectory=10000):
864  """ Get a trajectory of the modeling run, for generating demonstrative movies
865  @param score_key The score for ranking models
866  @param rmf_file_key Key pointing to RMF filename
867  @param rmf_file_frame_key Key pointing to RMF frame number
868  @param outputdir The local output directory used in the run
869  @param get_every Extract every nth frame
870  @param nframes_trajectory Total number of frames of the trajectory
871  """
872  from operator import itemgetter
873  import math
874 
875  trajectory_models = IMP.pmi.io.get_trajectory_models(self.stat_files,
876  score_key,
877  rmf_file_key,
878  rmf_file_frame_key,
879  get_every)
880  rmf_file_list=trajectory_models[0]
881  rmf_file_frame_list=trajectory_models[1]
882  score_list=list(map(float, trajectory_models[2]))
883 
884  max_score=max(score_list)
885  min_score=min(score_list)
886 
887 
888  bins=[(max_score-min_score)*math.exp(-float(i))+min_score for i in range(nframes_trajectory)]
889  binned_scores=[None]*nframes_trajectory
890  binned_model_indexes=[-1]*nframes_trajectory
891 
892  for model_index,s in enumerate(score_list):
893  bins_score_diffs=[abs(s-b) for b in bins]
894  bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
895  if binned_scores[bin_index]==None:
896  binned_scores[bin_index]=s
897  binned_model_indexes[bin_index]=model_index
898  else:
899  old_diff=abs(binned_scores[bin_index]-bins[bin_index])
900  new_diff=abs(s-bins[bin_index])
901  if new_diff < old_diff:
902  binned_scores[bin_index]=s
903  binned_model_indexes[bin_index]=model_index
904 
905  print(binned_scores)
906  print(binned_model_indexes)
907 
908 
909  def _expand_ambiguity(self,prot,d):
910  """If using PMI2, expand the dictionary to include copies as ambiguous options
911  This also keeps the states separate.
912  """
913  newdict = {}
914  for key in d:
915  val = d[key]
916  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
917  newdict[key] = val
918  continue
919  states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
920  if isinstance(val, tuple):
921  start = val[0]
922  stop = val[1]
923  name = val[2]
924  else:
925  start = 1
926  stop = -1
927  name = val
928  for nst in range(len(states)):
929  sel = IMP.atom.Selection(prot,molecule=name,state_index=nst)
930  copies = sel.get_selected_particles(with_representation=False)
931  if len(copies)>1:
932  for nc in range(len(copies)):
933  if len(states)>1:
934  newdict['%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
935  else:
936  newdict['%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
937  else:
938  newdict[key] = val
939  return newdict
940 
941 
942  def clustering(self,
943  score_key="SimplifiedModel_Total_Score_None",
944  rmf_file_key="rmf_file",
945  rmf_file_frame_key="rmf_frame_index",
946  state_number=0,
947  prefiltervalue=None,
948  feature_keys=[],
949  outputdir="./",
950  alignment_components=None,
951  number_of_best_scoring_models=10,
952  rmsd_calculation_components=None,
953  distance_matrix_file='distances.mat',
954  load_distance_matrix_file=False,
955  skip_clustering=False,
956  number_of_clusters=1,
957  display_plot=False,
958  exit_after_display=True,
959  get_every=1,
960  first_and_last_frames=None,
961  density_custom_ranges=None,
962  write_pdb_with_centered_coordinates=False,
963  voxel_size=5.0):
964  """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
965  Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
966  Can pass None for copy or state to ignore that field.
967  If you don't pass a specific copy number
968  @param score_key The score for ranking models. Use "Total_Score"
969  instead of default for PMI2.
970  @param rmf_file_key Key pointing to RMF filename
971  @param rmf_file_frame_key Key pointing to RMF frame number
972  @param state_number State number to analyze
973  @param prefiltervalue Only include frames where the
974  score key is below this value
975  @param feature_keys Keywords for which you want to
976  calculate average, medians, etc.
977  If you pass "Keyname" it'll include everything that matches "*Keyname*"
978  @param outputdir The local output directory used in the run
979  @param alignment_components Dictionary with keys=groupname, values are tuples
980  for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
981  @param number_of_best_scoring_models Num models to keep per run
982  @param rmsd_calculation_components For calculating RMSD
983  (same format as alignment_components)
984  @param distance_matrix_file Where to store/read the distance matrix
985  @param load_distance_matrix_file Try to load the distance matrix file
986  @param skip_clustering Just extract the best scoring models
987  and save the pdbs
988  @param number_of_clusters Number of k-means clusters
989  @param display_plot Display the distance matrix
990  @param exit_after_display Exit after displaying distance matrix
991  @param get_every Extract every nth frame
992  @param first_and_last_frames A tuple with the first and last frames to be
993  analyzed. Values are percentages!
994  Default: get all frames
995  @param density_custom_ranges For density calculation
996  (same format as alignment_components)
997  @param write_pdb_with_centered_coordinates
998  @param voxel_size Used for the density output
999  """
1000  # Track provenance information to be added to each output model
1001  prov = []
1002  self._outputdir = os.path.abspath(outputdir)
1003  self._number_of_clusters = number_of_clusters
1004  for p, state in self._protocol_output:
1005  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1006 
1007  if self.test_mode:
1008  return
1009 
1010  if self.rank==0:
1011  try:
1012  os.mkdir(outputdir)
1013  except:
1014  pass
1015 
1016  if not load_distance_matrix_file:
1017  if len(self.stat_files)==0: print("ERROR: no stat file found in the given path"); return
1018  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1019  self.stat_files,self.number_of_processes)[self.rank]
1020 
1021  # read ahead to check if you need the PMI2 score key instead
1022  po = IMP.pmi.output.ProcessOutput(my_stat_files[0])
1023  orig_score_key = score_key
1024  if score_key not in po.get_keys():
1025  if 'Total_Score' in po.get_keys():
1026  score_key = 'Total_Score'
1027  warnings.warn(
1028  "Using 'Total_Score' instead of "
1029  "'SimplifiedModel_Total_Score_None' for the score key",
1031  for k in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1032  if k in feature_keys:
1033  warnings.warn(
1034  "no need to pass " + k + " to feature_keys.",
1036  feature_keys.remove(k)
1037 
1038  best_models = IMP.pmi.io.get_best_models(my_stat_files,
1039  score_key,
1040  feature_keys,
1041  rmf_file_key,
1042  rmf_file_frame_key,
1043  prefiltervalue,
1044  get_every, provenance=prov)
1045  rmf_file_list=best_models[0]
1046  rmf_file_frame_list=best_models[1]
1047  score_list=best_models[2]
1048  feature_keyword_list_dict=best_models[3]
1049 
1050 # ------------------------------------------------------------------------
1051 # collect all the files and scores
1052 # ------------------------------------------------------------------------
1053 
1054  if self.number_of_processes > 1:
1055  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1056  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1057  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1058  rmf_file_frame_list)
1059  for k in feature_keyword_list_dict:
1060  feature_keyword_list_dict[k] = IMP.pmi.tools.scatter_and_gather(
1061  feature_keyword_list_dict[k])
1062 
1063  # sort by score and get the best scoring ones
1064  score_rmf_tuples = list(zip(score_list,
1065  rmf_file_list,
1066  rmf_file_frame_list,
1067  list(range(len(score_list)))))
1068 
1069  if density_custom_ranges:
1070  for k in density_custom_ranges:
1071  if not isinstance(density_custom_ranges[k], list):
1072  raise Exception("Density custom ranges: values must be lists of tuples")
1073 
1074  # keep subset of frames if requested
1075  if first_and_last_frames is not None:
1076  nframes = len(score_rmf_tuples)
1077  first_frame = int(first_and_last_frames[0] * nframes)
1078  last_frame = int(first_and_last_frames[1] * nframes)
1079  if last_frame > len(score_rmf_tuples):
1080  last_frame = -1
1081  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1082 
1083  # sort RMFs by the score_key in ascending order, and store the rank
1084  best_score_rmf_tuples = sorted(score_rmf_tuples,
1085  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1086  best_score_rmf_tuples=[t+(n,) for n,t in enumerate(best_score_rmf_tuples)]
1087  # Note in the provenance info that we only kept best-scoring models
1088  prov.append(IMP.pmi.io.FilterProvenance("Best scoring",
1089  0, number_of_best_scoring_models))
1090  # sort the feature scores in the same way
1091  best_score_feature_keyword_list_dict = defaultdict(list)
1092  for tpl in best_score_rmf_tuples:
1093  index = tpl[3]
1094  for f in feature_keyword_list_dict:
1095  best_score_feature_keyword_list_dict[f].append(
1096  feature_keyword_list_dict[f][index])
1097  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1098  best_score_rmf_tuples,
1099  self.number_of_processes)[self.rank]
1100 
1101  # expand the dictionaries to include ambiguous copies
1102  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(self.model,
1103  0,
1104  my_best_score_rmf_tuples[0][1])[0]
1105  if IMP.pmi.get_is_canonical(prot_ahead):
1106  if rmsd_calculation_components is not None:
1107  tmp = self._expand_ambiguity(prot_ahead,rmsd_calculation_components)
1108  if tmp!=rmsd_calculation_components:
1109  print('Detected ambiguity, expand rmsd components to',tmp)
1110  rmsd_calculation_components = tmp
1111  if alignment_components is not None:
1112  tmp = self._expand_ambiguity(prot_ahead,alignment_components)
1113  if tmp!=alignment_components:
1114  print('Detected ambiguity, expand alignment components to',tmp)
1115  alignment_components = tmp
1116 
1117 #-------------------------------------------------------------
1118 # read the coordinates
1119 # ------------------------------------------------------------
1120  rmsd_weights = IMP.pmi.io.get_bead_sizes(self.model,
1121  my_best_score_rmf_tuples[0],
1122  rmsd_calculation_components,
1123  state_number=state_number)
1124  got_coords = IMP.pmi.io.read_coordinates_of_rmfs(self.model,
1125  my_best_score_rmf_tuples,
1126  alignment_components,
1127  rmsd_calculation_components,
1128  state_number=state_number)
1129 
1130  # note! the coordinates are simply float tuples, NOT decorators, NOT Vector3D,
1131  # NOR particles, because these object cannot be serialized. We need serialization
1132  # for the parallel computation based on mpi.
1133  all_coordinates=got_coords[0] # dict:key=component name,val=coords per hit
1134  alignment_coordinates=got_coords[1] # same as above, limited to alignment bits
1135  rmsd_coordinates=got_coords[2] # same as above, limited to RMSD bits
1136  rmf_file_name_index_dict=got_coords[3] # dictionary with key=RMF, value=score rank
1137  all_rmf_file_names=got_coords[4] # RMF file per hit
1138 
1139 # ------------------------------------------------------------------------
1140 # optionally don't compute distance matrix or cluster, just write top files
1141 # ------------------------------------------------------------------------
1142  if skip_clustering:
1143  if density_custom_ranges:
1144  DensModule = IMP.pmi.analysis.GetModelDensity(
1145  density_custom_ranges,
1146  voxel=voxel_size)
1147 
1148  dircluster=os.path.join(outputdir,"all_models."+str(n))
1149  try:
1150  os.mkdir(outputdir)
1151  except:
1152  pass
1153  try:
1154  os.mkdir(dircluster)
1155  except:
1156  pass
1157  clusstat=open(os.path.join(dircluster,"stat."+str(self.rank)+".out"),"w")
1158  for cnt,tpl in enumerate(my_best_score_rmf_tuples):
1159  rmf_name=tpl[1]
1160  rmf_frame_number=tpl[2]
1161  tmp_dict={}
1162  index=tpl[4]
1163  for key in best_score_feature_keyword_list_dict:
1164  tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1165 
1166  if cnt==0:
1167  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1168  self.model,
1169  rmf_frame_number,
1170  rmf_name)
1171  else:
1172  linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1173  self.model,
1174  prots,
1175  rs,
1176  rmf_frame_number,
1177  rmf_name)
1178  if not linking_successful:
1179  continue
1180 
1181  if not prots:
1182  continue
1183 
1184  if IMP.pmi.get_is_canonical(prots[0]):
1185  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1186  prot = states[state_number]
1187  else:
1188  prot = prots[state_number]
1189 
1190  # get transformation aligning coordinates of requested tuples
1191  # to the first RMF file
1192  if cnt==0:
1193  coords_f1=alignment_coordinates[cnt]
1194  if cnt > 0:
1195  coords_f2=alignment_coordinates[cnt]
1196  if coords_f2:
1197  Ali = IMP.pmi.analysis.Alignment(coords_f1, coords_f2)
1198  transformation = Ali.align()[1]
1199  else:
1201 
1202  rbs = set()
1203  for p in IMP.atom.get_leaves(prot):
1204  if not IMP.core.XYZR.get_is_setup(p):
1206  IMP.core.XYZR(p).set_radius(0.0001)
1207  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1208 
1210  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1211  rbs.add(rb)
1212  else:
1214  transformation)
1215  for rb in rbs:
1216  IMP.core.transform(rb,transformation)
1217 
1219  self.model.update()
1220  out_pdb_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".pdb")
1221  out_rmf_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".rmf3")
1222  o.init_pdb(out_pdb_fn,prot)
1223  o.write_pdb(out_pdb_fn,
1224  translate_to_geometric_center=write_pdb_with_centered_coordinates)
1225 
1226  tmp_dict["local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1227  tmp_dict["rmf_file_full_path"]=rmf_name
1228  tmp_dict["local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1229  tmp_dict["local_rmf_frame_number"]=0
1230 
1231  clusstat.write(str(tmp_dict)+"\n")
1232 
1233  if IMP.pmi.get_is_canonical(prot):
1234  # create a single-state System and write that
1236  h.set_name("System")
1237  h.add_child(prot)
1238  o.init_rmf(out_rmf_fn, [h], rs)
1239  else:
1240  o.init_rmf(out_rmf_fn, [prot],rs)
1241 
1242  o.write_rmf(out_rmf_fn)
1243  o.close_rmf(out_rmf_fn)
1244  # add the density
1245  if density_custom_ranges:
1246  DensModule.add_subunits_density(prot)
1247 
1248  if density_custom_ranges:
1249  DensModule.write_mrc(path=dircluster)
1250  del DensModule
1251  return
1252 
1253 
1254 
1255  # broadcast the coordinates
1256  if self.number_of_processes > 1:
1257  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1258  all_coordinates)
1259  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1260  all_rmf_file_names)
1261  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1262  rmf_file_name_index_dict)
1263  alignment_coordinates=IMP.pmi.tools.scatter_and_gather(
1264  alignment_coordinates)
1265  rmsd_coordinates=IMP.pmi.tools.scatter_and_gather(
1266  rmsd_coordinates)
1267 
1268  if self.rank == 0:
1269  # save needed informations in external files
1270  self.save_objects(
1271  [best_score_feature_keyword_list_dict,
1272  rmf_file_name_index_dict],
1273  ".macro.pkl")
1274 
1275 # ------------------------------------------------------------------------
1276 # Calculate distance matrix and cluster
1277 # ------------------------------------------------------------------------
1278  print("setup clustering class")
1279  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1280 
1281  for n, model_coordinate_dict in enumerate(all_coordinates):
1282  template_coordinate_dict = {}
1283  # let's try to align
1284  if alignment_components is not None and len(self.cluster_obj.all_coords) == 0:
1285  # set the first model as template coordinates
1286  self.cluster_obj.set_template(alignment_coordinates[n])
1287  self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1288  print("Global calculating the distance matrix")
1289 
1290  # calculate distance matrix, all against all
1291  self.cluster_obj.dist_matrix()
1292 
1293  # perform clustering and optionally display
1294  if self.rank == 0:
1295  self.cluster_obj.do_cluster(number_of_clusters)
1296  if display_plot:
1297  if self.rank == 0:
1298  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1299  if exit_after_display:
1300  exit()
1301  self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1302 
1303 # ------------------------------------------------------------------------
1304 # Alteratively, load the distance matrix from file and cluster that
1305 # ------------------------------------------------------------------------
1306  else:
1307  if self.rank==0:
1308  print("setup clustering class")
1309  self.cluster_obj = IMP.pmi.analysis.Clustering()
1310  self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1311  print("clustering with %s clusters" % str(number_of_clusters))
1312  self.cluster_obj.do_cluster(number_of_clusters)
1313  [best_score_feature_keyword_list_dict,
1314  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1315  if display_plot:
1316  if self.rank == 0:
1317  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1318  if exit_after_display:
1319  exit()
1320  if self.number_of_processes > 1:
1321  self.comm.Barrier()
1322 
1323 # ------------------------------------------------------------------------
1324 # now save all informations about the clusters
1325 # ------------------------------------------------------------------------
1326 
1327  if self.rank == 0:
1328  print(self.cluster_obj.get_cluster_labels())
1329  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1330  print("rank %s " % str(self.rank))
1331  print("cluster %s " % str(n))
1332  print("cluster label %s " % str(cl))
1333  print(self.cluster_obj.get_cluster_label_names(cl))
1334  cluster_size = len(self.cluster_obj.get_cluster_label_names(cl))
1335  cluster_prov = prov + \
1336  [IMP.pmi.io.ClusterProvenance(cluster_size)]
1337 
1338  # first initialize the Density class if requested
1339  if density_custom_ranges:
1340  DensModule = IMP.pmi.analysis.GetModelDensity(
1341  density_custom_ranges,
1342  voxel=voxel_size)
1343 
1344  dircluster = outputdir + "/cluster." + str(n) + "/"
1345  try:
1346  os.mkdir(dircluster)
1347  except:
1348  pass
1349 
1350  rmsd_dict = {"AVERAGE_RMSD":
1351  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1352  clusstat = open(dircluster + "stat.out", "w")
1353  for k, structure_name in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1354  # extract the features
1355  tmp_dict = {}
1356  tmp_dict.update(rmsd_dict)
1357  index = rmf_file_name_index_dict[structure_name]
1358  for key in best_score_feature_keyword_list_dict:
1359  tmp_dict[
1360  key] = best_score_feature_keyword_list_dict[
1361  key][
1362  index]
1363 
1364  # get the rmf name and the frame number from the list of
1365  # frame names
1366  rmf_name = structure_name.split("|")[0]
1367  rmf_frame_number = int(structure_name.split("|")[1])
1368  clusstat.write(str(tmp_dict) + "\n")
1369 
1370  # extract frame (open or link to existing)
1371  if k==0:
1372  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1373  self.model,
1374  rmf_frame_number,
1375  rmf_name)
1376  else:
1377  linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1378  self.model,
1379  prots,
1380  rs,
1381  rmf_frame_number,
1382  rmf_name)
1383  if not linking_successful:
1384  continue
1385  if not prots:
1386  continue
1387 
1388  if IMP.pmi.get_is_canonical(prots[0]):
1389  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1390  prot = states[state_number]
1391  else:
1392  prot = prots[state_number]
1393  if k==0:
1394  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1395 
1396  # transform clusters onto first
1397  if k > 0:
1398  model_index = self.cluster_obj.get_model_index_from_name(
1399  structure_name)
1400  transformation = self.cluster_obj.get_transformation_to_first_member(
1401  cl,
1402  model_index)
1403  rbs = set()
1404  for p in IMP.atom.get_leaves(prot):
1405  if not IMP.core.XYZR.get_is_setup(p):
1407  IMP.core.XYZR(p).set_radius(0.0001)
1408  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1409 
1411  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1412  rbs.add(rb)
1413  else:
1415  transformation)
1416  for rb in rbs:
1417  IMP.core.transform(rb,transformation)
1418 
1419  # add the density
1420  if density_custom_ranges:
1421  DensModule.add_subunits_density(prot)
1422 
1423  # pdb writing should be optimized!
1424  o = IMP.pmi.output.Output()
1425  self.model.update()
1426  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1427  o.write_pdb(dircluster + str(k) + ".pdb")
1428 
1429  if IMP.pmi.get_is_canonical(prot):
1430  # create a single-state System and write that
1432  h.set_name("System")
1433  h.add_child(prot)
1434  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1435  else:
1436  o.init_rmf(dircluster + str(k) + ".rmf3", [prot],rs)
1437  o.write_rmf(dircluster + str(k) + ".rmf3")
1438  o.close_rmf(dircluster + str(k) + ".rmf3")
1439 
1440  del o
1441  # IMP.atom.destroy(prot)
1442 
1443  if density_custom_ranges:
1444  DensModule.write_mrc(path=dircluster)
1445  del DensModule
1446 
1447  if self.number_of_processes>1:
1448  self.comm.Barrier()
1449 
1450  def get_cluster_rmsd(self,cluster_num):
1451  if self.cluster_obj is None:
1452  raise Exception("Run clustering first")
1453  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1454 
1455  def save_objects(self, objects, file_name):
1456  import pickle
1457  outf = open(file_name, 'wb')
1458  pickle.dump(objects, outf)
1459  outf.close()
1460 
1461  def load_objects(self, file_name):
1462  import pickle
1463  inputf = open(file_name, 'rb')
1464  objects = pickle.load(inputf)
1465  inputf.close()
1466  return objects
1467 
1469 
1470  """
1471  This class contains analysis utilities to investigate ReplicaExchange results.
1472  """
1473 
1474  ########################
1475  # Construction and Setup
1476  ########################
1477 
1478  def __init__(self,model,
1479  stat_files,
1480  best_models=None,
1481  score_key=None,
1482  alignment=True):
1483  """
1484  Construction of the Class.
1485  @param model IMP.Model()
1486  @param stat_files list of string. Can be ascii stat files, rmf files names
1487  @param best_models Integer. Number of best scoring models, if None: all models will be read
1488  @param alignment boolean (Default=True). Align before computing the rmsd.
1489  """
1490 
1491  self.model=model
1492  self.best_models=best_models
1493  self.stath0=IMP.pmi.output.StatHierarchyHandler(model,stat_files,self.best_models,score_key,cache=True)
1494  self.stath1=IMP.pmi.output.StatHierarchyHandler(StatHierarchyHandler=self.stath0)
1495 
1496  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(IMP.pmi.tools.select_at_all_resolutions(self.stath1))
1497  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(IMP.pmi.tools.select_at_all_resolutions(self.stath0))
1498  self.sel0_rmsd=IMP.atom.Selection(self.stath0)
1499  self.sel1_rmsd=IMP.atom.Selection(self.stath1)
1500  self.sel0_alignment=IMP.atom.Selection(self.stath0)
1501  self.sel1_alignment=IMP.atom.Selection(self.stath1)
1502  self.clusters=[]
1503  # fill the cluster list with a single cluster containing all models
1504  c = IMP.pmi.output.Cluster(0)
1505  self.clusters.append(c)
1506  for n0 in range(len(self.stath0)):
1507  c.add_member(n0)
1508  self.pairwise_rmsd={}
1509  self.pairwise_molecular_assignment={}
1510  self.alignment=alignment
1511  self.symmetric_molecules={}
1512  self.issymmetricsel={}
1513  self.update_seldicts()
1514  self.molcopydict0=IMP.pmi.tools.get_molecules_dictionary_by_copy(IMP.atom.get_leaves(self.stath0))
1515  self.molcopydict1=IMP.pmi.tools.get_molecules_dictionary_by_copy(IMP.atom.get_leaves(self.stath1))
1516 
1517  def set_rmsd_selection(self,**kwargs):
1518  """
1519  Setup the selection onto which the rmsd is computed
1520  @param kwargs use IMP.atom.Selection keywords
1521  """
1522  self.sel0_rmsd=IMP.atom.Selection(self.stath0,**kwargs)
1523  self.sel1_rmsd=IMP.atom.Selection(self.stath1,**kwargs)
1524  self.update_seldicts()
1525 
1526  def set_symmetric(self,molecule_name):
1527  """
1528  Store names of symmetric molecules
1529  """
1530  self.symmetric_molecules[molecule_name]=0
1531  self.update_seldicts()
1532 
1533  def set_alignment_selection(self,**kwargs):
1534  """
1535  Setup the selection onto which the alignment is computed
1536  @param kwargs use IMP.atom.Selection keywords
1537  """
1538  self.sel0_alignment=IMP.atom.Selection(self.stath0,**kwargs)
1539  self.sel1_alignment=IMP.atom.Selection(self.stath1,**kwargs)
1540 
1541  ######################
1542  # Clustering functions
1543  ######################
1544  def clean_clusters(self):
1545  for c in self.clusters: del c
1546  self.clusters=[]
1547 
1548 
1549  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1550  """
1551  Cluster the models based on RMSD.
1552  @param rmsd_cutoff Float the distance cutoff in Angstrom
1553  @param metric (Default=IMP.atom.get_rmsd) the metric that will be used to compute rmsds
1554  """
1555  self.clean_clusters()
1556  not_clustered=set(range(len(self.stath1)))
1557  while len(not_clustered)>0:
1558  self.aggregate(not_clustered, rmsd_cutoff, metric)
1559  #self.merge_aggregates(rmsd_cutoff, metric)
1560  self.update_clusters()
1561 
1562  def refine(self,rmsd_cutoff=10):
1563  """
1564  Refine the clusters by merging the ones whose centers are close
1565  @param rmsd_cutoff cutoff distance in Angstorms
1566  """
1567  merge_pairs=[]
1568  clusters_copy=self.clusters
1569  for c0,c1 in itertools.combinations(self.clusters,2):
1570  if c0.center_index is None:
1571  self.compute_cluster_center(c0)
1572  if c1.center_index is None:
1573  self.compute_cluster_center(c1)
1574  d0=self.stath0[c0.center_index]
1575  d1=self.stath1[c1.center_index]
1576  rmsd, molecular_assignment = self.rmsd()
1577  if rmsd <= rmsd_cutoff:
1578  if c1 in self.clusters:
1579  clusters_copy.remove(c1)
1580  c0+=c1
1581  self.clusters=clusters_copy
1582  self.update_clusters()
1583 
1584  ####################
1585  # Input Output
1586  ####################
1587 
1588  def set_cluster_assignments(self, cluster_ids):
1589  if len(cluster_ids)!=len(self.stath0):
1590  raise ValueError('cluster ids has to be same length as number of frames')
1591 
1592  self.clusters=[]
1593  for i in sorted(list(set(cluster_ids))):
1594  self.clusters.append(IMP.pmi.output.Cluster(i))
1595  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1596  self.clusters[idx].add_member(i,d)
1597 
1598  def get_cluster_data(self, cluster):
1599  """
1600  Return the model data from a cluster
1601  @param cluster IMP.pmi.output.Cluster object
1602  """
1603  data=[]
1604  for m in cluster:
1605  data.append(m)
1606  return data
1607 
1608  def save_data(self,filename='data.pkl'):
1609  """
1610  Save the data for the whole models into a pickle file
1611  @param filename string
1612  """
1613  self.stath0.save_data(filename)
1614 
1615  def set_data(self,data):
1616  """
1617  Set the data from an external IMP.pmi.output.Data
1618  @param data IMP.pmi.output.Data
1619  """
1620  self.stath0.data=data
1621  self.stath1.data=data
1622 
1623  def load_data(self,filename='data.pkl'):
1624  """
1625  Load the data from an external pickled file
1626  @param filename string
1627  """
1628  self.stath0.load_data(filename)
1629  self.stath1.load_data(filename)
1630  self.best_models=len(self.stath0)
1631 
1632  def add_cluster(self,rmf_name_list):
1633  c = IMP.pmi.output.Cluster(len(self.clusters))
1634  print("creating cluster index "+str(len(self.clusters)))
1635  self.clusters.append(c)
1636  current_len=len(self.stath0)
1637 
1638  for rmf in rmf_name_list:
1639  print("adding rmf "+rmf)
1640  self.stath0.add_stat_file(rmf)
1641  self.stath1.add_stat_file(rmf)
1642 
1643  for n0 in range(current_len,len(self.stath0)):
1644  d0=self.stath0[n0]
1645  c.add_member(n0,d0)
1646  self.update_clusters()
1647 
1648  def save_clusters(self,filename='clusters.pkl'):
1649  """
1650  Save the clusters into a pickle file
1651  @param filename string
1652  """
1653  try:
1654  import cPickle as pickle
1655  except ImportError:
1656  import pickle
1657  fl=open(filename,'wb')
1658  pickle.dump(self.clusters,fl)
1659 
1660  def load_clusters(self,filename='clusters.pkl',append=False):
1661  """
1662  Load the clusters from a pickle file
1663  @param filename string
1664  @param append bool (Default=False), if True. append the clusters to the ones currently present
1665  """
1666  try:
1667  import cPickle as pickle
1668  except ImportError:
1669  import pickle
1670  fl=open(filename,'rb')
1671  self.clean_clusters()
1672  if append:
1673  self.clusters+=pickle.load(fl)
1674  else:
1675  self.clusters=pickle.load(fl)
1676  self.update_clusters()
1677 
1678  ####################
1679  # Analysis Functions
1680  ####################
1681 
1682  def compute_cluster_center(self,cluster):
1683  """
1684  Compute the cluster center for a given cluster
1685  """
1686  member_distance=defaultdict(float)
1687 
1688  for n0,n1 in itertools.combinations(cluster.members,2):
1689  d0=self.stath0[n0]
1690  d1=self.stath1[n1]
1691  rmsd, _ = self.rmsd()
1692  member_distance[n0]+=rmsd
1693 
1694  if len(member_distance)>0:
1695  cluster.center_index=min(member_distance, key=member_distance.get)
1696  else:
1697  cluster.center_index=cluster.members[0]
1698 
1699  def save_coordinates(self,cluster,rmf_name=None,reference="Absolute", prefix="./"):
1700  """
1701  Save the coordinates of the current cluster a single rmf file
1702  """
1703  print("saving coordinates",cluster)
1704  if self.alignment: self.set_reference(reference,cluster)
1706  if rmf_name is None:
1707  rmf_name=prefix+'/'+str(cluster.cluster_id)+".rmf3"
1708 
1709  d1=self.stath1[cluster.members[0]]
1710  self.model.update()
1711  o.init_rmf(rmf_name, [self.stath1])
1712  for n1 in cluster.members:
1713  d1=self.stath1[n1]
1714  self.model.update()
1716  if self.alignment: self.align()
1717  o.write_rmf(rmf_name)
1719  o.close_rmf(rmf_name)
1720 
1721  def prune_redundant_structures(self,rmsd_cutoff=10):
1722  """
1723  remove structures that are similar
1724  append it to a new cluster
1725  """
1726  print("pruning models")
1727  selected=0
1728  filtered=[selected]
1729  remaining=range(1,len(self.stath1),10)
1730 
1731  while len(remaining)>0:
1732  d0=self.stath0[selected]
1733  rm=[]
1734  for n1 in remaining:
1735  d1=self.stath1[n1]
1736  if self.alignment: self.align()
1737  d, _ = self.rmsd()
1738  if d<=rmsd_cutoff:
1739  rm.append(n1)
1740  print("pruning model %s, similar to model %s, rmsd %s"%(str(n1),str(selected),str(d)))
1741  remaining=[x for x in remaining if x not in rm]
1742  if len(remaining)==0: break
1743  selected=remaining[0]
1744  filtered.append(selected)
1745  remaining.pop(0)
1746  c = IMP.pmi.output.Cluster(len(self.clusters))
1747  self.clusters.append(c)
1748  for n0 in filtered:
1749  d0=self.stath0[n0]
1750  c.add_member(n0,d0)
1751  self.update_clusters()
1752 
1753 
1754 
1755  def precision(self,cluster):
1756  """
1757  Compute the precision of a cluster
1758  """
1759  npairs=0
1760  rmsd=0.0
1761  precision=None
1762 
1763  if not cluster.center_index is None:
1764  members1=[cluster.center_index]
1765  else:
1766  members1=cluster.members
1767 
1768  for n0 in members1:
1769  d0=self.stath0[n0]
1770  for n1 in cluster.members:
1771  if n0!=n1:
1772  npairs+=1
1773  d1=self.stath1[n1]
1775  tmp_rmsd, _ = self.rmsd()
1776  rmsd+=tmp_rmsd
1778 
1779  if npairs>0:
1780  precision=rmsd/npairs
1781  cluster.precision=precision
1782  return precision
1783 
1784  def bipartite_precision(self,cluster1,cluster2,verbose=False):
1785  """
1786  Compute the bipartite precision (ie the cross-precision)
1787  between two clusters
1788  """
1789  npairs=0
1790  rmsd=0.0
1791  for cn0,n0 in enumerate(cluster1.members):
1792  d0=self.stath0[n0]
1793  for cn1,n1 in enumerate(cluster2.members):
1794  d1=self.stath1[n1]
1795  tmp_rmsd, _ =self.rmsd()
1796  if verbose: print("--- rmsd between structure %s and structure %s is %s"%(str(cn0),str(cn1),str(tmp_rmsd)))
1797  rmsd+=tmp_rmsd
1798  npairs+=1
1799  precision=rmsd/npairs
1800  return precision
1801 
1802  def rmsf(self,cluster,molecule,copy_index=0,state_index=0,cluster_ref=None,step=1):
1803  """
1804  Compute the Root mean square fluctuations
1805  of a molecule in a cluster
1806  Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf
1807  """
1808  rmsf=IMP.pmi.tools.OrderedDict()
1809 
1810  #assumes that residue indexes are identical for stath0 and stath1
1811  if cluster_ref is not None:
1812  if not cluster_ref.center_index is None:
1813  members0 = [cluster_ref.center_index]
1814  else:
1815  members0 = cluster_ref.members
1816  else:
1817  if not cluster.center_index is None:
1818  members0 = [cluster.center_index]
1819  else:
1820  members0 = cluster.members
1821 
1822  s0=IMP.atom.Selection(self.stath0,molecule=molecule,resolution=1,
1823  copy_index=copy_index,state_index=state_index)
1824  ps0=s0.get_selected_particles()
1825  #get the residue indexes
1826  residue_indexes=list(IMP.pmi.tools.OrderedSet([IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
1827 
1828  #get the corresponding particles
1829  #s0=IMP.atom.Selection(stat_ref,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
1830  # copy_index=copy_index,state_index=state_index)
1831  #ps0 = s0.get_selected_particles()
1832 
1833 
1834 
1835  npairs=0
1836  for n0 in members0:
1837  d0=self.stath0[n0]
1838  for n1 in cluster.members[::step]:
1839  if n0!=n1:
1840  print("--- rmsf %s %s"%(str(n0),str(n1)))
1842 
1843  s1=IMP.atom.Selection(self.stath1,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
1844  copy_index=copy_index,state_index=state_index)
1845  ps1 = s1.get_selected_particles()
1846 
1847  d1=self.stath1[n1]
1848  if self.alignment: self.align()
1849  for n,(p0,p1) in enumerate(zip(ps0,ps1)):
1850  r=residue_indexes[n]
1851  d0=IMP.core.XYZ(p0)
1852  d1=IMP.core.XYZ(p1)
1853  if r in rmsf:
1854  rmsf[r]+=IMP.core.get_distance(d0,d1)
1855  else:
1856  rmsf[r]=IMP.core.get_distance(d0,d1)
1857  npairs+=1
1859  for r in rmsf:
1860  rmsf[r]/=npairs
1861 
1862  for stath in [self.stath0,self.stath1]:
1863  if molecule not in self.symmetric_molecules:
1864  s=IMP.atom.Selection(stath,molecule=molecule,residue_index=r,resolution=1,
1865  copy_index=copy_index,state_index=state_index)
1866  else:
1867  s=IMP.atom.Selection(stath,molecule=molecule,residue_index=r,resolution=1,
1868  state_index=state_index)
1869 
1870  ps = s.get_selected_particles()
1871  for p in ps:
1873  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
1874  else:
1876 
1877  return rmsf
1878 
1879  def save_densities(self,cluster,density_custom_ranges,voxel_size=5,reference="Absolute", prefix="./",step=1):
1880  if self.alignment: self.set_reference(reference,cluster)
1881  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
1882  voxel=voxel_size)
1883 
1884  for n1 in cluster.members[::step]:
1885  print("density "+str(n1))
1886  d1=self.stath1[n1]
1888  if self.alignment: self.align()
1889  dens.add_subunits_density(self.stath1)
1891  dens.write_mrc(path=prefix+'/',suffix=str(cluster.cluster_id))
1892  del dens
1893 
1894  def contact_map(self,cluster,contact_threshold=15,log_scale=False,consolidate=False,molecules=None,prefix='./',reference="Absolute"):
1895  if self.alignment: self.set_reference(reference,cluster)
1896  import numpy as np
1897  import matplotlib.pyplot as plt
1898  import matplotlib.cm as cm
1899  from scipy.spatial.distance import cdist
1900  import IMP.pmi.topology
1901  if molecules is None:
1903  else:
1904  mols=[IMP.pmi.topology.PMIMoleculeHierarchy(mol) for mol in IMP.pmi.tools.get_molecules(IMP.atom.Selection(self.stath1,molecules=molecules).get_selected_particles())]
1905  unique_copies=[mol for mol in mols if mol.get_copy_index() == 0]
1906  mol_names_unique=dict((mol.get_name(),mol) for mol in unique_copies)
1907  total_len_unique=sum(max(mol.get_residue_indexes()) for mol in unique_copies)
1908 
1909 
1910  # coords = np.ones((total_len,3)) * 1e6 #default to coords "very far away"
1911  index_dict={}
1912  prev_stop=0
1913 
1914  if not consolidate:
1915  for mol in mols:
1916  seqlen=max(mol.get_residue_indexes())
1917  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
1918  prev_stop+=seqlen
1919 
1920  else:
1921  for mol in unique_copies:
1922  seqlen=max(mol.get_residue_indexes())
1923  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
1924  prev_stop+=seqlen
1925 
1926 
1927  for ncl,n1 in enumerate(cluster.members):
1928  print(ncl)
1929  d1=self.stath1[n1]
1930  #self.apply_molecular_assignments(n1)
1931  coord_dict=IMP.pmi.tools.OrderedDict()
1932  for mol in mols:
1933  mol_name=mol.get_name()
1934  copy_index=mol.get_copy_index()
1935  rindexes = mol.get_residue_indexes()
1936  coords = np.ones((max(rindexes),3))
1937  for rnum in rindexes:
1938  sel = IMP.atom.Selection(mol, residue_index=rnum, resolution=1)
1939  selpart = sel.get_selected_particles()
1940  if len(selpart) == 0: continue
1941  selpart = selpart[0]
1942  coords[rnum - 1, :] = IMP.core.XYZ(selpart).get_coordinates()
1943  coord_dict[mol]=coords
1944 
1945  if not consolidate:
1946  coords=np.concatenate(list(coord_dict.values()))
1947  dists = cdist(coords, coords)
1948  binary_dists = np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
1949  else:
1950  binary_dists_dict={}
1951  for mol1 in mols:
1952  len1 = max(mol1.get_residue_indexes())
1953  for mol2 in mols:
1954  name1=mol1.get_name()
1955  name2=mol2.get_name()
1956  dists = cdist(coord_dict[mol1], coord_dict[mol2])
1957  if (name1, name2) not in binary_dists_dict:
1958  binary_dists_dict[(name1, name2)] = np.zeros((len1,len1))
1959  binary_dists_dict[(name1, name2)] += np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
1960  binary_dists=np.zeros((total_len_unique,total_len_unique))
1961 
1962  for name1,name2 in binary_dists_dict:
1963  r1=index_dict[mol_names_unique[name1]]
1964  r2=index_dict[mol_names_unique[name2]]
1965  binary_dists[min(r1):max(r1)+1,min(r2):max(r2)+1] = np.where((binary_dists_dict[(name1, name2)]>=1.0),1.0,0.0)
1966 
1967  if ncl==0:
1968  dist_maps = [dists]
1969  av_dist_map = dists
1970  contact_freqs = binary_dists
1971  else:
1972  dist_maps.append(dists)
1973  av_dist_map += dists
1974  contact_freqs += binary_dists
1975 
1976  #self.undo_apply_molecular_assignments(n1)
1977 
1978  if log_scale:
1979  contact_freqs =-np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
1980  else:
1981  contact_freqs =1.0/len(cluster)*contact_freqs
1982  av_dist_map=1.0/len(cluster)*contact_freqs
1983 
1984  fig = plt.figure(figsize=(100, 100))
1985  ax = fig.add_subplot(111)
1986  ax.set_xticks([])
1987  ax.set_yticks([])
1988  gap_between_components=50
1989  colormap = cm.Blues
1990  colornorm=None
1991 
1992 
1993  #if cbar_labels is not None:
1994  # if len(cbar_labels)!=4:
1995  # print("to provide cbar labels, give 3 fields (first=first input file, last=last input) in oppose order of input contact maps")
1996  # exit()
1997  # set the list of proteins on the x axis
1998  if not consolidate:
1999  sorted_tuple=sorted([(IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_extended_name(),mol) for mol in mols])
2000  prot_list=list(zip(*sorted_tuple))[1]
2001  else:
2002  sorted_tuple=sorted([(IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(),mol) for mol in unique_copies])
2003  prot_list=list(zip(*sorted_tuple))[1]
2004 
2005  prot_listx = prot_list
2006  nresx = gap_between_components + \
2007  sum([max(mol.get_residue_indexes())
2008  + gap_between_components for mol in prot_listx])
2009 
2010  # set the list of proteins on the y axis
2011  prot_listy = prot_list
2012  nresy = gap_between_components + \
2013  sum([max(mol.get_residue_indexes())
2014  + gap_between_components for mol in prot_listy])
2015 
2016  # this is the residue offset for each protein
2017  resoffsetx = {}
2018  resendx = {}
2019  res = gap_between_components
2020  for mol in prot_listx:
2021  resoffsetx[mol] = res
2022  res += max(mol.get_residue_indexes())
2023  resendx[mol] = res
2024  res += gap_between_components
2025 
2026  resoffsety = {}
2027  resendy = {}
2028  res = gap_between_components
2029  for mol in prot_listy:
2030  resoffsety[mol] = res
2031  res += max(mol.get_residue_indexes())
2032  resendy[mol] = res
2033  res += gap_between_components
2034 
2035  resoffsetdiagonal = {}
2036  res = gap_between_components
2037  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2038  resoffsetdiagonal[mol] = res
2039  res += max(mol.get_residue_indexes())
2040  res += gap_between_components
2041 
2042  # plot protein boundaries
2043  xticks = []
2044  xlabels = []
2045  for n, prot in enumerate(prot_listx):
2046  res = resoffsetx[prot]
2047  end = resendx[prot]
2048  for proty in prot_listy:
2049  resy = resoffsety[proty]
2050  endy = resendy[proty]
2051  ax.plot([res, res], [resy, endy], linestyle='-',color='gray', lw=0.4)
2052  ax.plot([end, end], [resy, endy], linestyle='-',color='gray', lw=0.4)
2053  xticks.append((float(res) + float(end)) / 2)
2054  xlabels.append(IMP.pmi.topology.PMIMoleculeHierarchy(prot).get_extended_name())
2055 
2056  yticks = []
2057  ylabels = []
2058  for n, prot in enumerate(prot_listy):
2059  res = resoffsety[prot]
2060  end = resendy[prot]
2061  for protx in prot_listx:
2062  resx = resoffsetx[protx]
2063  endx = resendx[protx]
2064  ax.plot([resx, endx], [res, res], linestyle='-',color='gray', lw=0.4)
2065  ax.plot([resx, endx], [end, end], linestyle='-',color='gray', lw=0.4)
2066  yticks.append((float(res) + float(end)) / 2)
2067  ylabels.append(IMP.pmi.topology.PMIMoleculeHierarchy(prot).get_extended_name())
2068 
2069  # plot the contact map
2070 
2071  tmp_array = np.zeros((nresx, nresy))
2072  ret={}
2073  for px in prot_listx:
2074  for py in prot_listy:
2075  resx = resoffsetx[px]
2076  lengx = resendx[px] - 1
2077  resy = resoffsety[py]
2078  lengy = resendy[py] - 1
2079  indexes_x = index_dict[px]
2080  minx = min(indexes_x)
2081  maxx = max(indexes_x)
2082  indexes_y = index_dict[py]
2083  miny = min(indexes_y)
2084  maxy = max(indexes_y)
2085  tmp_array[resx:lengx,resy:lengy] = contact_freqs[minx:maxx,miny:maxy]
2086  ret[(px,py)]=np.argwhere(contact_freqs[minx:maxx,miny:maxy] == 1.0)+1
2087 
2088  cax = ax.imshow(tmp_array,
2089  cmap=colormap,
2090  norm=colornorm,
2091  origin='lower',
2092  alpha=0.6,
2093  interpolation='nearest')
2094 
2095  ax.set_xticks(xticks)
2096  ax.set_xticklabels(xlabels, rotation=90)
2097  ax.set_yticks(yticks)
2098  ax.set_yticklabels(ylabels)
2099  plt.setp(ax.get_xticklabels(), fontsize=6)
2100  plt.setp(ax.get_yticklabels(), fontsize=6)
2101 
2102  # display and write to file
2103  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2104  [i.set_linewidth(2.0) for i in ax.spines.values()]
2105  #if cbar_labels is not None:
2106  # cbar = fig.colorbar(cax, ticks=[0.5,1.5,2.5,3.5])
2107  # cbar.ax.set_yticklabels(cbar_labels)# vertically oriented colorbar
2108 
2109  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf", dpi=300,transparent="False")
2110  return ret
2111 
2112 
2113  def plot_rmsd_matrix(self,filename):
2114  import numpy as np
2115  self.compute_all_pairwise_rmsd()
2116  distance_matrix = np.zeros(
2117  (len(self.stath0), len(self.stath1)))
2118  for (n0,n1) in self.pairwise_rmsd:
2119  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0,n1)]
2120 
2121  import matplotlib as mpl
2122  mpl.use('Agg')
2123  import matplotlib.pylab as pl
2124  from scipy.cluster import hierarchy as hrc
2125 
2126  fig = pl.figure(figsize=(10,8))
2127  ax = fig.add_subplot(212)
2128  dendrogram = hrc.dendrogram(
2129  hrc.linkage(distance_matrix),
2130  color_threshold=7,
2131  no_labels=True)
2132  leaves_order = dendrogram['leaves']
2133  ax.set_xlabel('Model')
2134  ax.set_ylabel('RMSD [Angstroms]')
2135 
2136  ax2 = fig.add_subplot(221)
2137  cax = ax2.imshow(
2138  distance_matrix[leaves_order,
2139  :][:,
2140  leaves_order],
2141  interpolation='nearest')
2142  cb = fig.colorbar(cax)
2143  cb.set_label('RMSD [Angstroms]')
2144  ax2.set_xlabel('Model')
2145  ax2.set_ylabel('Model')
2146 
2147  pl.savefig(filename, dpi=300)
2148  pl.close(fig)
2149 
2150  ####################
2151  # Internal Functions
2152  ####################
2153 
2154  def update_clusters(self):
2155  """
2156  Update the cluster id numbers
2157  """
2158  for n,c in enumerate(self.clusters):
2159  c.cluster_id=n
2160 
2161  def get_molecule(self, hier, name, copy):
2162  s=IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2163  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2164 
2165  def update_seldicts(self):
2166  """
2167  Update the seldicts
2168  """
2169  self.seldict0=IMP.pmi.tools.get_selections_dictionary(self.sel0_rmsd.get_selected_particles())
2170  self.seldict1=IMP.pmi.tools.get_selections_dictionary(self.sel1_rmsd.get_selected_particles())
2171  for mol in self.seldict0:
2172  for sel in self.seldict0[mol]:
2173  self.issymmetricsel[sel]=False
2174  for mol in self.symmetric_molecules:
2175  self.symmetric_molecules[mol]=len(self.seldict0[mol])
2176  for sel in self.seldict0[mol]:
2177  self.issymmetricsel[sel]=True
2178 
2179 
2180  def align(self):
2181  tr = IMP.atom.get_transformation_aligning_first_to_second(self.sel1_alignment, self.sel0_alignment)
2182 
2183  for rb in self.rbs1:
2184  IMP.core.transform(rb, tr)
2185 
2186  for bead in self.beads1:
2187  try:
2188  IMP.core.transform(IMP.core.XYZ(bead), tr)
2189  except:
2190  continue
2191 
2192 
2193  self.model.update()
2194 
2195  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2196  '''
2197  initial filling of the clusters.
2198  '''
2199  n0 = idxs.pop()
2200  print("clustering model "+str(n0))
2201  d0 = self.stath0[n0]
2202  c = IMP.pmi.output.Cluster(len(self.clusters))
2203  print("creating cluster index "+str(len(self.clusters)))
2204  self.clusters.append(c)
2205  c.add_member(n0,d0)
2206  clustered = set([n0])
2207  for n1 in idxs:
2208  print("--- trying to add model "+str(n1)+" to cluster "+str(len(self.clusters)))
2209  d1 = self.stath1[n1]
2210  if self.alignment: self.align()
2211  rmsd, _ = self.rmsd(metric=metric)
2212  if rmsd<rmsd_cutoff:
2213  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2214  c.add_member(n1,d1)
2215  clustered.add(n1)
2216  else:
2217  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2218  idxs-=clustered
2219 
2220  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2221  """
2222  merge the clusters that have close members
2223  @param rmsd_cutoff cutoff distance in Angstorms
2224  """
2225  # before merging, clusters are spheres of radius rmsd_cutoff centered on the 1st element
2226  # here we only try to merge clusters whose centers are closer than 2*rmsd_cutoff
2227  to_merge = []
2228  print("merging...")
2229  for c0, c1 in filter(lambda x: len(x[0].members)>1, itertools.combinations(self.clusters, 2)):
2230  n0, n1 = [c.members[0] for c in (c0,c1)]
2231  d0 = self.stath0[n0]
2232  d1 = self.stath1[n1]
2233  rmsd, _ = self.rmsd()
2234  if rmsd<2*rmsd_cutoff and self.have_close_members(c0,c1,rmsd_cutoff,metric):
2235  to_merge.append((c0,c1))
2236 
2237  for c0, c in reversed(to_merge):
2238  self.merge(c0,c)
2239 
2240  #keep only full clusters
2241  self.clusters = [c for c in filter(lambda x: len(x.members)>0, self.clusters)]
2242 
2243 
2244  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2245  '''
2246  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2247  '''
2248  print("check close members for clusters "+str(c0.cluster_id)+" and "+str(c1.cluster_id))
2249  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2250  d0 = self.stath0[n0]
2251  d1 = self.stath1[n1]
2252  rmsd, _ = self.rmsd(metric=metric)
2253  if rmsd<rmsd_cutoff:
2254  return True
2255 
2256  return False
2257 
2258 
2259  def merge(self, c0, c1):
2260  '''
2261  merge two clusters
2262  '''
2263  c0+=c1
2264  c1.members=[]
2265  c1.data={}
2266 
2267  def rmsd_helper(self, sels0, sels1, metric):
2268  '''
2269  a function that returns the permutation best_sel of sels0 that minimizes metric
2270  '''
2271  best_rmsd2 = float('inf')
2272  best_sel = None
2273  if self.issymmetricsel[sels0[0]]:
2274  #this cases happens when symmetries were defined
2275  N = len(sels0)
2276  for offset in range(N):
2277  order=[(offset+i)%N for i in range(N)]
2278  sels = [sels0[(offset+i)%N] for i in range(N)]
2279  sel0 = sels[0]
2280  sel1 = sels1[0]
2281  r=metric(sel0, sel1)
2282  rmsd2=r*r*N
2283  ###print(order,rmsd2)
2284  if rmsd2 < best_rmsd2:
2285  best_rmsd2 = rmsd2
2286  best_sel = sels
2287  best_order=order
2288  else:
2289  for sels in itertools.permutations(sels0):
2290  rmsd2=0.0
2291  for sel0, sel1 in itertools.takewhile(lambda x: rmsd2<best_rmsd2, zip(sels, sels1)):
2292  r=metric(sel0, sel1)
2293  rmsd2+=r*r
2294  if rmsd2 < best_rmsd2:
2295  best_rmsd2 = rmsd2
2296  best_sel = sels
2297  ###for i,sel in enumerate(best_sel):
2298  ### p0 = sel.get_selected_particles()[0]
2299  ### p1 = sels1[i].get_selected_particles()[0]
2300  ### m0 = IMP.pmi.tools.get_molecules([p0])[0]
2301  ### m1 = IMP.pmi.tools.get_molecules([p1])[0]
2302  ### c0 = IMP.atom.Copy(m0).get_copy_index()
2303  ### c1 = IMP.atom.Copy(m1).get_copy_index()
2304  ### name0=m0.get_name()
2305  ### name1=m1.get_name()
2306  ### print("WWW",name0,name1,c0,c1)
2307  ###print(best_order,best_rmsd2,m0,m1)
2308  return best_sel, best_rmsd2
2309 
2310  def compute_all_pairwise_rmsd(self):
2311  for d0 in self.stath0:
2312  for d1 in self.stath1:
2313  rmsd, _ = self.rmsd()
2314 
2315  def rmsd(self,metric=IMP.atom.get_rmsd):
2316  '''
2317  Computes the RMSD. Resolves ambiguous pairs assignments
2318  '''
2319  # here we memoize the rmsd and molecular assignment so that it's not done multiple times
2320  n0=self.stath0.current_index
2321  n1=self.stath1.current_index
2322  if ((n0,n1) in self.pairwise_rmsd) and ((n0,n1) in self.pairwise_molecular_assignment):
2323  return self.pairwise_rmsd[(n0,n1)], self.pairwise_molecular_assignment[(n0,n1)]
2324 
2325  if self.alignment:
2326  self.align()
2327  #if it's not yet memoized
2328  total_rmsd=0.0
2329  total_N=0
2330  # this is a dictionary which keys are the molecule names, and values are the list of IMP.atom.Selection for all molecules that share the molecule name
2331  seldict_best_order={}
2332  molecular_assignment={}
2333  for molname, sels0 in self.seldict0.items():
2334  sels_best_order, best_rmsd2 = self.rmsd_helper(sels0, self.seldict1[molname], metric)
2335 
2336  Ncoords = len(sels_best_order[0].get_selected_particles())
2337  Ncopies = len(self.seldict1[molname])
2338  total_rmsd += Ncoords*best_rmsd2
2339  total_N += Ncoords*Ncopies
2340 
2341  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2342  p0 = sel0.get_selected_particles()[0]
2343  p1 = sel1.get_selected_particles()[0]
2344  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2345  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2346  c0 = IMP.atom.Copy(m0).get_copy_index()
2347  c1 = IMP.atom.Copy(m1).get_copy_index()
2348  ###print(molname,c0,c1)
2349  molecular_assignment[(molname,c0)]=(molname,c1)
2350 
2351  total_rmsd = math.sqrt(total_rmsd/total_N)
2352 
2353  self.pairwise_rmsd[(n0,n1)]=total_rmsd
2354  self.pairwise_molecular_assignment[(n0,n1)]=molecular_assignment
2355  self.pairwise_rmsd[(n1,n0)]=total_rmsd
2356  self.pairwise_molecular_assignment[(n1,n0)]=molecular_assignment
2357  ###print(n0,n1,molecular_assignment)
2358  return total_rmsd, molecular_assignment
2359 
2360  def set_reference(self,reference,cluster):
2361  """
2362  Fix the reference structure for structural alignment, rmsd and chain assignemnt
2363  @param reference can be either "Absolute" (cluster center of the first cluster)
2364  or Relative (cluster center of the current cluster)
2365  """
2366  if reference=="Absolute":
2367  d0=self.stath0[0]
2368  elif reference=="Relative":
2369  if cluster.center_index:
2370  n0=cluster.center_index
2371  else:
2372  n0=cluster.members[0]
2373  d0=self.stath0[n0]
2374 
2376  """
2377  compute the molecular assignemnts between multiple copies
2378  of the same sequence. It changes the Copy index of Molecules
2379  """
2380  d1=self.stath1[n1]
2381  _, molecular_assignment = self.rmsd()
2382  for (m0, c0), (m1,c1) in molecular_assignment.items():
2383  mol0 = self.molcopydict0[m0][c0]
2384  mol1 = self.molcopydict1[m1][c1]
2385  cik0=IMP.atom.Copy(mol0).get_copy_index_key()
2386  p1=IMP.atom.Copy(mol1).get_particle()
2387  p1.set_value(cik0,c0)
2388 
2390  """
2391  Undo the Copy index assignment
2392  """
2393  d1=self.stath1[n1]
2394  _, molecular_assignment = self.rmsd()
2395  mols_newcopys = []
2396  for (m0, c0), (m1,c1) in molecular_assignment.items():
2397  mol0 = self.molcopydict0[m0][c0]
2398  mol1 = self.molcopydict1[m1][c1]
2399  cik0=IMP.atom.Copy(mol0).get_copy_index_key()
2400  p1=IMP.atom.Copy(mol1).get_particle()
2401  p1.set_value(cik0,c1)
2402 
2403  ####################
2404  # Container Functions
2405  ####################
2406 
2407  def __repr__(self):
2408  s= "AnalysisReplicaExchange\n"
2409  s+="---- number of clusters %s \n"%str(len(self.clusters))
2410  s+="---- number of models %s \n"%str(len(self.stath0))
2411  return s
2412 
2413  def __getitem__(self,int_slice_adaptor):
2414  if isinstance(int_slice_adaptor, int):
2415  return self.clusters[int_slice_adaptor]
2416  elif isinstance(int_slice_adaptor, slice):
2417  return self.__iter__(int_slice_adaptor)
2418  else:
2419  raise TypeError("Unknown Type")
2420 
2421  def __len__(self):
2422  return len(self.clusters)
2423 
2424  def __iter__(self,slice_key=None):
2425  if slice_key is None:
2426  for i in range(len(self)):
2427  yield self[i]
2428  else:
2429  for i in range(len(self))[slice_key]:
2430  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2315
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
Definition: macros.py:2360
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1660
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1115
def precision
Compute the precision of a cluster.
Definition: macros.py:1755
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:797
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:103
A container for models organized into clusters.
Definition: output.py:1452
Sample using molecular dynamics.
Definition: samplers.py:389
def aggregate
initial filling of the clusters.
Definition: macros.py:2195
A class for reading stat files (either rmf or ascii v1 and v2)
Definition: output.py:904
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:616
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:617
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1721
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1802
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1206
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1517
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1598
def __init__
Construction of the Class.
Definition: macros.py:1478
void handle_use_deprecated(std::string message)
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:711
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1615
def __init__
Constructor.
Definition: macros.py:109
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2389
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1533
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2267
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1699
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:942
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
Definition: macros.py:2375
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1468
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:549
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2220
This class initializes the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:850
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1682
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:859
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:23
def update_seldicts
Update the seldicts.
Definition: macros.py:2165
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2154
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:595
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1562
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:198
Collect timing information.
Definition: tools.py:117
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1526
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: pmi/utilities.h:91
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:10240
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:36
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2259
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:597
The general base class for IMP exceptions.
Definition: exception.h:49
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1216
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1608
Class to handle individual particles of a Model object.
Definition: Particle.h:41
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:720
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1784
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:575
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1549
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1648
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2244
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
A macro to help setup and run replica exchange.
Definition: macros.py:57
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1183
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1623
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:553
Sample using replica exchange.
Definition: samplers.py:496
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27