IMP logo
IMP Reference Guide  2.13.0
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
7 import IMP.pmi.tools
8 import IMP.pmi.samplers
9 import IMP.pmi.output
10 import IMP.pmi.analysis
11 import IMP.pmi.io
12 import IMP.pmi.alphabets
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import RMF
17 import os
18 import glob
19 from operator import itemgetter
20 from collections import defaultdict
21 import numpy as np
22 import string
23 import itertools
24 import warnings
25 import math
26 import warnings
27 
28 class _RMFRestraints(object):
29  """All restraints that are written out to the RMF file"""
30  def __init__(self, model, user_restraints):
31  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
32  self._user_restraints = user_restraints if user_restraints else []
33 
34  def __len__(self):
35  return (len(self._user_restraints)
36  + self._rmf_rs.get_number_of_restraints())
37 
38  def __bool__(self):
39  return len(self) > 0
40  __nonzero__ = __bool__ # Python 2 compatibility
41 
42  def __getitem__(self, i):
43  class FakePMIWrapper(object):
44  def __init__(self, r):
45  self.r = IMP.RestraintSet.get_from(r)
46  get_restraint = lambda self: self.r
47  lenuser = len(self._user_restraints)
48  if 0 <= i < lenuser:
49  return self._user_restraints[i]
50  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
51  r = self._rmf_rs.get_restraint(i - lenuser)
52  return FakePMIWrapper(r)
53  else:
54  raise IndexError("Out of range")
55 
56 
57 class ReplicaExchange0(object):
58  """A macro to help setup and run replica exchange.
59  Supports Monte Carlo and molecular dynamics.
60  Produces trajectory RMF files, best PDB structures,
61  and output stat files.
62  """
63  def __init__(self, model, root_hier,
64  sample_objects=None, # DEPRECATED
65  monte_carlo_sample_objects=None,
66  molecular_dynamics_sample_objects=None,
67  output_objects=[],
68  rmf_output_objects=None,
69  crosslink_restraints=None, # DEPRECATED
70  monte_carlo_temperature=1.0,
71  simulated_annealing=False,
72  simulated_annealing_minimum_temperature=1.0,
73  simulated_annealing_maximum_temperature=2.5,
74  simulated_annealing_minimum_temperature_nframes=100,
75  simulated_annealing_maximum_temperature_nframes=100,
76  replica_exchange_minimum_temperature=1.0,
77  replica_exchange_maximum_temperature=2.5,
78  replica_exchange_swap=True,
79  num_sample_rounds=1,
80  number_of_best_scoring_models=500,
81  monte_carlo_steps=10,
82  self_adaptive=False,
83  molecular_dynamics_steps=10,
84  molecular_dynamics_max_time_step=1.0,
85  number_of_frames=1000,
86  save_coordinates_mode="lowest_temperature",
87  nframes_write_coordinates=1,
88  write_initial_rmf=True,
89  initial_rmf_name_suffix="initial",
90  stat_file_name_suffix="stat",
91  best_pdb_name_suffix="model",
92  do_clean_first=True,
93  do_create_directories=True,
94  global_output_directory="./",
95  rmf_dir="rmfs/",
96  best_pdb_dir="pdbs/",
97  replica_stat_file_suffix="stat_replica",
98  em_object_for_rmf=None,
99  atomistic=False,
100  replica_exchange_object=None,
101  test_mode=False):
102  """Constructor.
103  @param model The IMP model
104  @param root_hier Top-level (System)hierarchy
105  @param monte_carlo_sample_objects Objects for MC sampling; a list of
106  structural components (generally the representation) that will
107  be moved and restraints with parameters that need to
108  be sampled.
109  For PMI2: just pass flat list of movers
110  @param molecular_dynamics_sample_objects Objects for MD sampling
111  For PMI2: just pass flat list of particles
112  @param output_objects A list of structural objects and restraints
113  that will be included in output (ie, statistics "stat" files). Any object
114  that provides a get_output() method can be used here. If None is passed
115  the macro will not write stat files.
116  @param rmf_output_objects A list of structural objects and restraints
117  that will be included in rmf. Any object
118  that provides a get_output() method can be used here.
119  @param monte_carlo_temperature MC temp (may need to be optimized
120  based on post-sampling analysis)
121  @param simulated_annealing If True, perform simulated annealing
122  @param simulated_annealing_minimum_temperature Should generally be
123  the same as monte_carlo_temperature.
124  @param simulated_annealing_minimum_temperature_nframes Number of
125  frames to compute at minimum temperature.
126  @param simulated_annealing_maximum_temperature_nframes Number of
127  frames to compute at
128  temps > simulated_annealing_maximum_temperature.
129  @param replica_exchange_minimum_temperature Low temp for REX; should
130  generally be the same as monte_carlo_temperature.
131  @param replica_exchange_maximum_temperature High temp for REX
132  @param replica_exchange_swap Boolean, enable disable temperature
133  swap (Default=True)
134  @param num_sample_rounds Number of rounds of MC/MD per cycle
135  @param number_of_best_scoring_models Number of top-scoring PDB models
136  to keep around for analysis
137  @param monte_carlo_steps Number of MC steps per round
138  @param self_adaptive self adaptive scheme for monte carlo movers
139  @param molecular_dynamics_steps Number of MD steps per round
140  @param molecular_dynamics_max_time_step Max time step for MD
141  @param number_of_frames Number of REX frames to run
142  @param save_coordinates_mode string: how to save coordinates.
143  "lowest_temperature" (default) only the lowest temperatures is saved
144  "25th_score" all replicas whose score is below the 25th percentile
145  "50th_score" all replicas whose score is below the 50th percentile
146  "75th_score" all replicas whose score is below the 75th percentile
147  @param nframes_write_coordinates How often to write the coordinates
148  of a frame
149  @param write_initial_rmf Write the initial configuration
150  @param global_output_directory Folder that will be created to house
151  output.
152  @param test_mode Set to True to avoid writing any files, just test one frame.
153  """
154  self.model = model
155  self.vars = {}
156 
157  ### add check hierarchy is multistate
158  self.output_objects = output_objects
159  self.rmf_output_objects=rmf_output_objects
160  if (isinstance(root_hier, IMP.atom.Hierarchy)
161  and root_hier.get_name() == 'System'):
162  if self.output_objects is not None:
163  self.output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
164  if self.rmf_output_objects is not None:
165  self.rmf_output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
166  self.root_hier = root_hier
167  states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
168  self.vars["number_of_states"] = len(states)
169  if len(states)>1:
170  self.root_hiers = states
171  self.is_multi_state = True
172  else:
173  self.root_hier = root_hier
174  self.is_multi_state = False
175  else:
176  raise TypeError("Must provide System hierarchy (root_hier)")
177 
178  if crosslink_restraints:
180  "crosslink_restraints is deprecated and is ignored; "
181  "all cross-link restraints should be automatically "
182  "added to output RMF files")
183  self._rmf_restraints = _RMFRestraints(model, None)
184  self.em_object_for_rmf = em_object_for_rmf
185  self.monte_carlo_sample_objects = monte_carlo_sample_objects
186  self.vars["self_adaptive"]=self_adaptive
187  if sample_objects is not None:
189  "sample_objects is deprecated; use monte_carlo_sample_objects "
190  "(or molecular_dynamics_sample_objects) instead")
191  self.monte_carlo_sample_objects+=sample_objects
192  self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
193  self.replica_exchange_object = replica_exchange_object
194  self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
195  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
196  self.vars[
197  "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
198  self.vars[
199  "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
200  self.vars["replica_exchange_swap"] = replica_exchange_swap
201  self.vars["simulated_annealing"]=\
202  simulated_annealing
203  self.vars["simulated_annealing_minimum_temperature"]=\
204  simulated_annealing_minimum_temperature
205  self.vars["simulated_annealing_maximum_temperature"]=\
206  simulated_annealing_maximum_temperature
207  self.vars["simulated_annealing_minimum_temperature_nframes"]=\
208  simulated_annealing_minimum_temperature_nframes
209  self.vars["simulated_annealing_maximum_temperature_nframes"]=\
210  simulated_annealing_maximum_temperature_nframes
211 
212  self.vars["num_sample_rounds"] = num_sample_rounds
213  self.vars[
214  "number_of_best_scoring_models"] = number_of_best_scoring_models
215  self.vars["monte_carlo_steps"] = monte_carlo_steps
216  self.vars["molecular_dynamics_steps"]=molecular_dynamics_steps
217  self.vars["number_of_frames"] = number_of_frames
218  if not save_coordinates_mode in ["lowest_temperature","25th_score","50th_score","75th_score"]:
219  raise Exception("save_coordinates_mode has unrecognized value")
220  else:
221  self.vars["save_coordinates_mode"] = save_coordinates_mode
222  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
223  self.vars["write_initial_rmf"] = write_initial_rmf
224  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
225  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
226  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
227  self.vars["do_clean_first"] = do_clean_first
228  self.vars["do_create_directories"] = do_create_directories
229  self.vars["global_output_directory"] = global_output_directory
230  self.vars["rmf_dir"] = rmf_dir
231  self.vars["best_pdb_dir"] = best_pdb_dir
232  self.vars["atomistic"] = atomistic
233  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
234  self.vars["geometries"] = None
235  self.test_mode = test_mode
236 
237  def add_geometries(self, geometries):
238  if self.vars["geometries"] is None:
239  self.vars["geometries"] = list(geometries)
240  else:
241  self.vars["geometries"].extend(geometries)
242 
243  def show_info(self):
244  print("ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
245  print("--- it stores the best scoring pdb models in pdbs/")
246  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
247  print("--- variables:")
248  keys = list(self.vars.keys())
249  keys.sort()
250  for v in keys:
251  print("------", v.ljust(30), self.vars[v])
252 
253  def get_replica_exchange_object(self):
254  return self.replica_exchange_object
255 
256  def _add_provenance(self, sampler_md, sampler_mc):
257  """Record details about the sampling in the IMP Hierarchies"""
258  iterations = 0
259  if sampler_md:
260  method = "Molecular Dynamics"
261  iterations += self.vars["molecular_dynamics_steps"]
262  if sampler_mc:
263  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
264  iterations += self.vars["monte_carlo_steps"]
265  # If no sampling is actually done, no provenance to write
266  if iterations == 0 or self.vars["number_of_frames"] == 0:
267  return
268  iterations *= self.vars["num_sample_rounds"]
269 
270  pi = self.model.add_particle("sampling")
272  self.model, pi, method, self.vars["number_of_frames"],
273  iterations)
274  p.set_number_of_replicas(
275  self.replica_exchange_object.get_number_of_replicas())
276  IMP.pmi.tools._add_pmi_provenance(self.root_hier)
277  IMP.core.add_provenance(self.model, self.root_hier, p)
278 
279  def execute_macro(self):
280  temp_index_factor = 100000.0
281  samplers=[]
282  sampler_mc=None
283  sampler_md=None
284  if self.monte_carlo_sample_objects is not None:
285  print("Setting up MonteCarlo")
286  sampler_mc = IMP.pmi.samplers.MonteCarlo(self.model,
287  self.monte_carlo_sample_objects,
288  self.vars["monte_carlo_temperature"])
289  if self.vars["simulated_annealing"]:
290  tmin=self.vars["simulated_annealing_minimum_temperature"]
291  tmax=self.vars["simulated_annealing_maximum_temperature"]
292  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
293  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
294  sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
295  if self.vars["self_adaptive"]:
296  sampler_mc.set_self_adaptive(isselfadaptive=self.vars["self_adaptive"])
297  if self.output_objects is not None:
298  self.output_objects.append(sampler_mc)
299  if self.rmf_output_objects is not None:
300  self.rmf_output_objects.append(sampler_mc)
301  samplers.append(sampler_mc)
302 
303 
304  if self.molecular_dynamics_sample_objects is not None:
305  print("Setting up MolecularDynamics")
306  sampler_md = IMP.pmi.samplers.MolecularDynamics(self.model,
307  self.molecular_dynamics_sample_objects,
308  self.vars["monte_carlo_temperature"],
309  maximum_time_step=self.molecular_dynamics_max_time_step)
310  if self.vars["simulated_annealing"]:
311  tmin=self.vars["simulated_annealing_minimum_temperature"]
312  tmax=self.vars["simulated_annealing_maximum_temperature"]
313  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
314  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
315  sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
316  if self.output_objects is not None:
317  self.output_objects.append(sampler_md)
318  if self.rmf_output_objects is not None:
319  self.rmf_output_objects.append(sampler_md)
320  samplers.append(sampler_md)
321 # -------------------------------------------------------------------------
322 
323  print("Setting up ReplicaExchange")
324  rex = IMP.pmi.samplers.ReplicaExchange(self.model,
325  self.vars[
326  "replica_exchange_minimum_temperature"],
327  self.vars[
328  "replica_exchange_maximum_temperature"],
329  samplers,
330  replica_exchange_object=self.replica_exchange_object)
331  self.replica_exchange_object = rex.rem
332 
333  myindex = rex.get_my_index()
334  if self.output_objects is not None:
335  self.output_objects.append(rex)
336  if self.rmf_output_objects is not None:
337  self.rmf_output_objects.append(rex)
338  # must reset the minimum temperature due to the
339  # different binary length of rem.get_my_parameter double and python
340  # float
341  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
342 
343 # -------------------------------------------------------------------------
344 
345  globaldir = self.vars["global_output_directory"] + "/"
346  rmf_dir = globaldir + self.vars["rmf_dir"]
347  pdb_dir = globaldir + self.vars["best_pdb_dir"]
348 
349  if not self.test_mode:
350  if self.vars["do_clean_first"]:
351  pass
352 
353  if self.vars["do_create_directories"]:
354 
355  try:
356  os.makedirs(globaldir)
357  except:
358  pass
359  try:
360  os.makedirs(rmf_dir)
361  except:
362  pass
363 
364  if not self.is_multi_state:
365  try:
366  os.makedirs(pdb_dir)
367  except:
368  pass
369  else:
370  for n in range(self.vars["number_of_states"]):
371  try:
372  os.makedirs(pdb_dir + "/" + str(n))
373  except:
374  pass
375 
376 # -------------------------------------------------------------------------
377 
379  if self.output_objects is not None:
380  self.output_objects.append(sw)
381  if self.rmf_output_objects is not None:
382  self.rmf_output_objects.append(sw)
383 
384  print("Setting up stat file")
385  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
386  low_temp_stat_file = globaldir + \
387  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
388 
389  # Ensure model is updated before saving init files
390  if not self.test_mode:
391  self.model.update()
392 
393  if not self.test_mode:
394  if self.output_objects is not None:
395  output.init_stat2(low_temp_stat_file,
396  self.output_objects,
397  extralabels=["rmf_file", "rmf_frame_index"])
398  else:
399  print("Stat file writing is disabled")
400 
401  if self.rmf_output_objects is not None:
402  print("Stat info being written in the rmf file")
403 
404  print("Setting up replica stat file")
405  replica_stat_file = globaldir + \
406  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
407  if not self.test_mode:
408  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
409 
410  if not self.test_mode:
411  print("Setting up best pdb files")
412  if not self.is_multi_state:
413  if self.vars["number_of_best_scoring_models"] > 0:
414  output.init_pdb_best_scoring(pdb_dir + "/" +
415  self.vars["best_pdb_name_suffix"],
416  self.root_hier,
417  self.vars[
418  "number_of_best_scoring_models"],
419  replica_exchange=True)
420  output.write_psf(pdb_dir + "/" +"model.psf",pdb_dir + "/" +
421  self.vars["best_pdb_name_suffix"]+".0.pdb")
422  else:
423  if self.vars["number_of_best_scoring_models"] > 0:
424  for n in range(self.vars["number_of_states"]):
425  output.init_pdb_best_scoring(pdb_dir + "/" + str(n) + "/" +
426  self.vars["best_pdb_name_suffix"],
427  self.root_hiers[n],
428  self.vars[
429  "number_of_best_scoring_models"],
430  replica_exchange=True)
431  output.write_psf(pdb_dir + "/" + str(n) + "/" +"model.psf",pdb_dir + "/" + str(n) + "/" +
432  self.vars["best_pdb_name_suffix"]+".0.pdb")
433 # ---------------------------------------------
434 
435  if self.em_object_for_rmf is not None:
436  output_hierarchies = [
437  self.root_hier,
438  self.em_object_for_rmf.get_density_as_hierarchy(
439  )]
440  else:
441  output_hierarchies = [self.root_hier]
442 
443 #----------------------------------------------
444  if not self.test_mode:
445  print("Setting up and writing initial rmf coordinate file")
446  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
447  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
448  output_hierarchies,listofobjects=self.rmf_output_objects)
449  if self._rmf_restraints:
450  output.add_restraints_to_rmf(
451  init_suffix + "." + str(myindex) + ".rmf3",
452  self._rmf_restraints)
453  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
454  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
455 
456 #----------------------------------------------
457 
458  if not self.test_mode:
459  mpivs=IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
460 
461 #----------------------------------------------
462 
463  self._add_provenance(sampler_md, sampler_mc)
464 
465  if not self.test_mode:
466  print("Setting up production rmf files")
467  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
468  output.init_rmf(rmfname, output_hierarchies, geometries=self.vars["geometries"],
469  listofobjects = self.rmf_output_objects)
470 
471  if self._rmf_restraints:
472  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
473 
474  ntimes_at_low_temp = 0
475 
476  if myindex == 0:
477  self.show_info()
478  self.replica_exchange_object.set_was_used(True)
479  nframes = self.vars["number_of_frames"]
480  if self.test_mode:
481  nframes = 1
482  for i in range(nframes):
483  if self.test_mode:
484  score = 0.
485  else:
486  for nr in range(self.vars["num_sample_rounds"]):
487  if sampler_md is not None:
488  sampler_md.optimize(
489  self.vars["molecular_dynamics_steps"])
490  if sampler_mc is not None:
491  sampler_mc.optimize(self.vars["monte_carlo_steps"])
493  self.model).evaluate(False)
494  mpivs.set_value("score",score)
495  output.set_output_entry("score", score)
496 
497 
498 
499  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
500 
501  if self.vars["save_coordinates_mode"] == "lowest_temperature":
502  save_frame=(min_temp_index == my_temp_index)
503  elif self.vars["save_coordinates_mode"] == "25th_score":
504  score_perc=mpivs.get_percentile("score")
505  save_frame=(score_perc*100.0<=25.0)
506  elif self.vars["save_coordinates_mode"] == "50th_score":
507  score_perc=mpivs.get_percentile("score")
508  save_frame=(score_perc*100.0<=50.0)
509  elif self.vars["save_coordinates_mode"] == "75th_score":
510  score_perc=mpivs.get_percentile("score")
511  save_frame=(score_perc*100.0<=75.0)
512 
513  # Ensure model is updated before saving output files
514  if save_frame and not self.test_mode:
515  self.model.update()
516 
517  if save_frame:
518  print("--- frame %s score %s " % (str(i), str(score)))
519 
520  if not self.test_mode:
521  if i % self.vars["nframes_write_coordinates"]==0:
522  print('--- writing coordinates')
523  if self.vars["number_of_best_scoring_models"] > 0:
524  output.write_pdb_best_scoring(score)
525  output.write_rmf(rmfname)
526  output.set_output_entry("rmf_file", rmfname)
527  output.set_output_entry("rmf_frame_index", ntimes_at_low_temp)
528  else:
529  output.set_output_entry("rmf_file", rmfname)
530  output.set_output_entry("rmf_frame_index", '-1')
531  if self.output_objects is not None:
532  output.write_stat2(low_temp_stat_file)
533  ntimes_at_low_temp += 1
534 
535  if not self.test_mode:
536  output.write_stat2(replica_stat_file)
537  if self.vars["replica_exchange_swap"]:
538  rex.swap_temp(i, score)
539  for p, state in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
540  p.add_replica_exchange(state, self)
541 
542  if not self.test_mode:
543  print("closing production rmf files")
544  output.close_rmf(rmfname)
545 
546 
547 
548 # ----------------------------------------------------------------------
549 class BuildSystem(object):
550  """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
551  Easily create multi-state systems by calling this macro
552  repeatedly with different TopologyReader objects!
553  A useful function is get_molecules() which returns the PMI Molecules grouped by state
554  as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
555  Quick multi-state system:
556  @code{.python}
557  model = IMP.Model()
558  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
559  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
560  bs = IMP.pmi.macros.BuildSystem(model)
561  bs.add_state(reader1)
562  bs.add_state(reader2)
563  bs.execute_macro() # build everything including degrees of freedom
564  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
565  ### now you have a two state system, you add restraints etc
566  @endcode
567  @note The "domain name" entry of the topology reader is not used.
568  All molecules are set up by the component name, but split into rigid bodies
569  as requested.
570  """
571 
572  _alphabets = {'DNA': IMP.pmi.alphabets.dna,
573  'RNA': IMP.pmi.alphabets.rna}
574 
575  def __init__(self,
576  model,
577  sequence_connectivity_scale=4.0,
578  force_create_gmm_files=False,
579  resolutions=[1,10]):
580  """Constructor
581  @param model An IMP Model
582  @param sequence_connectivity_scale For scaling the connectivity restraint
583  @param force_create_gmm_files If True, will sample and create GMMs
584  no matter what. If False, will only sample if the
585  files don't exist. If number of Gaussians is zero, won't
586  do anything.
587  @param resolutions The resolutions to build for structured regions
588  """
589  self.model = model
590  self.system = IMP.pmi.topology.System(self.model)
591  self._readers = [] # the TopologyReaders (one per state)
592  self._domain_res = [] # TempResidues for each domain key=unique name, value=(atomic_res,non_atomic_res).
593  self._domains = [] # key = domain unique name, value = Component
594  self.force_create_gmm_files = force_create_gmm_files
595  self.resolutions = resolutions
596 
597  def add_state(self,
598  reader,
599  keep_chain_id=False, fasta_name_map=None):
600  """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
601  When you are done adding states, call execute_macro()
602  @param reader The TopologyReader object
603  @param fasta_name_map dictionary for converting protein names found in the fasta file
604  """
605  state = self.system.create_state()
606  self._readers.append(reader)
607  these_domain_res = {} # key is unique name, value is (atomic res, nonatomicres)
608  these_domains = {} # key is unique name, value is _Component
609  chain_ids = string.ascii_uppercase+string.ascii_lowercase+'0123456789'
610  numchain = 0
611 
612  ### setup representation
613  # loop over molecules, copies, then domains
614  for molname in reader.get_molecules():
615  copies = reader.get_molecules()[molname].domains
616  for nc,copyname in enumerate(copies):
617  print("BuildSystem.add_state: setting up molecule %s copy number %s" % (molname,str(nc)))
618  copy = copies[copyname]
619  # option to not rename chains
620  if keep_chain_id == True:
621  chain_id = copy[0].chain
622  else:
623  chain_id = chain_ids[numchain]
624  if nc==0:
625  alphabet = IMP.pmi.alphabets.amino_acid
626  fasta_flag=copy[0].fasta_flag
627  if fasta_flag in self._alphabets:
628  alphabet = self._alphabets[fasta_flag]
629  seq = IMP.pmi.topology.Sequences(copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
630  print("BuildSystem.add_state: molecule %s sequence has %s residues" % (molname,len(seq)))
631  orig_mol = state.create_molecule(molname, seq, chain_id,
632  alphabet=alphabet)
633  mol = orig_mol
634  numchain+=1
635  else:
636  print("BuildSystem.add_state: creating a copy for molecule %s" % molname)
637  mol = orig_mol.create_copy(chain_id)
638  numchain+=1
639 
640  for domainnumber,domain in enumerate(copy):
641  print("BuildSystem.add_state: ---- setting up domain %s of molecule %s" % (domainnumber,molname))
642  # we build everything in the residue range, even if it
643  # extends beyond what's in the actual PDB file
644  these_domains[domain.get_unique_name()] = domain
645  if domain.residue_range==[] or domain.residue_range is None:
646  domain_res = mol.get_residues()
647  else:
648  start = domain.residue_range[0]+domain.pdb_offset
649  if domain.residue_range[1]=='END':
650  end = len(mol.sequence)
651  else:
652  end = domain.residue_range[1]+domain.pdb_offset
653  domain_res = mol.residue_range(start-1,end-1)
654  print("BuildSystem.add_state: -------- domain %s of molecule %s extends from residue %s to residue %s " % (domainnumber,molname,start,end))
655  if domain.pdb_file=="BEADS":
656  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by BEADS " % (domainnumber,molname))
657  mol.add_representation(
658  domain_res,
659  resolutions=[domain.bead_size],
660  setup_particles_as_densities=(
661  domain.em_residues_per_gaussian!=0),
662  color = domain.color)
663  these_domain_res[domain.get_unique_name()] = (set(),domain_res)
664  elif domain.pdb_file=="IDEAL_HELIX":
665  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by IDEAL_HELIX " % (domainnumber,molname))
666  mol.add_representation(
667  domain_res,
668  resolutions=self.resolutions,
669  ideal_helix=True,
670  density_residues_per_component=domain.em_residues_per_gaussian,
671  density_prefix=domain.density_prefix,
672  density_force_compute=self.force_create_gmm_files,
673  color = domain.color)
674  these_domain_res[domain.get_unique_name()] = (domain_res,set())
675  else:
676  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by pdb file %s " % (domainnumber,molname,domain.pdb_file))
677  domain_atomic = mol.add_structure(domain.pdb_file,
678  domain.chain,
679  domain.residue_range,
680  domain.pdb_offset,
681  soft_check=True)
682  domain_non_atomic = domain_res - domain_atomic
683  if not domain.em_residues_per_gaussian:
684  mol.add_representation(domain_atomic,
685  resolutions=self.resolutions,
686  color = domain.color)
687  if len(domain_non_atomic)>0:
688  mol.add_representation(domain_non_atomic,
689  resolutions=[domain.bead_size],
690  color = domain.color)
691  else:
692  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by gaussians " % (domainnumber,molname))
693  mol.add_representation(
694  domain_atomic,
695  resolutions=self.resolutions,
696  density_residues_per_component=domain.em_residues_per_gaussian,
697  density_prefix=domain.density_prefix,
698  density_force_compute=self.force_create_gmm_files,
699  color = domain.color)
700  if len(domain_non_atomic)>0:
701  mol.add_representation(domain_non_atomic,
702  resolutions=[domain.bead_size],
703  setup_particles_as_densities=True,
704  color = domain.color)
705  these_domain_res[domain.get_unique_name()] = (
706  domain_atomic,domain_non_atomic)
707  self._domain_res.append(these_domain_res)
708  self._domains.append(these_domains)
709  print('BuildSystem.add_state: State',len(self.system.states),'added')
710 
711  def get_molecules(self):
712  """Return list of all molecules grouped by state.
713  For each state, it's a dictionary of Molecules where key is the molecule name
714  """
715  return [s.get_molecules() for s in self.system.get_states()]
716 
717  def get_molecule(self,molname,copy_index=0,state_index=0):
718  return self.system.get_states()[state_index].get_molecules()[molname][copy_index]
719 
720  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04, max_bead_trans=4.0, max_srb_trans=4.0,max_srb_rot=0.04):
721  """Builds representations and sets up degrees of freedom"""
722  print("BuildSystem.execute_macro: building representations")
723  self.root_hier = self.system.build()
724 
725  print("BuildSystem.execute_macro: setting up degrees of freedom")
726  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
727  for nstate,reader in enumerate(self._readers):
728  rbs = reader.get_rigid_bodies()
729  srbs = reader.get_super_rigid_bodies()
730  csrbs = reader.get_chains_of_super_rigid_bodies()
731 
732  # add rigid bodies
733  domains_in_rbs = set()
734  for rblist in rbs:
735  print("BuildSystem.execute_macro: -------- building rigid body %s" % (str(rblist)))
736  all_res = IMP.pmi.tools.OrderedSet()
737  bead_res = IMP.pmi.tools.OrderedSet()
738  for dname in rblist:
739  domain = self._domains[nstate][dname]
740  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
741  all_res|=self._domain_res[nstate][dname][0]
742  bead_res|=self._domain_res[nstate][dname][1]
743  domains_in_rbs.add(dname)
744  all_res|=bead_res
745  print("BuildSystem.execute_macro: -------- \
746 creating rigid body with max_trans %s \
747 max_rot %s non_rigid_max_trans %s" \
748  % (str(max_rb_trans),str(max_rb_rot),str(max_bead_trans)))
749  self.dof.create_rigid_body(all_res,
750  nonrigid_parts=bead_res,
751  max_trans=max_rb_trans,
752  max_rot=max_rb_rot,
753  nonrigid_max_trans=max_bead_trans)
754 
755  # if you have any domains not in an RB, set them as flexible beads
756  for dname, domain in self._domains[nstate].items():
757  if dname not in domains_in_rbs:
758  if domain.pdb_file != "BEADS":
759  warnings.warn(
760  "Making %s flexible. This may distort the "
761  "structure; consider making it rigid" % dname,
763  self.dof.create_flexible_beads(
764  self._domain_res[nstate][dname][1],
765  max_trans=max_bead_trans)
766 
767  # add super rigid bodies
768  for srblist in srbs:
769  print("BuildSystem.execute_macro: -------- building super rigid body %s" % (str(srblist)))
770  all_res = IMP.pmi.tools.OrderedSet()
771  for dname in srblist:
772  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
773  all_res|=self._domain_res[nstate][dname][0]
774  all_res|=self._domain_res[nstate][dname][1]
775 
776  print("BuildSystem.execute_macro: -------- \
777 creating super rigid body with max_trans %s max_rot %s " \
778  % (str(max_srb_trans),str(max_srb_rot)))
779  self.dof.create_super_rigid_body(all_res,max_trans=max_srb_trans,max_rot=max_srb_rot)
780 
781  # add chains of super rigid bodies
782  for csrblist in csrbs:
783  all_res = IMP.pmi.tools.OrderedSet()
784  for dname in csrblist:
785  all_res|=self._domain_res[nstate][dname][0]
786  all_res|=self._domain_res[nstate][dname][1]
787  all_res = list(all_res)
788  all_res.sort(key=lambda r:r.get_index())
789  #self.dof.create_super_rigid_body(all_res,chain_min_length=2,chain_max_length=3)
790  self.dof.create_main_chain_mover(all_res)
791  return self.root_hier,self.dof
792 
793 
794 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
795 class AnalysisReplicaExchange0(object):
796  """A macro for running all the basic operations of analysis.
797  Includes clustering, precision analysis, and making ensemble density maps.
798  A number of plots are also supported.
799  """
800  def __init__(self, model,
801  merge_directories=["./"],
802  stat_file_name_suffix="stat",
803  best_pdb_name_suffix="model",
804  do_clean_first=True,
805  do_create_directories=True,
806  global_output_directory="output/",
807  replica_stat_file_suffix="stat_replica",
808  global_analysis_result_directory="./analysis/",
809  test_mode=False):
810  """Constructor.
811  @param model The IMP model
812  @param stat_file_name_suffix
813  @param merge_directories The directories containing output files
814  @param best_pdb_name_suffix
815  @param do_clean_first
816  @param do_create_directories
817  @param global_output_directory Where everything is
818  @param replica_stat_file_suffix
819  @param global_analysis_result_directory
820  @param test_mode If True, nothing is changed on disk
821  """
822 
823  try:
824  from mpi4py import MPI
825  self.comm = MPI.COMM_WORLD
826  self.rank = self.comm.Get_rank()
827  self.number_of_processes = self.comm.size
828  except ImportError:
829  self.rank = 0
830  self.number_of_processes = 1
831 
832  self.test_mode = test_mode
833  self._protocol_output = []
834  self.cluster_obj = None
835  self.model = model
836  stat_dir = global_output_directory
837  self.stat_files = []
838  # it contains the position of the root directories
839  for rd in merge_directories:
840  stat_files = glob.glob(os.path.join(rd,stat_dir,"stat.*.out"))
841  if len(stat_files)==0:
842  warnings.warn("no stat files found in %s"
843  % os.path.join(rd, stat_dir),
845  self.stat_files += stat_files
846 
847  def add_protocol_output(self, p):
848  """Capture details of the modeling protocol.
849  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
850  """
851  # Assume last state is the one we're interested in
852  self._protocol_output.append((p, p._last_state))
853 
854  def get_modeling_trajectory(self,
855  score_key="SimplifiedModel_Total_Score_None",
856  rmf_file_key="rmf_file",
857  rmf_file_frame_key="rmf_frame_index",
858  outputdir="./",
859  get_every=1,
860  nframes_trajectory=10000):
861  """ Get a trajectory of the modeling run, for generating demonstrative movies
862  @param score_key The score for ranking models
863  @param rmf_file_key Key pointing to RMF filename
864  @param rmf_file_frame_key Key pointing to RMF frame number
865  @param outputdir The local output directory used in the run
866  @param get_every Extract every nth frame
867  @param nframes_trajectory Total number of frames of the trajectory
868  """
869  from operator import itemgetter
870  import math
871 
872  trajectory_models = IMP.pmi.io.get_trajectory_models(self.stat_files,
873  score_key,
874  rmf_file_key,
875  rmf_file_frame_key,
876  get_every)
877  rmf_file_list=trajectory_models[0]
878  rmf_file_frame_list=trajectory_models[1]
879  score_list=list(map(float, trajectory_models[2]))
880 
881  max_score=max(score_list)
882  min_score=min(score_list)
883 
884 
885  bins=[(max_score-min_score)*math.exp(-float(i))+min_score for i in range(nframes_trajectory)]
886  binned_scores=[None]*nframes_trajectory
887  binned_model_indexes=[-1]*nframes_trajectory
888 
889  for model_index,s in enumerate(score_list):
890  bins_score_diffs=[abs(s-b) for b in bins]
891  bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
892  if binned_scores[bin_index]==None:
893  binned_scores[bin_index]=s
894  binned_model_indexes[bin_index]=model_index
895  else:
896  old_diff=abs(binned_scores[bin_index]-bins[bin_index])
897  new_diff=abs(s-bins[bin_index])
898  if new_diff < old_diff:
899  binned_scores[bin_index]=s
900  binned_model_indexes[bin_index]=model_index
901 
902  print(binned_scores)
903  print(binned_model_indexes)
904 
905 
906  def _expand_ambiguity(self,prot,d):
907  """If using PMI2, expand the dictionary to include copies as ambiguous options
908  This also keeps the states separate.
909  """
910  newdict = {}
911  for key in d:
912  val = d[key]
913  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
914  newdict[key] = val
915  continue
916  states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
917  if isinstance(val, tuple):
918  start = val[0]
919  stop = val[1]
920  name = val[2]
921  else:
922  start = 1
923  stop = -1
924  name = val
925  for nst in range(len(states)):
926  sel = IMP.atom.Selection(prot,molecule=name,state_index=nst)
927  copies = sel.get_selected_particles(with_representation=False)
928  if len(copies)>1:
929  for nc in range(len(copies)):
930  if len(states)>1:
931  newdict['%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
932  else:
933  newdict['%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
934  else:
935  newdict[key] = val
936  return newdict
937 
938 
939  def clustering(self,
940  score_key="SimplifiedModel_Total_Score_None",
941  rmf_file_key="rmf_file",
942  rmf_file_frame_key="rmf_frame_index",
943  state_number=0,
944  prefiltervalue=None,
945  feature_keys=[],
946  outputdir="./",
947  alignment_components=None,
948  number_of_best_scoring_models=10,
949  rmsd_calculation_components=None,
950  distance_matrix_file='distances.mat',
951  load_distance_matrix_file=False,
952  skip_clustering=False,
953  number_of_clusters=1,
954  display_plot=False,
955  exit_after_display=True,
956  get_every=1,
957  first_and_last_frames=None,
958  density_custom_ranges=None,
959  write_pdb_with_centered_coordinates=False,
960  voxel_size=5.0):
961  """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
962  Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
963  Can pass None for copy or state to ignore that field.
964  If you don't pass a specific copy number
965  @param score_key The score for ranking models. Use "Total_Score"
966  instead of default for PMI2.
967  @param rmf_file_key Key pointing to RMF filename
968  @param rmf_file_frame_key Key pointing to RMF frame number
969  @param state_number State number to analyze
970  @param prefiltervalue Only include frames where the
971  score key is below this value
972  @param feature_keys Keywords for which you want to
973  calculate average, medians, etc.
974  If you pass "Keyname" it'll include everything that matches "*Keyname*"
975  @param outputdir The local output directory used in the run
976  @param alignment_components Dictionary with keys=groupname, values are tuples
977  for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
978  @param number_of_best_scoring_models Num models to keep per run
979  @param rmsd_calculation_components For calculating RMSD
980  (same format as alignment_components)
981  @param distance_matrix_file Where to store/read the distance matrix
982  @param load_distance_matrix_file Try to load the distance matrix file
983  @param skip_clustering Just extract the best scoring models
984  and save the pdbs
985  @param number_of_clusters Number of k-means clusters
986  @param display_plot Display the distance matrix
987  @param exit_after_display Exit after displaying distance matrix
988  @param get_every Extract every nth frame
989  @param first_and_last_frames A tuple with the first and last frames to be
990  analyzed. Values are percentages!
991  Default: get all frames
992  @param density_custom_ranges For density calculation
993  (same format as alignment_components)
994  @param write_pdb_with_centered_coordinates
995  @param voxel_size Used for the density output
996  """
997  # Track provenance information to be added to each output model
998  prov = []
999  self._outputdir = os.path.abspath(outputdir)
1000  self._number_of_clusters = number_of_clusters
1001  for p, state in self._protocol_output:
1002  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1003 
1004  if self.test_mode:
1005  return
1006 
1007  if self.rank==0:
1008  try:
1009  os.mkdir(outputdir)
1010  except:
1011  pass
1012 
1013  if not load_distance_matrix_file:
1014  if len(self.stat_files)==0: print("ERROR: no stat file found in the given path"); return
1015  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1016  self.stat_files,self.number_of_processes)[self.rank]
1017 
1018  # read ahead to check if you need the PMI2 score key instead
1019  po = IMP.pmi.output.ProcessOutput(my_stat_files[0])
1020  orig_score_key = score_key
1021  if score_key not in po.get_keys():
1022  if 'Total_Score' in po.get_keys():
1023  score_key = 'Total_Score'
1024  warnings.warn(
1025  "Using 'Total_Score' instead of "
1026  "'SimplifiedModel_Total_Score_None' for the score key",
1028  for k in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1029  if k in feature_keys:
1030  warnings.warn(
1031  "no need to pass " + k + " to feature_keys.",
1033  feature_keys.remove(k)
1034 
1035  best_models = IMP.pmi.io.get_best_models(my_stat_files,
1036  score_key,
1037  feature_keys,
1038  rmf_file_key,
1039  rmf_file_frame_key,
1040  prefiltervalue,
1041  get_every, provenance=prov)
1042  rmf_file_list=best_models[0]
1043  rmf_file_frame_list=best_models[1]
1044  score_list=best_models[2]
1045  feature_keyword_list_dict=best_models[3]
1046 
1047 # ------------------------------------------------------------------------
1048 # collect all the files and scores
1049 # ------------------------------------------------------------------------
1050 
1051  if self.number_of_processes > 1:
1052  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1053  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1054  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1055  rmf_file_frame_list)
1056  for k in feature_keyword_list_dict:
1057  feature_keyword_list_dict[k] = IMP.pmi.tools.scatter_and_gather(
1058  feature_keyword_list_dict[k])
1059 
1060  # sort by score and get the best scoring ones
1061  score_rmf_tuples = list(zip(score_list,
1062  rmf_file_list,
1063  rmf_file_frame_list,
1064  list(range(len(score_list)))))
1065 
1066  if density_custom_ranges:
1067  for k in density_custom_ranges:
1068  if not isinstance(density_custom_ranges[k], list):
1069  raise Exception("Density custom ranges: values must be lists of tuples")
1070 
1071  # keep subset of frames if requested
1072  if first_and_last_frames is not None:
1073  nframes = len(score_rmf_tuples)
1074  first_frame = int(first_and_last_frames[0] * nframes)
1075  last_frame = int(first_and_last_frames[1] * nframes)
1076  if last_frame > len(score_rmf_tuples):
1077  last_frame = -1
1078  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1079 
1080  # sort RMFs by the score_key in ascending order, and store the rank
1081  best_score_rmf_tuples = sorted(score_rmf_tuples,
1082  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1083  best_score_rmf_tuples=[t+(n,) for n,t in enumerate(best_score_rmf_tuples)]
1084  # Note in the provenance info that we only kept best-scoring models
1085  prov.append(IMP.pmi.io.FilterProvenance("Best scoring",
1086  0, number_of_best_scoring_models))
1087  # sort the feature scores in the same way
1088  best_score_feature_keyword_list_dict = defaultdict(list)
1089  for tpl in best_score_rmf_tuples:
1090  index = tpl[3]
1091  for f in feature_keyword_list_dict:
1092  best_score_feature_keyword_list_dict[f].append(
1093  feature_keyword_list_dict[f][index])
1094  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1095  best_score_rmf_tuples,
1096  self.number_of_processes)[self.rank]
1097 
1098  # expand the dictionaries to include ambiguous copies
1099  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(self.model,
1100  0,
1101  my_best_score_rmf_tuples[0][1])[0]
1102  if IMP.pmi.get_is_canonical(prot_ahead):
1103  if rmsd_calculation_components is not None:
1104  tmp = self._expand_ambiguity(prot_ahead,rmsd_calculation_components)
1105  if tmp!=rmsd_calculation_components:
1106  print('Detected ambiguity, expand rmsd components to',tmp)
1107  rmsd_calculation_components = tmp
1108  if alignment_components is not None:
1109  tmp = self._expand_ambiguity(prot_ahead,alignment_components)
1110  if tmp!=alignment_components:
1111  print('Detected ambiguity, expand alignment components to',tmp)
1112  alignment_components = tmp
1113 
1114 #-------------------------------------------------------------
1115 # read the coordinates
1116 # ------------------------------------------------------------
1117  rmsd_weights = IMP.pmi.io.get_bead_sizes(self.model,
1118  my_best_score_rmf_tuples[0],
1119  rmsd_calculation_components,
1120  state_number=state_number)
1121  got_coords = IMP.pmi.io.read_coordinates_of_rmfs(self.model,
1122  my_best_score_rmf_tuples,
1123  alignment_components,
1124  rmsd_calculation_components,
1125  state_number=state_number)
1126 
1127  # note! the coordinates are simply float tuples, NOT decorators, NOT Vector3D,
1128  # NOR particles, because these object cannot be serialized. We need serialization
1129  # for the parallel computation based on mpi.
1130  all_coordinates=got_coords[0] # dict:key=component name,val=coords per hit
1131  alignment_coordinates=got_coords[1] # same as above, limited to alignment bits
1132  rmsd_coordinates=got_coords[2] # same as above, limited to RMSD bits
1133  rmf_file_name_index_dict=got_coords[3] # dictionary with key=RMF, value=score rank
1134  all_rmf_file_names=got_coords[4] # RMF file per hit
1135 
1136 # ------------------------------------------------------------------------
1137 # optionally don't compute distance matrix or cluster, just write top files
1138 # ------------------------------------------------------------------------
1139  if skip_clustering:
1140  if density_custom_ranges:
1141  DensModule = IMP.pmi.analysis.GetModelDensity(
1142  density_custom_ranges,
1143  voxel=voxel_size)
1144 
1145  dircluster=os.path.join(outputdir,"all_models."+str(n))
1146  try:
1147  os.mkdir(outputdir)
1148  except:
1149  pass
1150  try:
1151  os.mkdir(dircluster)
1152  except:
1153  pass
1154  clusstat=open(os.path.join(dircluster,"stat."+str(self.rank)+".out"),"w")
1155  for cnt,tpl in enumerate(my_best_score_rmf_tuples):
1156  rmf_name=tpl[1]
1157  rmf_frame_number=tpl[2]
1158  tmp_dict={}
1159  index=tpl[4]
1160  for key in best_score_feature_keyword_list_dict:
1161  tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1162 
1163  if cnt==0:
1164  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1165  self.model,
1166  rmf_frame_number,
1167  rmf_name)
1168  else:
1169  linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1170  self.model,
1171  prots,
1172  rs,
1173  rmf_frame_number,
1174  rmf_name)
1175  if not linking_successful:
1176  continue
1177 
1178  if not prots:
1179  continue
1180 
1181  if IMP.pmi.get_is_canonical(prots[0]):
1182  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1183  prot = states[state_number]
1184  else:
1185  prot = prots[state_number]
1186 
1187  # get transformation aligning coordinates of requested tuples
1188  # to the first RMF file
1189  if cnt==0:
1190  coords_f1=alignment_coordinates[cnt]
1191  if cnt > 0:
1192  coords_f2=alignment_coordinates[cnt]
1193  if coords_f2:
1194  Ali = IMP.pmi.analysis.Alignment(coords_f1, coords_f2)
1195  transformation = Ali.align()[1]
1196  else:
1198 
1199  rbs = set()
1200  for p in IMP.atom.get_leaves(prot):
1201  if not IMP.core.XYZR.get_is_setup(p):
1203  IMP.core.XYZR(p).set_radius(0.0001)
1204  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1205 
1207  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1208  rbs.add(rb)
1209  else:
1211  transformation)
1212  for rb in rbs:
1213  IMP.core.transform(rb,transformation)
1214 
1216  self.model.update()
1217  out_pdb_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".pdb")
1218  out_rmf_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".rmf3")
1219  o.init_pdb(out_pdb_fn,prot)
1220  o.write_pdb(out_pdb_fn,
1221  translate_to_geometric_center=write_pdb_with_centered_coordinates)
1222 
1223  tmp_dict["local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1224  tmp_dict["rmf_file_full_path"]=rmf_name
1225  tmp_dict["local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1226  tmp_dict["local_rmf_frame_number"]=0
1227 
1228  clusstat.write(str(tmp_dict)+"\n")
1229 
1230  if IMP.pmi.get_is_canonical(prot):
1231  # create a single-state System and write that
1233  h.set_name("System")
1234  h.add_child(prot)
1235  o.init_rmf(out_rmf_fn, [h], rs)
1236  else:
1237  o.init_rmf(out_rmf_fn, [prot],rs)
1238 
1239  o.write_rmf(out_rmf_fn)
1240  o.close_rmf(out_rmf_fn)
1241  # add the density
1242  if density_custom_ranges:
1243  DensModule.add_subunits_density(prot)
1244 
1245  if density_custom_ranges:
1246  DensModule.write_mrc(path=dircluster)
1247  del DensModule
1248  return
1249 
1250 
1251 
1252  # broadcast the coordinates
1253  if self.number_of_processes > 1:
1254  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1255  all_coordinates)
1256  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1257  all_rmf_file_names)
1258  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1259  rmf_file_name_index_dict)
1260  alignment_coordinates=IMP.pmi.tools.scatter_and_gather(
1261  alignment_coordinates)
1262  rmsd_coordinates=IMP.pmi.tools.scatter_and_gather(
1263  rmsd_coordinates)
1264 
1265  if self.rank == 0:
1266  # save needed informations in external files
1267  self.save_objects(
1268  [best_score_feature_keyword_list_dict,
1269  rmf_file_name_index_dict],
1270  ".macro.pkl")
1271 
1272 # ------------------------------------------------------------------------
1273 # Calculate distance matrix and cluster
1274 # ------------------------------------------------------------------------
1275  print("setup clustering class")
1276  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1277 
1278  for n, model_coordinate_dict in enumerate(all_coordinates):
1279  template_coordinate_dict = {}
1280  # let's try to align
1281  if alignment_components is not None and len(self.cluster_obj.all_coords) == 0:
1282  # set the first model as template coordinates
1283  self.cluster_obj.set_template(alignment_coordinates[n])
1284  self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1285  print("Global calculating the distance matrix")
1286 
1287  # calculate distance matrix, all against all
1288  self.cluster_obj.dist_matrix()
1289 
1290  # perform clustering and optionally display
1291  if self.rank == 0:
1292  self.cluster_obj.do_cluster(number_of_clusters)
1293  if display_plot:
1294  if self.rank == 0:
1295  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1296  if exit_after_display:
1297  exit()
1298  self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1299 
1300 # ------------------------------------------------------------------------
1301 # Alteratively, load the distance matrix from file and cluster that
1302 # ------------------------------------------------------------------------
1303  else:
1304  if self.rank==0:
1305  print("setup clustering class")
1306  self.cluster_obj = IMP.pmi.analysis.Clustering()
1307  self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1308  print("clustering with %s clusters" % str(number_of_clusters))
1309  self.cluster_obj.do_cluster(number_of_clusters)
1310  [best_score_feature_keyword_list_dict,
1311  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1312  if display_plot:
1313  if self.rank == 0:
1314  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1315  if exit_after_display:
1316  exit()
1317  if self.number_of_processes > 1:
1318  self.comm.Barrier()
1319 
1320 # ------------------------------------------------------------------------
1321 # now save all informations about the clusters
1322 # ------------------------------------------------------------------------
1323 
1324  if self.rank == 0:
1325  print(self.cluster_obj.get_cluster_labels())
1326  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1327  print("rank %s " % str(self.rank))
1328  print("cluster %s " % str(n))
1329  print("cluster label %s " % str(cl))
1330  print(self.cluster_obj.get_cluster_label_names(cl))
1331  cluster_size = len(self.cluster_obj.get_cluster_label_names(cl))
1332  cluster_prov = prov + \
1333  [IMP.pmi.io.ClusterProvenance(cluster_size)]
1334 
1335  # first initialize the Density class if requested
1336  if density_custom_ranges:
1337  DensModule = IMP.pmi.analysis.GetModelDensity(
1338  density_custom_ranges,
1339  voxel=voxel_size)
1340 
1341  dircluster = outputdir + "/cluster." + str(n) + "/"
1342  try:
1343  os.mkdir(dircluster)
1344  except:
1345  pass
1346 
1347  rmsd_dict = {"AVERAGE_RMSD":
1348  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1349  clusstat = open(dircluster + "stat.out", "w")
1350  for k, structure_name in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1351  # extract the features
1352  tmp_dict = {}
1353  tmp_dict.update(rmsd_dict)
1354  index = rmf_file_name_index_dict[structure_name]
1355  for key in best_score_feature_keyword_list_dict:
1356  tmp_dict[
1357  key] = best_score_feature_keyword_list_dict[
1358  key][
1359  index]
1360 
1361  # get the rmf name and the frame number from the list of
1362  # frame names
1363  rmf_name = structure_name.split("|")[0]
1364  rmf_frame_number = int(structure_name.split("|")[1])
1365  clusstat.write(str(tmp_dict) + "\n")
1366 
1367  # extract frame (open or link to existing)
1368  if k==0:
1369  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1370  self.model,
1371  rmf_frame_number,
1372  rmf_name)
1373  else:
1374  linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1375  self.model,
1376  prots,
1377  rs,
1378  rmf_frame_number,
1379  rmf_name)
1380  if not linking_successful:
1381  continue
1382  if not prots:
1383  continue
1384 
1385  if IMP.pmi.get_is_canonical(prots[0]):
1386  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1387  prot = states[state_number]
1388  else:
1389  prot = prots[state_number]
1390  if k==0:
1391  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1392 
1393  # transform clusters onto first
1394  if k > 0:
1395  model_index = self.cluster_obj.get_model_index_from_name(
1396  structure_name)
1397  transformation = self.cluster_obj.get_transformation_to_first_member(
1398  cl,
1399  model_index)
1400  rbs = set()
1401  for p in IMP.atom.get_leaves(prot):
1402  if not IMP.core.XYZR.get_is_setup(p):
1404  IMP.core.XYZR(p).set_radius(0.0001)
1405  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1406 
1408  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1409  rbs.add(rb)
1410  else:
1412  transformation)
1413  for rb in rbs:
1414  IMP.core.transform(rb,transformation)
1415 
1416  # add the density
1417  if density_custom_ranges:
1418  DensModule.add_subunits_density(prot)
1419 
1420  # pdb writing should be optimized!
1421  o = IMP.pmi.output.Output()
1422  self.model.update()
1423  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1424  o.write_pdb(dircluster + str(k) + ".pdb")
1425 
1426  if IMP.pmi.get_is_canonical(prot):
1427  # create a single-state System and write that
1429  h.set_name("System")
1430  h.add_child(prot)
1431  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1432  else:
1433  o.init_rmf(dircluster + str(k) + ".rmf3", [prot],rs)
1434  o.write_rmf(dircluster + str(k) + ".rmf3")
1435  o.close_rmf(dircluster + str(k) + ".rmf3")
1436 
1437  del o
1438  # IMP.atom.destroy(prot)
1439 
1440  if density_custom_ranges:
1441  DensModule.write_mrc(path=dircluster)
1442  del DensModule
1443 
1444  if self.number_of_processes>1:
1445  self.comm.Barrier()
1446 
1447  def get_cluster_rmsd(self,cluster_num):
1448  if self.cluster_obj is None:
1449  raise Exception("Run clustering first")
1450  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1451 
1452  def save_objects(self, objects, file_name):
1453  import pickle
1454  outf = open(file_name, 'wb')
1455  pickle.dump(objects, outf)
1456  outf.close()
1457 
1458  def load_objects(self, file_name):
1459  import pickle
1460  inputf = open(file_name, 'rb')
1461  objects = pickle.load(inputf)
1462  inputf.close()
1463  return objects
1464 
1466 
1467  """
1468  This class contains analysis utilities to investigate ReplicaExchange results.
1469  """
1470 
1471  ########################
1472  # Construction and Setup
1473  ########################
1474 
1475  def __init__(self,model,
1476  stat_files,
1477  best_models=None,
1478  score_key=None,
1479  alignment=True):
1480  """
1481  Construction of the Class.
1482  @param model IMP.Model()
1483  @param stat_files list of string. Can be ascii stat files, rmf files names
1484  @param best_models Integer. Number of best scoring models, if None: all models will be read
1485  @param alignment boolean (Default=True). Align before computing the rmsd.
1486  """
1487 
1488  self.model=model
1489  self.best_models=best_models
1490  self.stath0=IMP.pmi.output.StatHierarchyHandler(model,stat_files,self.best_models,score_key,cache=True)
1491  self.stath1=IMP.pmi.output.StatHierarchyHandler(StatHierarchyHandler=self.stath0)
1492 
1493  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(IMP.pmi.tools.select_at_all_resolutions(self.stath1))
1494  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(IMP.pmi.tools.select_at_all_resolutions(self.stath0))
1495  self.sel0_rmsd=IMP.atom.Selection(self.stath0)
1496  self.sel1_rmsd=IMP.atom.Selection(self.stath1)
1497  self.sel0_alignment=IMP.atom.Selection(self.stath0)
1498  self.sel1_alignment=IMP.atom.Selection(self.stath1)
1499  self.clusters=[]
1500  # fill the cluster list with a single cluster containing all models
1501  c = IMP.pmi.output.Cluster(0)
1502  self.clusters.append(c)
1503  for n0 in range(len(self.stath0)):
1504  c.add_member(n0)
1505  self.pairwise_rmsd={}
1506  self.pairwise_molecular_assignment={}
1507  self.alignment=alignment
1508  self.symmetric_molecules={}
1509  self.issymmetricsel={}
1510  self.update_seldicts()
1511  self.molcopydict0=IMP.pmi.tools.get_molecules_dictionary_by_copy(IMP.atom.get_leaves(self.stath0))
1512  self.molcopydict1=IMP.pmi.tools.get_molecules_dictionary_by_copy(IMP.atom.get_leaves(self.stath1))
1513 
1514  def set_rmsd_selection(self,**kwargs):
1515  """
1516  Setup the selection onto which the rmsd is computed
1517  @param kwargs use IMP.atom.Selection keywords
1518  """
1519  self.sel0_rmsd=IMP.atom.Selection(self.stath0,**kwargs)
1520  self.sel1_rmsd=IMP.atom.Selection(self.stath1,**kwargs)
1521  self.update_seldicts()
1522 
1523  def set_symmetric(self,molecule_name):
1524  """
1525  Store names of symmetric molecules
1526  """
1527  self.symmetric_molecules[molecule_name]=0
1528  self.update_seldicts()
1529 
1530  def set_alignment_selection(self,**kwargs):
1531  """
1532  Setup the selection onto which the alignment is computed
1533  @param kwargs use IMP.atom.Selection keywords
1534  """
1535  self.sel0_alignment=IMP.atom.Selection(self.stath0,**kwargs)
1536  self.sel1_alignment=IMP.atom.Selection(self.stath1,**kwargs)
1537 
1538  ######################
1539  # Clustering functions
1540  ######################
1541  def clean_clusters(self):
1542  for c in self.clusters: del c
1543  self.clusters=[]
1544 
1545 
1546  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1547  """
1548  Cluster the models based on RMSD.
1549  @param rmsd_cutoff Float the distance cutoff in Angstrom
1550  @param metric (Default=IMP.atom.get_rmsd) the metric that will be used to compute rmsds
1551  """
1552  self.clean_clusters()
1553  not_clustered=set(range(len(self.stath1)))
1554  while len(not_clustered)>0:
1555  self.aggregate(not_clustered, rmsd_cutoff, metric)
1556  #self.merge_aggregates(rmsd_cutoff, metric)
1557  self.update_clusters()
1558 
1559  def refine(self,rmsd_cutoff=10):
1560  """
1561  Refine the clusters by merging the ones whose centers are close
1562  @param rmsd_cutoff cutoff distance in Angstorms
1563  """
1564  merge_pairs=[]
1565  clusters_copy=self.clusters
1566  for c0,c1 in itertools.combinations(self.clusters,2):
1567  if c0.center_index is None:
1568  self.compute_cluster_center(c0)
1569  if c1.center_index is None:
1570  self.compute_cluster_center(c1)
1571  d0=self.stath0[c0.center_index]
1572  d1=self.stath1[c1.center_index]
1573  rmsd, molecular_assignment = self.rmsd()
1574  if rmsd <= rmsd_cutoff:
1575  if c1 in self.clusters:
1576  clusters_copy.remove(c1)
1577  c0+=c1
1578  self.clusters=clusters_copy
1579  self.update_clusters()
1580 
1581  ####################
1582  # Input Output
1583  ####################
1584 
1585  def set_cluster_assignments(self, cluster_ids):
1586  if len(cluster_ids)!=len(self.stath0):
1587  raise ValueError('cluster ids has to be same length as number of frames')
1588 
1589  self.clusters=[]
1590  for i in sorted(list(set(cluster_ids))):
1591  self.clusters.append(IMP.pmi.output.Cluster(i))
1592  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1593  self.clusters[idx].add_member(i,d)
1594 
1595  def get_cluster_data(self, cluster):
1596  """
1597  Return the model data from a cluster
1598  @param cluster IMP.pmi.output.Cluster object
1599  """
1600  data=[]
1601  for m in cluster:
1602  data.append(m)
1603  return data
1604 
1605  def save_data(self,filename='data.pkl'):
1606  """
1607  Save the data for the whole models into a pickle file
1608  @param filename string
1609  """
1610  self.stath0.save_data(filename)
1611 
1612  def set_data(self,data):
1613  """
1614  Set the data from an external IMP.pmi.output.Data
1615  @param data IMP.pmi.output.Data
1616  """
1617  self.stath0.data=data
1618  self.stath1.data=data
1619 
1620  def load_data(self,filename='data.pkl'):
1621  """
1622  Load the data from an external pickled file
1623  @param filename string
1624  """
1625  self.stath0.load_data(filename)
1626  self.stath1.load_data(filename)
1627  self.best_models=len(self.stath0)
1628 
1629  def add_cluster(self,rmf_name_list):
1630  c = IMP.pmi.output.Cluster(len(self.clusters))
1631  print("creating cluster index "+str(len(self.clusters)))
1632  self.clusters.append(c)
1633  current_len=len(self.stath0)
1634 
1635  for rmf in rmf_name_list:
1636  print("adding rmf "+rmf)
1637  self.stath0.add_stat_file(rmf)
1638  self.stath1.add_stat_file(rmf)
1639 
1640  for n0 in range(current_len,len(self.stath0)):
1641  d0=self.stath0[n0]
1642  c.add_member(n0,d0)
1643  self.update_clusters()
1644 
1645  def save_clusters(self,filename='clusters.pkl'):
1646  """
1647  Save the clusters into a pickle file
1648  @param filename string
1649  """
1650  try:
1651  import cPickle as pickle
1652  except ImportError:
1653  import pickle
1654  fl=open(filename,'wb')
1655  pickle.dump(self.clusters,fl)
1656 
1657  def load_clusters(self,filename='clusters.pkl',append=False):
1658  """
1659  Load the clusters from a pickle file
1660  @param filename string
1661  @param append bool (Default=False), if True. append the clusters to the ones currently present
1662  """
1663  try:
1664  import cPickle as pickle
1665  except ImportError:
1666  import pickle
1667  fl=open(filename,'rb')
1668  self.clean_clusters()
1669  if append:
1670  self.clusters+=pickle.load(fl)
1671  else:
1672  self.clusters=pickle.load(fl)
1673  self.update_clusters()
1674 
1675  ####################
1676  # Analysis Functions
1677  ####################
1678 
1679  def compute_cluster_center(self,cluster):
1680  """
1681  Compute the cluster center for a given cluster
1682  """
1683  member_distance=defaultdict(float)
1684 
1685  for n0,n1 in itertools.combinations(cluster.members,2):
1686  d0=self.stath0[n0]
1687  d1=self.stath1[n1]
1688  rmsd, _ = self.rmsd()
1689  member_distance[n0]+=rmsd
1690 
1691  if len(member_distance)>0:
1692  cluster.center_index=min(member_distance, key=member_distance.get)
1693  else:
1694  cluster.center_index=cluster.members[0]
1695 
1696  def save_coordinates(self,cluster,rmf_name=None,reference="Absolute", prefix="./"):
1697  """
1698  Save the coordinates of the current cluster a single rmf file
1699  """
1700  print("saving coordinates",cluster)
1701  if self.alignment: self.set_reference(reference,cluster)
1703  if rmf_name is None:
1704  rmf_name=prefix+'/'+str(cluster.cluster_id)+".rmf3"
1705 
1706  d1=self.stath1[cluster.members[0]]
1707  self.model.update()
1708  o.init_rmf(rmf_name, [self.stath1])
1709  for n1 in cluster.members:
1710  d1=self.stath1[n1]
1711  self.model.update()
1713  if self.alignment: self.align()
1714  o.write_rmf(rmf_name)
1716  o.close_rmf(rmf_name)
1717 
1718  def prune_redundant_structures(self,rmsd_cutoff=10):
1719  """
1720  remove structures that are similar
1721  append it to a new cluster
1722  """
1723  print("pruning models")
1724  selected=0
1725  filtered=[selected]
1726  remaining=range(1,len(self.stath1),10)
1727 
1728  while len(remaining)>0:
1729  d0=self.stath0[selected]
1730  rm=[]
1731  for n1 in remaining:
1732  d1=self.stath1[n1]
1733  if self.alignment: self.align()
1734  d, _ = self.rmsd()
1735  if d<=rmsd_cutoff:
1736  rm.append(n1)
1737  print("pruning model %s, similar to model %s, rmsd %s"%(str(n1),str(selected),str(d)))
1738  remaining=[x for x in remaining if x not in rm]
1739  if len(remaining)==0: break
1740  selected=remaining[0]
1741  filtered.append(selected)
1742  remaining.pop(0)
1743  c = IMP.pmi.output.Cluster(len(self.clusters))
1744  self.clusters.append(c)
1745  for n0 in filtered:
1746  d0=self.stath0[n0]
1747  c.add_member(n0,d0)
1748  self.update_clusters()
1749 
1750 
1751 
1752  def precision(self,cluster):
1753  """
1754  Compute the precision of a cluster
1755  """
1756  npairs=0
1757  rmsd=0.0
1758  precision=None
1759 
1760  if not cluster.center_index is None:
1761  members1=[cluster.center_index]
1762  else:
1763  members1=cluster.members
1764 
1765  for n0 in members1:
1766  d0=self.stath0[n0]
1767  for n1 in cluster.members:
1768  if n0!=n1:
1769  npairs+=1
1770  d1=self.stath1[n1]
1772  tmp_rmsd, _ = self.rmsd()
1773  rmsd+=tmp_rmsd
1775 
1776  if npairs>0:
1777  precision=rmsd/npairs
1778  cluster.precision=precision
1779  return precision
1780 
1781  def bipartite_precision(self,cluster1,cluster2,verbose=False):
1782  """
1783  Compute the bipartite precision (ie the cross-precision)
1784  between two clusters
1785  """
1786  npairs=0
1787  rmsd=0.0
1788  for cn0,n0 in enumerate(cluster1.members):
1789  d0=self.stath0[n0]
1790  for cn1,n1 in enumerate(cluster2.members):
1791  d1=self.stath1[n1]
1792  tmp_rmsd, _ =self.rmsd()
1793  if verbose: print("--- rmsd between structure %s and structure %s is %s"%(str(cn0),str(cn1),str(tmp_rmsd)))
1794  rmsd+=tmp_rmsd
1795  npairs+=1
1796  precision=rmsd/npairs
1797  return precision
1798 
1799  def rmsf(self,cluster,molecule,copy_index=0,state_index=0,cluster_ref=None,step=1):
1800  """
1801  Compute the Root mean square fluctuations
1802  of a molecule in a cluster
1803  Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf
1804  """
1805  rmsf=IMP.pmi.tools.OrderedDict()
1806 
1807  #assumes that residue indexes are identical for stath0 and stath1
1808  if cluster_ref is not None:
1809  if not cluster_ref.center_index is None:
1810  members0 = [cluster_ref.center_index]
1811  else:
1812  members0 = cluster_ref.members
1813  else:
1814  if not cluster.center_index is None:
1815  members0 = [cluster.center_index]
1816  else:
1817  members0 = cluster.members
1818 
1819  s0=IMP.atom.Selection(self.stath0,molecule=molecule,resolution=1,
1820  copy_index=copy_index,state_index=state_index)
1821  ps0=s0.get_selected_particles()
1822  #get the residue indexes
1823  residue_indexes=list(IMP.pmi.tools.OrderedSet([IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
1824 
1825  #get the corresponding particles
1826  #s0=IMP.atom.Selection(stat_ref,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
1827  # copy_index=copy_index,state_index=state_index)
1828  #ps0 = s0.get_selected_particles()
1829 
1830 
1831 
1832  npairs=0
1833  for n0 in members0:
1834  d0=self.stath0[n0]
1835  for n1 in cluster.members[::step]:
1836  if n0!=n1:
1837  print("--- rmsf %s %s"%(str(n0),str(n1)))
1839 
1840  s1=IMP.atom.Selection(self.stath1,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
1841  copy_index=copy_index,state_index=state_index)
1842  ps1 = s1.get_selected_particles()
1843 
1844  d1=self.stath1[n1]
1845  if self.alignment: self.align()
1846  for n,(p0,p1) in enumerate(zip(ps0,ps1)):
1847  r=residue_indexes[n]
1848  d0=IMP.core.XYZ(p0)
1849  d1=IMP.core.XYZ(p1)
1850  if r in rmsf:
1851  rmsf[r]+=IMP.core.get_distance(d0,d1)
1852  else:
1853  rmsf[r]=IMP.core.get_distance(d0,d1)
1854  npairs+=1
1856  for r in rmsf:
1857  rmsf[r]/=npairs
1858 
1859  for stath in [self.stath0,self.stath1]:
1860  if molecule not in self.symmetric_molecules:
1861  s=IMP.atom.Selection(stath,molecule=molecule,residue_index=r,resolution=1,
1862  copy_index=copy_index,state_index=state_index)
1863  else:
1864  s=IMP.atom.Selection(stath,molecule=molecule,residue_index=r,resolution=1,
1865  state_index=state_index)
1866 
1867  ps = s.get_selected_particles()
1868  for p in ps:
1870  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
1871  else:
1873 
1874  return rmsf
1875 
1876  def save_densities(self,cluster,density_custom_ranges,voxel_size=5,reference="Absolute", prefix="./",step=1):
1877  if self.alignment: self.set_reference(reference,cluster)
1878  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
1879  voxel=voxel_size)
1880 
1881  for n1 in cluster.members[::step]:
1882  print("density "+str(n1))
1883  d1=self.stath1[n1]
1885  if self.alignment: self.align()
1886  dens.add_subunits_density(self.stath1)
1888  dens.write_mrc(path=prefix+'/',suffix=str(cluster.cluster_id))
1889  del dens
1890 
1891  def contact_map(self,cluster,contact_threshold=15,log_scale=False,consolidate=False,molecules=None,prefix='./',reference="Absolute"):
1892  if self.alignment: self.set_reference(reference,cluster)
1893  import numpy as np
1894  import matplotlib.pyplot as plt
1895  import matplotlib.cm as cm
1896  from scipy.spatial.distance import cdist
1897  import IMP.pmi.topology
1898  if molecules is None:
1900  else:
1901  mols=[IMP.pmi.topology.PMIMoleculeHierarchy(mol) for mol in IMP.pmi.tools.get_molecules(IMP.atom.Selection(self.stath1,molecules=molecules).get_selected_particles())]
1902  unique_copies=[mol for mol in mols if mol.get_copy_index() == 0]
1903  mol_names_unique=dict((mol.get_name(),mol) for mol in unique_copies)
1904  total_len_unique=sum(max(mol.get_residue_indexes()) for mol in unique_copies)
1905 
1906 
1907  # coords = np.ones((total_len,3)) * 1e6 #default to coords "very far away"
1908  index_dict={}
1909  prev_stop=0
1910 
1911  if not consolidate:
1912  for mol in mols:
1913  seqlen=max(mol.get_residue_indexes())
1914  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
1915  prev_stop+=seqlen
1916 
1917  else:
1918  for mol in unique_copies:
1919  seqlen=max(mol.get_residue_indexes())
1920  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
1921  prev_stop+=seqlen
1922 
1923 
1924  for ncl,n1 in enumerate(cluster.members):
1925  print(ncl)
1926  d1=self.stath1[n1]
1927  #self.apply_molecular_assignments(n1)
1928  coord_dict=IMP.pmi.tools.OrderedDict()
1929  for mol in mols:
1930  mol_name=mol.get_name()
1931  copy_index=mol.get_copy_index()
1932  rindexes = mol.get_residue_indexes()
1933  coords = np.ones((max(rindexes),3))
1934  for rnum in rindexes:
1935  sel = IMP.atom.Selection(mol, residue_index=rnum, resolution=1)
1936  selpart = sel.get_selected_particles()
1937  if len(selpart) == 0: continue
1938  selpart = selpart[0]
1939  coords[rnum - 1, :] = IMP.core.XYZ(selpart).get_coordinates()
1940  coord_dict[mol]=coords
1941 
1942  if not consolidate:
1943  coords=np.concatenate(list(coord_dict.values()))
1944  dists = cdist(coords, coords)
1945  binary_dists = np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
1946  else:
1947  binary_dists_dict={}
1948  for mol1 in mols:
1949  len1 = max(mol1.get_residue_indexes())
1950  for mol2 in mols:
1951  name1=mol1.get_name()
1952  name2=mol2.get_name()
1953  dists = cdist(coord_dict[mol1], coord_dict[mol2])
1954  if (name1, name2) not in binary_dists_dict:
1955  binary_dists_dict[(name1, name2)] = np.zeros((len1,len1))
1956  binary_dists_dict[(name1, name2)] += np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
1957  binary_dists=np.zeros((total_len_unique,total_len_unique))
1958 
1959  for name1,name2 in binary_dists_dict:
1960  r1=index_dict[mol_names_unique[name1]]
1961  r2=index_dict[mol_names_unique[name2]]
1962  binary_dists[min(r1):max(r1)+1,min(r2):max(r2)+1] = np.where((binary_dists_dict[(name1, name2)]>=1.0),1.0,0.0)
1963 
1964  if ncl==0:
1965  dist_maps = [dists]
1966  av_dist_map = dists
1967  contact_freqs = binary_dists
1968  else:
1969  dist_maps.append(dists)
1970  av_dist_map += dists
1971  contact_freqs += binary_dists
1972 
1973  #self.undo_apply_molecular_assignments(n1)
1974 
1975  if log_scale:
1976  contact_freqs =-np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
1977  else:
1978  contact_freqs =1.0/len(cluster)*contact_freqs
1979  av_dist_map=1.0/len(cluster)*contact_freqs
1980 
1981  fig = plt.figure(figsize=(100, 100))
1982  ax = fig.add_subplot(111)
1983  ax.set_xticks([])
1984  ax.set_yticks([])
1985  gap_between_components=50
1986  colormap = cm.Blues
1987  colornorm=None
1988 
1989 
1990  #if cbar_labels is not None:
1991  # if len(cbar_labels)!=4:
1992  # print("to provide cbar labels, give 3 fields (first=first input file, last=last input) in oppose order of input contact maps")
1993  # exit()
1994  # set the list of proteins on the x axis
1995  if not consolidate:
1996  sorted_tuple=sorted([(IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_extended_name(),mol) for mol in mols])
1997  prot_list=list(zip(*sorted_tuple))[1]
1998  else:
1999  sorted_tuple=sorted([(IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(),mol) for mol in unique_copies])
2000  prot_list=list(zip(*sorted_tuple))[1]
2001 
2002  prot_listx = prot_list
2003  nresx = gap_between_components + \
2004  sum([max(mol.get_residue_indexes())
2005  + gap_between_components for mol in prot_listx])
2006 
2007  # set the list of proteins on the y axis
2008  prot_listy = prot_list
2009  nresy = gap_between_components + \
2010  sum([max(mol.get_residue_indexes())
2011  + gap_between_components for mol in prot_listy])
2012 
2013  # this is the residue offset for each protein
2014  resoffsetx = {}
2015  resendx = {}
2016  res = gap_between_components
2017  for mol in prot_listx:
2018  resoffsetx[mol] = res
2019  res += max(mol.get_residue_indexes())
2020  resendx[mol] = res
2021  res += gap_between_components
2022 
2023  resoffsety = {}
2024  resendy = {}
2025  res = gap_between_components
2026  for mol in prot_listy:
2027  resoffsety[mol] = res
2028  res += max(mol.get_residue_indexes())
2029  resendy[mol] = res
2030  res += gap_between_components
2031 
2032  resoffsetdiagonal = {}
2033  res = gap_between_components
2034  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2035  resoffsetdiagonal[mol] = res
2036  res += max(mol.get_residue_indexes())
2037  res += gap_between_components
2038 
2039  # plot protein boundaries
2040  xticks = []
2041  xlabels = []
2042  for n, prot in enumerate(prot_listx):
2043  res = resoffsetx[prot]
2044  end = resendx[prot]
2045  for proty in prot_listy:
2046  resy = resoffsety[proty]
2047  endy = resendy[proty]
2048  ax.plot([res, res], [resy, endy], linestyle='-',color='gray', lw=0.4)
2049  ax.plot([end, end], [resy, endy], linestyle='-',color='gray', lw=0.4)
2050  xticks.append((float(res) + float(end)) / 2)
2051  xlabels.append(IMP.pmi.topology.PMIMoleculeHierarchy(prot).get_extended_name())
2052 
2053  yticks = []
2054  ylabels = []
2055  for n, prot in enumerate(prot_listy):
2056  res = resoffsety[prot]
2057  end = resendy[prot]
2058  for protx in prot_listx:
2059  resx = resoffsetx[protx]
2060  endx = resendx[protx]
2061  ax.plot([resx, endx], [res, res], linestyle='-',color='gray', lw=0.4)
2062  ax.plot([resx, endx], [end, end], linestyle='-',color='gray', lw=0.4)
2063  yticks.append((float(res) + float(end)) / 2)
2064  ylabels.append(IMP.pmi.topology.PMIMoleculeHierarchy(prot).get_extended_name())
2065 
2066  # plot the contact map
2067 
2068  tmp_array = np.zeros((nresx, nresy))
2069  ret={}
2070  for px in prot_listx:
2071  for py in prot_listy:
2072  resx = resoffsetx[px]
2073  lengx = resendx[px] - 1
2074  resy = resoffsety[py]
2075  lengy = resendy[py] - 1
2076  indexes_x = index_dict[px]
2077  minx = min(indexes_x)
2078  maxx = max(indexes_x)
2079  indexes_y = index_dict[py]
2080  miny = min(indexes_y)
2081  maxy = max(indexes_y)
2082  tmp_array[resx:lengx,resy:lengy] = contact_freqs[minx:maxx,miny:maxy]
2083  ret[(px,py)]=np.argwhere(contact_freqs[minx:maxx,miny:maxy] == 1.0)+1
2084 
2085  cax = ax.imshow(tmp_array,
2086  cmap=colormap,
2087  norm=colornorm,
2088  origin='lower',
2089  alpha=0.6,
2090  interpolation='nearest')
2091 
2092  ax.set_xticks(xticks)
2093  ax.set_xticklabels(xlabels, rotation=90)
2094  ax.set_yticks(yticks)
2095  ax.set_yticklabels(ylabels)
2096  plt.setp(ax.get_xticklabels(), fontsize=6)
2097  plt.setp(ax.get_yticklabels(), fontsize=6)
2098 
2099  # display and write to file
2100  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2101  [i.set_linewidth(2.0) for i in ax.spines.values()]
2102  #if cbar_labels is not None:
2103  # cbar = fig.colorbar(cax, ticks=[0.5,1.5,2.5,3.5])
2104  # cbar.ax.set_yticklabels(cbar_labels)# vertically oriented colorbar
2105 
2106  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf", dpi=300,transparent="False")
2107  return ret
2108 
2109 
2110  def plot_rmsd_matrix(self,filename):
2111  import numpy as np
2112  self.compute_all_pairwise_rmsd()
2113  distance_matrix = np.zeros(
2114  (len(self.stath0), len(self.stath1)))
2115  for (n0,n1) in self.pairwise_rmsd:
2116  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0,n1)]
2117 
2118  import matplotlib as mpl
2119  mpl.use('Agg')
2120  import matplotlib.pylab as pl
2121  from scipy.cluster import hierarchy as hrc
2122 
2123  fig = pl.figure(figsize=(10,8))
2124  ax = fig.add_subplot(212)
2125  dendrogram = hrc.dendrogram(
2126  hrc.linkage(distance_matrix),
2127  color_threshold=7,
2128  no_labels=True)
2129  leaves_order = dendrogram['leaves']
2130  ax.set_xlabel('Model')
2131  ax.set_ylabel('RMSD [Angstroms]')
2132 
2133  ax2 = fig.add_subplot(221)
2134  cax = ax2.imshow(
2135  distance_matrix[leaves_order,
2136  :][:,
2137  leaves_order],
2138  interpolation='nearest')
2139  cb = fig.colorbar(cax)
2140  cb.set_label('RMSD [Angstroms]')
2141  ax2.set_xlabel('Model')
2142  ax2.set_ylabel('Model')
2143 
2144  pl.savefig(filename, dpi=300)
2145  pl.close(fig)
2146 
2147  ####################
2148  # Internal Functions
2149  ####################
2150 
2151  def update_clusters(self):
2152  """
2153  Update the cluster id numbers
2154  """
2155  for n,c in enumerate(self.clusters):
2156  c.cluster_id=n
2157 
2158  def get_molecule(self, hier, name, copy):
2159  s=IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2160  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2161 
2162  def update_seldicts(self):
2163  """
2164  Update the seldicts
2165  """
2166  self.seldict0=IMP.pmi.tools.get_selections_dictionary(self.sel0_rmsd.get_selected_particles())
2167  self.seldict1=IMP.pmi.tools.get_selections_dictionary(self.sel1_rmsd.get_selected_particles())
2168  for mol in self.seldict0:
2169  for sel in self.seldict0[mol]:
2170  self.issymmetricsel[sel]=False
2171  for mol in self.symmetric_molecules:
2172  self.symmetric_molecules[mol]=len(self.seldict0[mol])
2173  for sel in self.seldict0[mol]:
2174  self.issymmetricsel[sel]=True
2175 
2176 
2177  def align(self):
2178  tr = IMP.atom.get_transformation_aligning_first_to_second(self.sel1_alignment, self.sel0_alignment)
2179 
2180  for rb in self.rbs1:
2181  IMP.core.transform(rb, tr)
2182 
2183  for bead in self.beads1:
2184  try:
2185  IMP.core.transform(IMP.core.XYZ(bead), tr)
2186  except:
2187  continue
2188 
2189 
2190  self.model.update()
2191 
2192  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2193  '''
2194  initial filling of the clusters.
2195  '''
2196  n0 = idxs.pop()
2197  print("clustering model "+str(n0))
2198  d0 = self.stath0[n0]
2199  c = IMP.pmi.output.Cluster(len(self.clusters))
2200  print("creating cluster index "+str(len(self.clusters)))
2201  self.clusters.append(c)
2202  c.add_member(n0,d0)
2203  clustered = set([n0])
2204  for n1 in idxs:
2205  print("--- trying to add model "+str(n1)+" to cluster "+str(len(self.clusters)))
2206  d1 = self.stath1[n1]
2207  if self.alignment: self.align()
2208  rmsd, _ = self.rmsd(metric=metric)
2209  if rmsd<rmsd_cutoff:
2210  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2211  c.add_member(n1,d1)
2212  clustered.add(n1)
2213  else:
2214  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2215  idxs-=clustered
2216 
2217  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2218  """
2219  merge the clusters that have close members
2220  @param rmsd_cutoff cutoff distance in Angstorms
2221  """
2222  # before merging, clusters are spheres of radius rmsd_cutoff centered on the 1st element
2223  # here we only try to merge clusters whose centers are closer than 2*rmsd_cutoff
2224  to_merge = []
2225  print("merging...")
2226  for c0, c1 in filter(lambda x: len(x[0].members)>1, itertools.combinations(self.clusters, 2)):
2227  n0, n1 = [c.members[0] for c in (c0,c1)]
2228  d0 = self.stath0[n0]
2229  d1 = self.stath1[n1]
2230  rmsd, _ = self.rmsd()
2231  if rmsd<2*rmsd_cutoff and self.have_close_members(c0,c1,rmsd_cutoff,metric):
2232  to_merge.append((c0,c1))
2233 
2234  for c0, c in reversed(to_merge):
2235  self.merge(c0,c)
2236 
2237  #keep only full clusters
2238  self.clusters = [c for c in filter(lambda x: len(x.members)>0, self.clusters)]
2239 
2240 
2241  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2242  '''
2243  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2244  '''
2245  print("check close members for clusters "+str(c0.cluster_id)+" and "+str(c1.cluster_id))
2246  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2247  d0 = self.stath0[n0]
2248  d1 = self.stath1[n1]
2249  rmsd, _ = self.rmsd(metric=metric)
2250  if rmsd<rmsd_cutoff:
2251  return True
2252 
2253  return False
2254 
2255 
2256  def merge(self, c0, c1):
2257  '''
2258  merge two clusters
2259  '''
2260  c0+=c1
2261  c1.members=[]
2262  c1.data={}
2263 
2264  def rmsd_helper(self, sels0, sels1, metric):
2265  '''
2266  a function that returns the permutation best_sel of sels0 that minimizes metric
2267  '''
2268  best_rmsd2 = float('inf')
2269  best_sel = None
2270  if self.issymmetricsel[sels0[0]]:
2271  #this cases happens when symmetries were defined
2272  N = len(sels0)
2273  for offset in range(N):
2274  order=[(offset+i)%N for i in range(N)]
2275  sels = [sels0[(offset+i)%N] for i in range(N)]
2276  sel0 = sels[0]
2277  sel1 = sels1[0]
2278  r=metric(sel0, sel1)
2279  rmsd2=r*r*N
2280  ###print(order,rmsd2)
2281  if rmsd2 < best_rmsd2:
2282  best_rmsd2 = rmsd2
2283  best_sel = sels
2284  best_order=order
2285  else:
2286  for sels in itertools.permutations(sels0):
2287  rmsd2=0.0
2288  for sel0, sel1 in itertools.takewhile(lambda x: rmsd2<best_rmsd2, zip(sels, sels1)):
2289  r=metric(sel0, sel1)
2290  rmsd2+=r*r
2291  if rmsd2 < best_rmsd2:
2292  best_rmsd2 = rmsd2
2293  best_sel = sels
2294  ###for i,sel in enumerate(best_sel):
2295  ### p0 = sel.get_selected_particles()[0]
2296  ### p1 = sels1[i].get_selected_particles()[0]
2297  ### m0 = IMP.pmi.tools.get_molecules([p0])[0]
2298  ### m1 = IMP.pmi.tools.get_molecules([p1])[0]
2299  ### c0 = IMP.atom.Copy(m0).get_copy_index()
2300  ### c1 = IMP.atom.Copy(m1).get_copy_index()
2301  ### name0=m0.get_name()
2302  ### name1=m1.get_name()
2303  ### print("WWW",name0,name1,c0,c1)
2304  ###print(best_order,best_rmsd2,m0,m1)
2305  return best_sel, best_rmsd2
2306 
2307  def compute_all_pairwise_rmsd(self):
2308  for d0 in self.stath0:
2309  for d1 in self.stath1:
2310  rmsd, _ = self.rmsd()
2311 
2312  def rmsd(self,metric=IMP.atom.get_rmsd):
2313  '''
2314  Computes the RMSD. Resolves ambiguous pairs assignments
2315  '''
2316  # here we memoize the rmsd and molecular assignment so that it's not done multiple times
2317  n0=self.stath0.current_index
2318  n1=self.stath1.current_index
2319  if ((n0,n1) in self.pairwise_rmsd) and ((n0,n1) in self.pairwise_molecular_assignment):
2320  return self.pairwise_rmsd[(n0,n1)], self.pairwise_molecular_assignment[(n0,n1)]
2321 
2322  if self.alignment:
2323  self.align()
2324  #if it's not yet memoized
2325  total_rmsd=0.0
2326  total_N=0
2327  # this is a dictionary which keys are the molecule names, and values are the list of IMP.atom.Selection for all molecules that share the molecule name
2328  seldict_best_order={}
2329  molecular_assignment={}
2330  for molname, sels0 in self.seldict0.items():
2331  sels_best_order, best_rmsd2 = self.rmsd_helper(sels0, self.seldict1[molname], metric)
2332 
2333  Ncoords = len(sels_best_order[0].get_selected_particles())
2334  Ncopies = len(self.seldict1[molname])
2335  total_rmsd += Ncoords*best_rmsd2
2336  total_N += Ncoords*Ncopies
2337 
2338  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2339  p0 = sel0.get_selected_particles()[0]
2340  p1 = sel1.get_selected_particles()[0]
2341  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2342  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2343  c0 = IMP.atom.Copy(m0).get_copy_index()
2344  c1 = IMP.atom.Copy(m1).get_copy_index()
2345  ###print(molname,c0,c1)
2346  molecular_assignment[(molname,c0)]=(molname,c1)
2347 
2348  total_rmsd = math.sqrt(total_rmsd/total_N)
2349 
2350  self.pairwise_rmsd[(n0,n1)]=total_rmsd
2351  self.pairwise_molecular_assignment[(n0,n1)]=molecular_assignment
2352  self.pairwise_rmsd[(n1,n0)]=total_rmsd
2353  self.pairwise_molecular_assignment[(n1,n0)]=molecular_assignment
2354  ###print(n0,n1,molecular_assignment)
2355  return total_rmsd, molecular_assignment
2356 
2357  def set_reference(self,reference,cluster):
2358  """
2359  Fix the reference structure for structural alignment, rmsd and chain assignemnt
2360  @param reference can be either "Absolute" (cluster center of the first cluster)
2361  or Relative (cluster center of the current cluster)
2362  """
2363  if reference=="Absolute":
2364  d0=self.stath0[0]
2365  elif reference=="Relative":
2366  if cluster.center_index:
2367  n0=cluster.center_index
2368  else:
2369  n0=cluster.members[0]
2370  d0=self.stath0[n0]
2371 
2373  """
2374  compute the molecular assignemnts between multiple copies
2375  of the same sequence. It changes the Copy index of Molecules
2376  """
2377  d1=self.stath1[n1]
2378  _, molecular_assignment = self.rmsd()
2379  for (m0, c0), (m1,c1) in molecular_assignment.items():
2380  mol0 = self.molcopydict0[m0][c0]
2381  mol1 = self.molcopydict1[m1][c1]
2382  cik0=IMP.atom.Copy(mol0).get_copy_index_key()
2383  p1=IMP.atom.Copy(mol1).get_particle()
2384  p1.set_value(cik0,c0)
2385 
2387  """
2388  Undo the Copy index assignment
2389  """
2390  d1=self.stath1[n1]
2391  _, molecular_assignment = self.rmsd()
2392  mols_newcopys = []
2393  for (m0, c0), (m1,c1) in molecular_assignment.items():
2394  mol0 = self.molcopydict0[m0][c0]
2395  mol1 = self.molcopydict1[m1][c1]
2396  cik0=IMP.atom.Copy(mol0).get_copy_index_key()
2397  p1=IMP.atom.Copy(mol1).get_particle()
2398  p1.set_value(cik0,c1)
2399 
2400  ####################
2401  # Container Functions
2402  ####################
2403 
2404  def __repr__(self):
2405  s= "AnalysisReplicaExchange\n"
2406  s+="---- number of clusters %s \n"%str(len(self.clusters))
2407  s+="---- number of models %s \n"%str(len(self.stath0))
2408  return s
2409 
2410  def __getitem__(self,int_slice_adaptor):
2411  if isinstance(int_slice_adaptor, int):
2412  return self.clusters[int_slice_adaptor]
2413  elif isinstance(int_slice_adaptor, slice):
2414  return self.__iter__(int_slice_adaptor)
2415  else:
2416  raise TypeError("Unknown Type")
2417 
2418  def __len__(self):
2419  return len(self.clusters)
2420 
2421  def __iter__(self,slice_key=None):
2422  if slice_key is None:
2423  for i in range(len(self)):
2424  yield self[i]
2425  else:
2426  for i in range(len(self))[slice_key]:
2427  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2312
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
Definition: macros.py:2357
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1657
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1115
def precision
Compute the precision of a cluster.
Definition: macros.py:1752
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:794
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:103
A container for models organized into clusters.
Definition: output.py:1452
Sample using molecular dynamics.
Definition: samplers.py:389
def aggregate
initial filling of the clusters.
Definition: macros.py:2192
A class for reading stat files (either rmf or ascii v1 and v2)
Definition: output.py:904
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:616
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:617
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1718
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1799
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1206
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1514
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1595
def __init__
Construction of the Class.
Definition: macros.py:1475
void handle_use_deprecated(std::string message)
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:711
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1612
def __init__
Constructor.
Definition: macros.py:109
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2386
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1530
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2264
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1696
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:939
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
Definition: macros.py:2372
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1465
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:549
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2217
This class initializes the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:847
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1679
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:856
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:23
def update_seldicts
Update the seldicts.
Definition: macros.py:2162
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2151
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:595
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1559
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:198
Collect timing information.
Definition: tools.py:117
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1523
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: pmi/utilities.h:91
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9852
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:36
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2256
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:597
The general base class for IMP exceptions.
Definition: exception.h:49
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1216
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1605
Class to handle individual particles of a Model object.
Definition: Particle.h:41
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:720
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1781
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:575
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1546
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1645
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2241
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
A macro to help setup and run replica exchange.
Definition: macros.py:57
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1183
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1620
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:553
Sample using replica exchange.
Definition: samplers.py:496
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27