IMP logo
IMP Reference Guide  2.11.0
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
8 import IMP.pmi.tools
9 import IMP.pmi.samplers
10 import IMP.pmi.output
11 import IMP.pmi.analysis
12 import IMP.pmi.io
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import RMF
17 import os
18 import glob
19 from operator import itemgetter
20 from collections import defaultdict
21 import numpy as np
22 import string
23 import itertools
24 import warnings
25 import math
26 import warnings
27 
28 class _RMFRestraints(object):
29  """All restraints that are written out to the RMF file"""
30  def __init__(self, model, user_restraints):
31  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
32  self._user_restraints = user_restraints if user_restraints else []
33 
34  def __len__(self):
35  return (len(self._user_restraints)
36  + self._rmf_rs.get_number_of_restraints())
37 
38  def __bool__(self):
39  return len(self) > 0
40  __nonzero__ = __bool__ # Python 2 compatibility
41 
42  def __getitem__(self, i):
43  class FakePMIWrapper(object):
44  def __init__(self, r):
45  self.r = IMP.RestraintSet.get_from(r)
46  get_restraint = lambda self: self.r
47  lenuser = len(self._user_restraints)
48  if 0 <= i < lenuser:
49  return self._user_restraints[i]
50  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
51  r = self._rmf_rs.get_restraint(i - lenuser)
52  return FakePMIWrapper(r)
53  else:
54  raise IndexError("Out of range")
55 
56 
57 class ReplicaExchange0(object):
58  """A macro to help setup and run replica exchange.
59  Supports Monte Carlo and molecular dynamics.
60  Produces trajectory RMF files, best PDB structures,
61  and output stat files.
62  """
63  def __init__(self, model,
64  representation=None,
65  root_hier=None,
66  sample_objects=None, # DEPRECATED
67  monte_carlo_sample_objects=None,
68  molecular_dynamics_sample_objects=None,
69  output_objects=[],
70  rmf_output_objects=None,
71  crosslink_restraints=None,
72  monte_carlo_temperature=1.0,
73  simulated_annealing=False,
74  simulated_annealing_minimum_temperature=1.0,
75  simulated_annealing_maximum_temperature=2.5,
76  simulated_annealing_minimum_temperature_nframes=100,
77  simulated_annealing_maximum_temperature_nframes=100,
78  replica_exchange_minimum_temperature=1.0,
79  replica_exchange_maximum_temperature=2.5,
80  replica_exchange_swap=True,
81  num_sample_rounds=1,
82  number_of_best_scoring_models=500,
83  monte_carlo_steps=10,
84  self_adaptive=False,
85  molecular_dynamics_steps=10,
86  molecular_dynamics_max_time_step=1.0,
87  number_of_frames=1000,
88  save_coordinates_mode="lowest_temperature",
89  nframes_write_coordinates=1,
90  write_initial_rmf=True,
91  initial_rmf_name_suffix="initial",
92  stat_file_name_suffix="stat",
93  best_pdb_name_suffix="model",
94  do_clean_first=True,
95  do_create_directories=True,
96  global_output_directory="./",
97  rmf_dir="rmfs/",
98  best_pdb_dir="pdbs/",
99  replica_stat_file_suffix="stat_replica",
100  em_object_for_rmf=None,
101  atomistic=False,
102  replica_exchange_object=None,
103  test_mode=False):
104  """Constructor.
105  @param model The IMP model
106  @param representation PMI.representation.Representation object
107  (or list of them, for multi-state modeling)
108  @param root_hier Instead of passing Representation, pass the System hierarchy
109  @param monte_carlo_sample_objects Objects for MC sampling; a list of
110  structural components (generally the representation) that will
111  be moved and restraints with parameters that need to
112  be sampled.
113  For PMI2: just pass flat list of movers
114  @param molecular_dynamics_sample_objects Objects for MD sampling
115  For PMI2: just pass flat list of particles
116  @param output_objects A list of structural objects and restraints
117  that will be included in output (ie, statistics "stat" files). Any object
118  that provides a get_output() method can be used here. If None is passed
119  the macro will not write stat files.
120  @param rmf_output_objects A list of structural objects and restraints
121  that will be included in rmf. Any object
122  that provides a get_output() method can be used here.
123  @param crosslink_restraints List of cross-link restraints that will
124  be included in output RMF files (for visualization).
125  @param monte_carlo_temperature MC temp (may need to be optimized
126  based on post-sampling analysis)
127  @param simulated_annealing If True, perform simulated annealing
128  @param simulated_annealing_minimum_temperature Should generally be
129  the same as monte_carlo_temperature.
130  @param simulated_annealing_minimum_temperature_nframes Number of
131  frames to compute at minimum temperature.
132  @param simulated_annealing_maximum_temperature_nframes Number of
133  frames to compute at
134  temps > simulated_annealing_maximum_temperature.
135  @param replica_exchange_minimum_temperature Low temp for REX; should
136  generally be the same as monte_carlo_temperature.
137  @param replica_exchange_maximum_temperature High temp for REX
138  @param replica_exchange_swap Boolean, enable disable temperature
139  swap (Default=True)
140  @param num_sample_rounds Number of rounds of MC/MD per cycle
141  @param number_of_best_scoring_models Number of top-scoring PDB models
142  to keep around for analysis
143  @param monte_carlo_steps Number of MC steps per round
144  @param self_adaptive self adaptive scheme for monte carlo movers
145  @param molecular_dynamics_steps Number of MD steps per round
146  @param molecular_dynamics_max_time_step Max time step for MD
147  @param number_of_frames Number of REX frames to run
148  @param save_coordinates_mode string: how to save coordinates.
149  "lowest_temperature" (default) only the lowest temperatures is saved
150  "25th_score" all replicas whose score is below the 25th percentile
151  "50th_score" all replicas whose score is below the 50th percentile
152  "75th_score" all replicas whose score is below the 75th percentile
153  @param nframes_write_coordinates How often to write the coordinates
154  of a frame
155  @param write_initial_rmf Write the initial configuration
156  @param global_output_directory Folder that will be created to house
157  output.
158  @param test_mode Set to True to avoid writing any files, just test one frame.
159  """
160  self.model = model
161  self.vars = {}
162  self.pmi2 = False
163 
164  ### add check hierarchy is multistate
165  self.output_objects = output_objects
166  self.rmf_output_objects=rmf_output_objects
167  self.representation = representation
168  if representation:
169  if isinstance(representation, list):
170  self.is_multi_state = True
171  self.root_hiers = [r.prot for r in representation]
172  self.vars["number_of_states"] = len(representation)
173  else:
174  self.is_multi_state = False
175  self.root_hier = representation.prot
176  self.vars["number_of_states"] = 1
177  elif root_hier and isinstance(root_hier, IMP.atom.Hierarchy) \
178  and root_hier.get_name() == 'System':
179  self.pmi2 = True
180  if self.output_objects is not None:
181  self.output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
182  if self.rmf_output_objects is not None:
183  self.rmf_output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
184  self.root_hier = root_hier
185  states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
186  self.vars["number_of_states"] = len(states)
187  if len(states)>1:
188  self.root_hiers = states
189  self.is_multi_state = True
190  else:
191  self.root_hier = root_hier
192  self.is_multi_state = False
193  else:
194  raise Exception("Must provide representation or System hierarchy (root_hier)")
195 
196  self._rmf_restraints = _RMFRestraints(model, crosslink_restraints)
197  self.em_object_for_rmf = em_object_for_rmf
198  self.monte_carlo_sample_objects = monte_carlo_sample_objects
199  self.vars["self_adaptive"]=self_adaptive
200  if sample_objects is not None:
201  self.monte_carlo_sample_objects+=sample_objects
202  self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
203  self.replica_exchange_object = replica_exchange_object
204  self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
205  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
206  self.vars[
207  "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
208  self.vars[
209  "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
210  self.vars["replica_exchange_swap"] = replica_exchange_swap
211  self.vars["simulated_annealing"]=\
212  simulated_annealing
213  self.vars["simulated_annealing_minimum_temperature"]=\
214  simulated_annealing_minimum_temperature
215  self.vars["simulated_annealing_maximum_temperature"]=\
216  simulated_annealing_maximum_temperature
217  self.vars["simulated_annealing_minimum_temperature_nframes"]=\
218  simulated_annealing_minimum_temperature_nframes
219  self.vars["simulated_annealing_maximum_temperature_nframes"]=\
220  simulated_annealing_maximum_temperature_nframes
221 
222  self.vars["num_sample_rounds"] = num_sample_rounds
223  self.vars[
224  "number_of_best_scoring_models"] = number_of_best_scoring_models
225  self.vars["monte_carlo_steps"] = monte_carlo_steps
226  self.vars["molecular_dynamics_steps"]=molecular_dynamics_steps
227  self.vars["number_of_frames"] = number_of_frames
228  if not save_coordinates_mode in ["lowest_temperature","25th_score","50th_score","75th_score"]:
229  raise Exception("save_coordinates_mode has unrecognized value")
230  else:
231  self.vars["save_coordinates_mode"] = save_coordinates_mode
232  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
233  self.vars["write_initial_rmf"] = write_initial_rmf
234  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
235  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
236  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
237  self.vars["do_clean_first"] = do_clean_first
238  self.vars["do_create_directories"] = do_create_directories
239  self.vars["global_output_directory"] = global_output_directory
240  self.vars["rmf_dir"] = rmf_dir
241  self.vars["best_pdb_dir"] = best_pdb_dir
242  self.vars["atomistic"] = atomistic
243  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
244  self.vars["geometries"] = None
245  self.test_mode = test_mode
246 
247  def add_geometries(self, geometries):
248  if self.vars["geometries"] is None:
249  self.vars["geometries"] = list(geometries)
250  else:
251  self.vars["geometries"].extend(geometries)
252 
253  def show_info(self):
254  print("ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
255  print("--- it stores the best scoring pdb models in pdbs/")
256  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
257  print("--- variables:")
258  keys = list(self.vars.keys())
259  keys.sort()
260  for v in keys:
261  print("------", v.ljust(30), self.vars[v])
262 
263  def get_replica_exchange_object(self):
264  return self.replica_exchange_object
265 
266  def _add_provenance(self, sampler_md, sampler_mc):
267  """Record details about the sampling in the IMP Hierarchies"""
268  if not self.is_multi_state or self.pmi2:
269  output_hierarchies = [self.root_hier]
270  else:
271  output_hierarchies = self.root_hiers
272 
273  iterations = 0
274  if sampler_md:
275  method = "Molecular Dynamics"
276  iterations += self.vars["molecular_dynamics_steps"]
277  if sampler_mc:
278  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
279  iterations += self.vars["monte_carlo_steps"]
280  # If no sampling is actually done, no provenance to write
281  if iterations == 0 or self.vars["number_of_frames"] == 0:
282  return
283  iterations *= self.vars["num_sample_rounds"]
284 
285  for h in output_hierarchies:
286  pi = self.model.add_particle("sampling")
288  self.model, pi, method, self.vars["number_of_frames"],
289  iterations)
290  p.set_number_of_replicas(
291  self.replica_exchange_object.get_number_of_replicas())
292  IMP.pmi.tools._add_pmi_provenance(h)
293  IMP.core.add_provenance(self.model, h, p)
294 
295  def execute_macro(self):
296  temp_index_factor = 100000.0
297  samplers=[]
298  sampler_mc=None
299  sampler_md=None
300  if self.monte_carlo_sample_objects is not None:
301  print("Setting up MonteCarlo")
302  sampler_mc = IMP.pmi.samplers.MonteCarlo(self.model,
303  self.monte_carlo_sample_objects,
304  self.vars["monte_carlo_temperature"])
305  if self.vars["simulated_annealing"]:
306  tmin=self.vars["simulated_annealing_minimum_temperature"]
307  tmax=self.vars["simulated_annealing_maximum_temperature"]
308  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
309  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
310  sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
311  if self.vars["self_adaptive"]:
312  sampler_mc.set_self_adaptive(isselfadaptive=self.vars["self_adaptive"])
313  if self.output_objects is not None:
314  self.output_objects.append(sampler_mc)
315  if self.rmf_output_objects is not None:
316  self.rmf_output_objects.append(sampler_mc)
317  samplers.append(sampler_mc)
318 
319 
320  if self.molecular_dynamics_sample_objects is not None:
321  print("Setting up MolecularDynamics")
322  sampler_md = IMP.pmi.samplers.MolecularDynamics(self.model,
323  self.molecular_dynamics_sample_objects,
324  self.vars["monte_carlo_temperature"],
325  maximum_time_step=self.molecular_dynamics_max_time_step)
326  if self.vars["simulated_annealing"]:
327  tmin=self.vars["simulated_annealing_minimum_temperature"]
328  tmax=self.vars["simulated_annealing_maximum_temperature"]
329  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
330  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
331  sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
332  if self.output_objects is not None:
333  self.output_objects.append(sampler_md)
334  if self.rmf_output_objects is not None:
335  self.rmf_output_objects.append(sampler_md)
336  samplers.append(sampler_md)
337 # -------------------------------------------------------------------------
338 
339  print("Setting up ReplicaExchange")
340  rex = IMP.pmi.samplers.ReplicaExchange(self.model,
341  self.vars[
342  "replica_exchange_minimum_temperature"],
343  self.vars[
344  "replica_exchange_maximum_temperature"],
345  samplers,
346  replica_exchange_object=self.replica_exchange_object)
347  self.replica_exchange_object = rex.rem
348 
349  myindex = rex.get_my_index()
350  if self.output_objects is not None:
351  self.output_objects.append(rex)
352  if self.rmf_output_objects is not None:
353  self.rmf_output_objects.append(rex)
354  # must reset the minimum temperature due to the
355  # different binary length of rem.get_my_parameter double and python
356  # float
357  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
358 
359 # -------------------------------------------------------------------------
360 
361  globaldir = self.vars["global_output_directory"] + "/"
362  rmf_dir = globaldir + self.vars["rmf_dir"]
363  pdb_dir = globaldir + self.vars["best_pdb_dir"]
364 
365  if not self.test_mode:
366  if self.vars["do_clean_first"]:
367  pass
368 
369  if self.vars["do_create_directories"]:
370 
371  try:
372  os.makedirs(globaldir)
373  except:
374  pass
375  try:
376  os.makedirs(rmf_dir)
377  except:
378  pass
379 
380  if not self.is_multi_state:
381  try:
382  os.makedirs(pdb_dir)
383  except:
384  pass
385  else:
386  for n in range(self.vars["number_of_states"]):
387  try:
388  os.makedirs(pdb_dir + "/" + str(n))
389  except:
390  pass
391 
392 # -------------------------------------------------------------------------
393 
395  if self.output_objects is not None:
396  self.output_objects.append(sw)
397  if self.rmf_output_objects is not None:
398  self.rmf_output_objects.append(sw)
399 
400  print("Setting up stat file")
401  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
402  low_temp_stat_file = globaldir + \
403  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
404 
405  # Ensure model is updated before saving init files
406  if not self.test_mode:
407  self.model.update()
408 
409  if not self.test_mode:
410  if self.output_objects is not None:
411  output.init_stat2(low_temp_stat_file,
412  self.output_objects,
413  extralabels=["rmf_file", "rmf_frame_index"])
414  else:
415  print("Stat file writing is disabled")
416 
417  if self.rmf_output_objects is not None:
418  print("Stat info being written in the rmf file")
419 
420  print("Setting up replica stat file")
421  replica_stat_file = globaldir + \
422  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
423  if not self.test_mode:
424  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
425 
426  if not self.test_mode:
427  print("Setting up best pdb files")
428  if not self.is_multi_state:
429  if self.vars["number_of_best_scoring_models"] > 0:
430  output.init_pdb_best_scoring(pdb_dir + "/" +
431  self.vars["best_pdb_name_suffix"],
432  self.root_hier,
433  self.vars[
434  "number_of_best_scoring_models"],
435  replica_exchange=True)
436  output.write_psf(pdb_dir + "/" +"model.psf",pdb_dir + "/" +
437  self.vars["best_pdb_name_suffix"]+".0.pdb")
438  else:
439  if self.vars["number_of_best_scoring_models"] > 0:
440  for n in range(self.vars["number_of_states"]):
441  output.init_pdb_best_scoring(pdb_dir + "/" + str(n) + "/" +
442  self.vars["best_pdb_name_suffix"],
443  self.root_hiers[n],
444  self.vars[
445  "number_of_best_scoring_models"],
446  replica_exchange=True)
447  output.write_psf(pdb_dir + "/" + str(n) + "/" +"model.psf",pdb_dir + "/" + str(n) + "/" +
448  self.vars["best_pdb_name_suffix"]+".0.pdb")
449 # ---------------------------------------------
450 
451  if self.em_object_for_rmf is not None:
452  if not self.is_multi_state or self.pmi2:
453  output_hierarchies = [
454  self.root_hier,
455  self.em_object_for_rmf.get_density_as_hierarchy(
456  )]
457  else:
458  output_hierarchies = self.root_hiers
459  output_hierarchies.append(
460  self.em_object_for_rmf.get_density_as_hierarchy())
461  else:
462  if not self.is_multi_state or self.pmi2:
463  output_hierarchies = [self.root_hier]
464  else:
465  output_hierarchies = self.root_hiers
466 
467 #----------------------------------------------
468  if not self.test_mode:
469  print("Setting up and writing initial rmf coordinate file")
470  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
471  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
472  output_hierarchies,listofobjects=self.rmf_output_objects)
473  if self._rmf_restraints:
474  output.add_restraints_to_rmf(
475  init_suffix + "." + str(myindex) + ".rmf3",
476  self._rmf_restraints)
477  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
478  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
479 
480 #----------------------------------------------
481 
482  if not self.test_mode:
483  mpivs=IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
484 
485 #----------------------------------------------
486 
487  self._add_provenance(sampler_md, sampler_mc)
488 
489  if not self.test_mode:
490  print("Setting up production rmf files")
491  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
492  output.init_rmf(rmfname, output_hierarchies, geometries=self.vars["geometries"],
493  listofobjects = self.rmf_output_objects)
494 
495  if self._rmf_restraints:
496  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
497 
498  ntimes_at_low_temp = 0
499 
500  if myindex == 0:
501  self.show_info()
502  self.replica_exchange_object.set_was_used(True)
503  nframes = self.vars["number_of_frames"]
504  if self.test_mode:
505  nframes = 1
506  for i in range(nframes):
507  if self.test_mode:
508  score = 0.
509  else:
510  for nr in range(self.vars["num_sample_rounds"]):
511  if sampler_md is not None:
512  sampler_md.optimize(
513  self.vars["molecular_dynamics_steps"])
514  if sampler_mc is not None:
515  sampler_mc.optimize(self.vars["monte_carlo_steps"])
517  self.model).evaluate(False)
518  mpivs.set_value("score",score)
519  output.set_output_entry("score", score)
520 
521 
522 
523  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
524 
525  if self.vars["save_coordinates_mode"] == "lowest_temperature":
526  save_frame=(min_temp_index == my_temp_index)
527  elif self.vars["save_coordinates_mode"] == "25th_score":
528  score_perc=mpivs.get_percentile("score")
529  save_frame=(score_perc*100.0<=25.0)
530  elif self.vars["save_coordinates_mode"] == "50th_score":
531  score_perc=mpivs.get_percentile("score")
532  save_frame=(score_perc*100.0<=50.0)
533  elif self.vars["save_coordinates_mode"] == "75th_score":
534  score_perc=mpivs.get_percentile("score")
535  save_frame=(score_perc*100.0<=75.0)
536 
537  # Ensure model is updated before saving output files
538  if save_frame or not self.test_mode:
539  self.model.update()
540 
541  if save_frame:
542  print("--- frame %s score %s " % (str(i), str(score)))
543 
544  if not self.test_mode:
545  if i % self.vars["nframes_write_coordinates"]==0:
546  print('--- writing coordinates')
547  if self.vars["number_of_best_scoring_models"] > 0:
548  output.write_pdb_best_scoring(score)
549  output.write_rmf(rmfname)
550  output.set_output_entry("rmf_file", rmfname)
551  output.set_output_entry("rmf_frame_index", ntimes_at_low_temp)
552  else:
553  output.set_output_entry("rmf_file", rmfname)
554  output.set_output_entry("rmf_frame_index", '-1')
555  if self.output_objects is not None:
556  output.write_stat2(low_temp_stat_file)
557  ntimes_at_low_temp += 1
558 
559  if not self.test_mode:
560  output.write_stat2(replica_stat_file)
561  if self.vars["replica_exchange_swap"]:
562  rex.swap_temp(i, score)
563  for p, state in IMP.pmi.tools._all_protocol_outputs(
564  [self.representation],
565  self.root_hier if self.pmi2 else None):
566  p.add_replica_exchange(state, self)
567 
568  if not self.test_mode:
569  print("closing production rmf files")
570  output.close_rmf(rmfname)
571 
572 
573 
574 # ----------------------------------------------------------------------
575 class BuildSystem(object):
576  """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
577  Easily create multi-state systems by calling this macro
578  repeatedly with different TopologyReader objects!
579  A useful function is get_molecules() which returns the PMI Molecules grouped by state
580  as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
581  Quick multi-state system:
582  @code{.python}
583  model = IMP.Model()
584  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
585  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
586  bs = IMP.pmi.macros.BuildSystem(model)
587  bs.add_state(reader1)
588  bs.add_state(reader2)
589  bs.execute_macro() # build everything including degrees of freedom
590  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
591  ### now you have a two state system, you add restraints etc
592  @endcode
593  @note The "domain name" entry of the topology reader is not used.
594  All molecules are set up by the component name, but split into rigid bodies
595  as requested.
596  """
597  def __init__(self,
598  model,
599  sequence_connectivity_scale=4.0,
600  force_create_gmm_files=False,
601  resolutions=[1,10]):
602  """Constructor
603  @param model An IMP Model
604  @param sequence_connectivity_scale For scaling the connectivity restraint
605  @param force_create_gmm_files If True, will sample and create GMMs
606  no matter what. If False, will only sample if the
607  files don't exist. If number of Gaussians is zero, won't
608  do anything.
609  @param resolutions The resolutions to build for structured regions
610  """
611  self.model = model
612  self.system = IMP.pmi.topology.System(self.model)
613  self._readers = [] # the TopologyReaders (one per state)
614  self._domain_res = [] # TempResidues for each domain key=unique name, value=(atomic_res,non_atomic_res).
615  self._domains = [] # key = domain unique name, value = Component
616  self.force_create_gmm_files = force_create_gmm_files
617  self.resolutions = resolutions
618 
619  def add_state(self,
620  reader,
621  keep_chain_id=False, fasta_name_map=None):
622  """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
623  When you are done adding states, call execute_macro()
624  @param reader The TopologyReader object
625  @param fasta_name_map dictionary for converting protein names found in the fasta file
626  """
627  state = self.system.create_state()
628  self._readers.append(reader)
629  these_domain_res = {} # key is unique name, value is (atomic res, nonatomicres)
630  these_domains = {} # key is unique name, value is _Component
631  chain_ids = string.ascii_uppercase+string.ascii_lowercase+'0123456789'
632  numchain = 0
633 
634  ### setup representation
635  # loop over molecules, copies, then domains
636  for molname in reader.get_molecules():
637  copies = reader.get_molecules()[molname].domains
638  for nc,copyname in enumerate(copies):
639  print("BuildSystem.add_state: setting up molecule %s copy number %s" % (molname,str(nc)))
640  copy = copies[copyname]
641  # option to not rename chains
642  if keep_chain_id == True:
643  chain_id = copy[0].chain
644  else:
645  chain_id = chain_ids[numchain]
646  if nc==0:
647  is_nucleic=False
648  fasta_flag=copy[0].fasta_flag
649  if fasta_flag == "RNA" or fasta_flag == "DNA": is_nucleic=True
650  seq = IMP.pmi.topology.Sequences(copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
651  print("BuildSystem.add_state: molecule %s sequence has %s residues" % (molname,len(seq)))
652  orig_mol = state.create_molecule(molname,
653  seq,
654  chain_id,is_nucleic)
655  mol = orig_mol
656  numchain+=1
657  else:
658  print("BuildSystem.add_state: creating a copy for molecule %s" % molname)
659  mol = orig_mol.create_copy(chain_id)
660  numchain+=1
661 
662  for domainnumber,domain in enumerate(copy):
663  print("BuildSystem.add_state: ---- setting up domain %s of molecule %s" % (domainnumber,molname))
664  # we build everything in the residue range, even if it
665  # extends beyond what's in the actual PDB file
666  these_domains[domain.get_unique_name()] = domain
667  if domain.residue_range==[] or domain.residue_range is None:
668  domain_res = mol.get_residues()
669  else:
670  start = domain.residue_range[0]+domain.pdb_offset
671  if domain.residue_range[1]=='END':
672  end = len(mol.sequence)
673  else:
674  end = domain.residue_range[1]+domain.pdb_offset
675  domain_res = mol.residue_range(start-1,end-1)
676  print("BuildSystem.add_state: -------- domain %s of molecule %s extends from residue %s to residue %s " % (domainnumber,molname,start,end))
677  if domain.pdb_file=="BEADS":
678  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by BEADS " % (domainnumber,molname))
679  mol.add_representation(
680  domain_res,
681  resolutions=[domain.bead_size],
682  setup_particles_as_densities=(
683  domain.em_residues_per_gaussian!=0),
684  color = domain.color)
685  these_domain_res[domain.get_unique_name()] = (set(),domain_res)
686  elif domain.pdb_file=="IDEAL_HELIX":
687  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by IDEAL_HELIX " % (domainnumber,molname))
688  mol.add_representation(
689  domain_res,
690  resolutions=self.resolutions,
691  ideal_helix=True,
692  density_residues_per_component=domain.em_residues_per_gaussian,
693  density_prefix=domain.density_prefix,
694  density_force_compute=self.force_create_gmm_files,
695  color = domain.color)
696  these_domain_res[domain.get_unique_name()] = (domain_res,set())
697  else:
698  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by pdb file %s " % (domainnumber,molname,domain.pdb_file))
699  domain_atomic = mol.add_structure(domain.pdb_file,
700  domain.chain,
701  domain.residue_range,
702  domain.pdb_offset,
703  soft_check=True)
704  domain_non_atomic = domain_res - domain_atomic
705  if not domain.em_residues_per_gaussian:
706  mol.add_representation(domain_atomic,
707  resolutions=self.resolutions,
708  color = domain.color)
709  if len(domain_non_atomic)>0:
710  mol.add_representation(domain_non_atomic,
711  resolutions=[domain.bead_size],
712  color = domain.color)
713  else:
714  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by gaussians " % (domainnumber,molname))
715  mol.add_representation(
716  domain_atomic,
717  resolutions=self.resolutions,
718  density_residues_per_component=domain.em_residues_per_gaussian,
719  density_prefix=domain.density_prefix,
720  density_force_compute=self.force_create_gmm_files,
721  color = domain.color)
722  if len(domain_non_atomic)>0:
723  mol.add_representation(domain_non_atomic,
724  resolutions=[domain.bead_size],
725  setup_particles_as_densities=True,
726  color = domain.color)
727  these_domain_res[domain.get_unique_name()] = (
728  domain_atomic,domain_non_atomic)
729  self._domain_res.append(these_domain_res)
730  self._domains.append(these_domains)
731  print('BuildSystem.add_state: State',len(self.system.states),'added')
732 
733  def get_molecules(self):
734  """Return list of all molecules grouped by state.
735  For each state, it's a dictionary of Molecules where key is the molecule name
736  """
737  return [s.get_molecules() for s in self.system.get_states()]
738 
739  def get_molecule(self,molname,copy_index=0,state_index=0):
740  return self.system.get_states()[state_index].get_molecules()[molname][copy_index]
741 
742  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04, max_bead_trans=4.0, max_srb_trans=4.0,max_srb_rot=0.04):
743  """Builds representations and sets up degrees of freedom"""
744  print("BuildSystem.execute_macro: building representations")
745  self.root_hier = self.system.build()
746 
747  print("BuildSystem.execute_macro: setting up degrees of freedom")
748  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
749  for nstate,reader in enumerate(self._readers):
750  rbs = reader.get_rigid_bodies()
751  srbs = reader.get_super_rigid_bodies()
752  csrbs = reader.get_chains_of_super_rigid_bodies()
753 
754  # add rigid bodies
755  domains_in_rbs = set()
756  for rblist in rbs:
757  print("BuildSystem.execute_macro: -------- building rigid body %s" % (str(rblist)))
758  all_res = IMP.pmi.tools.OrderedSet()
759  bead_res = IMP.pmi.tools.OrderedSet()
760  for dname in rblist:
761  domain = self._domains[nstate][dname]
762  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
763  all_res|=self._domain_res[nstate][dname][0]
764  bead_res|=self._domain_res[nstate][dname][1]
765  domains_in_rbs.add(dname)
766  all_res|=bead_res
767  print("BuildSystem.execute_macro: -------- \
768 creating rigid body with max_trans %s \
769 max_rot %s non_rigid_max_trans %s" \
770  % (str(max_rb_trans),str(max_rb_rot),str(max_bead_trans)))
771  self.dof.create_rigid_body(all_res,
772  nonrigid_parts=bead_res,
773  max_trans=max_rb_trans,
774  max_rot=max_rb_rot,
775  nonrigid_max_trans=max_bead_trans)
776 
777  # if you have any domains not in an RB, set them as flexible beads
778  for dname, domain in self._domains[nstate].items():
779  if dname not in domains_in_rbs:
780  if domain.pdb_file != "BEADS":
781  warnings.warn(
782  "Making %s flexible. This may distort the "
783  "structure; consider making it rigid" % dname,
785  self.dof.create_flexible_beads(
786  self._domain_res[nstate][dname][1],
787  max_trans=max_bead_trans)
788 
789  # add super rigid bodies
790  for srblist in srbs:
791  print("BuildSystem.execute_macro: -------- building super rigid body %s" % (str(srblist)))
792  all_res = IMP.pmi.tools.OrderedSet()
793  for dname in srblist:
794  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
795  all_res|=self._domain_res[nstate][dname][0]
796  all_res|=self._domain_res[nstate][dname][1]
797 
798  print("BuildSystem.execute_macro: -------- \
799 creating super rigid body with max_trans %s max_rot %s " \
800  % (str(max_srb_trans),str(max_srb_rot)))
801  self.dof.create_super_rigid_body(all_res,max_trans=max_srb_trans,max_rot=max_srb_rot)
802 
803  # add chains of super rigid bodies
804  for csrblist in csrbs:
805  all_res = IMP.pmi.tools.OrderedSet()
806  for dname in csrblist:
807  all_res|=self._domain_res[nstate][dname][0]
808  all_res|=self._domain_res[nstate][dname][1]
809  all_res = list(all_res)
810  all_res.sort(key=lambda r:r.get_index())
811  #self.dof.create_super_rigid_body(all_res,chain_min_length=2,chain_max_length=3)
812  self.dof.create_main_chain_mover(all_res)
813  return self.root_hier,self.dof
814 
815 
816 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
817 class AnalysisReplicaExchange0(object):
818  """A macro for running all the basic operations of analysis.
819  Includes clustering, precision analysis, and making ensemble density maps.
820  A number of plots are also supported.
821  """
822  def __init__(self, model,
823  merge_directories=["./"],
824  stat_file_name_suffix="stat",
825  best_pdb_name_suffix="model",
826  do_clean_first=True,
827  do_create_directories=True,
828  global_output_directory="output/",
829  replica_stat_file_suffix="stat_replica",
830  global_analysis_result_directory="./analysis/",
831  test_mode=False):
832  """Constructor.
833  @param model The IMP model
834  @param stat_file_name_suffix
835  @param merge_directories The directories containing output files
836  @param best_pdb_name_suffix
837  @param do_clean_first
838  @param do_create_directories
839  @param global_output_directory Where everything is
840  @param replica_stat_file_suffix
841  @param global_analysis_result_directory
842  @param test_mode If True, nothing is changed on disk
843  """
844 
845  try:
846  from mpi4py import MPI
847  self.comm = MPI.COMM_WORLD
848  self.rank = self.comm.Get_rank()
849  self.number_of_processes = self.comm.size
850  except ImportError:
851  self.rank = 0
852  self.number_of_processes = 1
853 
854  self.test_mode = test_mode
855  self._protocol_output = []
856  self.cluster_obj = None
857  self.model = model
858  stat_dir = global_output_directory
859  self.stat_files = []
860  # it contains the position of the root directories
861  for rd in merge_directories:
862  stat_files = glob.glob(os.path.join(rd,stat_dir,"stat.*.out"))
863  if len(stat_files)==0:
864  warnings.warn("no stat files found in %s"
865  % os.path.join(rd, stat_dir),
867  self.stat_files += stat_files
868 
869  def add_protocol_output(self, p):
870  """Capture details of the modeling protocol.
871  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
872  """
873  # Assume last state is the one we're interested in
874  self._protocol_output.append((p, p._last_state))
875 
876  def get_modeling_trajectory(self,
877  score_key="SimplifiedModel_Total_Score_None",
878  rmf_file_key="rmf_file",
879  rmf_file_frame_key="rmf_frame_index",
880  outputdir="./",
881  get_every=1,
882  nframes_trajectory=10000):
883  """ Get a trajectory of the modeling run, for generating demonstrative movies
884  @param score_key The score for ranking models
885  @param rmf_file_key Key pointing to RMF filename
886  @param rmf_file_frame_key Key pointing to RMF frame number
887  @param outputdir The local output directory used in the run
888  @param get_every Extract every nth frame
889  @param nframes_trajectory Total number of frames of the trajectory
890  """
891  from operator import itemgetter
892  import math
893 
894  trajectory_models = IMP.pmi.io.get_trajectory_models(self.stat_files,
895  score_key,
896  rmf_file_key,
897  rmf_file_frame_key,
898  get_every)
899  rmf_file_list=trajectory_models[0]
900  rmf_file_frame_list=trajectory_models[1]
901  score_list=list(map(float, trajectory_models[2]))
902 
903  max_score=max(score_list)
904  min_score=min(score_list)
905 
906 
907  bins=[(max_score-min_score)*math.exp(-float(i))+min_score for i in range(nframes_trajectory)]
908  binned_scores=[None]*nframes_trajectory
909  binned_model_indexes=[-1]*nframes_trajectory
910 
911  for model_index,s in enumerate(score_list):
912  bins_score_diffs=[abs(s-b) for b in bins]
913  bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
914  if binned_scores[bin_index]==None:
915  binned_scores[bin_index]=s
916  binned_model_indexes[bin_index]=model_index
917  else:
918  old_diff=abs(binned_scores[bin_index]-bins[bin_index])
919  new_diff=abs(s-bins[bin_index])
920  if new_diff < old_diff:
921  binned_scores[bin_index]=s
922  binned_model_indexes[bin_index]=model_index
923 
924  print(binned_scores)
925  print(binned_model_indexes)
926 
927 
928  def _expand_ambiguity(self,prot,d):
929  """If using PMI2, expand the dictionary to include copies as ambiguous options
930  This also keeps the states separate.
931  """
932  newdict = {}
933  for key in d:
934  val = d[key]
935  if '..' in key or (isinstance(val, tuple) and len(val) >= 3):
936  newdict[key] = val
937  continue
938  states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
939  if isinstance(val, tuple):
940  start = val[0]
941  stop = val[1]
942  name = val[2]
943  else:
944  start = 1
945  stop = -1
946  name = val
947  for nst in range(len(states)):
948  sel = IMP.atom.Selection(prot,molecule=name,state_index=nst)
949  copies = sel.get_selected_particles(with_representation=False)
950  if len(copies)>1:
951  for nc in range(len(copies)):
952  if len(states)>1:
953  newdict['%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
954  else:
955  newdict['%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
956  else:
957  newdict[key] = val
958  return newdict
959 
960 
961  def clustering(self,
962  score_key="SimplifiedModel_Total_Score_None",
963  rmf_file_key="rmf_file",
964  rmf_file_frame_key="rmf_frame_index",
965  state_number=0,
966  prefiltervalue=None,
967  feature_keys=[],
968  outputdir="./",
969  alignment_components=None,
970  number_of_best_scoring_models=10,
971  rmsd_calculation_components=None,
972  distance_matrix_file='distances.mat',
973  load_distance_matrix_file=False,
974  skip_clustering=False,
975  number_of_clusters=1,
976  display_plot=False,
977  exit_after_display=True,
978  get_every=1,
979  first_and_last_frames=None,
980  density_custom_ranges=None,
981  write_pdb_with_centered_coordinates=False,
982  voxel_size=5.0):
983  """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
984  Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
985  Can pass None for copy or state to ignore that field.
986  If you don't pass a specific copy number
987  @param score_key The score for ranking models. Use "Total_Score"
988  instead of default for PMI2.
989  @param rmf_file_key Key pointing to RMF filename
990  @param rmf_file_frame_key Key pointing to RMF frame number
991  @param state_number State number to analyze
992  @param prefiltervalue Only include frames where the
993  score key is below this value
994  @param feature_keys Keywords for which you want to
995  calculate average, medians, etc.
996  If you pass "Keyname" it'll include everything that matches "*Keyname*"
997  @param outputdir The local output directory used in the run
998  @param alignment_components Dictionary with keys=groupname, values are tuples
999  for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1000  @param number_of_best_scoring_models Num models to keep per run
1001  @param rmsd_calculation_components For calculating RMSD
1002  (same format as alignment_components)
1003  @param distance_matrix_file Where to store/read the distance matrix
1004  @param load_distance_matrix_file Try to load the distance matrix file
1005  @param skip_clustering Just extract the best scoring models
1006  and save the pdbs
1007  @param number_of_clusters Number of k-means clusters
1008  @param display_plot Display the distance matrix
1009  @param exit_after_display Exit after displaying distance matrix
1010  @param get_every Extract every nth frame
1011  @param first_and_last_frames A tuple with the first and last frames to be
1012  analyzed. Values are percentages!
1013  Default: get all frames
1014  @param density_custom_ranges For density calculation
1015  (same format as alignment_components)
1016  @param write_pdb_with_centered_coordinates
1017  @param voxel_size Used for the density output
1018  """
1019  # Track provenance information to be added to each output model
1020  prov = []
1021  self._outputdir = os.path.abspath(outputdir)
1022  self._number_of_clusters = number_of_clusters
1023  for p, state in self._protocol_output:
1024  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1025 
1026  if self.test_mode:
1027  return
1028 
1029  if self.rank==0:
1030  try:
1031  os.mkdir(outputdir)
1032  except:
1033  pass
1034 
1035  if not load_distance_matrix_file:
1036  if len(self.stat_files)==0: print("ERROR: no stat file found in the given path"); return
1037  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1038  self.stat_files,self.number_of_processes)[self.rank]
1039 
1040  # read ahead to check if you need the PMI2 score key instead
1041  po = IMP.pmi.output.ProcessOutput(my_stat_files[0])
1042  orig_score_key = score_key
1043  if score_key not in po.get_keys():
1044  if 'Total_Score' in po.get_keys():
1045  score_key = 'Total_Score'
1046  warnings.warn(
1047  "Using 'Total_Score' instead of "
1048  "'SimplifiedModel_Total_Score_None' for the score key",
1050  for k in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1051  if k in feature_keys:
1052  warnings.warn(
1053  "no need to pass " + k + " to feature_keys.",
1055  feature_keys.remove(k)
1056 
1057  best_models = IMP.pmi.io.get_best_models(my_stat_files,
1058  score_key,
1059  feature_keys,
1060  rmf_file_key,
1061  rmf_file_frame_key,
1062  prefiltervalue,
1063  get_every, provenance=prov)
1064  rmf_file_list=best_models[0]
1065  rmf_file_frame_list=best_models[1]
1066  score_list=best_models[2]
1067  feature_keyword_list_dict=best_models[3]
1068 
1069 # ------------------------------------------------------------------------
1070 # collect all the files and scores
1071 # ------------------------------------------------------------------------
1072 
1073  if self.number_of_processes > 1:
1074  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1075  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1076  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1077  rmf_file_frame_list)
1078  for k in feature_keyword_list_dict:
1079  feature_keyword_list_dict[k] = IMP.pmi.tools.scatter_and_gather(
1080  feature_keyword_list_dict[k])
1081 
1082  # sort by score and get the best scoring ones
1083  score_rmf_tuples = list(zip(score_list,
1084  rmf_file_list,
1085  rmf_file_frame_list,
1086  list(range(len(score_list)))))
1087 
1088  if density_custom_ranges:
1089  for k in density_custom_ranges:
1090  if not isinstance(density_custom_ranges[k], list):
1091  raise Exception("Density custom ranges: values must be lists of tuples")
1092 
1093  # keep subset of frames if requested
1094  if first_and_last_frames is not None:
1095  nframes = len(score_rmf_tuples)
1096  first_frame = int(first_and_last_frames[0] * nframes)
1097  last_frame = int(first_and_last_frames[1] * nframes)
1098  if last_frame > len(score_rmf_tuples):
1099  last_frame = -1
1100  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1101 
1102  # sort RMFs by the score_key in ascending order, and store the rank
1103  best_score_rmf_tuples = sorted(score_rmf_tuples,
1104  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1105  best_score_rmf_tuples=[t+(n,) for n,t in enumerate(best_score_rmf_tuples)]
1106  # Note in the provenance info that we only kept best-scoring models
1107  prov.append(IMP.pmi.io.FilterProvenance("Best scoring",
1108  0, number_of_best_scoring_models))
1109  # sort the feature scores in the same way
1110  best_score_feature_keyword_list_dict = defaultdict(list)
1111  for tpl in best_score_rmf_tuples:
1112  index = tpl[3]
1113  for f in feature_keyword_list_dict:
1114  best_score_feature_keyword_list_dict[f].append(
1115  feature_keyword_list_dict[f][index])
1116  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1117  best_score_rmf_tuples,
1118  self.number_of_processes)[self.rank]
1119 
1120  # expand the dictionaries to include ambiguous copies
1121  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(self.model,
1122  0,
1123  my_best_score_rmf_tuples[0][1])[0]
1124  if IMP.pmi.get_is_canonical(prot_ahead):
1125  if rmsd_calculation_components is not None:
1126  tmp = self._expand_ambiguity(prot_ahead,rmsd_calculation_components)
1127  if tmp!=rmsd_calculation_components:
1128  print('Detected ambiguity, expand rmsd components to',tmp)
1129  rmsd_calculation_components = tmp
1130  if alignment_components is not None:
1131  tmp = self._expand_ambiguity(prot_ahead,alignment_components)
1132  if tmp!=alignment_components:
1133  print('Detected ambiguity, expand alignment components to',tmp)
1134  alignment_components = tmp
1135 
1136 #-------------------------------------------------------------
1137 # read the coordinates
1138 # ------------------------------------------------------------
1139  rmsd_weights = IMP.pmi.io.get_bead_sizes(self.model,
1140  my_best_score_rmf_tuples[0],
1141  rmsd_calculation_components,
1142  state_number=state_number)
1143  got_coords = IMP.pmi.io.read_coordinates_of_rmfs(self.model,
1144  my_best_score_rmf_tuples,
1145  alignment_components,
1146  rmsd_calculation_components,
1147  state_number=state_number)
1148 
1149  # note! the coordinates are simply float tuples, NOT decorators, NOT Vector3D,
1150  # NOR particles, because these object cannot be serialized. We need serialization
1151  # for the parallel computation based on mpi.
1152  all_coordinates=got_coords[0] # dict:key=component name,val=coords per hit
1153  alignment_coordinates=got_coords[1] # same as above, limited to alignment bits
1154  rmsd_coordinates=got_coords[2] # same as above, limited to RMSD bits
1155  rmf_file_name_index_dict=got_coords[3] # dictionary with key=RMF, value=score rank
1156  all_rmf_file_names=got_coords[4] # RMF file per hit
1157 
1158 # ------------------------------------------------------------------------
1159 # optionally don't compute distance matrix or cluster, just write top files
1160 # ------------------------------------------------------------------------
1161  if skip_clustering:
1162  if density_custom_ranges:
1163  DensModule = IMP.pmi.analysis.GetModelDensity(
1164  density_custom_ranges,
1165  voxel=voxel_size)
1166 
1167  dircluster=os.path.join(outputdir,"all_models."+str(n))
1168  try:
1169  os.mkdir(outputdir)
1170  except:
1171  pass
1172  try:
1173  os.mkdir(dircluster)
1174  except:
1175  pass
1176  clusstat=open(os.path.join(dircluster,"stat."+str(self.rank)+".out"),"w")
1177  for cnt,tpl in enumerate(my_best_score_rmf_tuples):
1178  rmf_name=tpl[1]
1179  rmf_frame_number=tpl[2]
1180  tmp_dict={}
1181  index=tpl[4]
1182  for key in best_score_feature_keyword_list_dict:
1183  tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1184 
1185  if cnt==0:
1186  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1187  self.model,
1188  rmf_frame_number,
1189  rmf_name)
1190  else:
1191  linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1192  self.model,
1193  prots,
1194  rs,
1195  rmf_frame_number,
1196  rmf_name)
1197  if not linking_successful:
1198  continue
1199 
1200  if not prots:
1201  continue
1202 
1203  if IMP.pmi.get_is_canonical(prots[0]):
1204  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1205  prot = states[state_number]
1206  else:
1207  prot = prots[state_number]
1208 
1209  # get transformation aligning coordinates of requested tuples
1210  # to the first RMF file
1211  if cnt==0:
1212  coords_f1=alignment_coordinates[cnt]
1213  if cnt > 0:
1214  coords_f2=alignment_coordinates[cnt]
1215  if coords_f2:
1216  Ali = IMP.pmi.analysis.Alignment(coords_f1, coords_f2)
1217  transformation = Ali.align()[1]
1218  else:
1220 
1221  rbs = set()
1222  for p in IMP.atom.get_leaves(prot):
1223  if not IMP.core.XYZR.get_is_setup(p):
1225  IMP.core.XYZR(p).set_radius(0.0001)
1226  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1227 
1229  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1230  rbs.add(rb)
1231  else:
1233  transformation)
1234  for rb in rbs:
1235  IMP.core.transform(rb,transformation)
1236 
1238  self.model.update()
1239  out_pdb_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".pdb")
1240  out_rmf_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".rmf3")
1241  o.init_pdb(out_pdb_fn,prot)
1242  o.write_pdb(out_pdb_fn,
1243  translate_to_geometric_center=write_pdb_with_centered_coordinates)
1244 
1245  tmp_dict["local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1246  tmp_dict["rmf_file_full_path"]=rmf_name
1247  tmp_dict["local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1248  tmp_dict["local_rmf_frame_number"]=0
1249 
1250  clusstat.write(str(tmp_dict)+"\n")
1251 
1252  if IMP.pmi.get_is_canonical(prot):
1253  # create a single-state System and write that
1255  h.set_name("System")
1256  h.add_child(prot)
1257  o.init_rmf(out_rmf_fn, [h], rs)
1258  else:
1259  o.init_rmf(out_rmf_fn, [prot],rs)
1260 
1261  o.write_rmf(out_rmf_fn)
1262  o.close_rmf(out_rmf_fn)
1263  # add the density
1264  if density_custom_ranges:
1265  DensModule.add_subunits_density(prot)
1266 
1267  if density_custom_ranges:
1268  DensModule.write_mrc(path=dircluster)
1269  del DensModule
1270  return
1271 
1272 
1273 
1274  # broadcast the coordinates
1275  if self.number_of_processes > 1:
1276  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1277  all_coordinates)
1278  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1279  all_rmf_file_names)
1280  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1281  rmf_file_name_index_dict)
1282  alignment_coordinates=IMP.pmi.tools.scatter_and_gather(
1283  alignment_coordinates)
1284  rmsd_coordinates=IMP.pmi.tools.scatter_and_gather(
1285  rmsd_coordinates)
1286 
1287  if self.rank == 0:
1288  # save needed informations in external files
1289  self.save_objects(
1290  [best_score_feature_keyword_list_dict,
1291  rmf_file_name_index_dict],
1292  ".macro.pkl")
1293 
1294 # ------------------------------------------------------------------------
1295 # Calculate distance matrix and cluster
1296 # ------------------------------------------------------------------------
1297  print("setup clustering class")
1298  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1299 
1300  for n, model_coordinate_dict in enumerate(all_coordinates):
1301  template_coordinate_dict = {}
1302  # let's try to align
1303  if alignment_components is not None and len(self.cluster_obj.all_coords) == 0:
1304  # set the first model as template coordinates
1305  self.cluster_obj.set_template(alignment_coordinates[n])
1306  self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1307  print("Global calculating the distance matrix")
1308 
1309  # calculate distance matrix, all against all
1310  self.cluster_obj.dist_matrix()
1311 
1312  # perform clustering and optionally display
1313  if self.rank == 0:
1314  self.cluster_obj.do_cluster(number_of_clusters)
1315  if display_plot:
1316  if self.rank == 0:
1317  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1318  if exit_after_display:
1319  exit()
1320  self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1321 
1322 # ------------------------------------------------------------------------
1323 # Alteratively, load the distance matrix from file and cluster that
1324 # ------------------------------------------------------------------------
1325  else:
1326  if self.rank==0:
1327  print("setup clustering class")
1328  self.cluster_obj = IMP.pmi.analysis.Clustering()
1329  self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1330  print("clustering with %s clusters" % str(number_of_clusters))
1331  self.cluster_obj.do_cluster(number_of_clusters)
1332  [best_score_feature_keyword_list_dict,
1333  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1334  if display_plot:
1335  if self.rank == 0:
1336  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1337  if exit_after_display:
1338  exit()
1339  if self.number_of_processes > 1:
1340  self.comm.Barrier()
1341 
1342 # ------------------------------------------------------------------------
1343 # now save all informations about the clusters
1344 # ------------------------------------------------------------------------
1345 
1346  if self.rank == 0:
1347  print(self.cluster_obj.get_cluster_labels())
1348  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1349  print("rank %s " % str(self.rank))
1350  print("cluster %s " % str(n))
1351  print("cluster label %s " % str(cl))
1352  print(self.cluster_obj.get_cluster_label_names(cl))
1353  cluster_size = len(self.cluster_obj.get_cluster_label_names(cl))
1354  cluster_prov = prov + \
1355  [IMP.pmi.io.ClusterProvenance(cluster_size)]
1356 
1357  # first initialize the Density class if requested
1358  if density_custom_ranges:
1359  DensModule = IMP.pmi.analysis.GetModelDensity(
1360  density_custom_ranges,
1361  voxel=voxel_size)
1362 
1363  dircluster = outputdir + "/cluster." + str(n) + "/"
1364  try:
1365  os.mkdir(dircluster)
1366  except:
1367  pass
1368 
1369  rmsd_dict = {"AVERAGE_RMSD":
1370  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1371  clusstat = open(dircluster + "stat.out", "w")
1372  for k, structure_name in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1373  # extract the features
1374  tmp_dict = {}
1375  tmp_dict.update(rmsd_dict)
1376  index = rmf_file_name_index_dict[structure_name]
1377  for key in best_score_feature_keyword_list_dict:
1378  tmp_dict[
1379  key] = best_score_feature_keyword_list_dict[
1380  key][
1381  index]
1382 
1383  # get the rmf name and the frame number from the list of
1384  # frame names
1385  rmf_name = structure_name.split("|")[0]
1386  rmf_frame_number = int(structure_name.split("|")[1])
1387  clusstat.write(str(tmp_dict) + "\n")
1388 
1389  # extract frame (open or link to existing)
1390  if k==0:
1391  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1392  self.model,
1393  rmf_frame_number,
1394  rmf_name)
1395  else:
1396  linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1397  self.model,
1398  prots,
1399  rs,
1400  rmf_frame_number,
1401  rmf_name)
1402  if not linking_successful:
1403  continue
1404  if not prots:
1405  continue
1406 
1407  if IMP.pmi.get_is_canonical(prots[0]):
1408  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1409  prot = states[state_number]
1410  else:
1411  prot = prots[state_number]
1412  if k==0:
1413  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
1414 
1415  # transform clusters onto first
1416  if k > 0:
1417  model_index = self.cluster_obj.get_model_index_from_name(
1418  structure_name)
1419  transformation = self.cluster_obj.get_transformation_to_first_member(
1420  cl,
1421  model_index)
1422  rbs = set()
1423  for p in IMP.atom.get_leaves(prot):
1424  if not IMP.core.XYZR.get_is_setup(p):
1426  IMP.core.XYZR(p).set_radius(0.0001)
1427  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1428 
1430  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1431  rbs.add(rb)
1432  else:
1434  transformation)
1435  for rb in rbs:
1436  IMP.core.transform(rb,transformation)
1437 
1438  # add the density
1439  if density_custom_ranges:
1440  DensModule.add_subunits_density(prot)
1441 
1442  # pdb writing should be optimized!
1443  o = IMP.pmi.output.Output()
1444  self.model.update()
1445  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1446  o.write_pdb(dircluster + str(k) + ".pdb")
1447 
1448  if IMP.pmi.get_is_canonical(prot):
1449  # create a single-state System and write that
1451  h.set_name("System")
1452  h.add_child(prot)
1453  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
1454  else:
1455  o.init_rmf(dircluster + str(k) + ".rmf3", [prot],rs)
1456  o.write_rmf(dircluster + str(k) + ".rmf3")
1457  o.close_rmf(dircluster + str(k) + ".rmf3")
1458 
1459  del o
1460  # IMP.atom.destroy(prot)
1461 
1462  if density_custom_ranges:
1463  DensModule.write_mrc(path=dircluster)
1464  del DensModule
1465 
1466  if self.number_of_processes>1:
1467  self.comm.Barrier()
1468 
1469  def get_cluster_rmsd(self,cluster_num):
1470  if self.cluster_obj is None:
1471  raise Exception("Run clustering first")
1472  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1473 
1474  def save_objects(self, objects, file_name):
1475  import pickle
1476  outf = open(file_name, 'wb')
1477  pickle.dump(objects, outf)
1478  outf.close()
1479 
1480  def load_objects(self, file_name):
1481  import pickle
1482  inputf = open(file_name, 'rb')
1483  objects = pickle.load(inputf)
1484  inputf.close()
1485  return objects
1486 
1488 
1489  """
1490  This class contains analysis utilities to investigate ReplicaExchange results.
1491  """
1492 
1493  ########################
1494  # Construction and Setup
1495  ########################
1496 
1497  def __init__(self,model,
1498  stat_files,
1499  best_models=None,
1500  score_key=None,
1501  alignment=True):
1502  """
1503  Construction of the Class.
1504  @param model IMP.Model()
1505  @param stat_files list of string. Can be ascii stat files, rmf files names
1506  @param best_models Integer. Number of best scoring models, if None: all models will be read
1507  @param alignment boolean (Default=True). Align before computing the rmsd.
1508  """
1509 
1510  self.model=model
1511  self.best_models=best_models
1512  self.stath0=IMP.pmi.output.StatHierarchyHandler(model,stat_files,self.best_models,score_key,cache=True)
1513  self.stath1=IMP.pmi.output.StatHierarchyHandler(StatHierarchyHandler=self.stath0)
1514 
1515  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(IMP.pmi.tools.select_at_all_resolutions(self.stath1))
1516  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(IMP.pmi.tools.select_at_all_resolutions(self.stath0))
1517  self.sel0_rmsd=IMP.atom.Selection(self.stath0)
1518  self.sel1_rmsd=IMP.atom.Selection(self.stath1)
1519  self.sel0_alignment=IMP.atom.Selection(self.stath0)
1520  self.sel1_alignment=IMP.atom.Selection(self.stath1)
1521  self.clusters=[]
1522  # fill the cluster list with a single cluster containing all models
1523  c = IMP.pmi.output.Cluster(0)
1524  self.clusters.append(c)
1525  for n0 in range(len(self.stath0)):
1526  c.add_member(n0)
1527  self.pairwise_rmsd={}
1528  self.pairwise_molecular_assignment={}
1529  self.alignment=alignment
1530  self.symmetric_molecules={}
1531  self.issymmetricsel={}
1532  self.update_seldicts()
1533  self.molcopydict0=IMP.pmi.tools.get_molecules_dictionary_by_copy(IMP.atom.get_leaves(self.stath0))
1534  self.molcopydict1=IMP.pmi.tools.get_molecules_dictionary_by_copy(IMP.atom.get_leaves(self.stath1))
1535 
1536  def set_rmsd_selection(self,**kwargs):
1537  """
1538  Setup the selection onto which the rmsd is computed
1539  @param kwargs use IMP.atom.Selection keywords
1540  """
1541  self.sel0_rmsd=IMP.atom.Selection(self.stath0,**kwargs)
1542  self.sel1_rmsd=IMP.atom.Selection(self.stath1,**kwargs)
1543  self.update_seldicts()
1544 
1545  def set_symmetric(self,molecule_name):
1546  """
1547  Store names of symmetric molecules
1548  """
1549  self.symmetric_molecules[molecule_name]=0
1550  self.update_seldicts()
1551 
1552  def set_alignment_selection(self,**kwargs):
1553  """
1554  Setup the selection onto which the alignment is computed
1555  @param kwargs use IMP.atom.Selection keywords
1556  """
1557  self.sel0_alignment=IMP.atom.Selection(self.stath0,**kwargs)
1558  self.sel1_alignment=IMP.atom.Selection(self.stath1,**kwargs)
1559 
1560  ######################
1561  # Clustering functions
1562  ######################
1563  def clean_clusters(self):
1564  for c in self.clusters: del c
1565  self.clusters=[]
1566 
1567 
1568  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1569  """
1570  Cluster the models based on RMSD.
1571  @param rmsd_cutoff Float the distance cutoff in Angstrom
1572  @param metric (Default=IMP.atom.get_rmsd) the metric that will be used to compute rmsds
1573  """
1574  self.clean_clusters()
1575  not_clustered=set(range(len(self.stath1)))
1576  while len(not_clustered)>0:
1577  self.aggregate(not_clustered, rmsd_cutoff, metric)
1578  #self.merge_aggregates(rmsd_cutoff, metric)
1579  self.update_clusters()
1580 
1581  def refine(self,rmsd_cutoff=10):
1582  """
1583  Refine the clusters by merging the ones whose centers are close
1584  @param rmsd_cutoff cutoff distance in Angstorms
1585  """
1586  merge_pairs=[]
1587  clusters_copy=self.clusters
1588  for c0,c1 in itertools.combinations(self.clusters,2):
1589  if c0.center_index is None:
1590  self.compute_cluster_center(c0)
1591  if c1.center_index is None:
1592  self.compute_cluster_center(c1)
1593  d0=self.stath0[c0.center_index]
1594  d1=self.stath1[c1.center_index]
1595  rmsd, molecular_assignment = self.rmsd()
1596  if rmsd <= rmsd_cutoff:
1597  if c1 in self.clusters:
1598  clusters_copy.remove(c1)
1599  c0+=c1
1600  self.clusters=clusters_copy
1601  self.update_clusters()
1602 
1603  ####################
1604  # Input Output
1605  ####################
1606 
1607  def set_cluster_assignments(self, cluster_ids):
1608  if len(cluster_ids)!=len(self.stath0):
1609  raise ValueError('cluster ids has to be same length as number of frames')
1610 
1611  self.clusters=[]
1612  for i in sorted(list(set(cluster_ids))):
1613  self.clusters.append(IMP.pmi.output.Cluster(i))
1614  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
1615  self.clusters[idx].add_member(i,d)
1616 
1617  def get_cluster_data(self, cluster):
1618  """
1619  Return the model data from a cluster
1620  @param cluster IMP.pmi.output.Cluster object
1621  """
1622  data=[]
1623  for m in cluster:
1624  data.append(m)
1625  return data
1626 
1627  def save_data(self,filename='data.pkl'):
1628  """
1629  Save the data for the whole models into a pickle file
1630  @param filename string
1631  """
1632  self.stath0.save_data(filename)
1633 
1634  def set_data(self,data):
1635  """
1636  Set the data from an external IMP.pmi.output.Data
1637  @param data IMP.pmi.output.Data
1638  """
1639  self.stath0.data=data
1640  self.stath1.data=data
1641 
1642  def load_data(self,filename='data.pkl'):
1643  """
1644  Load the data from an external pickled file
1645  @param filename string
1646  """
1647  self.stath0.load_data(filename)
1648  self.stath1.load_data(filename)
1649  self.best_models=len(self.stath0)
1650 
1651  def add_cluster(self,rmf_name_list):
1652  c = IMP.pmi.output.Cluster(len(self.clusters))
1653  print("creating cluster index "+str(len(self.clusters)))
1654  self.clusters.append(c)
1655  current_len=len(self.stath0)
1656 
1657  for rmf in rmf_name_list:
1658  print("adding rmf "+rmf)
1659  self.stath0.add_stat_file(rmf)
1660  self.stath1.add_stat_file(rmf)
1661 
1662  for n0 in range(current_len,len(self.stath0)):
1663  d0=self.stath0[n0]
1664  c.add_member(n0,d0)
1665  self.update_clusters()
1666 
1667  def save_clusters(self,filename='clusters.pkl'):
1668  """
1669  Save the clusters into a pickle file
1670  @param filename string
1671  """
1672  try:
1673  import cPickle as pickle
1674  except ImportError:
1675  import pickle
1676  fl=open(filename,'wb')
1677  pickle.dump(self.clusters,fl)
1678 
1679  def load_clusters(self,filename='clusters.pkl',append=False):
1680  """
1681  Load the clusters from a pickle file
1682  @param filename string
1683  @param append bool (Default=False), if True. append the clusters to the ones currently present
1684  """
1685  try:
1686  import cPickle as pickle
1687  except ImportError:
1688  import pickle
1689  fl=open(filename,'rb')
1690  self.clean_clusters()
1691  if append:
1692  self.clusters+=pickle.load(fl)
1693  else:
1694  self.clusters=pickle.load(fl)
1695  self.update_clusters()
1696 
1697  ####################
1698  # Analysis Functions
1699  ####################
1700 
1701  def compute_cluster_center(self,cluster):
1702  """
1703  Compute the cluster center for a given cluster
1704  """
1705  member_distance=defaultdict(float)
1706 
1707  for n0,n1 in itertools.combinations(cluster.members,2):
1708  d0=self.stath0[n0]
1709  d1=self.stath1[n1]
1710  rmsd, _ = self.rmsd()
1711  member_distance[n0]+=rmsd
1712 
1713  if len(member_distance)>0:
1714  cluster.center_index=min(member_distance, key=member_distance.get)
1715  else:
1716  cluster.center_index=cluster.members[0]
1717 
1718  def save_coordinates(self,cluster,rmf_name=None,reference="Absolute", prefix="./"):
1719  """
1720  Save the coordinates of the current cluster a single rmf file
1721  """
1722  print("saving coordinates",cluster)
1723  if self.alignment: self.set_reference(reference,cluster)
1725  if rmf_name is None:
1726  rmf_name=prefix+'/'+str(cluster.cluster_id)+".rmf3"
1727 
1728  d1=self.stath1[cluster.members[0]]
1729  self.model.update()
1730  o.init_rmf(rmf_name, [self.stath1])
1731  for n1 in cluster.members:
1732  d1=self.stath1[n1]
1733  self.model.update()
1735  if self.alignment: self.align()
1736  o.write_rmf(rmf_name)
1738  o.close_rmf(rmf_name)
1739 
1740  def prune_redundant_structures(self,rmsd_cutoff=10):
1741  """
1742  remove structures that are similar
1743  append it to a new cluster
1744  """
1745  print("pruning models")
1746  selected=0
1747  filtered=[selected]
1748  remaining=range(1,len(self.stath1),10)
1749 
1750  while len(remaining)>0:
1751  d0=self.stath0[selected]
1752  rm=[]
1753  for n1 in remaining:
1754  d1=self.stath1[n1]
1755  if self.alignment: self.align()
1756  d, _ = self.rmsd()
1757  if d<=rmsd_cutoff:
1758  rm.append(n1)
1759  print("pruning model %s, similar to model %s, rmsd %s"%(str(n1),str(selected),str(d)))
1760  remaining=[x for x in remaining if x not in rm]
1761  if len(remaining)==0: break
1762  selected=remaining[0]
1763  filtered.append(selected)
1764  remaining.pop(0)
1765  c = IMP.pmi.output.Cluster(len(self.clusters))
1766  self.clusters.append(c)
1767  for n0 in filtered:
1768  d0=self.stath0[n0]
1769  c.add_member(n0,d0)
1770  self.update_clusters()
1771 
1772 
1773 
1774  def precision(self,cluster):
1775  """
1776  Compute the precision of a cluster
1777  """
1778  npairs=0
1779  rmsd=0.0
1780  precision=None
1781 
1782  if not cluster.center_index is None:
1783  members1=[cluster.center_index]
1784  else:
1785  members1=cluster.members
1786 
1787  for n0 in members1:
1788  d0=self.stath0[n0]
1789  for n1 in cluster.members:
1790  if n0!=n1:
1791  npairs+=1
1792  d1=self.stath1[n1]
1794  tmp_rmsd, _ = self.rmsd()
1795  rmsd+=tmp_rmsd
1797 
1798  if npairs>0:
1799  precision=rmsd/npairs
1800  cluster.precision=precision
1801  return precision
1802 
1803  def bipartite_precision(self,cluster1,cluster2,verbose=False):
1804  """
1805  Compute the bipartite precision (ie the cross-precision)
1806  between two clusters
1807  """
1808  npairs=0
1809  rmsd=0.0
1810  for cn0,n0 in enumerate(cluster1.members):
1811  d0=self.stath0[n0]
1812  for cn1,n1 in enumerate(cluster2.members):
1813  d1=self.stath1[n1]
1814  tmp_rmsd, _ =self.rmsd()
1815  if verbose: print("--- rmsd between structure %s and structure %s is %s"%(str(cn0),str(cn1),str(tmp_rmsd)))
1816  rmsd+=tmp_rmsd
1817  npairs+=1
1818  precision=rmsd/npairs
1819  return precision
1820 
1821  def rmsf(self,cluster,molecule,copy_index=0,state_index=0,cluster_ref=None,step=1):
1822  """
1823  Compute the Root mean square fluctuations
1824  of a molecule in a cluster
1825  Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf
1826  """
1827  rmsf=IMP.pmi.tools.OrderedDict()
1828 
1829  #assumes that residue indexes are identical for stath0 and stath1
1830  if cluster_ref is not None:
1831  if not cluster_ref.center_index is None:
1832  members0 = [cluster_ref.center_index]
1833  else:
1834  members0 = cluster_ref.members
1835  else:
1836  if not cluster.center_index is None:
1837  members0 = [cluster.center_index]
1838  else:
1839  members0 = cluster.members
1840 
1841  s0=IMP.atom.Selection(self.stath0,molecule=molecule,resolution=1,
1842  copy_index=copy_index,state_index=state_index)
1843  ps0=s0.get_selected_particles()
1844  #get the residue indexes
1845  residue_indexes=list(IMP.pmi.tools.OrderedSet([IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
1846 
1847  #get the corresponding particles
1848  #s0=IMP.atom.Selection(stat_ref,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
1849  # copy_index=copy_index,state_index=state_index)
1850  #ps0 = s0.get_selected_particles()
1851 
1852 
1853 
1854  npairs=0
1855  for n0 in members0:
1856  d0=self.stath0[n0]
1857  for n1 in cluster.members[::step]:
1858  if n0!=n1:
1859  print("--- rmsf %s %s"%(str(n0),str(n1)))
1861 
1862  s1=IMP.atom.Selection(self.stath1,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
1863  copy_index=copy_index,state_index=state_index)
1864  ps1 = s1.get_selected_particles()
1865 
1866  d1=self.stath1[n1]
1867  if self.alignment: self.align()
1868  for n,(p0,p1) in enumerate(zip(ps0,ps1)):
1869  r=residue_indexes[n]
1870  d0=IMP.core.XYZ(p0)
1871  d1=IMP.core.XYZ(p1)
1872  if r in rmsf:
1873  rmsf[r]+=IMP.core.get_distance(d0,d1)
1874  else:
1875  rmsf[r]=IMP.core.get_distance(d0,d1)
1876  npairs+=1
1878  for r in rmsf:
1879  rmsf[r]/=npairs
1880 
1881  for stath in [self.stath0,self.stath1]:
1882  if molecule not in self.symmetric_molecules:
1883  s=IMP.atom.Selection(stath,molecule=molecule,residue_index=r,resolution=1,
1884  copy_index=copy_index,state_index=state_index)
1885  else:
1886  s=IMP.atom.Selection(stath,molecule=molecule,residue_index=r,resolution=1,
1887  state_index=state_index)
1888 
1889  ps = s.get_selected_particles()
1890  for p in ps:
1892  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
1893  else:
1895 
1896  return rmsf
1897 
1898  def save_densities(self,cluster,density_custom_ranges,voxel_size=5,reference="Absolute", prefix="./",step=1):
1899  if self.alignment: self.set_reference(reference,cluster)
1900  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
1901  voxel=voxel_size)
1902 
1903  for n1 in cluster.members[::step]:
1904  print("density "+str(n1))
1905  d1=self.stath1[n1]
1907  if self.alignment: self.align()
1908  dens.add_subunits_density(self.stath1)
1910  dens.write_mrc(path=prefix+'/',suffix=str(cluster.cluster_id))
1911  del dens
1912 
1913  def contact_map(self,cluster,contact_threshold=15,log_scale=False,consolidate=False,molecules=None,prefix='./',reference="Absolute"):
1914  if self.alignment: self.set_reference(reference,cluster)
1915  import numpy as np
1916  import matplotlib.pyplot as plt
1917  import matplotlib.cm as cm
1918  from scipy.spatial.distance import cdist
1919  import IMP.pmi.topology
1920  if molecules is None:
1922  else:
1923  mols=[IMP.pmi.topology.PMIMoleculeHierarchy(mol) for mol in IMP.pmi.tools.get_molecules(IMP.atom.Selection(self.stath1,molecules=molecules).get_selected_particles())]
1924  unique_copies=[mol for mol in mols if mol.get_copy_index() == 0]
1925  mol_names_unique=dict((mol.get_name(),mol) for mol in unique_copies)
1926  total_len_unique=sum(max(mol.get_residue_indexes()) for mol in unique_copies)
1927 
1928 
1929  # coords = np.ones((total_len,3)) * 1e6 #default to coords "very far away"
1930  index_dict={}
1931  prev_stop=0
1932 
1933  if not consolidate:
1934  for mol in mols:
1935  seqlen=max(mol.get_residue_indexes())
1936  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
1937  prev_stop+=seqlen
1938 
1939  else:
1940  for mol in unique_copies:
1941  seqlen=max(mol.get_residue_indexes())
1942  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
1943  prev_stop+=seqlen
1944 
1945 
1946  for ncl,n1 in enumerate(cluster.members):
1947  print(ncl)
1948  d1=self.stath1[n1]
1949  #self.apply_molecular_assignments(n1)
1950  coord_dict=IMP.pmi.tools.OrderedDict()
1951  for mol in mols:
1952  mol_name=mol.get_name()
1953  copy_index=mol.get_copy_index()
1954  rindexes = mol.get_residue_indexes()
1955  coords = np.ones((max(rindexes),3))
1956  for rnum in rindexes:
1957  sel = IMP.atom.Selection(mol, residue_index=rnum, resolution=1)
1958  selpart = sel.get_selected_particles()
1959  if len(selpart) == 0: continue
1960  selpart = selpart[0]
1961  coords[rnum - 1, :] = IMP.core.XYZ(selpart).get_coordinates()
1962  coord_dict[mol]=coords
1963 
1964  if not consolidate:
1965  coords=np.concatenate(list(coord_dict.values()))
1966  dists = cdist(coords, coords)
1967  binary_dists = np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
1968  else:
1969  binary_dists_dict={}
1970  for mol1 in mols:
1971  len1 = max(mol1.get_residue_indexes())
1972  for mol2 in mols:
1973  name1=mol1.get_name()
1974  name2=mol2.get_name()
1975  dists = cdist(coord_dict[mol1], coord_dict[mol2])
1976  if (name1, name2) not in binary_dists_dict:
1977  binary_dists_dict[(name1, name2)] = np.zeros((len1,len1))
1978  binary_dists_dict[(name1, name2)] += np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
1979  binary_dists=np.zeros((total_len_unique,total_len_unique))
1980 
1981  for name1,name2 in binary_dists_dict:
1982  r1=index_dict[mol_names_unique[name1]]
1983  r2=index_dict[mol_names_unique[name2]]
1984  binary_dists[min(r1):max(r1)+1,min(r2):max(r2)+1] = np.where((binary_dists_dict[(name1, name2)]>=1.0),1.0,0.0)
1985 
1986  if ncl==0:
1987  dist_maps = [dists]
1988  av_dist_map = dists
1989  contact_freqs = binary_dists
1990  else:
1991  dist_maps.append(dists)
1992  av_dist_map += dists
1993  contact_freqs += binary_dists
1994 
1995  #self.undo_apply_molecular_assignments(n1)
1996 
1997  if log_scale:
1998  contact_freqs =-np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
1999  else:
2000  contact_freqs =1.0/len(cluster)*contact_freqs
2001  av_dist_map=1.0/len(cluster)*contact_freqs
2002 
2003  fig = plt.figure(figsize=(100, 100))
2004  ax = fig.add_subplot(111)
2005  ax.set_xticks([])
2006  ax.set_yticks([])
2007  gap_between_components=50
2008  colormap = cm.Blues
2009  colornorm=None
2010 
2011 
2012  #if cbar_labels is not None:
2013  # if len(cbar_labels)!=4:
2014  # print("to provide cbar labels, give 3 fields (first=first input file, last=last input) in oppose order of input contact maps")
2015  # exit()
2016  # set the list of proteins on the x axis
2017  if not consolidate:
2018  sorted_tuple=sorted([(IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_extended_name(),mol) for mol in mols])
2019  prot_list=list(zip(*sorted_tuple))[1]
2020  else:
2021  sorted_tuple=sorted([(IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(),mol) for mol in unique_copies])
2022  prot_list=list(zip(*sorted_tuple))[1]
2023 
2024  prot_listx = prot_list
2025  nresx = gap_between_components + \
2026  sum([max(mol.get_residue_indexes())
2027  + gap_between_components for mol in prot_listx])
2028 
2029  # set the list of proteins on the y axis
2030  prot_listy = prot_list
2031  nresy = gap_between_components + \
2032  sum([max(mol.get_residue_indexes())
2033  + gap_between_components for mol in prot_listy])
2034 
2035  # this is the residue offset for each protein
2036  resoffsetx = {}
2037  resendx = {}
2038  res = gap_between_components
2039  for mol in prot_listx:
2040  resoffsetx[mol] = res
2041  res += max(mol.get_residue_indexes())
2042  resendx[mol] = res
2043  res += gap_between_components
2044 
2045  resoffsety = {}
2046  resendy = {}
2047  res = gap_between_components
2048  for mol in prot_listy:
2049  resoffsety[mol] = res
2050  res += max(mol.get_residue_indexes())
2051  resendy[mol] = res
2052  res += gap_between_components
2053 
2054  resoffsetdiagonal = {}
2055  res = gap_between_components
2056  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2057  resoffsetdiagonal[mol] = res
2058  res += max(mol.get_residue_indexes())
2059  res += gap_between_components
2060 
2061  # plot protein boundaries
2062  xticks = []
2063  xlabels = []
2064  for n, prot in enumerate(prot_listx):
2065  res = resoffsetx[prot]
2066  end = resendx[prot]
2067  for proty in prot_listy:
2068  resy = resoffsety[proty]
2069  endy = resendy[proty]
2070  ax.plot([res, res], [resy, endy], linestyle='-',color='gray', lw=0.4)
2071  ax.plot([end, end], [resy, endy], linestyle='-',color='gray', lw=0.4)
2072  xticks.append((float(res) + float(end)) / 2)
2073  xlabels.append(IMP.pmi.topology.PMIMoleculeHierarchy(prot).get_extended_name())
2074 
2075  yticks = []
2076  ylabels = []
2077  for n, prot in enumerate(prot_listy):
2078  res = resoffsety[prot]
2079  end = resendy[prot]
2080  for protx in prot_listx:
2081  resx = resoffsetx[protx]
2082  endx = resendx[protx]
2083  ax.plot([resx, endx], [res, res], linestyle='-',color='gray', lw=0.4)
2084  ax.plot([resx, endx], [end, end], linestyle='-',color='gray', lw=0.4)
2085  yticks.append((float(res) + float(end)) / 2)
2086  ylabels.append(IMP.pmi.topology.PMIMoleculeHierarchy(prot).get_extended_name())
2087 
2088  # plot the contact map
2089 
2090  tmp_array = np.zeros((nresx, nresy))
2091  ret={}
2092  for px in prot_listx:
2093  for py in prot_listy:
2094  resx = resoffsetx[px]
2095  lengx = resendx[px] - 1
2096  resy = resoffsety[py]
2097  lengy = resendy[py] - 1
2098  indexes_x = index_dict[px]
2099  minx = min(indexes_x)
2100  maxx = max(indexes_x)
2101  indexes_y = index_dict[py]
2102  miny = min(indexes_y)
2103  maxy = max(indexes_y)
2104  tmp_array[resx:lengx,resy:lengy] = contact_freqs[minx:maxx,miny:maxy]
2105  ret[(px,py)]=np.argwhere(contact_freqs[minx:maxx,miny:maxy] == 1.0)+1
2106 
2107  cax = ax.imshow(tmp_array,
2108  cmap=colormap,
2109  norm=colornorm,
2110  origin='lower',
2111  alpha=0.6,
2112  interpolation='nearest')
2113 
2114  ax.set_xticks(xticks)
2115  ax.set_xticklabels(xlabels, rotation=90)
2116  ax.set_yticks(yticks)
2117  ax.set_yticklabels(ylabels)
2118  plt.setp(ax.get_xticklabels(), fontsize=6)
2119  plt.setp(ax.get_yticklabels(), fontsize=6)
2120 
2121  # display and write to file
2122  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2123  [i.set_linewidth(2.0) for i in ax.spines.values()]
2124  #if cbar_labels is not None:
2125  # cbar = fig.colorbar(cax, ticks=[0.5,1.5,2.5,3.5])
2126  # cbar.ax.set_yticklabels(cbar_labels)# vertically oriented colorbar
2127 
2128  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf", dpi=300,transparent="False")
2129  return ret
2130 
2131 
2132  def plot_rmsd_matrix(self,filename):
2133  import numpy as np
2134  self.compute_all_pairwise_rmsd()
2135  distance_matrix = np.zeros(
2136  (len(self.stath0), len(self.stath1)))
2137  for (n0,n1) in self.pairwise_rmsd:
2138  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0,n1)]
2139 
2140  import matplotlib as mpl
2141  mpl.use('Agg')
2142  import matplotlib.pylab as pl
2143  from scipy.cluster import hierarchy as hrc
2144 
2145  fig = pl.figure(figsize=(10,8))
2146  ax = fig.add_subplot(212)
2147  dendrogram = hrc.dendrogram(
2148  hrc.linkage(distance_matrix),
2149  color_threshold=7,
2150  no_labels=True)
2151  leaves_order = dendrogram['leaves']
2152  ax.set_xlabel('Model')
2153  ax.set_ylabel('RMSD [Angstroms]')
2154 
2155  ax2 = fig.add_subplot(221)
2156  cax = ax2.imshow(
2157  distance_matrix[leaves_order,
2158  :][:,
2159  leaves_order],
2160  interpolation='nearest')
2161  cb = fig.colorbar(cax)
2162  cb.set_label('RMSD [Angstroms]')
2163  ax2.set_xlabel('Model')
2164  ax2.set_ylabel('Model')
2165 
2166  pl.savefig(filename, dpi=300)
2167  pl.close(fig)
2168 
2169  ####################
2170  # Internal Functions
2171  ####################
2172 
2173  def update_clusters(self):
2174  """
2175  Update the cluster id numbers
2176  """
2177  for n,c in enumerate(self.clusters):
2178  c.cluster_id=n
2179 
2180  def get_molecule(self, hier, name, copy):
2181  s=IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2182  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2183 
2184  def update_seldicts(self):
2185  """
2186  Update the seldicts
2187  """
2188  self.seldict0=IMP.pmi.tools.get_selections_dictionary(self.sel0_rmsd.get_selected_particles())
2189  self.seldict1=IMP.pmi.tools.get_selections_dictionary(self.sel1_rmsd.get_selected_particles())
2190  for mol in self.seldict0:
2191  for sel in self.seldict0[mol]:
2192  self.issymmetricsel[sel]=False
2193  for mol in self.symmetric_molecules:
2194  self.symmetric_molecules[mol]=len(self.seldict0[mol])
2195  for sel in self.seldict0[mol]:
2196  self.issymmetricsel[sel]=True
2197 
2198 
2199  def align(self):
2200  print("alignment")
2201  tr = IMP.atom.get_transformation_aligning_first_to_second(self.sel1_alignment, self.sel0_alignment)
2202 
2203  for rb in self.rbs1:
2204  IMP.core.transform(rb, tr)
2205 
2206  for bead in self.beads1:
2207  IMP.core.transform(IMP.core.XYZ(bead), tr)
2208 
2209  self.model.update()
2210 
2211  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2212  '''
2213  initial filling of the clusters.
2214  '''
2215  n0 = idxs.pop()
2216  print("clustering model "+str(n0))
2217  d0 = self.stath0[n0]
2218  c = IMP.pmi.output.Cluster(len(self.clusters))
2219  print("creating cluster index "+str(len(self.clusters)))
2220  self.clusters.append(c)
2221  c.add_member(n0,d0)
2222  clustered = set([n0])
2223  for n1 in idxs:
2224  print("--- trying to add model "+str(n1)+" to cluster "+str(len(self.clusters)))
2225  d1 = self.stath1[n1]
2226  if self.alignment: self.align()
2227  rmsd, _ = self.rmsd(metric=metric)
2228  if rmsd<rmsd_cutoff:
2229  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2230  c.add_member(n1,d1)
2231  clustered.add(n1)
2232  else:
2233  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2234  idxs-=clustered
2235 
2236  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2237  """
2238  merge the clusters that have close members
2239  @param rmsd_cutoff cutoff distance in Angstorms
2240  """
2241  # before merging, clusters are spheres of radius rmsd_cutoff centered on the 1st element
2242  # here we only try to merge clusters whose centers are closer than 2*rmsd_cutoff
2243  to_merge = []
2244  print("merging...")
2245  for c0, c1 in filter(lambda x: len(x[0].members)>1, itertools.combinations(self.clusters, 2)):
2246  n0, n1 = [c.members[0] for c in (c0,c1)]
2247  d0 = self.stath0[n0]
2248  d1 = self.stath1[n1]
2249  rmsd, _ = self.rmsd()
2250  if rmsd<2*rmsd_cutoff and self.have_close_members(c0,c1,rmsd_cutoff,metric):
2251  to_merge.append((c0,c1))
2252 
2253  for c0, c in reversed(to_merge):
2254  self.merge(c0,c)
2255 
2256  #keep only full clusters
2257  self.clusters = [c for c in filter(lambda x: len(x.members)>0, self.clusters)]
2258 
2259 
2260  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2261  '''
2262  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2263  '''
2264  print("check close members for clusters "+str(c0.cluster_id)+" and "+str(c1.cluster_id))
2265  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2266  d0 = self.stath0[n0]
2267  d1 = self.stath1[n1]
2268  rmsd, _ = self.rmsd(metric=metric)
2269  if rmsd<rmsd_cutoff:
2270  return True
2271 
2272  return False
2273 
2274 
2275  def merge(self, c0, c1):
2276  '''
2277  merge two clusters
2278  '''
2279  c0+=c1
2280  c1.members=[]
2281  c1.data={}
2282 
2283  def rmsd_helper(self, sels0, sels1, metric):
2284  '''
2285  a function that returns the permutation best_sel of sels0 that minimizes metric
2286  '''
2287  best_rmsd2 = float('inf')
2288  best_sel = None
2289  if self.issymmetricsel[sels0[0]]:
2290  #this cases happens when symmetries were defined
2291  N = len(sels0)
2292  for offset in range(N):
2293  order=[(offset+i)%N for i in range(N)]
2294  sels = [sels0[(offset+i)%N] for i in range(N)]
2295  sel0 = sels[0]
2296  sel1 = sels1[0]
2297  r=metric(sel0, sel1)
2298  rmsd2=r*r*N
2299  ###print(order,rmsd2)
2300  if rmsd2 < best_rmsd2:
2301  best_rmsd2 = rmsd2
2302  best_sel = sels
2303  best_order=order
2304  else:
2305  for sels in itertools.permutations(sels0):
2306  rmsd2=0.0
2307  for sel0, sel1 in itertools.takewhile(lambda x: rmsd2<best_rmsd2, zip(sels, sels1)):
2308  r=metric(sel0, sel1)
2309  rmsd2+=r*r
2310  if rmsd2 < best_rmsd2:
2311  best_rmsd2 = rmsd2
2312  best_sel = sels
2313  ###for i,sel in enumerate(best_sel):
2314  ### p0 = sel.get_selected_particles()[0]
2315  ### p1 = sels1[i].get_selected_particles()[0]
2316  ### m0 = IMP.pmi.tools.get_molecules([p0])[0]
2317  ### m1 = IMP.pmi.tools.get_molecules([p1])[0]
2318  ### c0 = IMP.atom.Copy(m0).get_copy_index()
2319  ### c1 = IMP.atom.Copy(m1).get_copy_index()
2320  ### name0=m0.get_name()
2321  ### name1=m1.get_name()
2322  ### print("WWW",name0,name1,c0,c1)
2323  ###print(best_order,best_rmsd2,m0,m1)
2324  return best_sel, best_rmsd2
2325 
2326  def compute_all_pairwise_rmsd(self):
2327  for d0 in self.stath0:
2328  for d1 in self.stath1:
2329  rmsd, _ = self.rmsd()
2330 
2331  def rmsd(self,metric=IMP.atom.get_rmsd):
2332  '''
2333  Computes the RMSD. Resolves ambiguous pairs assignments
2334  '''
2335  # here we memoize the rmsd and molecular assignment so that it's not done multiple times
2336  n0=self.stath0.current_index
2337  n1=self.stath1.current_index
2338  if ((n0,n1) in self.pairwise_rmsd) and ((n0,n1) in self.pairwise_molecular_assignment):
2339  return self.pairwise_rmsd[(n0,n1)], self.pairwise_molecular_assignment[(n0,n1)]
2340 
2341  if self.alignment:
2342  self.align()
2343  #if it's not yet memoized
2344  total_rmsd=0.0
2345  total_N=0
2346  # this is a dictionary which keys are the molecule names, and values are the list of IMP.atom.Selection for all molecules that share the molecule name
2347  seldict_best_order={}
2348  molecular_assignment={}
2349  for molname, sels0 in self.seldict0.items():
2350  sels_best_order, best_rmsd2 = self.rmsd_helper(sels0, self.seldict1[molname], metric)
2351 
2352  Ncoords = len(sels_best_order[0].get_selected_particles())
2353  Ncopies = len(self.seldict1[molname])
2354  total_rmsd += Ncoords*best_rmsd2
2355  total_N += Ncoords*Ncopies
2356 
2357  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2358  p0 = sel0.get_selected_particles()[0]
2359  p1 = sel1.get_selected_particles()[0]
2360  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2361  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2362  c0 = IMP.atom.Copy(m0).get_copy_index()
2363  c1 = IMP.atom.Copy(m1).get_copy_index()
2364  ###print(molname,c0,c1)
2365  molecular_assignment[(molname,c0)]=(molname,c1)
2366 
2367  total_rmsd = math.sqrt(total_rmsd/total_N)
2368 
2369  self.pairwise_rmsd[(n0,n1)]=total_rmsd
2370  self.pairwise_molecular_assignment[(n0,n1)]=molecular_assignment
2371  self.pairwise_rmsd[(n1,n0)]=total_rmsd
2372  self.pairwise_molecular_assignment[(n1,n0)]=molecular_assignment
2373  ###print(n0,n1,molecular_assignment)
2374  return total_rmsd, molecular_assignment
2375 
2376  def set_reference(self,reference,cluster):
2377  """
2378  Fix the reference structure for structural alignment, rmsd and chain assignemnt
2379  @param reference can be either "Absolute" (cluster center of the first cluster)
2380  or Relative (cluster center of the current cluster)
2381  """
2382  if reference=="Absolute":
2383  d0=self.stath0[0]
2384  elif reference=="Relative":
2385  if cluster.center_index:
2386  n0=cluster.center_index
2387  else:
2388  n0=cluster.members[0]
2389  d0=self.stath0[n0]
2390 
2392  """
2393  compute the molecular assignemnts between multiple copies
2394  of the same sequence. It changes the Copy index of Molecules
2395  """
2396  d1=self.stath1[n1]
2397  _, molecular_assignment = self.rmsd()
2398  for (m0, c0), (m1,c1) in molecular_assignment.items():
2399  mol0 = self.molcopydict0[m0][c0]
2400  mol1 = self.molcopydict1[m1][c1]
2401  cik0=IMP.atom.Copy(mol0).get_copy_index_key()
2402  p1=IMP.atom.Copy(mol1).get_particle()
2403  p1.set_value(cik0,c0)
2404 
2406  """
2407  Undo the Copy index assignment
2408  """
2409  d1=self.stath1[n1]
2410  _, molecular_assignment = self.rmsd()
2411  mols_newcopys = []
2412  for (m0, c0), (m1,c1) in molecular_assignment.items():
2413  mol0 = self.molcopydict0[m0][c0]
2414  mol1 = self.molcopydict1[m1][c1]
2415  cik0=IMP.atom.Copy(mol0).get_copy_index_key()
2416  p1=IMP.atom.Copy(mol1).get_particle()
2417  p1.set_value(cik0,c1)
2418 
2419  ####################
2420  # Container Functions
2421  ####################
2422 
2423  def __repr__(self):
2424  s= "AnalysisReplicaExchange\n"
2425  s+="---- number of clusters %s \n"%str(len(self.clusters))
2426  s+="---- number of models %s \n"%str(len(self.stath0))
2427  return s
2428 
2429  def __getitem__(self,int_slice_adaptor):
2430  if isinstance(int_slice_adaptor, int):
2431  return self.clusters[int_slice_adaptor]
2432  elif isinstance(int_slice_adaptor, slice):
2433  return self.__iter__(int_slice_adaptor)
2434  else:
2435  raise TypeError("Unknown Type")
2436 
2437  def __len__(self):
2438  return len(self.clusters)
2439 
2440  def __iter__(self,slice_key=None):
2441  if slice_key is None:
2442  for i in range(len(self)):
2443  yield self[i]
2444  else:
2445  for i in range(len(self))[slice_key]:
2446  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2331
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
Definition: macros.py:2376
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:1679
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1482
def precision
Compute the precision of a cluster.
Definition: macros.py:1774
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:816
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:105
A container for models organized into clusters.
Definition: output.py:1317
Sample using molecular dynamics.
Definition: samplers.py:389
def aggregate
initial filling of the clusters.
Definition: macros.py:2211
A class for reading stat files (either rmf or ascii v1 and v2)
Definition: output.py:769
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:575
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:576
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:1740
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:1821
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1573
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:1536
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:1617
def __init__
Construction of the Class.
Definition: macros.py:1497
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:733
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:1634
def __init__
Constructor.
Definition: macros.py:113
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:2405
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:1552
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2283
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:1718
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:961
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
Definition: macros.py:2391
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:1487
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:575
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2236
This class initializes the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:869
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
Representation of the system.
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:1701
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:878
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:23
def update_seldicts
Update the seldicts.
Definition: macros.py:2184
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2173
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:924
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:1581
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:62
Collect timing information.
Definition: tools.py:119
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:1545
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: utilities.h:91
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9704
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:36
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2275
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:619
The general base class for IMP exceptions.
Definition: exception.h:49
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1081
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:1627
Class to handle individual particles of a Model object.
Definition: Particle.h:41
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:742
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:1803
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:597
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:1568
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:1667
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2260
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
A macro to help setup and run replica exchange.
Definition: macros.py:57
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1550
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:1642
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:882
Sample using replica exchange.
Definition: samplers.py:496
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27