IMP logo
IMP Reference Guide  2.10.0
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
8 import IMP.pmi.tools
9 import IMP.pmi.samplers
10 import IMP.pmi.output
11 import IMP.pmi.analysis
12 import IMP.pmi.io
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import RMF
17 import os
18 import glob
19 from operator import itemgetter
20 from collections import defaultdict
21 import numpy as np
22 import string
23 import itertools
24 import math
25 
26 class _RMFRestraints(object):
27  """All restraints that are written out to the RMF file"""
28  def __init__(self, model, user_restraints):
29  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
30  self._user_restraints = user_restraints if user_restraints else []
31 
32  def __len__(self):
33  return (len(self._user_restraints)
34  + self._rmf_rs.get_number_of_restraints())
35 
36  def __bool__(self):
37  return len(self) > 0
38  __nonzero__ = __bool__ # Python 2 compatibility
39 
40  def __getitem__(self, i):
41  class FakePMIWrapper(object):
42  def __init__(self, r):
43  self.r = IMP.RestraintSet.get_from(r)
44  get_restraint = lambda self: self.r
45  lenuser = len(self._user_restraints)
46  if 0 <= i < lenuser:
47  return self._user_restraints[i]
48  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
49  r = self._rmf_rs.get_restraint(i - lenuser)
50  return FakePMIWrapper(r)
51  else:
52  raise IndexError("Out of range")
53 
54 
55 class ReplicaExchange0(object):
56  """A macro to help setup and run replica exchange.
57  Supports Monte Carlo and molecular dynamics.
58  Produces trajectory RMF files, best PDB structures,
59  and output stat files.
60  """
61  def __init__(self, model,
62  representation=None,
63  root_hier=None,
64  sample_objects=None, # DEPRECATED
65  monte_carlo_sample_objects=None,
66  molecular_dynamics_sample_objects=None,
67  output_objects=[],
68  rmf_output_objects=None,
69  crosslink_restraints=None,
70  monte_carlo_temperature=1.0,
71  simulated_annealing=False,
72  simulated_annealing_minimum_temperature=1.0,
73  simulated_annealing_maximum_temperature=2.5,
74  simulated_annealing_minimum_temperature_nframes=100,
75  simulated_annealing_maximum_temperature_nframes=100,
76  replica_exchange_minimum_temperature=1.0,
77  replica_exchange_maximum_temperature=2.5,
78  replica_exchange_swap=True,
79  num_sample_rounds=1,
80  number_of_best_scoring_models=500,
81  monte_carlo_steps=10,
82  self_adaptive=False,
83  molecular_dynamics_steps=10,
84  molecular_dynamics_max_time_step=1.0,
85  number_of_frames=1000,
86  save_coordinates_mode="lowest_temperature",
87  nframes_write_coordinates=1,
88  write_initial_rmf=True,
89  initial_rmf_name_suffix="initial",
90  stat_file_name_suffix="stat",
91  best_pdb_name_suffix="model",
92  do_clean_first=True,
93  do_create_directories=True,
94  global_output_directory="./",
95  rmf_dir="rmfs/",
96  best_pdb_dir="pdbs/",
97  replica_stat_file_suffix="stat_replica",
98  em_object_for_rmf=None,
99  atomistic=False,
100  replica_exchange_object=None,
101  test_mode=False):
102  """Constructor.
103  @param model The IMP model
104  @param representation PMI.representation.Representation object
105  (or list of them, for multi-state modeling)
106  @param root_hier Instead of passing Representation, pass the System hierarchy
107  @param monte_carlo_sample_objects Objects for MC sampling; a list of
108  structural components (generally the representation) that will
109  be moved and restraints with parameters that need to
110  be sampled.
111  For PMI2: just pass flat list of movers
112  @param molecular_dynamics_sample_objects Objects for MD sampling
113  For PMI2: just pass flat list of particles
114  @param output_objects A list of structural objects and restraints
115  that will be included in output (ie, statistics "stat" files). Any object
116  that provides a get_output() method can be used here. If None is passed
117  the macro will not write stat files.
118  @param rmf_output_objects A list of structural objects and restraints
119  that will be included in rmf. Any object
120  that provides a get_output() method can be used here.
121  @param crosslink_restraints List of cross-link restraints that will
122  be included in output RMF files (for visualization).
123  @param monte_carlo_temperature MC temp (may need to be optimized
124  based on post-sampling analysis)
125  @param simulated_annealing If True, perform simulated annealing
126  @param simulated_annealing_minimum_temperature Should generally be
127  the same as monte_carlo_temperature.
128  @param simulated_annealing_minimum_temperature_nframes Number of
129  frames to compute at minimum temperature.
130  @param simulated_annealing_maximum_temperature_nframes Number of
131  frames to compute at
132  temps > simulated_annealing_maximum_temperature.
133  @param replica_exchange_minimum_temperature Low temp for REX; should
134  generally be the same as monte_carlo_temperature.
135  @param replica_exchange_maximum_temperature High temp for REX
136  @param replica_exchange_swap Boolean, enable disable temperature
137  swap (Default=True)
138  @param num_sample_rounds Number of rounds of MC/MD per cycle
139  @param number_of_best_scoring_models Number of top-scoring PDB models
140  to keep around for analysis
141  @param monte_carlo_steps Number of MC steps per round
142  @param self_adaptive self adaptive scheme for monte carlo movers
143  @param molecular_dynamics_steps Number of MD steps per round
144  @param molecular_dynamics_max_time_step Max time step for MD
145  @param number_of_frames Number of REX frames to run
146  @param save_coordinates_mode string: how to save coordinates.
147  "lowest_temperature" (default) only the lowest temperatures is saved
148  "25th_score" all replicas whose score is below the 25th percentile
149  "50th_score" all replicas whose score is below the 50th percentile
150  "75th_score" all replicas whose score is below the 75th percentile
151  @param nframes_write_coordinates How often to write the coordinates
152  of a frame
153  @param write_initial_rmf Write the initial configuration
154  @param global_output_directory Folder that will be created to house
155  output.
156  @param test_mode Set to True to avoid writing any files, just test one frame.
157  """
158  self.model = model
159  self.vars = {}
160  self.pmi2 = False
161 
162  ### add check hierarchy is multistate
163  self.output_objects = output_objects
164  self.rmf_output_objects=rmf_output_objects
165  self.representation = representation
166  if representation:
167  if type(representation) == list:
168  self.is_multi_state = True
169  self.root_hiers = [r.prot for r in representation]
170  self.vars["number_of_states"] = len(representation)
171  else:
172  self.is_multi_state = False
173  self.root_hier = representation.prot
174  self.vars["number_of_states"] = 1
175  elif root_hier and type(root_hier) == IMP.atom.Hierarchy and root_hier.get_name()=='System':
176  self.pmi2 = True
177  if self.output_objects is not None:
178  self.output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
179  if self.rmf_output_objects is not None:
180  self.rmf_output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
181  self.root_hier = root_hier
182  states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
183  self.vars["number_of_states"] = len(states)
184  if len(states)>1:
185  self.root_hiers = states
186  self.is_multi_state = True
187  else:
188  self.root_hier = root_hier
189  self.is_multi_state = False
190  else:
191  raise Exception("Must provide representation or System hierarchy (root_hier)")
192 
193  self._rmf_restraints = _RMFRestraints(model, crosslink_restraints)
194  self.em_object_for_rmf = em_object_for_rmf
195  self.monte_carlo_sample_objects = monte_carlo_sample_objects
196  self.vars["self_adaptive"]=self_adaptive
197  if sample_objects is not None:
198  self.monte_carlo_sample_objects+=sample_objects
199  self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
200  self.replica_exchange_object = replica_exchange_object
201  self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
202  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
203  self.vars[
204  "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
205  self.vars[
206  "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
207  self.vars["replica_exchange_swap"] = replica_exchange_swap
208  self.vars["simulated_annealing"]=\
209  simulated_annealing
210  self.vars["simulated_annealing_minimum_temperature"]=\
211  simulated_annealing_minimum_temperature
212  self.vars["simulated_annealing_maximum_temperature"]=\
213  simulated_annealing_maximum_temperature
214  self.vars["simulated_annealing_minimum_temperature_nframes"]=\
215  simulated_annealing_minimum_temperature_nframes
216  self.vars["simulated_annealing_maximum_temperature_nframes"]=\
217  simulated_annealing_maximum_temperature_nframes
218 
219  self.vars["num_sample_rounds"] = num_sample_rounds
220  self.vars[
221  "number_of_best_scoring_models"] = number_of_best_scoring_models
222  self.vars["monte_carlo_steps"] = monte_carlo_steps
223  self.vars["molecular_dynamics_steps"]=molecular_dynamics_steps
224  self.vars["number_of_frames"] = number_of_frames
225  if not save_coordinates_mode in ["lowest_temperature","25th_score","50th_score","75th_score"]:
226  raise Exception("save_coordinates_mode has unrecognized value")
227  else:
228  self.vars["save_coordinates_mode"] = save_coordinates_mode
229  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
230  self.vars["write_initial_rmf"] = write_initial_rmf
231  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
232  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
233  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
234  self.vars["do_clean_first"] = do_clean_first
235  self.vars["do_create_directories"] = do_create_directories
236  self.vars["global_output_directory"] = global_output_directory
237  self.vars["rmf_dir"] = rmf_dir
238  self.vars["best_pdb_dir"] = best_pdb_dir
239  self.vars["atomistic"] = atomistic
240  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
241  self.vars["geometries"] = None
242  self.test_mode = test_mode
243 
244  def add_geometries(self, geometries):
245  if self.vars["geometries"] is None:
246  self.vars["geometries"] = list(geometries)
247  else:
248  self.vars["geometries"].extend(geometries)
249 
250  def show_info(self):
251  print("ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
252  print("--- it stores the best scoring pdb models in pdbs/")
253  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
254  print("--- variables:")
255  keys = list(self.vars.keys())
256  keys.sort()
257  for v in keys:
258  print("------", v.ljust(30), self.vars[v])
259 
260  def get_replica_exchange_object(self):
261  return self.replica_exchange_object
262 
263  def _add_provenance(self, sampler_md, sampler_mc):
264  """Record details about the sampling in the IMP Hierarchies"""
265  if not self.is_multi_state or self.pmi2:
266  output_hierarchies = [self.root_hier]
267  else:
268  output_hierarchies = self.root_hiers
269 
270  iterations = 0
271  if sampler_md:
272  method = "Molecular Dynamics"
273  iterations += self.vars["molecular_dynamics_steps"]
274  if sampler_mc:
275  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
276  iterations += self.vars["monte_carlo_steps"]
277  # If no sampling is actually done, no provenance to write
278  if iterations == 0 or self.vars["number_of_frames"] == 0:
279  return
280  iterations *= self.vars["num_sample_rounds"]
281 
282  for h in output_hierarchies:
283  pi = self.model.add_particle("sampling")
285  self.model, pi, method, self.vars["number_of_frames"],
286  iterations)
287  p.set_number_of_replicas(
288  self.replica_exchange_object.get_number_of_replicas())
289  IMP.pmi.tools._add_pmi_provenance(h)
290  IMP.core.add_provenance(self.model, h, p)
291 
292  def execute_macro(self):
293  temp_index_factor = 100000.0
294  samplers=[]
295  sampler_mc=None
296  sampler_md=None
297  if self.monte_carlo_sample_objects is not None:
298  print("Setting up MonteCarlo")
299  sampler_mc = IMP.pmi.samplers.MonteCarlo(self.model,
300  self.monte_carlo_sample_objects,
301  self.vars["monte_carlo_temperature"])
302  if self.vars["simulated_annealing"]:
303  tmin=self.vars["simulated_annealing_minimum_temperature"]
304  tmax=self.vars["simulated_annealing_maximum_temperature"]
305  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
306  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
307  sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
308  if self.vars["self_adaptive"]:
309  sampler_mc.set_self_adaptive(isselfadaptive=self.vars["self_adaptive"])
310  if self.output_objects is not None:
311  self.output_objects.append(sampler_mc)
312  if self.rmf_output_objects is not None:
313  self.rmf_output_objects.append(sampler_mc)
314  samplers.append(sampler_mc)
315 
316 
317  if self.molecular_dynamics_sample_objects is not None:
318  print("Setting up MolecularDynamics")
319  sampler_md = IMP.pmi.samplers.MolecularDynamics(self.model,
320  self.molecular_dynamics_sample_objects,
321  self.vars["monte_carlo_temperature"],
322  maximum_time_step=self.molecular_dynamics_max_time_step)
323  if self.vars["simulated_annealing"]:
324  tmin=self.vars["simulated_annealing_minimum_temperature"]
325  tmax=self.vars["simulated_annealing_maximum_temperature"]
326  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
327  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
328  sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
329  if self.output_objects is not None:
330  self.output_objects.append(sampler_md)
331  if self.rmf_output_objects is not None:
332  self.rmf_output_objects.append(sampler_md)
333  samplers.append(sampler_md)
334 # -------------------------------------------------------------------------
335 
336  print("Setting up ReplicaExchange")
337  rex = IMP.pmi.samplers.ReplicaExchange(self.model,
338  self.vars[
339  "replica_exchange_minimum_temperature"],
340  self.vars[
341  "replica_exchange_maximum_temperature"],
342  samplers,
343  replica_exchange_object=self.replica_exchange_object)
344  self.replica_exchange_object = rex.rem
345 
346  myindex = rex.get_my_index()
347  if self.output_objects is not None:
348  self.output_objects.append(rex)
349  if self.rmf_output_objects is not None:
350  self.rmf_output_objects.append(rex)
351  # must reset the minimum temperature due to the
352  # different binary length of rem.get_my_parameter double and python
353  # float
354  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
355 
356 # -------------------------------------------------------------------------
357 
358  globaldir = self.vars["global_output_directory"] + "/"
359  rmf_dir = globaldir + self.vars["rmf_dir"]
360  pdb_dir = globaldir + self.vars["best_pdb_dir"]
361 
362  if not self.test_mode:
363  if self.vars["do_clean_first"]:
364  pass
365 
366  if self.vars["do_create_directories"]:
367 
368  try:
369  os.makedirs(globaldir)
370  except:
371  pass
372  try:
373  os.makedirs(rmf_dir)
374  except:
375  pass
376 
377  if not self.is_multi_state:
378  try:
379  os.makedirs(pdb_dir)
380  except:
381  pass
382  else:
383  for n in range(self.vars["number_of_states"]):
384  try:
385  os.makedirs(pdb_dir + "/" + str(n))
386  except:
387  pass
388 
389 # -------------------------------------------------------------------------
390 
392  if self.output_objects is not None:
393  self.output_objects.append(sw)
394  if self.rmf_output_objects is not None:
395  self.rmf_output_objects.append(sw)
396 
397  print("Setting up stat file")
398  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
399  low_temp_stat_file = globaldir + \
400  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
401 
402  # Ensure model is updated before saving init files
403  if not self.test_mode:
404  self.model.update()
405 
406  if not self.test_mode:
407  if self.output_objects is not None:
408  output.init_stat2(low_temp_stat_file,
409  self.output_objects,
410  extralabels=["rmf_file", "rmf_frame_index"])
411  else:
412  print("Stat file writing is disabled")
413 
414  if self.rmf_output_objects is not None:
415  print("Stat info being written in the rmf file")
416 
417  print("Setting up replica stat file")
418  replica_stat_file = globaldir + \
419  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
420  if not self.test_mode:
421  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
422 
423  if not self.test_mode:
424  print("Setting up best pdb files")
425  if not self.is_multi_state:
426  if self.vars["number_of_best_scoring_models"] > 0:
427  output.init_pdb_best_scoring(pdb_dir + "/" +
428  self.vars["best_pdb_name_suffix"],
429  self.root_hier,
430  self.vars[
431  "number_of_best_scoring_models"],
432  replica_exchange=True)
433  output.write_psf(pdb_dir + "/" +"model.psf",pdb_dir + "/" +
434  self.vars["best_pdb_name_suffix"]+".0.pdb")
435  else:
436  if self.vars["number_of_best_scoring_models"] > 0:
437  for n in range(self.vars["number_of_states"]):
438  output.init_pdb_best_scoring(pdb_dir + "/" + str(n) + "/" +
439  self.vars["best_pdb_name_suffix"],
440  self.root_hiers[n],
441  self.vars[
442  "number_of_best_scoring_models"],
443  replica_exchange=True)
444  output.write_psf(pdb_dir + "/" + str(n) + "/" +"model.psf",pdb_dir + "/" + str(n) + "/" +
445  self.vars["best_pdb_name_suffix"]+".0.pdb")
446 # ---------------------------------------------
447 
448  if self.em_object_for_rmf is not None:
449  if not self.is_multi_state or self.pmi2:
450  output_hierarchies = [
451  self.root_hier,
452  self.em_object_for_rmf.get_density_as_hierarchy(
453  )]
454  else:
455  output_hierarchies = self.root_hiers
456  output_hierarchies.append(
457  self.em_object_for_rmf.get_density_as_hierarchy())
458  else:
459  if not self.is_multi_state or self.pmi2:
460  output_hierarchies = [self.root_hier]
461  else:
462  output_hierarchies = self.root_hiers
463 
464 #----------------------------------------------
465  if not self.test_mode:
466  print("Setting up and writing initial rmf coordinate file")
467  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
468  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
469  output_hierarchies,listofobjects=self.rmf_output_objects)
470  if self._rmf_restraints:
471  output.add_restraints_to_rmf(
472  init_suffix + "." + str(myindex) + ".rmf3",
473  self._rmf_restraints)
474  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
475  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
476 
477 #----------------------------------------------
478 
479  if not self.test_mode:
480  mpivs=IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
481 
482 #----------------------------------------------
483 
484  self._add_provenance(sampler_md, sampler_mc)
485 
486  if not self.test_mode:
487  print("Setting up production rmf files")
488  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
489  output.init_rmf(rmfname, output_hierarchies, geometries=self.vars["geometries"],
490  listofobjects = self.rmf_output_objects)
491 
492  if self._rmf_restraints:
493  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
494 
495  ntimes_at_low_temp = 0
496 
497  if myindex == 0:
498  self.show_info()
499  self.replica_exchange_object.set_was_used(True)
500  nframes = self.vars["number_of_frames"]
501  if self.test_mode:
502  nframes = 1
503  for i in range(nframes):
504  if self.test_mode:
505  score = 0.
506  else:
507  for nr in range(self.vars["num_sample_rounds"]):
508  if sampler_md is not None:
509  sampler_md.optimize(
510  self.vars["molecular_dynamics_steps"])
511  if sampler_mc is not None:
512  sampler_mc.optimize(self.vars["monte_carlo_steps"])
514  self.model).evaluate(False)
515  mpivs.set_value("score",score)
516  output.set_output_entry("score", score)
517 
518 
519 
520  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
521 
522  if self.vars["save_coordinates_mode"] == "lowest_temperature":
523  save_frame=(min_temp_index == my_temp_index)
524  elif self.vars["save_coordinates_mode"] == "25th_score":
525  score_perc=mpivs.get_percentile("score")
526  save_frame=(score_perc*100.0<=25.0)
527  elif self.vars["save_coordinates_mode"] == "50th_score":
528  score_perc=mpivs.get_percentile("score")
529  save_frame=(score_perc*100.0<=50.0)
530  elif self.vars["save_coordinates_mode"] == "75th_score":
531  score_perc=mpivs.get_percentile("score")
532  save_frame=(score_perc*100.0<=75.0)
533 
534  # Ensure model is updated before saving output files
535  if save_frame or not self.test_mode:
536  self.model.update()
537 
538  if save_frame:
539  print("--- frame %s score %s " % (str(i), str(score)))
540 
541  if not self.test_mode:
542  if i % self.vars["nframes_write_coordinates"]==0:
543  print('--- writing coordinates')
544  if self.vars["number_of_best_scoring_models"] > 0:
545  output.write_pdb_best_scoring(score)
546  output.write_rmf(rmfname)
547  output.set_output_entry("rmf_file", rmfname)
548  output.set_output_entry("rmf_frame_index", ntimes_at_low_temp)
549  else:
550  output.set_output_entry("rmf_file", rmfname)
551  output.set_output_entry("rmf_frame_index", '-1')
552  if self.output_objects is not None:
553  output.write_stat2(low_temp_stat_file)
554  ntimes_at_low_temp += 1
555 
556  if not self.test_mode:
557  output.write_stat2(replica_stat_file)
558  if self.vars["replica_exchange_swap"]:
559  rex.swap_temp(i, score)
560  for p, state in IMP.pmi.tools._all_protocol_outputs(
561  [self.representation],
562  self.root_hier if self.pmi2 else None):
563  p.add_replica_exchange(state, self)
564 
565  if not self.test_mode:
566  print("closing production rmf files")
567  output.close_rmf(rmfname)
568 
569 
570 
571 # ----------------------------------------------------------------------
572 class BuildSystem(object):
573  """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
574  Easily create multi-state systems by calling this macro
575  repeatedly with different TopologyReader objects!
576  A useful function is get_molecules() which returns the PMI Molecules grouped by state
577  as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
578  Quick multi-state system:
579  @code{.python}
580  model = IMP.Model()
581  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
582  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
583  bs = IMP.pmi.macros.BuildSystem(model)
584  bs.add_state(reader1)
585  bs.add_state(reader2)
586  bs.execute_macro() # build everything including degrees of freedom
587  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
588  ### now you have a two state system, you add restraints etc
589  @endcode
590  @note The "domain name" entry of the topology reader is not used.
591  All molecules are set up by the component name, but split into rigid bodies
592  as requested.
593  """
594  def __init__(self,
595  model,
596  sequence_connectivity_scale=4.0,
597  force_create_gmm_files=False,
598  resolutions=[1,10]):
599  """Constructor
600  @param model An IMP Model
601  @param sequence_connectivity_scale For scaling the connectivity restraint
602  @param force_create_gmm_files If True, will sample and create GMMs
603  no matter what. If False, will only sample if the
604  files don't exist. If number of Gaussians is zero, won't
605  do anything.
606  @param resolutions The resolutions to build for structured regions
607  """
608  self.model = model
609  self.system = IMP.pmi.topology.System(self.model)
610  self._readers = [] # the TopologyReaders (one per state)
611  self._domain_res = [] # TempResidues for each domain key=unique name, value=(atomic_res,non_atomic_res).
612  self._domains = [] # key = domain unique name, value = Component
613  self.force_create_gmm_files = force_create_gmm_files
614  self.resolutions = resolutions
615 
616  @property
617  @IMP.deprecated_method("2.10", "Model should be accessed with `.model`.")
618  def mdl(self):
619  return self.model
620 
621  def add_state(self,
622  reader,
623  keep_chain_id=False, fasta_name_map=None):
624  """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
625  When you are done adding states, call execute_macro()
626  @param reader The TopologyReader object
627  @param fasta_name_map dictionary for converting protein names found in the fasta file
628  """
629  state = self.system.create_state()
630  self._readers.append(reader)
631  these_domain_res = {} # key is unique name, value is (atomic res, nonatomicres)
632  these_domains = {} # key is unique name, value is _Component
633  chain_ids = string.ascii_uppercase+string.ascii_lowercase+'0123456789'
634  numchain = 0
635 
636  ### setup representation
637  # loop over molecules, copies, then domains
638  for molname in reader.get_molecules():
639  copies = reader.get_molecules()[molname].domains
640  for nc,copyname in enumerate(copies):
641  print("BuildSystem.add_state: setting up molecule %s copy number %s" % (molname,str(nc)))
642  copy = copies[copyname]
643  # option to not rename chains
644  if keep_chain_id == True:
645  chain_id = copy[0].chain
646  else:
647  chain_id = chain_ids[numchain]
648  if nc==0:
649  is_nucleic=False
650  fasta_flag=copy[0].fasta_flag
651  if fasta_flag == "RNA" or fasta_flag == "DNA": is_nucleic=True
652  seq = IMP.pmi.topology.Sequences(copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
653  print("BuildSystem.add_state: molecule %s sequence has %s residues" % (molname,len(seq)))
654  orig_mol = state.create_molecule(molname,
655  seq,
656  chain_id,is_nucleic)
657  mol = orig_mol
658  numchain+=1
659  else:
660  print("BuildSystem.add_state: creating a copy for molecule %s" % molname)
661  mol = orig_mol.create_copy(chain_id)
662  numchain+=1
663 
664  for domainnumber,domain in enumerate(copy):
665  print("BuildSystem.add_state: ---- setting up domain %s of molecule %s" % (domainnumber,molname))
666  # we build everything in the residue range, even if it
667  # extends beyond what's in the actual PDB file
668  these_domains[domain.get_unique_name()] = domain
669  if domain.residue_range==[] or domain.residue_range is None:
670  domain_res = mol.get_residues()
671  else:
672  start = domain.residue_range[0]+domain.pdb_offset
673  if domain.residue_range[1]=='END':
674  end = len(mol.sequence)
675  else:
676  end = domain.residue_range[1]+domain.pdb_offset
677  domain_res = mol.residue_range(start-1,end-1)
678  print("BuildSystem.add_state: -------- domain %s of molecule %s extends from residue %s to residue %s " % (domainnumber,molname,start,end))
679  if domain.pdb_file=="BEADS":
680  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by BEADS " % (domainnumber,molname))
681  mol.add_representation(
682  domain_res,
683  resolutions=[domain.bead_size],
684  setup_particles_as_densities=(
685  domain.em_residues_per_gaussian!=0),
686  color = domain.color)
687  these_domain_res[domain.get_unique_name()] = (set(),domain_res)
688  elif domain.pdb_file=="IDEAL_HELIX":
689  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by IDEAL_HELIX " % (domainnumber,molname))
690  mol.add_representation(
691  domain_res,
692  resolutions=self.resolutions,
693  ideal_helix=True,
694  density_residues_per_component=domain.em_residues_per_gaussian,
695  density_prefix=domain.density_prefix,
696  density_force_compute=self.force_create_gmm_files,
697  color = domain.color)
698  these_domain_res[domain.get_unique_name()] = (domain_res,set())
699  else:
700  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by pdb file %s " % (domainnumber,molname,domain.pdb_file))
701  domain_atomic = mol.add_structure(domain.pdb_file,
702  domain.chain,
703  domain.residue_range,
704  domain.pdb_offset,
705  soft_check=True)
706  domain_non_atomic = domain_res - domain_atomic
707  if not domain.em_residues_per_gaussian:
708  mol.add_representation(domain_atomic,
709  resolutions=self.resolutions,
710  color = domain.color)
711  if len(domain_non_atomic)>0:
712  mol.add_representation(domain_non_atomic,
713  resolutions=[domain.bead_size],
714  color = domain.color)
715  else:
716  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by gaussians " % (domainnumber,molname))
717  mol.add_representation(
718  domain_atomic,
719  resolutions=self.resolutions,
720  density_residues_per_component=domain.em_residues_per_gaussian,
721  density_prefix=domain.density_prefix,
722  density_force_compute=self.force_create_gmm_files,
723  color = domain.color)
724  if len(domain_non_atomic)>0:
725  mol.add_representation(domain_non_atomic,
726  resolutions=[domain.bead_size],
727  setup_particles_as_densities=True,
728  color = domain.color)
729  these_domain_res[domain.get_unique_name()] = (
730  domain_atomic,domain_non_atomic)
731  self._domain_res.append(these_domain_res)
732  self._domains.append(these_domains)
733  print('BuildSystem.add_state: State',len(self.system.states),'added')
734 
735  def get_molecules(self):
736  """Return list of all molecules grouped by state.
737  For each state, it's a dictionary of Molecules where key is the molecule name
738  """
739  return [s.get_molecules() for s in self.system.get_states()]
740 
741  def get_molecule(self,molname,copy_index=0,state_index=0):
742  return self.system.get_states()[state_index].get_molecules()[molname][copy_index]
743 
744  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04, max_bead_trans=4.0, max_srb_trans=4.0,max_srb_rot=0.04):
745  """Builds representations and sets up degrees of freedom"""
746  print("BuildSystem.execute_macro: building representations")
747  self.root_hier = self.system.build()
748 
749  print("BuildSystem.execute_macro: setting up degrees of freedom")
750  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.model)
751  for nstate,reader in enumerate(self._readers):
752  rbs = reader.get_rigid_bodies()
753  srbs = reader.get_super_rigid_bodies()
754  csrbs = reader.get_chains_of_super_rigid_bodies()
755 
756  # add rigid bodies
757  domains_in_rbs = set()
758  for rblist in rbs:
759  print("BuildSystem.execute_macro: -------- building rigid body %s" % (str(rblist)))
760  all_res = IMP.pmi.tools.OrderedSet()
761  bead_res = IMP.pmi.tools.OrderedSet()
762  for dname in rblist:
763  domain = self._domains[nstate][dname]
764  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
765  all_res|=self._domain_res[nstate][dname][0]
766  bead_res|=self._domain_res[nstate][dname][1]
767  domains_in_rbs.add(dname)
768  all_res|=bead_res
769  print("BuildSystem.execute_macro: -------- \
770 creating rigid body with max_trans %s \
771 max_rot %s non_rigid_max_trans %s" \
772  % (str(max_rb_trans),str(max_rb_rot),str(max_bead_trans)))
773  self.dof.create_rigid_body(all_res,
774  nonrigid_parts=bead_res,
775  max_trans=max_rb_trans,
776  max_rot=max_rb_rot,
777  nonrigid_max_trans=max_bead_trans)
778 
779  # if you have any BEAD domains not in an RB, set them as flexible beads
780  for dname in self._domains[nstate]:
781  domain = self._domains[nstate][dname]
782  if domain.pdb_file=="BEADS" and dname not in domains_in_rbs:
783  self.dof.create_flexible_beads(
784  self._domain_res[nstate][dname][1],max_trans=max_bead_trans)
785 
786  # add super rigid bodies
787  for srblist in srbs:
788  print("BuildSystem.execute_macro: -------- building super rigid body %s" % (str(srblist)))
789  all_res = IMP.pmi.tools.OrderedSet()
790  for dname in srblist:
791  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
792  all_res|=self._domain_res[nstate][dname][0]
793  all_res|=self._domain_res[nstate][dname][1]
794 
795  print("BuildSystem.execute_macro: -------- \
796 creating super rigid body with max_trans %s max_rot %s " \
797  % (str(max_srb_trans),str(max_srb_rot)))
798  self.dof.create_super_rigid_body(all_res,max_trans=max_srb_trans,max_rot=max_srb_rot)
799 
800  # add chains of super rigid bodies
801  for csrblist in csrbs:
802  all_res = IMP.pmi.tools.OrderedSet()
803  for dname in csrblist:
804  all_res|=self._domain_res[nstate][dname][0]
805  all_res|=self._domain_res[nstate][dname][1]
806  all_res = list(all_res)
807  all_res.sort(key=lambda r:r.get_index())
808  #self.dof.create_super_rigid_body(all_res,chain_min_length=2,chain_max_length=3)
809  self.dof.create_main_chain_mover(all_res)
810  return self.root_hier,self.dof
811 
813  "use IMP.pmi.macros.BuildSystem instead")
814 class BuildModel(object):
815  """A macro to build a Representation based on a Topology and lists of movers
816  DEPRECATED - Use BuildSystem instead.
817  """
818  def __init__(self,
819  model,
820  component_topologies,
821  list_of_rigid_bodies=[],
822  list_of_super_rigid_bodies=[],
823  chain_of_super_rigid_bodies=[],
824  sequence_connectivity_scale=4.0,
825  add_each_domain_as_rigid_body=False,
826  force_create_gmm_files=False):
827  """Constructor.
828  @param model The IMP model
829  @param component_topologies List of
830  IMP.pmi.topology.ComponentTopology items
831  @param list_of_rigid_bodies List of lists of domain names that will
832  be moved as rigid bodies.
833  @param list_of_super_rigid_bodies List of lists of domain names
834  that will move together in an additional Monte Carlo move.
835  @param chain_of_super_rigid_bodies List of lists of domain names
836  (choices can only be from the same molecule). Each of these
837  groups will be moved rigidly. This helps to sample more
838  efficiently complex topologies, made of several rigid bodies,
839  connected by flexible linkers.
840  @param sequence_connectivity_scale For scaling the connectivity
841  restraint
842  @param add_each_domain_as_rigid_body That way you don't have to
843  put all of them in the list
844  @param force_create_gmm_files If True, will sample and create GMMs
845  no matter what. If False, will only sample if the
846  files don't exist. If number of Gaussians is zero, won't
847  do anything.
848  """
849  self.model = model
850  self.simo = IMP.pmi.representation.Representation(self.model,
851  upperharmonic=True,
852  disorderedlength=False)
853 
854  data=component_topologies
855  if list_of_rigid_bodies==[]:
856  print("WARNING: No list of rigid bodies inputted to build_model()")
857  if list_of_super_rigid_bodies==[]:
858  print("WARNING: No list of super rigid bodies inputted to build_model()")
859  if chain_of_super_rigid_bodies==[]:
860  print("WARNING: No chain of super rigid bodies inputted to build_model()")
861  all_dnames = set([d for sublist in list_of_rigid_bodies+list_of_super_rigid_bodies\
862  +chain_of_super_rigid_bodies for d in sublist])
863  all_available = set([c._domain_name for c in component_topologies])
864  if not all_dnames <= all_available:
865  raise ValueError("All requested movers must reference domain "
866  "names in the component topologies")
867 
868  self.domain_dict={}
869  self.resdensities={}
870  super_rigid_bodies={}
871  chain_super_rigid_bodies={}
872  rigid_bodies={}
873 
874  for c in data:
875  comp_name = c.molname
876  hier_name = c._domain_name
877  color = 0. # Can't use new-style string colors
878  fasta_file = c.fasta_file
879  fasta_id = c.fasta_id
880  pdb_name = c.pdb_file
881  chain_id = c.chain
882  res_range = c.residue_range
883  offset = c.pdb_offset
884  bead_size = c.bead_size
885  em_num_components = c.em_residues_per_gaussian
886  em_txt_file_name = c.gmm_file
887  em_mrc_file_name = c.mrc_file
888 
889  if comp_name not in self.simo.get_component_names():
890  self.simo.create_component(comp_name,color=0.0)
891  self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
892 
893  # create hierarchy (adds resolutions, beads) with autobuild and optionally add EM data
894  if em_num_components==0:
895  read_em_files=False
896  include_res0=False
897  else:
898  if (not os.path.isfile(em_txt_file_name)) or force_create_gmm_files:
899  read_em_files=False
900  include_res0=True
901  else:
902  read_em_files=True
903  include_res0=False
904 
905  outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
906  res_range,include_res0,beadsize=bead_size,
907  color=color,offset=offset)
908  if em_num_components!=0:
909  if read_em_files:
910  print("will read GMM files")
911  else:
912  print("will calculate GMMs")
913 
914  dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
915  em_mrc_file_name,em_num_components,read_em_files)
916  self.simo.add_all_atom_densities(comp_name, particles=beads)
917  dens_hier+=beads
918  else:
919  dens_hier=[]
920 
921  self.resdensities[hier_name]=dens_hier
922  self.domain_dict[hier_name]=outhier+dens_hier
923 
924  # setup basic restraints
925  for c in self.simo.get_component_names():
926  self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
927  self.simo.setup_component_geometry(c)
928 
929  # create movers
930  for rblist in list_of_rigid_bodies:
931  rb=[]
932  for rbmember in rblist:
933  rb+=[h for h in self.domain_dict[rbmember]]
934  self.simo.set_rigid_body_from_hierarchies(rb)
935  for srblist in list_of_super_rigid_bodies:
936  srb=[]
937  for srbmember in rblist:
938  srb+=[h for h in self.domain_dict[srbmember]]
939  self.simo.set_super_rigid_body_from_hierarchies(srb)
940  for clist in chain_of_super_rigid_bodies:
941  crb=[]
942  for crbmember in rblist:
943  crb+=[h for h in self.domain_dict[crbmember]]
944  self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
945 
946  self.simo.set_floppy_bodies()
947  self.simo.setup_bonds()
948 
949  @property
950  @IMP.deprecated_method("2.10", "Model should be accessed with `.model`.")
951  def m(self):
952  return self.model
953 
955  '''Return the Representation object'''
956  return self.simo
957 
958  def get_density_hierarchies(self,hier_name_list):
959  # return a list of density hierarchies
960  # specify the list of hierarchy names
961  dens_hier_list=[]
962  for hn in hier_name_list:
963  print(hn)
964  dens_hier_list+=self.resdensities[hn]
965  return dens_hier_list
966 
967  def set_gmm_models_directory(self,directory_name):
968  self.gmm_models_directory=directory_name
969 
970  def get_pdb_bead_bits(self,hierarchy):
971  pdbbits=[]
972  beadbits=[]
973  helixbits=[]
974  for h in hierarchy:
975  if "_pdb" in h.get_name():pdbbits.append(h)
976  if "_bead" in h.get_name():beadbits.append(h)
977  if "_helix" in h.get_name():helixbits.append(h)
978  return (pdbbits,beadbits,helixbits)
979 
980  def scale_bead_radii(self,nresidues,scale):
981  scaled_beads=set()
982  for h in self.domain_dict:
983  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
984  slope=(1.0-scale)/(1.0-float(nresidues))
985 
986  for b in beadbits:
987  # I have to do the following
988  # because otherwise we'll scale more than once
989  if b not in scaled_beads:
990  scaled_beads.add(b)
991  else:
992  continue
993  radius=IMP.core.XYZR(b).get_radius()
994  num_residues=len(IMP.pmi.tools.get_residue_indexes(b))
995  scale_factor=slope*float(num_residues)+1.0
996  print(scale_factor)
997  new_radius=scale_factor*radius
998  IMP.core.XYZR(b).set_radius(new_radius)
999  print(b.get_name())
1000  print("particle with radius "+str(radius)+" and "+str(num_residues)+" residues scaled to a new radius "+str(new_radius))
1001 
1002 
1003  def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
1004  #density generation for the EM restraint
1005  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
1006  #get the number of residues from the pdb bits
1007  res_ind=[]
1008  for pb in pdbbits+helixbits:
1009  for p in IMP.core.get_leaves(pb):
1011 
1012  number_of_residues=len(set(res_ind))
1013  outhier=[]
1014  if read:
1015  if len(pdbbits)!=0:
1016  outhier+=simo.add_component_density(compname,
1017  pdbbits,
1018  num_components=num_components,
1019  resolution=0,
1020  inputfile=txtfilename)
1021  if len(helixbits)!=0:
1022  outhier+=simo.add_component_density(compname,
1023  helixbits,
1024  num_components=num_components,
1025  resolution=1,
1026  inputfile=txtfilename)
1027 
1028 
1029  else:
1030  if len(pdbbits)!=0:
1031  num_components=number_of_residues//abs(num_components)+1
1032  outhier+=simo.add_component_density(compname,
1033  pdbbits,
1034  num_components=num_components,
1035  resolution=0,
1036  outputfile=txtfilename,
1037  outputmap=mrcfilename,
1038  multiply_by_total_mass=True)
1039 
1040  if len(helixbits)!=0:
1041  num_components=number_of_residues//abs(num_components)+1
1042  outhier+=simo.add_component_density(compname,
1043  helixbits,
1044  num_components=num_components,
1045  resolution=1,
1046  outputfile=txtfilename,
1047  outputmap=mrcfilename,
1048  multiply_by_total_mass=True)
1049 
1050  return outhier,beadbits
1051 
1052  def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
1053  beadsize=5,color=0.0,offset=0):
1054  if pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is not "BEADS" :
1055  if include_res0:
1056  outhier=simo.autobuild_model(comname,
1057  pdbname=pdbname,
1058  chain=chain,
1059  resrange=resrange,
1060  resolutions=[0,1,10],
1061  offset=offset,
1062  color=color,
1063  missingbeadsize=beadsize)
1064  else:
1065  outhier=simo.autobuild_model(comname,
1066  pdbname=pdbname,
1067  chain=chain,
1068  resrange=resrange,
1069  resolutions=[1,10],
1070  offset=offset,
1071  color=color,
1072  missingbeadsize=beadsize)
1073 
1074 
1075  elif pdbname is not None and pdbname is "IDEAL_HELIX" and pdbname is not "BEADS" :
1076  outhier=simo.add_component_ideal_helix(comname,
1077  resolutions=[1,10],
1078  resrange=resrange,
1079  color=color,
1080  show=False)
1081 
1082  elif pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is "BEADS" :
1083  outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
1084 
1085  else:
1086 
1087  seq_len=len(simo.sequence_dict[comname])
1088  outhier=simo.add_component_necklace(comname,
1089  begin=1,
1090  end=seq_len,
1091  length=beadsize)
1092 
1093  return outhier
1094 
1095 
1096 @IMP.deprecated_object("2.5", "Use BuildSystem instead")
1097 class BuildModel1(object):
1098  """Deprecated building macro - use BuildSystem()"""
1099 
1100  def __init__(self, representation):
1101  """Constructor.
1102  @param representation The PMI representation
1103  """
1104  self.simo=representation
1105  self.gmm_models_directory="."
1106  self.rmf_file={}
1107  self.rmf_frame_number={}
1108  self.rmf_names_map={}
1109  self.rmf_component_name={}
1110 
1111  def set_gmm_models_directory(self,directory_name):
1112  self.gmm_models_directory=directory_name
1113 
1114  def build_model(self,data_structure,sequence_connectivity_scale=4.0,
1115  sequence_connectivity_resolution=10,
1116  rmf_file=None,rmf_frame_number=0,rmf_file_map=None,
1117  skip_connectivity_these_domains=None,
1118  skip_gaussian_in_rmf=False, skip_gaussian_in_representation=False):
1119  """Create model.
1120  @param data_structure List of lists containing these entries:
1121  comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
1122  res_range, read_em_files, bead_size, rb, super_rb,
1123  em_num_components, em_txt_file_name, em_mrc_file_name, chain_of_super_rb,
1124  keep_gaussian_flexible_beads (optional)
1125  @param sequence_connectivity_scale
1126  @param rmf_file
1127  @param rmf_frame_number
1128  @param rmf_file_map : a dictionary that map key=component_name:value=(rmf_file_name,
1129  rmf_frame_number,
1130  rmf_component_name)
1131  """
1132  self.domain_dict={}
1133  self.resdensities={}
1134  super_rigid_bodies={}
1135  chain_super_rigid_bodies={}
1136  rigid_bodies={}
1137 
1138  for d in data_structure:
1139  comp_name = d[0]
1140  hier_name = d[1]
1141  color = d[2]
1142  fasta_file = d[3]
1143  fasta_id = d[4]
1144  pdb_name = d[5]
1145  chain_id = d[6]
1146  res_range = d[7][0:2]
1147  try:
1148  offset = d[7][2]
1149  except:
1150  offset = 0
1151  read_em_files = d[8]
1152  bead_size = d[9]
1153  rb = d[10]
1154  super_rb = d[11]
1155  em_num_components = d[12]
1156  em_txt_file_name = d[13]
1157  em_mrc_file_name = d[14]
1158  chain_of_super_rb = d[15]
1159  try:
1160  keep_gaussian_flexible_beads = d[16]
1161  except:
1162  keep_gaussian_flexible_beads = True
1163 
1164  if comp_name not in self.simo.get_component_names():
1165  self.simo.create_component(comp_name,color=0.0)
1166  self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
1167  outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
1168 
1169 
1170  if not read_em_files is None:
1171  if em_txt_file_name is " ": em_txt_file_name=self.gmm_models_directory+"/"+hier_name+".txt"
1172  if em_mrc_file_name is " ": em_mrc_file_name=self.gmm_models_directory+"/"+hier_name+".mrc"
1173 
1174 
1175  dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
1176 
1177  if (keep_gaussian_flexible_beads):
1178  self.simo.add_all_atom_densities(comp_name, particles=beads)
1179  dens_hier+=beads
1180  else:
1181  dens_hier=[]
1182 
1183  self.resdensities[hier_name]=dens_hier
1184  self.domain_dict[hier_name]=outhier+dens_hier
1185 
1186  if rb is not None:
1187  if rb not in rigid_bodies:
1188  rigid_bodies[rb]=[h for h in self.domain_dict[hier_name]]
1189  else:
1190  rigid_bodies[rb]+=[h for h in self.domain_dict[hier_name]]
1191 
1192 
1193  if super_rb is not None:
1194  for k in super_rb:
1195  if k not in super_rigid_bodies:
1196  super_rigid_bodies[k]=[h for h in self.domain_dict[hier_name]]
1197  else:
1198  super_rigid_bodies[k]+=[h for h in self.domain_dict[hier_name]]
1199 
1200  if chain_of_super_rb is not None:
1201  for k in chain_of_super_rb:
1202  if k not in chain_super_rigid_bodies:
1203  chain_super_rigid_bodies[k]=[h for h in self.domain_dict[hier_name]]
1204  else:
1205  chain_super_rigid_bodies[k]+=[h for h in self.domain_dict[hier_name]]
1206 
1207 
1208 
1209  self.rigid_bodies=rigid_bodies
1210 
1211  for c in self.simo.get_component_names():
1212  if rmf_file is not None:
1213  rf=rmf_file
1214  rfn=rmf_frame_number
1215  self.simo.set_coordinates_from_rmf(c, rf,rfn,
1216  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1217  elif rmf_file_map:
1218  for k in rmf_file_map:
1219  cname=k
1220  rf=rmf_file_map[k][0]
1221  rfn=rmf_file_map[k][1]
1222  rcname=rmf_file_map[k][2]
1223  self.simo.set_coordinates_from_rmf(cname, rf,rfn,rcname,
1224  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1225  else:
1226  if c in self.rmf_file:
1227  rf=self.rmf_file[c]
1228  rfn=self.rmf_frame_number[c]
1229  rfm=self.rmf_names_map[c]
1230  rcname=self.rmf_component_name[c]
1231  self.simo.set_coordinates_from_rmf(c, rf,rfn,representation_name_to_rmf_name_map=rfm,
1232  rmf_component_name=rcname,
1233  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1234  if (not skip_connectivity_these_domains) or (c not in skip_connectivity_these_domains):
1235  self.simo.setup_component_sequence_connectivity(c,
1236  resolution=sequence_connectivity_resolution,
1237  scale=sequence_connectivity_scale)
1238  self.simo.setup_component_geometry(c)
1239 
1240  for rb in rigid_bodies:
1241  self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
1242 
1243  for k in super_rigid_bodies:
1244  self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
1245 
1246  for k in chain_super_rigid_bodies:
1247  self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
1248 
1249  self.simo.set_floppy_bodies()
1250  self.simo.setup_bonds()
1251 
1252  def set_main_chain_mover(self,hier_name,lengths=[5,10,20,30]):
1253  hiers=self.domain_dict[hier_name]
1254  for length in lengths:
1255  for n in range(len(hiers)-length):
1256  hs=hiers[n+1:n+length-1]
1257  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n+length].get_particle()),min_size=3)
1258  for n in range(1,len(hiers)-1,5):
1259  hs=hiers[n+1:]
1260  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1261  hs=hiers[:n-1]
1262  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1263 
1264 
1265  def set_rmf_file(self,component_name,rmf_file,rmf_frame_number,rmf_names_map=None,rmf_component_name=None):
1266  self.rmf_file[component_name]=rmf_file
1267  self.rmf_frame_number[component_name]=rmf_frame_number
1268  self.rmf_names_map[component_name]=rmf_names_map
1269  self.rmf_component_name[component_name]=rmf_component_name
1270 
1271  def get_density_hierarchies(self,hier_name_list):
1272  # return a list of density hierarchies
1273  # specify the list of hierarchy names
1274  dens_hier_list=[]
1275  for hn in hier_name_list:
1276  print(hn)
1277  dens_hier_list+=self.resdensities[hn]
1278  return dens_hier_list
1279 
1280  def get_pdb_bead_bits(self,hierarchy):
1281  pdbbits=[]
1282  beadbits=[]
1283  helixbits=[]
1284  for h in hierarchy:
1285  if "_pdb" in h.get_name():pdbbits.append(h)
1286  if "_bead" in h.get_name():beadbits.append(h)
1287  if "_helix" in h.get_name():helixbits.append(h)
1288  return (pdbbits,beadbits,helixbits)
1289 
1290  def scale_bead_radii(self,nresidues,scale):
1291  scaled_beads=set()
1292  for h in self.domain_dict:
1293  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
1294  slope=(1.0-scale)/(1.0-float(nresidues))
1295 
1296  for b in beadbits:
1297  # I have to do the following
1298  # because otherwise we'll scale more than once
1299  if b not in scaled_beads:
1300  scaled_beads.add(b)
1301  else:
1302  continue
1303  radius=IMP.core.XYZR(b).get_radius()
1304  num_residues=len(IMP.pmi.tools.get_residue_indexes(b))
1305  scale_factor=slope*float(num_residues)+1.0
1306  print(scale_factor)
1307  new_radius=scale_factor*radius
1308  IMP.core.XYZR(b).set_radius(new_radius)
1309  print(b.get_name())
1310  print("particle with radius "+str(radius)+" and "+str(num_residues)+" residues scaled to a new radius "+str(new_radius))
1311 
1312 
1313  def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
1314  #density generation for the EM restraint
1315  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
1316 
1317  #get the number of residues from the pdb bits
1318  res_ind=[]
1319  for pb in pdbbits+helixbits:
1320  for p in IMP.core.get_leaves(pb):
1322 
1323  number_of_residues=len(set(res_ind))
1324  outhier=[]
1325  if read:
1326  if len(pdbbits)!=0:
1327  outhier+=simo.add_component_density(compname,
1328  pdbbits,
1329  num_components=num_components, # number of gaussian into which the simulated density is approximated
1330  resolution=0, # resolution that you want to calculate the simulated density
1331  inputfile=txtfilename) # read what it was calculated before
1332  if len(helixbits)!=0:
1333  outhier+=simo.add_component_density(compname,
1334  helixbits,
1335  num_components=num_components, # number of gaussian into which the simulated density is approximated
1336  resolution=1, # resolution that you want to calculate the simulated density
1337  inputfile=txtfilename) # read what it was calculated before
1338 
1339 
1340  else:
1341  if len(pdbbits)!=0:
1342  if num_components<0:
1343  #if negative calculate the number of gmm components automatically
1344  # from the number of residues
1345  num_components=number_of_residues/abs(num_components)
1346  outhier+=simo.add_component_density(compname,
1347  pdbbits,
1348  num_components=num_components, # number of gaussian into which the simulated density is approximated
1349  resolution=0, # resolution that you want to calculate the simulated density
1350  outputfile=txtfilename, # do the calculation
1351  outputmap=mrcfilename,
1352  multiply_by_total_mass=True) # do the calculation and output the mrc
1353 
1354  if len(helixbits)!=0:
1355  if num_components<0:
1356  #if negative calculate the number of gmm components automatically
1357  # from the number of residues
1358  num_components=number_of_residues/abs(num_components)
1359  outhier+=simo.add_component_density(compname,
1360  helixbits,
1361  num_components=num_components, # number of gaussian into which the simulated density is approximated
1362  resolution=1, # resolution that you want to calculate the simulated density
1363  outputfile=txtfilename, # do the calculation
1364  outputmap=mrcfilename,
1365  multiply_by_total_mass=True) # do the calculation and output the mrc
1366 
1367  return outhier,beadbits
1368 
1369  def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
1370 
1371  if pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is not "BEADS" and pdbname is not "DENSITY" :
1372  if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
1373  if read==False:
1374  outhier=simo.autobuild_model(comname,
1375  pdbname=pdbname,
1376  chain=chain,
1377  resrange=resrange,
1378  resolutions=[0,1,10],
1379  offset=offset,
1380  color=color,
1381  missingbeadsize=beadsize)
1382  else:
1383  outhier=simo.autobuild_model(comname,
1384  pdbname=pdbname,
1385  chain=chain,
1386  resrange=resrange,
1387  resolutions=[1,10],
1388  offset=offset,
1389  color=color,
1390  missingbeadsize=beadsize)
1391 
1392  elif pdbname is not None and pdbname is "IDEAL_HELIX" and pdbname is not "BEADS" and pdbname is not "DENSITY" :
1393  outhier=simo.add_component_ideal_helix(comname,
1394  resolutions=[1,10],
1395  resrange=resrange,
1396  color=color,
1397  show=False)
1398 
1399  elif pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is "BEADS" and pdbname is not "DENSITY" :
1400  seq_len=resrange[1]
1401  if resrange[1]==-1:
1402  seq_len=len(simo.sequence_dict[comname])
1403  outhier=simo.add_component_necklace(comname,resrange[0],seq_len,beadsize,color=color)
1404 
1405  elif pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is not "BEADS" and pdbname is "DENSITY" :
1406  outhier=[]
1407 
1408  else:
1409 
1410  seq_len=len(simo.sequence_dict[comname])
1411  outhier=simo.add_component_necklace(comname,
1412  begin=1,
1413  end=seq_len,
1414  length=beadsize)
1415 
1416  return outhier
1417 
1418  def set_coordinates(self,hier_name,xyz_tuple):
1419  hier=self.domain_dict[hier_name]
1420  for h in IMP.atom.get_leaves(hier):
1421  p=h.get_particle()
1423  pass
1424  else:
1425  IMP.core.XYZ(p).set_coordinates(xyz_tuple)
1426 
1427  def save_rmf(self,rmfname):
1428 
1430  self.simo.model.update()
1431  o.init_rmf(rmfname,[self.simo.prot])
1432  o.write_rmf(rmfname)
1433  o.close_rmf(rmfname)
1434 # -----------------------------------------------------------------------
1435 
1436 
1437 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
1438 class AnalysisReplicaExchange0(object):
1439  """A macro for running all the basic operations of analysis.
1440  Includes clustering, precision analysis, and making ensemble density maps.
1441  A number of plots are also supported.
1442  """
1443  def __init__(self, model,
1444  merge_directories=["./"],
1445  stat_file_name_suffix="stat",
1446  best_pdb_name_suffix="model",
1447  do_clean_first=True,
1448  do_create_directories=True,
1449  global_output_directory="output/",
1450  replica_stat_file_suffix="stat_replica",
1451  global_analysis_result_directory="./analysis/",
1452  test_mode=False):
1453  """Constructor.
1454  @param model The IMP model
1455  @param stat_file_name_suffix
1456  @param merge_directories The directories containing output files
1457  @param best_pdb_name_suffix
1458  @param do_clean_first
1459  @param do_create_directories
1460  @param global_output_directory Where everything is
1461  @param replica_stat_file_suffix
1462  @param global_analysis_result_directory
1463  @param test_mode If True, nothing is changed on disk
1464  """
1465 
1466  try:
1467  from mpi4py import MPI
1468  self.comm = MPI.COMM_WORLD
1469  self.rank = self.comm.Get_rank()
1470  self.number_of_processes = self.comm.size
1471  except ImportError:
1472  self.rank = 0
1473  self.number_of_processes = 1
1474 
1475  self.test_mode = test_mode
1476  self._protocol_output = []
1477  self.cluster_obj = None
1478  self.model = model
1479  stat_dir = global_output_directory
1480  self.stat_files = []
1481  # it contains the position of the root directories
1482  for rd in merge_directories:
1483  stat_files = glob.glob(os.path.join(rd,stat_dir,"stat.*.out"))
1484  if len(stat_files)==0:
1485  print("WARNING: no stat files found in",os.path.join(rd,stat_dir))
1486  self.stat_files += stat_files
1487 
1488  def add_protocol_output(self, p):
1489  """Capture details of the modeling protocol.
1490  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
1491  """
1492  # Assume last state is the one we're interested in
1493  self._protocol_output.append((p, p._last_state))
1494 
1495  def get_modeling_trajectory(self,
1496  score_key="SimplifiedModel_Total_Score_None",
1497  rmf_file_key="rmf_file",
1498  rmf_file_frame_key="rmf_frame_index",
1499  outputdir="./",
1500  get_every=1,
1501  nframes_trajectory=10000):
1502  """ Get a trajectory of the modeling run, for generating demonstrative movies
1503  @param score_key The score for ranking models
1504  @param rmf_file_key Key pointing to RMF filename
1505  @param rmf_file_frame_key Key pointing to RMF frame number
1506  @param outputdir The local output directory used in the run
1507  @param get_every Extract every nth frame
1508  @param nframes_trajectory Total number of frames of the trajectory
1509  """
1510  from operator import itemgetter
1511  import math
1512 
1513  trajectory_models = IMP.pmi.io.get_trajectory_models(self.stat_files,
1514  score_key,
1515  rmf_file_key,
1516  rmf_file_frame_key,
1517  get_every)
1518  rmf_file_list=trajectory_models[0]
1519  rmf_file_frame_list=trajectory_models[1]
1520  score_list=list(map(float, trajectory_models[2]))
1521 
1522  max_score=max(score_list)
1523  min_score=min(score_list)
1524 
1525 
1526  bins=[(max_score-min_score)*math.exp(-float(i))+min_score for i in range(nframes_trajectory)]
1527  binned_scores=[None]*nframes_trajectory
1528  binned_model_indexes=[-1]*nframes_trajectory
1529 
1530  for model_index,s in enumerate(score_list):
1531  bins_score_diffs=[abs(s-b) for b in bins]
1532  bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1533  if binned_scores[bin_index]==None:
1534  binned_scores[bin_index]=s
1535  binned_model_indexes[bin_index]=model_index
1536  else:
1537  old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1538  new_diff=abs(s-bins[bin_index])
1539  if new_diff < old_diff:
1540  binned_scores[bin_index]=s
1541  binned_model_indexes[bin_index]=model_index
1542 
1543  print(binned_scores)
1544  print(binned_model_indexes)
1545 
1546 
1547  def _expand_ambiguity(self,prot,d):
1548  """If using PMI2, expand the dictionary to include copies as ambiguous options
1549  This also keeps the states separate.
1550  """
1551  newdict = {}
1552  for key in d:
1553  val = d[key]
1554  if '..' in key or (type(val) is tuple and len(val)>=3):
1555  newdict[key] = val
1556  continue
1557  states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
1558  if type(val) is tuple:
1559  start = val[0]
1560  stop = val[1]
1561  name = val[2]
1562  else:
1563  start = 1
1564  stop = -1
1565  name = val
1566  for nst in range(len(states)):
1567  sel = IMP.atom.Selection(prot,molecule=name,state_index=nst)
1568  copies = sel.get_selected_particles(with_representation=False)
1569  if len(copies)>1:
1570  for nc in range(len(copies)):
1571  if len(states)>1:
1572  newdict['%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
1573  else:
1574  newdict['%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
1575  else:
1576  newdict[key] = val
1577  return newdict
1578 
1579 
1580  def clustering(self,
1581  score_key="SimplifiedModel_Total_Score_None",
1582  rmf_file_key="rmf_file",
1583  rmf_file_frame_key="rmf_frame_index",
1584  state_number=0,
1585  prefiltervalue=None,
1586  feature_keys=[],
1587  outputdir="./",
1588  alignment_components=None,
1589  number_of_best_scoring_models=10,
1590  rmsd_calculation_components=None,
1591  distance_matrix_file='distances.mat',
1592  load_distance_matrix_file=False,
1593  skip_clustering=False,
1594  number_of_clusters=1,
1595  display_plot=False,
1596  exit_after_display=True,
1597  get_every=1,
1598  first_and_last_frames=None,
1599  density_custom_ranges=None,
1600  write_pdb_with_centered_coordinates=False,
1601  voxel_size=5.0):
1602  """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
1603  Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
1604  Can pass None for copy or state to ignore that field.
1605  If you don't pass a specific copy number
1606  @param score_key The score for ranking models. Use "Total_Score"
1607  instead of default for PMI2.
1608  @param rmf_file_key Key pointing to RMF filename
1609  @param rmf_file_frame_key Key pointing to RMF frame number
1610  @param state_number State number to analyze
1611  @param prefiltervalue Only include frames where the
1612  score key is below this value
1613  @param feature_keys Keywords for which you want to
1614  calculate average, medians, etc.
1615  If you pass "Keyname" it'll include everything that matches "*Keyname*"
1616  @param outputdir The local output directory used in the run
1617  @param alignment_components Dictionary with keys=groupname, values are tuples
1618  for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1619  @param number_of_best_scoring_models Num models to keep per run
1620  @param rmsd_calculation_components For calculating RMSD
1621  (same format as alignment_components)
1622  @param distance_matrix_file Where to store/read the distance matrix
1623  @param load_distance_matrix_file Try to load the distance matrix file
1624  @param skip_clustering Just extract the best scoring models
1625  and save the pdbs
1626  @param number_of_clusters Number of k-means clusters
1627  @param display_plot Display the distance matrix
1628  @param exit_after_display Exit after displaying distance matrix
1629  @param get_every Extract every nth frame
1630  @param first_and_last_frames A tuple with the first and last frames to be
1631  analyzed. Values are percentages!
1632  Default: get all frames
1633  @param density_custom_ranges For density calculation
1634  (same format as alignment_components)
1635  @param write_pdb_with_centered_coordinates
1636  @param voxel_size Used for the density output
1637  """
1638  # Track provenance information to be added to each output model
1639  prov = []
1640  self._outputdir = os.path.abspath(outputdir)
1641  self._number_of_clusters = number_of_clusters
1642  for p, state in self._protocol_output:
1643  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1644 
1645  if self.test_mode:
1646  return
1647 
1648  if self.rank==0:
1649  try:
1650  os.mkdir(outputdir)
1651  except:
1652  pass
1653 
1654  if not load_distance_matrix_file:
1655  if len(self.stat_files)==0: print("ERROR: no stat file found in the given path"); return
1656  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1657  self.stat_files,self.number_of_processes)[self.rank]
1658 
1659  # read ahead to check if you need the PMI2 score key instead
1660  po = IMP.pmi.output.ProcessOutput(my_stat_files[0])
1661  orig_score_key = score_key
1662  if score_key not in po.get_keys():
1663  if 'Total_Score' in po.get_keys():
1664  score_key = 'Total_Score'
1665  print("WARNING: Using 'Total_Score' instead of "
1666  "'SimplifiedModel_Total_Score_None' for the score key")
1667  for k in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1668  if k in feature_keys:
1669  print("WARNING: no need to pass " +k+" to feature_keys.")
1670  feature_keys.remove(k)
1671 
1672  best_models = IMP.pmi.io.get_best_models(my_stat_files,
1673  score_key,
1674  feature_keys,
1675  rmf_file_key,
1676  rmf_file_frame_key,
1677  prefiltervalue,
1678  get_every, provenance=prov)
1679  rmf_file_list=best_models[0]
1680  rmf_file_frame_list=best_models[1]
1681  score_list=best_models[2]
1682  feature_keyword_list_dict=best_models[3]
1683 
1684 # ------------------------------------------------------------------------
1685 # collect all the files and scores
1686 # ------------------------------------------------------------------------
1687 
1688  if self.number_of_processes > 1:
1689  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1690  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1691  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1692  rmf_file_frame_list)
1693  for k in feature_keyword_list_dict:
1694  feature_keyword_list_dict[k] = IMP.pmi.tools.scatter_and_gather(
1695  feature_keyword_list_dict[k])
1696 
1697  # sort by score and get the best scoring ones
1698  score_rmf_tuples = list(zip(score_list,
1699  rmf_file_list,
1700  rmf_file_frame_list,
1701  list(range(len(score_list)))))
1702 
1703  if density_custom_ranges:
1704  for k in density_custom_ranges:
1705  if type(density_custom_ranges[k]) is not list:
1706  raise Exception("Density custom ranges: values must be lists of tuples")
1707 
1708  # keep subset of frames if requested
1709  if first_and_last_frames is not None:
1710  nframes = len(score_rmf_tuples)
1711  first_frame = int(first_and_last_frames[0] * nframes)
1712  last_frame = int(first_and_last_frames[1] * nframes)
1713  if last_frame > len(score_rmf_tuples):
1714  last_frame = -1
1715  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1716 
1717  # sort RMFs by the score_key in ascending order, and store the rank
1718  best_score_rmf_tuples = sorted(score_rmf_tuples,
1719  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1720  best_score_rmf_tuples=[t+(n,) for n,t in enumerate(best_score_rmf_tuples)]
1721  # Note in the provenance info that we only kept best-scoring models
1722  prov.append(IMP.pmi.io.FilterProvenance("Best scoring",
1723  0, number_of_best_scoring_models))
1724  # sort the feature scores in the same way
1725  best_score_feature_keyword_list_dict = defaultdict(list)
1726  for tpl in best_score_rmf_tuples:
1727  index = tpl[3]
1728  for f in feature_keyword_list_dict:
1729  best_score_feature_keyword_list_dict[f].append(
1730  feature_keyword_list_dict[f][index])
1731  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1732  best_score_rmf_tuples,
1733  self.number_of_processes)[self.rank]
1734 
1735  # expand the dictionaries to include ambiguous copies
1736  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(self.model,
1737  0,
1738  my_best_score_rmf_tuples[0][1])[0]
1739  if IMP.pmi.get_is_canonical(prot_ahead):
1740  if rmsd_calculation_components is not None:
1741  tmp = self._expand_ambiguity(prot_ahead,rmsd_calculation_components)
1742  if tmp!=rmsd_calculation_components:
1743  print('Detected ambiguity, expand rmsd components to',tmp)
1744  rmsd_calculation_components = tmp
1745  if alignment_components is not None:
1746  tmp = self._expand_ambiguity(prot_ahead,alignment_components)
1747  if tmp!=alignment_components:
1748  print('Detected ambiguity, expand alignment components to',tmp)
1749  alignment_components = tmp
1750 
1751 #-------------------------------------------------------------
1752 # read the coordinates
1753 # ------------------------------------------------------------
1754  rmsd_weights = IMP.pmi.io.get_bead_sizes(self.model,
1755  my_best_score_rmf_tuples[0],
1756  rmsd_calculation_components,
1757  state_number=state_number)
1758  got_coords = IMP.pmi.io.read_coordinates_of_rmfs(self.model,
1759  my_best_score_rmf_tuples,
1760  alignment_components,
1761  rmsd_calculation_components,
1762  state_number=state_number)
1763 
1764  # note! the coordinates are simply float tuples, NOT decorators, NOT Vector3D,
1765  # NOR particles, because these object cannot be serialized. We need serialization
1766  # for the parallel computation based on mpi.
1767  all_coordinates=got_coords[0] # dict:key=component name,val=coords per hit
1768  alignment_coordinates=got_coords[1] # same as above, limited to alignment bits
1769  rmsd_coordinates=got_coords[2] # same as above, limited to RMSD bits
1770  rmf_file_name_index_dict=got_coords[3] # dictionary with key=RMF, value=score rank
1771  all_rmf_file_names=got_coords[4] # RMF file per hit
1772 
1773 # ------------------------------------------------------------------------
1774 # optionally don't compute distance matrix or cluster, just write top files
1775 # ------------------------------------------------------------------------
1776  if skip_clustering:
1777  if density_custom_ranges:
1778  DensModule = IMP.pmi.analysis.GetModelDensity(
1779  density_custom_ranges,
1780  voxel=voxel_size)
1781 
1782  dircluster=os.path.join(outputdir,"all_models."+str(n))
1783  try:
1784  os.mkdir(outputdir)
1785  except:
1786  pass
1787  try:
1788  os.mkdir(dircluster)
1789  except:
1790  pass
1791  clusstat=open(os.path.join(dircluster,"stat."+str(self.rank)+".out"),"w")
1792  for cnt,tpl in enumerate(my_best_score_rmf_tuples):
1793  rmf_name=tpl[1]
1794  rmf_frame_number=tpl[2]
1795  tmp_dict={}
1796  index=tpl[4]
1797  for key in best_score_feature_keyword_list_dict:
1798  tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1799 
1800  if cnt==0:
1801  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1802  self.model,
1803  rmf_frame_number,
1804  rmf_name)
1805  else:
1806  linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1807  self.model,
1808  prots,
1809  rs,
1810  rmf_frame_number,
1811  rmf_name)
1812  if not linking_successful:
1813  continue
1814 
1815  if not prots:
1816  continue
1817 
1818  if IMP.pmi.get_is_canonical(prots[0]):
1819  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1820  prot = states[state_number]
1821  else:
1822  prot = prots[state_number]
1823 
1824  # get transformation aligning coordinates of requested tuples
1825  # to the first RMF file
1826  if cnt==0:
1827  coords_f1=alignment_coordinates[cnt]
1828  if cnt > 0:
1829  coords_f2=alignment_coordinates[cnt]
1830  if coords_f2:
1831  Ali = IMP.pmi.analysis.Alignment(coords_f1, coords_f2)
1832  transformation = Ali.align()[1]
1833  else:
1835 
1836  rbs = set()
1837  for p in IMP.atom.get_leaves(prot):
1838  if not IMP.core.XYZR.get_is_setup(p):
1840  IMP.core.XYZR(p).set_radius(0.0001)
1841  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1842 
1844  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1845  rbs.add(rb)
1846  else:
1848  transformation)
1849  for rb in rbs:
1850  IMP.core.transform(rb,transformation)
1851 
1853  self.model.update()
1854  out_pdb_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".pdb")
1855  out_rmf_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".rmf3")
1856  o.init_pdb(out_pdb_fn,prot)
1857  o.write_pdb(out_pdb_fn,
1858  translate_to_geometric_center=write_pdb_with_centered_coordinates)
1859 
1860  tmp_dict["local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1861  tmp_dict["rmf_file_full_path"]=rmf_name
1862  tmp_dict["local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1863  tmp_dict["local_rmf_frame_number"]=0
1864 
1865  clusstat.write(str(tmp_dict)+"\n")
1866 
1867  if IMP.pmi.get_is_canonical(prot):
1868  # create a single-state System and write that
1870  h.set_name("System")
1871  h.add_child(prot)
1872  o.init_rmf(out_rmf_fn, [h], rs)
1873  else:
1874  o.init_rmf(out_rmf_fn, [prot],rs)
1875 
1876  o.write_rmf(out_rmf_fn)
1877  o.close_rmf(out_rmf_fn)
1878  # add the density
1879  if density_custom_ranges:
1880  DensModule.add_subunits_density(prot)
1881 
1882  if density_custom_ranges:
1883  DensModule.write_mrc(path=dircluster)
1884  del DensModule
1885  return
1886 
1887 
1888 
1889  # broadcast the coordinates
1890  if self.number_of_processes > 1:
1891  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1892  all_coordinates)
1893  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1894  all_rmf_file_names)
1895  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1896  rmf_file_name_index_dict)
1897  alignment_coordinates=IMP.pmi.tools.scatter_and_gather(
1898  alignment_coordinates)
1899  rmsd_coordinates=IMP.pmi.tools.scatter_and_gather(
1900  rmsd_coordinates)
1901 
1902  if self.rank == 0:
1903  # save needed informations in external files
1904  self.save_objects(
1905  [best_score_feature_keyword_list_dict,
1906  rmf_file_name_index_dict],
1907  ".macro.pkl")
1908 
1909 # ------------------------------------------------------------------------
1910 # Calculate distance matrix and cluster
1911 # ------------------------------------------------------------------------
1912  print("setup clustering class")
1913  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1914 
1915  for n, model_coordinate_dict in enumerate(all_coordinates):
1916  template_coordinate_dict = {}
1917  # let's try to align
1918  if alignment_components is not None and len(self.cluster_obj.all_coords) == 0:
1919  # set the first model as template coordinates
1920  self.cluster_obj.set_template(alignment_coordinates[n])
1921  self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1922  print("Global calculating the distance matrix")
1923 
1924  # calculate distance matrix, all against all
1925  self.cluster_obj.dist_matrix()
1926 
1927  # perform clustering and optionally display
1928  if self.rank == 0:
1929  self.cluster_obj.do_cluster(number_of_clusters)
1930  if display_plot:
1931  if self.rank == 0:
1932  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1933  if exit_after_display:
1934  exit()
1935  self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1936 
1937 # ------------------------------------------------------------------------
1938 # Alteratively, load the distance matrix from file and cluster that
1939 # ------------------------------------------------------------------------
1940  else:
1941  if self.rank==0:
1942  print("setup clustering class")
1943  self.cluster_obj = IMP.pmi.analysis.Clustering()
1944  self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1945  print("clustering with %s clusters" % str(number_of_clusters))
1946  self.cluster_obj.do_cluster(number_of_clusters)
1947  [best_score_feature_keyword_list_dict,
1948  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1949  if display_plot:
1950  if self.rank == 0:
1951  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1952  if exit_after_display:
1953  exit()
1954  if self.number_of_processes > 1:
1955  self.comm.Barrier()
1956 
1957 # ------------------------------------------------------------------------
1958 # now save all informations about the clusters
1959 # ------------------------------------------------------------------------
1960 
1961  if self.rank == 0:
1962  print(self.cluster_obj.get_cluster_labels())
1963  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1964  print("rank %s " % str(self.rank))
1965  print("cluster %s " % str(n))
1966  print("cluster label %s " % str(cl))
1967  print(self.cluster_obj.get_cluster_label_names(cl))
1968  cluster_size = len(self.cluster_obj.get_cluster_label_names(cl))
1969  cluster_prov = prov + \
1970  [IMP.pmi.io.ClusterProvenance(cluster_size)]
1971 
1972  # first initialize the Density class if requested
1973  if density_custom_ranges:
1974  DensModule = IMP.pmi.analysis.GetModelDensity(
1975  density_custom_ranges,
1976  voxel=voxel_size)
1977 
1978  dircluster = outputdir + "/cluster." + str(n) + "/"
1979  try:
1980  os.mkdir(dircluster)
1981  except:
1982  pass
1983 
1984  rmsd_dict = {"AVERAGE_RMSD":
1985  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1986  clusstat = open(dircluster + "stat.out", "w")
1987  for k, structure_name in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1988  # extract the features
1989  tmp_dict = {}
1990  tmp_dict.update(rmsd_dict)
1991  index = rmf_file_name_index_dict[structure_name]
1992  for key in best_score_feature_keyword_list_dict:
1993  tmp_dict[
1994  key] = best_score_feature_keyword_list_dict[
1995  key][
1996  index]
1997 
1998  # get the rmf name and the frame number from the list of
1999  # frame names
2000  rmf_name = structure_name.split("|")[0]
2001  rmf_frame_number = int(structure_name.split("|")[1])
2002  clusstat.write(str(tmp_dict) + "\n")
2003 
2004  # extract frame (open or link to existing)
2005  if k==0:
2006  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
2007  self.model,
2008  rmf_frame_number,
2009  rmf_name)
2010  else:
2011  linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
2012  self.model,
2013  prots,
2014  rs,
2015  rmf_frame_number,
2016  rmf_name)
2017  if not linking_successful:
2018  continue
2019  if not prots:
2020  continue
2021 
2022  if IMP.pmi.get_is_canonical(prots[0]):
2023  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
2024  prot = states[state_number]
2025  else:
2026  prot = prots[state_number]
2027  if k==0:
2028  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
2029 
2030  # transform clusters onto first
2031  if k > 0:
2032  model_index = self.cluster_obj.get_model_index_from_name(
2033  structure_name)
2034  transformation = self.cluster_obj.get_transformation_to_first_member(
2035  cl,
2036  model_index)
2037  rbs = set()
2038  for p in IMP.atom.get_leaves(prot):
2039  if not IMP.core.XYZR.get_is_setup(p):
2041  IMP.core.XYZR(p).set_radius(0.0001)
2042  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
2043 
2045  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
2046  rbs.add(rb)
2047  else:
2049  transformation)
2050  for rb in rbs:
2051  IMP.core.transform(rb,transformation)
2052 
2053  # add the density
2054  if density_custom_ranges:
2055  DensModule.add_subunits_density(prot)
2056 
2057  # pdb writing should be optimized!
2058  o = IMP.pmi.output.Output()
2059  self.model.update()
2060  o.init_pdb(dircluster + str(k) + ".pdb", prot)
2061  o.write_pdb(dircluster + str(k) + ".pdb")
2062 
2063  if IMP.pmi.get_is_canonical(prot):
2064  # create a single-state System and write that
2066  h.set_name("System")
2067  h.add_child(prot)
2068  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
2069  else:
2070  o.init_rmf(dircluster + str(k) + ".rmf3", [prot],rs)
2071  o.write_rmf(dircluster + str(k) + ".rmf3")
2072  o.close_rmf(dircluster + str(k) + ".rmf3")
2073 
2074  del o
2075  # IMP.atom.destroy(prot)
2076 
2077  if density_custom_ranges:
2078  DensModule.write_mrc(path=dircluster)
2079  del DensModule
2080 
2081  if self.number_of_processes>1:
2082  self.comm.Barrier()
2083 
2084  def get_cluster_rmsd(self,cluster_num):
2085  if self.cluster_obj is None:
2086  raise Exception("Run clustering first")
2087  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
2088 
2089  def save_objects(self, objects, file_name):
2090  import pickle
2091  outf = open(file_name, 'wb')
2092  pickle.dump(objects, outf)
2093  outf.close()
2094 
2095  def load_objects(self, file_name):
2096  import pickle
2097  inputf = open(file_name, 'rb')
2098  objects = pickle.load(inputf)
2099  inputf.close()
2100  return objects
2101 
2103 
2104  """
2105  This class contains analysis utilities to investigate ReplicaExchange results.
2106  """
2107 
2108  ########################
2109  # Construction and Setup
2110  ########################
2111 
2112  def __init__(self,model,
2113  stat_files,
2114  best_models=None,
2115  score_key=None,
2116  alignment=True):
2117  """
2118  Construction of the Class.
2119  @param model IMP.Model()
2120  @param stat_files list of string. Can be ascii stat files, rmf files names
2121  @param best_models Integer. Number of best scoring models, if None: all models will be read
2122  @param alignment boolean (Default=True). Align before computing the rmsd.
2123  """
2124 
2125  self.model=model
2126  self.best_models=best_models
2127  self.stath0=IMP.pmi.output.StatHierarchyHandler(model,stat_files,self.best_models,score_key,cache=True)
2128  self.stath1=IMP.pmi.output.StatHierarchyHandler(StatHierarchyHandler=self.stath0)
2129 
2130  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(IMP.pmi.tools.select_at_all_resolutions(self.stath1))
2131  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(IMP.pmi.tools.select_at_all_resolutions(self.stath0))
2132  self.sel0_rmsd=IMP.atom.Selection(self.stath0)
2133  self.sel1_rmsd=IMP.atom.Selection(self.stath1)
2134  self.sel0_alignment=IMP.atom.Selection(self.stath0)
2135  self.sel1_alignment=IMP.atom.Selection(self.stath1)
2136  self.clusters=[]
2137  # fill the cluster list with a single cluster containing all models
2138  c = IMP.pmi.output.Cluster(0)
2139  self.clusters.append(c)
2140  for n0 in range(len(self.stath0)):
2141  c.add_member(n0)
2142  self.pairwise_rmsd={}
2143  self.pairwise_molecular_assignment={}
2144  self.alignment=alignment
2145  self.symmetric_molecules={}
2146  self.issymmetricsel={}
2147  self.update_seldicts()
2148  self.molcopydict0=IMP.pmi.tools.get_molecules_dictionary_by_copy(IMP.atom.get_leaves(self.stath0))
2149  self.molcopydict1=IMP.pmi.tools.get_molecules_dictionary_by_copy(IMP.atom.get_leaves(self.stath1))
2150 
2151  def set_rmsd_selection(self,**kwargs):
2152  """
2153  Setup the selection onto which the rmsd is computed
2154  @param kwargs use IMP.atom.Selection keywords
2155  """
2156  self.sel0_rmsd=IMP.atom.Selection(self.stath0,**kwargs)
2157  self.sel1_rmsd=IMP.atom.Selection(self.stath1,**kwargs)
2158  self.update_seldicts()
2159 
2160  def set_symmetric(self,molecule_name):
2161  """
2162  Store names of symmetric molecules
2163  """
2164  self.symmetric_molecules[molecule_name]=0
2165  self.update_seldicts()
2166 
2167  def set_alignment_selection(self,**kwargs):
2168  """
2169  Setup the selection onto which the alignment is computed
2170  @param kwargs use IMP.atom.Selection keywords
2171  """
2172  self.sel0_alignment=IMP.atom.Selection(self.stath0,**kwargs)
2173  self.sel1_alignment=IMP.atom.Selection(self.stath1,**kwargs)
2174 
2175  ######################
2176  # Clustering functions
2177  ######################
2178  def clean_clusters(self):
2179  for c in self.clusters: del c
2180  self.clusters=[]
2181 
2182 
2183  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2184  """
2185  Cluster the models based on RMSD.
2186  @param rmsd_cutoff Float the distance cutoff in Angstrom
2187  @param metric (Default=IMP.atom.get_rmsd) the metric that will be used to compute rmsds
2188  """
2189  self.clean_clusters()
2190  not_clustered=set(range(len(self.stath1)))
2191  while len(not_clustered)>0:
2192  self.aggregate(not_clustered, rmsd_cutoff, metric)
2193  #self.merge_aggregates(rmsd_cutoff, metric)
2194  self.update_clusters()
2195 
2196  def refine(self,rmsd_cutoff=10):
2197  """
2198  Refine the clusters by merging the ones whose centers are close
2199  @param rmsd_cutoff cutoff distance in Angstorms
2200  """
2201  merge_pairs=[]
2202  clusters_copy=self.clusters
2203  for c0,c1 in itertools.combinations(self.clusters,2):
2204  if c0.center_index is None:
2205  self.compute_cluster_center(c0)
2206  if c1.center_index is None:
2207  self.compute_cluster_center(c1)
2208  d0=self.stath0[c0.center_index]
2209  d1=self.stath1[c1.center_index]
2210  rmsd, molecular_assignment = self.rmsd()
2211  if rmsd <= rmsd_cutoff:
2212  if c1 in self.clusters:
2213  clusters_copy.remove(c1)
2214  c0+=c1
2215  self.clusters=clusters_copy
2216  self.update_clusters()
2217 
2218  ####################
2219  # Input Output
2220  ####################
2221 
2222  def set_cluster_assignments(self, cluster_ids):
2223  if len(cluster_ids)!=len(self.stath0):
2224  raise ValueError('cluster ids has to be same length as number of frames')
2225 
2226  self.clusters=[]
2227  for i in sorted(list(set(cluster_ids))):
2228  self.clusters.append(IMP.pmi.output.Cluster(i))
2229  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
2230  self.clusters[idx].add_member(i,d)
2231 
2232  def get_cluster_data(self, cluster):
2233  """
2234  Return the model data from a cluster
2235  @param cluster IMP.pmi.output.Cluster object
2236  """
2237  data=[]
2238  for m in cluster:
2239  data.append(m)
2240  return data
2241 
2242  def save_data(self,filename='data.pkl'):
2243  """
2244  Save the data for the whole models into a pickle file
2245  @param filename string
2246  """
2247  self.stath0.save_data(filename)
2248 
2249  def set_data(self,data):
2250  """
2251  Set the data from an external IMP.pmi.output.Data
2252  @param data IMP.pmi.output.Data
2253  """
2254  self.stath0.data=data
2255  self.stath1.data=data
2256 
2257  def load_data(self,filename='data.pkl'):
2258  """
2259  Load the data from an external pickled file
2260  @param filename string
2261  """
2262  self.stath0.load_data(filename)
2263  self.stath1.load_data(filename)
2264  self.best_models=len(self.stath0)
2265 
2266  def add_cluster(self,rmf_name_list):
2267  c = IMP.pmi.output.Cluster(len(self.clusters))
2268  print("creating cluster index "+str(len(self.clusters)))
2269  self.clusters.append(c)
2270  current_len=len(self.stath0)
2271 
2272  for rmf in rmf_name_list:
2273  print("adding rmf "+rmf)
2274  self.stath0.add_stat_file(rmf)
2275  self.stath1.add_stat_file(rmf)
2276 
2277  for n0 in range(current_len,len(self.stath0)):
2278  d0=self.stath0[n0]
2279  c.add_member(n0,d0)
2280  self.update_clusters()
2281 
2282  def save_clusters(self,filename='clusters.pkl'):
2283  """
2284  Save the clusters into a pickle file
2285  @param filename string
2286  """
2287  try:
2288  import cPickle as pickle
2289  except ImportError:
2290  import pickle
2291  fl=open(filename,'wb')
2292  pickle.dump(self.clusters,fl)
2293 
2294  def load_clusters(self,filename='clusters.pkl',append=False):
2295  """
2296  Load the clusters from a pickle file
2297  @param filename string
2298  @param append bool (Default=False), if True. append the clusters to the ones currently present
2299  """
2300  try:
2301  import cPickle as pickle
2302  except ImportError:
2303  import pickle
2304  fl=open(filename,'rb')
2305  self.clean_clusters()
2306  if append:
2307  self.clusters+=pickle.load(fl)
2308  else:
2309  self.clusters=pickle.load(fl)
2310  self.update_clusters()
2311 
2312  ####################
2313  # Analysis Functions
2314  ####################
2315 
2316  def compute_cluster_center(self,cluster):
2317  """
2318  Compute the cluster center for a given cluster
2319  """
2320  member_distance=defaultdict(float)
2321 
2322  for n0,n1 in itertools.combinations(cluster.members,2):
2323  d0=self.stath0[n0]
2324  d1=self.stath1[n1]
2325  rmsd, _ = self.rmsd()
2326  member_distance[n0]+=rmsd
2327 
2328  if len(member_distance)>0:
2329  cluster.center_index=min(member_distance, key=member_distance.get)
2330  else:
2331  cluster.center_index=cluster.members[0]
2332 
2333  def save_coordinates(self,cluster,rmf_name=None,reference="Absolute", prefix="./"):
2334  """
2335  Save the coordinates of the current cluster a single rmf file
2336  """
2337  print("saving coordinates",cluster)
2338  if self.alignment: self.set_reference(reference,cluster)
2340  if rmf_name is None:
2341  rmf_name=prefix+'/'+str(cluster.cluster_id)+".rmf3"
2342 
2343  d1=self.stath1[cluster.members[0]]
2344  self.model.update()
2345  o.init_rmf(rmf_name, [self.stath1])
2346  for n1 in cluster.members:
2347  d1=self.stath1[n1]
2348  self.model.update()
2350  if self.alignment: self.align()
2351  o.write_rmf(rmf_name)
2353  o.close_rmf(rmf_name)
2354 
2355  def prune_redundant_structures(self,rmsd_cutoff=10):
2356  """
2357  remove structures that are similar
2358  append it to a new cluster
2359  """
2360  print("pruning models")
2361  selected=0
2362  filtered=[selected]
2363  remaining=range(1,len(self.stath1),10)
2364 
2365  while len(remaining)>0:
2366  d0=self.stath0[selected]
2367  rm=[]
2368  for n1 in remaining:
2369  d1=self.stath1[n1]
2370  if self.alignment: self.align()
2371  d, _ = self.rmsd()
2372  if d<=rmsd_cutoff:
2373  rm.append(n1)
2374  print("pruning model %s, similar to model %s, rmsd %s"%(str(n1),str(selected),str(d)))
2375  remaining=[x for x in remaining if x not in rm]
2376  if len(remaining)==0: break
2377  selected=remaining[0]
2378  filtered.append(selected)
2379  remaining.pop(0)
2380  c = IMP.pmi.output.Cluster(len(self.clusters))
2381  self.clusters.append(c)
2382  for n0 in filtered:
2383  d0=self.stath0[n0]
2384  c.add_member(n0,d0)
2385  self.update_clusters()
2386 
2387 
2388 
2389  def precision(self,cluster):
2390  """
2391  Compute the precision of a cluster
2392  """
2393  npairs=0
2394  rmsd=0.0
2395  precision=None
2396 
2397  if not cluster.center_index is None:
2398  members1=[cluster.center_index]
2399  else:
2400  members1=cluster.members
2401 
2402  for n0 in members1:
2403  d0=self.stath0[n0]
2404  for n1 in cluster.members:
2405  if n0!=n1:
2406  npairs+=1
2407  d1=self.stath1[n1]
2409  tmp_rmsd, _ = self.rmsd()
2410  rmsd+=tmp_rmsd
2412 
2413  if npairs>0:
2414  precision=rmsd/npairs
2415  cluster.precision=precision
2416  return precision
2417 
2418  def bipartite_precision(self,cluster1,cluster2,verbose=False):
2419  """
2420  Compute the bipartite precision (ie the cross-precision)
2421  between two clusters
2422  """
2423  npairs=0
2424  rmsd=0.0
2425  for cn0,n0 in enumerate(cluster1.members):
2426  d0=self.stath0[n0]
2427  for cn1,n1 in enumerate(cluster2.members):
2428  d1=self.stath1[n1]
2429  tmp_rmsd, _ =self.rmsd()
2430  if verbose: print("--- rmsd between structure %s and structure %s is %s"%(str(cn0),str(cn1),str(tmp_rmsd)))
2431  rmsd+=tmp_rmsd
2432  npairs+=1
2433  precision=rmsd/npairs
2434  return precision
2435 
2436  def rmsf(self,cluster,molecule,copy_index=0,state_index=0,cluster_ref=None,step=1):
2437  """
2438  Compute the Root mean square fluctuations
2439  of a molecule in a cluster
2440  Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf
2441  """
2442  rmsf=IMP.pmi.tools.OrderedDict()
2443 
2444  #assumes that residue indexes are identical for stath0 and stath1
2445  if cluster_ref is not None:
2446  if not cluster_ref.center_index is None:
2447  members0 = [cluster_ref.center_index]
2448  else:
2449  members0 = cluster_ref.members
2450  else:
2451  if not cluster.center_index is None:
2452  members0 = [cluster.center_index]
2453  else:
2454  members0 = cluster.members
2455 
2456  s0=IMP.atom.Selection(self.stath0,molecule=molecule,resolution=1,
2457  copy_index=copy_index,state_index=state_index)
2458  ps0=s0.get_selected_particles()
2459  #get the residue indexes
2460  residue_indexes=list(IMP.pmi.tools.OrderedSet([IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
2461 
2462  #get the corresponding particles
2463  #s0=IMP.atom.Selection(stat_ref,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
2464  # copy_index=copy_index,state_index=state_index)
2465  #ps0 = s0.get_selected_particles()
2466 
2467 
2468 
2469  npairs=0
2470  for n0 in members0:
2471  d0=self.stath0[n0]
2472  for n1 in cluster.members[::step]:
2473  if n0!=n1:
2474  print("--- rmsf %s %s"%(str(n0),str(n1)))
2476 
2477  s1=IMP.atom.Selection(self.stath1,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
2478  copy_index=copy_index,state_index=state_index)
2479  ps1 = s1.get_selected_particles()
2480 
2481  d1=self.stath1[n1]
2482  if self.alignment: self.align()
2483  for n,(p0,p1) in enumerate(zip(ps0,ps1)):
2484  r=residue_indexes[n]
2485  d0=IMP.core.XYZ(p0)
2486  d1=IMP.core.XYZ(p1)
2487  if r in rmsf:
2488  rmsf[r]+=IMP.core.get_distance(d0,d1)
2489  else:
2490  rmsf[r]=IMP.core.get_distance(d0,d1)
2491  npairs+=1
2493  for r in rmsf:
2494  rmsf[r]/=npairs
2495 
2496  for stath in [self.stath0,self.stath1]:
2497  if molecule not in self.symmetric_molecules:
2498  s=IMP.atom.Selection(stath,molecule=molecule,residue_index=r,resolution=1,
2499  copy_index=copy_index,state_index=state_index)
2500  else:
2501  s=IMP.atom.Selection(stath,molecule=molecule,residue_index=r,resolution=1,
2502  state_index=state_index)
2503 
2504  ps = s.get_selected_particles()
2505  for p in ps:
2507  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
2508  else:
2510 
2511  return rmsf
2512 
2513  def save_densities(self,cluster,density_custom_ranges,voxel_size=5,reference="Absolute", prefix="./",step=1):
2514  if self.alignment: self.set_reference(reference,cluster)
2515  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
2516  voxel=voxel_size)
2517 
2518  for n1 in cluster.members[::step]:
2519  print("density "+str(n1))
2520  d1=self.stath1[n1]
2522  if self.alignment: self.align()
2523  dens.add_subunits_density(self.stath1)
2525  dens.write_mrc(path=prefix+'/',suffix=str(cluster.cluster_id))
2526  del dens
2527 
2528  def contact_map(self,cluster,contact_threshold=15,log_scale=False,consolidate=False,molecules=None,prefix='./',reference="Absolute"):
2529  if self.alignment: self.set_reference(reference,cluster)
2530  import numpy as np
2531  import matplotlib.pyplot as plt
2532  import matplotlib.cm as cm
2533  from scipy.spatial.distance import cdist
2534  import IMP.pmi.topology
2535  if molecules is None:
2536  mols=[IMP.pmi.topology.PMIMoleculeHierarchy(mol) for mol in IMP.pmi.tools.get_molecules(IMP.atom.get_leaves(self.stath1))]
2537  else:
2538  mols=[IMP.pmi.topology.PMIMoleculeHierarchy(mol) for mol in IMP.pmi.tools.get_molecules(IMP.atom.Selection(self.stath1,molecules=molecules).get_selected_particles())]
2539  unique_copies=[mol for mol in mols if mol.get_copy_index() == 0]
2540  mol_names_unique=dict((mol.get_name(),mol) for mol in unique_copies)
2541  total_len_unique=sum(max(mol.get_residue_indexes()) for mol in unique_copies)
2542 
2543 
2544  # coords = np.ones((total_len,3)) * 1e6 #default to coords "very far away"
2545  index_dict={}
2546  prev_stop=0
2547 
2548  if not consolidate:
2549  for mol in mols:
2550  seqlen=max(mol.get_residue_indexes())
2551  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2552  prev_stop+=seqlen
2553 
2554  else:
2555  for mol in unique_copies:
2556  seqlen=max(mol.get_residue_indexes())
2557  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2558  prev_stop+=seqlen
2559 
2560 
2561  for ncl,n1 in enumerate(cluster.members):
2562  print(ncl)
2563  d1=self.stath1[n1]
2564  #self.apply_molecular_assignments(n1)
2565  coord_dict=IMP.pmi.tools.OrderedDict()
2566  for mol in mols:
2567  mol_name=mol.get_name()
2568  copy_index=mol.get_copy_index()
2569  rindexes = mol.get_residue_indexes()
2570  coords = np.ones((max(rindexes),3))
2571  for rnum in rindexes:
2572  sel = IMP.atom.Selection(mol, residue_index=rnum, resolution=1)
2573  selpart = sel.get_selected_particles()
2574  if len(selpart) == 0: continue
2575  selpart = selpart[0]
2576  coords[rnum - 1, :] = IMP.core.XYZ(selpart).get_coordinates()
2577  coord_dict[mol]=coords
2578 
2579  if not consolidate:
2580  coords=np.concatenate(list(coord_dict.values()))
2581  dists = cdist(coords, coords)
2582  binary_dists = np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
2583  else:
2584  binary_dists_dict={}
2585  for mol1 in mols:
2586  len1 = max(mol1.get_residue_indexes())
2587  for mol2 in mols:
2588  name1=mol1.get_name()
2589  name2=mol2.get_name()
2590  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2591  if (name1, name2) not in binary_dists_dict:
2592  binary_dists_dict[(name1, name2)] = np.zeros((len1,len1))
2593  binary_dists_dict[(name1, name2)] += np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
2594  binary_dists=np.zeros((total_len_unique,total_len_unique))
2595 
2596  for name1,name2 in binary_dists_dict:
2597  r1=index_dict[mol_names_unique[name1]]
2598  r2=index_dict[mol_names_unique[name2]]
2599  binary_dists[min(r1):max(r1)+1,min(r2):max(r2)+1] = np.where((binary_dists_dict[(name1, name2)]>=1.0),1.0,0.0)
2600 
2601  if ncl==0:
2602  dist_maps = [dists]
2603  av_dist_map = dists
2604  contact_freqs = binary_dists
2605  else:
2606  dist_maps.append(dists)
2607  av_dist_map += dists
2608  contact_freqs += binary_dists
2609 
2610  #self.undo_apply_molecular_assignments(n1)
2611 
2612  if log_scale:
2613  contact_freqs =-np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2614  else:
2615  contact_freqs =1.0/len(cluster)*contact_freqs
2616  av_dist_map=1.0/len(cluster)*contact_freqs
2617 
2618  fig = plt.figure(figsize=(100, 100))
2619  ax = fig.add_subplot(111)
2620  ax.set_xticks([])
2621  ax.set_yticks([])
2622  gap_between_components=50
2623  colormap = cm.Blues
2624  colornorm=None
2625 
2626 
2627  #if cbar_labels is not None:
2628  # if len(cbar_labels)!=4:
2629  # print("to provide cbar labels, give 3 fields (first=first input file, last=last input) in oppose order of input contact maps")
2630  # exit()
2631  # set the list of proteins on the x axis
2632  if not consolidate:
2633  sorted_tuple=sorted([(IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_extended_name(),mol) for mol in mols])
2634  prot_list=list(zip(*sorted_tuple))[1]
2635  else:
2636  sorted_tuple=sorted([(IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(),mol) for mol in unique_copies])
2637  prot_list=list(zip(*sorted_tuple))[1]
2638 
2639  prot_listx = prot_list
2640  nresx = gap_between_components + \
2641  sum([max(mol.get_residue_indexes())
2642  + gap_between_components for mol in prot_listx])
2643 
2644  # set the list of proteins on the y axis
2645  prot_listy = prot_list
2646  nresy = gap_between_components + \
2647  sum([max(mol.get_residue_indexes())
2648  + gap_between_components for mol in prot_listy])
2649 
2650  # this is the residue offset for each protein
2651  resoffsetx = {}
2652  resendx = {}
2653  res = gap_between_components
2654  for mol in prot_listx:
2655  resoffsetx[mol] = res
2656  res += max(mol.get_residue_indexes())
2657  resendx[mol] = res
2658  res += gap_between_components
2659 
2660  resoffsety = {}
2661  resendy = {}
2662  res = gap_between_components
2663  for mol in prot_listy:
2664  resoffsety[mol] = res
2665  res += max(mol.get_residue_indexes())
2666  resendy[mol] = res
2667  res += gap_between_components
2668 
2669  resoffsetdiagonal = {}
2670  res = gap_between_components
2671  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2672  resoffsetdiagonal[mol] = res
2673  res += max(mol.get_residue_indexes())
2674  res += gap_between_components
2675 
2676  # plot protein boundaries
2677  xticks = []
2678  xlabels = []
2679  for n, prot in enumerate(prot_listx):
2680  res = resoffsetx[prot]
2681  end = resendx[prot]
2682  for proty in prot_listy:
2683  resy = resoffsety[proty]
2684  endy = resendy[proty]
2685  ax.plot([res, res], [resy, endy], linestyle='-',color='gray', lw=0.4)
2686  ax.plot([end, end], [resy, endy], linestyle='-',color='gray', lw=0.4)
2687  xticks.append((float(res) + float(end)) / 2)
2688  xlabels.append(IMP.pmi.topology.PMIMoleculeHierarchy(prot).get_extended_name())
2689 
2690  yticks = []
2691  ylabels = []
2692  for n, prot in enumerate(prot_listy):
2693  res = resoffsety[prot]
2694  end = resendy[prot]
2695  for protx in prot_listx:
2696  resx = resoffsetx[protx]
2697  endx = resendx[protx]
2698  ax.plot([resx, endx], [res, res], linestyle='-',color='gray', lw=0.4)
2699  ax.plot([resx, endx], [end, end], linestyle='-',color='gray', lw=0.4)
2700  yticks.append((float(res) + float(end)) / 2)
2701  ylabels.append(IMP.pmi.topology.PMIMoleculeHierarchy(prot).get_extended_name())
2702 
2703  # plot the contact map
2704 
2705  tmp_array = np.zeros((nresx, nresy))
2706  ret={}
2707  for px in prot_listx:
2708  for py in prot_listy:
2709  resx = resoffsetx[px]
2710  lengx = resendx[px] - 1
2711  resy = resoffsety[py]
2712  lengy = resendy[py] - 1
2713  indexes_x = index_dict[px]
2714  minx = min(indexes_x)
2715  maxx = max(indexes_x)
2716  indexes_y = index_dict[py]
2717  miny = min(indexes_y)
2718  maxy = max(indexes_y)
2719  tmp_array[resx:lengx,resy:lengy] = contact_freqs[minx:maxx,miny:maxy]
2720  ret[(px,py)]=np.argwhere(contact_freqs[minx:maxx,miny:maxy] == 1.0)+1
2721 
2722  cax = ax.imshow(tmp_array,
2723  cmap=colormap,
2724  norm=colornorm,
2725  origin='lower',
2726  alpha=0.6,
2727  interpolation='nearest')
2728 
2729  ax.set_xticks(xticks)
2730  ax.set_xticklabels(xlabels, rotation=90)
2731  ax.set_yticks(yticks)
2732  ax.set_yticklabels(ylabels)
2733  plt.setp(ax.get_xticklabels(), fontsize=6)
2734  plt.setp(ax.get_yticklabels(), fontsize=6)
2735 
2736  # display and write to file
2737  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2738  [i.set_linewidth(2.0) for i in ax.spines.values()]
2739  #if cbar_labels is not None:
2740  # cbar = fig.colorbar(cax, ticks=[0.5,1.5,2.5,3.5])
2741  # cbar.ax.set_yticklabels(cbar_labels)# vertically oriented colorbar
2742 
2743  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf", dpi=300,transparent="False")
2744  return ret
2745 
2746 
2747  def plot_rmsd_matrix(self,filename):
2748  import numpy as np
2749  self.compute_all_pairwise_rmsd()
2750  distance_matrix = np.zeros(
2751  (len(self.stath0), len(self.stath1)))
2752  for (n0,n1) in self.pairwise_rmsd:
2753  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0,n1)]
2754 
2755  import matplotlib as mpl
2756  mpl.use('Agg')
2757  import matplotlib.pylab as pl
2758  from scipy.cluster import hierarchy as hrc
2759 
2760  fig = pl.figure(figsize=(10,8))
2761  ax = fig.add_subplot(212)
2762  dendrogram = hrc.dendrogram(
2763  hrc.linkage(distance_matrix),
2764  color_threshold=7,
2765  no_labels=True)
2766  leaves_order = dendrogram['leaves']
2767  ax.set_xlabel('Model')
2768  ax.set_ylabel('RMSD [Angstroms]')
2769 
2770  ax2 = fig.add_subplot(221)
2771  cax = ax2.imshow(
2772  distance_matrix[leaves_order,
2773  :][:,
2774  leaves_order],
2775  interpolation='nearest')
2776  cb = fig.colorbar(cax)
2777  cb.set_label('RMSD [Angstroms]')
2778  ax2.set_xlabel('Model')
2779  ax2.set_ylabel('Model')
2780 
2781  pl.savefig(filename, dpi=300)
2782  pl.close(fig)
2783 
2784  ####################
2785  # Internal Functions
2786  ####################
2787 
2788  def update_clusters(self):
2789  """
2790  Update the cluster id numbers
2791  """
2792  for n,c in enumerate(self.clusters):
2793  c.cluster_id=n
2794 
2795  def get_molecule(self, hier, name, copy):
2796  s=IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2797  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2798 
2799  def update_seldicts(self):
2800  """
2801  Update the seldicts
2802  """
2803  self.seldict0=IMP.pmi.tools.get_selections_dictionary(self.sel0_rmsd.get_selected_particles())
2804  self.seldict1=IMP.pmi.tools.get_selections_dictionary(self.sel1_rmsd.get_selected_particles())
2805  for mol in self.seldict0:
2806  for sel in self.seldict0[mol]:
2807  self.issymmetricsel[sel]=False
2808  for mol in self.symmetric_molecules:
2809  self.symmetric_molecules[mol]=len(self.seldict0[mol])
2810  for sel in self.seldict0[mol]:
2811  self.issymmetricsel[sel]=True
2812 
2813 
2814  def align(self):
2815  print("alignment")
2816  tr = IMP.atom.get_transformation_aligning_first_to_second(self.sel1_alignment, self.sel0_alignment)
2817 
2818  for rb in self.rbs1:
2819  IMP.core.transform(rb, tr)
2820 
2821  for bead in self.beads1:
2822  IMP.core.transform(IMP.core.XYZ(bead), tr)
2823 
2824  self.model.update()
2825 
2826  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2827  '''
2828  initial filling of the clusters.
2829  '''
2830  n0 = idxs.pop()
2831  print("clustering model "+str(n0))
2832  d0 = self.stath0[n0]
2833  c = IMP.pmi.output.Cluster(len(self.clusters))
2834  print("creating cluster index "+str(len(self.clusters)))
2835  self.clusters.append(c)
2836  c.add_member(n0,d0)
2837  clustered = set([n0])
2838  for n1 in idxs:
2839  print("--- trying to add model "+str(n1)+" to cluster "+str(len(self.clusters)))
2840  d1 = self.stath1[n1]
2841  if self.alignment: self.align()
2842  rmsd, _ = self.rmsd(metric=metric)
2843  if rmsd<rmsd_cutoff:
2844  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2845  c.add_member(n1,d1)
2846  clustered.add(n1)
2847  else:
2848  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2849  idxs-=clustered
2850 
2851  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2852  """
2853  merge the clusters that have close members
2854  @param rmsd_cutoff cutoff distance in Angstorms
2855  """
2856  # before merging, clusters are spheres of radius rmsd_cutoff centered on the 1st element
2857  # here we only try to merge clusters whose centers are closer than 2*rmsd_cutoff
2858  to_merge = []
2859  print("merging...")
2860  for c0, c1 in filter(lambda x: len(x[0].members)>1, itertools.combinations(self.clusters, 2)):
2861  n0, n1 = [c.members[0] for c in (c0,c1)]
2862  d0 = self.stath0[n0]
2863  d1 = self.stath1[n1]
2864  rmsd, _ = self.rmsd()
2865  if rmsd<2*rmsd_cutoff and self.have_close_members(c0,c1,rmsd_cutoff,metric):
2866  to_merge.append((c0,c1))
2867 
2868  for c0, c in reversed(to_merge):
2869  self.merge(c0,c)
2870 
2871  #keep only full clusters
2872  self.clusters = [c for c in filter(lambda x: len(x.members)>0, self.clusters)]
2873 
2874 
2875  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2876  '''
2877  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2878  '''
2879  print("check close members for clusters "+str(c0.cluster_id)+" and "+str(c1.cluster_id))
2880  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2881  d0 = self.stath0[n0]
2882  d1 = self.stath1[n1]
2883  rmsd, _ = self.rmsd(metric=metric)
2884  if rmsd<rmsd_cutoff:
2885  return True
2886 
2887  return False
2888 
2889 
2890  def merge(self, c0, c1):
2891  '''
2892  merge two clusters
2893  '''
2894  c0+=c1
2895  c1.members=[]
2896  c1.data={}
2897 
2898  def rmsd_helper(self, sels0, sels1, metric):
2899  '''
2900  a function that returns the permutation best_sel of sels0 that minimizes metric
2901  '''
2902  best_rmsd2 = float('inf')
2903  best_sel = None
2904  if self.issymmetricsel[sels0[0]]:
2905  #this cases happens when symmetries were defined
2906  N = len(sels0)
2907  for offset in range(N):
2908  order=[(offset+i)%N for i in range(N)]
2909  sels = [sels0[(offset+i)%N] for i in range(N)]
2910  sel0 = sels[0]
2911  sel1 = sels1[0]
2912  r=metric(sel0, sel1)
2913  rmsd2=r*r*N
2914  ###print(order,rmsd2)
2915  if rmsd2 < best_rmsd2:
2916  best_rmsd2 = rmsd2
2917  best_sel = sels
2918  best_order=order
2919  else:
2920  for sels in itertools.permutations(sels0):
2921  rmsd2=0.0
2922  for sel0, sel1 in itertools.takewhile(lambda x: rmsd2<best_rmsd2, zip(sels, sels1)):
2923  r=metric(sel0, sel1)
2924  rmsd2+=r*r
2925  if rmsd2 < best_rmsd2:
2926  best_rmsd2 = rmsd2
2927  best_sel = sels
2928  ###for i,sel in enumerate(best_sel):
2929  ### p0 = sel.get_selected_particles()[0]
2930  ### p1 = sels1[i].get_selected_particles()[0]
2931  ### m0 = IMP.pmi.tools.get_molecules([p0])[0]
2932  ### m1 = IMP.pmi.tools.get_molecules([p1])[0]
2933  ### c0 = IMP.atom.Copy(m0).get_copy_index()
2934  ### c1 = IMP.atom.Copy(m1).get_copy_index()
2935  ### name0=m0.get_name()
2936  ### name1=m1.get_name()
2937  ### print("WWW",name0,name1,c0,c1)
2938  ###print(best_order,best_rmsd2,m0,m1)
2939  return best_sel, best_rmsd2
2940 
2941  def compute_all_pairwise_rmsd(self):
2942  for d0 in self.stath0:
2943  for d1 in self.stath1:
2944  rmsd, _ = self.rmsd()
2945 
2946  def rmsd(self,metric=IMP.atom.get_rmsd):
2947  '''
2948  Computes the RMSD. Resolves ambiguous pairs assignments
2949  '''
2950  # here we memoize the rmsd and molecular assignment so that it's not done multiple times
2951  n0=self.stath0.current_index
2952  n1=self.stath1.current_index
2953  if ((n0,n1) in self.pairwise_rmsd) and ((n0,n1) in self.pairwise_molecular_assignment):
2954  return self.pairwise_rmsd[(n0,n1)], self.pairwise_molecular_assignment[(n0,n1)]
2955 
2956  if self.alignment:
2957  self.align()
2958  #if it's not yet memoized
2959  total_rmsd=0.0
2960  total_N=0
2961  # this is a dictionary which keys are the molecule names, and values are the list of IMP.atom.Selection for all molecules that share the molecule name
2962  seldict_best_order={}
2963  molecular_assignment={}
2964  for molname, sels0 in self.seldict0.items():
2965  sels_best_order, best_rmsd2 = self.rmsd_helper(sels0, self.seldict1[molname], metric)
2966 
2967  Ncoords = len(sels_best_order[0].get_selected_particles())
2968  Ncopies = len(self.seldict1[molname])
2969  total_rmsd += Ncoords*best_rmsd2
2970  total_N += Ncoords*Ncopies
2971 
2972  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2973  p0 = sel0.get_selected_particles()[0]
2974  p1 = sel1.get_selected_particles()[0]
2975  m0 = IMP.pmi.tools.get_molecules([p0])[0]
2976  m1 = IMP.pmi.tools.get_molecules([p1])[0]
2977  c0 = IMP.atom.Copy(m0).get_copy_index()
2978  c1 = IMP.atom.Copy(m1).get_copy_index()
2979  ###print(molname,c0,c1)
2980  molecular_assignment[(molname,c0)]=(molname,c1)
2981 
2982  total_rmsd = math.sqrt(total_rmsd/total_N)
2983 
2984  self.pairwise_rmsd[(n0,n1)]=total_rmsd
2985  self.pairwise_molecular_assignment[(n0,n1)]=molecular_assignment
2986  self.pairwise_rmsd[(n1,n0)]=total_rmsd
2987  self.pairwise_molecular_assignment[(n1,n0)]=molecular_assignment
2988  ###print(n0,n1,molecular_assignment)
2989  return total_rmsd, molecular_assignment
2990 
2991  def set_reference(self,reference,cluster):
2992  """
2993  Fix the reference structure for structural alignment, rmsd and chain assignemnt
2994  @param reference can be either "Absolute" (cluster center of the first cluster)
2995  or Relative (cluster center of the current cluster)
2996  """
2997  if reference=="Absolute":
2998  d0=self.stath0[0]
2999  elif reference=="Relative":
3000  if cluster.center_index:
3001  n0=cluster.center_index
3002  else:
3003  n0=cluster.members[0]
3004  d0=self.stath0[n0]
3005 
3007  """
3008  compute the molecular assignemnts between multiple copies
3009  of the same sequence. It changes the Copy index of Molecules
3010  """
3011  d1=self.stath1[n1]
3012  _, molecular_assignment = self.rmsd()
3013  for (m0, c0), (m1,c1) in molecular_assignment.items():
3014  mol0 = self.molcopydict0[m0][c0]
3015  mol1 = self.molcopydict1[m1][c1]
3016  cik0=IMP.atom.Copy(mol0).get_copy_index_key()
3017  p1=IMP.atom.Copy(mol1).get_particle()
3018  p1.set_value(cik0,c0)
3019 
3021  """
3022  Undo the Copy index assignment
3023  """
3024  d1=self.stath1[n1]
3025  _, molecular_assignment = self.rmsd()
3026  mols_newcopys = []
3027  for (m0, c0), (m1,c1) in molecular_assignment.items():
3028  mol0 = self.molcopydict0[m0][c0]
3029  mol1 = self.molcopydict1[m1][c1]
3030  cik0=IMP.atom.Copy(mol0).get_copy_index_key()
3031  p1=IMP.atom.Copy(mol1).get_particle()
3032  p1.set_value(cik0,c1)
3033 
3034  ####################
3035  # Container Functions
3036  ####################
3037 
3038  def __repr__(self):
3039  s= "AnalysisReplicaExchange\n"
3040  s+="---- number of clusters %s \n"%str(len(self.clusters))
3041  s+="---- number of models %s \n"%str(len(self.stath0))
3042  return s
3043 
3044  def __getitem__(self,int_slice_adaptor):
3045  if type(int_slice_adaptor) is int:
3046  return self.clusters[int_slice_adaptor]
3047  elif type(int_slice_adaptor) is slice:
3048  return self.__iter__(int_slice_adaptor)
3049  else:
3050  raise TypeError("Unknown Type")
3051 
3052  def __len__(self):
3053  return len(self.clusters)
3054 
3055  def __iter__(self,slice_key=None):
3056  if slice_key is None:
3057  for i in range(len(self)):
3058  yield self[i]
3059  else:
3060  for i in range(len(self))[slice_key]:
3061  yield self[i]
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:2946
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
Definition: macros.py:2991
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:2294
def precision
Compute the precision of a cluster.
Definition: macros.py:2389
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:1437
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:88
A container for models organized into clusters.
Definition: output.py:1337
Sample using molecular dynamics.
Definition: samplers.py:393
def aggregate
initial filling of the clusters.
Definition: macros.py:2826
A class for reading stat files (either rmf or ascii v1 and v2)
Definition: output.py:773
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:575
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:576
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:2355
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:2436
def __init__
Constructor.
Definition: macros.py:834
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:2151
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:2232
def __init__
Construction of the Class.
Definition: macros.py:2112
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:735
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:2249
def __init__
Constructor.
Definition: macros.py:111
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:3020
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:2167
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2898
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:2333
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1580
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
Definition: macros.py:3006
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:2102
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:572
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2851
This class initializes the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:1488
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
Representation of the system.
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:2316
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:1497
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def deprecated_pmi1_object
Mark a PMI1 class as deprecated.
def build_model
Create model.
Definition: macros.py:1114
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:23
def update_seldicts
Update the seldicts.
Definition: macros.py:2799
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2788
def deprecated_method
Python decorator to mark a method as deprecated.
Definition: __init__.py:9703
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:1115
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:2196
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:62
Collect timing information.
Definition: tools.py:101
def __init__
Constructor.
Definition: macros.py:1100
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:2160
A macro to build a Representation based on a Topology and lists of movers DEPRECATED - Use BuildSyste...
Definition: macros.py:812
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: utilities.h:91
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9687
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:36
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2890
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:621
Deprecated building macro - use BuildSystem()
Definition: macros.py:1096
The general base class for IMP exceptions.
Definition: exception.h:49
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1088
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:2242
Class to handle individual particles of a Model object.
Definition: Particle.h:41
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:744
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:2418
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:594
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:2183
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:2282
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2875
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
Set up the representation of all proteins and nucleic acid macromolecules.
A macro to help setup and run replica exchange.
Definition: macros.py:55
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:2257
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:1063
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:711
Sample using replica exchange.
Definition: samplers.py:510
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
def get_representation
Return the Representation object.
Definition: macros.py:954
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27