IMP logo
IMP Reference Guide  2.9.0
The Integrative Modeling Platform
macros.py
1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
8 import IMP.pmi.tools
9 import IMP.pmi.samplers
10 import IMP.pmi.output
11 import IMP.pmi.analysis
12 import IMP.pmi.io
13 import IMP.rmf
14 import IMP.isd
15 import IMP.pmi.dof
16 import RMF
17 import os
18 import glob
19 from operator import itemgetter
20 from collections import defaultdict
21 import numpy as np
22 import string
23 import itertools
24 import math
25 
26 class _RMFRestraints(object):
27  """All restraints that are written out to the RMF file"""
28  def __init__(self, model, user_restraints):
29  self._rmf_rs = IMP.pmi.tools.get_restraint_set(model, rmf=True)
30  self._user_restraints = user_restraints if user_restraints else []
31 
32  def __len__(self):
33  return (len(self._user_restraints)
34  + self._rmf_rs.get_number_of_restraints())
35 
36  def __bool__(self):
37  return len(self) > 0
38  __nonzero__ = __bool__ # Python 2 compatibility
39 
40  def __getitem__(self, i):
41  class FakePMIWrapper(object):
42  def __init__(self, r):
43  self.r = IMP.RestraintSet.get_from(r)
44  get_restraint = lambda self: self.r
45  lenuser = len(self._user_restraints)
46  if 0 <= i < lenuser:
47  return self._user_restraints[i]
48  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
49  r = self._rmf_rs.get_restraint(i - lenuser)
50  return FakePMIWrapper(r)
51  else:
52  raise IndexError("Out of range")
53 
54 
55 class ReplicaExchange0(object):
56  """A macro to help setup and run replica exchange.
57  Supports Monte Carlo and molecular dynamics.
58  Produces trajectory RMF files, best PDB structures,
59  and output stat files.
60  """
61  def __init__(self, model,
62  representation=None,
63  root_hier=None,
64  sample_objects=None, # DEPRECATED
65  monte_carlo_sample_objects=None,
66  molecular_dynamics_sample_objects=None,
67  output_objects=[],
68  rmf_output_objects=None,
69  crosslink_restraints=None,
70  monte_carlo_temperature=1.0,
71  simulated_annealing=False,
72  simulated_annealing_minimum_temperature=1.0,
73  simulated_annealing_maximum_temperature=2.5,
74  simulated_annealing_minimum_temperature_nframes=100,
75  simulated_annealing_maximum_temperature_nframes=100,
76  replica_exchange_minimum_temperature=1.0,
77  replica_exchange_maximum_temperature=2.5,
78  replica_exchange_swap=True,
79  num_sample_rounds=1,
80  number_of_best_scoring_models=500,
81  monte_carlo_steps=10,
82  self_adaptive=False,
83  molecular_dynamics_steps=10,
84  molecular_dynamics_max_time_step=1.0,
85  number_of_frames=1000,
86  save_coordinates_mode="lowest_temperature",
87  nframes_write_coordinates=1,
88  write_initial_rmf=True,
89  initial_rmf_name_suffix="initial",
90  stat_file_name_suffix="stat",
91  best_pdb_name_suffix="model",
92  do_clean_first=True,
93  do_create_directories=True,
94  global_output_directory="./",
95  rmf_dir="rmfs/",
96  best_pdb_dir="pdbs/",
97  replica_stat_file_suffix="stat_replica",
98  em_object_for_rmf=None,
99  atomistic=False,
100  replica_exchange_object=None,
101  test_mode=False):
102  """Constructor.
103  @param model The IMP model
104  @param representation PMI.representation.Representation object
105  (or list of them, for multi-state modeling)
106  @param root_hier Instead of passing Representation, pass the System hierarchy
107  @param monte_carlo_sample_objects Objects for MC sampling; a list of
108  structural components (generally the representation) that will
109  be moved and restraints with parameters that need to
110  be sampled.
111  For PMI2: just pass flat list of movers
112  @param molecular_dynamics_sample_objects Objects for MD sampling
113  For PMI2: just pass flat list of particles
114  @param output_objects A list of structural objects and restraints
115  that will be included in output (ie, statistics "stat" files). Any object
116  that provides a get_output() method can be used here. If None is passed
117  the macro will not write stat files.
118  @param rmf_output_objects A list of structural objects and restraints
119  that will be included in rmf. Any object
120  that provides a get_output() method can be used here.
121  @param crosslink_restraints List of cross-link restraints that will
122  be included in output RMF files (for visualization).
123  @param monte_carlo_temperature MC temp (may need to be optimized
124  based on post-sampling analysis)
125  @param simulated_annealing If True, perform simulated annealing
126  @param simulated_annealing_minimum_temperature Should generally be
127  the same as monte_carlo_temperature.
128  @param simulated_annealing_minimum_temperature_nframes Number of
129  frames to compute at minimum temperature.
130  @param simulated_annealing_maximum_temperature_nframes Number of
131  frames to compute at
132  temps > simulated_annealing_maximum_temperature.
133  @param replica_exchange_minimum_temperature Low temp for REX; should
134  generally be the same as monte_carlo_temperature.
135  @param replica_exchange_maximum_temperature High temp for REX
136  @param replica_exchange_swap Boolean, enable disable temperature
137  swap (Default=True)
138  @param num_sample_rounds Number of rounds of MC/MD per cycle
139  @param number_of_best_scoring_models Number of top-scoring PDB models
140  to keep around for analysis
141  @param monte_carlo_steps Number of MC steps per round
142  @param self_adaptive self adaptive scheme for monte carlo movers
143  @param molecular_dynamics_steps Number of MD steps per round
144  @param molecular_dynamics_max_time_step Max time step for MD
145  @param number_of_frames Number of REX frames to run
146  @param save_coordinates_mode string: how to save coordinates.
147  "lowest_temperature" (default) only the lowest temperatures is saved
148  "25th_score" all replicas whose score is below the 25th percentile
149  "50th_score" all replicas whose score is below the 50th percentile
150  "75th_score" all replicas whose score is below the 75th percentile
151  @param nframes_write_coordinates How often to write the coordinates
152  of a frame
153  @param write_initial_rmf Write the initial configuration
154  @param global_output_directory Folder that will be created to house
155  output.
156  @param test_mode Set to True to avoid writing any files, just test one frame.
157  """
158  self.model = model
159  self.vars = {}
160  self.pmi2 = False
161 
162  ### add check hierarchy is multistate
163  self.output_objects = output_objects
164  self.rmf_output_objects=rmf_output_objects
165  self.representation = representation
166  if representation:
167  if type(representation) == list:
168  self.is_multi_state = True
169  self.root_hiers = [r.prot for r in representation]
170  self.vars["number_of_states"] = len(representation)
171  else:
172  self.is_multi_state = False
173  self.root_hier = representation.prot
174  self.vars["number_of_states"] = 1
175  elif root_hier and type(root_hier) == IMP.atom.Hierarchy and root_hier.get_name()=='System':
176  self.pmi2 = True
177  if self.output_objects is not None:
178  self.output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
179  if self.rmf_output_objects is not None:
180  self.rmf_output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
181  self.root_hier = root_hier
182  states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
183  self.vars["number_of_states"] = len(states)
184  if len(states)>1:
185  self.root_hiers = states
186  self.is_multi_state = True
187  else:
188  self.root_hier = root_hier
189  self.is_multi_state = False
190  else:
191  raise Exception("Must provide representation or System hierarchy (root_hier)")
192 
193  self._rmf_restraints = _RMFRestraints(model, crosslink_restraints)
194  self.em_object_for_rmf = em_object_for_rmf
195  self.monte_carlo_sample_objects = monte_carlo_sample_objects
196  self.vars["self_adaptive"]=self_adaptive
197  if sample_objects is not None:
198  self.monte_carlo_sample_objects+=sample_objects
199  self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
200  self.replica_exchange_object = replica_exchange_object
201  self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
202  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
203  self.vars[
204  "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
205  self.vars[
206  "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
207  self.vars["replica_exchange_swap"] = replica_exchange_swap
208  self.vars["simulated_annealing"]=\
209  simulated_annealing
210  self.vars["simulated_annealing_minimum_temperature"]=\
211  simulated_annealing_minimum_temperature
212  self.vars["simulated_annealing_maximum_temperature"]=\
213  simulated_annealing_maximum_temperature
214  self.vars["simulated_annealing_minimum_temperature_nframes"]=\
215  simulated_annealing_minimum_temperature_nframes
216  self.vars["simulated_annealing_maximum_temperature_nframes"]=\
217  simulated_annealing_maximum_temperature_nframes
218 
219  self.vars["num_sample_rounds"] = num_sample_rounds
220  self.vars[
221  "number_of_best_scoring_models"] = number_of_best_scoring_models
222  self.vars["monte_carlo_steps"] = monte_carlo_steps
223  self.vars["molecular_dynamics_steps"]=molecular_dynamics_steps
224  self.vars["number_of_frames"] = number_of_frames
225  if not save_coordinates_mode in ["lowest_temperature","25th_score","50th_score","75th_score"]:
226  raise Exception("save_coordinates_mode has unrecognized value")
227  else:
228  self.vars["save_coordinates_mode"] = save_coordinates_mode
229  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
230  self.vars["write_initial_rmf"] = write_initial_rmf
231  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
232  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
233  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
234  self.vars["do_clean_first"] = do_clean_first
235  self.vars["do_create_directories"] = do_create_directories
236  self.vars["global_output_directory"] = global_output_directory
237  self.vars["rmf_dir"] = rmf_dir
238  self.vars["best_pdb_dir"] = best_pdb_dir
239  self.vars["atomistic"] = atomistic
240  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
241  self.vars["geometries"] = None
242  self.test_mode = test_mode
243 
244  def add_geometries(self, geometries):
245  if self.vars["geometries"] is None:
246  self.vars["geometries"] = list(geometries)
247  else:
248  self.vars["geometries"].extend(geometries)
249 
250  def show_info(self):
251  print("ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
252  print("--- it stores the best scoring pdb models in pdbs/")
253  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
254  print("--- variables:")
255  keys = list(self.vars.keys())
256  keys.sort()
257  for v in keys:
258  print("------", v.ljust(30), self.vars[v])
259 
260  def get_replica_exchange_object(self):
261  return self.replica_exchange_object
262 
263  def _add_provenance(self, sampler_md, sampler_mc):
264  """Record details about the sampling in the IMP Hierarchies"""
265  if not self.is_multi_state or self.pmi2:
266  output_hierarchies = [self.root_hier]
267  else:
268  output_hierarchies = self.root_hiers
269 
270  iterations = 0
271  if sampler_md:
272  method = "Molecular Dynamics"
273  iterations += self.vars["molecular_dynamics_steps"]
274  if sampler_mc:
275  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
276  iterations += self.vars["monte_carlo_steps"]
277  # If no sampling is actually done, no provenance to write
278  if iterations == 0 or self.vars["number_of_frames"] == 0:
279  return
280  iterations *= self.vars["num_sample_rounds"]
281 
282  for h in output_hierarchies:
283  pi = self.model.add_particle("sampling")
285  self.model, pi, method, self.vars["number_of_frames"],
286  iterations)
287  p.set_number_of_replicas(
288  self.replica_exchange_object.get_number_of_replicas())
289  IMP.pmi.tools._add_pmi_provenance(h)
290  IMP.core.add_provenance(self.model, h, p)
291 
292  def execute_macro(self):
293  temp_index_factor = 100000.0
294  samplers=[]
295  sampler_mc=None
296  sampler_md=None
297  if self.monte_carlo_sample_objects is not None:
298  print("Setting up MonteCarlo")
299  sampler_mc = IMP.pmi.samplers.MonteCarlo(self.model,
300  self.monte_carlo_sample_objects,
301  self.vars["monte_carlo_temperature"])
302  if self.vars["simulated_annealing"]:
303  tmin=self.vars["simulated_annealing_minimum_temperature"]
304  tmax=self.vars["simulated_annealing_maximum_temperature"]
305  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
306  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
307  sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
308  if self.vars["self_adaptive"]:
309  sampler_mc.set_self_adaptive(isselfadaptive=self.vars["self_adaptive"])
310  if self.output_objects is not None:
311  self.output_objects.append(sampler_mc)
312  if self.rmf_output_objects is not None:
313  self.rmf_output_objects.append(sampler_mc)
314  samplers.append(sampler_mc)
315 
316 
317  if self.molecular_dynamics_sample_objects is not None:
318  print("Setting up MolecularDynamics")
319  sampler_md = IMP.pmi.samplers.MolecularDynamics(self.model,
320  self.molecular_dynamics_sample_objects,
321  self.vars["monte_carlo_temperature"],
322  maximum_time_step=self.molecular_dynamics_max_time_step)
323  if self.vars["simulated_annealing"]:
324  tmin=self.vars["simulated_annealing_minimum_temperature"]
325  tmax=self.vars["simulated_annealing_maximum_temperature"]
326  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
327  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
328  sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
329  if self.output_objects is not None:
330  self.output_objects.append(sampler_md)
331  if self.rmf_output_objects is not None:
332  self.rmf_output_objects.append(sampler_md)
333  samplers.append(sampler_md)
334 # -------------------------------------------------------------------------
335 
336  print("Setting up ReplicaExchange")
337  rex = IMP.pmi.samplers.ReplicaExchange(self.model,
338  self.vars[
339  "replica_exchange_minimum_temperature"],
340  self.vars[
341  "replica_exchange_maximum_temperature"],
342  samplers,
343  replica_exchange_object=self.replica_exchange_object)
344  self.replica_exchange_object = rex.rem
345 
346  myindex = rex.get_my_index()
347  if self.output_objects is not None:
348  self.output_objects.append(rex)
349  if self.rmf_output_objects is not None:
350  self.rmf_output_objects.append(rex)
351  # must reset the minimum temperature due to the
352  # different binary length of rem.get_my_parameter double and python
353  # float
354  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
355 
356 # -------------------------------------------------------------------------
357 
358  globaldir = self.vars["global_output_directory"] + "/"
359  rmf_dir = globaldir + self.vars["rmf_dir"]
360  pdb_dir = globaldir + self.vars["best_pdb_dir"]
361 
362  if not self.test_mode:
363  if self.vars["do_clean_first"]:
364  pass
365 
366  if self.vars["do_create_directories"]:
367 
368  try:
369  os.makedirs(globaldir)
370  except:
371  pass
372  try:
373  os.makedirs(rmf_dir)
374  except:
375  pass
376 
377  if not self.is_multi_state:
378  try:
379  os.makedirs(pdb_dir)
380  except:
381  pass
382  else:
383  for n in range(self.vars["number_of_states"]):
384  try:
385  os.makedirs(pdb_dir + "/" + str(n))
386  except:
387  pass
388 
389 # -------------------------------------------------------------------------
390 
392  if self.output_objects is not None:
393  self.output_objects.append(sw)
394  if self.rmf_output_objects is not None:
395  self.rmf_output_objects.append(sw)
396 
397  print("Setting up stat file")
398  output = IMP.pmi.output.Output(atomistic=self.vars["atomistic"])
399  low_temp_stat_file = globaldir + \
400  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
401  if not self.test_mode:
402  if self.output_objects is not None:
403  output.init_stat2(low_temp_stat_file,
404  self.output_objects,
405  extralabels=["rmf_file", "rmf_frame_index"])
406  else:
407  print("Stat file writing is disabled")
408 
409  if self.rmf_output_objects is not None:
410  print("Stat info being written in the rmf file")
411 
412  print("Setting up replica stat file")
413  replica_stat_file = globaldir + \
414  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
415  if not self.test_mode:
416  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
417 
418  if not self.test_mode:
419  print("Setting up best pdb files")
420  if not self.is_multi_state:
421  if self.vars["number_of_best_scoring_models"] > 0:
422  output.init_pdb_best_scoring(pdb_dir + "/" +
423  self.vars["best_pdb_name_suffix"],
424  self.root_hier,
425  self.vars[
426  "number_of_best_scoring_models"],
427  replica_exchange=True)
428  output.write_psf(pdb_dir + "/" +"model.psf",pdb_dir + "/" +
429  self.vars["best_pdb_name_suffix"]+".0.pdb")
430  else:
431  if self.vars["number_of_best_scoring_models"] > 0:
432  for n in range(self.vars["number_of_states"]):
433  output.init_pdb_best_scoring(pdb_dir + "/" + str(n) + "/" +
434  self.vars["best_pdb_name_suffix"],
435  self.root_hiers[n],
436  self.vars[
437  "number_of_best_scoring_models"],
438  replica_exchange=True)
439  output.write_psf(pdb_dir + "/" + str(n) + "/" +"model.psf",pdb_dir + "/" + str(n) + "/" +
440  self.vars["best_pdb_name_suffix"]+".0.pdb")
441 # ---------------------------------------------
442 
443  if self.em_object_for_rmf is not None:
444  if not self.is_multi_state or self.pmi2:
445  output_hierarchies = [
446  self.root_hier,
447  self.em_object_for_rmf.get_density_as_hierarchy(
448  )]
449  else:
450  output_hierarchies = self.root_hiers
451  output_hierarchies.append(
452  self.em_object_for_rmf.get_density_as_hierarchy())
453  else:
454  if not self.is_multi_state or self.pmi2:
455  output_hierarchies = [self.root_hier]
456  else:
457  output_hierarchies = self.root_hiers
458 
459 #----------------------------------------------
460  if not self.test_mode:
461  print("Setting up and writing initial rmf coordinate file")
462  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
463  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
464  output_hierarchies,listofobjects=self.rmf_output_objects)
465  if self._rmf_restraints:
466  output.add_restraints_to_rmf(
467  init_suffix + "." + str(myindex) + ".rmf3",
468  self._rmf_restraints)
469  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
470  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
471 
472 #----------------------------------------------
473 
474  if not self.test_mode:
475  mpivs=IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
476 
477 #----------------------------------------------
478 
479  self._add_provenance(sampler_md, sampler_mc)
480 
481  if not self.test_mode:
482  print("Setting up production rmf files")
483  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
484  output.init_rmf(rmfname, output_hierarchies, geometries=self.vars["geometries"],
485  listofobjects = self.rmf_output_objects)
486 
487  if self._rmf_restraints:
488  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
489 
490  ntimes_at_low_temp = 0
491 
492  if myindex == 0:
493  self.show_info()
494  self.replica_exchange_object.set_was_used(True)
495  nframes = self.vars["number_of_frames"]
496  if self.test_mode:
497  nframes = 1
498  for i in range(nframes):
499  if self.test_mode:
500  score = 0.
501  else:
502  for nr in range(self.vars["num_sample_rounds"]):
503  if sampler_md is not None:
504  sampler_md.optimize(
505  self.vars["molecular_dynamics_steps"])
506  if sampler_mc is not None:
507  sampler_mc.optimize(self.vars["monte_carlo_steps"])
509  self.model).evaluate(False)
510  mpivs.set_value("score",score)
511  output.set_output_entry("score", score)
512 
513 
514 
515  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
516 
517  if self.vars["save_coordinates_mode"] == "lowest_temperature":
518  save_frame=(min_temp_index == my_temp_index)
519  elif self.vars["save_coordinates_mode"] == "25th_score":
520  score_perc=mpivs.get_percentile("score")
521  save_frame=(score_perc*100.0<=25.0)
522  elif self.vars["save_coordinates_mode"] == "50th_score":
523  score_perc=mpivs.get_percentile("score")
524  save_frame=(score_perc*100.0<=50.0)
525  elif self.vars["save_coordinates_mode"] == "75th_score":
526  score_perc=mpivs.get_percentile("score")
527  save_frame=(score_perc*100.0<=75.0)
528 
529  if save_frame:
530  print("--- frame %s score %s " % (str(i), str(score)))
531 
532  if not self.test_mode:
533  if i % self.vars["nframes_write_coordinates"]==0:
534  print('--- writing coordinates')
535  if self.vars["number_of_best_scoring_models"] > 0:
536  output.write_pdb_best_scoring(score)
537  output.write_rmf(rmfname)
538  output.set_output_entry("rmf_file", rmfname)
539  output.set_output_entry("rmf_frame_index", ntimes_at_low_temp)
540  else:
541  output.set_output_entry("rmf_file", rmfname)
542  output.set_output_entry("rmf_frame_index", '-1')
543  if self.output_objects is not None:
544  output.write_stat2(low_temp_stat_file)
545  ntimes_at_low_temp += 1
546 
547  if not self.test_mode:
548  output.write_stat2(replica_stat_file)
549  if self.vars["replica_exchange_swap"]:
550  rex.swap_temp(i, score)
551  if self.representation:
552  for p, state in self.representation._protocol_output:
553  p.add_replica_exchange(state, self)
554 
555  if not self.test_mode:
556  print("closing production rmf files")
557  output.close_rmf(rmfname)
558 
559 
560 
561 # ----------------------------------------------------------------------
562 class BuildSystem(object):
563  """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
564  Easily create multi-state systems by calling this macro
565  repeatedly with different TopologyReader objects!
566  A useful function is get_molecules() which returns the PMI Molecules grouped by state
567  as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
568  Quick multi-state system:
569  @code{.python}
570  mdl = IMP.Model()
571  reader1 = IMP.pmi.topology.TopologyReader(tfile1)
572  reader2 = IMP.pmi.topology.TopologyReader(tfile2)
573  bs = IMP.pmi.macros.BuildSystem(mdl)
574  bs.add_state(reader1)
575  bs.add_state(reader2)
576  bs.execute_macro() # build everything including degrees of freedom
577  IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
578  ### now you have a two state system, you add restraints etc
579  @endcode
580  \note The "domain name" entry of the topology reader is not used.
581  All molecules are set up by the component name, but split into rigid bodies
582  as requested.
583  """
584  def __init__(self,
585  mdl,
586  sequence_connectivity_scale=4.0,
587  force_create_gmm_files=False,
588  resolutions=[1,10]):
589  """Constructor
590  @param mdl An IMP Model
591  @param sequence_connectivity_scale For scaling the connectivity restraint
592  @param force_create_gmm_files If True, will sample and create GMMs
593  no matter what. If False, will only only sample if the
594  files don't exist. If number of Gaussians is zero, won't
595  do anything.
596  @param resolutions The resolutions to build for structured regions
597  """
598  self.mdl = mdl
599  self.system = IMP.pmi.topology.System(self.mdl)
600  self._readers = [] # the TopologyReaders (one per state)
601  self._domain_res = [] # TempResidues for each domain key=unique name, value=(atomic_res,non_atomic_res).
602  self._domains = [] # key = domain unique name, value = Component
603  self.force_create_gmm_files = force_create_gmm_files
604  self.resolutions = resolutions
605 
606  def add_state(self,
607  reader,
608  keep_chain_id=False, fasta_name_map=None):
609  """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
610  When you are done adding states, call execute_macro()
611  @param reader The TopologyReader object
612  @param fasta_name_map dictionary for converting protein names found in the fasta file
613  """
614  state = self.system.create_state()
615  self._readers.append(reader)
616  these_domain_res = {} # key is unique name, value is (atomic res, nonatomicres)
617  these_domains = {} # key is unique name, value is _Component
618  chain_ids = string.ascii_uppercase+string.ascii_lowercase+'0123456789'
619  numchain = 0
620 
621  ### setup representation
622  # loop over molecules, copies, then domains
623  for molname in reader.get_molecules():
624  copies = reader.get_molecules()[molname].domains
625  for nc,copyname in enumerate(copies):
626  print("BuildSystem.add_state: setting up molecule %s copy number %s" % (molname,str(nc)))
627  copy = copies[copyname]
628  # option to not rename chains
629  if keep_chain_id == True:
630  chain_id = copy[0].chain
631  else:
632  chain_id = chain_ids[numchain]
633  if nc==0:
634  is_nucleic=False
635  fasta_flag=copy[0].fasta_flag
636  if fasta_flag == "RNA" or fasta_flag == "DNA": is_nucleic=True
637  seq = IMP.pmi.topology.Sequences(copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
638  print("BuildSystem.add_state: molecule %s sequence has %s residues" % (molname,len(seq)))
639  orig_mol = state.create_molecule(molname,
640  seq,
641  chain_id,is_nucleic)
642  mol = orig_mol
643  numchain+=1
644  else:
645  print("BuildSystem.add_state: creating a copy for molecule %s" % molname)
646  mol = orig_mol.create_copy(chain_id)
647  numchain+=1
648 
649  for domainnumber,domain in enumerate(copy):
650  print("BuildSystem.add_state: ---- setting up domain %s of molecule %s" % (domainnumber,molname))
651  # we build everything in the residue range, even if it
652  # extends beyond what's in the actual PDB file
653  these_domains[domain.get_unique_name()] = domain
654  if domain.residue_range==[] or domain.residue_range is None:
655  domain_res = mol.get_residues()
656  else:
657  start = domain.residue_range[0]+domain.pdb_offset
658  if domain.residue_range[1]=='END':
659  end = len(mol.sequence)
660  else:
661  end = domain.residue_range[1]+domain.pdb_offset
662  domain_res = mol.residue_range(start-1,end-1)
663  print("BuildSystem.add_state: -------- domain %s of molecule %s extends from residue %s to residue %s " % (domainnumber,molname,start,end))
664  if domain.pdb_file=="BEADS":
665  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by BEADS " % (domainnumber,molname))
666  mol.add_representation(
667  domain_res,
668  resolutions=[domain.bead_size],
669  setup_particles_as_densities=(
670  domain.em_residues_per_gaussian!=0),
671  color = domain.color)
672  these_domain_res[domain.get_unique_name()] = (set(),domain_res)
673  elif domain.pdb_file=="IDEAL_HELIX":
674  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by IDEAL_HELIX " % (domainnumber,molname))
675  mol.add_representation(
676  domain_res,
677  resolutions=self.resolutions,
678  ideal_helix=True,
679  density_residues_per_component=domain.em_residues_per_gaussian,
680  density_prefix=domain.density_prefix,
681  density_force_compute=self.force_create_gmm_files,
682  color = domain.color)
683  these_domain_res[domain.get_unique_name()] = (domain_res,set())
684  else:
685  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by pdb file %s " % (domainnumber,molname,domain.pdb_file))
686  domain_atomic = mol.add_structure(domain.pdb_file,
687  domain.chain,
688  domain.residue_range,
689  domain.pdb_offset,
690  soft_check=True)
691  domain_non_atomic = domain_res - domain_atomic
692  if not domain.em_residues_per_gaussian:
693  mol.add_representation(domain_atomic,
694  resolutions=self.resolutions,
695  color = domain.color)
696  if len(domain_non_atomic)>0:
697  mol.add_representation(domain_non_atomic,
698  resolutions=[domain.bead_size],
699  color = domain.color)
700  else:
701  print("BuildSystem.add_state: -------- domain %s of molecule %s represented by gaussians " % (domainnumber,molname))
702  mol.add_representation(
703  domain_atomic,
704  resolutions=self.resolutions,
705  density_residues_per_component=domain.em_residues_per_gaussian,
706  density_prefix=domain.density_prefix,
707  density_force_compute=self.force_create_gmm_files,
708  color = domain.color)
709  if len(domain_non_atomic)>0:
710  mol.add_representation(domain_non_atomic,
711  resolutions=[domain.bead_size],
712  setup_particles_as_densities=True,
713  color = domain.color)
714  these_domain_res[domain.get_unique_name()] = (
715  domain_atomic,domain_non_atomic)
716  self._domain_res.append(these_domain_res)
717  self._domains.append(these_domains)
718  print('BuildSystem.add_state: State',len(self.system.states),'added')
719 
720  def get_molecules(self):
721  """Return list of all molecules grouped by state.
722  For each state, it's a dictionary of Molecules where key is the molecule name
723  """
724  return [s.get_molecules() for s in self.system.get_states()]
725 
726  def get_molecule(self,molname,copy_index=0,state_index=0):
727  return self.system.get_states()[state_index].get_molecules()[molname][copy_index]
728 
729  def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04, max_bead_trans=4.0, max_srb_trans=4.0,max_srb_rot=0.04):
730  """Builds representations and sets up degrees of freedom"""
731  print("BuildSystem.execute_macro: building representations")
732  self.root_hier = self.system.build()
733 
734  print("BuildSystem.execute_macro: setting up degrees of freedom")
735  self.dof = IMP.pmi.dof.DegreesOfFreedom(self.mdl)
736  for nstate,reader in enumerate(self._readers):
737  rbs = reader.get_rigid_bodies()
738  srbs = reader.get_super_rigid_bodies()
739  csrbs = reader.get_chains_of_super_rigid_bodies()
740 
741  # add rigid bodies
742  domains_in_rbs = set()
743  for rblist in rbs:
744  print("BuildSystem.execute_macro: -------- building rigid body %s" % (str(rblist)))
745  all_res = IMP.pmi.tools.OrderedSet()
746  bead_res = IMP.pmi.tools.OrderedSet()
747  for dname in rblist:
748  domain = self._domains[nstate][dname]
749  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
750  all_res|=self._domain_res[nstate][dname][0]
751  bead_res|=self._domain_res[nstate][dname][1]
752  domains_in_rbs.add(dname)
753  all_res|=bead_res
754  print("BuildSystem.execute_macro: -------- \
755 creating rigid body with max_trans %s \
756 max_rot %s non_rigid_max_trans %s" \
757  % (str(max_rb_trans),str(max_rb_rot),str(max_bead_trans)))
758  self.dof.create_rigid_body(all_res,
759  nonrigid_parts=bead_res,
760  max_trans=max_rb_trans,
761  max_rot=max_rb_rot,
762  nonrigid_max_trans=max_bead_trans)
763 
764  # if you have any BEAD domains not in an RB, set them as flexible beads
765  for dname in self._domains[nstate]:
766  domain = self._domains[nstate][dname]
767  if domain.pdb_file=="BEADS" and dname not in domains_in_rbs:
768  self.dof.create_flexible_beads(
769  self._domain_res[nstate][dname][1],max_trans=max_bead_trans)
770 
771  # add super rigid bodies
772  for srblist in srbs:
773  print("BuildSystem.execute_macro: -------- building super rigid body %s" % (str(srblist)))
774  all_res = IMP.pmi.tools.OrderedSet()
775  for dname in srblist:
776  print("BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
777  all_res|=self._domain_res[nstate][dname][0]
778  all_res|=self._domain_res[nstate][dname][1]
779 
780  print("BuildSystem.execute_macro: -------- \
781 creating super rigid body with max_trans %s max_rot %s " \
782  % (str(max_srb_trans),str(max_srb_rot)))
783  self.dof.create_super_rigid_body(all_res,max_trans=max_srb_trans,max_rot=max_srb_rot)
784 
785  # add chains of super rigid bodies
786  for csrblist in csrbs:
787  all_res = IMP.pmi.tools.OrderedSet()
788  for dname in csrblist:
789  all_res|=self._domain_res[nstate][dname][0]
790  all_res|=self._domain_res[nstate][dname][1]
791  all_res = list(all_res)
792  all_res.sort(key=lambda r:r.get_index())
793  #self.dof.create_super_rigid_body(all_res,chain_min_length=2,chain_max_length=3)
794  self.dof.create_main_chain_mover(all_res)
795  return self.root_hier,self.dof
796 
797 @IMP.deprecated_object("2.5", "Use BuildSystem instead")
798 class BuildModel(object):
799  """A macro to build a Representation based on a Topology and lists of movers
800  DEPRECATED - Use BuildSystem instead.
801  """
802  def __init__(self,
803  model,
804  component_topologies,
805  list_of_rigid_bodies=[],
806  list_of_super_rigid_bodies=[],
807  chain_of_super_rigid_bodies=[],
808  sequence_connectivity_scale=4.0,
809  add_each_domain_as_rigid_body=False,
810  force_create_gmm_files=False):
811  """Constructor.
812  @param model The IMP model
813  @param component_topologies List of
814  IMP.pmi.topology.ComponentTopology items
815  @param list_of_rigid_bodies List of lists of domain names that will
816  be moved as rigid bodies.
817  @param list_of_super_rigid_bodies List of lists of domain names
818  that will move together in an additional Monte Carlo move.
819  @param chain_of_super_rigid_bodies List of lists of domain names
820  (choices can only be from the same molecule). Each of these
821  groups will be moved rigidly. This helps to sample more
822  efficiently complex topologies, made of several rigid bodies,
823  connected by flexible linkers.
824  @param sequence_connectivity_scale For scaling the connectivity
825  restraint
826  @param add_each_domain_as_rigid_body That way you don't have to
827  put all of them in the list
828  @param force_create_gmm_files If True, will sample and create GMMs
829  no matter what. If False, will only only sample if the
830  files don't exist. If number of Gaussians is zero, won't
831  do anything.
832  """
833  self.m = model
834  self.simo = IMP.pmi.representation.Representation(self.m,
835  upperharmonic=True,
836  disorderedlength=False)
837 
838  data=component_topologies
839  if list_of_rigid_bodies==[]:
840  print("WARNING: No list of rigid bodies inputted to build_model()")
841  if list_of_super_rigid_bodies==[]:
842  print("WARNING: No list of super rigid bodies inputted to build_model()")
843  if chain_of_super_rigid_bodies==[]:
844  print("WARNING: No chain of super rigid bodies inputted to build_model()")
845  all_dnames = set([d for sublist in list_of_rigid_bodies+list_of_super_rigid_bodies\
846  +chain_of_super_rigid_bodies for d in sublist])
847  all_available = set([c._domain_name for c in component_topologies])
848  if not all_dnames <= all_available:
849  raise ValueError("All requested movers must reference domain "
850  "names in the component topologies")
851 
852  self.domain_dict={}
853  self.resdensities={}
854  super_rigid_bodies={}
855  chain_super_rigid_bodies={}
856  rigid_bodies={}
857 
858  for c in data:
859  comp_name = c.molname
860  hier_name = c._domain_name
861  color = 0. # Can't use new-style string colors
862  fasta_file = c.fasta_file
863  fasta_id = c.fasta_id
864  pdb_name = c.pdb_file
865  chain_id = c.chain
866  res_range = c.residue_range
867  offset = c.pdb_offset
868  bead_size = c.bead_size
869  em_num_components = c.em_residues_per_gaussian
870  em_txt_file_name = c.gmm_file
871  em_mrc_file_name = c.mrc_file
872 
873  if comp_name not in self.simo.get_component_names():
874  self.simo.create_component(comp_name,color=0.0)
875  self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
876 
877  # create hierarchy (adds resolutions, beads) with autobuild and optionally add EM data
878  if em_num_components==0:
879  read_em_files=False
880  include_res0=False
881  else:
882  if (not os.path.isfile(em_txt_file_name)) or force_create_gmm_files:
883  read_em_files=False
884  include_res0=True
885  else:
886  read_em_files=True
887  include_res0=False
888 
889  outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
890  res_range,include_res0,beadsize=bead_size,
891  color=color,offset=offset)
892  if em_num_components!=0:
893  if read_em_files:
894  print("will read GMM files")
895  else:
896  print("will calculate GMMs")
897 
898  dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
899  em_mrc_file_name,em_num_components,read_em_files)
900  self.simo.add_all_atom_densities(comp_name, particles=beads)
901  dens_hier+=beads
902  else:
903  dens_hier=[]
904 
905  self.resdensities[hier_name]=dens_hier
906  self.domain_dict[hier_name]=outhier+dens_hier
907 
908  # setup basic restraints
909  for c in self.simo.get_component_names():
910  self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
911  self.simo.setup_component_geometry(c)
912 
913  # create movers
914  for rblist in list_of_rigid_bodies:
915  rb=[]
916  for rbmember in rblist:
917  rb+=[h for h in self.domain_dict[rbmember]]
918  self.simo.set_rigid_body_from_hierarchies(rb)
919  for srblist in list_of_super_rigid_bodies:
920  srb=[]
921  for srbmember in rblist:
922  srb+=[h for h in self.domain_dict[srbmember]]
923  self.simo.set_super_rigid_body_from_hierarchies(srb)
924  for clist in chain_of_super_rigid_bodies:
925  crb=[]
926  for crbmember in rblist:
927  crb+=[h for h in self.domain_dict[crbmember]]
928  self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
929 
930  self.simo.set_floppy_bodies()
931  self.simo.setup_bonds()
932 
934  '''Return the Representation object'''
935  return self.simo
936 
937  def get_density_hierarchies(self,hier_name_list):
938  # return a list of density hierarchies
939  # specify the list of hierarchy names
940  dens_hier_list=[]
941  for hn in hier_name_list:
942  print(hn)
943  dens_hier_list+=self.resdensities[hn]
944  return dens_hier_list
945 
946  def set_gmm_models_directory(self,directory_name):
947  self.gmm_models_directory=directory_name
948 
949  def get_pdb_bead_bits(self,hierarchy):
950  pdbbits=[]
951  beadbits=[]
952  helixbits=[]
953  for h in hierarchy:
954  if "_pdb" in h.get_name():pdbbits.append(h)
955  if "_bead" in h.get_name():beadbits.append(h)
956  if "_helix" in h.get_name():helixbits.append(h)
957  return (pdbbits,beadbits,helixbits)
958 
959  def scale_bead_radii(self,nresidues,scale):
960  scaled_beads=set()
961  for h in self.domain_dict:
962  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
963  slope=(1.0-scale)/(1.0-float(nresidues))
964 
965  for b in beadbits:
966  # I have to do the following
967  # because otherwise we'll scale more than once
968  if b not in scaled_beads:
969  scaled_beads.add(b)
970  else:
971  continue
972  radius=IMP.core.XYZR(b).get_radius()
973  num_residues=len(IMP.pmi.tools.get_residue_indexes(b))
974  scale_factor=slope*float(num_residues)+1.0
975  print(scale_factor)
976  new_radius=scale_factor*radius
977  IMP.core.XYZR(b).set_radius(new_radius)
978  print(b.get_name())
979  print("particle with radius "+str(radius)+" and "+str(num_residues)+" residues scaled to a new radius "+str(new_radius))
980 
981 
982  def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
983  #density generation for the EM restraint
984  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
985  #get the number of residues from the pdb bits
986  res_ind=[]
987  for pb in pdbbits+helixbits:
988  for p in IMP.core.get_leaves(pb):
990 
991  number_of_residues=len(set(res_ind))
992  outhier=[]
993  if read:
994  if len(pdbbits)!=0:
995  outhier+=simo.add_component_density(compname,
996  pdbbits,
997  num_components=num_components,
998  resolution=0,
999  inputfile=txtfilename)
1000  if len(helixbits)!=0:
1001  outhier+=simo.add_component_density(compname,
1002  helixbits,
1003  num_components=num_components,
1004  resolution=1,
1005  inputfile=txtfilename)
1006 
1007 
1008  else:
1009  if len(pdbbits)!=0:
1010  num_components=number_of_residues//abs(num_components)+1
1011  outhier+=simo.add_component_density(compname,
1012  pdbbits,
1013  num_components=num_components,
1014  resolution=0,
1015  outputfile=txtfilename,
1016  outputmap=mrcfilename,
1017  multiply_by_total_mass=True)
1018 
1019  if len(helixbits)!=0:
1020  num_components=number_of_residues//abs(num_components)+1
1021  outhier+=simo.add_component_density(compname,
1022  helixbits,
1023  num_components=num_components,
1024  resolution=1,
1025  outputfile=txtfilename,
1026  outputmap=mrcfilename,
1027  multiply_by_total_mass=True)
1028 
1029  return outhier,beadbits
1030 
1031  def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
1032  beadsize=5,color=0.0,offset=0):
1033  if pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is not "BEADS" :
1034  if include_res0:
1035  outhier=simo.autobuild_model(comname,
1036  pdbname=pdbname,
1037  chain=chain,
1038  resrange=resrange,
1039  resolutions=[0,1,10],
1040  offset=offset,
1041  color=color,
1042  missingbeadsize=beadsize)
1043  else:
1044  outhier=simo.autobuild_model(comname,
1045  pdbname=pdbname,
1046  chain=chain,
1047  resrange=resrange,
1048  resolutions=[1,10],
1049  offset=offset,
1050  color=color,
1051  missingbeadsize=beadsize)
1052 
1053 
1054  elif pdbname is not None and pdbname is "IDEAL_HELIX" and pdbname is not "BEADS" :
1055  outhier=simo.add_component_ideal_helix(comname,
1056  resolutions=[1,10],
1057  resrange=resrange,
1058  color=color,
1059  show=False)
1060 
1061  elif pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is "BEADS" :
1062  outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
1063 
1064  else:
1065 
1066  seq_len=len(simo.sequence_dict[comname])
1067  outhier=simo.add_component_necklace(comname,
1068  begin=1,
1069  end=seq_len,
1070  length=beadsize)
1071 
1072  return outhier
1073 
1074 
1075 @IMP.deprecated_object("2.5", "Use BuildSystem instead")
1076 class BuildModel1(object):
1077  """Deprecated building macro - use BuildSystem()"""
1078 
1079  def __init__(self, representation):
1080  """Constructor.
1081  @param representation The PMI representation
1082  """
1083  self.simo=representation
1084  self.gmm_models_directory="."
1085  self.rmf_file={}
1086  self.rmf_frame_number={}
1087  self.rmf_names_map={}
1088  self.rmf_component_name={}
1089 
1090  def set_gmm_models_directory(self,directory_name):
1091  self.gmm_models_directory=directory_name
1092 
1093  def build_model(self,data_structure,sequence_connectivity_scale=4.0,
1094  sequence_connectivity_resolution=10,
1095  rmf_file=None,rmf_frame_number=0,rmf_file_map=None,
1096  skip_connectivity_these_domains=None,
1097  skip_gaussian_in_rmf=False, skip_gaussian_in_representation=False):
1098  """Create model.
1099  @param data_structure List of lists containing these entries:
1100  comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
1101  res_range, read_em_files, bead_size, rb, super_rb,
1102  em_num_components, em_txt_file_name, em_mrc_file_name, chain_of_super_rb,
1103  keep_gaussian_flexible_beads (optional)
1104  @param sequence_connectivity_scale
1105  @param rmf_file
1106  @param rmf_frame_number
1107  @param rmf_file_map : a dictionary that map key=component_name:value=(rmf_file_name,
1108  rmf_frame_number,
1109  rmf_component_name)
1110  """
1111  self.domain_dict={}
1112  self.resdensities={}
1113  super_rigid_bodies={}
1114  chain_super_rigid_bodies={}
1115  rigid_bodies={}
1116 
1117  for d in data_structure:
1118  comp_name = d[0]
1119  hier_name = d[1]
1120  color = d[2]
1121  fasta_file = d[3]
1122  fasta_id = d[4]
1123  pdb_name = d[5]
1124  chain_id = d[6]
1125  res_range = d[7][0:2]
1126  try:
1127  offset = d[7][2]
1128  except:
1129  offset = 0
1130  read_em_files = d[8]
1131  bead_size = d[9]
1132  rb = d[10]
1133  super_rb = d[11]
1134  em_num_components = d[12]
1135  em_txt_file_name = d[13]
1136  em_mrc_file_name = d[14]
1137  chain_of_super_rb = d[15]
1138  try:
1139  keep_gaussian_flexible_beads = d[16]
1140  except:
1141  keep_gaussian_flexible_beads = True
1142 
1143  if comp_name not in self.simo.get_component_names():
1144  self.simo.create_component(comp_name,color=0.0)
1145  self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
1146  outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
1147 
1148 
1149  if not read_em_files is None:
1150  if em_txt_file_name is " ": em_txt_file_name=self.gmm_models_directory+"/"+hier_name+".txt"
1151  if em_mrc_file_name is " ": em_mrc_file_name=self.gmm_models_directory+"/"+hier_name+".mrc"
1152 
1153 
1154  dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
1155 
1156  if (keep_gaussian_flexible_beads):
1157  self.simo.add_all_atom_densities(comp_name, particles=beads)
1158  dens_hier+=beads
1159  else:
1160  dens_hier=[]
1161 
1162  self.resdensities[hier_name]=dens_hier
1163  self.domain_dict[hier_name]=outhier+dens_hier
1164 
1165  if rb is not None:
1166  if rb not in rigid_bodies:
1167  rigid_bodies[rb]=[h for h in self.domain_dict[hier_name]]
1168  else:
1169  rigid_bodies[rb]+=[h for h in self.domain_dict[hier_name]]
1170 
1171 
1172  if super_rb is not None:
1173  for k in super_rb:
1174  if k not in super_rigid_bodies:
1175  super_rigid_bodies[k]=[h for h in self.domain_dict[hier_name]]
1176  else:
1177  super_rigid_bodies[k]+=[h for h in self.domain_dict[hier_name]]
1178 
1179  if chain_of_super_rb is not None:
1180  for k in chain_of_super_rb:
1181  if k not in chain_super_rigid_bodies:
1182  chain_super_rigid_bodies[k]=[h for h in self.domain_dict[hier_name]]
1183  else:
1184  chain_super_rigid_bodies[k]+=[h for h in self.domain_dict[hier_name]]
1185 
1186 
1187 
1188  self.rigid_bodies=rigid_bodies
1189 
1190  for c in self.simo.get_component_names():
1191  if rmf_file is not None:
1192  rf=rmf_file
1193  rfn=rmf_frame_number
1194  self.simo.set_coordinates_from_rmf(c, rf,rfn,
1195  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1196  elif rmf_file_map:
1197  for k in rmf_file_map:
1198  cname=k
1199  rf=rmf_file_map[k][0]
1200  rfn=rmf_file_map[k][1]
1201  rcname=rmf_file_map[k][2]
1202  self.simo.set_coordinates_from_rmf(cname, rf,rfn,rcname,
1203  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1204  else:
1205  if c in self.rmf_file:
1206  rf=self.rmf_file[c]
1207  rfn=self.rmf_frame_number[c]
1208  rfm=self.rmf_names_map[c]
1209  rcname=self.rmf_component_name[c]
1210  self.simo.set_coordinates_from_rmf(c, rf,rfn,representation_name_to_rmf_name_map=rfm,
1211  rmf_component_name=rcname,
1212  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1213  if (not skip_connectivity_these_domains) or (c not in skip_connectivity_these_domains):
1214  self.simo.setup_component_sequence_connectivity(c,
1215  resolution=sequence_connectivity_resolution,
1216  scale=sequence_connectivity_scale)
1217  self.simo.setup_component_geometry(c)
1218 
1219  for rb in rigid_bodies:
1220  self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
1221 
1222  for k in super_rigid_bodies:
1223  self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
1224 
1225  for k in chain_super_rigid_bodies:
1226  self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
1227 
1228  self.simo.set_floppy_bodies()
1229  self.simo.setup_bonds()
1230 
1231  def set_main_chain_mover(self,hier_name,lengths=[5,10,20,30]):
1232  hiers=self.domain_dict[hier_name]
1233  for length in lengths:
1234  for n in range(len(hiers)-length):
1235  hs=hiers[n+1:n+length-1]
1236  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n+length].get_particle()),min_size=3)
1237  for n in range(1,len(hiers)-1,5):
1238  hs=hiers[n+1:]
1239  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1240  hs=hiers[:n-1]
1241  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1242 
1243 
1244  def set_rmf_file(self,component_name,rmf_file,rmf_frame_number,rmf_names_map=None,rmf_component_name=None):
1245  self.rmf_file[component_name]=rmf_file
1246  self.rmf_frame_number[component_name]=rmf_frame_number
1247  self.rmf_names_map[component_name]=rmf_names_map
1248  self.rmf_component_name[component_name]=rmf_component_name
1249 
1250  def get_density_hierarchies(self,hier_name_list):
1251  # return a list of density hierarchies
1252  # specify the list of hierarchy names
1253  dens_hier_list=[]
1254  for hn in hier_name_list:
1255  print(hn)
1256  dens_hier_list+=self.resdensities[hn]
1257  return dens_hier_list
1258 
1259  def get_pdb_bead_bits(self,hierarchy):
1260  pdbbits=[]
1261  beadbits=[]
1262  helixbits=[]
1263  for h in hierarchy:
1264  if "_pdb" in h.get_name():pdbbits.append(h)
1265  if "_bead" in h.get_name():beadbits.append(h)
1266  if "_helix" in h.get_name():helixbits.append(h)
1267  return (pdbbits,beadbits,helixbits)
1268 
1269  def scale_bead_radii(self,nresidues,scale):
1270  scaled_beads=set()
1271  for h in self.domain_dict:
1272  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
1273  slope=(1.0-scale)/(1.0-float(nresidues))
1274 
1275  for b in beadbits:
1276  # I have to do the following
1277  # because otherwise we'll scale more than once
1278  if b not in scaled_beads:
1279  scaled_beads.add(b)
1280  else:
1281  continue
1282  radius=IMP.core.XYZR(b).get_radius()
1283  num_residues=len(IMP.pmi.tools.get_residue_indexes(b))
1284  scale_factor=slope*float(num_residues)+1.0
1285  print(scale_factor)
1286  new_radius=scale_factor*radius
1287  IMP.core.XYZR(b).set_radius(new_radius)
1288  print(b.get_name())
1289  print("particle with radius "+str(radius)+" and "+str(num_residues)+" residues scaled to a new radius "+str(new_radius))
1290 
1291 
1292  def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
1293  #density generation for the EM restraint
1294  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
1295 
1296  #get the number of residues from the pdb bits
1297  res_ind=[]
1298  for pb in pdbbits+helixbits:
1299  for p in IMP.core.get_leaves(pb):
1301 
1302  number_of_residues=len(set(res_ind))
1303  outhier=[]
1304  if read:
1305  if len(pdbbits)!=0:
1306  outhier+=simo.add_component_density(compname,
1307  pdbbits,
1308  num_components=num_components, # number of gaussian into which the simulated density is approximated
1309  resolution=0, # resolution that you want to calculate the simulated density
1310  inputfile=txtfilename) # read what it was calculated before
1311  if len(helixbits)!=0:
1312  outhier+=simo.add_component_density(compname,
1313  helixbits,
1314  num_components=num_components, # number of gaussian into which the simulated density is approximated
1315  resolution=1, # resolution that you want to calculate the simulated density
1316  inputfile=txtfilename) # read what it was calculated before
1317 
1318 
1319  else:
1320  if len(pdbbits)!=0:
1321  if num_components<0:
1322  #if negative calculate the number of gmm components automatically
1323  # from the number of residues
1324  num_components=number_of_residues/abs(num_components)
1325  outhier+=simo.add_component_density(compname,
1326  pdbbits,
1327  num_components=num_components, # number of gaussian into which the simulated density is approximated
1328  resolution=0, # resolution that you want to calculate the simulated density
1329  outputfile=txtfilename, # do the calculation
1330  outputmap=mrcfilename,
1331  multiply_by_total_mass=True) # do the calculation and output the mrc
1332 
1333  if len(helixbits)!=0:
1334  if num_components<0:
1335  #if negative calculate the number of gmm components automatically
1336  # from the number of residues
1337  num_components=number_of_residues/abs(num_components)
1338  outhier+=simo.add_component_density(compname,
1339  helixbits,
1340  num_components=num_components, # number of gaussian into which the simulated density is approximated
1341  resolution=1, # resolution that you want to calculate the simulated density
1342  outputfile=txtfilename, # do the calculation
1343  outputmap=mrcfilename,
1344  multiply_by_total_mass=True) # do the calculation and output the mrc
1345 
1346  return outhier,beadbits
1347 
1348  def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
1349 
1350  if pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is not "BEADS" and pdbname is not "DENSITY" :
1351  if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
1352  if read==False:
1353  outhier=simo.autobuild_model(comname,
1354  pdbname=pdbname,
1355  chain=chain,
1356  resrange=resrange,
1357  resolutions=[0,1,10],
1358  offset=offset,
1359  color=color,
1360  missingbeadsize=beadsize)
1361  else:
1362  outhier=simo.autobuild_model(comname,
1363  pdbname=pdbname,
1364  chain=chain,
1365  resrange=resrange,
1366  resolutions=[1,10],
1367  offset=offset,
1368  color=color,
1369  missingbeadsize=beadsize)
1370 
1371  elif pdbname is not None and pdbname is "IDEAL_HELIX" and pdbname is not "BEADS" and pdbname is not "DENSITY" :
1372  outhier=simo.add_component_ideal_helix(comname,
1373  resolutions=[1,10],
1374  resrange=resrange,
1375  color=color,
1376  show=False)
1377 
1378  elif pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is "BEADS" and pdbname is not "DENSITY" :
1379  seq_len=resrange[1]
1380  if resrange[1]==-1:
1381  seq_len=len(simo.sequence_dict[comname])
1382  outhier=simo.add_component_necklace(comname,resrange[0],seq_len,beadsize,color=color)
1383 
1384  elif pdbname is not None and pdbname is not "IDEAL_HELIX" and pdbname is not "BEADS" and pdbname is "DENSITY" :
1385  outhier=[]
1386 
1387  else:
1388 
1389  seq_len=len(simo.sequence_dict[comname])
1390  outhier=simo.add_component_necklace(comname,
1391  begin=1,
1392  end=seq_len,
1393  length=beadsize)
1394 
1395  return outhier
1396 
1397  def set_coordinates(self,hier_name,xyz_tuple):
1398  hier=self.domain_dict[hier_name]
1399  for h in IMP.atom.get_leaves(hier):
1400  p=h.get_particle()
1402  pass
1403  else:
1404  IMP.core.XYZ(p).set_coordinates(xyz_tuple)
1405 
1406  def save_rmf(self,rmfname):
1407 
1409  o.init_rmf(rmfname,[self.simo.prot])
1410  o.write_rmf(rmfname)
1411  o.close_rmf(rmfname)
1412 # -----------------------------------------------------------------------
1413 
1414 @IMP.deprecated_object("2.5", "Use BuildSystem instead")
1415 def BuildModel0(
1416  m,
1417  data,
1418  resolutions=[1,
1419  10],
1420  missing_bead_size=20,
1421  residue_per_gaussian=None):
1422  '''
1423  Construct a component for each subunit (no splitting, nothing fancy).
1424  You can pass the resolutions and the bead size for the missing residue regions.
1425  To use this macro, you must provide the following data structure:
1426  Component pdbfile chainid rgb color fastafile sequence id
1427  in fastafile
1428 data = [("Rpb1", pdbfile, "A", 0.00000000, (fastafile, 0)),
1429  ("Rpb2", pdbfile, "B", 0.09090909, (fastafile, 1)),
1430  ("Rpb3", pdbfile, "C", 0.18181818, (fastafile, 2)),
1431  ("Rpb4", pdbfile, "D", 0.27272727, (fastafile, 3)),
1432  ("Rpb5", pdbfile, "E", 0.36363636, (fastafile, 4)),
1433  ("Rpb6", pdbfile, "F", 0.45454545, (fastafile, 5)),
1434  ("Rpb7", pdbfile, "G", 0.54545455, (fastafile, 6)),
1435  ("Rpb8", pdbfile, "H", 0.63636364, (fastafile, 7)),
1436  ("Rpb9", pdbfile, "I", 0.72727273, (fastafile, 8)),
1437  ("Rpb10", pdbfile, "L", 0.81818182, (fastafile, 9)),
1438  ("Rpb11", pdbfile, "J", 0.90909091, (fastafile, 10)),
1439  ("Rpb12", pdbfile, "K", 1.00000000, (fastafile, 11))]
1440  '''
1441 
1443 
1444  # the dictionary for the hierarchies,
1445  hierarchies = {}
1446 
1447  for d in data:
1448  # retrieve the information from the data structure
1449  component_name = d[0]
1450  pdb_file = d[1]
1451  chain_id = d[2]
1452  color_id = d[3]
1453  fasta_file = d[4][0]
1454  # this function
1455  fastids = IMP.pmi.tools.get_ids_from_fasta_file(fasta_file)
1456  fasta_file_id = d[4][1]
1457  # avoid to add a component with the same name
1458  r.create_component(component_name,
1459  color=color_id)
1460 
1461  r.add_component_sequence(component_name,
1462  fasta_file,
1463  id=fastids[fasta_file_id])
1464 
1465  hierarchies = r.autobuild_model(component_name,
1466  pdb_file,
1467  chain_id,
1468  resolutions=resolutions,
1469  missingbeadsize=missing_bead_size)
1470 
1471  r.show_component_table(component_name)
1472 
1473  r.set_rigid_bodies([component_name])
1474 
1475  r.set_chain_of_super_rigid_bodies(
1476  hierarchies,
1477  min_length=2,
1478  max_length=2)
1479 
1480  r.setup_component_sequence_connectivity(component_name, resolution=1)
1481  r.setup_component_geometry(component_name)
1482 
1483  r.setup_bonds()
1484  # put it at the end of rigid bodies
1485  r.set_floppy_bodies()
1486 
1487  # set current coordinates as reference for RMSD calculation
1488  r.set_current_coordinates_as_reference_for_rmsd("Reference")
1489 
1490  return r
1491 
1492 # ----------------------------------------------------------------------
1493 
1494 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
1495 class AnalysisReplicaExchange0(object):
1496  """A macro for running all the basic operations of analysis.
1497  Includes clustering, precision analysis, and making ensemble density maps.
1498  A number of plots are also supported.
1499  """
1500  def __init__(self, model,
1501  merge_directories=["./"],
1502  stat_file_name_suffix="stat",
1503  best_pdb_name_suffix="model",
1504  do_clean_first=True,
1505  do_create_directories=True,
1506  global_output_directory="output/",
1507  replica_stat_file_suffix="stat_replica",
1508  global_analysis_result_directory="./analysis/",
1509  test_mode=False):
1510  """Constructor.
1511  @param model The IMP model
1512  @param stat_file_name_suffix
1513  @param merge_directories The directories containing output files
1514  @param best_pdb_name_suffix
1515  @param do_clean_first
1516  @param do_create_directories
1517  @param global_output_directory Where everything is
1518  @param replica_stat_file_suffix
1519  @param global_analysis_result_directory
1520  @param test_mode If True, nothing is changed on disk
1521  """
1522 
1523  try:
1524  from mpi4py import MPI
1525  self.comm = MPI.COMM_WORLD
1526  self.rank = self.comm.Get_rank()
1527  self.number_of_processes = self.comm.size
1528  except ImportError:
1529  self.rank = 0
1530  self.number_of_processes = 1
1531 
1532  self.test_mode = test_mode
1533  self._protocol_output = []
1534  self.cluster_obj = None
1535  self.model = model
1536  stat_dir = global_output_directory
1537  self.stat_files = []
1538  # it contains the position of the root directories
1539  for rd in merge_directories:
1540  stat_files = glob.glob(os.path.join(rd,stat_dir,"stat.*.out"))
1541  if len(stat_files)==0:
1542  print("WARNING: no stat files found in",os.path.join(rd,stat_dir))
1543  self.stat_files += stat_files
1544 
1545  def add_protocol_output(self, p):
1546  """Capture details of the modeling protocol.
1547  @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
1548  """
1549  # Assume last state is the one we're interested in
1550  self._protocol_output.append((p, p._last_state))
1551 
1552  def get_modeling_trajectory(self,
1553  score_key="SimplifiedModel_Total_Score_None",
1554  rmf_file_key="rmf_file",
1555  rmf_file_frame_key="rmf_frame_index",
1556  outputdir="./",
1557  get_every=1,
1558  nframes_trajectory=10000):
1559  """ Get a trajectory of the modeling run, for generating demonstrative movies
1560  @param score_key The score for ranking models
1561  @param rmf_file_key Key pointing to RMF filename
1562  @param rmf_file_frame_key Key pointing to RMF frame number
1563  @param outputdir The local output directory used in the run
1564  @param get_every Extract every nth frame
1565  @param nframes_trajectory Total number of frames of the trajectory
1566  """
1567  from operator import itemgetter
1568  import math
1569 
1570  trajectory_models = IMP.pmi.io.get_trajectory_models(self.stat_files,
1571  score_key,
1572  rmf_file_key,
1573  rmf_file_frame_key,
1574  get_every)
1575  rmf_file_list=trajectory_models[0]
1576  rmf_file_frame_list=trajectory_models[1]
1577  score_list=list(map(float, trajectory_models[2]))
1578 
1579  max_score=max(score_list)
1580  min_score=min(score_list)
1581 
1582 
1583  bins=[(max_score-min_score)*math.exp(-float(i))+min_score for i in range(nframes_trajectory)]
1584  binned_scores=[None]*nframes_trajectory
1585  binned_model_indexes=[-1]*nframes_trajectory
1586 
1587  for model_index,s in enumerate(score_list):
1588  bins_score_diffs=[abs(s-b) for b in bins]
1589  bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1590  if binned_scores[bin_index]==None:
1591  binned_scores[bin_index]=s
1592  binned_model_indexes[bin_index]=model_index
1593  else:
1594  old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1595  new_diff=abs(s-bins[bin_index])
1596  if new_diff < old_diff:
1597  binned_scores[bin_index]=s
1598  binned_model_indexes[bin_index]=model_index
1599 
1600  print(binned_scores)
1601  print(binned_model_indexes)
1602 
1603 
1604  def _expand_ambiguity(self,prot,d):
1605  """If using PMI2, expand the dictionary to include copies as ambiguous options
1606  This also keeps the states separate.
1607  """
1608  newdict = {}
1609  for key in d:
1610  val = d[key]
1611  if '..' in key or (type(val) is tuple and len(val)>=3):
1612  newdict[key] = val
1613  continue
1614  states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
1615  if type(val) is tuple:
1616  start = val[0]
1617  stop = val[1]
1618  name = val[2]
1619  else:
1620  start = 1
1621  stop = -1
1622  name = val
1623  for nst in range(len(states)):
1624  sel = IMP.atom.Selection(prot,molecule=name,state_index=nst)
1625  copies = sel.get_selected_particles(with_representation=False)
1626  if len(copies)>1:
1627  for nc in range(len(copies)):
1628  if len(states)>1:
1629  newdict['%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
1630  else:
1631  newdict['%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
1632  else:
1633  newdict[key] = val
1634  return newdict
1635 
1636 
1637  def clustering(self,
1638  score_key="SimplifiedModel_Total_Score_None",
1639  rmf_file_key="rmf_file",
1640  rmf_file_frame_key="rmf_frame_index",
1641  state_number=0,
1642  prefiltervalue=None,
1643  feature_keys=[],
1644  outputdir="./",
1645  alignment_components=None,
1646  number_of_best_scoring_models=10,
1647  rmsd_calculation_components=None,
1648  distance_matrix_file='distances.mat',
1649  load_distance_matrix_file=False,
1650  skip_clustering=False,
1651  number_of_clusters=1,
1652  display_plot=False,
1653  exit_after_display=True,
1654  get_every=1,
1655  first_and_last_frames=None,
1656  density_custom_ranges=None,
1657  write_pdb_with_centered_coordinates=False,
1658  voxel_size=5.0):
1659  """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
1660  Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
1661  Can pass None for copy or state to ignore that field.
1662  If you don't pass a specific copy number
1663  @param score_key The score for ranking models. Use "Total_Score"
1664  instead of default for PMI2.
1665  @param rmf_file_key Key pointing to RMF filename
1666  @param rmf_file_frame_key Key pointing to RMF frame number
1667  @param state_number State number to analyze
1668  @param prefiltervalue Only include frames where the
1669  score key is below this value
1670  @param feature_keys Keywords for which you want to
1671  calculate average, medians, etc.
1672  If you pass "Keyname" it'll include everything that matches "*Keyname*"
1673  @param outputdir The local output directory used in the run
1674  @param alignment_components Dictionary with keys=groupname, values are tuples
1675  for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1676  @param number_of_best_scoring_models Num models to keep per run
1677  @param rmsd_calculation_components For calculating RMSD
1678  (same format as alignment_components)
1679  @param distance_matrix_file Where to store/read the distance matrix
1680  @param load_distance_matrix_file Try to load the distance matrix file
1681  @param skip_clustering Just extract the best scoring models
1682  and save the pdbs
1683  @param number_of_clusters Number of k-means clusters
1684  @param display_plot Display the distance matrix
1685  @param exit_after_display Exit after displaying distance matrix
1686  @param get_every Extract every nth frame
1687  @param first_and_last_frames A tuple with the first and last frames to be
1688  analyzed. Values are percentages!
1689  Default: get all frames
1690  @param density_custom_ranges For density calculation
1691  (same format as alignment_components)
1692  @param write_pdb_with_centered_coordinates
1693  @param voxel_size Used for the density output
1694  """
1695  # Track provenance information to be added to each output model
1696  prov = []
1697  self._outputdir = os.path.abspath(outputdir)
1698  self._number_of_clusters = number_of_clusters
1699  for p, state in self._protocol_output:
1700  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1701 
1702  if self.test_mode:
1703  return
1704 
1705  if self.rank==0:
1706  try:
1707  os.mkdir(outputdir)
1708  except:
1709  pass
1710 
1711  if not load_distance_matrix_file:
1712  if len(self.stat_files)==0: print("ERROR: no stat file found in the given path"); return
1713  my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1714  self.stat_files,self.number_of_processes)[self.rank]
1715 
1716  # read ahead to check if you need the PMI2 score key instead
1717  po = IMP.pmi.output.ProcessOutput(my_stat_files[0])
1718  orig_score_key = score_key
1719  if score_key not in po.get_keys():
1720  if 'Total_Score' in po.get_keys():
1721  score_key = 'Total_Score'
1722  print("WARNING: Using 'Total_Score' instead of "
1723  "'SimplifiedModel_Total_Score_None' for the score key")
1724  for k in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1725  if k in feature_keys:
1726  print("WARNING: no need to pass " +k+" to feature_keys.")
1727  feature_keys.remove(k)
1728 
1729  best_models = IMP.pmi.io.get_best_models(my_stat_files,
1730  score_key,
1731  feature_keys,
1732  rmf_file_key,
1733  rmf_file_frame_key,
1734  prefiltervalue,
1735  get_every, provenance=prov)
1736  rmf_file_list=best_models[0]
1737  rmf_file_frame_list=best_models[1]
1738  score_list=best_models[2]
1739  feature_keyword_list_dict=best_models[3]
1740 
1741 # ------------------------------------------------------------------------
1742 # collect all the files and scores
1743 # ------------------------------------------------------------------------
1744 
1745  if self.number_of_processes > 1:
1746  score_list = IMP.pmi.tools.scatter_and_gather(score_list)
1747  rmf_file_list = IMP.pmi.tools.scatter_and_gather(rmf_file_list)
1748  rmf_file_frame_list = IMP.pmi.tools.scatter_and_gather(
1749  rmf_file_frame_list)
1750  for k in feature_keyword_list_dict:
1751  feature_keyword_list_dict[k] = IMP.pmi.tools.scatter_and_gather(
1752  feature_keyword_list_dict[k])
1753 
1754  # sort by score and get the best scoring ones
1755  score_rmf_tuples = list(zip(score_list,
1756  rmf_file_list,
1757  rmf_file_frame_list,
1758  list(range(len(score_list)))))
1759 
1760  if density_custom_ranges:
1761  for k in density_custom_ranges:
1762  if type(density_custom_ranges[k]) is not list:
1763  raise Exception("Density custom ranges: values must be lists of tuples")
1764 
1765  # keep subset of frames if requested
1766  if first_and_last_frames is not None:
1767  nframes = len(score_rmf_tuples)
1768  first_frame = int(first_and_last_frames[0] * nframes)
1769  last_frame = int(first_and_last_frames[1] * nframes)
1770  if last_frame > len(score_rmf_tuples):
1771  last_frame = -1
1772  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1773 
1774  # sort RMFs by the score_key in ascending order, and store the rank
1775  best_score_rmf_tuples = sorted(score_rmf_tuples,
1776  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1777  best_score_rmf_tuples=[t+(n,) for n,t in enumerate(best_score_rmf_tuples)]
1778  # Note in the provenance info that we only kept best-scoring models
1779  prov.append(IMP.pmi.io.FilterProvenance("Best scoring",
1780  0, number_of_best_scoring_models))
1781  # sort the feature scores in the same way
1782  best_score_feature_keyword_list_dict = defaultdict(list)
1783  for tpl in best_score_rmf_tuples:
1784  index = tpl[3]
1785  for f in feature_keyword_list_dict:
1786  best_score_feature_keyword_list_dict[f].append(
1787  feature_keyword_list_dict[f][index])
1788  my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1789  best_score_rmf_tuples,
1790  self.number_of_processes)[self.rank]
1791 
1792  # expand the dictionaries to include ambiguous copies
1793  prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(self.model,
1794  0,
1795  my_best_score_rmf_tuples[0][1])[0]
1796  if IMP.pmi.get_is_canonical(prot_ahead):
1797  if rmsd_calculation_components is not None:
1798  tmp = self._expand_ambiguity(prot_ahead,rmsd_calculation_components)
1799  if tmp!=rmsd_calculation_components:
1800  print('Detected ambiguity, expand rmsd components to',tmp)
1801  rmsd_calculation_components = tmp
1802  if alignment_components is not None:
1803  tmp = self._expand_ambiguity(prot_ahead,alignment_components)
1804  if tmp!=alignment_components:
1805  print('Detected ambiguity, expand alignment components to',tmp)
1806  alignment_components = tmp
1807 
1808 #-------------------------------------------------------------
1809 # read the coordinates
1810 # ------------------------------------------------------------
1811  rmsd_weights = IMP.pmi.io.get_bead_sizes(self.model,
1812  my_best_score_rmf_tuples[0],
1813  rmsd_calculation_components,
1814  state_number=state_number)
1815  got_coords = IMP.pmi.io.read_coordinates_of_rmfs(self.model,
1816  my_best_score_rmf_tuples,
1817  alignment_components,
1818  rmsd_calculation_components,
1819  state_number=state_number)
1820 
1821  # note! the coordinates are simply float tuples, NOT decorators, NOT Vector3D,
1822  # NOR particles, because these object cannot be serialized. We need serialization
1823  # for the parallel computation based on mpi.
1824  all_coordinates=got_coords[0] # dict:key=component name,val=coords per hit
1825  alignment_coordinates=got_coords[1] # same as above, limited to alignment bits
1826  rmsd_coordinates=got_coords[2] # same as above, limited to RMSD bits
1827  rmf_file_name_index_dict=got_coords[3] # dictionary with key=RMF, value=score rank
1828  all_rmf_file_names=got_coords[4] # RMF file per hit
1829 
1830 # ------------------------------------------------------------------------
1831 # optionally don't compute distance matrix or cluster, just write top files
1832 # ------------------------------------------------------------------------
1833  if skip_clustering:
1834  if density_custom_ranges:
1835  DensModule = IMP.pmi.analysis.GetModelDensity(
1836  density_custom_ranges,
1837  voxel=voxel_size)
1838 
1839  dircluster=os.path.join(outputdir,"all_models."+str(n))
1840  try:
1841  os.mkdir(outputdir)
1842  except:
1843  pass
1844  try:
1845  os.mkdir(dircluster)
1846  except:
1847  pass
1848  clusstat=open(os.path.join(dircluster,"stat."+str(self.rank)+".out"),"w")
1849  for cnt,tpl in enumerate(my_best_score_rmf_tuples):
1850  rmf_name=tpl[1]
1851  rmf_frame_number=tpl[2]
1852  tmp_dict={}
1853  index=tpl[4]
1854  for key in best_score_feature_keyword_list_dict:
1855  tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1856 
1857  if cnt==0:
1858  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1859  self.model,
1860  rmf_frame_number,
1861  rmf_name)
1862  else:
1863  linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1864  self.model,
1865  prots,
1866  rs,
1867  rmf_frame_number,
1868  rmf_name)
1869  if not linking_successful:
1870  continue
1871 
1872  if not prots:
1873  continue
1874 
1875  if IMP.pmi.get_is_canonical(prots[0]):
1876  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1877  prot = states[state_number]
1878  else:
1879  prot = prots[state_number]
1880 
1881  # get transformation aligning coordinates of requested tuples
1882  # to the first RMF file
1883  if cnt==0:
1884  coords_f1=alignment_coordinates[cnt]
1885  if cnt > 0:
1886  coords_f2=alignment_coordinates[cnt]
1887  if coords_f2:
1888  Ali = IMP.pmi.analysis.Alignment(coords_f1, coords_f2)
1889  transformation = Ali.align()[1]
1890  else:
1892 
1893  rbs = set()
1894  for p in IMP.atom.get_leaves(prot):
1895  if not IMP.core.XYZR.get_is_setup(p):
1897  IMP.core.XYZR(p).set_radius(0.0001)
1898  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1899 
1901  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1902  rbs.add(rb)
1903  else:
1905  transformation)
1906  for rb in rbs:
1907  IMP.core.transform(rb,transformation)
1908 
1910  out_pdb_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".pdb")
1911  out_rmf_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".rmf3")
1912  o.init_pdb(out_pdb_fn,prot)
1913  o.write_pdb(out_pdb_fn,
1914  translate_to_geometric_center=write_pdb_with_centered_coordinates)
1915 
1916  tmp_dict["local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1917  tmp_dict["rmf_file_full_path"]=rmf_name
1918  tmp_dict["local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1919  tmp_dict["local_rmf_frame_number"]=0
1920 
1921  clusstat.write(str(tmp_dict)+"\n")
1922 
1923  if IMP.pmi.get_is_canonical(prot):
1924  # create a single-state System and write that
1926  h.set_name("System")
1927  h.add_child(prot)
1928  o.init_rmf(out_rmf_fn, [h], rs)
1929  else:
1930  o.init_rmf(out_rmf_fn, [prot],rs)
1931 
1932  o.write_rmf(out_rmf_fn)
1933  o.close_rmf(out_rmf_fn)
1934  # add the density
1935  if density_custom_ranges:
1936  DensModule.add_subunits_density(prot)
1937 
1938  if density_custom_ranges:
1939  DensModule.write_mrc(path=dircluster)
1940  del DensModule
1941  return
1942 
1943 
1944 
1945  # broadcast the coordinates
1946  if self.number_of_processes > 1:
1947  all_coordinates = IMP.pmi.tools.scatter_and_gather(
1948  all_coordinates)
1949  all_rmf_file_names = IMP.pmi.tools.scatter_and_gather(
1950  all_rmf_file_names)
1951  rmf_file_name_index_dict = IMP.pmi.tools.scatter_and_gather(
1952  rmf_file_name_index_dict)
1953  alignment_coordinates=IMP.pmi.tools.scatter_and_gather(
1954  alignment_coordinates)
1955  rmsd_coordinates=IMP.pmi.tools.scatter_and_gather(
1956  rmsd_coordinates)
1957 
1958  if self.rank == 0:
1959  # save needed informations in external files
1960  self.save_objects(
1961  [best_score_feature_keyword_list_dict,
1962  rmf_file_name_index_dict],
1963  ".macro.pkl")
1964 
1965 # ------------------------------------------------------------------------
1966 # Calculate distance matrix and cluster
1967 # ------------------------------------------------------------------------
1968  print("setup clustering class")
1969  self.cluster_obj = IMP.pmi.analysis.Clustering(rmsd_weights)
1970 
1971  for n, model_coordinate_dict in enumerate(all_coordinates):
1972  template_coordinate_dict = {}
1973  # let's try to align
1974  if alignment_components is not None and len(self.cluster_obj.all_coords) == 0:
1975  # set the first model as template coordinates
1976  self.cluster_obj.set_template(alignment_coordinates[n])
1977  self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1978  print("Global calculating the distance matrix")
1979 
1980  # calculate distance matrix, all against all
1981  self.cluster_obj.dist_matrix()
1982 
1983  # perform clustering and optionally display
1984  if self.rank == 0:
1985  self.cluster_obj.do_cluster(number_of_clusters)
1986  if display_plot:
1987  if self.rank == 0:
1988  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1989  if exit_after_display:
1990  exit()
1991  self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1992 
1993 # ------------------------------------------------------------------------
1994 # Alteratively, load the distance matrix from file and cluster that
1995 # ------------------------------------------------------------------------
1996  else:
1997  if self.rank==0:
1998  print("setup clustering class")
1999  self.cluster_obj = IMP.pmi.analysis.Clustering()
2000  self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
2001  print("clustering with %s clusters" % str(number_of_clusters))
2002  self.cluster_obj.do_cluster(number_of_clusters)
2003  [best_score_feature_keyword_list_dict,
2004  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
2005  if display_plot:
2006  if self.rank == 0:
2007  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
2008  if exit_after_display:
2009  exit()
2010  if self.number_of_processes > 1:
2011  self.comm.Barrier()
2012 
2013 # ------------------------------------------------------------------------
2014 # now save all informations about the clusters
2015 # ------------------------------------------------------------------------
2016 
2017  if self.rank == 0:
2018  print(self.cluster_obj.get_cluster_labels())
2019  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
2020  print("rank %s " % str(self.rank))
2021  print("cluster %s " % str(n))
2022  print("cluster label %s " % str(cl))
2023  print(self.cluster_obj.get_cluster_label_names(cl))
2024  cluster_size = len(self.cluster_obj.get_cluster_label_names(cl))
2025  cluster_prov = prov + \
2026  [IMP.pmi.io.ClusterProvenance(cluster_size)]
2027 
2028  # first initialize the Density class if requested
2029  if density_custom_ranges:
2030  DensModule = IMP.pmi.analysis.GetModelDensity(
2031  density_custom_ranges,
2032  voxel=voxel_size)
2033 
2034  dircluster = outputdir + "/cluster." + str(n) + "/"
2035  try:
2036  os.mkdir(dircluster)
2037  except:
2038  pass
2039 
2040  rmsd_dict = {"AVERAGE_RMSD":
2041  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
2042  clusstat = open(dircluster + "stat.out", "w")
2043  for k, structure_name in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
2044  # extract the features
2045  tmp_dict = {}
2046  tmp_dict.update(rmsd_dict)
2047  index = rmf_file_name_index_dict[structure_name]
2048  for key in best_score_feature_keyword_list_dict:
2049  tmp_dict[
2050  key] = best_score_feature_keyword_list_dict[
2051  key][
2052  index]
2053 
2054  # get the rmf name and the frame number from the list of
2055  # frame names
2056  rmf_name = structure_name.split("|")[0]
2057  rmf_frame_number = int(structure_name.split("|")[1])
2058  clusstat.write(str(tmp_dict) + "\n")
2059 
2060  # extract frame (open or link to existing)
2061  if k==0:
2062  prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
2063  self.model,
2064  rmf_frame_number,
2065  rmf_name)
2066  else:
2067  linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
2068  self.model,
2069  prots,
2070  rs,
2071  rmf_frame_number,
2072  rmf_name)
2073  if not linking_successful:
2074  continue
2075  if not prots:
2076  continue
2077 
2078  if IMP.pmi.get_is_canonical(prots[0]):
2079  states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
2080  prot = states[state_number]
2081  else:
2082  prot = prots[state_number]
2083  if k==0:
2084  IMP.pmi.io.add_provenance(cluster_prov, (prot,))
2085 
2086  # transform clusters onto first
2087  if k > 0:
2088  model_index = self.cluster_obj.get_model_index_from_name(
2089  structure_name)
2090  transformation = self.cluster_obj.get_transformation_to_first_member(
2091  cl,
2092  model_index)
2093  rbs = set()
2094  for p in IMP.atom.get_leaves(prot):
2095  if not IMP.core.XYZR.get_is_setup(p):
2097  IMP.core.XYZR(p).set_radius(0.0001)
2098  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
2099 
2101  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
2102  rbs.add(rb)
2103  else:
2105  transformation)
2106  for rb in rbs:
2107  IMP.core.transform(rb,transformation)
2108 
2109  # add the density
2110  if density_custom_ranges:
2111  DensModule.add_subunits_density(prot)
2112 
2113  # pdb writing should be optimized!
2114  o = IMP.pmi.output.Output()
2115  o.init_pdb(dircluster + str(k) + ".pdb", prot)
2116  o.write_pdb(dircluster + str(k) + ".pdb")
2117 
2118  if IMP.pmi.get_is_canonical(prot):
2119  # create a single-state System and write that
2121  h.set_name("System")
2122  h.add_child(prot)
2123  o.init_rmf(dircluster + str(k) + ".rmf3", [h], rs)
2124  else:
2125  o.init_rmf(dircluster + str(k) + ".rmf3", [prot],rs)
2126  o.write_rmf(dircluster + str(k) + ".rmf3")
2127  o.close_rmf(dircluster + str(k) + ".rmf3")
2128 
2129  del o
2130  # IMP.atom.destroy(prot)
2131 
2132  if density_custom_ranges:
2133  DensModule.write_mrc(path=dircluster)
2134  del DensModule
2135 
2136  if self.number_of_processes>1:
2137  self.comm.Barrier()
2138 
2139  def get_cluster_rmsd(self,cluster_num):
2140  if self.cluster_obj is None:
2141  raise Exception("Run clustering first")
2142  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
2143 
2144  def save_objects(self, objects, file_name):
2145  import pickle
2146  outf = open(file_name, 'wb')
2147  pickle.dump(objects, outf)
2148  outf.close()
2149 
2150  def load_objects(self, file_name):
2151  import pickle
2152  inputf = open(file_name, 'rb')
2153  objects = pickle.load(inputf)
2154  inputf.close()
2155  return objects
2156 
2158 
2159  """
2160  This class contains analysis utilities to investigate ReplicaExchange results.
2161  """
2162 
2163  ########################
2164  # Construction and Setup
2165  ########################
2166 
2167  def __init__(self,model,
2168  stat_files,
2169  best_models=None,
2170  score_key=None,
2171  alignment=True):
2172  """
2173  Construction of the Class.
2174  @param model IMP.Model()
2175  @param stat_files list of string. Can be ascii stat files, rmf files names
2176  @param best_models Integer. Number of best scoring models, if None: all models will be read
2177  @param alignment boolean (Default=True). Align before computing the rmsd.
2178  """
2179 
2180  self.model=model
2181  self.best_models=best_models
2182  self.stath0=IMP.pmi.output.StatHierarchyHandler(model,stat_files,self.best_models,score_key,cache=True)
2183  self.stath1=IMP.pmi.output.StatHierarchyHandler(StatHierarchyHandler=self.stath0)
2184 
2185  self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(IMP.pmi.tools.select_at_all_resolutions(self.stath1))
2186  self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(IMP.pmi.tools.select_at_all_resolutions(self.stath0))
2187  self.sel0_rmsd=IMP.atom.Selection(self.stath0)
2188  self.sel1_rmsd=IMP.atom.Selection(self.stath1)
2189  self.sel0_alignment=IMP.atom.Selection(self.stath0)
2190  self.sel1_alignment=IMP.atom.Selection(self.stath1)
2191  self.clusters=[]
2192  # fill the cluster list with a single cluster containing all models
2193  c = IMP.pmi.output.Cluster(0)
2194  self.clusters.append(c)
2195  for n0 in range(len(self.stath0)):
2196  c.add_member(n0)
2197  self.pairwise_rmsd={}
2198  self.pairwise_molecular_assignment={}
2199  self.alignment=alignment
2200  self.symmetric_molecules={}
2201  self.issymmetricsel={}
2202  self.update_seldicts()
2203  self.molcopydict0=IMP.pmi.tools.get_molecules_dictionary_by_copy(IMP.atom.get_leaves(self.stath0))
2204  self.molcopydict1=IMP.pmi.tools.get_molecules_dictionary_by_copy(IMP.atom.get_leaves(self.stath1))
2205 
2206  def set_rmsd_selection(self,**kwargs):
2207  """
2208  Setup the selection onto which the rmsd is computed
2209  @param kwargs use IMP.atom.Selection keywords
2210  """
2211  self.sel0_rmsd=IMP.atom.Selection(self.stath0,**kwargs)
2212  self.sel1_rmsd=IMP.atom.Selection(self.stath1,**kwargs)
2213  self.update_seldicts()
2214 
2215  def set_symmetric(self,molecule_name):
2216  """
2217  Store names of symmetric molecules
2218  """
2219  self.symmetric_molecules[molecule_name]=0
2220  self.update_seldicts()
2221 
2222  def set_alignment_selection(self,**kwargs):
2223  """
2224  Setup the selection onto which the alignment is computed
2225  @param kwargs use IMP.atom.Selection keywords
2226  """
2227  self.sel0_alignment=IMP.atom.Selection(self.stath0,**kwargs)
2228  self.sel1_alignment=IMP.atom.Selection(self.stath1,**kwargs)
2229 
2230  ######################
2231  # Clustering functions
2232  ######################
2233  def clean_clusters(self):
2234  for c in self.clusters: del c
2235  self.clusters=[]
2236 
2237 
2238  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2239  """
2240  Cluster the models based on RMSD.
2241  @param rmsd_cutoff Float the distance cutoff in Angstrom
2242  @param metric (Default=IMP.atom.get_rmsd) the metric that will be used to compute rmsds
2243  """
2244  self.clean_clusters()
2245  not_clustered=set(range(len(self.stath1)))
2246  while len(not_clustered)>0:
2247  self.aggregate(not_clustered, rmsd_cutoff, metric)
2248  #self.merge_aggregates(rmsd_cutoff, metric)
2249  self.update_clusters()
2250 
2251  def refine(self,rmsd_cutoff=10):
2252  """
2253  Refine the clusters by merging the ones whose centers are close
2254  @param rmsd_cutoff cutoff distance in Angstorms
2255  """
2256  merge_pairs=[]
2257  clusters_copy=self.clusters
2258  for c0,c1 in itertools.combinations(self.clusters,2):
2259  if c0.center_index is None:
2260  self.compute_cluster_center(c0)
2261  if c1.center_index is None:
2262  self.compute_cluster_center(c1)
2263  d0=self.stath0[c0.center_index]
2264  d1=self.stath1[c1.center_index]
2265  rmsd, molecular_assignment = self.rmsd()
2266  if rmsd <= rmsd_cutoff:
2267  if c1 in self.clusters:
2268  clusters_copy.remove(c1)
2269  c0+=c1
2270  self.clusters=clusters_copy
2271  self.update_clusters()
2272 
2273  ####################
2274  # Input Output
2275  ####################
2276 
2277  def set_cluster_assignments(self, cluster_ids):
2278  if len(cluster_ids)!=len(self.stath0):
2279  raise ValueError('cluster ids has to be same length as number of frames')
2280 
2281  self.clusters=[]
2282  for i in sorted(list(set(cluster_ids))):
2283  self.clusters.append(IMP.pmi.output.Cluster(i))
2284  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
2285  self.clusters[idx].add_member(i,d)
2286 
2287  def get_cluster_data(self, cluster):
2288  """
2289  Return the model data from a cluster
2290  @param cluster IMP.pmi.output.Cluster object
2291  """
2292  data=[]
2293  for m in cluster:
2294  data.append(m)
2295  return data
2296 
2297  def save_data(self,filename='data.pkl'):
2298  """
2299  Save the data for the whole models into a pickle file
2300  @param filename string
2301  """
2302  self.stath0.save_data(filename)
2303 
2304  def set_data(self,data):
2305  """
2306  Set the data from an external IMP.pmi.output.Data
2307  @param data IMP.pmi.output.Data
2308  """
2309  self.stath0.data=data
2310  self.stath1.data=data
2311 
2312  def load_data(self,filename='data.pkl'):
2313  """
2314  Load the data from an external pickled file
2315  @param filename string
2316  """
2317  self.stath0.load_data(filename)
2318  self.stath1.load_data(filename)
2319  self.best_models=len(self.stath0)
2320 
2321  def add_cluster(self,rmf_name_list):
2322  c = IMP.pmi.output.Cluster(len(self.clusters))
2323  print("creating cluster index "+str(len(self.clusters)))
2324  self.clusters.append(c)
2325  current_len=len(self.stath0)
2326 
2327  for rmf in rmf_name_list:
2328  print("adding rmf "+rmf)
2329  self.stath0.add_stat_file(rmf)
2330  self.stath1.add_stat_file(rmf)
2331 
2332  for n0 in range(current_len,len(self.stath0)):
2333  d0=self.stath0[n0]
2334  c.add_member(n0,d0)
2335  self.update_clusters()
2336 
2337  def save_clusters(self,filename='clusters.pkl'):
2338  """
2339  Save the clusters into a pickle file
2340  @param filename string
2341  """
2342  try:
2343  import cPickle as pickle
2344  except ImportError:
2345  import pickle
2346  fl=open(filename,'wb')
2347  pickle.dump(self.clusters,fl)
2348 
2349  def load_clusters(self,filename='clusters.pkl',append=False):
2350  """
2351  Load the clusters from a pickle file
2352  @param filename string
2353  @param append bool (Default=False), if True. append the clusters to the ones currently present
2354  """
2355  try:
2356  import cPickle as pickle
2357  except ImportError:
2358  import pickle
2359  fl=open(filename,'rb')
2360  self.clean_clusters()
2361  if append:
2362  self.clusters+=pickle.load(fl)
2363  else:
2364  self.clusters=pickle.load(fl)
2365  self.update_clusters()
2366 
2367  ####################
2368  # Analysis Functions
2369  ####################
2370 
2371  def compute_cluster_center(self,cluster):
2372  """
2373  Compute the cluster center for a given cluster
2374  """
2375  member_distance=defaultdict(float)
2376 
2377  for n0,n1 in itertools.combinations(cluster.members,2):
2378  d0=self.stath0[n0]
2379  d1=self.stath1[n1]
2380  rmsd, _ = self.rmsd()
2381  member_distance[n0]+=rmsd
2382 
2383  if len(member_distance)>0:
2384  cluster.center_index=min(member_distance, key=member_distance.get)
2385  else:
2386  cluster.center_index=cluster.members[0]
2387 
2388  def save_coordinates(self,cluster,rmf_name=None,reference="Absolute", prefix="./"):
2389  """
2390  Save the coordinates of the current cluster a single rmf file
2391  """
2392  print("saving coordinates",cluster)
2393  if self.alignment: self.set_reference(reference,cluster)
2395  if rmf_name is None:
2396  rmf_name=prefix+'/'+str(cluster.cluster_id)+".rmf3"
2397 
2398  d1=self.stath1[cluster.members[0]]
2399  o.init_rmf(rmf_name, [self.stath1])
2400  for n1 in cluster.members:
2401  d1=self.stath1[n1]
2402  self.model.update()
2404  if self.alignment: self.align()
2405  o.write_rmf(rmf_name)
2407  o.close_rmf(rmf_name)
2408 
2409  def prune_redundant_structures(self,rmsd_cutoff=10):
2410  """
2411  remove structures that are similar
2412  append it to a new cluster
2413  """
2414  print("pruning models")
2415  selected=0
2416  filtered=[selected]
2417  remaining=range(1,len(self.stath1),10)
2418 
2419  while len(remaining)>0:
2420  d0=self.stath0[selected]
2421  rm=[]
2422  for n1 in remaining:
2423  d1=self.stath1[n1]
2424  if self.alignment: self.align()
2425  d, _ = self.rmsd()
2426  if d<=rmsd_cutoff:
2427  rm.append(n1)
2428  print("pruning model %s, similar to model %s, rmsd %s"%(str(n1),str(selected),str(d)))
2429  remaining=[x for x in remaining if x not in rm]
2430  if len(remaining)==0: break
2431  selected=remaining[0]
2432  filtered.append(selected)
2433  remaining.pop(0)
2434  c = IMP.pmi.output.Cluster(len(self.clusters))
2435  self.clusters.append(c)
2436  for n0 in filtered:
2437  d0=self.stath0[n0]
2438  c.add_member(n0,d0)
2439  self.update_clusters()
2440 
2441 
2442 
2443  def precision(self,cluster):
2444  """
2445  Compute the precision of a cluster
2446  """
2447  npairs=0
2448  rmsd=0.0
2449  precision=None
2450 
2451  if not cluster.center_index is None:
2452  members1=[cluster.center_index]
2453  else:
2454  members1=cluster.members
2455 
2456  for n0 in members1:
2457  d0=self.stath0[n0]
2458  for n1 in cluster.members:
2459  if n0!=n1:
2460  npairs+=1
2461  d1=self.stath1[n1]
2463  tmp_rmsd, _ = self.rmsd()
2464  rmsd+=tmp_rmsd
2466 
2467  if npairs>0:
2468  precision=rmsd/npairs
2469  cluster.precision=precision
2470  return precision
2471 
2472  def bipartite_precision(self,cluster1,cluster2,verbose=False):
2473  """
2474  Compute the bipartite precision (ie the cross-precision)
2475  between two clusters
2476  """
2477  npairs=0
2478  rmsd=0.0
2479  for cn0,n0 in enumerate(cluster1.members):
2480  d0=self.stath0[n0]
2481  for cn1,n1 in enumerate(cluster2.members):
2482  d1=self.stath1[n1]
2483  tmp_rmsd, _ =self.rmsd()
2484  if verbose: print("--- rmsd between structure %s and structure %s is %s"%(str(cn0),str(cn1),str(tmp_rmsd)))
2485  rmsd+=tmp_rmsd
2486  npairs+=1
2487  precision=rmsd/npairs
2488  return precision
2489 
2490  def rmsf(self,cluster,molecule,copy_index=0,state_index=0,cluster_ref=None,step=1):
2491  """
2492  Compute the Root mean square fluctuations
2493  of a molecule in a cluster
2494  Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf
2495  """
2496  rmsf=IMP.pmi.tools.OrderedDict()
2497 
2498  #assumes that residue indexes are identical for stath0 and stath1
2499  if cluster_ref is not None:
2500  if not cluster_ref.center_index is None:
2501  members0 = [cluster_ref.center_index]
2502  else:
2503  members0 = cluster_ref.members
2504  else:
2505  if not cluster.center_index is None:
2506  members0 = [cluster.center_index]
2507  else:
2508  members0 = cluster.members
2509 
2510  s0=IMP.atom.Selection(self.stath0,molecule=molecule,resolution=1,
2511  copy_index=copy_index,state_index=state_index)
2512  ps0=s0.get_selected_particles()
2513  #get the residue indexes
2514  residue_indexes=list(IMP.pmi.tools.OrderedSet([IMP.pmi.tools.get_residue_indexes(p)[0] for p in ps0]))
2515 
2516  #get the corresponding particles
2517  #s0=IMP.atom.Selection(stat_ref,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
2518  # copy_index=copy_index,state_index=state_index)
2519  #ps0 = s0.get_selected_particles()
2520 
2521 
2522 
2523  npairs=0
2524  for n0 in members0:
2525  d0=self.stath0[n0]
2526  for n1 in cluster.members[::step]:
2527  if n0!=n1:
2528  print("--- rmsf %s %s"%(str(n0),str(n1)))
2530 
2531  s1=IMP.atom.Selection(self.stath1,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
2532  copy_index=copy_index,state_index=state_index)
2533  ps1 = s1.get_selected_particles()
2534 
2535  d1=self.stath1[n1]
2536  if self.alignment: self.align()
2537  for n,(p0,p1) in enumerate(zip(ps0,ps1)):
2538  r=residue_indexes[n]
2539  d0=IMP.core.XYZ(p0)
2540  d1=IMP.core.XYZ(p1)
2541  if r in rmsf:
2542  rmsf[r]+=IMP.core.get_distance(d0,d1)
2543  else:
2544  rmsf[r]=IMP.core.get_distance(d0,d1)
2545  npairs+=1
2547  for r in rmsf:
2548  rmsf[r]/=npairs
2549 
2550  for stath in [self.stath0,self.stath1]:
2551  if molecule not in self.symmetric_molecules:
2552  s=IMP.atom.Selection(stath,molecule=molecule,residue_index=r,resolution=1,
2553  copy_index=copy_index,state_index=state_index)
2554  else:
2555  s=IMP.atom.Selection(stath,molecule=molecule,residue_index=r,resolution=1,
2556  state_index=state_index)
2557 
2558  ps = s.get_selected_particles()
2559  for p in ps:
2561  IMP.pmi.Uncertainty(p).set_uncertainty(rmsf[r])
2562  else:
2564 
2565  return rmsf
2566 
2567  def save_densities(self,cluster,density_custom_ranges,voxel_size=5,reference="Absolute", prefix="./",step=1):
2568  if self.alignment: self.set_reference(reference,cluster)
2569  dens = IMP.pmi.analysis.GetModelDensity(density_custom_ranges,
2570  voxel=voxel_size)
2571 
2572  for n1 in cluster.members[::step]:
2573  print("density "+str(n1))
2574  d1=self.stath1[n1]
2576  if self.alignment: self.align()
2577  dens.add_subunits_density(self.stath1)
2579  dens.write_mrc(path=prefix+'/',suffix=str(cluster.cluster_id))
2580  del dens
2581 
2582  def contact_map(self,cluster,contact_threshold=15,log_scale=False,consolidate=False,molecules=None,prefix='./',reference="Absolute"):
2583  if self.alignment: self.set_reference(reference,cluster)
2584  import numpy as np
2585  import matplotlib.pyplot as plt
2586  import matplotlib.cm as cm
2587  from scipy.spatial.distance import cdist
2588  import IMP.pmi.topology
2589  if molecules is None:
2591  else:
2592  mols=[IMP.pmi.topology.PMIMoleculeHierarchy(mol) for mol in IMP.pmi.tools.get_molecules(IMP.atom.Selection(self.stath1,molecules=molecules).get_selected_particles())]
2593  unique_copies=[mol for mol in mols if mol.get_copy_index() == 0]
2594  mol_names_unique=dict((mol.get_name(),mol) for mol in unique_copies)
2595  total_len_unique=sum(max(mol.get_residue_indexes()) for mol in unique_copies)
2596 
2597 
2598  # coords = np.ones((total_len,3)) * 1e6 #default to coords "very far away"
2599  index_dict={}
2600  prev_stop=0
2601 
2602  if not consolidate:
2603  for mol in mols:
2604  seqlen=max(mol.get_residue_indexes())
2605  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2606  prev_stop+=seqlen
2607 
2608  else:
2609  for mol in unique_copies:
2610  seqlen=max(mol.get_residue_indexes())
2611  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2612  prev_stop+=seqlen
2613 
2614 
2615  for ncl,n1 in enumerate(cluster.members):
2616  print(ncl)
2617  d1=self.stath1[n1]
2618  #self.apply_molecular_assignments(n1)
2619  coord_dict=IMP.pmi.tools.OrderedDict()
2620  for mol in mols:
2621  mol_name=mol.get_name()
2622  copy_index=mol.get_copy_index()
2623  rindexes = mol.get_residue_indexes()
2624  coords = np.ones((max(rindexes),3))
2625  for rnum in rindexes:
2626  sel = IMP.atom.Selection(mol, residue_index=rnum, resolution=1)
2627  selpart = sel.get_selected_particles()
2628  if len(selpart) == 0: continue
2629  selpart = selpart[0]
2630  coords[rnum - 1, :] = IMP.core.XYZ(selpart).get_coordinates()
2631  coord_dict[mol]=coords
2632 
2633  if not consolidate:
2634  coords=np.concatenate(list(coord_dict.values()))
2635  dists = cdist(coords, coords)
2636  binary_dists = np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
2637  else:
2638  binary_dists_dict={}
2639  for mol1 in mols:
2640  len1 = max(mol1.get_residue_indexes())
2641  for mol2 in mols:
2642  name1=mol1.get_name()
2643  name2=mol2.get_name()
2644  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2645  if (name1, name2) not in binary_dists_dict:
2646  binary_dists_dict[(name1, name2)] = np.zeros((len1,len1))
2647  binary_dists_dict[(name1, name2)] += np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
2648  binary_dists=np.zeros((total_len_unique,total_len_unique))
2649 
2650  for name1,name2 in binary_dists_dict:
2651  r1=index_dict[mol_names_unique[name1]]
2652  r2=index_dict[mol_names_unique[name2]]
2653  binary_dists[min(r1):max(r1)+1,min(r2):max(r2)+1] = np.where((binary_dists_dict[(name1, name2)]>=1.0),1.0,0.0)
2654 
2655  if ncl==0:
2656  dist_maps = [dists]
2657  av_dist_map = dists
2658  contact_freqs = binary_dists
2659  else:
2660  dist_maps.append(dists)
2661  av_dist_map += dists
2662  contact_freqs += binary_dists
2663 
2664  #self.undo_apply_molecular_assignments(n1)
2665 
2666  if log_scale:
2667  contact_freqs =-np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2668  else:
2669  contact_freqs =1.0/len(cluster)*contact_freqs
2670  av_dist_map=1.0/len(cluster)*contact_freqs
2671 
2672  fig = plt.figure(figsize=(100, 100))
2673  ax = fig.add_subplot(111)
2674  ax.set_xticks([])
2675  ax.set_yticks([])
2676  gap_between_components=50
2677  colormap = cm.Blues
2678  colornorm=None
2679 
2680 
2681  #if cbar_labels is not None:
2682  # if len(cbar_labels)!=4:
2683  # print("to provide cbar labels, give 3 fields (first=first input file, last=last input) in oppose order of input contact maps")
2684  # exit()
2685  # set the list of proteins on the x axis
2686  if not consolidate:
2687  sorted_tuple=sorted([(IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_extended_name(),mol) for mol in mols])
2688  prot_list=list(zip(*sorted_tuple))[1]
2689  else:
2690  sorted_tuple=sorted([(IMP.pmi.topology.PMIMoleculeHierarchy(mol).get_name(),mol) for mol in unique_copies])
2691  prot_list=list(zip(*sorted_tuple))[1]
2692 
2693  prot_listx = prot_list
2694  nresx = gap_between_components + \
2695  sum([max(mol.get_residue_indexes())
2696  + gap_between_components for mol in prot_listx])
2697 
2698  # set the list of proteins on the y axis
2699  prot_listy = prot_list
2700  nresy = gap_between_components + \
2701  sum([max(mol.get_residue_indexes())
2702  + gap_between_components for mol in prot_listy])
2703 
2704  # this is the residue offset for each protein
2705  resoffsetx = {}
2706  resendx = {}
2707  res = gap_between_components
2708  for mol in prot_listx:
2709  resoffsetx[mol] = res
2710  res += max(mol.get_residue_indexes())
2711  resendx[mol] = res
2712  res += gap_between_components
2713 
2714  resoffsety = {}
2715  resendy = {}
2716  res = gap_between_components
2717  for mol in prot_listy:
2718  resoffsety[mol] = res
2719  res += max(mol.get_residue_indexes())
2720  resendy[mol] = res
2721  res += gap_between_components
2722 
2723  resoffsetdiagonal = {}
2724  res = gap_between_components
2725  for mol in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2726  resoffsetdiagonal[mol] = res
2727  res += max(mol.get_residue_indexes())
2728  res += gap_between_components
2729 
2730  # plot protein boundaries
2731  xticks = []
2732  xlabels = []
2733  for n, prot in enumerate(prot_listx):
2734  res = resoffsetx[prot]
2735  end = resendx[prot]
2736  for proty in prot_listy:
2737  resy = resoffsety[proty]
2738  endy = resendy[proty]
2739  ax.plot([res, res], [resy, endy], linestyle='-',color='gray', lw=0.4)
2740  ax.plot([end, end], [resy, endy], linestyle='-',color='gray', lw=0.4)
2741  xticks.append((float(res) + float(end)) / 2)
2742  xlabels.append(IMP.pmi.topology.PMIMoleculeHierarchy(prot).get_extended_name())
2743 
2744  yticks = []
2745  ylabels = []
2746  for n, prot in enumerate(prot_listy):
2747  res = resoffsety[prot]
2748  end = resendy[prot]
2749  for protx in prot_listx:
2750  resx = resoffsetx[protx]
2751  endx = resendx[protx]
2752  ax.plot([resx, endx], [res, res], linestyle='-',color='gray', lw=0.4)
2753  ax.plot([resx, endx], [end, end], linestyle='-',color='gray', lw=0.4)
2754  yticks.append((float(res) + float(end)) / 2)
2755  ylabels.append(IMP.pmi.topology.PMIMoleculeHierarchy(prot).get_extended_name())
2756 
2757  # plot the contact map
2758 
2759  tmp_array = np.zeros((nresx, nresy))
2760  ret={}
2761  for px in prot_listx:
2762  for py in prot_listy:
2763  resx = resoffsetx[px]
2764  lengx = resendx[px] - 1
2765  resy = resoffsety[py]
2766  lengy = resendy[py] - 1
2767  indexes_x = index_dict[px]
2768  minx = min(indexes_x)
2769  maxx = max(indexes_x)
2770  indexes_y = index_dict[py]
2771  miny = min(indexes_y)
2772  maxy = max(indexes_y)
2773  tmp_array[resx:lengx,resy:lengy] = contact_freqs[minx:maxx,miny:maxy]
2774  ret[(px,py)]=np.argwhere(contact_freqs[minx:maxx,miny:maxy] == 1.0)+1
2775 
2776  cax = ax.imshow(tmp_array,
2777  cmap=colormap,
2778  norm=colornorm,
2779  origin='lower',
2780  alpha=0.6,
2781  interpolation='nearest')
2782 
2783  ax.set_xticks(xticks)
2784  ax.set_xticklabels(xlabels, rotation=90)
2785  ax.set_yticks(yticks)
2786  ax.set_yticklabels(ylabels)
2787  plt.setp(ax.get_xticklabels(), fontsize=6)
2788  plt.setp(ax.get_yticklabels(), fontsize=6)
2789 
2790  # display and write to file
2791  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2792  [i.set_linewidth(2.0) for i in ax.spines.values()]
2793  #if cbar_labels is not None:
2794  # cbar = fig.colorbar(cax, ticks=[0.5,1.5,2.5,3.5])
2795  # cbar.ax.set_yticklabels(cbar_labels)# vertically oriented colorbar
2796 
2797  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf", dpi=300,transparent="False")
2798  return ret
2799 
2800 
2801  def plot_rmsd_matrix(self,filename):
2802  import numpy as np
2803  self.compute_all_pairwise_rmsd()
2804  distance_matrix = np.zeros(
2805  (len(self.stath0), len(self.stath1)))
2806  for (n0,n1) in self.pairwise_rmsd:
2807  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0,n1)]
2808 
2809  import matplotlib as mpl
2810  mpl.use('Agg')
2811  import matplotlib.pylab as pl
2812  from scipy.cluster import hierarchy as hrc
2813 
2814  fig = pl.figure(figsize=(10,8))
2815  ax = fig.add_subplot(212)
2816  dendrogram = hrc.dendrogram(
2817  hrc.linkage(distance_matrix),
2818  color_threshold=7,
2819  no_labels=True)
2820  leaves_order = dendrogram['leaves']
2821  ax.set_xlabel('Model')
2822  ax.set_ylabel('RMSD [Angstroms]')
2823 
2824  ax2 = fig.add_subplot(221)
2825  cax = ax2.imshow(
2826  distance_matrix[leaves_order,
2827  :][:,
2828  leaves_order],
2829  interpolation='nearest')
2830  cb = fig.colorbar(cax)
2831  cb.set_label('RMSD [Angstroms]')
2832  ax2.set_xlabel('Model')
2833  ax2.set_ylabel('Model')
2834 
2835  pl.savefig(filename, dpi=300)
2836  pl.close(fig)
2837 
2838  ####################
2839  # Internal Functions
2840  ####################
2841 
2842  def update_clusters(self):
2843  """
2844  Update the cluster id numbers
2845  """
2846  for n,c in enumerate(self.clusters):
2847  c.cluster_id=n
2848 
2849  def get_molecule(self, hier, name, copy):
2850  s=IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2851  return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2852 
2853  def update_seldicts(self):
2854  """
2855  Update the seldicts
2856  """
2857  self.seldict0=IMP.pmi.tools.get_selections_dictionary(self.sel0_rmsd.get_selected_particles())
2858  self.seldict1=IMP.pmi.tools.get_selections_dictionary(self.sel1_rmsd.get_selected_particles())
2859  for mol in self.seldict0:
2860  for sel in self.seldict0[mol]:
2861  self.issymmetricsel[sel]=False
2862  for mol in self.symmetric_molecules:
2863  self.symmetric_molecules[mol]=len(self.seldict0[mol])
2864  for sel in self.seldict0[mol]:
2865  self.issymmetricsel[sel]=True
2866 
2867 
2868  def align(self):
2869  print("alignment")
2870  tr = IMP.atom.get_transformation_aligning_first_to_second(self.sel1_alignment, self.sel0_alignment)
2871 
2872  for rb in self.rbs1:
2873  IMP.core.transform(rb, tr)
2874 
2875  for bead in self.beads1:
2876  IMP.core.transform(IMP.core.XYZ(bead), tr)
2877 
2878  self.model.update()
2879 
2880  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2881  '''
2882  initial filling of the clusters.
2883  '''
2884  n0 = idxs.pop()
2885  print("clustering model "+str(n0))
2886  d0 = self.stath0[n0]
2887  c = IMP.pmi.output.Cluster(len(self.clusters))
2888  print("creating cluster index "+str(len(self.clusters)))
2889  self.clusters.append(c)
2890  c.add_member(n0,d0)
2891  clustered = set([n0])
2892  for n1 in idxs:
2893  print("--- trying to add model "+str(n1)+" to cluster "+str(len(self.clusters)))
2894  d1 = self.stath1[n1]
2895  if self.alignment: self.align()
2896  rmsd, _ = self.rmsd(metric=metric)
2897  if rmsd<rmsd_cutoff:
2898  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2899  c.add_member(n1,d1)
2900  clustered.add(n1)
2901  else:
2902  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2903  idxs-=clustered
2904 
2905  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2906  """
2907  merge the clusters that have close members
2908  @param rmsd_cutoff cutoff distance in Angstorms
2909  """
2910  # before merging, clusters are spheres of radius rmsd_cutoff centered on the 1st element
2911  # here we only try to merge clusters whose centers are closer than 2*rmsd_cutoff
2912  to_merge = []
2913  print("merging...")
2914  for c0, c1 in filter(lambda x: len(x[0].members)>1, itertools.combinations(self.clusters, 2)):
2915  n0, n1 = [c.members[0] for c in (c0,c1)]
2916  d0 = self.stath0[n0]
2917  d1 = self.stath1[n1]
2918  rmsd, _ = self.rmsd()
2919  if rmsd<2*rmsd_cutoff and self.have_close_members(c0,c1,rmsd_cutoff,metric):
2920  to_merge.append((c0,c1))
2921 
2922  for c0, c in reversed(to_merge):
2923  self.merge(c0,c)
2924 
2925  #keep only full clusters
2926  self.clusters = [c for c in filter(lambda x: len(x.members)>0, self.clusters)]
2927 
2928 
2929  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2930  '''
2931  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2932  '''
2933  print("check close members for clusters "+str(c0.cluster_id)+" and "+str(c1.cluster_id))
2934  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2935  d0 = self.stath0[n0]
2936  d1 = self.stath1[n1]
2937  rmsd, _ = self.rmsd(metric=metric)
2938  if rmsd<rmsd_cutoff:
2939  return True
2940 
2941  return False
2942 
2943 
2944  def merge(self, c0, c1):
2945  '''
2946  merge two clusters
2947  '''
2948  c0+=c1
2949  c1.members=[]
2950  c1.data={}
2951 
2952  def rmsd_helper(self, sels0, sels1, metric):
2953  '''
2954  a function that returns the permutation best_sel of sels0 that minimizes metric
2955  '''
2956  best_rmsd2 = float('inf')
2957  best_sel = None
2958  if self.issymmetricsel[sels0[0]]:
2959  #this cases happens when symmetries were defined
2960  N = len(sels0)
2961  for offset in range(N):
2962  order=[(offset+i)%N for i in range(N)]
2963  sels = [sels0[(offset+i)%N] for i in range(N)]
2964  sel0 = sels[0]
2965  sel1 = sels1[0]
2966  r=metric(sel0, sel1)
2967  rmsd2=r*r*N
2968  ###print(order,rmsd2)
2969  if rmsd2 < best_rmsd2:
2970  best_rmsd2 = rmsd2
2971  best_sel = sels
2972  best_order=order
2973  else:
2974  for sels in itertools.permutations(sels0):
2975  rmsd2=0.0
2976  for sel0, sel1 in itertools.takewhile(lambda x: rmsd2<best_rmsd2, zip(sels, sels1)):
2977  r=metric(sel0, sel1)
2978  rmsd2+=r*r
2979  if rmsd2 < best_rmsd2:
2980  best_rmsd2 = rmsd2
2981  best_sel = sels
2982  ###for i,sel in enumerate(best_sel):
2983  ### p0 = sel.get_selected_particles()[0]
2984  ### p1 = sels1[i].get_selected_particles()[0]
2985  ### m0 = IMP.pmi.tools.get_molecules([p0])[0]
2986  ### m1 = IMP.pmi.tools.get_molecules([p1])[0]
2987  ### c0 = IMP.atom.Copy(m0).get_copy_index()
2988  ### c1 = IMP.atom.Copy(m1).get_copy_index()
2989  ### name0=m0.get_name()
2990  ### name1=m1.get_name()
2991  ### print("WWW",name0,name1,c0,c1)
2992  ###print(best_order,best_rmsd2,m0,m1)
2993  return best_sel, best_rmsd2
2994 
2995  def compute_all_pairwise_rmsd(self):
2996  for d0 in self.stath0:
2997  for d1 in self.stath1:
2998  rmsd, _ = self.rmsd()
2999 
3000  def rmsd(self,metric=IMP.atom.get_rmsd):
3001  '''
3002  Computes the RMSD. Resolves ambiguous pairs assignments
3003  '''
3004  # here we memoize the rmsd and molecular assignment so that it's not done multiple times
3005  n0=self.stath0.current_index
3006  n1=self.stath1.current_index
3007  if ((n0,n1) in self.pairwise_rmsd) and ((n0,n1) in self.pairwise_molecular_assignment):
3008  return self.pairwise_rmsd[(n0,n1)], self.pairwise_molecular_assignment[(n0,n1)]
3009 
3010  if self.alignment:
3011  self.align()
3012  #if it's not yet memoized
3013  total_rmsd=0.0
3014  total_N=0
3015  # this is a dictionary which keys are the molecule names, and values are the list of IMP.atom.Selection for all molecules that share the molecule name
3016  seldict_best_order={}
3017  molecular_assignment={}
3018  for molname, sels0 in self.seldict0.items():
3019  sels_best_order, best_rmsd2 = self.rmsd_helper(sels0, self.seldict1[molname], metric)
3020 
3021  Ncoords = len(sels_best_order[0].get_selected_particles())
3022  Ncopies = len(self.seldict1[molname])
3023  total_rmsd += Ncoords*best_rmsd2
3024  total_N += Ncoords*Ncopies
3025 
3026  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
3027  p0 = sel0.get_selected_particles()[0]
3028  p1 = sel1.get_selected_particles()[0]
3029  m0 = IMP.pmi.tools.get_molecules([p0])[0]
3030  m1 = IMP.pmi.tools.get_molecules([p1])[0]
3031  c0 = IMP.atom.Copy(m0).get_copy_index()
3032  c1 = IMP.atom.Copy(m1).get_copy_index()
3033  ###print(molname,c0,c1)
3034  molecular_assignment[(molname,c0)]=(molname,c1)
3035 
3036  total_rmsd = math.sqrt(total_rmsd/total_N)
3037 
3038  self.pairwise_rmsd[(n0,n1)]=total_rmsd
3039  self.pairwise_molecular_assignment[(n0,n1)]=molecular_assignment
3040  self.pairwise_rmsd[(n1,n0)]=total_rmsd
3041  self.pairwise_molecular_assignment[(n1,n0)]=molecular_assignment
3042  ###print(n0,n1,molecular_assignment)
3043  return total_rmsd, molecular_assignment
3044 
3045  def set_reference(self,reference,cluster):
3046  """
3047  Fix the reference structure for structural alignment, rmsd and chain assignemnt
3048  @param reference can be either "Absolute" (cluster center of the first cluster)
3049  or Relative (cluster center of the current cluster)
3050  """
3051  if reference=="Absolute":
3052  d0=self.stath0[0]
3053  elif reference=="Relative":
3054  if cluster.center_index:
3055  n0=cluster.center_index
3056  else:
3057  n0=cluster.members[0]
3058  d0=self.stath0[n0]
3059 
3061  """
3062  compute the molecular assignemnts between multiple copies
3063  of the same sequence. It changes the Copy index of Molecules
3064  """
3065  d1=self.stath1[n1]
3066  _, molecular_assignment = self.rmsd()
3067  for (m0, c0), (m1,c1) in molecular_assignment.items():
3068  mol0 = self.molcopydict0[m0][c0]
3069  mol1 = self.molcopydict1[m1][c1]
3070  cik0=IMP.atom.Copy(mol0).get_copy_index_key()
3071  p1=IMP.atom.Copy(mol1).get_particle()
3072  p1.set_value(cik0,c0)
3073 
3075  """
3076  Undo the Copy index assignment
3077  """
3078  d1=self.stath1[n1]
3079  _, molecular_assignment = self.rmsd()
3080  mols_newcopys = []
3081  for (m0, c0), (m1,c1) in molecular_assignment.items():
3082  mol0 = self.molcopydict0[m0][c0]
3083  mol1 = self.molcopydict1[m1][c1]
3084  cik0=IMP.atom.Copy(mol0).get_copy_index_key()
3085  p1=IMP.atom.Copy(mol1).get_particle()
3086  p1.set_value(cik0,c1)
3087 
3088  ####################
3089  # Container Functions
3090  ####################
3091 
3092  def __repr__(self):
3093  s= "AnalysisReplicaExchange\n"
3094  s+="---- number of clusters %s \n"%str(len(self.clusters))
3095  s+="---- number of models %s \n"%str(len(self.stath0))
3096  return s
3097 
3098  def __getitem__(self,int_slice_adaptor):
3099  if type(int_slice_adaptor) is int:
3100  return self.clusters[int_slice_adaptor]
3101  elif type(int_slice_adaptor) is slice:
3102  return self.__iter__(int_slice_adaptor)
3103  else:
3104  raise TypeError("Unknown Type")
3105 
3106  def __len__(self):
3107  return len(self.clusters)
3108 
3109  def __iter__(self,slice_key=None):
3110  if slice_key is None:
3111  for i in range(len(self)):
3112  yield self[i]
3113  else:
3114  for i in range(len(self))[slice_key]:
3115  yield self[i]
A class to simplify create of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
Definition: macros.py:3000
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
Definition: macros.py:3045
def load_clusters
Load the clusters from a pickle file.
Definition: macros.py:2349
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: tools.py:1821
def precision
Compute the precision of a cluster.
Definition: macros.py:2443
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
Definition: macros.py:1494
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: tools.py:69
A container for models organized into clusters.
Definition: output.py:1351
Sample using molecular dynamics.
Definition: samplers.py:392
def aggregate
initial filling of the clusters.
Definition: macros.py:2880
A class for reading stat files (either rmf or ascii v1 and v2)
Definition: output.py:785
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:521
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:522
Set of python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: macros.py:2409
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: macros.py:2490
def __init__
Constructor.
Definition: macros.py:819
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1911
A helper output for model evaluation.
Miscellaneous utilities.
Definition: tools.py:1
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: macros.py:2206
def get_cluster_data
Return the model data from a cluster.
Definition: macros.py:2287
def __init__
Construction of the Class.
Definition: macros.py:2167
def get_molecules
Return list of all molecules grouped by state.
Definition: macros.py:720
def set_data
Set the data from an external IMP.pmi.output.Data.
Definition: macros.py:2304
def __init__
Constructor.
Definition: macros.py:111
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: macros.py:3074
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: macros.py:2222
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: macros.py:2952
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: macros.py:2388
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: macros.py:1637
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
Definition: macros.py:3060
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: macros.py:2157
Add uncertainty to a particle.
Definition: Uncertainty.h:24
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
Definition: macros.py:562
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
def merge_aggregates
merge the clusters that have close members
Definition: macros.py:2905
This class initializes the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Definition: macros.py:1545
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: Uncertainty.h:45
Representation of the system.
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: macros.py:2371
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: macros.py:1554
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def build_model
Create model.
Definition: macros.py:1093
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:23
def update_seldicts
Update the seldicts.
Definition: macros.py:2853
def update_clusters
Update the cluster id numbers.
Definition: macros.py:2842
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:1111
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: macros.py:2251
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def BuildModel0
Construct a component for each subunit (no splitting, nothing fancy).
Definition: macros.py:1414
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:65
Collect timing information.
Definition: tools.py:82
def __init__
Constructor.
Definition: macros.py:1079
def set_symmetric
Store names of symmetric molecules.
Definition: macros.py:2215
A macro to build a Representation based on a Topology and lists of movers DEPRECATED - Use BuildSyste...
Definition: macros.py:797
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: utilities.h:91
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9803
Sampling of the system.
Definition: samplers.py:1
Sample using Monte Carlo.
Definition: samplers.py:36
Create movers and setup constraints for PMI objects.
def merge
merge two clusters
Definition: macros.py:2944
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Definition: macros.py:606
Deprecated building macro - use BuildSystem()
Definition: macros.py:1075
The general base class for IMP exceptions.
Definition: exception.h:49
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
class to link stat files to several rmf files
Definition: output.py:1102
def save_data
Save the data for the whole models into a pickle file.
Definition: macros.py:2297
Class to handle individual particles of a Model object.
Definition: Particle.h:41
def execute_macro
Builds representations and sets up degrees of freedom.
Definition: macros.py:729
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: macros.py:2472
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def __init__
Constructor.
Definition: macros.py:584
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
Definition: macros.py:2238
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Uncertainty.h:30
def save_clusters
Save the clusters into a pickle file.
Definition: macros.py:2337
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: macros.py:2929
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
Set up the representation of all proteins and nucleic acid macromolecules.
A macro to help setup and run replica exchange.
Definition: macros.py:55
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
def get_rbs_and_beads
Returns unique objects in original order.
Definition: tools.py:1888
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Definition: macros.py:2312
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:1061
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:653
Sample using replica exchange.
Definition: samplers.py:502
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
def get_representation
Return the Representation object.
Definition: macros.py:933
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27