IMP logo
IMP Reference Guide  develop.1a86c4215a,2024/04/24
The Integrative Modeling Platform
/macros.py
1 """@namespace IMP.pmi1.macros
2 Protocols for sampling structures and analyzing them.
3 """
4 
5 from __future__ import print_function, division
6 import IMP
8 import IMP.pmi1.tools
10 import IMP.pmi1.output
11 import IMP.pmi1.analysis
12 import IMP.pmi1.io
13 import IMP.rmf
14 import IMP.isd
15 import RMF
16 import os
17 import glob
18 from operator import itemgetter
19 from collections import defaultdict
20 import numpy as np
21 import string
22 import itertools
23 import math
24 
25 class _RMFRestraints(object):
26  """All restraints that are written out to the RMF file"""
27  def __init__(self, model, user_restraints):
28  self._rmf_rs = IMP.pmi1.tools.get_restraint_set(model, rmf=True)
29  self._user_restraints = user_restraints if user_restraints else []
30 
31  def __len__(self):
32  return (len(self._user_restraints)
33  + self._rmf_rs.get_number_of_restraints())
34 
35  def __bool__(self):
36  return len(self) > 0
37  __nonzero__ = __bool__ # Python 2 compatibility
38 
39  def __getitem__(self, i):
40  class FakePMIWrapper(object):
41  def __init__(self, r):
42  self.r = IMP.RestraintSet.get_from(r)
43  get_restraint = lambda self: self.r
44  lenuser = len(self._user_restraints)
45  if 0 <= i < lenuser:
46  return self._user_restraints[i]
47  elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
48  r = self._rmf_rs.get_restraint(i - lenuser)
49  return FakePMIWrapper(r)
50  else:
51  raise IndexError("Out of range")
52 
53 
54 class ReplicaExchange0(object):
55  """A macro to help setup and run replica exchange.
56  Supports Monte Carlo and molecular dynamics.
57  Produces trajectory RMF files, best PDB structures,
58  and output stat files.
59  """
60  def __init__(self, model,
61  representation=None,
62  root_hier=None,
63  sample_objects=None, # DEPRECATED
64  monte_carlo_sample_objects=None,
65  molecular_dynamics_sample_objects=None,
66  output_objects=[],
67  rmf_output_objects=None,
68  crosslink_restraints=None,
69  monte_carlo_temperature=1.0,
70  simulated_annealing=False,
71  simulated_annealing_minimum_temperature=1.0,
72  simulated_annealing_maximum_temperature=2.5,
73  simulated_annealing_minimum_temperature_nframes=100,
74  simulated_annealing_maximum_temperature_nframes=100,
75  replica_exchange_minimum_temperature=1.0,
76  replica_exchange_maximum_temperature=2.5,
77  replica_exchange_swap=True,
78  num_sample_rounds=1,
79  number_of_best_scoring_models=500,
80  monte_carlo_steps=10,
81  self_adaptive=False,
82  molecular_dynamics_steps=10,
83  molecular_dynamics_max_time_step=1.0,
84  number_of_frames=1000,
85  save_coordinates_mode="lowest_temperature",
86  nframes_write_coordinates=1,
87  write_initial_rmf=True,
88  initial_rmf_name_suffix="initial",
89  stat_file_name_suffix="stat",
90  best_pdb_name_suffix="model",
91  do_clean_first=True,
92  do_create_directories=True,
93  global_output_directory="./",
94  rmf_dir="rmfs/",
95  best_pdb_dir="pdbs/",
96  replica_stat_file_suffix="stat_replica",
97  em_object_for_rmf=None,
98  atomistic=False,
99  replica_exchange_object=None,
100  test_mode=False):
101  """Constructor.
102  @param model The IMP model
103  @param representation PMI.representation.Representation object
104  (or list of them, for multi-state modeling)
105  @param root_hier Instead of passing Representation, pass the System hierarchy
106  @param monte_carlo_sample_objects Objects for MC sampling; a list of
107  structural components (generally the representation) that will
108  be moved and restraints with parameters that need to
109  be sampled.
110  For PMI2: just pass flat list of movers
111  @param molecular_dynamics_sample_objects Objects for MD sampling
112  For PMI2: just pass flat list of particles
113  @param output_objects A list of structural objects and restraints
114  that will be included in output (ie, statistics "stat" files). Any object
115  that provides a get_output() method can be used here. If None is passed
116  the macro will not write stat files.
117  @param rmf_output_objects A list of structural objects and restraints
118  that will be included in rmf. Any object
119  that provides a get_output() method can be used here.
120  @param crosslink_restraints List of cross-link restraints that will
121  be included in output RMF files (for visualization).
122  @param monte_carlo_temperature MC temp (may need to be optimized
123  based on post-sampling analysis)
124  @param simulated_annealing If True, perform simulated annealing
125  @param simulated_annealing_minimum_temperature Should generally be
126  the same as monte_carlo_temperature.
127  @param simulated_annealing_minimum_temperature_nframes Number of
128  frames to compute at minimum temperature.
129  @param simulated_annealing_maximum_temperature_nframes Number of
130  frames to compute at
131  temps > simulated_annealing_maximum_temperature.
132  @param replica_exchange_minimum_temperature Low temp for REX; should
133  generally be the same as monte_carlo_temperature.
134  @param replica_exchange_maximum_temperature High temp for REX
135  @param replica_exchange_swap Boolean, enable disable temperature
136  swap (Default=True)
137  @param num_sample_rounds Number of rounds of MC/MD per cycle
138  @param number_of_best_scoring_models Number of top-scoring PDB models
139  to keep around for analysis
140  @param monte_carlo_steps Number of MC steps per round
141  @param self_adaptive self adaptive scheme for monte carlo movers
142  @param molecular_dynamics_steps Number of MD steps per round
143  @param molecular_dynamics_max_time_step Max time step for MD
144  @param number_of_frames Number of REX frames to run
145  @param save_coordinates_mode string: how to save coordinates.
146  "lowest_temperature" (default) only the lowest temperatures is saved
147  "25th_score" all replicas whose score is below the 25th percentile
148  "50th_score" all replicas whose score is below the 50th percentile
149  "75th_score" all replicas whose score is below the 75th percentile
150  @param nframes_write_coordinates How often to write the coordinates
151  of a frame
152  @param write_initial_rmf Write the initial configuration
153  @param global_output_directory Folder that will be created to house
154  output.
155  @param test_mode Set to True to avoid writing any files, just test one frame.
156  """
157  self.model = model
158  self.vars = {}
159  self.pmi2 = False
160 
161  ### add check hierarchy is multistate
162  self.output_objects = output_objects
163  self.rmf_output_objects=rmf_output_objects
164  self.representation = representation
165  if representation:
166  if type(representation) == list:
167  self.is_multi_state = True
168  self.root_hiers = [r.prot for r in representation]
169  self.vars["number_of_states"] = len(representation)
170  else:
171  self.is_multi_state = False
172  self.root_hier = representation.prot
173  self.vars["number_of_states"] = 1
174  elif root_hier and type(root_hier) == IMP.atom.Hierarchy and root_hier.get_name()=='System':
175  self.pmi2 = True
176  if self.output_objects is not None:
177  self.output_objects.append(IMP.pmi1.io.TotalScoreOutput(self.model))
178  if self.rmf_output_objects is not None:
179  self.rmf_output_objects.append(IMP.pmi1.io.TotalScoreOutput(self.model))
180  self.root_hier = root_hier
181  states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
182  self.vars["number_of_states"] = len(states)
183  if len(states)>1:
184  self.root_hiers = states
185  self.is_multi_state = True
186  else:
187  self.root_hier = root_hier
188  self.is_multi_state = False
189  else:
190  raise Exception("Must provide representation or System hierarchy (root_hier)")
191 
192  self._rmf_restraints = _RMFRestraints(model, crosslink_restraints)
193  self.em_object_for_rmf = em_object_for_rmf
194  self.monte_carlo_sample_objects = monte_carlo_sample_objects
195  self.vars["self_adaptive"]=self_adaptive
196  if sample_objects is not None:
197  self.monte_carlo_sample_objects+=sample_objects
198  self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
199  self.replica_exchange_object = replica_exchange_object
200  self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
201  self.vars["monte_carlo_temperature"] = monte_carlo_temperature
202  self.vars[
203  "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
204  self.vars[
205  "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
206  self.vars["replica_exchange_swap"] = replica_exchange_swap
207  self.vars["simulated_annealing"]=\
208  simulated_annealing
209  self.vars["simulated_annealing_minimum_temperature"]=\
210  simulated_annealing_minimum_temperature
211  self.vars["simulated_annealing_maximum_temperature"]=\
212  simulated_annealing_maximum_temperature
213  self.vars["simulated_annealing_minimum_temperature_nframes"]=\
214  simulated_annealing_minimum_temperature_nframes
215  self.vars["simulated_annealing_maximum_temperature_nframes"]=\
216  simulated_annealing_maximum_temperature_nframes
217 
218  self.vars["num_sample_rounds"] = num_sample_rounds
219  self.vars[
220  "number_of_best_scoring_models"] = number_of_best_scoring_models
221  self.vars["monte_carlo_steps"] = monte_carlo_steps
222  self.vars["molecular_dynamics_steps"]=molecular_dynamics_steps
223  self.vars["number_of_frames"] = number_of_frames
224  if not save_coordinates_mode in ["lowest_temperature","25th_score","50th_score","75th_score"]:
225  raise Exception("save_coordinates_mode has unrecognized value")
226  else:
227  self.vars["save_coordinates_mode"] = save_coordinates_mode
228  self.vars["nframes_write_coordinates"] = nframes_write_coordinates
229  self.vars["write_initial_rmf"] = write_initial_rmf
230  self.vars["initial_rmf_name_suffix"] = initial_rmf_name_suffix
231  self.vars["best_pdb_name_suffix"] = best_pdb_name_suffix
232  self.vars["stat_file_name_suffix"] = stat_file_name_suffix
233  self.vars["do_clean_first"] = do_clean_first
234  self.vars["do_create_directories"] = do_create_directories
235  self.vars["global_output_directory"] = global_output_directory
236  self.vars["rmf_dir"] = rmf_dir
237  self.vars["best_pdb_dir"] = best_pdb_dir
238  self.vars["atomistic"] = atomistic
239  self.vars["replica_stat_file_suffix"] = replica_stat_file_suffix
240  self.vars["geometries"] = None
241  self.test_mode = test_mode
242 
243  def add_geometries(self, geometries):
244  if self.vars["geometries"] is None:
245  self.vars["geometries"] = list(geometries)
246  else:
247  self.vars["geometries"].extend(geometries)
248 
249  def show_info(self):
250  print("ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
251  print("--- it stores the best scoring pdb models in pdbs/")
252  print("--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
253  print("--- variables:")
254  keys = list(self.vars.keys())
255  keys.sort()
256  for v in keys:
257  print("------", v.ljust(30), self.vars[v])
258 
259  def get_replica_exchange_object(self):
260  return self.replica_exchange_object
261 
262  def _add_provenance(self, sampler_md, sampler_mc):
263  """Record details about the sampling in the IMP Hierarchies"""
264  if not self.is_multi_state or self.pmi2:
265  output_hierarchies = [self.root_hier]
266  else:
267  output_hierarchies = self.root_hiers
268 
269  iterations = 0
270  if sampler_md:
271  method = "Molecular Dynamics"
272  iterations += self.vars["molecular_dynamics_steps"]
273  if sampler_mc:
274  method = "Hybrid MD/MC" if sampler_md else "Monte Carlo"
275  iterations += self.vars["monte_carlo_steps"]
276  # If no sampling is actually done, no provenance to write
277  if iterations == 0 or self.vars["number_of_frames"] == 0:
278  return
279  iterations *= self.vars["num_sample_rounds"]
280 
281  for h in output_hierarchies:
282  pi = self.model.add_particle("sampling")
284  self.model, pi, method, self.vars["number_of_frames"],
285  iterations)
286  p.set_number_of_replicas(
287  self.replica_exchange_object.get_number_of_replicas())
288  IMP.pmi1.tools._add_pmi_provenance(h)
289  IMP.core.add_provenance(self.model, h, p)
290 
291  def execute_macro(self):
292  temp_index_factor = 100000.0
293  samplers=[]
294  sampler_mc=None
295  sampler_md=None
296  if self.monte_carlo_sample_objects is not None:
297  print("Setting up MonteCarlo")
298  sampler_mc = IMP.pmi1.samplers.MonteCarlo(self.model,
299  self.monte_carlo_sample_objects,
300  self.vars["monte_carlo_temperature"])
301  if self.vars["simulated_annealing"]:
302  tmin=self.vars["simulated_annealing_minimum_temperature"]
303  tmax=self.vars["simulated_annealing_maximum_temperature"]
304  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
305  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
306  sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
307  if self.vars["self_adaptive"]:
308  sampler_mc.set_self_adaptive(isselfadaptive=self.vars["self_adaptive"])
309  if self.output_objects is not None:
310  self.output_objects.append(sampler_mc)
311  if self.rmf_output_objects is not None:
312  self.rmf_output_objects.append(sampler_mc)
313  samplers.append(sampler_mc)
314 
315 
316  if self.molecular_dynamics_sample_objects is not None:
317  print("Setting up MolecularDynamics")
318  sampler_md = IMP.pmi1.samplers.MolecularDynamics(self.model,
319  self.molecular_dynamics_sample_objects,
320  self.vars["monte_carlo_temperature"],
321  maximum_time_step=self.molecular_dynamics_max_time_step)
322  if self.vars["simulated_annealing"]:
323  tmin=self.vars["simulated_annealing_minimum_temperature"]
324  tmax=self.vars["simulated_annealing_maximum_temperature"]
325  nfmin=self.vars["simulated_annealing_minimum_temperature_nframes"]
326  nfmax=self.vars["simulated_annealing_maximum_temperature_nframes"]
327  sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
328  if self.output_objects is not None:
329  self.output_objects.append(sampler_md)
330  if self.rmf_output_objects is not None:
331  self.rmf_output_objects.append(sampler_md)
332  samplers.append(sampler_md)
333 # -------------------------------------------------------------------------
334 
335  print("Setting up ReplicaExchange")
336  rex = IMP.pmi1.samplers.ReplicaExchange(self.model,
337  self.vars[
338  "replica_exchange_minimum_temperature"],
339  self.vars[
340  "replica_exchange_maximum_temperature"],
341  samplers,
342  replica_exchange_object=self.replica_exchange_object)
343  self.replica_exchange_object = rex.rem
344 
345  myindex = rex.get_my_index()
346  if self.output_objects is not None:
347  self.output_objects.append(rex)
348  if self.rmf_output_objects is not None:
349  self.rmf_output_objects.append(rex)
350  # must reset the minimum temperature due to the
351  # different binary length of rem.get_my_parameter double and python
352  # float
353  min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
354 
355 # -------------------------------------------------------------------------
356 
357  globaldir = self.vars["global_output_directory"] + "/"
358  rmf_dir = globaldir + self.vars["rmf_dir"]
359  pdb_dir = globaldir + self.vars["best_pdb_dir"]
360 
361  if not self.test_mode:
362  if self.vars["do_clean_first"]:
363  pass
364 
365  if self.vars["do_create_directories"]:
366 
367  try:
368  os.makedirs(globaldir)
369  except:
370  pass
371  try:
372  os.makedirs(rmf_dir)
373  except:
374  pass
375 
376  if not self.is_multi_state:
377  try:
378  os.makedirs(pdb_dir)
379  except:
380  pass
381  else:
382  for n in range(self.vars["number_of_states"]):
383  try:
384  os.makedirs(pdb_dir + "/" + str(n))
385  except:
386  pass
387 
388 # -------------------------------------------------------------------------
389 
391  if self.output_objects is not None:
392  self.output_objects.append(sw)
393  if self.rmf_output_objects is not None:
394  self.rmf_output_objects.append(sw)
395 
396  print("Setting up stat file")
397  output = IMP.pmi1.output.Output(atomistic=self.vars["atomistic"])
398  low_temp_stat_file = globaldir + \
399  self.vars["stat_file_name_suffix"] + "." + str(myindex) + ".out"
400 
401  # Ensure model is updated before saving init files
402  if not self.test_mode:
403  self.model.update()
404 
405  if not self.test_mode:
406  if self.output_objects is not None:
407  output.init_stat2(low_temp_stat_file,
408  self.output_objects,
409  extralabels=["rmf_file", "rmf_frame_index"])
410  else:
411  print("Stat file writing is disabled")
412 
413  if self.rmf_output_objects is not None:
414  print("Stat info being written in the rmf file")
415 
416  print("Setting up replica stat file")
417  replica_stat_file = globaldir + \
418  self.vars["replica_stat_file_suffix"] + "." + str(myindex) + ".out"
419  if not self.test_mode:
420  output.init_stat2(replica_stat_file, [rex], extralabels=["score"])
421 
422  if not self.test_mode:
423  print("Setting up best pdb files")
424  if not self.is_multi_state:
425  if self.vars["number_of_best_scoring_models"] > 0:
426  output.init_pdb_best_scoring(pdb_dir + "/" +
427  self.vars["best_pdb_name_suffix"],
428  self.root_hier,
429  self.vars[
430  "number_of_best_scoring_models"],
431  replica_exchange=True)
432  output.write_psf(pdb_dir + "/" +"model.psf",pdb_dir + "/" +
433  self.vars["best_pdb_name_suffix"]+".0.pdb")
434  else:
435  if self.vars["number_of_best_scoring_models"] > 0:
436  for n in range(self.vars["number_of_states"]):
437  output.init_pdb_best_scoring(pdb_dir + "/" + str(n) + "/" +
438  self.vars["best_pdb_name_suffix"],
439  self.root_hiers[n],
440  self.vars[
441  "number_of_best_scoring_models"],
442  replica_exchange=True)
443  output.write_psf(pdb_dir + "/" + str(n) + "/" +"model.psf",pdb_dir + "/" + str(n) + "/" +
444  self.vars["best_pdb_name_suffix"]+".0.pdb")
445 # ---------------------------------------------
446 
447  if self.em_object_for_rmf is not None:
448  if not self.is_multi_state or self.pmi2:
449  output_hierarchies = [
450  self.root_hier,
451  self.em_object_for_rmf.get_density_as_hierarchy(
452  )]
453  else:
454  output_hierarchies = self.root_hiers
455  output_hierarchies.append(
456  self.em_object_for_rmf.get_density_as_hierarchy())
457  else:
458  if not self.is_multi_state or self.pmi2:
459  output_hierarchies = [self.root_hier]
460  else:
461  output_hierarchies = self.root_hiers
462 
463 #----------------------------------------------
464  if not self.test_mode:
465  print("Setting up and writing initial rmf coordinate file")
466  init_suffix = globaldir + self.vars["initial_rmf_name_suffix"]
467  output.init_rmf(init_suffix + "." + str(myindex) + ".rmf3",
468  output_hierarchies,listofobjects=self.rmf_output_objects)
469  if self._rmf_restraints:
470  output.add_restraints_to_rmf(
471  init_suffix + "." + str(myindex) + ".rmf3",
472  self._rmf_restraints)
473  output.write_rmf(init_suffix + "." + str(myindex) + ".rmf3")
474  output.close_rmf(init_suffix + "." + str(myindex) + ".rmf3")
475 
476 #----------------------------------------------
477 
478  if not self.test_mode:
479  mpivs=IMP.pmi1.samplers.MPI_values(self.replica_exchange_object)
480 
481 #----------------------------------------------
482 
483  self._add_provenance(sampler_md, sampler_mc)
484 
485  if not self.test_mode:
486  print("Setting up production rmf files")
487  rmfname = rmf_dir + "/" + str(myindex) + ".rmf3"
488  output.init_rmf(rmfname, output_hierarchies, geometries=self.vars["geometries"],
489  listofobjects = self.rmf_output_objects)
490 
491  if self._rmf_restraints:
492  output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
493 
494  ntimes_at_low_temp = 0
495 
496  if myindex == 0:
497  self.show_info()
498  self.replica_exchange_object.set_was_used(True)
499  nframes = self.vars["number_of_frames"]
500  if self.test_mode:
501  nframes = 1
502  for i in range(nframes):
503  if self.test_mode:
504  score = 0.
505  else:
506  for nr in range(self.vars["num_sample_rounds"]):
507  if sampler_md is not None:
508  sampler_md.optimize(
509  self.vars["molecular_dynamics_steps"])
510  if sampler_mc is not None:
511  sampler_mc.optimize(self.vars["monte_carlo_steps"])
513  self.model).evaluate(False)
514  mpivs.set_value("score",score)
515  output.set_output_entry("score", score)
516 
517 
518 
519  my_temp_index = int(rex.get_my_temp() * temp_index_factor)
520 
521  if self.vars["save_coordinates_mode"] == "lowest_temperature":
522  save_frame=(min_temp_index == my_temp_index)
523  elif self.vars["save_coordinates_mode"] == "25th_score":
524  score_perc=mpivs.get_percentile("score")
525  save_frame=(score_perc*100.0<=25.0)
526  elif self.vars["save_coordinates_mode"] == "50th_score":
527  score_perc=mpivs.get_percentile("score")
528  save_frame=(score_perc*100.0<=50.0)
529  elif self.vars["save_coordinates_mode"] == "75th_score":
530  score_perc=mpivs.get_percentile("score")
531  save_frame=(score_perc*100.0<=75.0)
532 
533  # Ensure model is updated before saving output files
534  if save_frame or not self.test_mode:
535  self.model.update()
536 
537  if save_frame:
538  print("--- frame %s score %s " % (str(i), str(score)))
539 
540  if not self.test_mode:
541  if i % self.vars["nframes_write_coordinates"]==0:
542  print('--- writing coordinates')
543  if self.vars["number_of_best_scoring_models"] > 0:
544  output.write_pdb_best_scoring(score)
545  output.write_rmf(rmfname)
546  output.set_output_entry("rmf_file", rmfname)
547  output.set_output_entry("rmf_frame_index", ntimes_at_low_temp)
548  else:
549  output.set_output_entry("rmf_file", rmfname)
550  output.set_output_entry("rmf_frame_index", '-1')
551  if self.output_objects is not None:
552  output.write_stat2(low_temp_stat_file)
553  ntimes_at_low_temp += 1
554 
555  if not self.test_mode:
556  output.write_stat2(replica_stat_file)
557  if self.vars["replica_exchange_swap"]:
558  rex.swap_temp(i, score)
559  if self.representation:
560  for p, state in self.representation._protocol_output:
561  p.add_replica_exchange(state, self)
562 
563  if not self.test_mode:
564  print("closing production rmf files")
565  output.close_rmf(rmfname)
566 
567 
568 class BuildModel(object):
569  """A macro to build a Representation based on a Topology and lists of movers
570  """
571  def __init__(self,
572  model,
573  component_topologies,
574  list_of_rigid_bodies=[],
575  list_of_super_rigid_bodies=[],
576  chain_of_super_rigid_bodies=[],
577  sequence_connectivity_scale=4.0,
578  add_each_domain_as_rigid_body=False,
579  force_create_gmm_files=False):
580  """Constructor.
581  @param model The IMP model
582  @param component_topologies List of
583  IMP.pmi1.topology.ComponentTopology items
584  @param list_of_rigid_bodies List of lists of domain names that will
585  be moved as rigid bodies.
586  @param list_of_super_rigid_bodies List of lists of domain names
587  that will move together in an additional Monte Carlo move.
588  @param chain_of_super_rigid_bodies List of lists of domain names
589  (choices can only be from the same molecule). Each of these
590  groups will be moved rigidly. This helps to sample more
591  efficiently complex topologies, made of several rigid bodies,
592  connected by flexible linkers.
593  @param sequence_connectivity_scale For scaling the connectivity
594  restraint
595  @param add_each_domain_as_rigid_body That way you don't have to
596  put all of them in the list
597  @param force_create_gmm_files If True, will sample and create GMMs
598  no matter what. If False, will only sample if the
599  files don't exist. If number of Gaussians is zero, won't
600  do anything.
601  """
602  self.model = model
603  self.simo = IMP.pmi1.representation.Representation(self.model,
604  upperharmonic=True,
605  disorderedlength=False)
606 
607  data=component_topologies
608  if list_of_rigid_bodies==[]:
609  print("WARNING: No list of rigid bodies inputted to build_model()")
610  if list_of_super_rigid_bodies==[]:
611  print("WARNING: No list of super rigid bodies inputted to build_model()")
612  if chain_of_super_rigid_bodies==[]:
613  print("WARNING: No chain of super rigid bodies inputted to build_model()")
614  all_dnames = set([d for sublist in list_of_rigid_bodies+list_of_super_rigid_bodies\
615  +chain_of_super_rigid_bodies for d in sublist])
616  all_available = set([c._domain_name for c in component_topologies])
617  if not all_dnames <= all_available:
618  raise ValueError("All requested movers must reference domain "
619  "names in the component topologies")
620 
621  self.domain_dict={}
622  self.resdensities={}
623  super_rigid_bodies={}
624  chain_super_rigid_bodies={}
625  rigid_bodies={}
626 
627  for c in data:
628  comp_name = c.molname
629  hier_name = c._domain_name
630  color = 0. # Can't use new-style string colors
631  fasta_file = c.fasta_file
632  fasta_id = c.fasta_id
633  pdb_name = c.pdb_file
634  chain_id = c.chain
635  res_range = c.residue_range
636  offset = c.pdb_offset
637  bead_size = c.bead_size
638  em_num_components = c.em_residues_per_gaussian
639  em_txt_file_name = c.gmm_file
640  em_mrc_file_name = c.mrc_file
641 
642  if comp_name not in self.simo.get_component_names():
643  self.simo.create_component(comp_name,color=0.0)
644  self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
645 
646  # create hierarchy (adds resolutions, beads) with autobuild and optionally add EM data
647  if em_num_components==0:
648  read_em_files=False
649  include_res0=False
650  else:
651  if (not os.path.isfile(em_txt_file_name)) or force_create_gmm_files:
652  read_em_files=False
653  include_res0=True
654  else:
655  read_em_files=True
656  include_res0=False
657 
658  outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
659  res_range,include_res0,beadsize=bead_size,
660  color=color,offset=offset)
661  if em_num_components!=0:
662  if read_em_files:
663  print("will read GMM files")
664  else:
665  print("will calculate GMMs")
666 
667  dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
668  em_mrc_file_name,em_num_components,read_em_files)
669  self.simo.add_all_atom_densities(comp_name, particles=beads)
670  dens_hier+=beads
671  else:
672  dens_hier=[]
673 
674  self.resdensities[hier_name]=dens_hier
675  self.domain_dict[hier_name]=outhier+dens_hier
676 
677  # setup basic restraints
678  for c in self.simo.get_component_names():
679  self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
680  self.simo.setup_component_geometry(c)
681 
682  # create movers
683  for rblist in list_of_rigid_bodies:
684  rb=[]
685  for rbmember in rblist:
686  rb+=[h for h in self.domain_dict[rbmember]]
687  self.simo.set_rigid_body_from_hierarchies(rb)
688  for srblist in list_of_super_rigid_bodies:
689  srb=[]
690  for srbmember in rblist:
691  srb+=[h for h in self.domain_dict[srbmember]]
692  self.simo.set_super_rigid_body_from_hierarchies(srb)
693  for clist in chain_of_super_rigid_bodies:
694  crb=[]
695  for crbmember in rblist:
696  crb+=[h for h in self.domain_dict[crbmember]]
697  self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
698 
699  self.simo.set_floppy_bodies()
700  self.simo.setup_bonds()
701 
702  @property
703  @IMP.deprecated_method("3.0", "Model should be accessed with `.model`.")
704  def m(self):
705  return self.model
706 
708  '''Return the Representation object'''
709  return self.simo
710 
711  def get_density_hierarchies(self,hier_name_list):
712  # return a list of density hierarchies
713  # specify the list of hierarchy names
714  dens_hier_list=[]
715  for hn in hier_name_list:
716  print(hn)
717  dens_hier_list+=self.resdensities[hn]
718  return dens_hier_list
719 
720  def set_gmm_models_directory(self,directory_name):
721  self.gmm_models_directory=directory_name
722 
723  def get_pdb_bead_bits(self,hierarchy):
724  pdbbits=[]
725  beadbits=[]
726  helixbits=[]
727  for h in hierarchy:
728  if "_pdb" in h.get_name():pdbbits.append(h)
729  if "_bead" in h.get_name():beadbits.append(h)
730  if "_helix" in h.get_name():helixbits.append(h)
731  return (pdbbits,beadbits,helixbits)
732 
733  def scale_bead_radii(self,nresidues,scale):
734  scaled_beads=set()
735  for h in self.domain_dict:
736  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
737  slope=(1.0-scale)/(1.0-float(nresidues))
738 
739  for b in beadbits:
740  # I have to do the following
741  # because otherwise we'll scale more than once
742  if b not in scaled_beads:
743  scaled_beads.add(b)
744  else:
745  continue
746  radius=IMP.core.XYZR(b).get_radius()
747  num_residues=len(IMP.pmi1.tools.get_residue_indexes(b))
748  scale_factor=slope*float(num_residues)+1.0
749  print(scale_factor)
750  new_radius=scale_factor*radius
751  IMP.core.XYZR(b).set_radius(new_radius)
752  print(b.get_name())
753  print("particle with radius "+str(radius)+" and "+str(num_residues)+" residues scaled to a new radius "+str(new_radius))
754 
755 
756  def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
757  #density generation for the EM restraint
758  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
759  #get the number of residues from the pdb bits
760  res_ind=[]
761  for pb in pdbbits+helixbits:
762  for p in IMP.core.get_leaves(pb):
764 
765  number_of_residues=len(set(res_ind))
766  outhier=[]
767  if read:
768  if len(pdbbits)!=0:
769  outhier+=simo.add_component_density(compname,
770  pdbbits,
771  num_components=num_components,
772  resolution=0,
773  inputfile=txtfilename)
774  if len(helixbits)!=0:
775  outhier+=simo.add_component_density(compname,
776  helixbits,
777  num_components=num_components,
778  resolution=1,
779  inputfile=txtfilename)
780 
781 
782  else:
783  if len(pdbbits)!=0:
784  num_components=number_of_residues//abs(num_components)+1
785  outhier+=simo.add_component_density(compname,
786  pdbbits,
787  num_components=num_components,
788  resolution=0,
789  outputfile=txtfilename,
790  outputmap=mrcfilename,
791  multiply_by_total_mass=True)
792 
793  if len(helixbits)!=0:
794  num_components=number_of_residues//abs(num_components)+1
795  outhier+=simo.add_component_density(compname,
796  helixbits,
797  num_components=num_components,
798  resolution=1,
799  outputfile=txtfilename,
800  outputmap=mrcfilename,
801  multiply_by_total_mass=True)
802 
803  return outhier,beadbits
804 
805  def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
806  beadsize=5,color=0.0,offset=0):
807  if (pdbname is not None and pdbname != "IDEAL_HELIX"
808  and pdbname != "BEADS"):
809  if include_res0:
810  outhier=simo.autobuild_model(comname,
811  pdbname=pdbname,
812  chain=chain,
813  resrange=resrange,
814  resolutions=[0,1,10],
815  offset=offset,
816  color=color,
817  missingbeadsize=beadsize)
818  else:
819  outhier=simo.autobuild_model(comname,
820  pdbname=pdbname,
821  chain=chain,
822  resrange=resrange,
823  resolutions=[1,10],
824  offset=offset,
825  color=color,
826  missingbeadsize=beadsize)
827 
828 
829  elif pdbname == "IDEAL_HELIX":
830  outhier=simo.add_component_ideal_helix(comname,
831  resolutions=[1,10],
832  resrange=resrange,
833  color=color,
834  show=False)
835 
836  elif pdbname == "BEADS":
837  outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
838 
839  else:
840 
841  seq_len=len(simo.sequence_dict[comname])
842  outhier=simo.add_component_necklace(comname,
843  begin=1,
844  end=seq_len,
845  length=beadsize)
846 
847  return outhier
848 
849 
850 class BuildModel1(object):
851  """Building macro"""
852 
853  def __init__(self, representation):
854  """Constructor.
855  @param representation The PMI representation
856  """
857  self.simo=representation
858  self.gmm_models_directory="."
859  self.rmf_file={}
860  self.rmf_frame_number={}
861  self.rmf_names_map={}
862  self.rmf_component_name={}
863 
864  def set_gmm_models_directory(self,directory_name):
865  self.gmm_models_directory=directory_name
866 
867  def build_model(self,data_structure,sequence_connectivity_scale=4.0,
868  sequence_connectivity_resolution=10,
869  rmf_file=None,rmf_frame_number=0,rmf_file_map=None,
870  skip_connectivity_these_domains=None,
871  skip_gaussian_in_rmf=False, skip_gaussian_in_representation=False):
872  """Create model.
873  @param data_structure List of lists containing these entries:
874  comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
875  res_range, read_em_files, bead_size, rb, super_rb,
876  em_num_components, em_txt_file_name, em_mrc_file_name, chain_of_super_rb,
877  keep_gaussian_flexible_beads (optional)
878  @param sequence_connectivity_scale
879  @param rmf_file
880  @param rmf_frame_number
881  @param rmf_file_map : a dictionary that map key=component_name:value=(rmf_file_name,
882  rmf_frame_number,
883  rmf_component_name)
884  """
885  self.domain_dict={}
886  self.resdensities={}
887  super_rigid_bodies={}
888  chain_super_rigid_bodies={}
889  rigid_bodies={}
890 
891  for d in data_structure:
892  comp_name = d[0]
893  hier_name = d[1]
894  color = d[2]
895  fasta_file = d[3]
896  fasta_id = d[4]
897  pdb_name = d[5]
898  chain_id = d[6]
899  res_range = d[7][0:2]
900  try:
901  offset = d[7][2]
902  except:
903  offset = 0
904  read_em_files = d[8]
905  bead_size = d[9]
906  rb = d[10]
907  super_rb = d[11]
908  em_num_components = d[12]
909  em_txt_file_name = d[13]
910  em_mrc_file_name = d[14]
911  chain_of_super_rb = d[15]
912  try:
913  keep_gaussian_flexible_beads = d[16]
914  except:
915  keep_gaussian_flexible_beads = True
916 
917  if comp_name not in self.simo.get_component_names():
918  self.simo.create_component(comp_name,color=0.0)
919  self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
920  outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
921 
922 
923  if not read_em_files is None:
924  if em_txt_file_name == " ": em_txt_file_name=self.gmm_models_directory+"/"+hier_name+".txt"
925  if em_mrc_file_name == " ": em_mrc_file_name=self.gmm_models_directory+"/"+hier_name+".mrc"
926 
927 
928  dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
929 
930  if (keep_gaussian_flexible_beads):
931  self.simo.add_all_atom_densities(comp_name, particles=beads)
932  dens_hier+=beads
933  else:
934  dens_hier=[]
935 
936  self.resdensities[hier_name]=dens_hier
937  self.domain_dict[hier_name]=outhier+dens_hier
938 
939  if rb is not None:
940  if rb not in rigid_bodies:
941  rigid_bodies[rb]=[h for h in self.domain_dict[hier_name]]
942  else:
943  rigid_bodies[rb]+=[h for h in self.domain_dict[hier_name]]
944 
945 
946  if super_rb is not None:
947  for k in super_rb:
948  if k not in super_rigid_bodies:
949  super_rigid_bodies[k]=[h for h in self.domain_dict[hier_name]]
950  else:
951  super_rigid_bodies[k]+=[h for h in self.domain_dict[hier_name]]
952 
953  if chain_of_super_rb is not None:
954  for k in chain_of_super_rb:
955  if k not in chain_super_rigid_bodies:
956  chain_super_rigid_bodies[k]=[h for h in self.domain_dict[hier_name]]
957  else:
958  chain_super_rigid_bodies[k]+=[h for h in self.domain_dict[hier_name]]
959 
960 
961 
962  self.rigid_bodies=rigid_bodies
963 
964  for c in self.simo.get_component_names():
965  if rmf_file is not None:
966  rf=rmf_file
967  rfn=rmf_frame_number
968  self.simo.set_coordinates_from_rmf(c, rf,rfn,
969  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
970  elif rmf_file_map:
971  for k in rmf_file_map:
972  cname=k
973  rf=rmf_file_map[k][0]
974  rfn=rmf_file_map[k][1]
975  rcname=rmf_file_map[k][2]
976  self.simo.set_coordinates_from_rmf(cname, rf,rfn,rcname,
977  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
978  else:
979  if c in self.rmf_file:
980  rf=self.rmf_file[c]
981  rfn=self.rmf_frame_number[c]
982  rfm=self.rmf_names_map[c]
983  rcname=self.rmf_component_name[c]
984  self.simo.set_coordinates_from_rmf(c, rf,rfn,representation_name_to_rmf_name_map=rfm,
985  rmf_component_name=rcname,
986  skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
987  if (not skip_connectivity_these_domains) or (c not in skip_connectivity_these_domains):
988  self.simo.setup_component_sequence_connectivity(c,
989  resolution=sequence_connectivity_resolution,
990  scale=sequence_connectivity_scale)
991  self.simo.setup_component_geometry(c)
992 
993  for rb in rigid_bodies:
994  self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
995 
996  for k in super_rigid_bodies:
997  self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
998 
999  for k in chain_super_rigid_bodies:
1000  self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
1001 
1002  self.simo.set_floppy_bodies()
1003  self.simo.setup_bonds()
1004 
1005  def set_main_chain_mover(self,hier_name,lengths=[5,10,20,30]):
1006  hiers=self.domain_dict[hier_name]
1007  for length in lengths:
1008  for n in range(len(hiers)-length):
1009  hs=hiers[n+1:n+length-1]
1010  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n+length].get_particle()),min_size=3)
1011  for n in range(1,len(hiers)-1,5):
1012  hs=hiers[n+1:]
1013  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1014  hs=hiers[:n-1]
1015  self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1016 
1017 
1018  def set_rmf_file(self,component_name,rmf_file,rmf_frame_number,rmf_names_map=None,rmf_component_name=None):
1019  self.rmf_file[component_name]=rmf_file
1020  self.rmf_frame_number[component_name]=rmf_frame_number
1021  self.rmf_names_map[component_name]=rmf_names_map
1022  self.rmf_component_name[component_name]=rmf_component_name
1023 
1024  def get_density_hierarchies(self,hier_name_list):
1025  # return a list of density hierarchies
1026  # specify the list of hierarchy names
1027  dens_hier_list=[]
1028  for hn in hier_name_list:
1029  print(hn)
1030  dens_hier_list+=self.resdensities[hn]
1031  return dens_hier_list
1032 
1033  def get_pdb_bead_bits(self,hierarchy):
1034  pdbbits=[]
1035  beadbits=[]
1036  helixbits=[]
1037  for h in hierarchy:
1038  if "_pdb" in h.get_name():pdbbits.append(h)
1039  if "_bead" in h.get_name():beadbits.append(h)
1040  if "_helix" in h.get_name():helixbits.append(h)
1041  return (pdbbits,beadbits,helixbits)
1042 
1043  def scale_bead_radii(self,nresidues,scale):
1044  scaled_beads=set()
1045  for h in self.domain_dict:
1046  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
1047  slope=(1.0-scale)/(1.0-float(nresidues))
1048 
1049  for b in beadbits:
1050  # I have to do the following
1051  # because otherwise we'll scale more than once
1052  if b not in scaled_beads:
1053  scaled_beads.add(b)
1054  else:
1055  continue
1056  radius=IMP.core.XYZR(b).get_radius()
1057  num_residues=len(IMP.pmi1.tools.get_residue_indexes(b))
1058  scale_factor=slope*float(num_residues)+1.0
1059  print(scale_factor)
1060  new_radius=scale_factor*radius
1061  IMP.core.XYZR(b).set_radius(new_radius)
1062  print(b.get_name())
1063  print("particle with radius "+str(radius)+" and "+str(num_residues)+" residues scaled to a new radius "+str(new_radius))
1064 
1065 
1066  def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
1067  #density generation for the EM restraint
1068  (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
1069 
1070  #get the number of residues from the pdb bits
1071  res_ind=[]
1072  for pb in pdbbits+helixbits:
1073  for p in IMP.core.get_leaves(pb):
1074  res_ind.extend(IMP.pmi1.tools.get_residue_indexes(p))
1075 
1076  number_of_residues=len(set(res_ind))
1077  outhier=[]
1078  if read:
1079  if len(pdbbits)!=0:
1080  outhier+=simo.add_component_density(compname,
1081  pdbbits,
1082  num_components=num_components, # number of gaussian into which the simulated density is approximated
1083  resolution=0, # resolution that you want to calculate the simulated density
1084  inputfile=txtfilename) # read what it was calculated before
1085  if len(helixbits)!=0:
1086  outhier+=simo.add_component_density(compname,
1087  helixbits,
1088  num_components=num_components, # number of gaussian into which the simulated density is approximated
1089  resolution=1, # resolution that you want to calculate the simulated density
1090  inputfile=txtfilename) # read what it was calculated before
1091 
1092 
1093  else:
1094  if len(pdbbits)!=0:
1095  if num_components<0:
1096  #if negative calculate the number of gmm components automatically
1097  # from the number of residues
1098  num_components=number_of_residues/abs(num_components)
1099  outhier+=simo.add_component_density(compname,
1100  pdbbits,
1101  num_components=num_components, # number of gaussian into which the simulated density is approximated
1102  resolution=0, # resolution that you want to calculate the simulated density
1103  outputfile=txtfilename, # do the calculation
1104  outputmap=mrcfilename,
1105  multiply_by_total_mass=True) # do the calculation and output the mrc
1106 
1107  if len(helixbits)!=0:
1108  if num_components<0:
1109  #if negative calculate the number of gmm components automatically
1110  # from the number of residues
1111  num_components=number_of_residues/abs(num_components)
1112  outhier+=simo.add_component_density(compname,
1113  helixbits,
1114  num_components=num_components, # number of gaussian into which the simulated density is approximated
1115  resolution=1, # resolution that you want to calculate the simulated density
1116  outputfile=txtfilename, # do the calculation
1117  outputmap=mrcfilename,
1118  multiply_by_total_mass=True) # do the calculation and output the mrc
1119 
1120  return outhier,beadbits
1121 
1122  def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
1123 
1124  if (pdbname is not None and pdbname != "IDEAL_HELIX"
1125  and pdbname != "BEADS" and pdbname != "DENSITY"):
1126  if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
1127  if read==False:
1128  outhier=simo.autobuild_model(comname,
1129  pdbname=pdbname,
1130  chain=chain,
1131  resrange=resrange,
1132  resolutions=[0,1,10],
1133  offset=offset,
1134  color=color,
1135  missingbeadsize=beadsize)
1136  else:
1137  outhier=simo.autobuild_model(comname,
1138  pdbname=pdbname,
1139  chain=chain,
1140  resrange=resrange,
1141  resolutions=[1,10],
1142  offset=offset,
1143  color=color,
1144  missingbeadsize=beadsize)
1145 
1146  elif pdbname == "IDEAL_HELIX":
1147  outhier=simo.add_component_ideal_helix(comname,
1148  resolutions=[1,10],
1149  resrange=resrange,
1150  color=color,
1151  show=False)
1152 
1153  elif pdbname == "BEADS":
1154  seq_len=resrange[1]
1155  if resrange[1]==-1:
1156  seq_len=len(simo.sequence_dict[comname])
1157  outhier=simo.add_component_necklace(comname,resrange[0],seq_len,beadsize,color=color)
1158 
1159  elif pdbname == "DENSITY":
1160  outhier=[]
1161 
1162  else:
1163 
1164  seq_len=len(simo.sequence_dict[comname])
1165  outhier=simo.add_component_necklace(comname,
1166  begin=1,
1167  end=seq_len,
1168  length=beadsize)
1169 
1170  return outhier
1171 
1172  def set_coordinates(self,hier_name,xyz_tuple):
1173  hier=self.domain_dict[hier_name]
1174  for h in IMP.atom.get_leaves(hier):
1175  p=h.get_particle()
1177  pass
1178  else:
1179  IMP.core.XYZ(p).set_coordinates(xyz_tuple)
1180 
1181  def save_rmf(self,rmfname):
1182 
1184  self.simo.model.update()
1185  o.init_rmf(rmfname,[self.simo.prot])
1186  o.write_rmf(rmfname)
1187  o.close_rmf(rmfname)
1188 # -----------------------------------------------------------------------
1189 
1191  m,
1192  data,
1193  resolutions=[1,
1194  10],
1195  missing_bead_size=20,
1196  residue_per_gaussian=None):
1197  '''
1198  Construct a component for each subunit (no splitting, nothing fancy).
1199  You can pass the resolutions and the bead size for the missing residue regions.
1200  To use this macro, you must provide the following data structure:
1201  Component pdbfile chainid rgb color fastafile sequence id
1202  in fastafile
1203 data = [("Rpb1", pdbfile, "A", 0.00000000, (fastafile, 0)),
1204  ("Rpb2", pdbfile, "B", 0.09090909, (fastafile, 1)),
1205  ("Rpb3", pdbfile, "C", 0.18181818, (fastafile, 2)),
1206  ("Rpb4", pdbfile, "D", 0.27272727, (fastafile, 3)),
1207  ("Rpb5", pdbfile, "E", 0.36363636, (fastafile, 4)),
1208  ("Rpb6", pdbfile, "F", 0.45454545, (fastafile, 5)),
1209  ("Rpb7", pdbfile, "G", 0.54545455, (fastafile, 6)),
1210  ("Rpb8", pdbfile, "H", 0.63636364, (fastafile, 7)),
1211  ("Rpb9", pdbfile, "I", 0.72727273, (fastafile, 8)),
1212  ("Rpb10", pdbfile, "L", 0.81818182, (fastafile, 9)),
1213  ("Rpb11", pdbfile, "J", 0.90909091, (fastafile, 10)),
1214  ("Rpb12", pdbfile, "K", 1.00000000, (fastafile, 11))]
1215  '''
1216 
1218 
1219  # the dictionary for the hierarchies,
1220  hierarchies = {}
1221 
1222  for d in data:
1223  # retrieve the information from the data structure
1224  component_name = d[0]
1225  pdb_file = d[1]
1226  chain_id = d[2]
1227  color_id = d[3]
1228  fasta_file = d[4][0]
1229  # this function
1230  fastids = IMP.pmi1.tools.get_ids_from_fasta_file(fasta_file)
1231  fasta_file_id = d[4][1]
1232  # avoid to add a component with the same name
1233  r.create_component(component_name,
1234  color=color_id)
1235 
1236  r.add_component_sequence(component_name,
1237  fasta_file,
1238  id=fastids[fasta_file_id])
1239 
1240  hierarchies = r.autobuild_model(component_name,
1241  pdb_file,
1242  chain_id,
1243  resolutions=resolutions,
1244  missingbeadsize=missing_bead_size)
1245 
1246  r.show_component_table(component_name)
1247 
1248  r.set_rigid_bodies([component_name])
1249 
1250  r.set_chain_of_super_rigid_bodies(
1251  hierarchies,
1252  min_length=2,
1253  max_length=2)
1254 
1255  r.setup_component_sequence_connectivity(component_name, resolution=1)
1256  r.setup_component_geometry(component_name)
1257 
1258  r.setup_bonds()
1259  # put it at the end of rigid bodies
1260  r.set_floppy_bodies()
1261 
1262  # set current coordinates as reference for RMSD calculation
1263  r.set_current_coordinates_as_reference_for_rmsd("Reference")
1264 
1265  return r
1266 
1267 # ----------------------------------------------------------------------
1268 
1269 @IMP.deprecated_object("2.8", "Use AnalysisReplicaExchange instead")
1270 class AnalysisReplicaExchange0(object):
1271  """A macro for running all the basic operations of analysis.
1272  Includes clustering, precision analysis, and making ensemble density maps.
1273  A number of plots are also supported.
1274  """
1275  def __init__(self, model,
1276  merge_directories=["./"],
1277  stat_file_name_suffix="stat",
1278  best_pdb_name_suffix="model",
1279  do_clean_first=True,
1280  do_create_directories=True,
1281  global_output_directory="output/",
1282  replica_stat_file_suffix="stat_replica",
1283  global_analysis_result_directory="./analysis/",
1284  test_mode=False):
1285  """Constructor.
1286  @param model The IMP model
1287  @param stat_file_name_suffix
1288  @param merge_directories The directories containing output files
1289  @param best_pdb_name_suffix
1290  @param do_clean_first
1291  @param do_create_directories
1292  @param global_output_directory Where everything is
1293  @param replica_stat_file_suffix
1294  @param global_analysis_result_directory
1295  @param test_mode If True, nothing is changed on disk
1296  """
1297 
1298  try:
1299  from mpi4py import MPI
1300  self.comm = MPI.COMM_WORLD
1301  self.rank = self.comm.Get_rank()
1302  self.number_of_processes = self.comm.size
1303  except ImportError:
1304  self.rank = 0
1305  self.number_of_processes = 1
1306 
1307  self.test_mode = test_mode
1308  self._protocol_output = []
1309  self.cluster_obj = None
1310  self.model = model
1311  stat_dir = global_output_directory
1312  self.stat_files = []
1313  # it contains the position of the root directories
1314  for rd in merge_directories:
1315  stat_files = glob.glob(os.path.join(rd,stat_dir,"stat.*.out"))
1316  if len(stat_files)==0:
1317  print("WARNING: no stat files found in",os.path.join(rd,stat_dir))
1318  self.stat_files += stat_files
1319 
1320  def add_protocol_output(self, p):
1321  """Capture details of the modeling protocol.
1322  @param p an instance of IMP.pmi1.output.ProtocolOutput or a subclass.
1323  """
1324  # Assume last state is the one we're interested in
1325  self._protocol_output.append((p, p._last_state))
1326 
1327  def get_modeling_trajectory(self,
1328  score_key="SimplifiedModel_Total_Score_None",
1329  rmf_file_key="rmf_file",
1330  rmf_file_frame_key="rmf_frame_index",
1331  outputdir="./",
1332  get_every=1,
1333  nframes_trajectory=10000):
1334  """ Get a trajectory of the modeling run, for generating demonstrative movies
1335  @param score_key The score for ranking models
1336  @param rmf_file_key Key pointing to RMF filename
1337  @param rmf_file_frame_key Key pointing to RMF frame number
1338  @param outputdir The local output directory used in the run
1339  @param get_every Extract every nth frame
1340  @param nframes_trajectory Total number of frames of the trajectory
1341  """
1342  from operator import itemgetter
1343  import math
1344 
1345  trajectory_models = IMP.pmi1.io.get_trajectory_models(self.stat_files,
1346  score_key,
1347  rmf_file_key,
1348  rmf_file_frame_key,
1349  get_every)
1350  rmf_file_list=trajectory_models[0]
1351  rmf_file_frame_list=trajectory_models[1]
1352  score_list=list(map(float, trajectory_models[2]))
1353 
1354  max_score=max(score_list)
1355  min_score=min(score_list)
1356 
1357 
1358  bins=[(max_score-min_score)*math.exp(-float(i))+min_score for i in range(nframes_trajectory)]
1359  binned_scores=[None]*nframes_trajectory
1360  binned_model_indexes=[-1]*nframes_trajectory
1361 
1362  for model_index,s in enumerate(score_list):
1363  bins_score_diffs=[abs(s-b) for b in bins]
1364  bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1365  if binned_scores[bin_index]==None:
1366  binned_scores[bin_index]=s
1367  binned_model_indexes[bin_index]=model_index
1368  else:
1369  old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1370  new_diff=abs(s-bins[bin_index])
1371  if new_diff < old_diff:
1372  binned_scores[bin_index]=s
1373  binned_model_indexes[bin_index]=model_index
1374 
1375  print(binned_scores)
1376  print(binned_model_indexes)
1377 
1378 
1379  def _expand_ambiguity(self,prot,d):
1380  """If using PMI2, expand the dictionary to include copies as ambiguous options
1381  This also keeps the states separate.
1382  """
1383  newdict = {}
1384  for key in d:
1385  val = d[key]
1386  if '..' in key or (type(val) is tuple and len(val)>=3):
1387  newdict[key] = val
1388  continue
1389  states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
1390  if type(val) is tuple:
1391  start = val[0]
1392  stop = val[1]
1393  name = val[2]
1394  else:
1395  start = 1
1396  stop = -1
1397  name = val
1398  for nst in range(len(states)):
1399  sel = IMP.atom.Selection(prot,molecule=name,state_index=nst)
1400  copies = sel.get_selected_particles(with_representation=False)
1401  if len(copies)>1:
1402  for nc in range(len(copies)):
1403  if len(states)>1:
1404  newdict['%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
1405  else:
1406  newdict['%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
1407  else:
1408  newdict[key] = val
1409  return newdict
1410 
1411 
1412  def clustering(self,
1413  score_key="SimplifiedModel_Total_Score_None",
1414  rmf_file_key="rmf_file",
1415  rmf_file_frame_key="rmf_frame_index",
1416  state_number=0,
1417  prefiltervalue=None,
1418  feature_keys=[],
1419  outputdir="./",
1420  alignment_components=None,
1421  number_of_best_scoring_models=10,
1422  rmsd_calculation_components=None,
1423  distance_matrix_file='distances.mat',
1424  load_distance_matrix_file=False,
1425  skip_clustering=False,
1426  number_of_clusters=1,
1427  display_plot=False,
1428  exit_after_display=True,
1429  get_every=1,
1430  first_and_last_frames=None,
1431  density_custom_ranges=None,
1432  write_pdb_with_centered_coordinates=False,
1433  voxel_size=5.0):
1434  """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
1435  Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
1436  Can pass None for copy or state to ignore that field.
1437  If you don't pass a specific copy number
1438  @param score_key The score for ranking models. Use "Total_Score"
1439  instead of default for PMI2.
1440  @param rmf_file_key Key pointing to RMF filename
1441  @param rmf_file_frame_key Key pointing to RMF frame number
1442  @param state_number State number to analyze
1443  @param prefiltervalue Only include frames where the
1444  score key is below this value
1445  @param feature_keys Keywords for which you want to
1446  calculate average, medians, etc.
1447  If you pass "Keyname" it'll include everything that matches "*Keyname*"
1448  @param outputdir The local output directory used in the run
1449  @param alignment_components Dictionary with keys=groupname, values are tuples
1450  for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1451  @param number_of_best_scoring_models Num models to keep per run
1452  @param rmsd_calculation_components For calculating RMSD
1453  (same format as alignment_components)
1454  @param distance_matrix_file Where to store/read the distance matrix
1455  @param load_distance_matrix_file Try to load the distance matrix file
1456  @param skip_clustering Just extract the best scoring models
1457  and save the pdbs
1458  @param number_of_clusters Number of k-means clusters
1459  @param display_plot Display the distance matrix
1460  @param exit_after_display Exit after displaying distance matrix
1461  @param get_every Extract every nth frame
1462  @param first_and_last_frames A tuple with the first and last frames to be
1463  analyzed. Values are percentages!
1464  Default: get all frames
1465  @param density_custom_ranges For density calculation
1466  (same format as alignment_components)
1467  @param write_pdb_with_centered_coordinates
1468  @param voxel_size Used for the density output
1469  """
1470  # Track provenance information to be added to each output model
1471  prov = []
1472  self._outputdir = os.path.abspath(outputdir)
1473  self._number_of_clusters = number_of_clusters
1474  for p, state in self._protocol_output:
1475  p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1476 
1477  if self.test_mode:
1478  return
1479 
1480  if self.rank==0:
1481  try:
1482  os.mkdir(outputdir)
1483  except:
1484  pass
1485 
1486  if not load_distance_matrix_file:
1487  if len(self.stat_files)==0: print("ERROR: no stat file found in the given path"); return
1488  my_stat_files = IMP.pmi1.tools.chunk_list_into_segments(
1489  self.stat_files,self.number_of_processes)[self.rank]
1490 
1491  # read ahead to check if you need the PMI2 score key instead
1492  po = IMP.pmi1.output.ProcessOutput(my_stat_files[0])
1493  orig_score_key = score_key
1494  if score_key not in po.get_keys():
1495  if 'Total_Score' in po.get_keys():
1496  score_key = 'Total_Score'
1497  print("WARNING: Using 'Total_Score' instead of "
1498  "'SimplifiedModel_Total_Score_None' for the score key")
1499  for k in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1500  if k in feature_keys:
1501  print("WARNING: no need to pass " +k+" to feature_keys.")
1502  feature_keys.remove(k)
1503 
1504  best_models = IMP.pmi1.io.get_best_models(my_stat_files,
1505  score_key,
1506  feature_keys,
1507  rmf_file_key,
1508  rmf_file_frame_key,
1509  prefiltervalue,
1510  get_every, provenance=prov)
1511  rmf_file_list=best_models[0]
1512  rmf_file_frame_list=best_models[1]
1513  score_list=best_models[2]
1514  feature_keyword_list_dict=best_models[3]
1515 
1516 # ------------------------------------------------------------------------
1517 # collect all the files and scores
1518 # ------------------------------------------------------------------------
1519 
1520  if self.number_of_processes > 1:
1521  score_list = IMP.pmi1.tools.scatter_and_gather(score_list)
1522  rmf_file_list = IMP.pmi1.tools.scatter_and_gather(rmf_file_list)
1523  rmf_file_frame_list = IMP.pmi1.tools.scatter_and_gather(
1524  rmf_file_frame_list)
1525  for k in feature_keyword_list_dict:
1526  feature_keyword_list_dict[k] = IMP.pmi1.tools.scatter_and_gather(
1527  feature_keyword_list_dict[k])
1528 
1529  # sort by score and get the best scoring ones
1530  score_rmf_tuples = list(zip(score_list,
1531  rmf_file_list,
1532  rmf_file_frame_list,
1533  list(range(len(score_list)))))
1534 
1535  if density_custom_ranges:
1536  for k in density_custom_ranges:
1537  if type(density_custom_ranges[k]) is not list:
1538  raise Exception("Density custom ranges: values must be lists of tuples")
1539 
1540  # keep subset of frames if requested
1541  if first_and_last_frames is not None:
1542  nframes = len(score_rmf_tuples)
1543  first_frame = int(first_and_last_frames[0] * nframes)
1544  last_frame = int(first_and_last_frames[1] * nframes)
1545  if last_frame > len(score_rmf_tuples):
1546  last_frame = -1
1547  score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1548 
1549  # sort RMFs by the score_key in ascending order, and store the rank
1550  best_score_rmf_tuples = sorted(score_rmf_tuples,
1551  key=lambda x: float(x[0]))[:number_of_best_scoring_models]
1552  best_score_rmf_tuples=[t+(n,) for n,t in enumerate(best_score_rmf_tuples)]
1553  # Note in the provenance info that we only kept best-scoring models
1554  prov.append(IMP.pmi1.io.FilterProvenance("Best scoring",
1555  0, number_of_best_scoring_models))
1556  # sort the feature scores in the same way
1557  best_score_feature_keyword_list_dict = defaultdict(list)
1558  for tpl in best_score_rmf_tuples:
1559  index = tpl[3]
1560  for f in feature_keyword_list_dict:
1561  best_score_feature_keyword_list_dict[f].append(
1562  feature_keyword_list_dict[f][index])
1563  my_best_score_rmf_tuples = IMP.pmi1.tools.chunk_list_into_segments(
1564  best_score_rmf_tuples,
1565  self.number_of_processes)[self.rank]
1566 
1567  # expand the dictionaries to include ambiguous copies
1568  prot_ahead = IMP.pmi1.analysis.get_hiers_from_rmf(self.model,
1569  0,
1570  my_best_score_rmf_tuples[0][1])[0]
1571 
1572 #-------------------------------------------------------------
1573 # read the coordinates
1574 # ------------------------------------------------------------
1575  rmsd_weights = IMP.pmi1.io.get_bead_sizes(self.model,
1576  my_best_score_rmf_tuples[0],
1577  rmsd_calculation_components,
1578  state_number=state_number)
1579  got_coords = IMP.pmi1.io.read_coordinates_of_rmfs(self.model,
1580  my_best_score_rmf_tuples,
1581  alignment_components,
1582  rmsd_calculation_components,
1583  state_number=state_number)
1584 
1585  # note! the coordinates are simply float tuples, NOT decorators, NOT Vector3D,
1586  # NOR particles, because these object cannot be serialized. We need serialization
1587  # for the parallel computation based on mpi.
1588  all_coordinates=got_coords[0] # dict:key=component name,val=coords per hit
1589  alignment_coordinates=got_coords[1] # same as above, limited to alignment bits
1590  rmsd_coordinates=got_coords[2] # same as above, limited to RMSD bits
1591  rmf_file_name_index_dict=got_coords[3] # dictionary with key=RMF, value=score rank
1592  all_rmf_file_names=got_coords[4] # RMF file per hit
1593 
1594 # ------------------------------------------------------------------------
1595 # optionally don't compute distance matrix or cluster, just write top files
1596 # ------------------------------------------------------------------------
1597  if skip_clustering:
1598  if density_custom_ranges:
1599  DensModule = IMP.pmi1.analysis.GetModelDensity(
1600  density_custom_ranges,
1601  voxel=voxel_size)
1602 
1603  dircluster=os.path.join(outputdir,"all_models."+str(n))
1604  try:
1605  os.mkdir(outputdir)
1606  except:
1607  pass
1608  try:
1609  os.mkdir(dircluster)
1610  except:
1611  pass
1612  clusstat=open(os.path.join(dircluster,"stat."+str(self.rank)+".out"),"w")
1613  for cnt,tpl in enumerate(my_best_score_rmf_tuples):
1614  rmf_name=tpl[1]
1615  rmf_frame_number=tpl[2]
1616  tmp_dict={}
1617  index=tpl[4]
1618  for key in best_score_feature_keyword_list_dict:
1619  tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1620 
1621  if cnt==0:
1622  prots,rs = IMP.pmi1.analysis.get_hiers_and_restraints_from_rmf(
1623  self.model,
1624  rmf_frame_number,
1625  rmf_name)
1626  else:
1627  linking_successful=IMP.pmi1.analysis.link_hiers_and_restraints_to_rmf(
1628  self.model,
1629  prots,
1630  rs,
1631  rmf_frame_number,
1632  rmf_name)
1633  if not linking_successful:
1634  continue
1635 
1636  if not prots:
1637  continue
1638 
1639  prot = prots[state_number]
1640 
1641  # get transformation aligning coordinates of requested tuples
1642  # to the first RMF file
1643  if cnt==0:
1644  coords_f1=alignment_coordinates[cnt]
1645  if cnt > 0:
1646  coords_f2=alignment_coordinates[cnt]
1647  if coords_f2:
1648  Ali = IMP.pmi1.analysis.Alignment(coords_f1, coords_f2)
1649  transformation = Ali.align()[1]
1650  else:
1652 
1653  rbs = set()
1654  for p in IMP.atom.get_leaves(prot):
1655  if not IMP.core.XYZR.get_is_setup(p):
1657  IMP.core.XYZR(p).set_radius(0.0001)
1658  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1659 
1661  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1662  rbs.add(rb)
1663  else:
1665  transformation)
1666  for rb in rbs:
1667  IMP.core.transform(rb,transformation)
1668 
1670  self.model.update()
1671  out_pdb_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".pdb")
1672  out_rmf_fn=os.path.join(dircluster,str(cnt)+"."+str(self.rank)+".rmf3")
1673  o.init_pdb(out_pdb_fn,prot)
1674  o.write_pdb(out_pdb_fn,
1675  translate_to_geometric_center=write_pdb_with_centered_coordinates)
1676 
1677  tmp_dict["local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1678  tmp_dict["rmf_file_full_path"]=rmf_name
1679  tmp_dict["local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1680  tmp_dict["local_rmf_frame_number"]=0
1681 
1682  clusstat.write(str(tmp_dict)+"\n")
1683 
1684  o.init_rmf(out_rmf_fn, [prot],rs)
1685 
1686  o.write_rmf(out_rmf_fn)
1687  o.close_rmf(out_rmf_fn)
1688  # add the density
1689  if density_custom_ranges:
1690  DensModule.add_subunits_density(prot)
1691 
1692  if density_custom_ranges:
1693  DensModule.write_mrc(path=dircluster)
1694  del DensModule
1695  return
1696 
1697 
1698 
1699  # broadcast the coordinates
1700  if self.number_of_processes > 1:
1701  all_coordinates = IMP.pmi1.tools.scatter_and_gather(
1702  all_coordinates)
1703  all_rmf_file_names = IMP.pmi1.tools.scatter_and_gather(
1704  all_rmf_file_names)
1705  rmf_file_name_index_dict = IMP.pmi1.tools.scatter_and_gather(
1706  rmf_file_name_index_dict)
1707  alignment_coordinates=IMP.pmi1.tools.scatter_and_gather(
1708  alignment_coordinates)
1709  rmsd_coordinates=IMP.pmi1.tools.scatter_and_gather(
1710  rmsd_coordinates)
1711 
1712  if self.rank == 0:
1713  # save needed information in external files
1714  self.save_objects(
1715  [best_score_feature_keyword_list_dict,
1716  rmf_file_name_index_dict],
1717  ".macro.pkl")
1718 
1719 # ------------------------------------------------------------------------
1720 # Calculate distance matrix and cluster
1721 # ------------------------------------------------------------------------
1722  print("setup clustering class")
1723  self.cluster_obj = IMP.pmi1.analysis.Clustering(rmsd_weights)
1724 
1725  for n, model_coordinate_dict in enumerate(all_coordinates):
1726  template_coordinate_dict = {}
1727  # let's try to align
1728  if alignment_components is not None and len(self.cluster_obj.all_coords) == 0:
1729  # set the first model as template coordinates
1730  self.cluster_obj.set_template(alignment_coordinates[n])
1731  self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1732  print("Global calculating the distance matrix")
1733 
1734  # calculate distance matrix, all against all
1735  self.cluster_obj.dist_matrix()
1736 
1737  # perform clustering and optionally display
1738  if self.rank == 0:
1739  self.cluster_obj.do_cluster(number_of_clusters)
1740  if display_plot:
1741  if self.rank == 0:
1742  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1743  if exit_after_display:
1744  exit()
1745  self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1746 
1747 # ------------------------------------------------------------------------
1748 # Alternatively, load the distance matrix from file and cluster that
1749 # ------------------------------------------------------------------------
1750  else:
1751  if self.rank==0:
1752  print("setup clustering class")
1753  self.cluster_obj = IMP.pmi1.analysis.Clustering()
1754  self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1755  print("clustering with %s clusters" % str(number_of_clusters))
1756  self.cluster_obj.do_cluster(number_of_clusters)
1757  [best_score_feature_keyword_list_dict,
1758  rmf_file_name_index_dict] = self.load_objects(".macro.pkl")
1759  if display_plot:
1760  if self.rank == 0:
1761  self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,'dist_matrix.pdf'))
1762  if exit_after_display:
1763  exit()
1764  if self.number_of_processes > 1:
1765  self.comm.Barrier()
1766 
1767 # ------------------------------------------------------------------------
1768 # now save all information about the clusters
1769 # ------------------------------------------------------------------------
1770 
1771  if self.rank == 0:
1772  print(self.cluster_obj.get_cluster_labels())
1773  for n, cl in enumerate(self.cluster_obj.get_cluster_labels()):
1774  print("rank %s " % str(self.rank))
1775  print("cluster %s " % str(n))
1776  print("cluster label %s " % str(cl))
1777  print(self.cluster_obj.get_cluster_label_names(cl))
1778  cluster_size = len(self.cluster_obj.get_cluster_label_names(cl))
1779  cluster_prov = prov + \
1780  [IMP.pmi1.io.ClusterProvenance(cluster_size)]
1781 
1782  # first initialize the Density class if requested
1783  if density_custom_ranges:
1784  DensModule = IMP.pmi1.analysis.GetModelDensity(
1785  density_custom_ranges,
1786  voxel=voxel_size)
1787 
1788  dircluster = outputdir + "/cluster." + str(n) + "/"
1789  try:
1790  os.mkdir(dircluster)
1791  except:
1792  pass
1793 
1794  rmsd_dict = {"AVERAGE_RMSD":
1795  str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1796  clusstat = open(dircluster + "stat.out", "w")
1797  for k, structure_name in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1798  # extract the features
1799  tmp_dict = {}
1800  tmp_dict.update(rmsd_dict)
1801  index = rmf_file_name_index_dict[structure_name]
1802  for key in best_score_feature_keyword_list_dict:
1803  tmp_dict[
1804  key] = best_score_feature_keyword_list_dict[
1805  key][
1806  index]
1807 
1808  # get the rmf name and the frame number from the list of
1809  # frame names
1810  rmf_name = structure_name.split("|")[0]
1811  rmf_frame_number = int(structure_name.split("|")[1])
1812  clusstat.write(str(tmp_dict) + "\n")
1813 
1814  # extract frame (open or link to existing)
1815  if k==0:
1816  prots,rs = IMP.pmi1.analysis.get_hiers_and_restraints_from_rmf(
1817  self.model,
1818  rmf_frame_number,
1819  rmf_name)
1820  else:
1821  linking_successful = IMP.pmi1.analysis.link_hiers_and_restraints_to_rmf(
1822  self.model,
1823  prots,
1824  rs,
1825  rmf_frame_number,
1826  rmf_name)
1827  if not linking_successful:
1828  continue
1829  if not prots:
1830  continue
1831 
1832  prot = prots[state_number]
1833  if k==0:
1834  IMP.pmi1.io.add_provenance(cluster_prov, (prot,))
1835 
1836  # transform clusters onto first
1837  if k > 0:
1838  model_index = self.cluster_obj.get_model_index_from_name(
1839  structure_name)
1840  transformation = self.cluster_obj.get_transformation_to_first_member(
1841  cl,
1842  model_index)
1843  rbs = set()
1844  for p in IMP.atom.get_leaves(prot):
1845  if not IMP.core.XYZR.get_is_setup(p):
1847  IMP.core.XYZR(p).set_radius(0.0001)
1848  IMP.core.XYZR(p).set_coordinates((0, 0, 0))
1849 
1851  rb = IMP.core.RigidBodyMember(p).get_rigid_body()
1852  rbs.add(rb)
1853  else:
1855  transformation)
1856  for rb in rbs:
1857  IMP.core.transform(rb,transformation)
1858 
1859  # add the density
1860  if density_custom_ranges:
1861  DensModule.add_subunits_density(prot)
1862 
1863  # pdb writing should be optimized!
1865  self.model.update()
1866  o.init_pdb(dircluster + str(k) + ".pdb", prot)
1867  o.write_pdb(dircluster + str(k) + ".pdb")
1868 
1869  o.init_rmf(dircluster + str(k) + ".rmf3", [prot],rs)
1870  o.write_rmf(dircluster + str(k) + ".rmf3")
1871  o.close_rmf(dircluster + str(k) + ".rmf3")
1872 
1873  del o
1874  # IMP.atom.destroy(prot)
1875 
1876  if density_custom_ranges:
1877  DensModule.write_mrc(path=dircluster)
1878  del DensModule
1879 
1880  if self.number_of_processes>1:
1881  self.comm.Barrier()
1882 
1883  def get_cluster_rmsd(self,cluster_num):
1884  if self.cluster_obj is None:
1885  raise Exception("Run clustering first")
1886  return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1887 
1888  def save_objects(self, objects, file_name):
1889  import pickle
1890  outf = open(file_name, 'wb')
1891  pickle.dump(objects, outf)
1892  outf.close()
1893 
1894  def load_objects(self, file_name):
1895  import pickle
1896  inputf = open(file_name, 'rb')
1897  objects = pickle.load(inputf)
1898  inputf.close()
1899  return objects
1900 
1902 
1903  """
1904  This class contains analysis utilities to investigate ReplicaExchange results.
1905  """
1906 
1907  ########################
1908  # Construction and Setup
1909  ########################
1910 
1911  def __init__(self,model,
1912  stat_files,
1913  best_models=None,
1914  score_key=None,
1915  alignment=True):
1916  """
1917  Construction of the Class.
1918  @param model IMP.Model()
1919  @param stat_files list of string. Can be ascii stat files, rmf files names
1920  @param best_models Integer. Number of best scoring models, if None: all models will be read
1921  @param alignment boolean (Default=True). Align before computing the rmsd.
1922  """
1923 
1924  self.model=model
1925  self.best_models=best_models
1926  self.stath0=IMP.pmi1.output.StatHierarchyHandler(model,stat_files,self.best_models,score_key,cache=True)
1927  self.stath1=IMP.pmi1.output.StatHierarchyHandler(StatHierarchyHandler=self.stath0)
1928 
1929  self.rbs1, self.beads1 = IMP.pmi1.tools.get_rbs_and_beads(IMP.pmi1.tools.select_at_all_resolutions(self.stath1))
1930  self.rbs0, self.beads0 = IMP.pmi1.tools.get_rbs_and_beads(IMP.pmi1.tools.select_at_all_resolutions(self.stath0))
1931  self.sel0_rmsd=IMP.atom.Selection(self.stath0)
1932  self.sel1_rmsd=IMP.atom.Selection(self.stath1)
1933  self.sel0_alignment=IMP.atom.Selection(self.stath0)
1934  self.sel1_alignment=IMP.atom.Selection(self.stath1)
1935  self.clusters=[]
1936  # fill the cluster list with a single cluster containing all models
1938  self.clusters.append(c)
1939  for n0 in range(len(self.stath0)):
1940  c.add_member(n0)
1941  self.pairwise_rmsd={}
1942  self.pairwise_molecular_assignment={}
1943  self.alignment=alignment
1944  self.symmetric_molecules={}
1945  self.issymmetricsel={}
1946  self.update_seldicts()
1947  self.molcopydict0=IMP.pmi1.tools.get_molecules_dictionary_by_copy(IMP.atom.get_leaves(self.stath0))
1948  self.molcopydict1=IMP.pmi1.tools.get_molecules_dictionary_by_copy(IMP.atom.get_leaves(self.stath1))
1949 
1950  def set_rmsd_selection(self,**kwargs):
1951  """
1952  Setup the selection onto which the rmsd is computed
1953  @param kwargs use IMP.atom.Selection keywords
1954  """
1955  self.sel0_rmsd=IMP.atom.Selection(self.stath0,**kwargs)
1956  self.sel1_rmsd=IMP.atom.Selection(self.stath1,**kwargs)
1957  self.update_seldicts()
1958 
1959  def set_symmetric(self,molecule_name):
1960  """
1961  Store names of symmetric molecules
1962  """
1963  self.symmetric_molecules[molecule_name]=0
1964  self.update_seldicts()
1965 
1966  def set_alignment_selection(self,**kwargs):
1967  """
1968  Setup the selection onto which the alignment is computed
1969  @param kwargs use IMP.atom.Selection keywords
1970  """
1971  self.sel0_alignment=IMP.atom.Selection(self.stath0,**kwargs)
1972  self.sel1_alignment=IMP.atom.Selection(self.stath1,**kwargs)
1973 
1974  ######################
1975  # Clustering functions
1976  ######################
1977  def clean_clusters(self):
1978  for c in self.clusters: del c
1979  self.clusters=[]
1980 
1981 
1982  def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1983  """
1984  Cluster the models based on RMSD.
1985  @param rmsd_cutoff Float the distance cutoff in Angstrom
1986  @param metric (Default=IMP.atom.get_rmsd) the metric that will be used to compute rmsds
1987  """
1988  self.clean_clusters()
1989  not_clustered=set(range(len(self.stath1)))
1990  while len(not_clustered)>0:
1991  self.aggregate(not_clustered, rmsd_cutoff, metric)
1992  #self.merge_aggregates(rmsd_cutoff, metric)
1993  self.update_clusters()
1994 
1995  def refine(self,rmsd_cutoff=10):
1996  """
1997  Refine the clusters by merging the ones whose centers are close
1998  @param rmsd_cutoff cutoff distance in Angstorms
1999  """
2000  merge_pairs=[]
2001  clusters_copy=self.clusters
2002  for c0,c1 in itertools.combinations(self.clusters,2):
2003  if c0.center_index is None:
2004  self.compute_cluster_center(c0)
2005  if c1.center_index is None:
2006  self.compute_cluster_center(c1)
2007  d0=self.stath0[c0.center_index]
2008  d1=self.stath1[c1.center_index]
2009  rmsd, molecular_assignment = self.rmsd()
2010  if rmsd <= rmsd_cutoff:
2011  if c1 in self.clusters:
2012  clusters_copy.remove(c1)
2013  c0+=c1
2014  self.clusters=clusters_copy
2015  self.update_clusters()
2016 
2017  ####################
2018  # Input Output
2019  ####################
2020 
2021  def set_cluster_assignments(self, cluster_ids):
2022  if len(cluster_ids)!=len(self.stath0):
2023  raise ValueError('cluster ids has to be same length as number of frames')
2024 
2025  self.clusters=[]
2026  for i in sorted(list(set(cluster_ids))):
2027  self.clusters.append(IMP.pmi1.output.Cluster(i))
2028  for i, (idx, d) in enumerate(zip(cluster_ids, self.stath0)):
2029  self.clusters[idx].add_member(i,d)
2030 
2031  def get_cluster_data(self, cluster):
2032  """
2033  Return the model data from a cluster
2034  @param cluster IMP.pmi1.output.Cluster object
2035  """
2036  data=[]
2037  for m in cluster:
2038  data.append(m)
2039  return data
2040 
2041  def save_data(self,filename='data.pkl'):
2042  """
2043  Save the data for the whole models into a pickle file
2044  @param filename string
2045  """
2046  self.stath0.save_data(filename)
2047 
2048  def set_data(self,data):
2049  """
2050  Set the data from an external IMP.pmi1.output.Data
2051  @param data IMP.pmi1.output.Data
2052  """
2053  self.stath0.data=data
2054  self.stath1.data=data
2055 
2056  def load_data(self,filename='data.pkl'):
2057  """
2058  Load the data from an external pickled file
2059  @param filename string
2060  """
2061  self.stath0.load_data(filename)
2062  self.stath1.load_data(filename)
2063  self.best_models=len(self.stath0)
2064 
2065  def add_cluster(self,rmf_name_list):
2066  c = IMP.pmi1.output.Cluster(len(self.clusters))
2067  print("creating cluster index "+str(len(self.clusters)))
2068  self.clusters.append(c)
2069  current_len=len(self.stath0)
2070 
2071  for rmf in rmf_name_list:
2072  print("adding rmf "+rmf)
2073  self.stath0.add_stat_file(rmf)
2074  self.stath1.add_stat_file(rmf)
2075 
2076  for n0 in range(current_len,len(self.stath0)):
2077  d0=self.stath0[n0]
2078  c.add_member(n0,d0)
2079  self.update_clusters()
2080 
2081  def save_clusters(self,filename='clusters.pkl'):
2082  """
2083  Save the clusters into a pickle file
2084  @param filename string
2085  """
2086  try:
2087  import cPickle as pickle
2088  except ImportError:
2089  import pickle
2090  fl=open(filename,'wb')
2091  pickle.dump(self.clusters,fl)
2092 
2093  def load_clusters(self,filename='clusters.pkl',append=False):
2094  """
2095  Load the clusters from a pickle file
2096  @param filename string
2097  @param append bool (Default=False), if True. append the clusters to the ones currently present
2098  """
2099  try:
2100  import cPickle as pickle
2101  except ImportError:
2102  import pickle
2103  fl=open(filename,'rb')
2104  self.clean_clusters()
2105  if append:
2106  self.clusters+=pickle.load(fl)
2107  else:
2108  self.clusters=pickle.load(fl)
2109  self.update_clusters()
2110 
2111  ####################
2112  # Analysis Functions
2113  ####################
2114 
2115  def compute_cluster_center(self,cluster):
2116  """
2117  Compute the cluster center for a given cluster
2118  """
2119  member_distance=defaultdict(float)
2120 
2121  for n0,n1 in itertools.combinations(cluster.members,2):
2122  d0=self.stath0[n0]
2123  d1=self.stath1[n1]
2124  rmsd, _ = self.rmsd()
2125  member_distance[n0]+=rmsd
2126 
2127  if len(member_distance)>0:
2128  cluster.center_index=min(member_distance, key=member_distance.get)
2129  else:
2130  cluster.center_index=cluster.members[0]
2131 
2132  def save_coordinates(self,cluster,rmf_name=None,reference="Absolute", prefix="./"):
2133  """
2134  Save the coordinates of the current cluster a single rmf file
2135  """
2136  print("saving coordinates",cluster)
2137  if self.alignment: self.set_reference(reference,cluster)
2139  if rmf_name is None:
2140  rmf_name=prefix+'/'+str(cluster.cluster_id)+".rmf3"
2141 
2142  d1=self.stath1[cluster.members[0]]
2143  self.model.update()
2144  o.init_rmf(rmf_name, [self.stath1])
2145  for n1 in cluster.members:
2146  d1=self.stath1[n1]
2147  self.model.update()
2149  if self.alignment: self.align()
2150  o.write_rmf(rmf_name)
2152  o.close_rmf(rmf_name)
2153 
2154  def prune_redundant_structures(self,rmsd_cutoff=10):
2155  """
2156  remove structures that are similar
2157  append it to a new cluster
2158  """
2159  print("pruning models")
2160  selected=0
2161  filtered=[selected]
2162  remaining=range(1,len(self.stath1),10)
2163 
2164  while len(remaining)>0:
2165  d0=self.stath0[selected]
2166  rm=[]
2167  for n1 in remaining:
2168  d1=self.stath1[n1]
2169  if self.alignment: self.align()
2170  d, _ = self.rmsd()
2171  if d<=rmsd_cutoff:
2172  rm.append(n1)
2173  print("pruning model %s, similar to model %s, rmsd %s"%(str(n1),str(selected),str(d)))
2174  remaining=[x for x in remaining if x not in rm]
2175  if len(remaining)==0: break
2176  selected=remaining[0]
2177  filtered.append(selected)
2178  remaining.pop(0)
2179  c = IMP.pmi1.output.Cluster(len(self.clusters))
2180  self.clusters.append(c)
2181  for n0 in filtered:
2182  d0=self.stath0[n0]
2183  c.add_member(n0,d0)
2184  self.update_clusters()
2185 
2186 
2187 
2188  def precision(self,cluster):
2189  """
2190  Compute the precision of a cluster
2191  """
2192  npairs=0
2193  rmsd=0.0
2194  precision=None
2195 
2196  if not cluster.center_index is None:
2197  members1=[cluster.center_index]
2198  else:
2199  members1=cluster.members
2200 
2201  for n0 in members1:
2202  d0=self.stath0[n0]
2203  for n1 in cluster.members:
2204  if n0!=n1:
2205  npairs+=1
2206  d1=self.stath1[n1]
2208  tmp_rmsd, _ = self.rmsd()
2209  rmsd+=tmp_rmsd
2211 
2212  if npairs>0:
2213  precision=rmsd/npairs
2214  cluster.precision=precision
2215  return precision
2216 
2217  def bipartite_precision(self,cluster1,cluster2,verbose=False):
2218  """
2219  Compute the bipartite precision (ie the cross-precision)
2220  between two clusters
2221  """
2222  npairs=0
2223  rmsd=0.0
2224  for cn0,n0 in enumerate(cluster1.members):
2225  d0=self.stath0[n0]
2226  for cn1,n1 in enumerate(cluster2.members):
2227  d1=self.stath1[n1]
2228  tmp_rmsd, _ =self.rmsd()
2229  if verbose: print("--- rmsd between structure %s and structure %s is %s"%(str(cn0),str(cn1),str(tmp_rmsd)))
2230  rmsd+=tmp_rmsd
2231  npairs+=1
2232  precision=rmsd/npairs
2233  return precision
2234 
2235  def rmsf(self,cluster,molecule,copy_index=0,state_index=0,cluster_ref=None,step=1):
2236  """
2237  Compute the Root mean square fluctuations
2238  of a molecule in a cluster
2239  Returns an IMP.pmi1.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf
2240  """
2241  rmsf=IMP.pmi1.tools.OrderedDict()
2242 
2243  #assumes that residue indexes are identical for stath0 and stath1
2244  if cluster_ref is not None:
2245  if not cluster_ref.center_index is None:
2246  members0 = [cluster_ref.center_index]
2247  else:
2248  members0 = cluster_ref.members
2249  else:
2250  if not cluster.center_index is None:
2251  members0 = [cluster.center_index]
2252  else:
2253  members0 = cluster.members
2254 
2255  s0=IMP.atom.Selection(self.stath0,molecule=molecule,resolution=1,
2256  copy_index=copy_index,state_index=state_index)
2257  ps0=s0.get_selected_particles()
2258  #get the residue indexes
2259  residue_indexes=list(IMP.pmi1.tools.OrderedSet([IMP.pmi1.tools.get_residue_indexes(p)[0] for p in ps0]))
2260 
2261  #get the corresponding particles
2262  #s0=IMP.atom.Selection(stat_ref,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
2263  # copy_index=copy_index,state_index=state_index)
2264  #ps0 = s0.get_selected_particles()
2265 
2266 
2267 
2268  npairs=0
2269  for n0 in members0:
2270  d0=self.stath0[n0]
2271  for n1 in cluster.members[::step]:
2272  if n0!=n1:
2273  print("--- rmsf %s %s"%(str(n0),str(n1)))
2275 
2276  s1=IMP.atom.Selection(self.stath1,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
2277  copy_index=copy_index,state_index=state_index)
2278  ps1 = s1.get_selected_particles()
2279 
2280  d1=self.stath1[n1]
2281  if self.alignment: self.align()
2282  for n,(p0,p1) in enumerate(zip(ps0,ps1)):
2283  r=residue_indexes[n]
2284  d0=IMP.core.XYZ(p0)
2285  d1=IMP.core.XYZ(p1)
2286  if r in rmsf:
2287  rmsf[r]+=IMP.core.get_distance(d0,d1)
2288  else:
2289  rmsf[r]=IMP.core.get_distance(d0,d1)
2290  npairs+=1
2292  for r in rmsf:
2293  rmsf[r]/=npairs
2294 
2295  for stath in [self.stath0,self.stath1]:
2296  if molecule not in self.symmetric_molecules:
2297  s=IMP.atom.Selection(stath,molecule=molecule,residue_index=r,resolution=1,
2298  copy_index=copy_index,state_index=state_index)
2299  else:
2300  s=IMP.atom.Selection(stath,molecule=molecule,residue_index=r,resolution=1,
2301  state_index=state_index)
2302 
2303  ps = s.get_selected_particles()
2304  for p in ps:
2306  IMP.pmi1.Uncertainty(p).set_uncertainty(rmsf[r])
2307  else:
2309 
2310  return rmsf
2311 
2312  def save_densities(self,cluster,density_custom_ranges,voxel_size=5,reference="Absolute", prefix="./",step=1):
2313  if self.alignment: self.set_reference(reference,cluster)
2314  dens = IMP.pmi1.analysis.GetModelDensity(density_custom_ranges,
2315  voxel=voxel_size)
2316 
2317  for n1 in cluster.members[::step]:
2318  print("density "+str(n1))
2319  d1=self.stath1[n1]
2321  if self.alignment: self.align()
2322  dens.add_subunits_density(self.stath1)
2324  dens.write_mrc(path=prefix+'/',suffix=str(cluster.cluster_id))
2325  del dens
2326 
2327  def contact_map(self,cluster,contact_threshold=15,log_scale=False,consolidate=False,molecules=None,prefix='./',reference="Absolute"):
2328  if self.alignment: self.set_reference(reference,cluster)
2329  import numpy as np
2330  import matplotlib.pyplot as plt
2331  import matplotlib.cm as cm
2332  from scipy.spatial.distance import cdist
2333  import IMP.pmi1.topology
2334  if molecules is None:
2336  else:
2337  mols=[IMP.pmi1.topology.PMIMoleculeHierarchy(mol) for mol in IMP.pmi1.tools.get_molecules(IMP.atom.Selection(self.stath1,molecules=molecules).get_selected_particles())]
2338  unique_copies=[mol for mol in mols if mol.get_copy_index() == 0]
2339  mol_names_unique=dict((mol.get_name(),mol) for mol in unique_copies)
2340  total_len_unique=sum(max(mol.get_residue_indexes()) for mol in unique_copies)
2341 
2342 
2343  # coords = np.ones((total_len,3)) * 1e6 #default to coords "very far away"
2344  index_dict={}
2345  prev_stop=0
2346 
2347  if not consolidate:
2348  for mol in mols:
2349  seqlen=max(mol.get_residue_indexes())
2350  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2351  prev_stop+=seqlen
2352 
2353  else:
2354  for mol in unique_copies:
2355  seqlen=max(mol.get_residue_indexes())
2356  index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2357  prev_stop+=seqlen
2358 
2359 
2360  for ncl,n1 in enumerate(cluster.members):
2361  print(ncl)
2362  d1=self.stath1[n1]
2363  #self.apply_molecular_assignments(n1)
2364  coord_dict=IMP.pmi1.tools.OrderedDict()
2365  for mol in mols:
2366  mol_name=mol.get_name()
2367  copy_index=mol.get_copy_index()
2368  rindexes = mol.get_residue_indexes()
2369  coords = np.ones((max(rindexes),3))
2370  for rnum in rindexes:
2371  sel = IMP.atom.Selection(mol, residue_index=rnum, resolution=1)
2372  selpart = sel.get_selected_particles()
2373  if len(selpart) == 0: continue
2374  selpart = selpart[0]
2375  coords[rnum - 1, :] = IMP.core.XYZ(selpart).get_coordinates()
2376  coord_dict[mol]=coords
2377 
2378  if not consolidate:
2379  coords=np.concatenate(list(coord_dict.values()))
2380  dists = cdist(coords, coords)
2381  binary_dists = np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
2382  else:
2383  binary_dists_dict={}
2384  for mol1 in mols:
2385  len1 = max(mol1.get_residue_indexes())
2386  for mol2 in mols:
2387  name1=mol1.get_name()
2388  name2=mol2.get_name()
2389  dists = cdist(coord_dict[mol1], coord_dict[mol2])
2390  if (name1, name2) not in binary_dists_dict:
2391  binary_dists_dict[(name1, name2)] = np.zeros((len1,len1))
2392  binary_dists_dict[(name1, name2)] += np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
2393  binary_dists=np.zeros((total_len_unique,total_len_unique))
2394 
2395  for name1,name2 in binary_dists_dict:
2396  r1=index_dict[mol_names_unique[name1]]
2397  r2=index_dict[mol_names_unique[name2]]
2398  binary_dists[min(r1):max(r1)+1,min(r2):max(r2)+1] = np.where((binary_dists_dict[(name1, name2)]>=1.0),1.0,0.0)
2399 
2400  if ncl==0:
2401  dist_maps = [dists]
2402  av_dist_map = dists
2403  contact_freqs = binary_dists
2404  else:
2405  dist_maps.append(dists)
2406  av_dist_map += dists
2407  contact_freqs += binary_dists
2408 
2409  #self.undo_apply_molecular_assignments(n1)
2410 
2411  if log_scale:
2412  contact_freqs =-np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2413  else:
2414  contact_freqs =1.0/len(cluster)*contact_freqs
2415  av_dist_map=1.0/len(cluster)*contact_freqs
2416 
2417  fig = plt.figure(figsize=(100, 100))
2418  ax = fig.add_subplot(111)
2419  ax.set_xticks([])
2420  ax.set_yticks([])
2421  gap_between_components=50
2422  colormap = cm.Blues
2423  colornorm=None
2424 
2425 
2426  #if cbar_labels is not None:
2427  # if len(cbar_labels)!=4:
2428  # print("to provide cbar labels, give 3 fields (first=first input file, last=last input) in oppose order of input contact maps")
2429  # exit()
2430  # set the list of proteins on the x axis
2431  if not consolidate:
2432  sorted_tuple=sorted([(IMP.pmi1.topology.PMIMoleculeHierarchy(mol).get_extended_name(),mol) for mol in mols])
2433  prot_list=list(zip(*sorted_tuple))[1]
2434  else:
2435  sorted_tuple=sorted([(IMP.pmi1.topology.PMIMoleculeHierarchy(mol).get_name(),mol) for mol in unique_copies])
2436  prot_list=list(zip(*sorted_tuple))[1]
2437 
2438  prot_listx = prot_list
2439  nresx = gap_between_components + \
2440  sum([max(mol.get_residue_indexes())
2441  + gap_between_components for mol in prot_listx])
2442 
2443  # set the list of proteins on the y axis
2444  prot_listy = prot_list
2445  nresy = gap_between_components + \
2446  sum([max(mol.get_residue_indexes())
2447  + gap_between_components for mol in prot_listy])
2448 
2449  # this is the residue offset for each protein
2450  resoffsetx = {}
2451  resendx = {}
2452  res = gap_between_components
2453  for mol in prot_listx:
2454  resoffsetx[mol] = res
2455  res += max(mol.get_residue_indexes())
2456  resendx[mol] = res
2457  res += gap_between_components
2458 
2459  resoffsety = {}
2460  resendy = {}
2461  res = gap_between_components
2462  for mol in prot_listy:
2463  resoffsety[mol] = res
2464  res += max(mol.get_residue_indexes())
2465  resendy[mol] = res
2466  res += gap_between_components
2467 
2468  resoffsetdiagonal = {}
2469  res = gap_between_components
2470  for mol in IMP.pmi1.tools.OrderedSet(prot_listx + prot_listy):
2471  resoffsetdiagonal[mol] = res
2472  res += max(mol.get_residue_indexes())
2473  res += gap_between_components
2474 
2475  # plot protein boundaries
2476  xticks = []
2477  xlabels = []
2478  for n, prot in enumerate(prot_listx):
2479  res = resoffsetx[prot]
2480  end = resendx[prot]
2481  for proty in prot_listy:
2482  resy = resoffsety[proty]
2483  endy = resendy[proty]
2484  ax.plot([res, res], [resy, endy], linestyle='-',color='gray', lw=0.4)
2485  ax.plot([end, end], [resy, endy], linestyle='-',color='gray', lw=0.4)
2486  xticks.append((float(res) + float(end)) / 2)
2487  xlabels.append(IMP.pmi1.topology.PMIMoleculeHierarchy(prot).get_extended_name())
2488 
2489  yticks = []
2490  ylabels = []
2491  for n, prot in enumerate(prot_listy):
2492  res = resoffsety[prot]
2493  end = resendy[prot]
2494  for protx in prot_listx:
2495  resx = resoffsetx[protx]
2496  endx = resendx[protx]
2497  ax.plot([resx, endx], [res, res], linestyle='-',color='gray', lw=0.4)
2498  ax.plot([resx, endx], [end, end], linestyle='-',color='gray', lw=0.4)
2499  yticks.append((float(res) + float(end)) / 2)
2500  ylabels.append(IMP.pmi1.topology.PMIMoleculeHierarchy(prot).get_extended_name())
2501 
2502  # plot the contact map
2503 
2504  tmp_array = np.zeros((nresx, nresy))
2505  ret={}
2506  for px in prot_listx:
2507  for py in prot_listy:
2508  resx = resoffsetx[px]
2509  lengx = resendx[px] - 1
2510  resy = resoffsety[py]
2511  lengy = resendy[py] - 1
2512  indexes_x = index_dict[px]
2513  minx = min(indexes_x)
2514  maxx = max(indexes_x)
2515  indexes_y = index_dict[py]
2516  miny = min(indexes_y)
2517  maxy = max(indexes_y)
2518  tmp_array[resx:lengx,resy:lengy] = contact_freqs[minx:maxx,miny:maxy]
2519  ret[(px,py)]=np.argwhere(contact_freqs[minx:maxx,miny:maxy] == 1.0)+1
2520 
2521  cax = ax.imshow(tmp_array,
2522  cmap=colormap,
2523  norm=colornorm,
2524  origin='lower',
2525  alpha=0.6,
2526  interpolation='nearest')
2527 
2528  ax.set_xticks(xticks)
2529  ax.set_xticklabels(xlabels, rotation=90)
2530  ax.set_yticks(yticks)
2531  ax.set_yticklabels(ylabels)
2532  plt.setp(ax.get_xticklabels(), fontsize=6)
2533  plt.setp(ax.get_yticklabels(), fontsize=6)
2534 
2535  # display and write to file
2536  fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2537  [i.set_linewidth(2.0) for i in ax.spines.values()]
2538  #if cbar_labels is not None:
2539  # cbar = fig.colorbar(cax, ticks=[0.5,1.5,2.5,3.5])
2540  # cbar.ax.set_yticklabels(cbar_labels)# vertically oriented colorbar
2541 
2542  plt.savefig(prefix+"/contact_map."+str(cluster.cluster_id)+".pdf", dpi=300,transparent="False")
2543  return ret
2544 
2545 
2546  def plot_rmsd_matrix(self,filename):
2547  import numpy as np
2548  self.compute_all_pairwise_rmsd()
2549  distance_matrix = np.zeros(
2550  (len(self.stath0), len(self.stath1)))
2551  for (n0,n1) in self.pairwise_rmsd:
2552  distance_matrix[n0, n1] = self.pairwise_rmsd[(n0,n1)]
2553 
2554  import matplotlib as mpl
2555  mpl.use('Agg')
2556  import matplotlib.pylab as pl
2557  from scipy.cluster import hierarchy as hrc
2558 
2559  fig = pl.figure(figsize=(10,8))
2560  ax = fig.add_subplot(212)
2561  dendrogram = hrc.dendrogram(
2562  hrc.linkage(distance_matrix),
2563  color_threshold=7,
2564  no_labels=True)
2565  leaves_order = dendrogram['leaves']
2566  ax.set_xlabel('Model')
2567  ax.set_ylabel('RMSD [Angstroms]')
2568 
2569  ax2 = fig.add_subplot(221)
2570  cax = ax2.imshow(
2571  distance_matrix[leaves_order,
2572  :][:,
2573  leaves_order],
2574  interpolation='nearest')
2575  cb = fig.colorbar(cax)
2576  cb.set_label('RMSD [Angstroms]')
2577  ax2.set_xlabel('Model')
2578  ax2.set_ylabel('Model')
2579 
2580  pl.savefig(filename, dpi=300)
2581  pl.close(fig)
2582 
2583  ####################
2584  # Internal Functions
2585  ####################
2586 
2587  def update_clusters(self):
2588  """
2589  Update the cluster id numbers
2590  """
2591  for n,c in enumerate(self.clusters):
2592  c.cluster_id=n
2593 
2594  def get_molecule(self, hier, name, copy):
2595  s=IMP.atom.Selection(hier, molecule=name, copy_index=copy)
2596  return IMP.pmi1.tools.get_molecules(s.get_selected_particles()[0])[0]
2597 
2598  def update_seldicts(self):
2599  """
2600  Update the seldicts
2601  """
2602  self.seldict0=IMP.pmi1.tools.get_selections_dictionary(self.sel0_rmsd.get_selected_particles())
2603  self.seldict1=IMP.pmi1.tools.get_selections_dictionary(self.sel1_rmsd.get_selected_particles())
2604  for mol in self.seldict0:
2605  for sel in self.seldict0[mol]:
2606  self.issymmetricsel[sel]=False
2607  for mol in self.symmetric_molecules:
2608  self.symmetric_molecules[mol]=len(self.seldict0[mol])
2609  for sel in self.seldict0[mol]:
2610  self.issymmetricsel[sel]=True
2611 
2612 
2613  def align(self):
2614  print("alignment")
2615  tr = IMP.atom.get_transformation_aligning_first_to_second(self.sel1_alignment, self.sel0_alignment)
2616 
2617  for rb in self.rbs1:
2618  IMP.core.transform(rb, tr)
2619 
2620  for bead in self.beads1:
2621  IMP.core.transform(IMP.core.XYZ(bead), tr)
2622 
2623  self.model.update()
2624 
2625  def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2626  '''
2627  initial filling of the clusters.
2628  '''
2629  n0 = idxs.pop()
2630  print("clustering model "+str(n0))
2631  d0 = self.stath0[n0]
2632  c = IMP.pmi1.output.Cluster(len(self.clusters))
2633  print("creating cluster index "+str(len(self.clusters)))
2634  self.clusters.append(c)
2635  c.add_member(n0,d0)
2636  clustered = set([n0])
2637  for n1 in idxs:
2638  print("--- trying to add model "+str(n1)+" to cluster "+str(len(self.clusters)))
2639  d1 = self.stath1[n1]
2640  if self.alignment: self.align()
2641  rmsd, _ = self.rmsd(metric=metric)
2642  if rmsd<rmsd_cutoff:
2643  print("--- model "+str(n1)+" added, rmsd="+str(rmsd))
2644  c.add_member(n1,d1)
2645  clustered.add(n1)
2646  else:
2647  print("--- model "+str(n1)+" NOT added, rmsd="+str(rmsd))
2648  idxs-=clustered
2649 
2650  def merge_aggregates(self, rmsd_cutoff, metric=IMP.atom.get_rmsd):
2651  """
2652  merge the clusters that have close members
2653  @param rmsd_cutoff cutoff distance in Angstorms
2654  """
2655  # before merging, clusters are spheres of radius rmsd_cutoff centered on the 1st element
2656  # here we only try to merge clusters whose centers are closer than 2*rmsd_cutoff
2657  to_merge = []
2658  print("merging...")
2659  for c0, c1 in filter(lambda x: len(x[0].members)>1, itertools.combinations(self.clusters, 2)):
2660  n0, n1 = [c.members[0] for c in (c0,c1)]
2661  d0 = self.stath0[n0]
2662  d1 = self.stath1[n1]
2663  rmsd, _ = self.rmsd()
2664  if rmsd<2*rmsd_cutoff and self.have_close_members(c0,c1,rmsd_cutoff,metric):
2665  to_merge.append((c0,c1))
2666 
2667  for c0, c in reversed(to_merge):
2668  self.merge(c0,c)
2669 
2670  #keep only full clusters
2671  self.clusters = [c for c in filter(lambda x: len(x.members)>0, self.clusters)]
2672 
2673 
2674  def have_close_members(self, c0, c1, rmsd_cutoff, metric):
2675  '''
2676  returns true if c0 and c1 have members that are closer than rmsd_cutoff
2677  '''
2678  print("check close members for clusters "+str(c0.cluster_id)+" and "+str(c1.cluster_id))
2679  for n0, n1 in itertools.product(c0.members[1:], c1.members):
2680  d0 = self.stath0[n0]
2681  d1 = self.stath1[n1]
2682  rmsd, _ = self.rmsd(metric=metric)
2683  if rmsd<rmsd_cutoff:
2684  return True
2685 
2686  return False
2687 
2688 
2689  def merge(self, c0, c1):
2690  '''
2691  merge two clusters
2692  '''
2693  c0+=c1
2694  c1.members=[]
2695  c1.data={}
2696 
2697  def rmsd_helper(self, sels0, sels1, metric):
2698  '''
2699  a function that returns the permutation best_sel of sels0 that minimizes metric
2700  '''
2701  best_rmsd2 = float('inf')
2702  best_sel = None
2703  if self.issymmetricsel[sels0[0]]:
2704  #this cases happens when symmetries were defined
2705  N = len(sels0)
2706  for offset in range(N):
2707  order=[(offset+i)%N for i in range(N)]
2708  sels = [sels0[(offset+i)%N] for i in range(N)]
2709  sel0 = sels[0]
2710  sel1 = sels1[0]
2711  r=metric(sel0, sel1)
2712  rmsd2=r*r*N
2713  ###print(order,rmsd2)
2714  if rmsd2 < best_rmsd2:
2715  best_rmsd2 = rmsd2
2716  best_sel = sels
2717  best_order=order
2718  else:
2719  for sels in itertools.permutations(sels0):
2720  rmsd2=0.0
2721  for sel0, sel1 in itertools.takewhile(lambda x: rmsd2<best_rmsd2, zip(sels, sels1)):
2722  r=metric(sel0, sel1)
2723  rmsd2+=r*r
2724  if rmsd2 < best_rmsd2:
2725  best_rmsd2 = rmsd2
2726  best_sel = sels
2727  ###for i,sel in enumerate(best_sel):
2728  ### p0 = sel.get_selected_particles()[0]
2729  ### p1 = sels1[i].get_selected_particles()[0]
2730  ### m0 = IMP.pmi1.tools.get_molecules([p0])[0]
2731  ### m1 = IMP.pmi1.tools.get_molecules([p1])[0]
2732  ### c0 = IMP.atom.Copy(m0).get_copy_index()
2733  ### c1 = IMP.atom.Copy(m1).get_copy_index()
2734  ### name0=m0.get_name()
2735  ### name1=m1.get_name()
2736  ### print("WWW",name0,name1,c0,c1)
2737  ###print(best_order,best_rmsd2,m0,m1)
2738  return best_sel, best_rmsd2
2739 
2740  def compute_all_pairwise_rmsd(self):
2741  for d0 in self.stath0:
2742  for d1 in self.stath1:
2743  rmsd, _ = self.rmsd()
2744 
2745  def rmsd(self,metric=IMP.atom.get_rmsd):
2746  '''
2747  Computes the RMSD. Resolves ambiguous pairs assignments
2748  '''
2749  # here we memoize the rmsd and molecular assignment so that it's not done multiple times
2750  n0=self.stath0.current_index
2751  n1=self.stath1.current_index
2752  if ((n0,n1) in self.pairwise_rmsd) and ((n0,n1) in self.pairwise_molecular_assignment):
2753  return self.pairwise_rmsd[(n0,n1)], self.pairwise_molecular_assignment[(n0,n1)]
2754 
2755  if self.alignment:
2756  self.align()
2757  #if it's not yet memoized
2758  total_rmsd=0.0
2759  total_N=0
2760  # this is a dictionary which keys are the molecule names, and values are the list of IMP.atom.Selection for all molecules that share the molecule name
2761  seldict_best_order={}
2762  molecular_assignment={}
2763  for molname, sels0 in self.seldict0.items():
2764  sels_best_order, best_rmsd2 = self.rmsd_helper(sels0, self.seldict1[molname], metric)
2765 
2766  Ncoords = len(sels_best_order[0].get_selected_particles())
2767  Ncopies = len(self.seldict1[molname])
2768  total_rmsd += Ncoords*best_rmsd2
2769  total_N += Ncoords*Ncopies
2770 
2771  for sel0, sel1 in zip(sels_best_order, self.seldict1[molname]):
2772  p0 = sel0.get_selected_particles()[0]
2773  p1 = sel1.get_selected_particles()[0]
2774  m0 = IMP.pmi1.tools.get_molecules([p0])[0]
2775  m1 = IMP.pmi1.tools.get_molecules([p1])[0]
2776  c0 = IMP.atom.Copy(m0).get_copy_index()
2777  c1 = IMP.atom.Copy(m1).get_copy_index()
2778  ###print(molname,c0,c1)
2779  molecular_assignment[(molname,c0)]=(molname,c1)
2780 
2781  total_rmsd = math.sqrt(total_rmsd/total_N)
2782 
2783  self.pairwise_rmsd[(n0,n1)]=total_rmsd
2784  self.pairwise_molecular_assignment[(n0,n1)]=molecular_assignment
2785  self.pairwise_rmsd[(n1,n0)]=total_rmsd
2786  self.pairwise_molecular_assignment[(n1,n0)]=molecular_assignment
2787  ###print(n0,n1,molecular_assignment)
2788  return total_rmsd, molecular_assignment
2789 
2790  def set_reference(self,reference,cluster):
2791  """
2792  Fix the reference structure for structural alignment, rmsd and chain assignment
2793  @param reference can be either "Absolute" (cluster center of the first cluster)
2794  or Relative (cluster center of the current cluster)
2795  """
2796  if reference=="Absolute":
2797  d0=self.stath0[0]
2798  elif reference=="Relative":
2799  if cluster.center_index:
2800  n0=cluster.center_index
2801  else:
2802  n0=cluster.members[0]
2803  d0=self.stath0[n0]
2804 
2806  """
2807  compute the molecular assignments between multiple copies
2808  of the same sequence. It changes the Copy index of Molecules
2809  """
2810  d1=self.stath1[n1]
2811  _, molecular_assignment = self.rmsd()
2812  for (m0, c0), (m1,c1) in molecular_assignment.items():
2813  mol0 = self.molcopydict0[m0][c0]
2814  mol1 = self.molcopydict1[m1][c1]
2815  cik0=IMP.atom.Copy(mol0).get_copy_index_key()
2816  p1=IMP.atom.Copy(mol1).get_particle()
2817  p1.set_value(cik0,c0)
2818 
2820  """
2821  Undo the Copy index assignment
2822  """
2823  d1=self.stath1[n1]
2824  _, molecular_assignment = self.rmsd()
2825  mols_newcopys = []
2826  for (m0, c0), (m1,c1) in molecular_assignment.items():
2827  mol0 = self.molcopydict0[m0][c0]
2828  mol1 = self.molcopydict1[m1][c1]
2829  cik0=IMP.atom.Copy(mol0).get_copy_index_key()
2830  p1=IMP.atom.Copy(mol1).get_particle()
2831  p1.set_value(cik0,c1)
2832 
2833  ####################
2834  # Container Functions
2835  ####################
2836 
2837  def __repr__(self):
2838  s= "AnalysisReplicaExchange\n"
2839  s+="---- number of clusters %s \n"%str(len(self.clusters))
2840  s+="---- number of models %s \n"%str(len(self.stath0))
2841  return s
2842 
2843  def __getitem__(self,int_slice_adaptor):
2844  if type(int_slice_adaptor) is int:
2845  return self.clusters[int_slice_adaptor]
2846  elif type(int_slice_adaptor) is slice:
2847  return self.__iter__(int_slice_adaptor)
2848  else:
2849  raise TypeError("Unknown Type")
2850 
2851  def __len__(self):
2852  return len(self.clusters)
2853 
2854  def __iter__(self,slice_key=None):
2855  if slice_key is None:
2856  for i in range(len(self)):
2857  yield self[i]
2858  else:
2859  for i in range(len(self))[slice_key]:
2860  yield self[i]
class to link stat files to several rmf files
Definition: /output.py:1088
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
Definition: /macros.py:1412
def apply_molecular_assignments
compute the molecular assignments between multiple copies of the same sequence.
Definition: /macros.py:2805
def set_alignment_selection
Setup the selection onto which the alignment is computed.
Definition: /macros.py:1966
def load_data
Load the data from an external pickled file.
Definition: /macros.py:2056
def get_rbs_and_beads
Returns unique objects in original order.
Definition: /tools.py:1891
Sampling of the system.
Definition: /samplers.py:1
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
Definition: /macros.py:2674
def select_at_all_resolutions
Perform selection using the usual keywords but return ALL resolutions (BEADS and GAUSSIANS).
Definition: /tools.py:1824
A helper output for model evaluation.
A member of a rigid body, it has internal (local) coordinates.
Definition: rigid_bodies.h:634
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:635
Sample using replica exchange.
Definition: /samplers.py:518
Sample using molecular dynamics.
Definition: /samplers.py:397
def load_clusters
Load the clusters from a pickle file.
Definition: /macros.py:2093
A container for models organized into clusters.
Definition: /output.py:1337
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Definition: /macros.py:1329
Class for easy writing of PDBs, RMFs, and stat files.
Definition: /output.py:65
Performs alignment and RMSD calculation for two sets of coordinates.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
def aggregate
initial filling of the clusters.
Definition: /macros.py:2625
def build_model
Create model.
Definition: /macros.py:867
A class to cluster structures.
A macro for running all the basic operations of analysis.
Definition: /macros.py:1269
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignment.
Definition: /macros.py:2790
def __init__
Construction of the Class.
Definition: /macros.py:1911
def update_clusters
Update the cluster id numbers.
Definition: /macros.py:2587
Extends the functionality of IMP.atom.Molecule.
Set of python classes to create a multi-state, multi-resolution IMP hierarchy.
This class contains analysis utilities to investigate ReplicaExchange results.
Definition: /macros.py:1901
def save_data
Save the data for the whole models into a pickle file.
Definition: /macros.py:2041
Utility classes and functions for reading and storing PMI files.
A macro to help setup and run replica exchange.
Definition: /macros.py:54
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
def compute_cluster_center
Compute the cluster center for a given cluster.
Definition: /macros.py:2115
def merge_aggregates
merge the clusters that have close members
Definition: /macros.py:2650
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: /tools.py:1064
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
def set_data
Set the data from an external IMP.pmi1.output.Data.
Definition: /macros.py:2048
Sample using Monte Carlo.
Definition: /samplers.py:36
def undo_apply_molecular_assignments
Undo the Copy index assignment.
Definition: /macros.py:2819
def __init__
Constructor.
Definition: /macros.py:853
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: XYZR.h:47
Tools for clustering and cluster analysis.
Definition: pmi1/Analysis.py:1
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Definition: /macros.py:2154
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: /tools.py:1914
Add uncertainty to a particle.
Definition: /Uncertainty.h:24
Miscellaneous utilities.
Definition: /tools.py:1
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
def cluster
Cluster the models based on RMSD.
Definition: /macros.py:1982
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: /Uncertainty.h:30
Set up the representation of all proteins and nucleic acid macromolecules.
The standard decorator for manipulating molecular structures.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def deprecated_method
Python decorator to mark a method as deprecated.
Definition: __init__.py:9538
def update_seldicts
Update the seldicts.
Definition: /macros.py:2598
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def precision
Compute the precision of a cluster.
Definition: /macros.py:2188
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def get_cluster_data
Return the model data from a cluster.
Definition: /macros.py:2031
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
Definition: /macros.py:2217
def BuildModel0
Construct a component for each subunit (no splitting, nothing fancy).
Definition: /macros.py:1190
def save_clusters
Save the clusters into a pickle file.
Definition: /macros.py:2081
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Definition: /Uncertainty.h:45
def get_representation
Return the Representation object.
Definition: /macros.py:707
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Representation of the system.
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi1.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
Definition: /macros.py:2235
Building macro.
Definition: /macros.py:850
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9522
def get_restraint_set
Get a RestraintSet containing all PMI restraints added to the model.
Definition: /tools.py:72
A macro to build a Representation based on a Topology and lists of movers.
Definition: /macros.py:568
The general base class for IMP exceptions.
Definition: exception.h:48
def get_best_models
Given a list of stat files, read them all and find the best models.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
Definition: provenance.h:266
def add_protocol_output
Capture details of the modeling protocol.
Definition: /macros.py:1320
A class for reading stat files (either rmf or ascii v1 and v2)
Definition: /output.py:771
Compute mean density maps from structures.
Collect timing information.
Definition: /tools.py:85
def scatter_and_gather
Synchronize data over a parallel run.
Definition: /tools.py:1114
def __init__
Constructor.
Definition: /macros.py:590
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
Definition: /macros.py:2697
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Classes for writing output files and processing them.
Definition: /output.py:1
def refine
Refine the clusters by merging the ones whose centers are close.
Definition: /macros.py:1995
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
Definition: /macros.py:1950
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Support for the RMF file format for storing hierarchical molecular data and markup.
def set_symmetric
Store names of symmetric molecules.
Definition: /macros.py:1959
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:770
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Definition: /macros.py:2132
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27