IMP logo
IMP Reference Guide  2.6.0
The Integrative Modeling Platform
domino_model.py
1 """@namespace IMP.EMageFit.domino_model
2  Classes to manage a model using DOMINO.
3 """
4 
5 import sys
6 import time
7 import csv
8 import logging
9 log = logging.getLogger("DominoModel")
10 
11 import IMP
12 import IMP.domino as domino
13 import IMP.core as core
14 import IMP.container as container
15 import IMP.display as display
16 import IMP.atom as atom
17 import IMP.algebra as alg
18 import IMP.em2d as em2d
19 import IMP.multifit as multifit
20 
21 import IMP.EMageFit.imp_general.comparisons as comparisons
22 import IMP.EMageFit.imp_general.alignments as alignments
23 import IMP.EMageFit.imp_general.io as io
24 import IMP.EMageFit.imp_general.representation as representation
25 
26 import IMP.EMageFit.restraints as restraints
27 import IMP.EMageFit.buildxlinks as buildxlinks
28 import IMP.EMageFit.solutions_io as solutions_io
29 import IMP.EMageFit.csv_related as csv_related
30 
31 
32 field_delim = "," # separate fields in the output text file
33 unit_delim = "/" # separate units within a field (eg, reference frames).
34  # It is used in output text files and in databse files
35 
36 
38 
39  """
40  Management of a model using DOMINO
41  """
42 
43  def __init__(self, name="my model"):
44  self.model = IMP.Model()
45  self.model.set_name(name)
46  self.configuration_sampling_done = False
47  self.assignments_sampling_done = False
48  self.rb_states_table = domino.ParticleStatesTable()
49  self.merge_tree = None
50  # measure_models is True only when doing benchmark
51  self.measure_models = False
52  # The first time that create_coarse is found and its value is True
53  # a coarse version of the assembly is built
54  self.create_coarse = True
55  self.restraints = dict()
56  self.xlinks_dict = buildxlinks.XlinksDict()
57 
58  def add_restraint(self, r, name, weight, max_score=False):
59  """
60  Adds a restraint to the model
61  @param r An IMP.Restraint object
62  @param name Name for the restraint
63  @param weight Weight for the restraint
64  @param max_score Maximum score allowed for the restraint. If
65  max_score is False, there is no limit for the value of the restraint
66  """
67  log.info("Adding restraint %s weight %s max_score %s",
68  name, weight, max_score)
69  if "-" in name:
70  raise ValueError("SQL database does not accept restraint names "
71  "containing -")
72  r.set_name(name)
73  if max_score is not None and max_score:
74  r.set_maximum_score(max_score)
75  r.set_weight(weight)
76  self.restraints[name] = r
77 
79  """
80  Aligns the set of structures considered for DOMINO sampling.
81  The function:
82  1) reads the possible reference frames that the
83  rb_states_table stores for each rigid body. This table
84  must be filled before using this function. Usually it is
85  filled with the results from a previous Monte Carlo sampling.
86  If the solutions from Monte Carlo seem to have the same structure
87  but they are not aligned to each other, this function can
88  help setting better starting positions to use with DOMINO.
89  2) Gets the first state for each of the rigid bodies and sets
90  a reference structure using such states.
91  3) Aligns all the rest of the structures respect to the reference
92  4) Replaces the values of the reference frames stored in the
93  rb_states_table with the new values obtained from the alignments.
94  It does it for all states of a rigid body.
95  @note: If this function is applied, the parameters "anchor" and "fixed"
96  are ignored, as they are superseded by the use of the alignments
97  calculated here.
98  """
99  log.debug("Align the configurations read from the database before "
100  "running DOMINO")
101  rb_states = []
102  n_states = []
103  for rb in self.components_rbs:
104  ps = self.rb_states_table.get_particle_states(rb)
105  n = ps.get_number_of_particle_states()
106  if n == 0:
107  raise ValueError(
108  "There are no particle states for %s" %
109  rb.get_name())
110  n_states.append(n)
111  rb_states.append(ps)
112  # coordinates of the first configuration (there is at least one for all
113  # the rbs)
114  for rb, states in zip(self.components_rbs, rb_states):
115  states.load_particle_state(0, rb)
116  reference_coords = get_coordinates(self.components_rbs)
117  aligned_transformations = [[] for rb in self.components_rbs]
118  for i in range(1, max(n_states)):
119  log.debug("Aligning rigid bodies configuration %s" % i)
120  # load the configuration
121  for j in range(len(self.components_rbs)):
122  # if there are no more particle states for this rigid body, use
123  # the last
124  state_index = min(i, n_states[j] - 1)
125  rb_states[j].load_particle_state(
126  state_index,
127  self.components_rbs[j])
128  coords = get_coordinates(self.components_rbs)
129  T = alg.get_transformation_aligning_first_to_second(
130  coords, reference_coords)
131  for j, rb in enumerate(self.components_rbs):
132  t = rb.get_reference_frame().get_transformation_to()
133  new_t = alg.compose(T, t)
134  aligned_transformations[j].append(new_t)
135  # set the new possible transformations
136  for i, rb in enumerate(self.components_rbs):
137  self.set_rb_states(rb, aligned_transformations[i])
138 
139  def set_xlink_restraint(self, id1, chain1, residue1, id2, chain2, residue2,
140  distance, weight, stddev, max_score=False):
141  """
142  Set a restraint on the maximum distance between 2 residues
143  @param id1 Name of the first component
144  @param chain1
145  @param residue1 Residue number for the aminoacid in the first
146  component.The number is the number in the PDB file, not the
147  number relative to the beginning of the chain
148  @param id2 Name of the second component
149  @param chain2
150  @param residue2 Residue number for the aminoacid in the second
151  component.
152  @param distance Maximum distance tolerated
153  @param weight Weight of the restraint
154  @param stddev Standard deviation used to approximate the
155  HarmonicUpperBound function to a Gaussian
156  @param max_score See help for add_restraint(). If none is given,
157  the maximum score is set to allow a maximum distance of 10 Angstrom
158  greater than the parameter "distance".
159  """
160  xlink = buildxlinks.Xlink(
161  id1,
162  chain1,
163  residue1,
164  id2,
165  chain2,
166  residue2,
167  distance)
168  log.info("Setting cross-linking restraint ")
169  log.info("%s", xlink.show())
170  self.xlinks_dict.add(xlink)
171  # setup restraint
172  A = representation.get_component(self.assembly, xlink.first_id)
173  s1 = IMP.atom.Selection(A, chain=xlink.first_chain,
174  residue_index=xlink.first_residue)
175  p1 = s1.get_selected_particles()[0]
176  B = representation.get_component(self.assembly, xlink.second_id)
177  s2 = IMP.atom.Selection(B, chain=xlink.second_chain,
178  residue_index=xlink.second_residue)
179  p2 = s2.get_selected_particles()[0]
180  k = core.Harmonic.get_k_from_standard_deviation(stddev)
181  score = core.HarmonicUpperBound(xlink.distance, k)
182  pair_score = IMP.core.DistancePairScore(score)
183  r = IMP.core.PairRestraint(pair_score, IMP.ParticlePair(p1, p2))
184  if not max_score:
185  error_distance_allowed = 100
186  max_score = weight * \
187  score.evaluate(distance + error_distance_allowed)
188  log.info("%s, max_score %s", xlink.show(), max_score)
189  self.add_restraint(r, xlink.get_name(), weight, max_score)
190 
191  def set_complementarity_restraint(self, name1, name2, rname,
192  max_sep_distance, max_penetration,
193  weight, max_score=1e10):
194  """
195  Set a restraint for geometric complementarity between 2 components
196  @param name1 name of
197  @param name2 - The restraint is applied to this components
198  @param rname - The name given to the restraint
199  @param max_sep_distance - maximum distance between molecules
200  tolerated by the restraint
201  @param max_penetration - Maximum penetration allowed (angstrom)
202  @param weight
203  @param max_score
204  """
205  log.info("Setting geometric complementarity restraint %s: %s - %s",
206  rname, name1, name2)
207  A = representation.get_component(self.assembly, name1)
208  B = representation.get_component(self.assembly, name2)
209  restraint = multifit.ComplementarityRestraint(atom.get_leaves(A),
210  atom.get_leaves(B), rname)
211  restraint.set_maximum_separation(max_sep_distance)
212  restraint.set_maximum_penetration(max_penetration)
213  log.debug("Maximum separation: %s Maximum penetration score: %s",
214  max_sep_distance, max_penetration)
215  self.add_restraint(restraint, rname, weight, max_score)
216 
217  def create_coarse_assembly(self, n_residues):
218  """
219  Simplify the assembly with a coarse representation
220  @param n_residues Number of residues used for a coarse particle
221  """
222  if self.create_coarse:
223  log.info("Creating simplified assembly")
224  self.coarse_assembly = \
225  representation.create_simplified_assembly(self.assembly,
226  self.components_rbs, n_residues)
227  self.create_coarse = False
228 
229  def do_sampling(self, mode="assignments_heap_container", params=None):
230  """
231  Do Domino sampling
232  @param mode The mode used to recover solutions from domino.
233  It can have
234  the value "configuration", "assignments", or
235  "assignments_heap_container". The mode
236  "configuration" recovers all possible configurations
237  (takes a while to generate them).
238  The mode "assignments" only provides the assignments
239  (state numbers) of the solutions. It is faster but requires
240  generating the solutions afterwards
241  The mode "assignments_heap_container" selects the best solutions
242  after each merging in DOMINO, discarding the rest.
243  In practice I used the mode "assignments_heap_container"
244  @param params
245  """
246  t0 = time.time()
247  if mode == "configuration":
248  self.configuration_set = self.sampler.create_sample()
249  tf = time.time()
250  log.info("found %s configurations. Time %s",
251  self.configuration_set.get_number_of_configurations(),
252  tf - t0)
253  self.configuration_sampling_done = True
254  elif mode == "assignments":
255  subset = self.rb_states_table.get_subset()
256  log.info(
257  "Do sampling with assignments. Subset has %s elements: %s",
258  len(subset), subset)
259  self.solution_assignments = \
260  self.sampler.get_sample_assignments(subset)
261  tf = time.time()
262  log.info("found %s assignments. Time %s",
263  len(self.solution_assignments), tf - t0)
264  self.assignments_sampling_done = True
265  elif mode == "assignments_heap_container":
266  subset = self.rb_states_table.get_subset()
267  log.info("DOMINO sampling an assignments_heap_container. "
268  "Subset has %s elements: %s", len(subset), subset)
269  # last vertex is the root of the merge tree
270  root = self.merge_tree.get_vertices()[-1]
271  container = self.get_assignments_with_heap(root,
272  params.heap_solutions)
273  self.solution_assignments = container.get_assignments()
274  tf = time.time()
275  log.info("found %s assignments. Time %s",
276  len(self.solution_assignments), tf - t0)
277  self.assignments_sampling_done = True
278  else:
279  raise ValueError("Requested wrong mode for the DOMINO sampling")
280 
281  def get_assignment_text(self, assignment, subset):
282  """
283  Get the formatted text for an assignment
284  @param subset Subset of components of the assembly
285  @param assignment Assignment class with the states for the subset
286  @note: The order in assignment and subset is not always the order
287  of the rigid bodies in self.components_rbs. The function reorders
288  the terms in the assignment so there is correspondence.
289  """
290  ordered_assignment = []
291  for rb in self.components_rbs:
292  for i, particle in enumerate(subset):
293  if rb.get_particle() == particle:
294  ordered_assignment.append(assignment[i])
295  text = "|".join([str(i) for i in ordered_assignment])
296  return text
297 
298  def get_assignment_and_RFs(self, assignment, subset):
299  """
300  Return a line with texts for an assignment and the
301  the reference frames of the RigidBodies in the subset.
302  @param subset Subset of components of the assembly
303  @param assignment Assignment class with the states for the subset
304  @note: see the get_assignment_assignment_text() note.
305  """
306  ordered_assignment = []
307  RFs = []
308  for rb in self.components_rbs:
309  for i, particle in enumerate(subset):
310  if rb.get_particle() == particle:
311  j = assignment[i]
312  ordered_assignment.append(j)
313  pstates = self.rb_states_table.get_particle_states(rb)
314  pstates.load_particle_state(j, rb.get_particle())
315  RFs.append(rb.get_reference_frame())
316  RFs_text = unit_delim.join(
317  [io.ReferenceFrameToText(ref).get_text() for ref in RFs])
318  assignment_numbers = "|".join([str(i) for i in ordered_assignment])
319  return [assignment_numbers, RFs_text]
320 
321  def get_assignment_info(self, assignment, subset):
322  """
323  Info related to an assignment. Returns a list with text for the
324  assignment and all the scores for the restraints
325  @param subset Subset of components of the assembly
326  @param assignment Assignment class with the states for the subset
327  """
328  text = self.get_assignment_and_RFs(assignment, subset)
329  scores, total_score = self.get_assignment_scores(assignment, subset)
330  info = text + [total_score] + scores
331  return info
332 
333  def get_assignment_scores(self, assignment, subset):
334  """
335  Returns a list with the values for the restraints on a subset of
336  the components of the assembly
337  @param subset Subset of components of the assembly
338  @param assignment Assignment class with the states for the subset
339  """
340  restraints_to_evaluate = \
341  self.restraint_cache.get_restraints(subset, [])
342  scores = [self.restraint_cache.get_score(r, subset, assignment)
343  for r in restraints_to_evaluate]
344  total_score = sum(scores)
345  return scores, total_score
346 
347  def load_state(self, assignment):
348  """
349  Load the positions of the components given by the assignment
350  @param assignment Assignment class
351  """
352  subset = self.rb_states_table.get_subset()
353  domino.load_particle_states(subset, assignment, self.rb_states_table)
354 
355  def get_assignments_with_heap(self, vertex, k=0):
356  """
357  Domino sampling that recovers the assignments for the root of the
358  merge tree, but
359  conserving only the best k scores for each vertex of the tree.
360  @param[in] vertex Vertex with the root of the current merge tree. This
361  function is recursive.
362  @param[in] k
363  """
364  if(self.sampler.get_number_of_subset_filter_tables() == 0):
365  raise ValueError("No subset filter tables")
366  if(self.merge_tree is None):
367  raise ValueError("No merge tree")
368  # In the merge tree, the names of the vertices are the subsets.
369  # The type of the vertex name is a domino.Subset
370  subset = self.merge_tree.get_vertex_name(vertex)
371  log.info("computing assignments for vertex %s", subset)
372  t0 = time.time()
373  assignments_container = domino.HeapAssignmentContainer(subset, k,
374  self.restraint_cache, "my_heap_assignments_container")
375  neighbors = self.merge_tree.get_out_neighbors(vertex)
376  if len(neighbors) == 0: # A leaf
377  # Fill the container with the assignments for the leaf
378  self.sampler.load_vertex_assignments(vertex, assignments_container)
379  else:
380  if neighbors[0] > neighbors[1]:
381  # get_vertex_assignments() methods in domino
382  # expects the children in sorted order
383  neighbors = [neighbors[1], neighbors[0]]
384  # recurse on the two children
385  container_child0 = self.get_assignments_with_heap(neighbors[0], k)
386  container_child1 = self.get_assignments_with_heap(neighbors[1], k)
387  self.sampler.load_vertex_assignments(vertex,
388  container_child0,
389  container_child1,
390  assignments_container)
391  tf = time.time() - t0
392  log.info("Merge tree vertex: %s assignments: %s Time %s sec.", subset,
393  assignments_container.get_number_of_assignments(), tf)
394  return assignments_container
395 
396  def get_restraint_value_for_assignment(self, assignment, name):
397  """
398  Recover the value of a restraint
399  @param name of the restraint
400  @param assignment Assignment class containing the assignment for
401  the solution desired
402  """
403  if not self.assignments_sampling_done:
404  raise ValueError("DominoModel: sampling not done")
405  log.debug("Computing restraint value for assignment %s", assignment)
406  self.load_state(assignment)
407  for rb in self.components_rbs:
408  log.debug("rb - %s",
409  rb.get_reference_frame().get_transformation_to())
410  val = self.restraints[name].evaluate(False)
411  log.debug("restraint %s = %s", restraint.get_name(), val)
412  return val
413 
415  """
416  Sets the native model for benchmark, by reading the native
417  structure and set the rigid bodies.
418  """
419  self.measure_models = True
420  self.native_model = IMP.Model()
421  if hasattr(params.benchmark, "fn_pdb_native"):
422  self.native_assembly = \
423  representation.create_assembly_from_pdb(self.native_model,
424  params.benchmark.fn_pdb_native, params.names)
425  elif hasattr(params.benchmark, "fn_pdbs_native"):
426  self.native_assembly = \
427  representation.create_assembly(self.native_model,
428  params.benchmark.fn_pdbs_native, params.names)
429  else:
430  raise ValueError("set_native_assembly_for_benchmark: Requested "
431  "to set a native assembly for benchmark but did not provide "
432  "its pdb, or the pdbs of the components")
433  self.native_rbs = representation.create_rigid_bodies(
434  self.native_assembly)
435  anchor_assembly(self.native_rbs, params.anchor)
436 
437  def set_rb_states(self, rb, transformations):
438  """
439  Add a set of reference frames as possible states for a rigid body
440  @param rb The rigid body
441  @param transformations Transformations are used to build the
442  reference frames for the rigid body.
443  """
444  log.info("Set rigid body states for %s", rb.get_name())
445  RFs = [alg.ReferenceFrame3D(T) for T in transformations]
446  for st in RFs:
447  log.debug("%s", st)
448  rb_states = domino.RigidBodyStates(RFs)
449  self.rb_states_table.set_particle_states(rb, rb_states)
450  st = self.rb_states_table.get_particle_states(rb)
451  log.info("RigidBodyStates created %s",
452  st.get_number_of_particle_states())
453 
455  """
456  Add a RestraintScoreSubsetFilterTable to DOMINO that allows to
457  reject assignments that have score worse than the thresholds for
458  the restraints
459  """
460  log.info("Adding RestraintScoreSubsetFilterTable")
461  # Restraint Cache is a centralized container of the restraints and
462  # their scores
463  self.restraint_cache = domino.RestraintCache(self.rb_states_table)
464  self.restraint_cache.add_restraints(self.restraints.values())
465  rssft = domino.RestraintScoreSubsetFilterTable(self.restraint_cache)
466  rssft.set_name('myRestraintScoreSubsetFilterTable')
467  self.sampler.add_subset_filter_table(rssft)
468 
469  def set_assembly_components(self, fn_pdbs, names):
470  """
471  Sets the components of an assembly, each one given as a separate
472  PDB file.
473  @param fn_pdbs List with the names of the pdb files
474  @param names List with the names of the components of the assembly.
475  """
476  if len(fn_pdbs) != len(names):
477  raise ValueError("Each PDB file needs a name")
478  self.names = names
479  self.assembly = representation.create_assembly(self.model, fn_pdbs,
480  names)
481  self.components_rbs = representation.create_rigid_bodies(self.assembly)
482  self.not_optimized_rbs = []
483 
484  def set_assembly(self, fn_pdb, names):
485  """
486  Sets an assembly from a single PDB file. The function assumes that
487  the components of the assembly are the chains of the PDB file.
488  @param fn_pdb PDB with the file for the assembly
489  @param names Names for each of the components (chains)
490  of the assembly
491  """
492  self.assembly = representation.create_assembly_from_pdb(self.model,
493  fn_pdb,
494  names)
495  self.components_rbs = representation.create_rigid_bodies(self.assembly)
496  self.not_optimized_rbs = []
497 
499  """
500  BranchAndBoundAssignmentsTable enumerate states based on provided
501  filtered using the provided the subset filter tables
502  """
503  log.info("Setting BranchAndBoundAssignmentsTable")
504  fts = []
505  for i in range(self.sampler.get_number_of_subset_filter_tables()):
506  ft = self.sampler.get_subset_filter_table(i)
507  fts.append(ft)
508  at = domino.BranchAndBoundAssignmentsTable(self.rb_states_table, fts)
509  self.sampler.set_assignments_table(at)
510  self.assignments_table = at
511 
513  """
514  ExclusionSubsetFilterTable ensures that two particles are not given
515  the same state at the same time
516  """
517  log.info("Setting ExclusionSubsetFilterTable")
518  ft = domino.ExclusionSubsetFilterTable(self.rb_states_table)
519  self.sampler.add_subset_filter_table(ft)
520 
522  """
523  Creates a DOMINO sampler and adds the required filter tables
524  """
525  if self.merge_tree is None:
526  raise ValueError("Merge tree for DOMINO not set. It is required "
527  "to setup the sampler")
528  log.info("Domino sampler")
529  self.sampler = domino.DominoSampler(self.model, self.rb_states_table)
530  self.sampler.set_restraints(self.restraints.values())
531  self.sampler.set_log_level(IMP.TERSE)
532  self.sampler.set_merge_tree(self.merge_tree)
536 
537  def read_merge_tree(self, fn):
538  """
539  Read and create a merge tree from a file.
540  The format of the file is the format written by write merge_tree()
541  It does not matter the order of the indices in the file, as
542  they are sorted by this function.
543  The only condition is that the index for the vertex that is the
544  root of the tree MUST be the largest. This is guaranteed when
545  creating a merge tree with create_merge_tree()
546  @param fn File containing the merge tree
547  """
548  log.info("Reading merge tree from %s", fn)
549  ps = self.rb_states_table.get_particles()
550  log.debug("particles")
551  for p in ps:
552  log.debug("%s", p.get_name())
553  rows = csv_related.read_csv(fn, delimiter=field_delim)
554  for i in range(len(rows)):
555  row = rows[i]
556  rows[i] = [int(row[0]), int(row[1]), int(row[2]), row[3]]
557  # Sort rows by vertice index
558  rows.sort()
559  subsets = []
560  # build subsets from the rows text
561  for row in rows:
562  names = row[3].split(unit_delim)
563  log.debug("row %s names %s", row, names)
564  # get particles in the subset
565  particles = []
566  for name in names:
567  l = [p for p in ps if p.get_name() == name]
568  if (len(l) > 1):
569  ValueError("More than one particle with same name" % names)
570  particles.extend(l)
571  s = domino.Subset(particles)
572  subset_names = [p.get_name() for p in particles]
573  log.debug("Merge tree Subset %s. Particles %s ", s, subset_names)
574  subsets.append(s)
575  # The vertex with the largest index is the root.
576  # trick: get the merge tree object from a tree with only one node ...
577  jt = domino.SubsetGraph()
578  jt.add_vertex(subsets[0])
579  mt = domino.get_merge_tree(jt)
580  # ... and add the rest of the vertices
581  for i in range(1, len(subsets)):
582  mt.add_vertex(subsets[i]) # the name of the vertex is a subset
583  # set edges
584  for row in rows:
585  vid = row[0]
586  child_left = row[1]
587  child_right = row[2]
588  if child_left != -1:
589  mt.add_edge(vid, child_left)
590  if child_right != -1:
591  mt.add_edge(vid, child_right)
592  self.merge_tree = mt
593  log.info("%s", self.merge_tree.show_graphviz())
594 
595  def create_merge_tree(self):
596  """
597  Creates a merge tree from the restraints that are set already
598  """
599  rs = self.restraints.values()
600  ig = domino.get_interaction_graph(rs, self.rb_states_table)
601 # pruned_dep = IMP.get_pruned_dependency_graph(self.model)
602 # IMP.show_graphviz(pruned_dep)
603 # IMP.show_graphviz(ig)
604  jt = domino.get_junction_tree(ig)
605 # IMP.show_graphviz(jt)
606  self.merge_tree = domino.get_balanced_merge_tree(jt)
607 # IMP.show_graphviz(self.merge_tree)
608  log.info("Balanced merge tree created")
609  log.info("%s", self.merge_tree.show_graphviz())
610 
611  def write_merge_tree(self, fn):
612  """
613  Writes the current merge tree to file. The format is:
614  parent | child_left | child_right | labels
615  @param fn File for storing the merge tree
616  """
617  log.info("Writing merge to %s", fn)
618  if self.merge_tree is None:
619  raise ValueError("No merge tree")
620  f = open(fn, "w")
621  f.write(
622  "# Vertex index | Child0 | Child1 | label (names of the particles)\n")
623  w = csv.writer(f, delimiter=field_delim)
624  root = self.merge_tree.get_vertices()[-1]
625  self.write_subset(root, w)
626  f.close()
627 
628  def write_subset(self, v, w):
629  """
630  Writes lines describing a subset (vertex of a merge tree)
631  @param w A csv.writer
632  @param v Vertex index
633  """
634  no_child_index = -1
635  subset = self.merge_tree.get_vertex_name(v)
636  names = [p.get_name() for p in subset]
637  reps = [x for x in names if names.count(x) > 1] # repeated names
638  if(len(reps)):
639  raise ValueError("The names of the "
640  "particles in the subset %s are not unique" % subset)
641  names = unit_delim.join(names)
642  neighbors = self.merge_tree.get_out_neighbors(v)
643  if len(neighbors) == 0: # A leaf
644  # Fill the container with the assignments for the leaf
645  w.writerow([v, no_child_index, no_child_index, names])
646  else:
647  if neighbors[0] > neighbors[1]:
648  neighbors = [neighbors[1], neighbors[0]]
649  w.writerow([v, neighbors[0], neighbors[1], names])
650  self.write_subset(neighbors[0], w)
651  self.write_subset(neighbors[1], w)
652 
653  def set_connectivity_restraint(self, names, rname,
654  distance=10,
655  weight=1.0, max_score=None, n_pairs=1,
656  stddev=2):
657  """
658  Set a connectivity restraint between the elements of the assembly
659  @param names List with all the elements to be connected
660  @param distance Maximum distance tolerated by the restraint
661  @param rname Name for the restraint
662  @param weight Weight for the restraint
663  @param max_score Maximum score for the restraint
664  @param n_pairs Number of pairs of particles used in the
665  KClosePairScore of the connectivity restraint
666  @param stddev Standard deviation used to approximate the
667  HarmonicUpperBound function to a Gaussian
668  """
669  components = []
670  for name in names:
671  c = representation.get_component(self.coarse_assembly, name)
672  components.append(c)
673  log.info("Connectivity restraint for %s", components)
674  spring_constant = core.Harmonic.get_k_from_standard_deviation(stddev)
675  if max_score is None:
676  max_score = weight * spring_constant * n_pairs * (stddev) ** 2
677  r = restraints.get_connectivity_restraint(
678  components, distance, n_pairs,
679  spring_constant)
680  self.add_restraint(r, rname, weight, max_score)
681 
682  def set_em2d_restraint(self, name, fn_images_sel, pixel_size, resolution,
683  n_projections, weight, max_score=False,
684  mode="fast", n_optimized=1):
685  """
686  Set a Restraint computing the em2d score.
687  @param name Name for the restraint
688  @param fn_images_sel Selection file with the names of the images
689  used for the restraint
690  @param pixel_size - pixel size in the images
691  @param resolution - resolution used to downsample the projections
692  of the model when projecting
693  @param weight Weight of the restraint
694  @param max_score - Maximum value tolerated for the restraint
695  @param n_projections - Number of projections to generate
696  when projecting the model to do the coarse alignments
697  @param mode - Mode used for computing the restraints.
698  @param n_optimized - number of results from the coarse
699  alignment that are refined with the Simplex algorithm
700  to get a more accurate value for the restraint
701  """
702  log.info("Adding a em2D restraint: %s", fn_images_sel)
703  restraint_params = em2d.Em2DRestraintParameters(pixel_size,
704  resolution,
705  n_projections)
706  r = restraints.get_em2d_restraint(self.assembly,
707  fn_images_sel,
708  restraint_params,
709  mode, n_optimized)
710  self.add_restraint(r, name, weight, max_score)
711 
712  def set_not_optimized(self, name):
713  """
714  Set a part of the model as not optimized (it does not move during
715  the model optimization)
716  @param name of the component to optimized
717  """
718  if name not in self.names:
719  raise ValueError("DominoModel: There is not component "
720  "in the assembly with this name")
721  rb_name = representation.get_rb_name(name)
722  self.not_optimized_rbs.append(rb_name)
723 
724  def set_close_pairs_excluded_volume_restraint(self, distance=10,
725  weight=1.0, ratio=0.05, stddev=2,
726  max_score=False):
727  """
728  Creates an excluded volume restraint between all the components
729  of the coarse assembly.See help for
730  @param distance Maximum distance tolerated between particles
731  @param weight Weight for the restraint
732  @param stddev
733  @param max_score Maximum value for the restraint. If the parameter
734  is None, an automatic value is computed (using the ratio).
735  @param ratio Fraction of the number of possible pairs of
736  particles that are tolerated to overlap. The maximum
737  score is modified according to this ratio.
738  I have found that ratios of 0.05-0.1 work well allowing for
739  some interpenetration
740  @param stddev Standard deviation used to approximate the
741  HarmonicUpperBound function to a Gaussian
742  """
743  k = core.Harmonic.get_k_from_standard_deviation(stddev)
744  for i, c1 in enumerate(self.coarse_assembly.get_children()):
745  for j, c2 in enumerate(self.coarse_assembly.get_children()):
746  if j > i:
747  name = "exc_vol_%s_%s" % (c1.get_name(), c2.get_name())
748 
749  ls1 = atom.get_leaves(c1)
750  ls2 = atom.get_leaves(c2)
751  possible_pairs = len(ls1) * len(ls2)
752  n_pairs = possible_pairs * ratio
753 
754  marker1 = IMP.Particle(
755  self.model, "marker1 " + name)
756  marker2 = IMP.Particle(
757  self.model, "marker2 " + name)
758  table_refiner = core.TableRefiner()
759  table_refiner.add_particle(marker1, ls1)
760  table_refiner.add_particle(marker2, ls2)
761  score = core.HarmonicLowerBound(distance, k)
762  pair_score = core.SphereDistancePairScore(score)
763  close_pair_score = core.ClosePairsPairScore(pair_score,
764  table_refiner,
765  distance)
766  r = core.PairRestraint(close_pair_score,
767  IMP.ParticlePair(marker1, marker2))
768 
769  if not max_score:
770  minimum_distance_allowed = 0
771  max_score = weight * n_pairs * \
772  score.evaluate(minimum_distance_allowed)
773  log.debug("Setting close_pairs_excluded_volume_restraint(), "
774  "max_score %s", max_score)
775  self.add_restraint(r, name, weight, max_score)
776 
777  def set_pair_score_restraint(self, name1, name2,
778  restraint_name, distance=10,
779  weight=1.0, n_pairs=1, stddev=2, max_score=None):
780  """
781  Set a pair_score restraint between the coarse representation
782  of two components
783  @param name1 Name of the first component
784  @param name2 Name of the second component
785  @param restraint_name Name for the restraint
786  @param distance Maximum distance tolerated between particles
787  @param weight Weight of the restraint
788  @param n_pairs
789  @param max_score Maximum value tolerated for the restraint
790  @param stddev Standard deviation used to approximate the
791  HarmonicUpperBound function to a Gaussian
792  """
793  table_refiner = core.TableRefiner()
794 
795  # The particles in A are attached to a marker particle via a refiner.
796  # When the refiner gets a request for marker1, it returns the attached
797  # particles
798  A = representation.get_component(self.coarse_assembly, name1)
799  marker1 = IMP.Particle(self.model, "marker1 " + restraint_name)
800  table_refiner.add_particle(marker1, atom.get_leaves(A))
801  # same for B
802  B = representation.get_component(self.coarse_assembly, name2)
803  marker2 = IMP.Particle(self.model, "marker2 " + restraint_name)
804  table_refiner.add_particle(marker2, atom.get_leaves(B))
805 
806  k = core.Harmonic.get_k_from_standard_deviation(stddev)
808  # The score is set for the n_pairs closest pairs of particles
809  pair_score = core.KClosePairsPairScore(score, table_refiner, n_pairs)
810  # When KClosePairsPairScore is called, the refiner will provide the
811  # particles for A and B
812  if not max_score:
813  # Build a maximum score based on the function type that is used,
814  # an HarmonicUpperBound
815  temp_score = core.HarmonicUpperBound(distance, k)
816  error_distance_allowed = 10
817  max_score = weight * n_pairs * \
818  temp_score.evaluate(distance + error_distance_allowed)
819 
820  log.info("Setting pair score restraint for %s %s. k = %s, max_score "
821  "= %s, stddev %s", name1, name2, k, max_score, stddev)
822  r = core.PairRestraint(
823  pair_score,
825  marker1,
826  marker2))
827  self.add_restraint(r, restraint_name, weight, max_score)
828 
829  def write_solutions_database(self, fn_database, max_number=None):
830  """
831  Write the results of the DOMINO sampling to a SQLite database.
832  @param fn_database
833  @param max_number Maximum number of results to write
834  """
835  log.info("Creating the database of solutions")
836  if self.measure_models and not hasattr(self, "native_model"):
837  raise ValueError("The native model has not been set")
839  db.create(fn_database, overwrite=True)
840  db.connect(fn_database)
841  subset = self.rb_states_table.get_subset()
842  restraints_names = self.get_restraints_names_used(subset)
843  db.add_results_table(restraints_names, self.measure_models)
844  n = len(self.solution_assignments)
845  if max_number is not None:
846  n = min(n, max_number)
847  for sol_id, assignment in enumerate(self.solution_assignments[0:n]):
848  txt = self.get_assignment_text(assignment, subset)
849  # load the positions of the components in the state
850  self.load_state(assignment)
851  RFs = [rb.get_reference_frame() for rb in self.components_rbs]
852  scores, total_score = self.get_assignment_scores(
853  assignment, subset)
854  measures = None
855  if(self.measure_models):
856  measures = measure_model(self.assembly, self.native_assembly,
857  self.components_rbs, self.native_rbs)
858  db.add_record(sol_id, txt, RFs, total_score, scores, measures)
859  # Add native state if doing benchmarking
860  if self.measure_models:
861  assignment = "native"
862  RFs = [rb.get_reference_frame() for rb in self.native_rbs]
863  scores, total_score = self.get_restraints_for_native(
864  restraints_names)
865  db.add_native_record(assignment, RFs, total_score, scores)
866  db.save_records()
867  db.close()
868 
869  def get_restraints_names_used(self, subset):
870  """ Get the names of the restraints used for a subset
871  @param subset Subset of components of the assembly
872  """
873  return [r.get_name() for r
874  in self.restraint_cache.get_restraints(subset, [])]
875 
876  def get_restraints_for_native(self, restraints_names):
877  """
878  Get values of the restraints for the native structure
879  @param restraints_names Names of the restraints requested
880  @return a list with the values of all scores, and the total score
881  """
882  saved = [rb.get_reference_frame() for rb in self.components_rbs]
883  # Set the state of the model to native
884  for rb, rn in zip(self.components_rbs, self.native_rbs):
885  # remove sub-rigid bodies
886  rb_members = [m for m in rb.get_members()
887  if not core.RigidBody.get_is_setup(m.get_particle())]
888  rn_members = [m for m in rn.get_members()
889  if not core.RigidBody.get_is_setup(m.get_particle())]
890  rb_coords = [m.get_coordinates() for m in rb_members]
891  rn_coords = [m.get_coordinates() for m in rn_members]
892 
893  # align and put rb in the position of rn
894  if len(rn_coords) != len(rb_coords):
895  raise ValueError("Mismatch in the number of members. "
896  "Reference %d Aligned %d " % (len(rn_coords), len(rb_coords)))
897  T = alg.get_transformation_aligning_first_to_second(rb_coords,
898  rn_coords)
899  t = rb.get_reference_frame().get_transformation_to()
900  new_t = alg.compose(T, t)
901  rb.set_reference_frame(alg.ReferenceFrame3D(new_t))
902  scores = []
903  for name in restraints_names:
904  scores.append(self.restraints[name].evaluate(False))
905  total_score = sum(scores)
906  representation.set_reference_frames(self.components_rbs, saved)
907  return scores, total_score
908 
909  def write_pdb_for_assignment(self, assignment, fn_pdb):
910  """
911  Write the solution in the assignment
912  @param assignment Assignment class with the states for the subset
913  @param fn_pdb File to write the model
914  """
915  if not self.assignments_sampling_done:
916  raise ValueError("DominoModel: sampling not done")
917  log.info("Writting PDB %s for assignment %s", fn_pdb, assignment)
918  self.load_state(assignment)
919  atom.write_pdb(self.assembly, fn_pdb)
920 
921  def write_pdb_for_configuration(self, n, fn_pdb):
922  """
923  Write a file with a configuration for the model (configuration
924  here means a configuration in DOMINO)
925  @param n Index of the configuration desired.
926  @param fn_pdb
927  """
928  if not self.configuration_sampling_done:
929  raise ValueError("DominoModel: sampling not done")
930  log.debug("Writting PDB %s for configuration %s", fn_pdb, n)
931  configuration_set.load_configuration(n)
932  atom.write_pdb(self.assembly, fn_pdb)
933 
934  def write_pdb_for_reference_frames(self, RFs, fn_pdb):
935  """
936  Write a PDB file with a solution given by a set of reference frames
937  @param RFs Reference frames for the elements of the complex
938  @param fn_pdb File to write the solution
939  """
940  log.debug("Writting PDB %s for reference frames %s", fn_pdb, RFs)
941  representation.set_reference_frames(self.components_rbs, RFs)
942  atom.write_pdb(self.assembly, fn_pdb)
943 
944  def write_pdbs_for_reference_frames(self, RFs, fn_base):
945  """
946  Write a separate PDB for each of the elements
947  @param RFs Reference frames for the elements of the complex
948  @param fn_base base string to build the names of the PDBs files
949  """
950  log.debug("Writting PDBs with basename %s", fn_base)
951  representation.set_reference_frames(self.components_rbs, RFs)
952  for i, ch in enumerate(self.assembly.get_children()):
953  atom.write_pdb(ch, fn_base + "component-%02d.pdb" % i)
954 
955  def write_pdb_for_component(self, component_index, ref, fn_pdb):
956  """
957  Write one component of the assembly
958  @param component_index Position of the component in the assembly
959  @param ref Reference frame of the component
960  @param fn_pdb Output PDB
961  """
962  ("Writting PDB %s for component %s", fn_pdb, component_index)
963  self.components_rbs[component_index].set_reference_frame(ref)
964  hierarchy_component = self.assembly.get_child(component_index)
965  atom.write_pdb(hierarchy_component, fn_pdb)
966 
968  """Get a ScoringFunction that includes all restraints."""
969  return IMP.core.RestraintsScoringFunction(self.restraints.values())
970 
971  def write_monte_carlo_solution(self, fn_database):
972  """
973  Write the solution of a MonteCarlo run
974  @param fn_database Database file with the solution.
975  The database will contain only one record
976  """
977  total_score = 0
978  rnames = []
979  scores = []
980  for r in self.restraints.values():
981  score = r.evaluate(False)
982  rnames.append(r.get_name())
983  scores.append(score)
984  total_score += score
986  db.create(fn_database, overwrite=True)
987  db.connect(fn_database)
988  db.add_results_table(rnames, self.measure_models)
989  RFs = [rb.get_reference_frame() for rb in self.components_rbs]
990  measures = None
991  if(self.measure_models):
992  measures = measure_model(self.assembly, self.native_assembly,
993  self.components_rbs, self.native_rbs)
994  solution_id = 0
995  db.add_record(solution_id, "", RFs, total_score, scores, measures)
996  db.save_records()
997  db.close()
998 
999  def print_restraints_values(self):
1000  print("Restraints: Name, weight, value, maximum_value")
1001  total_score = 0
1002  for r in self.restraints.values():
1003  score = r.evaluate(False)
1004  # print "%20s %18f %18f %18f" % (r.get_name(), r.get_weight(),
1005  # score, r.get_maximum_score())
1006  print("%20s %18f %18f" % (r.get_name(), r.get_weight(), score))
1007  total_score += score
1008  print("total_score:", total_score)
1009 
1010 
1011 def anchor_assembly(components_rbs, anchored):
1012  """
1013  "Anchor" a set of rigid bodies, by setting the position of one of them
1014  at the origin (0,0,0).
1015  @param components_rbs Rigid bodies of the components
1016  @param anchored - List of True/False values indicating if the components
1017  of the assembly are anchored. The function sets the FIRST anchored
1018  component in the (0,0,0) coordinate and moves the other
1019  components with the same translation. If all the values are False, the
1020  function does not do anything
1021  """
1022  if True in anchored:
1023  anchored_rb = None
1024  for a, rb in zip(anchored, components_rbs):
1025  if a:
1026  anchored_rb = rb
1027  break
1028  center = anchored_rb.get_coordinates()
1029  log.info("Anchoring assembly at (0,0,0)")
1030  for rb in components_rbs:
1031  T = rb.get_reference_frame().get_transformation_to()
1032  t = T.get_translation()
1033  R = T.get_rotation()
1034  log.debug("%s: Rerefence frame BEFORE %s",
1035  rb.get_name(), rb.get_reference_frame())
1036  T2 = alg.Transformation3D(R, t - center)
1037  rb.set_reference_frame(alg.ReferenceFrame3D(T2))
1038  log.debug("%s Rerefence frame AFTER %s",
1039  rb.get_name(), rb.get_reference_frame())
1040 
1041 
1042 def measure_model(assembly, native_assembly, rbs, native_rbs):
1043  """
1044  Get the drms, cdrms and crmsd for a model
1045  @param assembly Hierarchy for an assembly
1046  @param native_assembly Hierarchy of the native structure
1047  @param rbs Rigid bodies for the components of the assembly
1048  @param native_rbs Rigid bodies for the components of the native
1049  structure
1050  @return cdrms, cdrms, crmsd
1051  - drms - typical drms measure
1052  - cdrms - drms for the centroids of the rigid bodies of the components
1053  of the assembly
1054  - crmsd - rmsd for the centroids
1055  """
1056  log.debug("Measure model")
1057  drms = comparisons.get_drms_for_backbone(assembly, native_assembly)
1058  RFs = [rb.get_reference_frame() for rb in rbs]
1059  vs = [r.get_transformation_to().get_translation() for r in RFs]
1060  nRFs = [rb.get_reference_frame() for rb in native_rbs]
1061  nvs = [r.get_transformation_to().get_translation() for r in nRFs]
1062  cdrms = atom.get_drms(vs, nvs)
1063  crmsd, RFs = alignments.align_centroids_using_pca(RFs, nRFs)
1064  log.debug("drms %s cdrms %s crmsd %s", drms, cdrms, crmsd)
1065  return [drms, cdrms, crmsd]
1066 
1067 
1068 def get_coordinates(rigid_bodies):
1069  """
1070  Return a list of the coordinates of all the members of the rigid bodies
1071  """
1072  if len(rigid_bodies) == 0:
1073  raise ValueError(
1074  "get_coordinates: There are not rigid bodies to get coordinates from")
1075  coords = []
1076  for rb in rigid_bodies:
1077  # remove sub-rigid bodies
1078  rb_members = [m for m in rb.get_members()
1079  if not core.RigidBody.get_is_setup(m.get_particle())]
1080  coords.extend([m.get_coordinates() for m in rb_members])
1081  return coords
def write_pdb_for_component
Write one component of the assembly.
def get_assignment_and_RFs
Return a line with texts for an assignment and the the reference frames of the RigidBodies in the sub...
Utility functions to handle restraints.
Definition: restraints.py:1
A harmonic upper bound on the distance between two spheres.
Lower bound harmonic function (non-zero when feature < mean)
Compute the complementarity between two molecules.
def set_rb_states
Add a set of reference frames as possible states for a rigid body.
def add_exclusion_filter_table
ExclusionSubsetFilterTable ensures that two particles are not given the same state at the same time...
Restraints using electron microscopy 2D images (class averages).
def get_restraints_names_used
Get the names of the restraints used for a subset.
def write_solutions_database
Write the results of the DOMINO sampling to a SQLite database.
Various classes to hold sets of particles.
Upper bound harmonic function (non-zero when feature > mean)
A class to store an fixed array of same-typed values.
Definition: Array.h:33
def get_assignment_scores
Returns a list with the values for the restraints on a subset of the components of the assembly...
Utility functions to handle representation.
def anchor_assembly
"Anchor" a set of rigid bodies, by setting the position of one of them at the origin (0...
Utility functions to handle alignments.
Definition: alignments.py:1
Sample best solutions using Domino.
Definition: DominoSampler.h:32
def set_close_pairs_excluded_volume_restraint
Creates an excluded volume restraint between all the components of the coarse assembly.See help for.
def set_xlink_restraint
Set a restraint on the maximum distance between 2 residues.
def get_assignment_text
Get the formatted text for an assignment.
def load_state
Load the positions of the components given by the assignment.
Filter a configuration of the subset using the Model thresholds.
Represent a subset of the particles being optimized.
Definition: Subset.h:33
def get_coordinates
Return a list of the coordinates of all the members of the rigid bodies.
Utility functions to handle IO.
Definition: io.py:1
A score on the distance between the surfaces of two spheres.
def create_merge_tree
Creates a merge tree from the restraints that are set already.
Management of a model using DOMINO.
Definition: domino_model.py:37
def get_restraint_value_for_assignment
Recover the value of a restraint.
def write_merge_tree
Writes the current merge tree to file.
def set_connectivity_restraint
Set a connectivity restraint between the elements of the assembly.
def write_pdbs_for_reference_frames
Write a separate PDB for each of the elements.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
def write_pdb_for_reference_frames
Write a PDB file with a solution given by a set of reference frames.
Do not allow two particles to be in the same state.
def create_coarse_assembly
Simplify the assembly with a coarse representation.
def write_subset
Writes lines describing a subset (vertex of a merge tree)
def get_restraints_for_native
Get values of the restraints for the native structure.
Utility functions to store and retrieve solution information.
Definition: solutions_io.py:1
A lookup based particle refiner.
Definition: TableRefiner.h:21
def set_native_assembly_for_benchmark
Sets the native model for benchmark, by reading the native structure and set the rigid bodies...
def get_scoring_function
Get a ScoringFunction that includes all restraints.
def do_sampling
Do Domino sampling.
def write_pdb_for_configuration
Write a file with a configuration for the model (configuration here means a configuration in DOMINO) ...
def set_complementarity_restraint
Set a restraint for geometric complementarity between 2 components.
def measure_model
Get the drms, cdrms and crmsd for a model.
Fitting atomic structures into a cryo-electron microscopy density map.
Class for managing the results of the experiments.
def set_assembly
Sets an assembly from a single PDB file.
def get_assignment_info
Info related to an assignment.
def read_merge_tree
Read and create a merge tree from a file.
def setup_domino_sampler
Creates a DOMINO sampler and adds the required filter tables.
def align_rigid_bodies_states
Aligns the set of structures considered for DOMINO sampling.
Definition: domino_model.py:78
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Parameters used by Em2DRestraint and ProjectionFinder.
def get_assignments_with_heap
Domino sampling that recovers the assignments for the root of the merge tree, but conserving only the...
def set_pair_score_restraint
Set a pair_score restraint between the coarse representation of two components.
Utility functions for comparisons.
Definition: comparisons.py:1
Class to handle individual model particles.
Definition: Particle.h:37
def set_em2d_restraint
Set a Restraint computing the em2d score.
def add_branch_and_bound_assignments_table
BranchAndBoundAssignmentsTable enumerate states based on provided filtered using the provided the sub...
def set_not_optimized
Set a part of the model as not optimized (it does not move during the model optimization) ...
def add_restraint
Adds a restraint to the model.
Definition: domino_model.py:58
Applies a PairScore to a Pair.
Definition: PairRestraint.h:29
Output IMP model data in various file formats.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
def write_pdb_for_assignment
Write the solution in the assignment.
Divide-and-conquer inferential optimization in discrete space.
def set_assembly_components
Sets the components of an assembly, each one given as a separate PDB file.
def add_restraint_score_filter_table
Add a RestraintScoreSubsetFilterTable to DOMINO that allows to reject assignments that have score wor...
def write_monte_carlo_solution
Write the solution of a MonteCarlo run.