IMP logo
IMP Reference Guide  2.20.0
The Integrative Modeling Platform
domino_model.py
1 """@namespace IMP.EMageFit.domino_model
2  Classes to manage a model using DOMINO.
3 """
4 
5 import time
6 import csv
7 import logging
8 
9 import IMP
10 import IMP.domino as domino
11 import IMP.core as core
12 import IMP.atom as atom
13 import IMP.algebra as alg
14 import IMP.em2d as em2d
15 import IMP.multifit as multifit
16 
17 import IMP.EMageFit.imp_general.comparisons as comparisons
18 import IMP.EMageFit.imp_general.alignments as alignments
19 import IMP.EMageFit.imp_general.io as io
20 import IMP.EMageFit.imp_general.representation as representation
21 
22 import IMP.EMageFit.restraints as restraints
23 import IMP.EMageFit.buildxlinks as buildxlinks
24 import IMP.EMageFit.solutions_io as solutions_io
25 import IMP.EMageFit.csv_related as csv_related
26 
27 log = logging.getLogger("DominoModel")
28 
29 # separate fields in the output text file
30 field_delim = ","
31 
32 # separate units within a field (eg, reference frames).
33 # It is used in output text files and in database files
34 unit_delim = "/"
35 
36 
38 
39  """
40  Management of a model using DOMINO
41  """
42 
43  def __init__(self, name="my model"):
44  self.model = IMP.Model()
45  self.model.set_name(name)
46  self.configuration_sampling_done = False
47  self.assignments_sampling_done = False
48  self.rb_states_table = domino.ParticleStatesTable()
49  self.merge_tree = None
50  # measure_models is True only when doing benchmark
51  self.measure_models = False
52  # The first time that create_coarse is found and its value is True
53  # a coarse version of the assembly is built
54  self.create_coarse = True
55  self.restraints = dict()
56  self.xlinks_dict = buildxlinks.XlinksDict()
57 
58  def add_restraint(self, r, name, weight, max_score=False):
59  """
60  Adds a restraint to the model
61  @param r An IMP.Restraint object
62  @param name Name for the restraint
63  @param weight Weight for the restraint
64  @param max_score Maximum score allowed for the restraint. If
65  max_score is False, there is no limit for the value of
66  the restraint
67  """
68  log.info("Adding restraint %s weight %s max_score %s",
69  name, weight, max_score)
70  if "-" in name:
71  raise ValueError("SQL database does not accept restraint names "
72  "containing -")
73  r.set_name(name)
74  if max_score is not None and max_score:
75  r.set_maximum_score(max_score)
76  r.set_weight(weight)
77  self.restraints[name] = r
78 
79  def align_rigid_bodies_states(self):
80  """
81  Aligns the set of structures considered for DOMINO sampling.
82  The function:
83  1) reads the possible reference frames that the
84  rb_states_table stores for each rigid body. This table
85  must be filled before using this function. Usually it is
86  filled with the results from a previous Monte Carlo
87  sampling. If the solutions from Monte Carlo seem to have
88  the same structure but they are not aligned to each other,
89  this function can help setting better starting positions
90  to use with DOMINO.
91  2) Gets the first state for each of the rigid bodies and sets
92  a reference structure using such states.
93  3) Aligns all the rest of the structures respect to the
94  reference
95  4) Replaces the values of the reference frames stored in the
96  rb_states_table with the new values obtained from the
97  alignments. It does it for all states of a rigid body.
98  @note: If this function is applied, the parameters "anchor"
99  and "fixed" are ignored, as they are superseded by the use
100  of the alignments calculated here.
101  """
102  log.debug("Align the configurations read from the database before "
103  "running DOMINO")
104  rb_states = []
105  n_states = []
106  for rb in self.components_rbs:
107  ps = self.rb_states_table.get_particle_states(rb)
108  n = ps.get_number_of_particle_states()
109  if n == 0:
110  raise ValueError(
111  "There are no particle states for %s" %
112  rb.get_name())
113  n_states.append(n)
114  rb_states.append(ps)
115  # coordinates of the first configuration (there is at least one for all
116  # the rbs)
117  for rb, states in zip(self.components_rbs, rb_states):
118  states.load_particle_state(0, rb)
119  reference_coords = get_coordinates(self.components_rbs)
120  aligned_transformations = [[] for rb in self.components_rbs]
121  for i in range(1, max(n_states)):
122  log.debug("Aligning rigid bodies configuration %s" % i)
123  # load the configuration
124  for j in range(len(self.components_rbs)):
125  # if there are no more particle states for this rigid body, use
126  # the last
127  state_index = min(i, n_states[j] - 1)
128  rb_states[j].load_particle_state(
129  state_index,
130  self.components_rbs[j])
131  coords = get_coordinates(self.components_rbs)
132  T = alg.get_transformation_aligning_first_to_second(
133  coords, reference_coords)
134  for j, rb in enumerate(self.components_rbs):
135  t = rb.get_reference_frame().get_transformation_to()
136  new_t = alg.compose(T, t)
137  aligned_transformations[j].append(new_t)
138  # set the new possible transformations
139  for i, rb in enumerate(self.components_rbs):
140  self.set_rb_states(rb, aligned_transformations[i])
141 
142  def set_xlink_restraint(self, id1, chain1, residue1, id2, chain2, residue2,
143  distance, weight, stddev, max_score=False):
144  """
145  Set a restraint on the maximum distance between 2 residues
146  @param id1 Name of the first component
147  @param chain1
148  @param residue1 Residue number for the aminoacid in the first
149  component.The number is the number in the PDB file, not the
150  number relative to the beginning of the chain
151  @param id2 Name of the second component
152  @param chain2
153  @param residue2 Residue number for the aminoacid in the second
154  component.
155  @param distance Maximum distance tolerated
156  @param weight Weight of the restraint
157  @param stddev Standard deviation used to approximate the
158  HarmonicUpperBound function to a Gaussian
159  @param max_score See help for add_restraint(). If none is given,
160  the maximum score is set to allow a maximum distance of 10 Angstrom
161  greater than the parameter "distance".
162  """
163  xlink = buildxlinks.Xlink(
164  id1,
165  chain1,
166  residue1,
167  id2,
168  chain2,
169  residue2,
170  distance)
171  log.info("Setting cross-linking restraint ")
172  log.info("%s", xlink.show())
173  self.xlinks_dict.add(xlink)
174  # setup restraint
175  A = representation.get_component(self.assembly, xlink.first_id)
176  s1 = IMP.atom.Selection(A, chain=xlink.first_chain,
177  residue_index=xlink.first_residue)
178  p1 = s1.get_selected_particles()[0]
179  B = representation.get_component(self.assembly, xlink.second_id)
180  s2 = IMP.atom.Selection(B, chain=xlink.second_chain,
181  residue_index=xlink.second_residue)
182  p2 = s2.get_selected_particles()[0]
183  k = core.Harmonic.get_k_from_standard_deviation(stddev)
184  score = core.HarmonicUpperBound(xlink.distance, k)
185  pair_score = IMP.core.DistancePairScore(score)
186  r = IMP.core.PairRestraint(self.model, pair_score, (p1, p2))
187  if not max_score:
188  error_distance_allowed = 100
189  max_score = weight * \
190  score.evaluate(distance + error_distance_allowed)
191  log.info("%s, max_score %s", xlink.show(), max_score)
192  self.add_restraint(r, xlink.get_name(), weight, max_score)
193 
194  def set_complementarity_restraint(self, name1, name2, rname,
195  max_sep_distance, max_penetration,
196  weight, max_score=1e10):
197  """
198  Set a restraint for geometric complementarity between 2 components
199  @param name1 name of
200  @param name2 - The restraint is applied to this components
201  @param rname - The name given to the restraint
202  @param max_sep_distance - maximum distance between molecules
203  tolerated by the restraint
204  @param max_penetration - Maximum penetration allowed (angstrom)
205  @param weight
206  @param max_score
207  """
208  log.info("Setting geometric complementarity restraint %s: %s - %s",
209  rname, name1, name2)
210  A = representation.get_component(self.assembly, name1)
211  B = representation.get_component(self.assembly, name2)
213  atom.get_leaves(A), atom.get_leaves(B), rname)
214  restraint.set_maximum_separation(max_sep_distance)
215  restraint.set_maximum_penetration(max_penetration)
216  log.debug("Maximum separation: %s Maximum penetration score: %s",
217  max_sep_distance, max_penetration)
218  self.add_restraint(restraint, rname, weight, max_score)
219 
220  def create_coarse_assembly(self, n_residues):
221  """
222  Simplify the assembly with a coarse representation
223  @param n_residues Number of residues used for a coarse particle
224  """
225  if self.create_coarse:
226  log.info("Creating simplified assembly")
227  self.coarse_assembly = representation.create_simplified_assembly(
228  self.assembly, self.components_rbs, n_residues)
229  self.create_coarse = False
230 
231  def do_sampling(self, mode="assignments_heap_container", params=None):
232  """
233  Do Domino sampling
234  @param mode The mode used to recover solutions from domino.
235  It can have
236  the value "configuration", "assignments", or
237  "assignments_heap_container". The mode
238  "configuration" recovers all possible configurations
239  (takes a while to generate them).
240  The mode "assignments" only provides the assignments
241  (state numbers) of the solutions. It is faster but requires
242  generating the solutions afterwards
243  The mode "assignments_heap_container" selects the best solutions
244  after each merging in DOMINO, discarding the rest.
245  In practice I used the mode "assignments_heap_container"
246  @param params
247  """
248  t0 = time.time()
249  if mode == "configuration":
250  self.configuration_set = self.sampler.create_sample()
251  tf = time.time()
252  log.info("found %s configurations. Time %s",
253  self.configuration_set.get_number_of_configurations(),
254  tf - t0)
255  self.configuration_sampling_done = True
256  elif mode == "assignments":
257  subset = self.rb_states_table.get_subset()
258  log.info(
259  "Do sampling with assignments. Subset has %s elements: %s",
260  len(subset), subset)
261  self.solution_assignments = \
262  self.sampler.get_sample_assignments(subset)
263  tf = time.time()
264  log.info("found %s assignments. Time %s",
265  len(self.solution_assignments), tf - t0)
266  self.assignments_sampling_done = True
267  elif mode == "assignments_heap_container":
268  subset = self.rb_states_table.get_subset()
269  log.info("DOMINO sampling an assignments_heap_container. "
270  "Subset has %s elements: %s", len(subset), subset)
271  # last vertex is the root of the merge tree
272  root = self.merge_tree.get_vertices()[-1]
273  container = self.get_assignments_with_heap(root,
274  params.heap_solutions)
275  self.solution_assignments = container.get_assignments()
276  tf = time.time()
277  log.info("found %s assignments. Time %s",
278  len(self.solution_assignments), tf - t0)
279  self.assignments_sampling_done = True
280  else:
281  raise ValueError("Requested wrong mode for the DOMINO sampling")
282 
283  def get_assignment_text(self, assignment, subset):
284  """
285  Get the formatted text for an assignment
286  @param subset Subset of components of the assembly
287  @param assignment Assignment class with the states for the subset
288  @note: The order in assignment and subset is not always the order
289  of the rigid bodies in self.components_rbs. The function reorders
290  the terms in the assignment so there is correspondence.
291  """
292  ordered_assignment = []
293  for rb in self.components_rbs:
294  for i, particle in enumerate(subset):
295  if rb.get_particle() == particle:
296  ordered_assignment.append(assignment[i])
297  text = "|".join([str(i) for i in ordered_assignment])
298  return text
299 
300  def get_assignment_and_RFs(self, assignment, subset):
301  """
302  Return a line with texts for an assignment and the
303  the reference frames of the RigidBodies in the subset.
304  @param subset Subset of components of the assembly
305  @param assignment Assignment class with the states for the subset
306  @note: see the get_assignment_assignment_text() note.
307  """
308  ordered_assignment = []
309  RFs = []
310  for rb in self.components_rbs:
311  for i, particle in enumerate(subset):
312  if rb.get_particle() == particle:
313  j = assignment[i]
314  ordered_assignment.append(j)
315  pstates = self.rb_states_table.get_particle_states(rb)
316  pstates.load_particle_state(j, rb.get_particle())
317  RFs.append(rb.get_reference_frame())
318  RFs_text = unit_delim.join(
319  [io.ReferenceFrameToText(ref).get_text() for ref in RFs])
320  assignment_numbers = "|".join([str(i) for i in ordered_assignment])
321  return [assignment_numbers, RFs_text]
322 
323  def get_assignment_info(self, assignment, subset):
324  """
325  Info related to an assignment. Returns a list with text for the
326  assignment and all the scores for the restraints
327  @param subset Subset of components of the assembly
328  @param assignment Assignment class with the states for the subset
329  """
330  text = self.get_assignment_and_RFs(assignment, subset)
331  scores, total_score = self.get_assignment_scores(assignment, subset)
332  info = text + [total_score] + scores
333  return info
334 
335  def get_assignment_scores(self, assignment, subset):
336  """
337  Returns a list with the values for the restraints on a subset of
338  the components of the assembly
339  @param subset Subset of components of the assembly
340  @param assignment Assignment class with the states for the subset
341  """
342  restraints_to_evaluate = \
343  self.restraint_cache.get_restraints(subset, [])
344  scores = [self.restraint_cache.get_score(r, subset, assignment)
345  for r in restraints_to_evaluate]
346  total_score = sum(scores)
347  return scores, total_score
348 
349  def load_state(self, assignment):
350  """
351  Load the positions of the components given by the assignment
352  @param assignment Assignment class
353  """
354  subset = self.rb_states_table.get_subset()
355  domino.load_particle_states(subset, assignment, self.rb_states_table)
356 
357  def get_assignments_with_heap(self, vertex, k=0):
358  """
359  Domino sampling that recovers the assignments for the root of the
360  merge tree, but
361  conserving only the best k scores for each vertex of the tree.
362  @param[in] vertex Vertex with the root of the current
363  merge tree. This function is recursive.
364  @param[in] k
365  """
366  if self.sampler.get_number_of_subset_filter_tables() == 0:
367  raise ValueError("No subset filter tables")
368  if self.merge_tree is None:
369  raise ValueError("No merge tree")
370  # In the merge tree, the names of the vertices are the subsets.
371  # The type of the vertex name is a domino.Subset
372  subset = self.merge_tree.get_vertex_name(vertex)
373  log.info("computing assignments for vertex %s", subset)
374  t0 = time.time()
375  assignments_container = domino.HeapAssignmentContainer(
376  subset, k, self.restraint_cache, "my_heap_assignments_container")
377  neighbors = self.merge_tree.get_out_neighbors(vertex)
378  if len(neighbors) == 0: # A leaf
379  # Fill the container with the assignments for the leaf
380  self.sampler.load_vertex_assignments(vertex, assignments_container)
381  else:
382  if neighbors[0] > neighbors[1]:
383  # get_vertex_assignments() methods in domino
384  # expects the children in sorted order
385  neighbors = [neighbors[1], neighbors[0]]
386  # recurse on the two children
387  container_child0 = self.get_assignments_with_heap(neighbors[0], k)
388  container_child1 = self.get_assignments_with_heap(neighbors[1], k)
389  self.sampler.load_vertex_assignments(vertex,
390  container_child0,
391  container_child1,
392  assignments_container)
393  tf = time.time() - t0
394  log.info("Merge tree vertex: %s assignments: %s Time %s sec.", subset,
395  assignments_container.get_number_of_assignments(), tf)
396  return assignments_container
397 
398  def get_restraint_value_for_assignment(self, assignment, name):
399  """
400  Recover the value of a restraint
401  @param name of the restraint
402  @param assignment Assignment class containing the assignment for
403  the solution desired
404  """
405  if not self.assignments_sampling_done:
406  raise ValueError("DominoModel: sampling not done")
407  log.debug("Computing restraint value for assignment %s", assignment)
408  self.load_state(assignment)
409  for rb in self.components_rbs:
410  log.debug("rb - %s",
411  rb.get_reference_frame().get_transformation_to())
412  val = self.restraints[name].evaluate(False)
413  log.debug("restraint %s = %s", name, val)
414  return val
415 
417  """
418  Sets the native model for benchmark, by reading the native
419  structure and set the rigid bodies.
420  """
421  self.measure_models = True
422  self.native_model = IMP.Model()
423  if hasattr(params.benchmark, "fn_pdb_native"):
424  self.native_assembly = representation.create_assembly_from_pdb(
425  self.native_model, params.benchmark.fn_pdb_native,
426  params.names)
427  elif hasattr(params.benchmark, "fn_pdbs_native"):
428  self.native_assembly = representation.create_assembly(
429  self.native_model, params.benchmark.fn_pdbs_native,
430  params.names)
431  else:
432  raise ValueError(
433  "set_native_assembly_for_benchmark: Requested "
434  "to set a native assembly for benchmark but did not provide "
435  "its pdb, or the pdbs of the components")
436  self.native_rbs = representation.create_rigid_bodies(
437  self.native_assembly)
438  anchor_assembly(self.native_rbs, params.anchor)
439 
440  def set_rb_states(self, rb, transformations):
441  """
442  Add a set of reference frames as possible states for a rigid body
443  @param rb The rigid body
444  @param transformations Transformations are used to build the
445  reference frames for the rigid body.
446  """
447  log.info("Set rigid body states for %s", rb.get_name())
448  RFs = [alg.ReferenceFrame3D(T) for T in transformations]
449  for st in RFs:
450  log.debug("%s", st)
451  rb_states = domino.RigidBodyStates(RFs)
452  self.rb_states_table.set_particle_states(rb, rb_states)
453  st = self.rb_states_table.get_particle_states(rb)
454  log.info("RigidBodyStates created %s",
455  st.get_number_of_particle_states())
456 
458  """
459  Add a RestraintScoreSubsetFilterTable to DOMINO that allows to
460  reject assignments that have score worse than the thresholds for
461  the restraints
462  """
463  log.info("Adding RestraintScoreSubsetFilterTable")
464  # Restraint Cache is a centralized container of the restraints and
465  # their scores
466  self.restraint_cache = domino.RestraintCache(self.rb_states_table)
467  self.restraint_cache.add_restraints(list(self.restraints.values()))
468  rssft = domino.RestraintScoreSubsetFilterTable(self.restraint_cache)
469  rssft.set_name('myRestraintScoreSubsetFilterTable')
470  self.sampler.add_subset_filter_table(rssft)
471 
472  def set_assembly_components(self, fn_pdbs, names):
473  """
474  Sets the components of an assembly, each one given as a separate
475  PDB file.
476  @param fn_pdbs List with the names of the pdb files
477  @param names List with the names of the components of the assembly.
478  """
479  if len(fn_pdbs) != len(names):
480  raise ValueError("Each PDB file needs a name")
481  self.names = names
482  self.assembly = representation.create_assembly(self.model, fn_pdbs,
483  names)
484  self.components_rbs = representation.create_rigid_bodies(self.assembly)
485  self.not_optimized_rbs = []
486 
487  def set_assembly(self, fn_pdb, names):
488  """
489  Sets an assembly from a single PDB file. The function assumes that
490  the components of the assembly are the chains of the PDB file.
491  @param fn_pdb PDB with the file for the assembly
492  @param names Names for each of the components (chains)
493  of the assembly
494  """
495  self.assembly = representation.create_assembly_from_pdb(self.model,
496  fn_pdb,
497  names)
498  self.components_rbs = representation.create_rigid_bodies(self.assembly)
499  self.not_optimized_rbs = []
500 
502  """
503  BranchAndBoundAssignmentsTable enumerate states based on provided
504  filtered using the provided the subset filter tables
505  """
506  log.info("Setting BranchAndBoundAssignmentsTable")
507  fts = []
508  for i in range(self.sampler.get_number_of_subset_filter_tables()):
509  ft = self.sampler.get_subset_filter_table(i)
510  fts.append(ft)
511  at = domino.BranchAndBoundAssignmentsTable(self.rb_states_table, fts)
512  self.sampler.set_assignments_table(at)
513  self.assignments_table = at
514 
516  """
517  ExclusionSubsetFilterTable ensures that two particles are not given
518  the same state at the same time
519  """
520  log.info("Setting ExclusionSubsetFilterTable")
521  ft = domino.ExclusionSubsetFilterTable(self.rb_states_table)
522  self.sampler.add_subset_filter_table(ft)
523 
525  """
526  Creates a DOMINO sampler and adds the required filter tables
527  """
528  if self.merge_tree is None:
529  raise ValueError("Merge tree for DOMINO not set. It is required "
530  "to setup the sampler")
531  log.info("Domino sampler")
532  self.sampler = domino.DominoSampler(self.model, self.rb_states_table)
533  self.sampler.set_restraints(list(self.restraints.values()))
534  self.sampler.set_log_level(IMP.TERSE)
535  self.sampler.set_merge_tree(self.merge_tree)
539 
540  def read_merge_tree(self, fn):
541  """
542  Read and create a merge tree from a file.
543  The format of the file is the format written by write merge_tree()
544  It does not matter the order of the indices in the file, as
545  they are sorted by this function.
546  The only condition is that the index for the vertex that is the
547  root of the tree MUST be the largest. This is guaranteed when
548  creating a merge tree with create_merge_tree()
549  @param fn File containing the merge tree
550  """
551  log.info("Reading merge tree from %s", fn)
552  ps = self.rb_states_table.get_particles()
553  log.debug("particles")
554  for p in ps:
555  log.debug("%s", p.get_name())
556  rows = csv_related.read_csv(fn, delimiter=field_delim)
557  for i in range(len(rows)):
558  row = rows[i]
559  rows[i] = [int(row[0]), int(row[1]), int(row[2]), row[3]]
560  # Sort rows by vertice index
561  rows.sort()
562  subsets = []
563  # build subsets from the rows text
564  for row in rows:
565  names = row[3].split(unit_delim)
566  log.debug("row %s names %s", row, names)
567  # get particles in the subset
568  particles = []
569  for name in names:
570  lp = [p for p in ps if p.get_name() == name]
571  if (len(lp) > 1):
572  ValueError("More than one particle with same name" % names)
573  particles.extend(lp)
574  s = domino.Subset(particles)
575  subset_names = [p.get_name() for p in particles]
576  log.debug("Merge tree Subset %s. Particles %s ", s, subset_names)
577  subsets.append(s)
578  # The vertex with the largest index is the root.
579  # trick: get the merge tree object from a tree with only one node ...
580  jt = domino.SubsetGraph()
581  jt.add_vertex(subsets[0])
582  mt = domino.get_merge_tree(jt)
583  # ... and add the rest of the vertices
584  for i in range(1, len(subsets)):
585  mt.add_vertex(subsets[i]) # the name of the vertex is a subset
586  # set edges
587  for row in rows:
588  vid = row[0]
589  child_left = row[1]
590  child_right = row[2]
591  if child_left != -1:
592  mt.add_edge(vid, child_left)
593  if child_right != -1:
594  mt.add_edge(vid, child_right)
595  self.merge_tree = mt
596  log.info("%s", self.merge_tree.show_graphviz())
597 
598  def create_merge_tree(self):
599  """
600  Creates a merge tree from the restraints that are set already
601  """
602  rs = list(self.restraints.values())
603  ig = domino.get_interaction_graph(rs, self.rb_states_table)
604 # pruned_dep = IMP.get_pruned_dependency_graph(self.model)
605 # IMP.show_graphviz(pruned_dep)
606 # IMP.show_graphviz(ig)
607  jt = domino.get_junction_tree(ig)
608 # IMP.show_graphviz(jt)
609  self.merge_tree = domino.get_balanced_merge_tree(jt)
610 # IMP.show_graphviz(self.merge_tree)
611  log.info("Balanced merge tree created")
612  log.info("%s", self.merge_tree.show_graphviz())
613 
614  def write_merge_tree(self, fn):
615  """
616  Writes the current merge tree to file. The format is:
617  parent | child_left | child_right | labels
618  @param fn File for storing the merge tree
619  """
620  log.info("Writing merge to %s", fn)
621  if self.merge_tree is None:
622  raise ValueError("No merge tree")
623  f = open(fn, "w")
624  f.write(
625  "# Vertex index | Child0 | Child1 | label "
626  "(names of the particles)\n")
627  w = csv.writer(f, delimiter=field_delim)
628  root = self.merge_tree.get_vertices()[-1]
629  self.write_subset(root, w)
630  f.close()
631 
632  def write_subset(self, v, w):
633  """
634  Writes lines describing a subset (vertex of a merge tree)
635  @param w A csv.writer
636  @param v Vertex index
637  """
638  no_child_index = -1
639  subset = self.merge_tree.get_vertex_name(v)
640  names = [p.get_name() for p in subset]
641  reps = [x for x in names if names.count(x) > 1] # repeated names
642  if len(reps):
643  raise ValueError(
644  "The names of the particles in the subset %s are not unique"
645  % subset)
646  names = unit_delim.join(names)
647  neighbors = self.merge_tree.get_out_neighbors(v)
648  if len(neighbors) == 0: # A leaf
649  # Fill the container with the assignments for the leaf
650  w.writerow([v, no_child_index, no_child_index, names])
651  else:
652  if neighbors[0] > neighbors[1]:
653  neighbors = [neighbors[1], neighbors[0]]
654  w.writerow([v, neighbors[0], neighbors[1], names])
655  self.write_subset(neighbors[0], w)
656  self.write_subset(neighbors[1], w)
657 
658  def set_connectivity_restraint(self, names, rname,
659  distance=10,
660  weight=1.0, max_score=None, n_pairs=1,
661  stddev=2):
662  """
663  Set a connectivity restraint between the elements of the assembly
664  @param names List with all the elements to be connected
665  @param distance Maximum distance tolerated by the restraint
666  @param rname Name for the restraint
667  @param weight Weight for the restraint
668  @param max_score Maximum score for the restraint
669  @param n_pairs Number of pairs of particles used in the
670  KClosePairScore of the connectivity restraint
671  @param stddev Standard deviation used to approximate the
672  HarmonicUpperBound function to a Gaussian
673  """
674  components = []
675  for name in names:
676  c = representation.get_component(self.coarse_assembly, name)
677  components.append(c)
678  log.info("Connectivity restraint for %s", components)
679  spring_constant = core.Harmonic.get_k_from_standard_deviation(stddev)
680  if max_score is None:
681  max_score = weight * spring_constant * n_pairs * (stddev) ** 2
682  r = restraints.get_connectivity_restraint(
683  components, distance, n_pairs,
684  spring_constant)
685  self.add_restraint(r, rname, weight, max_score)
686 
687  def set_em2d_restraint(self, name, fn_images_sel, pixel_size, resolution,
688  n_projections, weight, max_score=False,
689  mode="fast", n_optimized=1):
690  """
691  Set a Restraint computing the em2d score.
692  @param name Name for the restraint
693  @param fn_images_sel Selection file with the names of the images
694  used for the restraint
695  @param pixel_size - pixel size in the images
696  @param resolution - resolution used to downsample the projections
697  of the model when projecting
698  @param weight Weight of the restraint
699  @param max_score - Maximum value tolerated for the restraint
700  @param n_projections - Number of projections to generate
701  when projecting the model to do the coarse alignments
702  @param mode - Mode used for computing the restraints.
703  @param n_optimized - number of results from the coarse
704  alignment that are refined with the Simplex algorithm
705  to get a more accurate value for the restraint
706  """
707  log.info("Adding a em2D restraint: %s", fn_images_sel)
708  restraint_params = em2d.Em2DRestraintParameters(pixel_size,
709  resolution,
710  n_projections)
711  r = restraints.get_em2d_restraint(self.assembly,
712  fn_images_sel,
713  restraint_params,
714  mode, n_optimized)
715  self.add_restraint(r, name, weight, max_score)
716 
717  def set_not_optimized(self, name):
718  """
719  Set a part of the model as not optimized (it does not move during
720  the model optimization)
721  @param name of the component to optimized
722  """
723  if name not in self.names:
724  raise ValueError("DominoModel: There is not component "
725  "in the assembly with this name")
726  rb_name = representation.get_rb_name(name)
727  self.not_optimized_rbs.append(rb_name)
728 
730  self, distance=10, weight=1.0, ratio=0.05, stddev=2,
731  max_score=False):
732  """
733  Creates an excluded volume restraint between all the components
734  of the coarse assembly.See help for
735  @param distance Maximum distance tolerated between particles
736  @param weight Weight for the restraint
737  @param stddev
738  @param max_score Maximum value for the restraint. If the parameter
739  is None, an automatic value is computed (using the ratio).
740  @param ratio Fraction of the number of possible pairs of
741  particles that are tolerated to overlap. The maximum
742  score is modified according to this ratio.
743  I have found that ratios of 0.05-0.1 work well allowing for
744  some interpenetration
745  @param stddev Standard deviation used to approximate the
746  HarmonicUpperBound function to a Gaussian
747  """
748  k = core.Harmonic.get_k_from_standard_deviation(stddev)
749  for i, c1 in enumerate(self.coarse_assembly.get_children()):
750  for j, c2 in enumerate(self.coarse_assembly.get_children()):
751  if j > i:
752  name = "exc_vol_%s_%s" % (c1.get_name(), c2.get_name())
753 
754  ls1 = atom.get_leaves(c1)
755  ls2 = atom.get_leaves(c2)
756  possible_pairs = len(ls1) * len(ls2)
757  n_pairs = possible_pairs * ratio
758 
759  marker1 = IMP.Particle(
760  self.model, "marker1 " + name)
761  marker2 = IMP.Particle(
762  self.model, "marker2 " + name)
763  table_refiner = core.TableRefiner()
764  table_refiner.add_particle(marker1, ls1)
765  table_refiner.add_particle(marker2, ls2)
766  score = core.HarmonicLowerBound(distance, k)
767  pair_score = core.SphereDistancePairScore(score)
768  close_pair_score = core.ClosePairsPairScore(pair_score,
769  table_refiner,
770  distance)
771  r = core.PairRestraint(self.model, close_pair_score,
772  (marker1, marker2))
773 
774  if not max_score:
775  minimum_distance_allowed = 0
776  max_score = weight * n_pairs * \
777  score.evaluate(minimum_distance_allowed)
778  log.debug(
779  "Setting close_pairs_excluded_volume_restraint(), "
780  "max_score %s", max_score)
781  self.add_restraint(r, name, weight, max_score)
782 
783  def set_pair_score_restraint(self, name1, name2,
784  restraint_name, distance=10,
785  weight=1.0, n_pairs=1, stddev=2,
786  max_score=None):
787  """
788  Set a pair_score restraint between the coarse representation
789  of two components
790  @param name1 Name of the first component
791  @param name2 Name of the second component
792  @param restraint_name Name for the restraint
793  @param distance Maximum distance tolerated between particles
794  @param weight Weight of the restraint
795  @param n_pairs
796  @param max_score Maximum value tolerated for the restraint
797  @param stddev Standard deviation used to approximate the
798  HarmonicUpperBound function to a Gaussian
799  """
800  table_refiner = core.TableRefiner()
801 
802  # The particles in A are attached to a marker particle via a refiner.
803  # When the refiner gets a request for marker1, it returns the attached
804  # particles
805  A = representation.get_component(self.coarse_assembly, name1)
806  marker1 = IMP.Particle(self.model, "marker1 " + restraint_name)
807  table_refiner.add_particle(marker1, atom.get_leaves(A))
808  # same for B
809  B = representation.get_component(self.coarse_assembly, name2)
810  marker2 = IMP.Particle(self.model, "marker2 " + restraint_name)
811  table_refiner.add_particle(marker2, atom.get_leaves(B))
812 
813  k = core.Harmonic.get_k_from_standard_deviation(stddev)
815  # The score is set for the n_pairs closest pairs of particles
816  pair_score = core.KClosePairsPairScore(score, table_refiner, n_pairs)
817  # When KClosePairsPairScore is called, the refiner will provide the
818  # particles for A and B
819  if not max_score:
820  # Build a maximum score based on the function type that is used,
821  # an HarmonicUpperBound
822  temp_score = core.HarmonicUpperBound(distance, k)
823  error_distance_allowed = 10
824  max_score = weight * n_pairs * \
825  temp_score.evaluate(distance + error_distance_allowed)
826 
827  log.info("Setting pair score restraint for %s %s. k = %s, max_score "
828  "= %s, stddev %s", name1, name2, k, max_score, stddev)
829  r = core.PairRestraint(self.model, pair_score, (marker1, marker2))
830  self.add_restraint(r, restraint_name, weight, max_score)
831 
832  def write_solutions_database(self, fn_database, max_number=None):
833  """
834  Write the results of the DOMINO sampling to a SQLite database.
835  @param fn_database
836  @param max_number Maximum number of results to write
837  """
838  log.info("Creating the database of solutions")
839  if self.measure_models and not hasattr(self, "native_model"):
840  raise ValueError("The native model has not been set")
842  db.create(fn_database, overwrite=True)
843  db.connect(fn_database)
844  subset = self.rb_states_table.get_subset()
845  restraints_names = self.get_restraints_names_used(subset)
846  db.add_results_table(restraints_names, self.measure_models)
847  n = len(self.solution_assignments)
848  if max_number is not None:
849  n = min(n, max_number)
850  for sol_id, assignment in enumerate(self.solution_assignments[0:n]):
851  txt = self.get_assignment_text(assignment, subset)
852  # load the positions of the components in the state
853  self.load_state(assignment)
854  RFs = [rb.get_reference_frame() for rb in self.components_rbs]
855  scores, total_score = self.get_assignment_scores(
856  assignment, subset)
857  measures = None
858  if self.measure_models:
859  measures = measure_model(self.assembly, self.native_assembly,
860  self.components_rbs, self.native_rbs)
861  db.add_record(sol_id, txt, RFs, total_score, scores, measures)
862  # Add native state if doing benchmarking
863  if self.measure_models:
864  assignment = "native"
865  RFs = [rb.get_reference_frame() for rb in self.native_rbs]
866  scores, total_score = self.get_restraints_for_native(
867  restraints_names)
868  db.add_native_record(assignment, RFs, total_score, scores)
869  db.save_records()
870  db.close()
871 
872  def get_restraints_names_used(self, subset):
873  """ Get the names of the restraints used for a subset
874  @param subset Subset of components of the assembly
875  """
876  return [r.get_name() for r
877  in self.restraint_cache.get_restraints(subset, [])]
878 
879  def get_restraints_for_native(self, restraints_names):
880  """
881  Get values of the restraints for the native structure
882  @param restraints_names Names of the restraints requested
883  @return a list with the values of all scores, and the total score
884  """
885  saved = [rb.get_reference_frame() for rb in self.components_rbs]
886  # Set the state of the model to native
887  for rb, rn in zip(self.components_rbs, self.native_rbs):
888  # remove sub-rigid bodies
889  rb_members = [m for m in rb.get_members()
890  if not core.RigidBody.get_is_setup(m.get_particle())]
891  rn_members = [m for m in rn.get_members()
892  if not core.RigidBody.get_is_setup(m.get_particle())]
893  rb_coords = [m.get_coordinates() for m in rb_members]
894  rn_coords = [m.get_coordinates() for m in rn_members]
895 
896  # align and put rb in the position of rn
897  if len(rn_coords) != len(rb_coords):
898  raise ValueError("Mismatch in the number of members. "
899  "Reference %d Aligned %d "
900  % (len(rn_coords), len(rb_coords)))
901  T = alg.get_transformation_aligning_first_to_second(rb_coords,
902  rn_coords)
903  t = rb.get_reference_frame().get_transformation_to()
904  new_t = alg.compose(T, t)
905  rb.set_reference_frame(alg.ReferenceFrame3D(new_t))
906  scores = []
907  for name in restraints_names:
908  scores.append(self.restraints[name].evaluate(False))
909  total_score = sum(scores)
910  representation.set_reference_frames(self.components_rbs, saved)
911  return scores, total_score
912 
913  def write_pdb_for_assignment(self, assignment, fn_pdb):
914  """
915  Write the solution in the assignment
916  @param assignment Assignment class with the states for the subset
917  @param fn_pdb File to write the model
918  """
919  if not self.assignments_sampling_done:
920  raise ValueError("DominoModel: sampling not done")
921  log.info("Writing PDB %s for assignment %s", fn_pdb, assignment)
922  self.load_state(assignment)
923  atom.write_pdb(self.assembly, fn_pdb)
924 
925  def write_pdb_for_configuration(self, n, fn_pdb):
926  """
927  Write a file with a configuration for the model (configuration
928  here means a configuration in DOMINO)
929  @param n Index of the configuration desired.
930  @param fn_pdb
931  """
932  if not self.configuration_sampling_done:
933  raise ValueError("DominoModel: sampling not done")
934  log.debug("Writing PDB %s for configuration %s", fn_pdb, n)
935  # configuration_set.load_configuration(n)
936  atom.write_pdb(self.assembly, fn_pdb)
937 
938  def write_pdb_for_reference_frames(self, RFs, fn_pdb):
939  """
940  Write a PDB file with a solution given by a set of reference frames
941  @param RFs Reference frames for the elements of the complex
942  @param fn_pdb File to write the solution
943  """
944  log.debug("Writing PDB %s for reference frames %s", fn_pdb, RFs)
945  representation.set_reference_frames(self.components_rbs, RFs)
946  atom.write_pdb(self.assembly, fn_pdb)
947 
948  def write_pdbs_for_reference_frames(self, RFs, fn_base):
949  """
950  Write a separate PDB for each of the elements
951  @param RFs Reference frames for the elements of the complex
952  @param fn_base base string to build the names of the PDBs files
953  """
954  log.debug("Writing PDBs with basename %s", fn_base)
955  representation.set_reference_frames(self.components_rbs, RFs)
956  for i, ch in enumerate(self.assembly.get_children()):
957  atom.write_pdb(ch, fn_base + "component-%02d.pdb" % i)
958 
959  def write_pdb_for_component(self, component_index, ref, fn_pdb):
960  """
961  Write one component of the assembly
962  @param component_index Position of the component in the assembly
963  @param ref Reference frame of the component
964  @param fn_pdb Output PDB
965  """
966  ("Writing PDB %s for component %s", fn_pdb, component_index)
967  self.components_rbs[component_index].set_reference_frame(ref)
968  hierarchy_component = self.assembly.get_child(component_index)
969  atom.write_pdb(hierarchy_component, fn_pdb)
970 
972  """Get a ScoringFunction that includes all restraints."""
974  list(self.restraints.values()))
975 
976  def write_monte_carlo_solution(self, fn_database):
977  """
978  Write the solution of a MonteCarlo run
979  @param fn_database Database file with the solution.
980  The database will contain only one record
981  """
982  total_score = 0
983  rnames = []
984  scores = []
985  for r in self.restraints.values():
986  score = r.evaluate(False)
987  rnames.append(r.get_name())
988  scores.append(score)
989  total_score += score
991  db.create(fn_database, overwrite=True)
992  db.connect(fn_database)
993  db.add_results_table(rnames, self.measure_models)
994  RFs = [rb.get_reference_frame() for rb in self.components_rbs]
995  measures = None
996  if self.measure_models:
997  measures = measure_model(self.assembly, self.native_assembly,
998  self.components_rbs, self.native_rbs)
999  solution_id = 0
1000  db.add_record(solution_id, "", RFs, total_score, scores, measures)
1001  db.save_records()
1002  db.close()
1003 
1004  def print_restraints_values(self):
1005  print("Restraints: Name, weight, value, maximum_value")
1006  total_score = 0
1007  for r in self.restraints.values():
1008  score = r.evaluate(False)
1009  # print "%20s %18f %18f %18f" % (r.get_name(), r.get_weight(),
1010  # score, r.get_maximum_score())
1011  print("%20s %18f %18f" % (r.get_name(), r.get_weight(), score))
1012  total_score += score
1013  print("total_score:", total_score)
1014 
1015 
1016 def anchor_assembly(components_rbs, anchored):
1017  """
1018  "Anchor" a set of rigid bodies, by setting the position of one of them
1019  at the origin (0,0,0).
1020  @param components_rbs Rigid bodies of the components
1021  @param anchored - List of True/False values indicating if the
1022  components of the assembly are anchored. The function sets the
1023  FIRST anchored component in the (0,0,0) coordinate and moves
1024  the other components with the same translation. If all the
1025  values are False, the function does not do anything
1026  """
1027  if True in anchored:
1028  anchored_rb = None
1029  for a, rb in zip(anchored, components_rbs):
1030  if a:
1031  anchored_rb = rb
1032  break
1033  center = anchored_rb.get_coordinates()
1034  log.info("Anchoring assembly at (0,0,0)")
1035  for rb in components_rbs:
1036  T = rb.get_reference_frame().get_transformation_to()
1037  t = T.get_translation()
1038  R = T.get_rotation()
1039  log.debug("%s: Rerefence frame BEFORE %s",
1040  rb.get_name(), rb.get_reference_frame())
1041  T2 = alg.Transformation3D(R, t - center)
1042  rb.set_reference_frame(alg.ReferenceFrame3D(T2))
1043  log.debug("%s Rerefence frame AFTER %s",
1044  rb.get_name(), rb.get_reference_frame())
1045 
1046 
1047 def measure_model(assembly, native_assembly, rbs, native_rbs):
1048  """
1049  Get the drms, cdrms and crmsd for a model
1050  @param assembly Hierarchy for an assembly
1051  @param native_assembly Hierarchy of the native structure
1052  @param rbs Rigid bodies for the components of the assembly
1053  @param native_rbs Rigid bodies for the components of the native
1054  structure
1055  @return cdrms, cdrms, crmsd
1056  - drms - typical drms measure
1057  - cdrms - drms for the centroids of the rigid bodies of the components
1058  of the assembly
1059  - crmsd - rmsd for the centroids
1060  """
1061  log.debug("Measure model")
1062  drms = comparisons.get_drms_for_backbone(assembly, native_assembly)
1063  RFs = [rb.get_reference_frame() for rb in rbs]
1064  vs = [r.get_transformation_to().get_translation() for r in RFs]
1065  nRFs = [rb.get_reference_frame() for rb in native_rbs]
1066  nvs = [r.get_transformation_to().get_translation() for r in nRFs]
1067  cdrms = atom.get_drms(vs, nvs)
1068  crmsd, RFs = alignments.align_centroids_using_pca(RFs, nRFs)
1069  log.debug("drms %s cdrms %s crmsd %s", drms, cdrms, crmsd)
1070  return [drms, cdrms, crmsd]
1071 
1072 
1073 def get_coordinates(rigid_bodies):
1074  """
1075  Return a list of the coordinates of all the members
1076  of the rigid bodies
1077  """
1078  if len(rigid_bodies) == 0:
1079  raise ValueError(
1080  "get_coordinates: There are no rigid bodies to get "
1081  "coordinates from")
1082  coords = []
1083  for rb in rigid_bodies:
1084  # remove sub-rigid bodies
1085  rb_members = [m for m in rb.get_members()
1086  if not core.RigidBody.get_is_setup(m.get_particle())]
1087  coords.extend([m.get_coordinates() for m in rb_members])
1088  return coords
def write_pdb_for_component
Write one component of the assembly.
def get_assignment_and_RFs
Return a line with texts for an assignment and the the reference frames of the RigidBodies in the sub...
Utility functions to handle restraints.
Definition: restraints.py:1
A harmonic upper bound on the distance between two spheres.
Lower bound harmonic function (non-zero when feature < mean)
Compute the complementarity between two molecules.
def set_rb_states
Add a set of reference frames as possible states for a rigid body.
Apply a score to a fixed number of close pairs from the two sets.
def add_exclusion_filter_table
ExclusionSubsetFilterTable ensures that two particles are not given the same state at the same time...
Restraints using electron microscopy 2D images (class averages).
def get_restraints_names_used
Get the names of the restraints used for a subset.
def write_solutions_database
Write the results of the DOMINO sampling to a SQLite database.
Upper bound harmonic function (non-zero when feature > mean)
Apply the score to all pairs whose spheres are within a distance threshold.
def get_assignment_scores
Returns a list with the values for the restraints on a subset of the components of the assembly...
Utility functions to handle representation.
def anchor_assembly
"Anchor" a set of rigid bodies, by setting the position of one of them at the origin (0...
Utility functions to handle alignments.
Definition: alignments.py:1
Sample best solutions using Domino.
Definition: DominoSampler.h:32
def set_close_pairs_excluded_volume_restraint
Creates an excluded volume restraint between all the components of the coarse assembly.See help for.
def set_xlink_restraint
Set a restraint on the maximum distance between 2 residues.
def get_assignment_text
Get the formatted text for an assignment.
def load_state
Load the positions of the components given by the assignment.
Filter a configuration of the subset using the Model thresholds.
Represent a subset of the particles being optimized.
Definition: Subset.h:33
def get_coordinates
Return a list of the coordinates of all the members of the rigid bodies.
Utility functions to handle IO.
Definition: io.py:1
Create a scoring function on a list of restraints.
A score on the distance between the surfaces of two spheres.
def create_merge_tree
Creates a merge tree from the restraints that are set already.
Management of a model using DOMINO.
Definition: domino_model.py:37
def get_restraint_value_for_assignment
Recover the value of a restraint.
def write_merge_tree
Writes the current merge tree to file.
def set_connectivity_restraint
Set a connectivity restraint between the elements of the assembly.
def write_pdbs_for_reference_frames
Write a separate PDB for each of the elements.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
def write_pdb_for_reference_frames
Write a PDB file with a solution given by a set of reference frames.
Do not allow two particles to be in the same state.
def create_coarse_assembly
Simplify the assembly with a coarse representation.
def write_subset
Writes lines describing a subset (vertex of a merge tree)
def get_restraints_for_native
Get values of the restraints for the native structure.
Utility functions to store and retrieve solution information.
Definition: solutions_io.py:1
A lookup based particle refiner.
Definition: TableRefiner.h:21
def set_native_assembly_for_benchmark
Sets the native model for benchmark, by reading the native structure and set the rigid bodies...
def get_scoring_function
Get a ScoringFunction that includes all restraints.
def do_sampling
Do Domino sampling.
def write_pdb_for_configuration
Write a file with a configuration for the model (configuration here means a configuration in DOMINO) ...
def set_complementarity_restraint
Set a restraint for geometric complementarity between 2 components.
def measure_model
Get the drms, cdrms and crmsd for a model.
Fitting atomic structures into a cryo-electron microscopy density map.
Class for managing the results of the experiments.
def set_assembly
Sets an assembly from a single PDB file.
def get_assignment_info
Info related to an assignment.
def read_merge_tree
Read and create a merge tree from a file.
def setup_domino_sampler
Creates a DOMINO sampler and adds the required filter tables.
Score a pair of particles based on the distance between their centers.
def align_rigid_bodies_states
Aligns the set of structures considered for DOMINO sampling.
Definition: domino_model.py:80
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Parameters used by Em2DRestraint and ProjectionFinder.
def get_assignments_with_heap
Domino sampling that recovers the assignments for the root of the merge tree, but conserving only the...
def set_pair_score_restraint
Set a pair_score restraint between the coarse representation of two components.
Utility functions for comparisons.
Definition: comparisons.py:1
Class to handle individual particles of a Model object.
Definition: Particle.h:43
def set_em2d_restraint
Set a Restraint computing the em2d score.
def add_branch_and_bound_assignments_table
BranchAndBoundAssignmentsTable enumerate states based on provided filtered using the provided the sub...
def set_not_optimized
Set a part of the model as not optimized (it does not move during the model optimization) ...
Store a set of k top scoring assignments.
def add_restraint
Adds a restraint to the model.
Definition: domino_model.py:58
Applies a PairScore to a Pair.
Definition: PairRestraint.h:31
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
def write_pdb_for_assignment
Write the solution in the assignment.
Divide-and-conquer inferential optimization in discrete space.
def set_assembly_components
Sets the components of an assembly, each one given as a separate PDB file.
def add_restraint_score_filter_table
Add a RestraintScoreSubsetFilterTable to DOMINO that allows to reject assignments that have score wor...
def write_monte_carlo_solution
Write the solution of a MonteCarlo run.