IMP logo
IMP Reference Guide  2.17.0
The Integrative Modeling Platform
domino_model.py
1 """@namespace IMP.EMageFit.domino_model
2  Classes to manage a model using DOMINO.
3 """
4 
5 import time
6 import csv
7 import logging
8 
9 import IMP
10 import IMP.domino as domino
11 import IMP.core as core
12 import IMP.atom as atom
13 import IMP.algebra as alg
14 import IMP.em2d as em2d
15 import IMP.multifit as multifit
16 
17 import IMP.EMageFit.imp_general.comparisons as comparisons
18 import IMP.EMageFit.imp_general.alignments as alignments
19 import IMP.EMageFit.imp_general.io as io
20 import IMP.EMageFit.imp_general.representation as representation
21 
22 import IMP.EMageFit.restraints as restraints
23 import IMP.EMageFit.buildxlinks as buildxlinks
24 import IMP.EMageFit.solutions_io as solutions_io
25 import IMP.EMageFit.csv_related as csv_related
26 
27 log = logging.getLogger("DominoModel")
28 
29 field_delim = "," # separate fields in the output text file
30 unit_delim = "/" # separate units within a field (eg, reference frames).
31  # It is used in output text files and in databse files
32 
33 
35 
36  """
37  Management of a model using DOMINO
38  """
39 
40  def __init__(self, name="my model"):
41  self.model = IMP.Model()
42  self.model.set_name(name)
43  self.configuration_sampling_done = False
44  self.assignments_sampling_done = False
45  self.rb_states_table = domino.ParticleStatesTable()
46  self.merge_tree = None
47  # measure_models is True only when doing benchmark
48  self.measure_models = False
49  # The first time that create_coarse is found and its value is True
50  # a coarse version of the assembly is built
51  self.create_coarse = True
52  self.restraints = dict()
53  self.xlinks_dict = buildxlinks.XlinksDict()
54 
55  def add_restraint(self, r, name, weight, max_score=False):
56  """
57  Adds a restraint to the model
58  @param r An IMP.Restraint object
59  @param name Name for the restraint
60  @param weight Weight for the restraint
61  @param max_score Maximum score allowed for the restraint. If
62  max_score is False, there is no limit for the value of
63  the restraint
64  """
65  log.info("Adding restraint %s weight %s max_score %s",
66  name, weight, max_score)
67  if "-" in name:
68  raise ValueError("SQL database does not accept restraint names "
69  "containing -")
70  r.set_name(name)
71  if max_score is not None and max_score:
72  r.set_maximum_score(max_score)
73  r.set_weight(weight)
74  self.restraints[name] = r
75 
76  def align_rigid_bodies_states(self):
77  """
78  Aligns the set of structures considered for DOMINO sampling.
79  The function:
80  1) reads the possible reference frames that the
81  rb_states_table stores for each rigid body. This table
82  must be filled before using this function. Usually it is
83  filled with the results from a previous Monte Carlo
84  sampling. If the solutions from Monte Carlo seem to have
85  the same structure but they are not aligned to each other,
86  this function can help setting better starting positions
87  to use with DOMINO.
88  2) Gets the first state for each of the rigid bodies and sets
89  a reference structure using such states.
90  3) Aligns all the rest of the structures respect to the
91  reference
92  4) Replaces the values of the reference frames stored in the
93  rb_states_table with the new values obtained from the
94  alignments. It does it for all states of a rigid body.
95  @note: If this function is applied, the parameters "anchor"
96  and "fixed" are ignored, as they are superseded by the use
97  of the alignments calculated here.
98  """
99  log.debug("Align the configurations read from the database before "
100  "running DOMINO")
101  rb_states = []
102  n_states = []
103  for rb in self.components_rbs:
104  ps = self.rb_states_table.get_particle_states(rb)
105  n = ps.get_number_of_particle_states()
106  if n == 0:
107  raise ValueError(
108  "There are no particle states for %s" %
109  rb.get_name())
110  n_states.append(n)
111  rb_states.append(ps)
112  # coordinates of the first configuration (there is at least one for all
113  # the rbs)
114  for rb, states in zip(self.components_rbs, rb_states):
115  states.load_particle_state(0, rb)
116  reference_coords = get_coordinates(self.components_rbs)
117  aligned_transformations = [[] for rb in self.components_rbs]
118  for i in range(1, max(n_states)):
119  log.debug("Aligning rigid bodies configuration %s" % i)
120  # load the configuration
121  for j in range(len(self.components_rbs)):
122  # if there are no more particle states for this rigid body, use
123  # the last
124  state_index = min(i, n_states[j] - 1)
125  rb_states[j].load_particle_state(
126  state_index,
127  self.components_rbs[j])
128  coords = get_coordinates(self.components_rbs)
129  T = alg.get_transformation_aligning_first_to_second(
130  coords, reference_coords)
131  for j, rb in enumerate(self.components_rbs):
132  t = rb.get_reference_frame().get_transformation_to()
133  new_t = alg.compose(T, t)
134  aligned_transformations[j].append(new_t)
135  # set the new possible transformations
136  for i, rb in enumerate(self.components_rbs):
137  self.set_rb_states(rb, aligned_transformations[i])
138 
139  def set_xlink_restraint(self, id1, chain1, residue1, id2, chain2, residue2,
140  distance, weight, stddev, max_score=False):
141  """
142  Set a restraint on the maximum distance between 2 residues
143  @param id1 Name of the first component
144  @param chain1
145  @param residue1 Residue number for the aminoacid in the first
146  component.The number is the number in the PDB file, not the
147  number relative to the beginning of the chain
148  @param id2 Name of the second component
149  @param chain2
150  @param residue2 Residue number for the aminoacid in the second
151  component.
152  @param distance Maximum distance tolerated
153  @param weight Weight of the restraint
154  @param stddev Standard deviation used to approximate the
155  HarmonicUpperBound function to a Gaussian
156  @param max_score See help for add_restraint(). If none is given,
157  the maximum score is set to allow a maximum distance of 10 Angstrom
158  greater than the parameter "distance".
159  """
160  xlink = buildxlinks.Xlink(
161  id1,
162  chain1,
163  residue1,
164  id2,
165  chain2,
166  residue2,
167  distance)
168  log.info("Setting cross-linking restraint ")
169  log.info("%s", xlink.show())
170  self.xlinks_dict.add(xlink)
171  # setup restraint
172  A = representation.get_component(self.assembly, xlink.first_id)
173  s1 = IMP.atom.Selection(A, chain=xlink.first_chain,
174  residue_index=xlink.first_residue)
175  p1 = s1.get_selected_particles()[0]
176  B = representation.get_component(self.assembly, xlink.second_id)
177  s2 = IMP.atom.Selection(B, chain=xlink.second_chain,
178  residue_index=xlink.second_residue)
179  p2 = s2.get_selected_particles()[0]
180  k = core.Harmonic.get_k_from_standard_deviation(stddev)
181  score = core.HarmonicUpperBound(xlink.distance, k)
182  pair_score = IMP.core.DistancePairScore(score)
183  r = IMP.core.PairRestraint(self.model, pair_score, (p1, p2))
184  if not max_score:
185  error_distance_allowed = 100
186  max_score = weight * \
187  score.evaluate(distance + error_distance_allowed)
188  log.info("%s, max_score %s", xlink.show(), max_score)
189  self.add_restraint(r, xlink.get_name(), weight, max_score)
190 
191  def set_complementarity_restraint(self, name1, name2, rname,
192  max_sep_distance, max_penetration,
193  weight, max_score=1e10):
194  """
195  Set a restraint for geometric complementarity between 2 components
196  @param name1 name of
197  @param name2 - The restraint is applied to this components
198  @param rname - The name given to the restraint
199  @param max_sep_distance - maximum distance between molecules
200  tolerated by the restraint
201  @param max_penetration - Maximum penetration allowed (angstrom)
202  @param weight
203  @param max_score
204  """
205  log.info("Setting geometric complementarity restraint %s: %s - %s",
206  rname, name1, name2)
207  A = representation.get_component(self.assembly, name1)
208  B = representation.get_component(self.assembly, name2)
210  atom.get_leaves(A), atom.get_leaves(B), rname)
211  restraint.set_maximum_separation(max_sep_distance)
212  restraint.set_maximum_penetration(max_penetration)
213  log.debug("Maximum separation: %s Maximum penetration score: %s",
214  max_sep_distance, max_penetration)
215  self.add_restraint(restraint, rname, weight, max_score)
216 
217  def create_coarse_assembly(self, n_residues):
218  """
219  Simplify the assembly with a coarse representation
220  @param n_residues Number of residues used for a coarse particle
221  """
222  if self.create_coarse:
223  log.info("Creating simplified assembly")
224  self.coarse_assembly = representation.create_simplified_assembly(
225  self.assembly, self.components_rbs, n_residues)
226  self.create_coarse = False
227 
228  def do_sampling(self, mode="assignments_heap_container", params=None):
229  """
230  Do Domino sampling
231  @param mode The mode used to recover solutions from domino.
232  It can have
233  the value "configuration", "assignments", or
234  "assignments_heap_container". The mode
235  "configuration" recovers all possible configurations
236  (takes a while to generate them).
237  The mode "assignments" only provides the assignments
238  (state numbers) of the solutions. It is faster but requires
239  generating the solutions afterwards
240  The mode "assignments_heap_container" selects the best solutions
241  after each merging in DOMINO, discarding the rest.
242  In practice I used the mode "assignments_heap_container"
243  @param params
244  """
245  t0 = time.time()
246  if mode == "configuration":
247  self.configuration_set = self.sampler.create_sample()
248  tf = time.time()
249  log.info("found %s configurations. Time %s",
250  self.configuration_set.get_number_of_configurations(),
251  tf - t0)
252  self.configuration_sampling_done = True
253  elif mode == "assignments":
254  subset = self.rb_states_table.get_subset()
255  log.info(
256  "Do sampling with assignments. Subset has %s elements: %s",
257  len(subset), subset)
258  self.solution_assignments = \
259  self.sampler.get_sample_assignments(subset)
260  tf = time.time()
261  log.info("found %s assignments. Time %s",
262  len(self.solution_assignments), tf - t0)
263  self.assignments_sampling_done = True
264  elif mode == "assignments_heap_container":
265  subset = self.rb_states_table.get_subset()
266  log.info("DOMINO sampling an assignments_heap_container. "
267  "Subset has %s elements: %s", len(subset), subset)
268  # last vertex is the root of the merge tree
269  root = self.merge_tree.get_vertices()[-1]
270  container = self.get_assignments_with_heap(root,
271  params.heap_solutions)
272  self.solution_assignments = container.get_assignments()
273  tf = time.time()
274  log.info("found %s assignments. Time %s",
275  len(self.solution_assignments), tf - t0)
276  self.assignments_sampling_done = True
277  else:
278  raise ValueError("Requested wrong mode for the DOMINO sampling")
279 
280  def get_assignment_text(self, assignment, subset):
281  """
282  Get the formatted text for an assignment
283  @param subset Subset of components of the assembly
284  @param assignment Assignment class with the states for the subset
285  @note: The order in assignment and subset is not always the order
286  of the rigid bodies in self.components_rbs. The function reorders
287  the terms in the assignment so there is correspondence.
288  """
289  ordered_assignment = []
290  for rb in self.components_rbs:
291  for i, particle in enumerate(subset):
292  if rb.get_particle() == particle:
293  ordered_assignment.append(assignment[i])
294  text = "|".join([str(i) for i in ordered_assignment])
295  return text
296 
297  def get_assignment_and_RFs(self, assignment, subset):
298  """
299  Return a line with texts for an assignment and the
300  the reference frames of the RigidBodies in the subset.
301  @param subset Subset of components of the assembly
302  @param assignment Assignment class with the states for the subset
303  @note: see the get_assignment_assignment_text() note.
304  """
305  ordered_assignment = []
306  RFs = []
307  for rb in self.components_rbs:
308  for i, particle in enumerate(subset):
309  if rb.get_particle() == particle:
310  j = assignment[i]
311  ordered_assignment.append(j)
312  pstates = self.rb_states_table.get_particle_states(rb)
313  pstates.load_particle_state(j, rb.get_particle())
314  RFs.append(rb.get_reference_frame())
315  RFs_text = unit_delim.join(
316  [io.ReferenceFrameToText(ref).get_text() for ref in RFs])
317  assignment_numbers = "|".join([str(i) for i in ordered_assignment])
318  return [assignment_numbers, RFs_text]
319 
320  def get_assignment_info(self, assignment, subset):
321  """
322  Info related to an assignment. Returns a list with text for the
323  assignment and all the scores for the restraints
324  @param subset Subset of components of the assembly
325  @param assignment Assignment class with the states for the subset
326  """
327  text = self.get_assignment_and_RFs(assignment, subset)
328  scores, total_score = self.get_assignment_scores(assignment, subset)
329  info = text + [total_score] + scores
330  return info
331 
332  def get_assignment_scores(self, assignment, subset):
333  """
334  Returns a list with the values for the restraints on a subset of
335  the components of the assembly
336  @param subset Subset of components of the assembly
337  @param assignment Assignment class with the states for the subset
338  """
339  restraints_to_evaluate = \
340  self.restraint_cache.get_restraints(subset, [])
341  scores = [self.restraint_cache.get_score(r, subset, assignment)
342  for r in restraints_to_evaluate]
343  total_score = sum(scores)
344  return scores, total_score
345 
346  def load_state(self, assignment):
347  """
348  Load the positions of the components given by the assignment
349  @param assignment Assignment class
350  """
351  subset = self.rb_states_table.get_subset()
352  domino.load_particle_states(subset, assignment, self.rb_states_table)
353 
354  def get_assignments_with_heap(self, vertex, k=0):
355  """
356  Domino sampling that recovers the assignments for the root of the
357  merge tree, but
358  conserving only the best k scores for each vertex of the tree.
359  @param[in] vertex Vertex with the root of the current
360  merge tree. This function is recursive.
361  @param[in] k
362  """
363  if(self.sampler.get_number_of_subset_filter_tables() == 0):
364  raise ValueError("No subset filter tables")
365  if(self.merge_tree is None):
366  raise ValueError("No merge tree")
367  # In the merge tree, the names of the vertices are the subsets.
368  # The type of the vertex name is a domino.Subset
369  subset = self.merge_tree.get_vertex_name(vertex)
370  log.info("computing assignments for vertex %s", subset)
371  t0 = time.time()
372  assignments_container = domino.HeapAssignmentContainer(
373  subset, k, self.restraint_cache, "my_heap_assignments_container")
374  neighbors = self.merge_tree.get_out_neighbors(vertex)
375  if len(neighbors) == 0: # A leaf
376  # Fill the container with the assignments for the leaf
377  self.sampler.load_vertex_assignments(vertex, assignments_container)
378  else:
379  if neighbors[0] > neighbors[1]:
380  # get_vertex_assignments() methods in domino
381  # expects the children in sorted order
382  neighbors = [neighbors[1], neighbors[0]]
383  # recurse on the two children
384  container_child0 = self.get_assignments_with_heap(neighbors[0], k)
385  container_child1 = self.get_assignments_with_heap(neighbors[1], k)
386  self.sampler.load_vertex_assignments(vertex,
387  container_child0,
388  container_child1,
389  assignments_container)
390  tf = time.time() - t0
391  log.info("Merge tree vertex: %s assignments: %s Time %s sec.", subset,
392  assignments_container.get_number_of_assignments(), tf)
393  return assignments_container
394 
395  def get_restraint_value_for_assignment(self, assignment, name):
396  """
397  Recover the value of a restraint
398  @param name of the restraint
399  @param assignment Assignment class containing the assignment for
400  the solution desired
401  """
402  if not self.assignments_sampling_done:
403  raise ValueError("DominoModel: sampling not done")
404  log.debug("Computing restraint value for assignment %s", assignment)
405  self.load_state(assignment)
406  for rb in self.components_rbs:
407  log.debug("rb - %s",
408  rb.get_reference_frame().get_transformation_to())
409  val = self.restraints[name].evaluate(False)
410  log.debug("restraint %s = %s", name, val)
411  return val
412 
414  """
415  Sets the native model for benchmark, by reading the native
416  structure and set the rigid bodies.
417  """
418  self.measure_models = True
419  self.native_model = IMP.Model()
420  if hasattr(params.benchmark, "fn_pdb_native"):
421  self.native_assembly = representation.create_assembly_from_pdb(
422  self.native_model, params.benchmark.fn_pdb_native,
423  params.names)
424  elif hasattr(params.benchmark, "fn_pdbs_native"):
425  self.native_assembly = representation.create_assembly(
426  self.native_model, params.benchmark.fn_pdbs_native,
427  params.names)
428  else:
429  raise ValueError(
430  "set_native_assembly_for_benchmark: Requested "
431  "to set a native assembly for benchmark but did not provide "
432  "its pdb, or the pdbs of the components")
433  self.native_rbs = representation.create_rigid_bodies(
434  self.native_assembly)
435  anchor_assembly(self.native_rbs, params.anchor)
436 
437  def set_rb_states(self, rb, transformations):
438  """
439  Add a set of reference frames as possible states for a rigid body
440  @param rb The rigid body
441  @param transformations Transformations are used to build the
442  reference frames for the rigid body.
443  """
444  log.info("Set rigid body states for %s", rb.get_name())
445  RFs = [alg.ReferenceFrame3D(T) for T in transformations]
446  for st in RFs:
447  log.debug("%s", st)
448  rb_states = domino.RigidBodyStates(RFs)
449  self.rb_states_table.set_particle_states(rb, rb_states)
450  st = self.rb_states_table.get_particle_states(rb)
451  log.info("RigidBodyStates created %s",
452  st.get_number_of_particle_states())
453 
455  """
456  Add a RestraintScoreSubsetFilterTable to DOMINO that allows to
457  reject assignments that have score worse than the thresholds for
458  the restraints
459  """
460  log.info("Adding RestraintScoreSubsetFilterTable")
461  # Restraint Cache is a centralized container of the restraints and
462  # their scores
463  self.restraint_cache = domino.RestraintCache(self.rb_states_table)
464  self.restraint_cache.add_restraints(list(self.restraints.values()))
465  rssft = domino.RestraintScoreSubsetFilterTable(self.restraint_cache)
466  rssft.set_name('myRestraintScoreSubsetFilterTable')
467  self.sampler.add_subset_filter_table(rssft)
468 
469  def set_assembly_components(self, fn_pdbs, names):
470  """
471  Sets the components of an assembly, each one given as a separate
472  PDB file.
473  @param fn_pdbs List with the names of the pdb files
474  @param names List with the names of the components of the assembly.
475  """
476  if len(fn_pdbs) != len(names):
477  raise ValueError("Each PDB file needs a name")
478  self.names = names
479  self.assembly = representation.create_assembly(self.model, fn_pdbs,
480  names)
481  self.components_rbs = representation.create_rigid_bodies(self.assembly)
482  self.not_optimized_rbs = []
483 
484  def set_assembly(self, fn_pdb, names):
485  """
486  Sets an assembly from a single PDB file. The function assumes that
487  the components of the assembly are the chains of the PDB file.
488  @param fn_pdb PDB with the file for the assembly
489  @param names Names for each of the components (chains)
490  of the assembly
491  """
492  self.assembly = representation.create_assembly_from_pdb(self.model,
493  fn_pdb,
494  names)
495  self.components_rbs = representation.create_rigid_bodies(self.assembly)
496  self.not_optimized_rbs = []
497 
499  """
500  BranchAndBoundAssignmentsTable enumerate states based on provided
501  filtered using the provided the subset filter tables
502  """
503  log.info("Setting BranchAndBoundAssignmentsTable")
504  fts = []
505  for i in range(self.sampler.get_number_of_subset_filter_tables()):
506  ft = self.sampler.get_subset_filter_table(i)
507  fts.append(ft)
508  at = domino.BranchAndBoundAssignmentsTable(self.rb_states_table, fts)
509  self.sampler.set_assignments_table(at)
510  self.assignments_table = at
511 
513  """
514  ExclusionSubsetFilterTable ensures that two particles are not given
515  the same state at the same time
516  """
517  log.info("Setting ExclusionSubsetFilterTable")
518  ft = domino.ExclusionSubsetFilterTable(self.rb_states_table)
519  self.sampler.add_subset_filter_table(ft)
520 
522  """
523  Creates a DOMINO sampler and adds the required filter tables
524  """
525  if self.merge_tree is None:
526  raise ValueError("Merge tree for DOMINO not set. It is required "
527  "to setup the sampler")
528  log.info("Domino sampler")
529  self.sampler = domino.DominoSampler(self.model, self.rb_states_table)
530  self.sampler.set_restraints(list(self.restraints.values()))
531  self.sampler.set_log_level(IMP.TERSE)
532  self.sampler.set_merge_tree(self.merge_tree)
536 
537  def read_merge_tree(self, fn):
538  """
539  Read and create a merge tree from a file.
540  The format of the file is the format written by write merge_tree()
541  It does not matter the order of the indices in the file, as
542  they are sorted by this function.
543  The only condition is that the index for the vertex that is the
544  root of the tree MUST be the largest. This is guaranteed when
545  creating a merge tree with create_merge_tree()
546  @param fn File containing the merge tree
547  """
548  log.info("Reading merge tree from %s", fn)
549  ps = self.rb_states_table.get_particles()
550  log.debug("particles")
551  for p in ps:
552  log.debug("%s", p.get_name())
553  rows = csv_related.read_csv(fn, delimiter=field_delim)
554  for i in range(len(rows)):
555  row = rows[i]
556  rows[i] = [int(row[0]), int(row[1]), int(row[2]), row[3]]
557  # Sort rows by vertice index
558  rows.sort()
559  subsets = []
560  # build subsets from the rows text
561  for row in rows:
562  names = row[3].split(unit_delim)
563  log.debug("row %s names %s", row, names)
564  # get particles in the subset
565  particles = []
566  for name in names:
567  lp = [p for p in ps if p.get_name() == name]
568  if (len(lp) > 1):
569  ValueError("More than one particle with same name" % names)
570  particles.extend(lp)
571  s = domino.Subset(particles)
572  subset_names = [p.get_name() for p in particles]
573  log.debug("Merge tree Subset %s. Particles %s ", s, subset_names)
574  subsets.append(s)
575  # The vertex with the largest index is the root.
576  # trick: get the merge tree object from a tree with only one node ...
577  jt = domino.SubsetGraph()
578  jt.add_vertex(subsets[0])
579  mt = domino.get_merge_tree(jt)
580  # ... and add the rest of the vertices
581  for i in range(1, len(subsets)):
582  mt.add_vertex(subsets[i]) # the name of the vertex is a subset
583  # set edges
584  for row in rows:
585  vid = row[0]
586  child_left = row[1]
587  child_right = row[2]
588  if child_left != -1:
589  mt.add_edge(vid, child_left)
590  if child_right != -1:
591  mt.add_edge(vid, child_right)
592  self.merge_tree = mt
593  log.info("%s", self.merge_tree.show_graphviz())
594 
595  def create_merge_tree(self):
596  """
597  Creates a merge tree from the restraints that are set already
598  """
599  rs = list(self.restraints.values())
600  ig = domino.get_interaction_graph(rs, self.rb_states_table)
601 # pruned_dep = IMP.get_pruned_dependency_graph(self.model)
602 # IMP.show_graphviz(pruned_dep)
603 # IMP.show_graphviz(ig)
604  jt = domino.get_junction_tree(ig)
605 # IMP.show_graphviz(jt)
606  self.merge_tree = domino.get_balanced_merge_tree(jt)
607 # IMP.show_graphviz(self.merge_tree)
608  log.info("Balanced merge tree created")
609  log.info("%s", self.merge_tree.show_graphviz())
610 
611  def write_merge_tree(self, fn):
612  """
613  Writes the current merge tree to file. The format is:
614  parent | child_left | child_right | labels
615  @param fn File for storing the merge tree
616  """
617  log.info("Writing merge to %s", fn)
618  if self.merge_tree is None:
619  raise ValueError("No merge tree")
620  f = open(fn, "w")
621  f.write(
622  "# Vertex index | Child0 | Child1 | label "
623  "(names of the particles)\n")
624  w = csv.writer(f, delimiter=field_delim)
625  root = self.merge_tree.get_vertices()[-1]
626  self.write_subset(root, w)
627  f.close()
628 
629  def write_subset(self, v, w):
630  """
631  Writes lines describing a subset (vertex of a merge tree)
632  @param w A csv.writer
633  @param v Vertex index
634  """
635  no_child_index = -1
636  subset = self.merge_tree.get_vertex_name(v)
637  names = [p.get_name() for p in subset]
638  reps = [x for x in names if names.count(x) > 1] # repeated names
639  if(len(reps)):
640  raise ValueError(
641  "The names of the particles in the subset %s are not unique"
642  % subset)
643  names = unit_delim.join(names)
644  neighbors = self.merge_tree.get_out_neighbors(v)
645  if len(neighbors) == 0: # A leaf
646  # Fill the container with the assignments for the leaf
647  w.writerow([v, no_child_index, no_child_index, names])
648  else:
649  if neighbors[0] > neighbors[1]:
650  neighbors = [neighbors[1], neighbors[0]]
651  w.writerow([v, neighbors[0], neighbors[1], names])
652  self.write_subset(neighbors[0], w)
653  self.write_subset(neighbors[1], w)
654 
655  def set_connectivity_restraint(self, names, rname,
656  distance=10,
657  weight=1.0, max_score=None, n_pairs=1,
658  stddev=2):
659  """
660  Set a connectivity restraint between the elements of the assembly
661  @param names List with all the elements to be connected
662  @param distance Maximum distance tolerated by the restraint
663  @param rname Name for the restraint
664  @param weight Weight for the restraint
665  @param max_score Maximum score for the restraint
666  @param n_pairs Number of pairs of particles used in the
667  KClosePairScore of the connectivity restraint
668  @param stddev Standard deviation used to approximate the
669  HarmonicUpperBound function to a Gaussian
670  """
671  components = []
672  for name in names:
673  c = representation.get_component(self.coarse_assembly, name)
674  components.append(c)
675  log.info("Connectivity restraint for %s", components)
676  spring_constant = core.Harmonic.get_k_from_standard_deviation(stddev)
677  if max_score is None:
678  max_score = weight * spring_constant * n_pairs * (stddev) ** 2
679  r = restraints.get_connectivity_restraint(
680  components, distance, n_pairs,
681  spring_constant)
682  self.add_restraint(r, rname, weight, max_score)
683 
684  def set_em2d_restraint(self, name, fn_images_sel, pixel_size, resolution,
685  n_projections, weight, max_score=False,
686  mode="fast", n_optimized=1):
687  """
688  Set a Restraint computing the em2d score.
689  @param name Name for the restraint
690  @param fn_images_sel Selection file with the names of the images
691  used for the restraint
692  @param pixel_size - pixel size in the images
693  @param resolution - resolution used to downsample the projections
694  of the model when projecting
695  @param weight Weight of the restraint
696  @param max_score - Maximum value tolerated for the restraint
697  @param n_projections - Number of projections to generate
698  when projecting the model to do the coarse alignments
699  @param mode - Mode used for computing the restraints.
700  @param n_optimized - number of results from the coarse
701  alignment that are refined with the Simplex algorithm
702  to get a more accurate value for the restraint
703  """
704  log.info("Adding a em2D restraint: %s", fn_images_sel)
705  restraint_params = em2d.Em2DRestraintParameters(pixel_size,
706  resolution,
707  n_projections)
708  r = restraints.get_em2d_restraint(self.assembly,
709  fn_images_sel,
710  restraint_params,
711  mode, n_optimized)
712  self.add_restraint(r, name, weight, max_score)
713 
714  def set_not_optimized(self, name):
715  """
716  Set a part of the model as not optimized (it does not move during
717  the model optimization)
718  @param name of the component to optimized
719  """
720  if name not in self.names:
721  raise ValueError("DominoModel: There is not component "
722  "in the assembly with this name")
723  rb_name = representation.get_rb_name(name)
724  self.not_optimized_rbs.append(rb_name)
725 
727  self, distance=10, weight=1.0, ratio=0.05, stddev=2, max_score=False):
728  """
729  Creates an excluded volume restraint between all the components
730  of the coarse assembly.See help for
731  @param distance Maximum distance tolerated between particles
732  @param weight Weight for the restraint
733  @param stddev
734  @param max_score Maximum value for the restraint. If the parameter
735  is None, an automatic value is computed (using the ratio).
736  @param ratio Fraction of the number of possible pairs of
737  particles that are tolerated to overlap. The maximum
738  score is modified according to this ratio.
739  I have found that ratios of 0.05-0.1 work well allowing for
740  some interpenetration
741  @param stddev Standard deviation used to approximate the
742  HarmonicUpperBound function to a Gaussian
743  """
744  k = core.Harmonic.get_k_from_standard_deviation(stddev)
745  for i, c1 in enumerate(self.coarse_assembly.get_children()):
746  for j, c2 in enumerate(self.coarse_assembly.get_children()):
747  if j > i:
748  name = "exc_vol_%s_%s" % (c1.get_name(), c2.get_name())
749 
750  ls1 = atom.get_leaves(c1)
751  ls2 = atom.get_leaves(c2)
752  possible_pairs = len(ls1) * len(ls2)
753  n_pairs = possible_pairs * ratio
754 
755  marker1 = IMP.Particle(
756  self.model, "marker1 " + name)
757  marker2 = IMP.Particle(
758  self.model, "marker2 " + name)
759  table_refiner = core.TableRefiner()
760  table_refiner.add_particle(marker1, ls1)
761  table_refiner.add_particle(marker2, ls2)
762  score = core.HarmonicLowerBound(distance, k)
763  pair_score = core.SphereDistancePairScore(score)
764  close_pair_score = core.ClosePairsPairScore(pair_score,
765  table_refiner,
766  distance)
767  r = core.PairRestraint(self.model, close_pair_score,
768  (marker1, marker2))
769 
770  if not max_score:
771  minimum_distance_allowed = 0
772  max_score = weight * n_pairs * \
773  score.evaluate(minimum_distance_allowed)
774  log.debug(
775  "Setting close_pairs_excluded_volume_restraint(), "
776  "max_score %s", max_score)
777  self.add_restraint(r, name, weight, max_score)
778 
779  def set_pair_score_restraint(self, name1, name2,
780  restraint_name, distance=10,
781  weight=1.0, n_pairs=1, stddev=2, max_score=None):
782  """
783  Set a pair_score restraint between the coarse representation
784  of two components
785  @param name1 Name of the first component
786  @param name2 Name of the second component
787  @param restraint_name Name for the restraint
788  @param distance Maximum distance tolerated between particles
789  @param weight Weight of the restraint
790  @param n_pairs
791  @param max_score Maximum value tolerated for the restraint
792  @param stddev Standard deviation used to approximate the
793  HarmonicUpperBound function to a Gaussian
794  """
795  table_refiner = core.TableRefiner()
796 
797  # The particles in A are attached to a marker particle via a refiner.
798  # When the refiner gets a request for marker1, it returns the attached
799  # particles
800  A = representation.get_component(self.coarse_assembly, name1)
801  marker1 = IMP.Particle(self.model, "marker1 " + restraint_name)
802  table_refiner.add_particle(marker1, atom.get_leaves(A))
803  # same for B
804  B = representation.get_component(self.coarse_assembly, name2)
805  marker2 = IMP.Particle(self.model, "marker2 " + restraint_name)
806  table_refiner.add_particle(marker2, atom.get_leaves(B))
807 
808  k = core.Harmonic.get_k_from_standard_deviation(stddev)
810  # The score is set for the n_pairs closest pairs of particles
811  pair_score = core.KClosePairsPairScore(score, table_refiner, n_pairs)
812  # When KClosePairsPairScore is called, the refiner will provide the
813  # particles for A and B
814  if not max_score:
815  # Build a maximum score based on the function type that is used,
816  # an HarmonicUpperBound
817  temp_score = core.HarmonicUpperBound(distance, k)
818  error_distance_allowed = 10
819  max_score = weight * n_pairs * \
820  temp_score.evaluate(distance + error_distance_allowed)
821 
822  log.info("Setting pair score restraint for %s %s. k = %s, max_score "
823  "= %s, stddev %s", name1, name2, k, max_score, stddev)
824  r = core.PairRestraint(self.model, pair_score, (marker1, marker2))
825  self.add_restraint(r, restraint_name, weight, max_score)
826 
827  def write_solutions_database(self, fn_database, max_number=None):
828  """
829  Write the results of the DOMINO sampling to a SQLite database.
830  @param fn_database
831  @param max_number Maximum number of results to write
832  """
833  log.info("Creating the database of solutions")
834  if self.measure_models and not hasattr(self, "native_model"):
835  raise ValueError("The native model has not been set")
837  db.create(fn_database, overwrite=True)
838  db.connect(fn_database)
839  subset = self.rb_states_table.get_subset()
840  restraints_names = self.get_restraints_names_used(subset)
841  db.add_results_table(restraints_names, self.measure_models)
842  n = len(self.solution_assignments)
843  if max_number is not None:
844  n = min(n, max_number)
845  for sol_id, assignment in enumerate(self.solution_assignments[0:n]):
846  txt = self.get_assignment_text(assignment, subset)
847  # load the positions of the components in the state
848  self.load_state(assignment)
849  RFs = [rb.get_reference_frame() for rb in self.components_rbs]
850  scores, total_score = self.get_assignment_scores(
851  assignment, subset)
852  measures = None
853  if(self.measure_models):
854  measures = measure_model(self.assembly, self.native_assembly,
855  self.components_rbs, self.native_rbs)
856  db.add_record(sol_id, txt, RFs, total_score, scores, measures)
857  # Add native state if doing benchmarking
858  if self.measure_models:
859  assignment = "native"
860  RFs = [rb.get_reference_frame() for rb in self.native_rbs]
861  scores, total_score = self.get_restraints_for_native(
862  restraints_names)
863  db.add_native_record(assignment, RFs, total_score, scores)
864  db.save_records()
865  db.close()
866 
867  def get_restraints_names_used(self, subset):
868  """ Get the names of the restraints used for a subset
869  @param subset Subset of components of the assembly
870  """
871  return [r.get_name() for r
872  in self.restraint_cache.get_restraints(subset, [])]
873 
874  def get_restraints_for_native(self, restraints_names):
875  """
876  Get values of the restraints for the native structure
877  @param restraints_names Names of the restraints requested
878  @return a list with the values of all scores, and the total score
879  """
880  saved = [rb.get_reference_frame() for rb in self.components_rbs]
881  # Set the state of the model to native
882  for rb, rn in zip(self.components_rbs, self.native_rbs):
883  # remove sub-rigid bodies
884  rb_members = [m for m in rb.get_members()
885  if not core.RigidBody.get_is_setup(m.get_particle())]
886  rn_members = [m for m in rn.get_members()
887  if not core.RigidBody.get_is_setup(m.get_particle())]
888  rb_coords = [m.get_coordinates() for m in rb_members]
889  rn_coords = [m.get_coordinates() for m in rn_members]
890 
891  # align and put rb in the position of rn
892  if len(rn_coords) != len(rb_coords):
893  raise ValueError("Mismatch in the number of members. "
894  "Reference %d Aligned %d "
895  % (len(rn_coords), len(rb_coords)))
896  T = alg.get_transformation_aligning_first_to_second(rb_coords,
897  rn_coords)
898  t = rb.get_reference_frame().get_transformation_to()
899  new_t = alg.compose(T, t)
900  rb.set_reference_frame(alg.ReferenceFrame3D(new_t))
901  scores = []
902  for name in restraints_names:
903  scores.append(self.restraints[name].evaluate(False))
904  total_score = sum(scores)
905  representation.set_reference_frames(self.components_rbs, saved)
906  return scores, total_score
907 
908  def write_pdb_for_assignment(self, assignment, fn_pdb):
909  """
910  Write the solution in the assignment
911  @param assignment Assignment class with the states for the subset
912  @param fn_pdb File to write the model
913  """
914  if not self.assignments_sampling_done:
915  raise ValueError("DominoModel: sampling not done")
916  log.info("Writting PDB %s for assignment %s", fn_pdb, assignment)
917  self.load_state(assignment)
918  atom.write_pdb(self.assembly, fn_pdb)
919 
920  def write_pdb_for_configuration(self, n, fn_pdb):
921  """
922  Write a file with a configuration for the model (configuration
923  here means a configuration in DOMINO)
924  @param n Index of the configuration desired.
925  @param fn_pdb
926  """
927  if not self.configuration_sampling_done:
928  raise ValueError("DominoModel: sampling not done")
929  log.debug("Writting PDB %s for configuration %s", fn_pdb, n)
930  configuration_set.load_configuration(n)
931  atom.write_pdb(self.assembly, fn_pdb)
932 
933  def write_pdb_for_reference_frames(self, RFs, fn_pdb):
934  """
935  Write a PDB file with a solution given by a set of reference frames
936  @param RFs Reference frames for the elements of the complex
937  @param fn_pdb File to write the solution
938  """
939  log.debug("Writting PDB %s for reference frames %s", fn_pdb, RFs)
940  representation.set_reference_frames(self.components_rbs, RFs)
941  atom.write_pdb(self.assembly, fn_pdb)
942 
943  def write_pdbs_for_reference_frames(self, RFs, fn_base):
944  """
945  Write a separate PDB for each of the elements
946  @param RFs Reference frames for the elements of the complex
947  @param fn_base base string to build the names of the PDBs files
948  """
949  log.debug("Writting PDBs with basename %s", fn_base)
950  representation.set_reference_frames(self.components_rbs, RFs)
951  for i, ch in enumerate(self.assembly.get_children()):
952  atom.write_pdb(ch, fn_base + "component-%02d.pdb" % i)
953 
954  def write_pdb_for_component(self, component_index, ref, fn_pdb):
955  """
956  Write one component of the assembly
957  @param component_index Position of the component in the assembly
958  @param ref Reference frame of the component
959  @param fn_pdb Output PDB
960  """
961  ("Writting PDB %s for component %s", fn_pdb, component_index)
962  self.components_rbs[component_index].set_reference_frame(ref)
963  hierarchy_component = self.assembly.get_child(component_index)
964  atom.write_pdb(hierarchy_component, fn_pdb)
965 
967  """Get a ScoringFunction that includes all restraints."""
969  list(self.restraints.values()))
970 
971  def write_monte_carlo_solution(self, fn_database):
972  """
973  Write the solution of a MonteCarlo run
974  @param fn_database Database file with the solution.
975  The database will contain only one record
976  """
977  total_score = 0
978  rnames = []
979  scores = []
980  for r in self.restraints.values():
981  score = r.evaluate(False)
982  rnames.append(r.get_name())
983  scores.append(score)
984  total_score += score
986  db.create(fn_database, overwrite=True)
987  db.connect(fn_database)
988  db.add_results_table(rnames, self.measure_models)
989  RFs = [rb.get_reference_frame() for rb in self.components_rbs]
990  measures = None
991  if(self.measure_models):
992  measures = measure_model(self.assembly, self.native_assembly,
993  self.components_rbs, self.native_rbs)
994  solution_id = 0
995  db.add_record(solution_id, "", RFs, total_score, scores, measures)
996  db.save_records()
997  db.close()
998 
999  def print_restraints_values(self):
1000  print("Restraints: Name, weight, value, maximum_value")
1001  total_score = 0
1002  for r in self.restraints.values():
1003  score = r.evaluate(False)
1004  # print "%20s %18f %18f %18f" % (r.get_name(), r.get_weight(),
1005  # score, r.get_maximum_score())
1006  print("%20s %18f %18f" % (r.get_name(), r.get_weight(), score))
1007  total_score += score
1008  print("total_score:", total_score)
1009 
1010 
1011 def anchor_assembly(components_rbs, anchored):
1012  """
1013  "Anchor" a set of rigid bodies, by setting the position of one of them
1014  at the origin (0,0,0).
1015  @param components_rbs Rigid bodies of the components
1016  @param anchored - List of True/False values indicating if the components
1017  of the assembly are anchored. The function sets the FIRST anchored
1018  component in the (0,0,0) coordinate and moves the other
1019  components with the same translation. If all the values are False, the
1020  function does not do anything
1021  """
1022  if True in anchored:
1023  anchored_rb = None
1024  for a, rb in zip(anchored, components_rbs):
1025  if a:
1026  anchored_rb = rb
1027  break
1028  center = anchored_rb.get_coordinates()
1029  log.info("Anchoring assembly at (0,0,0)")
1030  for rb in components_rbs:
1031  T = rb.get_reference_frame().get_transformation_to()
1032  t = T.get_translation()
1033  R = T.get_rotation()
1034  log.debug("%s: Rerefence frame BEFORE %s",
1035  rb.get_name(), rb.get_reference_frame())
1036  T2 = alg.Transformation3D(R, t - center)
1037  rb.set_reference_frame(alg.ReferenceFrame3D(T2))
1038  log.debug("%s Rerefence frame AFTER %s",
1039  rb.get_name(), rb.get_reference_frame())
1040 
1041 
1042 def measure_model(assembly, native_assembly, rbs, native_rbs):
1043  """
1044  Get the drms, cdrms and crmsd for a model
1045  @param assembly Hierarchy for an assembly
1046  @param native_assembly Hierarchy of the native structure
1047  @param rbs Rigid bodies for the components of the assembly
1048  @param native_rbs Rigid bodies for the components of the native
1049  structure
1050  @return cdrms, cdrms, crmsd
1051  - drms - typical drms measure
1052  - cdrms - drms for the centroids of the rigid bodies of the components
1053  of the assembly
1054  - crmsd - rmsd for the centroids
1055  """
1056  log.debug("Measure model")
1057  drms = comparisons.get_drms_for_backbone(assembly, native_assembly)
1058  RFs = [rb.get_reference_frame() for rb in rbs]
1059  vs = [r.get_transformation_to().get_translation() for r in RFs]
1060  nRFs = [rb.get_reference_frame() for rb in native_rbs]
1061  nvs = [r.get_transformation_to().get_translation() for r in nRFs]
1062  cdrms = atom.get_drms(vs, nvs)
1063  crmsd, RFs = alignments.align_centroids_using_pca(RFs, nRFs)
1064  log.debug("drms %s cdrms %s crmsd %s", drms, cdrms, crmsd)
1065  return [drms, cdrms, crmsd]
1066 
1067 
1068 def get_coordinates(rigid_bodies):
1069  """
1070  Return a list of the coordinates of all the members
1071  of the rigid bodies
1072  """
1073  if len(rigid_bodies) == 0:
1074  raise ValueError(
1075  "get_coordinates: There are no rigid bodies to get "
1076  "coordinates from")
1077  coords = []
1078  for rb in rigid_bodies:
1079  # remove sub-rigid bodies
1080  rb_members = [m for m in rb.get_members()
1081  if not core.RigidBody.get_is_setup(m.get_particle())]
1082  coords.extend([m.get_coordinates() for m in rb_members])
1083  return coords
def write_pdb_for_component
Write one component of the assembly.
def get_assignment_and_RFs
Return a line with texts for an assignment and the the reference frames of the RigidBodies in the sub...
Utility functions to handle restraints.
Definition: restraints.py:1
A harmonic upper bound on the distance between two spheres.
Lower bound harmonic function (non-zero when feature < mean)
Compute the complementarity between two molecules.
def set_rb_states
Add a set of reference frames as possible states for a rigid body.
Apply a score to a fixed number of close pairs from the two sets.
def add_exclusion_filter_table
ExclusionSubsetFilterTable ensures that two particles are not given the same state at the same time...
Restraints using electron microscopy 2D images (class averages).
def get_restraints_names_used
Get the names of the restraints used for a subset.
def write_solutions_database
Write the results of the DOMINO sampling to a SQLite database.
Upper bound harmonic function (non-zero when feature > mean)
Apply the score to all pairs whose spheres are within a distance threshold.
def get_assignment_scores
Returns a list with the values for the restraints on a subset of the components of the assembly...
Utility functions to handle representation.
def anchor_assembly
"Anchor" a set of rigid bodies, by setting the position of one of them at the origin (0...
Utility functions to handle alignments.
Definition: alignments.py:1
Sample best solutions using Domino.
Definition: DominoSampler.h:32
def set_close_pairs_excluded_volume_restraint
Creates an excluded volume restraint between all the components of the coarse assembly.See help for.
def set_xlink_restraint
Set a restraint on the maximum distance between 2 residues.
def get_assignment_text
Get the formatted text for an assignment.
def load_state
Load the positions of the components given by the assignment.
Filter a configuration of the subset using the Model thresholds.
Represent a subset of the particles being optimized.
Definition: Subset.h:33
def get_coordinates
Return a list of the coordinates of all the members of the rigid bodies.
Utility functions to handle IO.
Definition: io.py:1
Create a scoring function on a list of restraints.
A score on the distance between the surfaces of two spheres.
def create_merge_tree
Creates a merge tree from the restraints that are set already.
Management of a model using DOMINO.
Definition: domino_model.py:34
def get_restraint_value_for_assignment
Recover the value of a restraint.
def write_merge_tree
Writes the current merge tree to file.
def set_connectivity_restraint
Set a connectivity restraint between the elements of the assembly.
def write_pdbs_for_reference_frames
Write a separate PDB for each of the elements.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:73
def write_pdb_for_reference_frames
Write a PDB file with a solution given by a set of reference frames.
Do not allow two particles to be in the same state.
def create_coarse_assembly
Simplify the assembly with a coarse representation.
def write_subset
Writes lines describing a subset (vertex of a merge tree)
def get_restraints_for_native
Get values of the restraints for the native structure.
Utility functions to store and retrieve solution information.
Definition: solutions_io.py:1
A lookup based particle refiner.
Definition: TableRefiner.h:21
def set_native_assembly_for_benchmark
Sets the native model for benchmark, by reading the native structure and set the rigid bodies...
def get_scoring_function
Get a ScoringFunction that includes all restraints.
def do_sampling
Do Domino sampling.
def write_pdb_for_configuration
Write a file with a configuration for the model (configuration here means a configuration in DOMINO) ...
def set_complementarity_restraint
Set a restraint for geometric complementarity between 2 components.
def measure_model
Get the drms, cdrms and crmsd for a model.
Fitting atomic structures into a cryo-electron microscopy density map.
Class for managing the results of the experiments.
def set_assembly
Sets an assembly from a single PDB file.
def get_assignment_info
Info related to an assignment.
def read_merge_tree
Read and create a merge tree from a file.
def setup_domino_sampler
Creates a DOMINO sampler and adds the required filter tables.
def align_rigid_bodies_states
Aligns the set of structures considered for DOMINO sampling.
Definition: domino_model.py:77
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Parameters used by Em2DRestraint and ProjectionFinder.
def get_assignments_with_heap
Domino sampling that recovers the assignments for the root of the merge tree, but conserving only the...
def set_pair_score_restraint
Set a pair_score restraint between the coarse representation of two components.
Utility functions for comparisons.
Definition: comparisons.py:1
Class to handle individual particles of a Model object.
Definition: Particle.h:41
def set_em2d_restraint
Set a Restraint computing the em2d score.
def add_branch_and_bound_assignments_table
BranchAndBoundAssignmentsTable enumerate states based on provided filtered using the provided the sub...
def set_not_optimized
Set a part of the model as not optimized (it does not move during the model optimization) ...
Store a set of k top scoring assignments.
def add_restraint
Adds a restraint to the model.
Definition: domino_model.py:55
Applies a PairScore to a Pair.
Definition: PairRestraint.h:29
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
def write_pdb_for_assignment
Write the solution in the assignment.
Divide-and-conquer inferential optimization in discrete space.
def set_assembly_components
Sets the components of an assembly, each one given as a separate PDB file.
def add_restraint_score_filter_table
Add a RestraintScoreSubsetFilterTable to DOMINO that allows to reject assignments that have score wor...
def write_monte_carlo_solution
Write the solution of a MonteCarlo run.