IMP  2.4.0
The Integrative Modeling Platform
domino_model.py
1 """@namespace IMP.EMageFit.domino_model
2  Classes to manage a model using DOMINO.
3 """
4 
5 import sys
6 import time
7 import csv
8 import logging
9 log = logging.getLogger("DominoModel")
10 
11 import IMP
12 import IMP.domino as domino
13 import IMP.core as core
14 import IMP.container as container
15 import IMP.display as display
16 import IMP.atom as atom
17 import IMP.algebra as alg
18 import IMP.em2d as em2d
19 import IMP.base
20 import IMP.multifit as multifit
21 
22 import IMP.EMageFit.imp_general.comparisons as comparisons
23 import IMP.EMageFit.imp_general.alignments as alignments
24 import IMP.EMageFit.imp_general.io as io
25 import IMP.EMageFit.imp_general.representation as representation
26 
27 import IMP.EMageFit.restraints as restraints
28 import IMP.EMageFit.buildxlinks as buildxlinks
29 import IMP.EMageFit.solutions_io as solutions_io
30 import IMP.EMageFit.csv_related as csv_related
31 
32 
33 field_delim = "," # separate fields in the output text file
34 unit_delim = "/" # separate units within a field (eg, reference frames).
35  # It is used in output text files and in databse files
36 
37 
39 
40  """
41  Management of a model using DOMINO
42  """
43 
44  def __init__(self, name="my model"):
45  self.model = IMP.kernel.Model()
46  self.model.set_name(name)
47  self.configuration_sampling_done = False
48  self.assignments_sampling_done = False
49  self.rb_states_table = domino.ParticleStatesTable()
50  self.merge_tree = None
51  # measure_models is True only when doing benchmark
52  self.measure_models = False
53  # The first time that create_coarse is found and its value is True
54  # a coarse version of the assembly is built
55  self.create_coarse = True
56  self.restraints = dict()
57  self.xlinks_dict = buildxlinks.XlinksDict()
58 
59  def add_restraint(self, r, name, weight, max_score=False):
60  """
61  Adds a restraint to the model
62  @param r An IMP.kernel.Restraint object
63  @param name Name for the restraint
64  @param weight Weight for the restraint
65  @param max_score Maximum score allowed for the restraint. If
66  max_score is False, there is no limit for the value of the restraint
67  """
68  log.info("Adding restraint %s weight %s max_score %s",
69  name, weight, max_score)
70  if "-" in name:
71  raise ValueError("SQL database does not accept restraint names "
72  "containing -")
73  r.set_name(name)
74  if max_score is not None and max_score:
75  r.set_maximum_score(max_score)
76  r.set_weight(weight)
77  self.restraints[name] = r
78  self.model.add_restraint(r)
79 
81  """
82  Aligns the set of structures considered for DOMINO sampling.
83  The function:
84  1) reads the possible reference frames that the
85  rb_states_table stores for each rigid body. This table
86  must be filled before using this function. Usually it is
87  filled with the results from a previous Monte Carlo sampling.
88  If the solutions from Monte Carlo seem to have the same structure
89  but they are not aligned to each other, this function can
90  help setting better starting positions to use with DOMINO.
91  2) Gets the first state for each of the rigid bodies and sets
92  a reference structure using such states.
93  3) Aligns all the rest of the structures respect to the reference
94  4) Replaces the values of the reference frames stored in the
95  rb_states_table with the new values obtained from the alignments.
96  It does it for all states of a rigid body.
97  @note: If this function is applied, the parameters "anchor" and "fixed"
98  are ignored, as they are superseded by the use of the aligments
99  calculated here.
100  """
101  log.debug("Align the configurations read from the database before "
102  "running DOMINO")
103  rb_states = []
104  n_states = []
105  for rb in self.components_rbs:
106  ps = self.rb_states_table.get_particle_states(rb)
107  n = ps.get_number_of_particle_states()
108  if n == 0:
109  raise ValueError(
110  "There are no particle states for %s" %
111  rb.get_name())
112  n_states.append(n)
113  rb_states.append(ps)
114  # coordinates of the first configuration (there is at least one for all
115  # the rbs)
116  for rb, states in zip(self.components_rbs, rb_states):
117  states.load_particle_state(0, rb)
118  reference_coords = get_coordinates(self.components_rbs)
119  aligned_transformations = [[] for rb in self.components_rbs]
120  for i in range(1, max(n_states)):
121  log.debug("Aligning rigid bodies configuration %s" % i)
122  # load the configuration
123  for j in range(len(self.components_rbs)):
124  # if there are no more particle states for this rigid body, use
125  # the last
126  state_index = min(i, n_states[j] - 1)
127  rb_states[j].load_particle_state(
128  state_index,
129  self.components_rbs[j])
130  coords = get_coordinates(self.components_rbs)
131  T = alg.get_transformation_aligning_first_to_second(
132  coords, reference_coords)
133  for j, rb in enumerate(self.components_rbs):
134  t = rb.get_reference_frame().get_transformation_to()
135  new_t = alg.compose(T, t)
136  aligned_transformations[j].append(new_t)
137  # set the new possible transformations
138  for i, rb in enumerate(self.components_rbs):
139  self.set_rb_states(rb, aligned_transformations[i])
140 
141  def set_xlink_restraint(self, id1, chain1, residue1, id2, chain2, residue2,
142  distance, weight, stddev, max_score=False):
143  """
144  Set a restraint on the maximum distance between 2 residues
145  @param id1 Name of the first component
146  @param chain1
147  @param residue1 Residue number for the aminoacid in the first
148  component.The number is the number in the PDB file, not the
149  number relative to the beginning of the chain
150  @param id2 Name of the second component
151  @param chain2
152  @param residue2 Residue number for the aminoacid in the second
153  component.
154  @param distance Maximum distance tolerated
155  @param weight Weight of the restraint
156  @param stddev Standard deviation used to approximate the
157  HarmonicUpperBound function to a Gaussian
158  @param max_score See help for add_restraint(). If none is given,
159  the maximum score is set to allow a maximum distance of 10 Angstrom
160  greater than the parameter "distance".
161  """
162  xlink = buildxlinks.Xlink(
163  id1,
164  chain1,
165  residue1,
166  id2,
167  chain2,
168  residue2,
169  distance)
170  log.info("Setting cross-linking restraint ")
171  log.info("%s", xlink.show())
172  self.xlinks_dict.add(xlink)
173  # setup restraint
174  A = representation.get_component(self.assembly, xlink.first_id)
175  s1 = IMP.atom.Selection(A, chain=xlink.first_chain,
176  residue_index=xlink.first_residue)
177  p1 = s1.get_selected_particles()[0]
178  B = representation.get_component(self.assembly, xlink.second_id)
179  s2 = IMP.atom.Selection(B, chain=xlink.second_chain,
180  residue_index=xlink.second_residue)
181  p2 = s2.get_selected_particles()[0]
182  k = core.Harmonic.get_k_from_standard_deviation(stddev)
183  score = core.HarmonicUpperBound(xlink.distance, k)
184  pair_score = IMP.core.DistancePairScore(score)
185  r = IMP.core.PairRestraint(pair_score, IMP.kernel.ParticlePair(p1, p2))
186  if not max_score:
187  error_distance_allowed = 100
188  max_score = weight * \
189  score.evaluate(distance + error_distance_allowed)
190  log.info("%s, max_score %s", xlink.show(), max_score)
191  self.add_restraint(r, xlink.get_name(), weight, max_score)
192 
193  def set_complementarity_restraint(self, name1, name2, rname,
194  max_sep_distance, max_penetration,
195  weight, max_score=1e10):
196  """
197  Set a restraint for geometric complementarity between 2 components
198  @param name1 name of
199  @param name2 - The restraint is applied to this components
200  @param rname - The name given to the restraint
201  @param max_sep_distance - maximum distance between molecules
202  tolerated by the restraint
203  @param max_penetration - Maximum penetration allowd (angstrom)
204  @param weight
205  @param max_score
206  """
207  log.info("Setting geometric complementarity restraint %s: %s - %s",
208  rname, name1, name2)
209  A = representation.get_component(self.assembly, name1)
210  B = representation.get_component(self.assembly, name2)
211  restraint = multifit.ComplementarityRestraint(atom.get_leaves(A),
212  atom.get_leaves(B), rname)
213  restraint.set_maximum_separation(max_sep_distance)
214  restraint.set_maximum_penetration(max_penetration)
215  log.debug("Maximum separation: %s Maximum penetration score: %s",
216  max_sep_distance, max_penetration)
217  self.add_restraint(restraint, rname, weight, max_score)
218 
219  def create_coarse_assembly(self, n_residues, write=False):
220  """
221  Simplify the assembly with a coarse representation
222  @param n_residues Number of residues used for a coarse particle
223  @param write If True, write the coarse assembly to a
224  format that Chimera can display
225  """
226  if self.create_coarse:
227  log.info("Creating simplified assembly")
228  self.coarse_assembly = \
229  representation.create_simplified_assembly(self.assembly,
230  self.components_rbs, n_residues)
231  if write:
232  io.write_hierarchy_to_chimera(self.assembly, "coarse_assembly.py")
233  self.create_coarse = False
234 
235  def do_sampling(self, mode="assignments_heap_container", params=None):
236  """
237  Do Domino sampling
238  @param mode The mode used to recover solutions from domino.
239  It can have
240  the value "configuration", "assignments", or
241  "assignments_heap_container". The mode
242  "configuration" recovers all possible configurations
243  (takes a while to generate them).
244  The mode "assignments" only provides the assignments
245  (state numbers) of the solutions. It is faster but requires
246  generating the solutions afterwards
247  The mode "assignments_heap_container" selects the best solutions
248  after ecah merging in DOMINO, discarding the rest.
249  In practice I used the mode "assignments_heap_container"
250  @param params
251  """
252  t0 = time.time()
253  if mode == "configuration":
254  self.configuration_set = self.sampler.create_sample()
255  tf = time.time()
256  log.info("found %s configurations. Time %s",
257  self.configuration_set.get_number_of_configurations(),
258  tf - t0)
259  self.configuration_sampling_done = True
260  elif mode == "assignments":
261  subset = self.rb_states_table.get_subset()
262  log.info(
263  "Do sampling with assignments. Subset has %s elements: %s",
264  len(subset), subset)
265  self.solution_assignments = \
266  self.sampler.get_sample_assignments(subset)
267  tf = time.time()
268  log.info("found %s assignments. Time %s",
269  len(self.solution_assignments), tf - t0)
270  self.assignments_sampling_done = True
271  elif mode == "assignments_heap_container":
272  subset = self.rb_states_table.get_subset()
273  log.info("DOMINO sampling an assignments_heap_container. "
274  "Subset has %s elements: %s", len(subset), subset)
275  # last vertex is the root of the merge tree
276  root = self.merge_tree.get_vertices()[-1]
277  container = self.get_assignments_with_heap(root,
278  params.heap_solutions)
279  self.solution_assignments = container.get_assignments()
280  tf = time.time()
281  log.info("found %s assignments. Time %s",
282  len(self.solution_assignments), tf - t0)
283  self.assignments_sampling_done = True
284  else:
285  raise ValueError("Requested wrong mode for the DOMINO sampling")
286 
287  def get_assignment_text(self, assignment, subset):
288  """
289  Get the formatted text for an assignment
290  @param subset Subset of components of the assembly
291  @param assignment Assignment class with the states for the subset
292  @note: The order in assignment and subset is not always the order
293  of the rigid bodies in self.components_rbs. The function reorders
294  the terms in the assignment so there is correspondence.
295  """
296  ordered_assignment = []
297  for rb in self.components_rbs:
298  for i, particle in enumerate(subset):
299  if rb.get_particle() == particle:
300  ordered_assignment.append(assignment[i])
301  text = "|".join([str(i) for i in ordered_assignment])
302  return text
303 
304  def get_assignment_and_RFs(self, assignment, subset):
305  """
306  Return a line with texts for an assignment and the
307  the reference frames of the RigidBodies in the subset.
308  @param subset Subset of components of the assembly
309  @param assignment Assignment class with the states for the subset
310  @note: see the get_assignment_assignment_text() note.
311  """
312  ordered_assignment = []
313  RFs = []
314  for rb in self.components_rbs:
315  for i, particle in enumerate(subset):
316  if rb.get_particle() == particle:
317  j = assignment[i]
318  ordered_assignment.append(j)
319  pstates = self.rb_states_table.get_particle_states(rb)
320  pstates.load_particle_state(j, rb.get_particle())
321  RFs.append(rb.get_reference_frame())
322  RFs_text = unit_delim.join(
323  [io.ReferenceFrameToText(ref).get_text() for ref in RFs])
324  assignment_numbers = "|".join([str(i) for i in ordered_assignment])
325  return [assignment_numbers, RFs_text]
326 
327  def get_assignment_info(self, assignment, subset):
328  """
329  Info related to an assignment. Returns a list with text for the
330  assignment and all the scores for the restraints
331  @param subset Subset of components of the assembly
332  @param assignment Assignment class with the states for the subset
333  """
334  text = self.get_assignment_and_RFs(assignment, subset)
335  scores, total_score = self.get_assignment_scores(assignment, subset)
336  info = text + [total_score] + scores
337  return info
338 
339  def get_assignment_scores(self, assignment, subset):
340  """
341  Returns a list with the values for the restraints on a subset of
342  the components of the assembly
343  @param subset Subset of components of the assembly
344  @param assignment Assignment class with the states for the subset
345  """
346  restraints_to_evaluate = \
347  self.restraint_cache.get_restraints(subset, [])
348  scores = [self.restraint_cache.get_score(r, subset, assignment)
349  for r in restraints_to_evaluate]
350  total_score = sum(scores)
351  return scores, total_score
352 
353  def load_state(self, assignment):
354  """
355  Load the positions of the components given by the assignment
356  @param assignment Assignment class
357  """
358  subset = self.rb_states_table.get_subset()
359  domino.load_particle_states(subset, assignment, self.rb_states_table)
360 
361  def get_assignments_with_heap(self, vertex, k=0):
362  """
363  Domino sampling that recovers the assignments for the root of the
364  merge tree, but
365  conserving only the best k scores for each vertex of the tree.
366  @param[in] vertex Vertex with the root of the current merge tree. This
367  function is recursive.
368  @param[in] k
369  """
370  if(self.sampler.get_number_of_subset_filter_tables() == 0):
371  raise ValueError("No subset filter tables")
372  if(self.merge_tree is None):
373  raise ValueError("No merge tree")
374  # In the merge tree, the names of the vertices are the subsets.
375  # The type of the vertex name is a domino.Subset
376  subset = self.merge_tree.get_vertex_name(vertex)
377  log.info("computing assignments for vertex %s", subset)
378  t0 = time.time()
379  assignments_container = domino.HeapAssignmentContainer(subset, k,
380  self.restraint_cache, "my_heap_assignments_container")
381  neighbors = self.merge_tree.get_out_neighbors(vertex)
382  if len(neighbors) == 0: # A leaf
383  # Fill the container with the assignments for the leaf
384  self.sampler.load_vertex_assignments(vertex, assignments_container)
385  else:
386  if neighbors[0] > neighbors[1]:
387  # get_vertex_assignments() methods in domino
388  # expects the children in sorted order
389  neighbors = [neighbors[1], neighbors[0]]
390  # recurse on the two children
391  container_child0 = self.get_assignments_with_heap(neighbors[0], k)
392  container_child1 = self.get_assignments_with_heap(neighbors[1], k)
393  self.sampler.load_vertex_assignments(vertex,
394  container_child0,
395  container_child1,
396  assignments_container)
397  tf = time.time() - t0
398  log.info("Merge tree vertex: %s assignments: %s Time %s sec.", subset,
399  assignments_container.get_number_of_assignments(), tf)
400  return assignments_container
401 
402  def get_restraint_value_for_assignment(self, assignment, name):
403  """
404  Recover the value of a restraint
405  @param name of the restraint
406  @param assignment Assignment class containing the assignment for
407  the solution desired
408  """
409  if not self.assignments_sampling_done:
410  raise ValueError("DominoModel: sampling not done")
411  log.debug("Computing restraint value for assignment %s", assignment)
412  self.load_state(assignment)
413  for rb in self.components_rbs:
414  log.debug("rb - %s",
415  rb.get_reference_frame().get_transformation_to())
416  val = self.restraints[name].evaulate(False)
417  log.debug("restraint %s = %s", restraint.get_name(), val)
418  return val
419 
421  """
422  Sets the native model for benchmark, by reading the native
423  structure and set the rigid bodies.
424  """
425  self.measure_models = True
426  self.native_model = IMP.kernel.Model()
427  if hasattr(params.benchmark, "fn_pdb_native"):
428  self.native_assembly = \
429  representation.create_assembly_from_pdb(self.native_model,
430  params.benchmark.fn_pdb_native, params.names)
431  elif hasattr(params.benchmark, "fn_pdbs_native"):
432  self.native_assembly = \
433  representation.create_assembly(self.native_model,
434  params.benchmark.fn_pdbs_native, params.names)
435  else:
436  raise ValueError("set_native_assembly_for_benchmark: Requested "
437  "to set a native assembly for benchmark but did not provide "
438  "its pdb, or the pdbs of the components")
439  self.native_rbs = representation.create_rigid_bodies(
440  self.native_assembly)
441  anchor_assembly(self.native_rbs, params.anchor)
442 
443  def set_rb_states(self, rb, transformations):
444  """
445  Add a set of reference frames as possible states for a rigid body
446  @param rb The rigid body
447  @param transformations Transformations are used to build the
448  reference frames for the rigid body.
449  """
450  log.info("Set rigid body states for %s", rb.get_name())
451  RFs = [alg.ReferenceFrame3D(T) for T in transformations]
452  for st in RFs:
453  log.debug("%s", st)
454  rb_states = domino.RigidBodyStates(RFs)
455  self.rb_states_table.set_particle_states(rb, rb_states)
456  st = self.rb_states_table.get_particle_states(rb)
457  log.info("RigidBodyStates created %s",
458  st.get_number_of_particle_states())
459 
461  """
462  Add a RestraintScoreSubsetFilterTable to DOMINO that allows to
463  reject assignments that have score worse than the thresholds for
464  the restraints
465  """
466  log.info("Adding RestraintScoreSubsetFilterTable")
467  # Restraint Cache is a centralized container of the restraints and
468  # their scores
469  self.restraint_cache = domino.RestraintCache(self.rb_states_table)
470  self.restraint_cache.add_restraints(self.model.get_restraints())
471  rssft = domino.RestraintScoreSubsetFilterTable(self.restraint_cache)
472  rssft.set_name('myRestraintScoreSubsetFilterTable')
473  self.sampler.add_subset_filter_table(rssft)
474 
475  def set_assembly_components(self, fn_pdbs, names):
476  """
477  Sets the components of an assembly, each one given as a separate
478  PDB file.
479  @param fn_pdbs List with the names of the pdb files
480  @param names List with the names of the components of the assembly.
481  """
482  if len(fn_pdbs) != len(names):
483  raise ValueError("Each PDB file needs a name")
484  self.names = names
485  self.assembly = representation.create_assembly(self.model, fn_pdbs,
486  names)
487  self.components_rbs = representation.create_rigid_bodies(self.assembly)
488  self.not_optimized_rbs = []
489 
490  def set_assembly(self, fn_pdb, names):
491  """
492  Sets an assembly from a single PDB file. The function assumes that
493  the components of the assembly are the chains of the PDB file.
494  @param fn_pdb PDB with the file for the asembly
495  @param names Names for each of the components (chains)
496  of the asembly
497  """
498  self.assembly = representation.create_assembly_from_pdb(self.model,
499  fn_pdb,
500  names)
501  self.components_rbs = representation.create_rigid_bodies(self.assembly)
502  self.not_optimized_rbs = []
503 
505  """
506  BranchAndBoundAssignmentsTable enumerate states based on provided
507  filtered using the provided the subset filter tables
508  """
509  log.info("Setting BranchAndBoundAssignmentsTable")
510  fts = []
511  for i in range(self.sampler.get_number_of_subset_filter_tables()):
512  ft = self.sampler.get_subset_filter_table(i)
513  fts.append(ft)
514  at = domino.BranchAndBoundAssignmentsTable(self.rb_states_table, fts)
515  self.sampler.set_assignments_table(at)
516  self.assignments_table = at
517 
519  """
520  ExclusionSubsetFilterTable ensures that two particles are not given
521  the same state at the same time
522  """
523  log.info("Setting ExclusionSubsetFilterTable")
524  ft = domino.ExclusionSubsetFilterTable(self.rb_states_table)
525  self.sampler.add_subset_filter_table(ft)
526 
528  """
529  Creates a DOMINO sampler and adds the required filter tables
530  """
531  if self.merge_tree is None:
532  raise ValueError("Merge tree for DOMINO not set. It is required "
533  "to setup the sampler")
534  log.info("Domino sampler")
535  self.sampler = domino.DominoSampler(self.model, self.rb_states_table)
536  self.sampler.set_log_level(IMP.base.TERSE)
537  self.sampler.set_merge_tree(self.merge_tree)
541 
542  def read_merge_tree(self, fn):
543  """
544  Read and create a merge tree from a file.
545  The format of the file is the format written by write merge_tree()
546  It does not matter the order of the indices in the file, as
547  they are sorted by this function.
548  The only condition is that the index for the vertex that is the
549  root of the tree MUST be the largest. This is guaranteed when
550  creating a merge tree with create_merge_tree()
551  @param fn File containing the merge tree
552  """
553  log.info("Reading merge tree from %s", fn)
554  ps = self.rb_states_table.get_particles()
555  log.debug("particles")
556  for p in ps:
557  log.debug("%s", p.get_name())
558  rows = csv_related.read_csv(fn, delimiter=field_delim)
559  for i in range(len(rows)):
560  row = rows[i]
561  rows[i] = [int(row[0]), int(row[1]), int(row[2]), row[3]]
562  # Sort rows by vertice index
563  rows.sort()
564  subsets = []
565  # build subsets from the rows text
566  for row in rows:
567  names = row[3].split(unit_delim)
568  log.debug("row %s names %s", row, names)
569  # get particles in the subset
570  particles = []
571  for name in names:
572  l = [p for p in ps if p.get_name() == name]
573  if (len(l) > 1):
574  ValueError("More than one particle with same name" % names)
575  particles.extend(l)
576  s = domino.Subset(particles)
577  subset_names = [p.get_name() for p in particles]
578  log.debug("Merge tree Subset %s. Particles %s ", s, subset_names)
579  subsets.append(s)
580  # The vertex with the largest index is the root.
581  # trick: get the merge tree object from a tree with only one node ...
582  jt = domino.SubsetGraph()
583  jt.add_vertex(subsets[0])
584  mt = domino.get_merge_tree(jt)
585  # ... and add the rest of the vertices
586  for i in range(1, len(subsets)):
587  mt.add_vertex(subsets[i]) # the name of the vertex is a subset
588  # set edges
589  for row in rows:
590  vid = row[0]
591  child_left = row[1]
592  child_right = row[2]
593  if child_left != -1:
594  mt.add_edge(vid, child_left)
595  if child_right != -1:
596  mt.add_edge(vid, child_right)
597  self.merge_tree = mt
598  log.info("%s", self.merge_tree.show_graphviz())
599 
600  def create_merge_tree(self):
601  """
602  Creates a merge tree from the restraints that are set already
603  """
604  rs = self.model.get_restraints()
605  ig = domino.get_interaction_graph(rs, self.rb_states_table)
606 # pruned_dep = IMP.get_pruned_dependency_graph(self.model)
607 # IMP.base.show_graphviz(pruned_dep)
608 # IMP.base.show_graphviz(ig)
609  jt = domino.get_junction_tree(ig)
610 # IMP.base.show_graphviz(jt)
611  self.merge_tree = domino.get_balanced_merge_tree(jt)
612 # IMP.base.show_graphviz(self.merge_tree)
613  log.info("Balanced merge tree created")
614  log.info("%s", self.merge_tree.show_graphviz())
615 
616  def write_merge_tree(self, fn):
617  """
618  Writes the current merge tree to file. The format is:
619  parent | child_left | child_right | labels
620  @param fn File for storing the merge tree
621  """
622  log.info("Writing merge to %s", fn)
623  if self.merge_tree is None:
624  raise ValueError("No merge tree")
625  f = open(fn, "w")
626  f.write(
627  "# Vertex index | Child0 | Child1 | label (names of the particles)\n")
628  w = csv.writer(f, delimiter=field_delim)
629  root = self.merge_tree.get_vertices()[-1]
630  self.write_subset(root, w)
631  f.close()
632 
633  def write_subset(self, v, w):
634  """
635  Writes lines describing a subset (vertex of a merge tree)
636  @param w A csv.writer
637  @param v Vertex index
638  """
639  no_child_index = -1
640  subset = self.merge_tree.get_vertex_name(v)
641  names = [p.get_name() for p in subset]
642  reps = [x for x in names if names.count(x) > 1] # repeated names
643  if(len(reps)):
644  raise ValueError("The names of the "
645  "particles in the subset %s are not unique" % subset)
646  names = unit_delim.join(names)
647  neighbors = self.merge_tree.get_out_neighbors(v)
648  if len(neighbors) == 0: # A leaf
649  # Fill the container with the assignments for the leaf
650  w.writerow([v, no_child_index, no_child_index, names])
651  else:
652  if neighbors[0] > neighbors[1]:
653  neighbors = [neighbors[1], neighbors[0]]
654  w.writerow([v, neighbors[0], neighbors[1], names])
655  self.write_subset(neighbors[0], w)
656  self.write_subset(neighbors[1], w)
657 
658  def set_connectivity_restraint(self, names, rname,
659  distance=10,
660  weight=1.0, max_score=None, n_pairs=1,
661  stddev=2):
662  """
663  Set a connectivity restraint between the elements of the assembly
664  @param names List with all the elements to be connected
665  @param distance Maximum distance tolerated by the restraint
666  @param rname Name for the restraint
667  @param weight Weight for the restraint
668  @param max_score Maximum score for the restraint
669  @param n_pairs Number of pairs of particles used in the
670  KClosePairScore of the connecitivity restraint
671  @param stddev Standard deviation used to approximate the
672  HarmonicUpperBound function to a Gaussian
673  """
674  components = []
675  for name in names:
676  c = representation.get_component(self.coarse_assembly, name)
677  components.append(c)
678  log.info("Connectivity restraint for %s", components)
679  spring_constant = core.Harmonic.get_k_from_standard_deviation(stddev)
680  if max_score is None:
681  max_score = weight * spring_constant * n_pairs * (stddev) ** 2
682  r = restraints.get_connectivity_restraint(
683  components, distance, n_pairs,
684  spring_constant)
685  self.add_restraint(r, rname, weight, max_score)
686 
687  def set_em2d_restraint(self, name, fn_images_sel, pixel_size, resolution,
688  n_projections, weight, max_score=False,
689  mode="fast", n_optimized=1):
690  """
691  Set a Restraint computing the em2d score.
692  @param name Name for the restraint
693  @param fn_images_sel Selection file with the names of the images
694  used for the restraint
695  @param pixel_size - pixel size in the images
696  @param resolution - resolution used to downsample the projections
697  of the model when projecting
698  @param weight Weight of the restraint
699  @param max_score - Maximum value tolerated for the restraint
700  @param n_projections - Number of projections to generate
701  when projecting the model to do the coarse alignments
702  @param mode - Mode used for computing the restraints.
703  @param n_optimized - number of results from the coarse
704  alignment that are refined with the Simplex algorithm
705  to get a more accurate value for the restraint
706  """
707  log.info("Adding a em2D restraint: %s", fn_images_sel)
708  restraint_params = em2d.Em2DRestraintParameters(pixel_size,
709  resolution,
710  n_projections)
711  r = restraints.get_em2d_restraint(self.assembly,
712  fn_images_sel,
713  restraint_params,
714  mode, n_optimized)
715  self.add_restraint(r, name, weight, max_score)
716 
717  def set_not_optimized(self, name):
718  """
719  Set a part of the model as not optimized (it does not move during
720  the model optimization)
721  @param name of the component to optimized
722  """
723  if name not in self.names:
724  raise ValueError("DominoModel: There is not component "
725  "in the assembly with this name")
726  rb_name = representation.get_rb_name(name)
727  self.not_optimized_rbs.append(rb_name)
728 
729  def set_close_pairs_excluded_volume_restraint(self, distance=10,
730  weight=1.0, ratio=0.05, stddev=2,
731  max_score=False):
732  """
733  Creates an excluded volume restraint between all the components
734  of the coarse assembly.See help for
735  @param distance Maximum distance tolerated between particles
736  @param weight Weight for the restraint
737  @param stddev
738  @param max_score Maximum value for the restraint. If the parameter
739  is None, an automatic value is computed (using the ratio).
740  @param ratio Fraction of the number of possible pairs of
741  particles that are tolerated to overlap. The maximum
742  score is modified according to this ratio.
743  I have found that ratios of 0.05-0.1 work well allowing for
744  some interpenetration
745  @param stddev Standard deviation used to approximate the
746  HarmonicUpperBound function to a Gaussian
747  """
748  k = core.Harmonic.get_k_from_standard_deviation(stddev)
749  for i, c1 in enumerate(self.coarse_assembly.get_children()):
750  for j, c2 in enumerate(self.coarse_assembly.get_children()):
751  if j > i:
752  name = "exc_vol_%s_%s" % (c1.get_name(), c2.get_name())
753 
754  ls1 = atom.get_leaves(c1)
755  ls2 = atom.get_leaves(c2)
756  possible_pairs = len(ls1) * len(ls2)
757  n_pairs = possible_pairs * ratio
758 
759  marker1 = IMP.kernel.Particle(
760  self.model, "marker1 " + name)
761  marker2 = IMP.kernel.Particle(
762  self.model, "marker2 " + name)
763  table_refiner = core.TableRefiner()
764  table_refiner.add_particle(marker1, ls1)
765  table_refiner.add_particle(marker2, ls2)
766  score = core.HarmonicLowerBound(distance, k)
767  pair_score = core.SphereDistancePairScore(score)
768  close_pair_score = core.ClosePairsPairScore(pair_score,
769  table_refiner,
770  distance)
771  r = core.PairRestraint(close_pair_score,
772  IMP.kernel.ParticlePair(marker1, marker2))
773 
774  if not max_score:
775  minimum_distance_allowed = 0
776  max_score = weight * n_pairs * \
777  score.evaluate(minimum_distance_allowed)
778  log.debug("Setting close_pairs_excluded_volume_restraint(), "
779  "max_score %s", max_score)
780  self.add_restraint(r, name, weight, max_score)
781 
782  def set_pair_score_restraint(self, name1, name2,
783  restraint_name, distance=10,
784  weight=1.0, n_pairs=1, stddev=2, max_score=None):
785  """
786  Set a pair_score restraint between the coarse representation
787  of two components
788  @param name1 Name of the first component
789  @param name2 Name of the second component
790  @param restraint_name Name for the restraint
791  @param distance Maximum distance tolerated between particles
792  @param weight Weight of the restraint
793  @param n_pairs
794  @param max_score Maximum value tolerated for the restraint
795  @param stddev Standard deviation used to approximate the
796  HarmonicUpperBound function to a Gaussian
797  """
798  table_refiner = core.TableRefiner()
799 
800  # The particles in A are attached to a marker particle via a refiner.
801  # When the refiner gets a request for marker1, it returns the attached
802  # particles
803  A = representation.get_component(self.coarse_assembly, name1)
804  marker1 = IMP.kernel.Particle(self.model, "marker1 " + restraint_name)
805  table_refiner.add_particle(marker1, atom.get_leaves(A))
806  # same for B
807  B = representation.get_component(self.coarse_assembly, name2)
808  marker2 = IMP.kernel.Particle(self.model, "marker2 " + restraint_name)
809  table_refiner.add_particle(marker2, atom.get_leaves(B))
810 
811  k = core.Harmonic.get_k_from_standard_deviation(stddev)
813  # The score is set for the n_pairs closest pairs of particles
814  pair_score = core.KClosePairsPairScore(score, table_refiner, n_pairs)
815  # When KClosePairsPairScore is called, the refiner will provide the
816  # particles for A and B
817  if not max_score:
818  # Build a maximum score based on the function type that is used,
819  # an HarmonicUpperBound
820  temp_score = core.HarmonicUpperBound(distance, k)
821  error_distance_allowed = 10
822  max_score = weight * n_pairs * \
823  temp_score.evaluate(distance + error_distance_allowed)
824 
825  log.info("Setting pair score restraint for %s %s. k = %s, max_score "
826  "= %s, stddev %s", name1, name2, k, max_score, stddev)
827  r = core.PairRestraint(
828  pair_score,
830  marker1,
831  marker2))
832  self.add_restraint(r, restraint_name, weight, max_score)
833 
834  def write_solutions_database(self, fn_database, max_number=None):
835  """
836  Write the results of the DOMINO sampling to a SQLite database.
837  @param fn_database
838  @param max_number Maximum number of results to write
839  """
840  log.info("Creating the database of solutions")
841  if self.measure_models and not hasattr(self, "native_model"):
842  raise ValueError("The native model has not been set")
844  db.create(fn_database, overwrite=True)
845  db.connect(fn_database)
846  subset = self.rb_states_table.get_subset()
847  restraints_names = self.get_restraints_names_used(subset)
848  db.add_results_table(restraints_names, self.measure_models)
849  n = len(self.solution_assignments)
850  if max_number is not None:
851  n = min(n, max_number)
852  for sol_id, assignment in enumerate(self.solution_assignments[0:n]):
853  txt = self.get_assignment_text(assignment, subset)
854  # load the positions of the components in the state
855  self.load_state(assignment)
856  RFs = [rb.get_reference_frame() for rb in self.components_rbs]
857  scores, total_score = self.get_assignment_scores(
858  assignment, subset)
859  measures = None
860  if(self.measure_models):
861  measures = measure_model(self.assembly, self.native_assembly,
862  self.components_rbs, self.native_rbs)
863  db.add_record(sol_id, txt, RFs, total_score, scores, measures)
864  # Add native state if doing benchmarking
865  if self.measure_models:
866  assignment = "native"
867  RFs = [rb.get_reference_frame() for rb in self.native_rbs]
868  scores, total_score = self.get_restraints_for_native(
869  restraints_names)
870  db.add_native_record(assignment, RFs, total_score, scores)
871  db.save_records()
872  db.close()
873 
874  def get_restraints_names_used(self, subset):
875  """ Get the names of the restraints used for a subset
876  @param subset Subset of components of the assembly
877  """
878  return [r.get_name() for r
879  in self.restraint_cache.get_restraints(subset, [])]
880 
881  def get_restraints_for_native(self, restraints_names):
882  """
883  Get values of the restraints for the native structure
884  @param restraints_names Names of the restraints requested
885  @return a list with the values of all scores, and the total score
886  """
887  saved = [rb.get_reference_frame() for rb in self.components_rbs]
888  # Set the state of the model to native
889  for rb, rn in zip(self.components_rbs, self.native_rbs):
890  # remove sub-rigid bodies
891  rb_members = [m for m in rb.get_members()
892  if not core.RigidBody.get_is_setup(m.get_particle())]
893  rn_members = [m for m in rn.get_members()
894  if not core.RigidBody.get_is_setup(m.get_particle())]
895  rb_coords = [m.get_coordinates() for m in rb_members]
896  rn_coords = [m.get_coordinates() for m in rn_members]
897 
898  # align and put rb in the position of rn
899  if len(rn_coords) != len(rb_coords):
900  raise ValueError("Mismatch in the number of members. "
901  "Reference %d Aligned %d " % (len(rn_coords), len(rb_coords)))
902  T = alg.get_transformation_aligning_first_to_second(rb_coords,
903  rn_coords)
904  t = rb.get_reference_frame().get_transformation_to()
905  new_t = alg.compose(T, t)
906  rb.set_reference_frame(alg.ReferenceFrame3D(new_t))
907  scores = []
908  for name in restraints_names:
909  scores.append(self.restraints[name].evaluate(False))
910  total_score = sum(scores)
911  representation.set_reference_frames(self.components_rbs, saved)
912  return scores, total_score
913 
914  def write_pdb_for_assignment(self, assignment, fn_pdb):
915  """
916  Write the solution in the assignment
917  @param assignment Assignment class with the states for the subset
918  @param fn_pdb File to write the model
919  """
920  if not self.assignments_sampling_done:
921  raise ValueError("DominoModel: sampling not done")
922  log.info("Writting PDB %s for assignment %s", fn_pdb, assignment)
923  self.load_state(assignment)
924  atom.write_pdb(self.assembly, fn_pdb)
925 
926  def write_pdb_for_configuration(self, n, fn_pdb):
927  """
928  Write a file with a configuration for the model (configuration
929  here means a configuration in DOMINO)
930  @param n Index of the configuration desired.
931  @param fn_pdb
932  """
933  if not self.configuration_sampling_done:
934  raise ValueError("DominoModel: sampling not done")
935  log.debug("Writting PDB %s for configuration %s", fn_pdb, n)
936  configuration_set.load_configuration(n)
937  atom.write_pdb(self.assembly, fn_pdb)
938 
939  def write_pdb_for_reference_frames(self, RFs, fn_pdb):
940  """
941  Write a PDB file with a solution given by a set of reference frames
942  @param RFs Reference frames for the elements of the complex
943  @param fn_pdb File to write the solution
944  """
945  log.debug("Writting PDB %s for reference frames %s", fn_pdb, RFs)
946  representation.set_reference_frames(self.components_rbs, RFs)
947  atom.write_pdb(self.assembly, fn_pdb)
948 
949  def write_pdbs_for_reference_frames(self, RFs, fn_base):
950  """
951  Write a separate PDB for each of the elements
952  @param RFs Reference frames for the elements of the complex
953  @param fn_base base string to buid the names of the PDBs files
954  """
955  log.debug("Writting PDBs with basename %s", fn_base)
956  representation.set_reference_frames(self.components_rbs, RFs)
957  for i, ch in enumerate(self.assembly.get_children()):
958  atom.write_pdb(ch, fn_base + "component-%02d.pdb" % i)
959 
960  def write_pdb_for_component(self, component_index, ref, fn_pdb):
961  """
962  Write one component of the assembly
963  @param component_index Position of the component in the assembly
964  @param ref Reference frame of the component
965  @param fn_pdb Output PDB
966  """
967  ("Writting PDB %s for component %s", fn_pdb, component_index)
968  self.components_rbs[component_index].set_reference_frame(ref)
969  hierarchy_component = self.assembly.get_child(component_index)
970  atom.write_pdb(hierarchy_component, fn_pdb)
971 
972  def write_monte_carlo_solution(self, fn_database):
973  """
974  Write the solution of a MonteCarlo run
975  @param fn_database Database file with the solution.
976  The database will contain only one record
977  """
978  total_score = 0
979  rnames = []
980  scores = []
981  for i in range(self.model.get_number_of_restraints()):
982  r = self.model.get_restraint(i)
983  score = r.evaluate(False)
984  rnames.append(r.get_name())
985  scores.append(score)
986  total_score += score
988  db.create(fn_database, overwrite=True)
989  db.connect(fn_database)
990  db.add_results_table(rnames, self.measure_models)
991  RFs = [rb.get_reference_frame() for rb in self.components_rbs]
992  measures = None
993  if(self.measure_models):
994  measures = measure_model(self.assembly, self.native_assembly,
995  self.components_rbs, self.native_rbs)
996  solution_id = 0
997  db.add_record(solution_id, "", RFs, total_score, scores, measures)
998  db.save_records()
999  db.close()
1000 
1001 
1002 def print_restraints_values(model):
1003  print("Restraints: Name, weight, value, maximum_value")
1004  total_score = 0
1005  for i in range(model.get_number_of_restraints()):
1006  r = model.get_restraint(i)
1007  score = r.evaluate(False)
1008 # print "%20s %18f %18f %18f" % (r.get_name(), r.get_weight(),
1009 # score, r.get_maximum_score())
1010  print("%20s %18f %18f" % (r.get_name(), r.get_weight(), score))
1011  total_score += score
1012  print("total_score:", total_score)
1013 
1014 
1015 def anchor_assembly(components_rbs, anchored):
1016  """
1017  "Anchor" a set of rigid bodies, by setting the position of one of them
1018  at the origin (0,0,0).
1019  @param components_rbs Rigid bodies of the components
1020  @param anchored - List of True/False values indicating if the components
1021  of the assembly are anchored. The function sets the FIRST anchored
1022  component in the (0,0,0) coordinate and moves the other
1023  components with the same translation. If all the values are False, the
1024  function does not do anything
1025  """
1026  if True in anchored:
1027  anchored_rb = None
1028  for a, rb in zip(anchored, components_rbs):
1029  if a:
1030  anchored_rb = rb
1031  break
1032  center = anchored_rb.get_coordinates()
1033  log.info("Anchoring assembly at (0,0,0)")
1034  for rb in components_rbs:
1035  T = rb.get_reference_frame().get_transformation_to()
1036  t = T.get_translation()
1037  R = T.get_rotation()
1038  log.debug("%s: Rerefence frame BEFORE %s",
1039  rb.get_name(), rb.get_reference_frame())
1040  T2 = alg.Transformation3D(R, t - center)
1041  rb.set_reference_frame(alg.ReferenceFrame3D(T2))
1042  log.debug("%s Rerefence frame AFTER %s",
1043  rb.get_name(), rb.get_reference_frame())
1044 
1045 
1046 def measure_model(assembly, native_assembly, rbs, native_rbs):
1047  """
1048  Get the drms, cdrms and crmsd for a model
1049  @param assembly Hierachy for an assembly
1050  @param native_assembly Hierarchy of the native structure
1051  @param rbs Rigid bodies for the components of the assembly
1052  @param native_rbs Rigid bodies for the components of the native
1053  structure
1054  @return cdrms, cdrms, crmsd
1055  - drms - typical drms measure
1056  - cdrms - drms for the centroids of the rigid bodies of the components
1057  of the assembly
1058  - crmsd - rmsd for the centroids
1059  """
1060  log.debug("Measure model")
1061  drms = comparisons.get_drms_for_backbone(assembly, native_assembly)
1062  RFs = [rb.get_reference_frame() for rb in rbs]
1063  vs = [r.get_transformation_to().get_translation() for r in RFs]
1064  nRFs = [rb.get_reference_frame() for rb in native_rbs]
1065  nvs = [r.get_transformation_to().get_translation() for r in nRFs]
1066  cdrms = atom.get_drms(vs, nvs)
1067  crmsd, RFs = alignments.align_centroids_using_pca(RFs, nRFs)
1068  log.debug("drms %s cdrms %s crmsd %s", drms, cdrms, crmsd)
1069  return [drms, cdrms, crmsd]
1070 
1071 
1072 def get_coordinates(rigid_bodies):
1073  """
1074  Return a list of the coordinates of all the members of the rigid bodies
1075  """
1076  if len(rigid_bodies) == 0:
1077  raise ValueError(
1078  "get_coordinates: There are not rigid bodies to get coordinates from")
1079  coords = []
1080  for rb in rigid_bodies:
1081  # remove sub-rigid bodies
1082  rb_members = [m for m in rb.get_members()
1083  if not core.RigidBody.get_is_setup(m.get_particle())]
1084  coords.extend([m.get_coordinates() for m in rb_members])
1085  return coords
def write_pdb_for_component
Write one component of the assembly.
def get_assignment_and_RFs
Return a line with texts for an assignment and the the reference frames of the RigidBodies in the sub...
Utility functions to handle restraints.
Definition: restraints.py:1
A harmonic upper bound on the distance between two spheres.
Lower bound harmonic function (non-zero when feature < mean)
Compute the complementarity between two molecules.
def set_rb_states
Add a set of reference frames as possible states for a rigid body.
def add_exclusion_filter_table
ExclusionSubsetFilterTable ensures that two particles are not given the same state at the same time...
Restraints using electron microscopy 2D images (class averages).
def get_restraints_names_used
Get the names of the restraints used for a subset.
def write_solutions_database
Write the results of the DOMINO sampling to a SQLite database.
Various classes to hold sets of particles.
Upper bound harmonic function (non-zero when feature > mean)
def get_assignment_scores
Returns a list with the values for the restraints on a subset of the components of the assembly...
Utility functions to handle representation.
def anchor_assembly
"Anchor" a set of rigid bodies, by setting the position of one of them at the origin (0...
Utility functions to handle alignments.
Definition: alignments.py:1
Low level functionality (logging, error handling, profiling, command line flags etc) that is used by ...
Sample best solutions using Domino.
Definition: DominoSampler.h:32
def set_close_pairs_excluded_volume_restraint
Creates an excluded volume restraint between all the components of the coarse assembly.See help for.
def set_xlink_restraint
Set a restraint on the maximum distance between 2 residues.
def get_assignment_text
Get the formatted text for an assignment.
def load_state
Load the positions of the components given by the assignment.
Filter a configuration of the subset using the kernel::Model thresholds.
Represent a subset of the particles being optimized.
Definition: Subset.h:33
def get_coordinates
Return a list of the coordinates of all the members of the rigid bodies.
Utility functions to handle IO.
Definition: io.py:1
A score on the distance between the surfaces of two spheres.
def create_merge_tree
Creates a merge tree from the restraints that are set already.
Management of a model using DOMINO.
Definition: domino_model.py:38
def get_restraint_value_for_assignment
Recover the value of a restraint.
def write_merge_tree
Writes the current merge tree to file.
def set_connectivity_restraint
Set a connectivity restraint between the elements of the assembly.
def write_pdbs_for_reference_frames
Write a separate PDB for each of the elements.
def write_pdb_for_reference_frames
Write a PDB file with a solution given by a set of reference frames.
A class to store an fixed array of same-typed values.
Definition: Array.h:33
Do not allow two particles to be in the same state.
def create_coarse_assembly
Simplify the assembly with a coarse representation.
def write_subset
Writes lines describing a subset (vertex of a merge tree)
def get_restraints_for_native
Get values of the restraints for the native structure.
Utility functions to store and retrieve solution information.
Definition: solutions_io.py:1
A lookup based particle refiner.
Definition: TableRefiner.h:21
def set_native_assembly_for_benchmark
Sets the native model for benchmark, by reading the native structure and set the rigid bodies...
def do_sampling
Do Domino sampling.
def write_pdb_for_configuration
Write a file with a configuration for the model (configuration here means a configuration in DOMINO) ...
def set_complementarity_restraint
Set a restraint for geometric complementarity between 2 components.
def measure_model
Get the drms, cdrms and crmsd for a model.
Fitting atomic structures into a cryo-electron microscopy density map.
Class for managing the results of the experiments.
def set_assembly
Sets an assembly from a single PDB file.
def get_assignment_info
Info related to an assignment.
def read_merge_tree
Read and create a merge tree from a file.
def setup_domino_sampler
Creates a DOMINO sampler and adds the required filter tables.
Class to handle individual model particles.
def align_rigid_bodies_states
Aligns the set of structures considered for DOMINO sampling.
Definition: domino_model.py:80
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Parameters used by Em2DRestraint and ProjectionFinder.
def get_assignments_with_heap
Domino sampling that recovers the assignments for the root of the merge tree, but conserving only the...
def set_pair_score_restraint
Set a pair_score restraint between the coarse representation of two components.
Utility functions for comparisons.
Definition: comparisons.py:1
def set_em2d_restraint
Set a Restraint computing the em2d score.
def add_branch_and_bound_assignments_table
BranchAndBoundAssignmentsTable enumerate states based on provided filtered using the provided the sub...
def set_not_optimized
Set a part of the model as not optimized (it does not move during the model optimization) ...
def add_restraint
Adds a restraint to the model.
Definition: domino_model.py:59
Applies a PairScore to a Pair.
Definition: PairRestraint.h:29
Output IMP model data in various file formats.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:62
def write_pdb_for_assignment
Write the solution in the assignment.
Divide-and-conquer inferential optimization in discrete space.
def set_assembly_components
Sets the components of an assembly, each one given as a separate PDB file.
def add_restraint_score_filter_table
Add a RestraintScoreSubsetFilterTable to DOMINO that allows to reject assignments that have score wor...
Class for storing model, its restraints, constraints, and particles.
Definition: kernel/Model.h:73
def write_monte_carlo_solution
Write the solution of a MonteCarlo run.