IMP  2.3.0
The Integrative Modeling Platform
DominoModel.py
1 import sys
2 import time
3 import csv
4 import logging
5 log = logging.getLogger("DominoModel")
6 
7 import IMP
8 import IMP.domino as domino
9 import IMP.core as core
10 import IMP.container as container
11 import IMP.display as display
12 import IMP.atom as atom
13 import IMP.algebra as alg
14 import IMP.em2d as em2d
15 import IMP.base
16 import IMP.multifit as multifit
17 
18 import IMP.em2d.imp_general.comparisons as comparisons
19 import IMP.em2d.imp_general.alignments as alignments
20 import IMP.em2d.imp_general.io as io
21 import IMP.em2d.imp_general.representation as representation
22 
23 import IMP.em2d.restraints as restraints
24 import IMP.em2d.buildxlinks as buildxlinks
25 import IMP.em2d.solutions_io as solutions_io
26 import IMP.em2d.csv_related as csv_related
27 
28 
29 field_delim = "," # separate fields in the output text file
30 unit_delim = "/" # separate units within a field (eg, reference frames).
31  # It is used in output text files and in databse files
32 
33 
35 
36  """
37  Management of a model using DOMINO
38  """
39 
40  def __init__(self, name="my model"):
41  self.model = IMP.kernel.Model()
42  self.model.set_name(name)
43  self.configuration_sampling_done = False
44  self.assignments_sampling_done = False
45  self.rb_states_table = domino.ParticleStatesTable()
46  self.merge_tree = None
47  # measure_models is True only when doing benchmark
48  self.measure_models = False
49  # The first time that create_coarse is found and its value is True
50  # a coarse version of the assembly is built
51  self.create_coarse = True
52  self.restraints = dict()
53  self.xlinks_dict = buildxlinks.XlinksDict()
54 
55  def add_restraint(self, r, name, weight, max_score=False):
56  """
57  Adds a restraint to the model
58  @param r An IMP.kernel.Restraint object
59  @param name Name for the restraint
60  @param weight Weight for the restraint
61  @param max_score Maximum score allowed for the restraint. If
62  max_score is False, there is no limit for the value of the restraint
63  """
64  log.info("Adding restraint %s weight %s max_score %s",
65  name, weight, max_score)
66  if "-" in name:
67  raise ValueError("SQL database does not accept restraint names "
68  "containing -")
69  r.set_name(name)
70  if max_score is not None and max_score:
71  r.set_maximum_score(max_score)
72  r.set_weight(weight)
73  self.restraints[name] = r
74  self.model.add_restraint(r)
75 
77  """
78  Aligns the set of structures considered for DOMINO sampling.
79  The function:
80  1) reads the possible reference frames that the
81  rb_states_table stores for each rigid body. This table
82  must be filled before using this function. Usually it is
83  filled with the results from a previous Monte Carlo sampling.
84  If the solutions from Monte Carlo seem to have the same structure
85  but they are not aligned to each other, this function can
86  help setting better starting positions to use with DOMINO.
87  2) Gets the first state for each of the rigid bodies and sets
88  a reference structure using such states.
89  3) Aligns all the rest of the structures respect to the reference
90  4) Replaces the values of the reference frames stored in the
91  rb_states_table with the new values obtained from the alignments.
92  It does it for all states of a rigid body.
93  @note: If this function is applied, the parameters "anchor" and "fixed"
94  are ignored, as they are superseded by the use of the aligments
95  calculated here.
96  """
97  log.debug("Align the configurations read from the database before "
98  "running DOMINO")
99  rb_states = []
100  n_states = []
101  for rb in self.components_rbs:
102  ps = self.rb_states_table.get_particle_states(rb)
103  n = ps.get_number_of_particle_states()
104  if n == 0:
105  raise ValueError(
106  "There are no particle states for %s" %
107  rb.get_name())
108  n_states.append(n)
109  rb_states.append(ps)
110  # coordinates of the first configuration (there is at least one for all
111  # the rbs)
112  for rb, states in zip(self.components_rbs, rb_states):
113  states.load_particle_state(0, rb)
114  reference_coords = get_coordinates(self.components_rbs)
115  aligned_transformations = [[] for rb in self.components_rbs]
116  for i in range(1, max(n_states)):
117  log.debug("Aligning rigid bodies configuration %s" % i)
118  # load the configuration
119  for j in range(len(self.components_rbs)):
120  # if there are no more particle states for this rigid body, use
121  # the last
122  state_index = min(i, n_states[j] - 1)
123  rb_states[j].load_particle_state(
124  state_index,
125  self.components_rbs[j])
126  coords = get_coordinates(self.components_rbs)
127  T = alg.get_transformation_aligning_first_to_second(
128  coords, reference_coords)
129  for j, rb in enumerate(self.components_rbs):
130  t = rb.get_reference_frame().get_transformation_to()
131  new_t = alg.compose(T, t)
132  aligned_transformations[j].append(new_t)
133  # set the new possible transformations
134  for i, rb in enumerate(self.components_rbs):
135  self.set_rb_states(rb, aligned_transformations[i])
136 
137  def set_xlink_restraint(self, id1, chain1, residue1, id2, chain2, residue2,
138  distance, weight, stddev, max_score=False):
139  """
140  Set a restraint on the maximum distance between 2 residues
141  @param id1 Name of the first component
142  @param chain1
143  @param residue1 Residue number for the aminoacid in the first
144  component.The number is the number in the PDB file, not the
145  number relative to the beginning of the chain
146  @param id2 Name of the second component
147  @param chain2
148  @param residue2 Residue number for the aminoacid in the second
149  component.
150  @param distance Maximum distance tolerated
151  @param weight Weight of the restraint
152  @param stddev Standard deviation used to approximate the
153  HarmonicUpperBound function to a Gaussian
154  @param max_score See help for add_restraint(). If none is given,
155  the maximum score is set to allow a maximum distance of 10 Angstrom
156  greater than the parameter "distance".
157  """
158  xlink = buildxlinks.Xlink(
159  id1,
160  chain1,
161  residue1,
162  id2,
163  chain2,
164  residue2,
165  distance)
166  log.info("Setting cross-linking restraint ")
167  log.info("%s", xlink.show())
168  self.xlinks_dict.add(xlink)
169  # setup restraint
170  A = representation.get_component(self.assembly, xlink.first_id)
171  s1 = IMP.atom.Selection(A, chain=xlink.first_chain,
172  residue_index=xlink.first_residue)
173  p1 = s1.get_selected_particles()[0]
174  B = representation.get_component(self.assembly, xlink.second_id)
175  s2 = IMP.atom.Selection(B, chain=xlink.second_chain,
176  residue_index=xlink.second_residue)
177  p2 = s2.get_selected_particles()[0]
178  k = core.Harmonic.get_k_from_standard_deviation(stddev)
179  score = core.HarmonicUpperBound(xlink.distance, k)
180  pair_score = IMP.core.DistancePairScore(score)
181  r = IMP.core.PairRestraint(pair_score, IMP.kernel.ParticlePair(p1, p2))
182  if not max_score:
183  error_distance_allowed = 100
184  max_score = weight * \
185  score.evaluate(distance + error_distance_allowed)
186  log.info("%s, max_score %s", xlink.show(), max_score)
187  self.add_restraint(r, xlink.get_name(), weight, max_score)
188 
189  def set_complementarity_restraint(self, name1, name2, rname,
190  max_sep_distance, max_penetration,
191  weight, max_score=1e10):
192  """
193  Set a restraint for geometric complementarity between 2 components
194  @param name1 name of
195  @param name2 - The restraint is applied to this components
196  @param rname - The name given to the restraint
197  @param max_sep_distance - maximum distance between molecules
198  tolerated by the restraint
199  @param max_penetration - Maximum penetration allowd (angstrom)
200  @param weight
201  @param max_score
202  """
203  log.info("Setting geometric complementarity restraint %s: %s - %s",
204  rname, name1, name2)
205  A = representation.get_component(self.assembly, name1)
206  B = representation.get_component(self.assembly, name2)
207  restraint = multifit.ComplementarityRestraint(atom.get_leaves(A),
208  atom.get_leaves(B), rname)
209  restraint.set_maximum_separation(max_sep_distance)
210  restraint.set_maximum_penetration(max_penetration)
211  log.debug("Maximum separation: %s Maximum penetration score: %s",
212  max_sep_distance, max_penetration)
213  self.add_restraint(restraint, rname, weight, max_score)
214 
215  def create_coarse_assembly(self, n_residues, write=False):
216  """
217  Simplify the assembly with a coarse representation
218  @param n_residues Number of residues used for a coarse particle
219  @param write If True, write the coarse assembly to a
220  format that Chimera can display
221  """
222  if self.create_coarse:
223  log.info("Creating simplified assembly")
224  self.coarse_assembly = \
225  representation.create_simplified_assembly(self.assembly,
226  self.components_rbs, n_residues)
227  if write:
228  io.write_hierarchy_to_chimera(self.assembly, "coarse_assembly.py")
229  self.create_coarse = False
230 
231  def do_sampling(self, mode="assignments_heap_container", params=None):
232  """
233  Do Domino sampling
234  @param mode The mode used to recover solutions from domino.
235  It can have
236  the value "configuration", "assignments", or
237  "assignments_heap_container". The mode
238  "configuration" recovers all possible configurations
239  (takes a while to generate them).
240  The mode "assignments" only provides the assignments
241  (state numbers) of the solutions. It is faster but requires
242  generating the solutions afterwards
243  The mode "assignments_heap_container" selects the best solutions
244  after ecah merging in DOMINO, discarding the rest.
245  In practice I used the mode "assignments_heap_container"
246  @param params
247  """
248  t0 = time.time()
249  if mode == "configuration":
250  self.configuration_set = self.sampler.create_sample()
251  tf = time.time()
252  log.info("found %s configurations. Time %s",
253  self.configuration_set.get_number_of_configurations(),
254  tf - t0)
255  self.configuration_sampling_done = True
256  elif mode == "assignments":
257  subset = self.rb_states_table.get_subset()
258  log.info(
259  "Do sampling with assignments. Subset has %s elements: %s",
260  len(subset), subset)
261  self.solution_assignments = \
262  self.sampler.get_sample_assignments(subset)
263  tf = time.time()
264  log.info("found %s assignments. Time %s",
265  len(self.solution_assignments), tf - t0)
266  self.assignments_sampling_done = True
267  elif mode == "assignments_heap_container":
268  subset = self.rb_states_table.get_subset()
269  log.info("DOMINO sampling an assignments_heap_container. "
270  "Subset has %s elements: %s", len(subset), subset)
271  # last vertex is the root of the merge tree
272  root = self.merge_tree.get_vertices()[-1]
273  container = self.get_assignments_with_heap(root,
274  params.heap_solutions)
275  self.solution_assignments = container.get_assignments()
276  tf = time.time()
277  log.info("found %s assignments. Time %s",
278  len(self.solution_assignments), tf - t0)
279  self.assignments_sampling_done = True
280  else:
281  raise ValueError("Requested wrong mode for the DOMINO sampling")
282 
283  def get_assignment_text(self, assignment, subset):
284  """
285  Get the formatted text for an assignment
286  @param subset Subset of components of the assembly
287  @param assignment Assignment class with the states for the subset
288  @note: The order in assignment and subset is not always the order
289  of the rigid bodies in self.components_rbs. The function reorders
290  the terms in the assignment so there is correspondence.
291  """
292  ordered_assignment = []
293  for rb in self.components_rbs:
294  for i, particle in enumerate(subset):
295  if rb.get_particle() == particle:
296  ordered_assignment.append(assignment[i])
297  text = "|".join([str(i) for i in ordered_assignment])
298  return text
299 
300  def get_assignment_and_RFs(self, assignment, subset):
301  """
302  Return a line with texts for an assignment and the
303  the reference frames of the RigidBodies in the subset.
304  @param subset Subset of components of the assembly
305  @param assignment Assignment class with the states for the subset
306  @note: see the get_assignment_assignment_text() note.
307  """
308  ordered_assignment = []
309  RFs = []
310  for rb in self.components_rbs:
311  for i, particle in enumerate(subset):
312  if rb.get_particle() == particle:
313  j = assignment[i]
314  ordered_assignment.append(j)
315  pstates = self.rb_states_table.get_particle_states(rb)
316  pstates.load_particle_state(j, rb.get_particle())
317  RFs.append(rb.get_reference_frame())
318  RFs_text = unit_delim.join(
319  [io.ReferenceFrameToText(ref).get_text() for ref in RFs])
320  assignment_numbers = "|".join([str(i) for i in ordered_assignment])
321  return [assignment_numbers, RFs_text]
322 
323  def get_assignment_info(self, assignment, subset):
324  """
325  Info related to an assignment. Returns a list with text for the
326  assignment and all the scores for the restraints
327  @param subset Subset of components of the assembly
328  @param assignment Assignment class with the states for the subset
329  """
330  text = self.get_assignment_and_RFs(assignment, subset)
331  scores, total_score = self.get_assignment_scores(assignment, subset)
332  info = text + [total_score] + scores
333  return info
334 
335  def get_assignment_scores(self, assignment, subset):
336  """
337  Returns a list with the values for the restraints on a subset of
338  the components of the assembly
339  @param subset Subset of components of the assembly
340  @param assignment Assignment class with the states for the subset
341  """
342  restraints_to_evaluate = \
343  self.restraint_cache.get_restraints(subset, [])
344  scores = [self.restraint_cache.get_score(r, subset, assignment)
345  for r in restraints_to_evaluate]
346  total_score = sum(scores)
347  return scores, total_score
348 
349  def load_state(self, assignment):
350  """
351  Load the positions of the components given by the assignment
352  @param assignment Assignment class
353  """
354  subset = self.rb_states_table.get_subset()
355  domino.load_particle_states(subset, assignment, self.rb_states_table)
356 
357  def get_assignments_with_heap(self, vertex, k=0):
358  """
359  Domino sampling that recovers the assignments for the root of the
360  merge tree, but
361  conserving only the best k scores for each vertex of the tree.
362  @param[in] vertex Vertex with the root of the current merge tree. This
363  function is recursive.
364  @param[in] k
365  """
366  if(self.sampler.get_number_of_subset_filter_tables() == 0):
367  raise ValueError("No subset filter tables")
368  if(self.merge_tree is None):
369  raise ValueError("No merge tree")
370  # In the merge tree, the names of the vertices are the subsets.
371  # The type of the vertex name is a domino.Subset
372  subset = self.merge_tree.get_vertex_name(vertex)
373  log.info("computing assignments for vertex %s", subset)
374  t0 = time.time()
375  assignments_container = domino.HeapAssignmentContainer(subset, k,
376  self.restraint_cache, "my_heap_assignments_container")
377  neighbors = self.merge_tree.get_out_neighbors(vertex)
378  if len(neighbors) == 0: # A leaf
379  # Fill the container with the assignments for the leaf
380  self.sampler.load_vertex_assignments(vertex, assignments_container)
381  else:
382  if neighbors[0] > neighbors[1]:
383  # get_vertex_assignments() methods in domino
384  # expects the children in sorted order
385  neighbors = [neighbors[1], neighbors[0]]
386  # recurse on the two children
387  container_child0 = self.get_assignments_with_heap(neighbors[0], k)
388  container_child1 = self.get_assignments_with_heap(neighbors[1], k)
389  self.sampler.load_vertex_assignments(vertex,
390  container_child0,
391  container_child1,
392  assignments_container)
393  tf = time.time() - t0
394  log.info("Merge tree vertex: %s assignments: %s Time %s sec.", subset,
395  assignments_container.get_number_of_assignments(), tf)
396  return assignments_container
397 
398  def get_restraint_value_for_assignment(self, assignment, name):
399  """
400  Recover the value of a restraint
401  @param name of the restraint
402  @param assignment Assignment class containing the assignment for
403  the solution desired
404  """
405  if not self.assignments_sampling_done:
406  raise ValueError("DominoModel: sampling not done")
407  log.debug("Computing restraint value for assignment %s", assignment)
408  self.load_state(assignment)
409  for rb in self.components_rbs:
410  log.debug("rb - %s",
411  rb.get_reference_frame().get_transformation_to())
412  val = self.restraints[name].evaulate(False)
413  log.debug("restraint %s = %s", restraint.get_name(), val)
414  return val
415 
417  """
418  Sets the native model for benchmark, by reading the native
419  structure and set the rigid bodies.
420  """
421  self.measure_models = True
422  self.native_model = IMP.kernel.Model()
423  if hasattr(params.benchmark, "fn_pdb_native"):
424  self.native_assembly = \
425  representation.create_assembly_from_pdb(self.native_model,
426  params.benchmark.fn_pdb_native, params.names)
427  elif hasattr(params.benchmark, "fn_pdbs_native"):
428  self.native_assembly = \
429  representation.create_assembly(self.native_model,
430  params.benchmark.fn_pdbs_native, params.names)
431  else:
432  raise ValueError("set_native_assembly_for_benchmark: Requested "
433  "to set a native assembly for benchmark but did not provide "
434  "its pdb, or the pdbs of the components")
435  self.native_rbs = representation.create_rigid_bodies(
436  self.native_assembly)
437  anchor_assembly(self.native_rbs, params.anchor)
438 
439  def set_rb_states(self, rb, transformations):
440  """
441  Add a set of reference frames as possible states for a rigid body
442  @param rb The rigid body
443  @param transformations Transformations are used to build the
444  reference frames for the rigid body.
445  """
446  log.info("Set rigid body states for %s", rb.get_name())
447  RFs = [alg.ReferenceFrame3D(T) for T in transformations]
448  for st in RFs:
449  log.debug("%s", st)
450  rb_states = domino.RigidBodyStates(RFs)
451  self.rb_states_table.set_particle_states(rb, rb_states)
452  st = self.rb_states_table.get_particle_states(rb)
453  log.info("RigidBodyStates created %s",
454  st.get_number_of_particle_states())
455 
457  """
458  Add a RestraintScoreSubsetFilterTable to DOMINO that allows to
459  reject assignments that have score worse than the thresholds for
460  the restraints
461  """
462  log.info("Adding RestraintScoreSubsetFilterTable")
463  # Restraint Cache is a centralized container of the restraints and
464  # their scores
465  self.restraint_cache = domino.RestraintCache(self.rb_states_table)
466  self.restraint_cache.add_restraints(self.model.get_restraints())
467  rssft = domino.RestraintScoreSubsetFilterTable(self.restraint_cache)
468  rssft.set_name('myRestraintScoreSubsetFilterTable')
469  self.sampler.add_subset_filter_table(rssft)
470 
471  def set_assembly_components(self, fn_pdbs, names):
472  """
473  Sets the components of an assembly, each one given as a separate
474  PDB file.
475  @param fn_pdbs List with the names of the pdb files
476  @param names List with the names of the components of the assembly.
477  """
478  if len(fn_pdbs) != len(names):
479  raise ValueError("Each PDB file needs a name")
480  self.names = names
481  self.assembly = representation.create_assembly(self.model, fn_pdbs,
482  names)
483  self.components_rbs = representation.create_rigid_bodies(self.assembly)
484  self.not_optimized_rbs = []
485 
486  def set_assembly(self, fn_pdb, names):
487  """
488  Sets an assembly from a single PDB file. The function assumes that
489  the components of the assembly are the chains of the PDB file.
490  @param fn_pdb PDB with the file for the asembly
491  @param names Names for each of the components (chains)
492  of the asembly
493  """
494  self.assembly = representation.create_assembly_from_pdb(self.model,
495  fn_pdb,
496  names)
497  self.components_rbs = representation.create_rigid_bodies(self.assembly)
498  self.not_optimized_rbs = []
499 
501  """
502  BranchAndBoundAssignmentsTable enumerate states based on provided
503  filtered using the provided the subset filter tables
504  """
505  log.info("Setting BranchAndBoundAssignmentsTable")
506  fts = []
507  for i in range(self.sampler.get_number_of_subset_filter_tables()):
508  ft = self.sampler.get_subset_filter_table(i)
509  fts.append(ft)
510  at = domino.BranchAndBoundAssignmentsTable(self.rb_states_table, fts)
511  self.sampler.set_assignments_table(at)
512  self.assignments_table = at
513 
515  """
516  ExclusionSubsetFilterTable ensures that two particles are not given
517  the same state at the same time
518  """
519  log.info("Setting ExclusionSubsetFilterTable")
520  ft = domino.ExclusionSubsetFilterTable(self.rb_states_table)
521  self.sampler.add_subset_filter_table(ft)
522 
524  """
525  Creates a DOMINO sampler and adds the required filter tables
526  """
527  if self.merge_tree is None:
528  raise ValueError("Merge tree for DOMINO not set. It is required "
529  "to setup the sampler")
530  log.info("Domino sampler")
531  self.sampler = domino.DominoSampler(self.model, self.rb_states_table)
532  self.sampler.set_log_level(IMP.base.TERSE)
533  self.sampler.set_merge_tree(self.merge_tree)
537 
538  def read_merge_tree(self, fn):
539  """
540  Read and create a merge tree from a file.
541  The format of the file is the format written by write merge_tree()
542  It does not matter the order of the indices in the file, as
543  they are sorted by this function.
544  The only condition is that the index for the vertex that is the
545  root of the tree MUST be the largest. This is guaranteed when
546  creating a merge tree with create_merge_tree()
547  @param fn File containing the merge tree
548  """
549  log.info("Reading merge tree from %s", fn)
550  ps = self.rb_states_table.get_particles()
551  log.debug("particles")
552  for p in ps:
553  log.debug("%s", p.get_name())
554  rows = csv_related.read_csv(fn, delimiter=field_delim)
555  for i in range(len(rows)):
556  row = rows[i]
557  rows[i] = [int(row[0]), int(row[1]), int(row[2]), row[3]]
558  # Sort rows by vertice index
559  rows.sort()
560  subsets = []
561  # build subsets from the rows text
562  for row in rows:
563  names = row[3].split(unit_delim)
564  log.debug("row %s names %s", row, names)
565  # get particles in the subset
566  particles = []
567  for name in names:
568  l = filter(lambda p: p.get_name() == name, ps)
569  if (len(l) > 1):
570  ValueError("More than one particle with same name" % names)
571  particles.extend(l)
572  s = domino.Subset(particles)
573  subset_names = [p.get_name() for p in particles]
574  log.debug("Merge tree Subset %s. Particles %s ", s, subset_names)
575  subsets.append(s)
576  # The vertex with the largest index is the root.
577  # trick: get the merge tree object from a tree with only one node ...
578  jt = domino.SubsetGraph()
579  jt.add_vertex(subsets[0])
580  mt = domino.get_merge_tree(jt)
581  # ... and add the rest of the vertices
582  for i in range(1, len(subsets)):
583  mt.add_vertex(subsets[i]) # the name of the vertex is a subset
584  # set edges
585  for row in rows:
586  vid = row[0]
587  child_left = row[1]
588  child_right = row[2]
589  if child_left != -1:
590  mt.add_edge(vid, child_left)
591  if child_right != -1:
592  mt.add_edge(vid, child_right)
593  self.merge_tree = mt
594  log.info("%s", self.merge_tree.show_graphviz())
595 
596  def create_merge_tree(self):
597  """
598  Creates a merge tree from the restraints that are set already
599  """
600  rs = self.model.get_restraints()
601  ig = domino.get_interaction_graph(rs, self.rb_states_table)
602 # pruned_dep = IMP.get_pruned_dependency_graph(self.model)
603 # IMP.base.show_graphviz(pruned_dep)
604 # IMP.base.show_graphviz(ig)
605  jt = domino.get_junction_tree(ig)
606 # IMP.base.show_graphviz(jt)
607  self.merge_tree = domino.get_balanced_merge_tree(jt)
608 # IMP.base.show_graphviz(self.merge_tree)
609  log.info("Balanced merge tree created")
610  log.info("%s", self.merge_tree.show_graphviz())
611 
612  def write_merge_tree(self, fn):
613  """
614  Writes the current merge tree to file. The format is:
615  parent | child_left | child_right | labels
616  @param fn File for storing the merge tree
617  """
618  log.info("Writing merge to %s", fn)
619  if self.merge_tree is None:
620  raise ValueError("No merge tree")
621  f = open(fn, "w")
622  f.write(
623  "# Vertex index | Child0 | Child1 | label (names of the particles)\n")
624  w = csv.writer(f, delimiter=field_delim)
625  root = self.merge_tree.get_vertices()[-1]
626  self.write_subset(root, w)
627  f.close()
628 
629  def write_subset(self, v, w):
630  """
631  Writes lines describing a subset (vertex of a merge tree)
632  @param w A csv.writer
633  @param v Vertex index
634  """
635  no_child_index = -1
636  subset = self.merge_tree.get_vertex_name(v)
637  names = [p.get_name() for p in subset]
638  reps = filter(lambda x: names.count(x) > 1, names) # repeated names
639  if(len(reps)):
640  raise ValueError("The names of the "
641  "particles in the subset %s are not unique" % subset)
642  names = unit_delim.join(names)
643  neighbors = self.merge_tree.get_out_neighbors(v)
644  if len(neighbors) == 0: # A leaf
645  # Fill the container with the assignments for the leaf
646  w.writerow([v, no_child_index, no_child_index, names])
647  else:
648  if neighbors[0] > neighbors[1]:
649  neighbors = [neighbors[1], neighbors[0]]
650  w.writerow([v, neighbors[0], neighbors[1], names])
651  self.write_subset(neighbors[0], w)
652  self.write_subset(neighbors[1], w)
653 
654  def set_connectivity_restraint(self, names, rname,
655  distance=10,
656  weight=1.0, max_score=None, n_pairs=1,
657  stddev=2):
658  """
659  Set a connectivity restraint between the elements of the assembly
660  @param names List with all the elements to be connected
661  @param distance Maximum distance tolerated by the restraint
662  @param rname Name for the restraint
663  @param weight Weight for the restraint
664  @param max_score Maximum score for the restraint
665  @param n_pairs Number of pairs of particles used in the
666  KClosePairScore of the connecitivity restraint
667  @param stddev Standard deviation used to approximate the
668  HarmonicUpperBound function to a Gaussian
669  """
670  components = []
671  for name in names:
672  c = representation.get_component(self.coarse_assembly, name)
673  components.append(c)
674  log.info("Connectivity restraint for %s", components)
675  spring_constant = core.Harmonic.get_k_from_standard_deviation(stddev)
676  if max_score is None:
677  max_score = weight * spring_constant * n_pairs * (stddev) ** 2
678  r = restraints.get_connectivity_restraint(
679  components, distance, n_pairs,
680  spring_constant)
681  self.add_restraint(r, rname, weight, max_score)
682 
683  def set_em2d_restraint(self, name, fn_images_sel, pixel_size, resolution,
684  n_projections, weight, max_score=False,
685  mode="fast", n_optimized=1):
686  """
687  Set a Restraint computing the em2d score.
688  @param name Name for the restraint
689  @param fn_images_sel Selection file with the names of the images
690  used for the restraint
691  @param pixel_size - pixel size in the images
692  @param resolution - resolution used to downsample the projections
693  of the model when projecting
694  @param weight Weight of the restraint
695  @param max_score - Maximum value tolerated for the restraint
696  @param n_projections - Number of projections to generate
697  when projecting the model to do the coarse alignments
698  @param mode - Mode used for computing the restraints.
699  @param n_optimized - number of results from the coarse
700  alignment that are refined with the Simplex algorithm
701  to get a more accurate value for the restraint
702  """
703  log.info("Adding a em2D restraint: %s", fn_images_sel)
704  restraint_params = em2d.Em2DRestraintParameters(pixel_size,
705  resolution,
706  n_projections)
707  r = restraints.get_em2d_restraint(self.assembly,
708  fn_images_sel,
709  restraint_params,
710  mode, n_optimized)
711  self.add_restraint(r, name, weight, max_score)
712 
713  def set_not_optimized(self, name):
714  """
715  Set a part of the model as not optimized (it does not move during
716  the model optimization)
717  @param name of the component to optimized
718  """
719  if name not in self.names:
720  raise ValueError("DominoModel: There is not component "
721  "in the assembly with this name")
722  rb_name = representation.get_rb_name(name)
723  self.not_optimized_rbs.append(rb_name)
724 
725  def set_close_pairs_excluded_volume_restraint(self, distance=10,
726  weight=1.0, ratio=0.05, stddev=2,
727  max_score=False):
728  """
729  Creates an excluded volume restraint between all the components
730  of the coarse assembly.See help for
731  @param distance Maximum distance tolerated between particles
732  @param weight Weight for the restraint
733  @param stddev
734  @param max_score Maximum value for the restraint. If the parameter
735  is None, an automatic value is computed (using the ratio).
736  @param ratio Fraction of the number of possible pairs of
737  particles that are tolerated to overlap. The maximum
738  score is modified according to this ratio.
739  I have found that ratios of 0.05-0.1 work well allowing for
740  some interpenetration
741  @param stddev Standard deviation used to approximate the
742  HarmonicUpperBound function to a Gaussian
743  """
744  k = core.Harmonic.get_k_from_standard_deviation(stddev)
745  for i, c1 in enumerate(self.coarse_assembly.get_children()):
746  for j, c2 in enumerate(self.coarse_assembly.get_children()):
747  if j > i:
748  name = "exc_vol_%s_%s" % (c1.get_name(), c2.get_name())
749 
750  ls1 = atom.get_leaves(c1)
751  ls2 = atom.get_leaves(c2)
752  possible_pairs = len(ls1) * len(ls2)
753  n_pairs = possible_pairs * ratio
754 
755  marker1 = IMP.kernel.Particle(
756  self.model, "marker1 " + name)
757  marker2 = IMP.kernel.Particle(
758  self.model, "marker2 " + name)
759  table_refiner = core.TableRefiner()
760  table_refiner.add_particle(marker1, ls1)
761  table_refiner.add_particle(marker2, ls2)
762  score = core.HarmonicLowerBound(distance, k)
763  pair_score = core.SphereDistancePairScore(score)
764  close_pair_score = core.ClosePairsPairScore(pair_score,
765  table_refiner,
766  distance)
767  r = core.PairRestraint(close_pair_score,
768  IMP.kernel.ParticlePair(marker1, marker2))
769 
770  if not max_score:
771  minimum_distance_allowed = 0
772  max_score = weight * n_pairs * \
773  score.evaluate(minimum_distance_allowed)
774  log.debug("Setting close_pairs_excluded_volume_restraint(), "
775  "max_score %s", max_score)
776  self.add_restraint(r, name, weight, max_score)
777 
778  def set_pair_score_restraint(self, name1, name2,
779  restraint_name, distance=10,
780  weight=1.0, n_pairs=1, stddev=2, max_score=None):
781  """
782  Set a pair_score restraint between the coarse representation
783  of two components
784  @param name1 Name of the first component
785  @param name2 Name of the second component
786  @param restraint_name Name for the restraint
787  @param distance Maximum distance tolerated between particles
788  @param weight Weight of the restraint
789  @param n_pairs
790  @param max_score Maximum value tolerated for the restraint
791  @param stddev Standard deviation used to approximate the
792  HarmonicUpperBound function to a Gaussian
793  """
794  table_refiner = core.TableRefiner()
795 
796  # The particles in A are attached to a marker particle via a refiner.
797  # When the refiner gets a request for marker1, it returns the attached
798  # particles
799  A = representation.get_component(self.coarse_assembly, name1)
800  marker1 = IMP.kernel.Particle(self.model, "marker1 " + restraint_name)
801  table_refiner.add_particle(marker1, atom.get_leaves(A))
802  # same for B
803  B = representation.get_component(self.coarse_assembly, name2)
804  marker2 = IMP.kernel.Particle(self.model, "marker2 " + restraint_name)
805  table_refiner.add_particle(marker2, atom.get_leaves(B))
806 
807  k = core.Harmonic.get_k_from_standard_deviation(stddev)
809  # The score is set for the n_pairs closest pairs of particles
810  pair_score = core.KClosePairsPairScore(score, table_refiner, n_pairs)
811  # When KClosePairsPairScore is called, the refiner will provide the
812  # particles for A and B
813  if not max_score:
814  # Build a maximum score based on the function type that is used,
815  # an HarmonicUpperBound
816  temp_score = core.HarmonicUpperBound(distance, k)
817  error_distance_allowed = 10
818  max_score = weight * n_pairs * \
819  temp_score.evaluate(distance + error_distance_allowed)
820 
821  log.info("Setting pair score restraint for %s %s. k = %s, max_score "
822  "= %s, stddev %s", name1, name2, k, max_score, stddev)
823  r = core.PairRestraint(
824  pair_score,
826  marker1,
827  marker2))
828  self.add_restraint(r, restraint_name, weight, max_score)
829 
830  def write_solutions_database(self, fn_database, max_number=None):
831  """
832  Write the results of the DOMINO sampling to a SQLite database.
833  @param fn_database
834  @param max_number Maximum number of results to write
835  """
836  log.info("Creating the database of solutions")
837  if self.measure_models and not hasattr(self, "native_model"):
838  raise ValueError("The native model has not been set")
840  db.create(fn_database, overwrite=True)
841  db.connect(fn_database)
842  subset = self.rb_states_table.get_subset()
843  restraints_names = self.get_restraints_names_used(subset)
844  db.add_results_table(restraints_names, self.measure_models)
845  n = len(self.solution_assignments)
846  if max_number is not None:
847  n = min(n, max_number)
848  for sol_id, assignment in enumerate(self.solution_assignments[0:n]):
849  txt = self.get_assignment_text(assignment, subset)
850  # load the positions of the components in the state
851  self.load_state(assignment)
852  RFs = [rb.get_reference_frame() for rb in self.components_rbs]
853  scores, total_score = self.get_assignment_scores(
854  assignment, subset)
855  measures = None
856  if(self.measure_models):
857  measures = measure_model(self.assembly, self.native_assembly,
858  self.components_rbs, self.native_rbs)
859  db.add_record(sol_id, txt, RFs, total_score, scores, measures)
860  # Add native state if doing benchmarking
861  if self.measure_models:
862  assignment = "native"
863  RFs = [rb.get_reference_frame() for rb in self.native_rbs]
864  scores, total_score = self.get_restraints_for_native(
865  restraints_names)
866  db.add_native_record(assignment, RFs, total_score, scores)
867  db.save_records()
868  db.close()
869 
870  def get_restraints_names_used(self, subset):
871  """ Get the names of the restraints used for a subset
872  @param subset Subset of components of the assembly
873  """
874  return [r.get_name() for r
875  in self.restraint_cache.get_restraints(subset, [])]
876 
877  def get_restraints_for_native(self, restraints_names):
878  """
879  Get values of the restraints for the native structure
880  @param restraints_names Names of the restraints requested
881  @return a list with the values of all scores, and the total score
882  """
883  saved = [rb.get_reference_frame() for rb in self.components_rbs]
884  # Set the state of the model to native
885  for rb, rn in zip(self.components_rbs, self.native_rbs):
886  # remove sub-rigid bodies
887  rb_members = filter(
888  lambda m: not core.RigidBody.get_is_setup(
889  m.get_particle()), rb.get_members())
890  rn_members = filter(
891  lambda m: not core.RigidBody.get_is_setup(
892  m.get_particle()), rn.get_members())
893  rb_coords = [m.get_coordinates() for m in rb_members]
894  rn_coords = [m.get_coordinates() for m in rn_members]
895 
896  # align and put rb in the position of rn
897  if len(rn_coords) != len(rb_coords):
898  raise ValueError("Mismatch in the number of members. "
899  "Reference %d Aligned %d " % (len(rn_coords), len(rb_coords)))
900  T = alg.get_transformation_aligning_first_to_second(rb_coords,
901  rn_coords)
902  t = rb.get_reference_frame().get_transformation_to()
903  new_t = alg.compose(T, t)
904  rb.set_reference_frame(alg.ReferenceFrame3D(new_t))
905  scores = []
906  for name in restraints_names:
907  scores.append(self.restraints[name].evaluate(False))
908  total_score = sum(scores)
909  representation.set_reference_frames(self.components_rbs, saved)
910  return scores, total_score
911 
912  def write_pdb_for_assignment(self, assignment, fn_pdb):
913  """
914  Write the solution in the assignment
915  @param assignment Assignment class with the states for the subset
916  @param fn_pdb File to write the model
917  """
918  if not self.assignments_sampling_done:
919  raise ValueError("DominoModel: sampling not done")
920  log.info("Writting PDB %s for assignment %s", fn_pdb, assignment)
921  self.load_state(assignment)
922  atom.write_pdb(self.assembly, fn_pdb)
923 
924  def write_pdb_for_configuration(self, n, fn_pdb):
925  """
926  Write a file with a configuration for the model (configuration
927  here means a configuration in DOMINO)
928  @param n Index of the configuration desired.
929  @param fn_pdb
930  """
931  if not self.configuration_sampling_done:
932  raise ValueError("DominoModel: sampling not done")
933  log.debug("Writting PDB %s for configuration %s", fn_pdb, n)
934  configuration_set.load_configuration(n)
935  atom.write_pdb(self.assembly, fn_pdb)
936 
937  def write_pdb_for_reference_frames(self, RFs, fn_pdb):
938  """
939  Write a PDB file with a solution given by a set of reference frames
940  @param RFs Reference frames for the elements of the complex
941  @param fn_pdb File to write the solution
942  """
943  log.debug("Writting PDB %s for reference frames %s", fn_pdb, RFs)
944  representation.set_reference_frames(self.components_rbs, RFs)
945  atom.write_pdb(self.assembly, fn_pdb)
946 
947  def write_pdbs_for_reference_frames(self, RFs, fn_base):
948  """
949  Write a separate PDB for each of the elements
950  @param RFs Reference frames for the elements of the complex
951  @param fn_base base string to buid the names of the PDBs files
952  """
953  log.debug("Writting PDBs with basename %s", fn_base)
954  representation.set_reference_frames(self.components_rbs, RFs)
955  for i, ch in enumerate(self.assembly.get_children()):
956  atom.write_pdb(ch, fn_base + "component-%02d.pdb" % i)
957 
958  def write_pdb_for_component(self, component_index, ref, fn_pdb):
959  """
960  Write one component of the assembly
961  @param component_index Position of the component in the assembly
962  @param ref Reference frame of the component
963  @param fn_pdb Output PDB
964  """
965  ("Writting PDB %s for component %s", fn_pdb, component_index)
966  self.components_rbs[component_index].set_reference_frame(ref)
967  hierarchy_component = self.assembly.get_child(component_index)
968  atom.write_pdb(hierarchy_component, fn_pdb)
969 
970  def write_monte_carlo_solution(self, fn_database):
971  """
972  Write the solution of a MonteCarlo run
973  @param fn_database Database file with the solution.
974  The database will contain only one record
975  """
976  total_score = 0
977  rnames = []
978  scores = []
979  for i in range(self.model.get_number_of_restraints()):
980  r = self.model.get_restraint(i)
981  score = r.evaluate(False)
982  rnames.append(r.get_name())
983  scores.append(score)
984  total_score += score
986  db.create(fn_database, overwrite=True)
987  db.connect(fn_database)
988  db.add_results_table(rnames, self.measure_models)
989  RFs = [rb.get_reference_frame() for rb in self.components_rbs]
990  measures = None
991  if(self.measure_models):
992  measures = measure_model(self.assembly, self.native_assembly,
993  self.components_rbs, self.native_rbs)
994  solution_id = 0
995  db.add_record(solution_id, "", RFs, total_score, scores, measures)
996  db.save_records()
997  db.close()
998 
999 
1000 def print_restraints_values(model):
1001  print "Restraints: Name, weight, value, maximum_value"
1002  total_score = 0
1003  for i in range(model.get_number_of_restraints()):
1004  r = model.get_restraint(i)
1005  score = r.evaluate(False)
1006 # print "%20s %18f %18f %18f" % (r.get_name(), r.get_weight(),
1007 # score, r.get_maximum_score())
1008  print "%20s %18f %18f" % (r.get_name(), r.get_weight(), score)
1009  total_score += score
1010  print "total_score:", total_score
1011 
1012 
1013 def anchor_assembly(components_rbs, anchored):
1014  """
1015  "Anchor" a set of rigid bodies, by setting the position of one of them
1016  at the origin (0,0,0).
1017  @param components_rbs Rigid bodies of the components
1018  @param anchored - List of True/False values indicating if the components
1019  of the assembly are anchored. The function sets the FIRST anchored
1020  component in the (0,0,0) coordinate and moves the other
1021  components with the same translation. If all the values are False, the
1022  function does not do anything
1023  """
1024  if True in anchored:
1025  anchored_rb = None
1026  for a, rb in zip(anchored, components_rbs):
1027  if a:
1028  anchored_rb = rb
1029  break
1030  center = anchored_rb.get_coordinates()
1031  log.info("Anchoring assembly at (0,0,0)")
1032  for rb in components_rbs:
1033  T = rb.get_reference_frame().get_transformation_to()
1034  t = T.get_translation()
1035  R = T.get_rotation()
1036  log.debug("%s: Rerefence frame BEFORE %s",
1037  rb.get_name(), rb.get_reference_frame())
1038  T2 = alg.Transformation3D(R, t - center)
1039  rb.set_reference_frame(alg.ReferenceFrame3D(T2))
1040  log.debug("%s Rerefence frame AFTER %s",
1041  rb.get_name(), rb.get_reference_frame())
1042 
1043 
1044 def measure_model(assembly, native_assembly, rbs, native_rbs):
1045  """
1046  Get the drms, cdrms and crmsd for a model
1047  @param assembly Hierachy for an assembly
1048  @param native_assembly Hierarchy of the native structure
1049  @param rbs Rigid bodies for the components of the assembly
1050  @param native_rbs Rigid bodies for the components of the native
1051  structure
1052  @return cdrms, cdrms, crmsd
1053  - drms - typical drms measure
1054  - cdrms - drms for the centroids of the rigid bodies of the components
1055  of the assembly
1056  - crmsd - rmsd for the centroids
1057  """
1058  log.debug("Measure model")
1059  drms = comparisons.get_drms_for_backbone(assembly, native_assembly)
1060  RFs = [rb.get_reference_frame() for rb in rbs]
1061  vs = [r.get_transformation_to().get_translation() for r in RFs]
1062  nRFs = [rb.get_reference_frame() for rb in native_rbs]
1063  nvs = [r.get_transformation_to().get_translation() for r in nRFs]
1064  cdrms = atom.get_drms(vs, nvs)
1065  crmsd, RFs = alignments.align_centroids_using_pca(RFs, nRFs)
1066  log.debug("drms %s cdrms %s crmsd %s", drms, cdrms, crmsd)
1067  return [drms, cdrms, crmsd]
1068 
1069 
1070 def get_coordinates(rigid_bodies):
1071  """
1072  Return a list of the coordinates of all the members of the rigid bodies
1073  """
1074  if len(rigid_bodies) == 0:
1075  raise ValueError(
1076  "get_coordinates: There are not rigid bodies to get coordinates from")
1077  coords = []
1078  for rb in rigid_bodies:
1079  # remove sub-rigid bodies
1080  rb_members = filter(
1081  lambda m: not core.RigidBody.get_is_setup(
1082  m.get_particle()), rb.get_members())
1083  coords.extend([m.get_coordinates() for m in rb_members])
1084  return coords
A harmonic upper bound on the distance between two spheres.
Lower bound harmonic function (non-zero when feature < mean)
Compute the complementarity between two molecules.
def get_assignments_with_heap
Domino sampling that recovers the assignments for the root of the merge tree, but conserving only the...
Definition: DominoModel.py:357
Restraints using electron microscopy 2D images (class averages).
def set_not_optimized
Set a part of the model as not optimized (it does not move during the model optimization) ...
Definition: DominoModel.py:713
Various classes to hold sets of particles.
Upper bound harmonic function (non-zero when feature > mean)
def add_restraint
Adds a restraint to the model.
Definition: DominoModel.py:55
def write_pdb_for_assignment
Write the solution in the assignment.
Definition: DominoModel.py:912
def add_branch_and_bound_assignments_table
BranchAndBoundAssignmentsTable enumerate states based on provided filtered using the provided the sub...
Definition: DominoModel.py:500
def set_assembly
Sets an assembly from a single PDB file.
Definition: DominoModel.py:486
Low level functionality (logging, error handling, profiling, command line flags etc) that is used by ...
def set_em2d_restraint
Set a Restraint computing the em2d score.
Definition: DominoModel.py:683
Sample best solutions using Domino.
Definition: DominoSampler.h:32
def create_coarse_assembly
Simplify the assembly with a coarse representation.
Definition: DominoModel.py:215
def add_exclusion_filter_table
ExclusionSubsetFilterTable ensures that two particles are not given the same state at the same time...
Definition: DominoModel.py:514
Filter a configuration of the subset using the kernel::Model thresholds.
Represent a subset of the particles being optimized.
Definition: Subset.h:33
def set_pair_score_restraint
Set a pair_score restraint between the coarse representation of two components.
Definition: DominoModel.py:778
A score on the distance between the surfaces of two spheres.
def write_pdbs_for_reference_frames
Write a separate PDB for each of the elements.
Definition: DominoModel.py:947
def create_merge_tree
Creates a merge tree from the restraints that are set already.
Definition: DominoModel.py:596
def write_solutions_database
Write the results of the DOMINO sampling to a SQLite database.
Definition: DominoModel.py:830
def do_sampling
Do Domino sampling.
Definition: DominoModel.py:231
def get_assignment_scores
Returns a list with the values for the restraints on a subset of the components of the assembly...
Definition: DominoModel.py:335
A class to store an fixed array of same-typed values.
Definition: Array.h:33
Do not allow two particles to be in the same state.
def read_merge_tree
Read and create a merge tree from a file.
Definition: DominoModel.py:538
def write_monte_carlo_solution
Write the solution of a MonteCarlo run.
Definition: DominoModel.py:970
def get_restraint_value_for_assignment
Recover the value of a restraint.
Definition: DominoModel.py:398
A lookup based particle refiner.
Definition: TableRefiner.h:21
def set_complementarity_restraint
Set a restraint for geometric complementarity between 2 components.
Definition: DominoModel.py:189
def add_restraint_score_filter_table
Add a RestraintScoreSubsetFilterTable to DOMINO that allows to reject assignments that have score wor...
Definition: DominoModel.py:456
def set_xlink_restraint
Set a restraint on the maximum distance between 2 residues.
Definition: DominoModel.py:137
Utility functions to handle restraints.
Definition: restraints.py:1
def write_merge_tree
Writes the current merge tree to file.
Definition: DominoModel.py:612
Management of a model using DOMINO.
Definition: DominoModel.py:34
Fitting atomic structures into a cryo-electron microscopy density map.
def get_restraints_for_native
Get values of the restraints for the native structure.
Definition: DominoModel.py:877
Utility functions to store and retrieve solution information.
Definition: solutions_io.py:1
Class to handle individual model particles.
Class for managing the results of the experiments.
def set_native_assembly_for_benchmark
Sets the native model for benchmark, by reading the native structure and set the rigid bodies...
Definition: DominoModel.py:416
def setup_domino_sampler
Creates a DOMINO sampler and adds the required filter tables.
Definition: DominoModel.py:523
def get_assignment_info
Info related to an assignment.
Definition: DominoModel.py:323
def get_assignment_text
Get the formatted text for an assignment.
Definition: DominoModel.py:283
def write_pdb_for_component
Write one component of the assembly.
Definition: DominoModel.py:958
def write_pdb_for_reference_frames
Write a PDB file with a solution given by a set of reference frames.
Definition: DominoModel.py:937
def load_state
Load the positions of the components given by the assignment.
Definition: DominoModel.py:349
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Parameters used by Em2DRestraint and ProjectionFinder.
def write_pdb_for_configuration
Write a file with a configuration for the model (configuration here means a configuration in DOMINO) ...
Definition: DominoModel.py:924
def set_connectivity_restraint
Set a connectivity restraint between the elements of the assembly.
Definition: DominoModel.py:654
def align_rigid_bodies_states
Aligns the set of structures considered for DOMINO sampling.
Definition: DominoModel.py:76
Applies a PairScore to a Pair.
Definition: PairRestraint.h:29
def set_rb_states
Add a set of reference frames as possible states for a rigid body.
Definition: DominoModel.py:439
Output IMP model data in various file formats.
Functionality for loading, creating, manipulating and scoring atomic structures.
def set_close_pairs_excluded_volume_restraint
Creates an excluded volume restraint between all the components of the coarse assembly.See help for.
Definition: DominoModel.py:729
Select hierarchy particles identified by the biological name.
Definition: Selection.h:62
def get_restraints_names_used
Get the names of the restraints used for a subset.
Definition: DominoModel.py:870
def set_assembly_components
Sets the components of an assembly, each one given as a separate PDB file.
Definition: DominoModel.py:471
Divide-and-conquer inferential optimization in discrete space.
Class for storing model, its restraints, constraints, and particles.
Definition: kernel/Model.h:73
def write_subset
Writes lines describing a subset (vertex of a merge tree)
Definition: DominoModel.py:629
def get_assignment_and_RFs
Return a line with texts for an assignment and the the reference frames of the RigidBodies in the sub...
Definition: DominoModel.py:300