IMP  2.1.1
The Integrative Modeling Platform
DominoModel.py
1 import sys
2 import time
3 import csv
4 import logging
5 log = logging.getLogger("DominoModel")
6 
7 import IMP
8 import IMP.domino as domino
9 import IMP.core as core
10 import IMP.container as container
11 import IMP.display as display
12 import IMP.atom as atom
13 import IMP.algebra as alg
14 import IMP.em2d as em2d
15 import IMP.base
16 import IMP.multifit as multifit
17 
18 import IMP.em2d.imp_general.comparisons as comparisons
19 import IMP.em2d.imp_general.alignments as alignments
20 import IMP.em2d.imp_general.io as io
21 import IMP.em2d.imp_general.representation as representation
22 
23 import IMP.em2d.restraints as restraints
24 import IMP.em2d.buildxlinks as buildxlinks
25 import IMP.em2d.solutions_io as solutions_io
26 import IMP.em2d.csv_related as csv_related
27 
28 
29 field_delim = "," # separate fields in the output text file
30 unit_delim = "/" # separate units within a field (eg, reference frames).
31  # It is used in output text files and in databse files
32 
33 
34 
36  """
37  Management of a model using DOMINO
38  """
39  def __init__(self, name="my model"):
40  self.model = IMP.kernel.Model()
41  self.model.set_name(name)
42  self.configuration_sampling_done = False
43  self.assignments_sampling_done = False
44  self.rb_states_table = domino.ParticleStatesTable()
45  self.merge_tree = None
46  # measure_models is True only when doing benchmark
47  self.measure_models = False
48  # The first time that create_coarse is found and its value is True
49  # a coarse version of the assembly is built
50  self.create_coarse = True
51  self.restraints = dict()
52  self.xlinks_dict = buildxlinks.XlinksDict()
53 
54  def add_restraint(self, r, name, weight, max_score=False):
55  """
56  Adds a restraint to the model
57  @param r An IMP.kernel.Restraint object
58  @param name Name for the restraint
59  @param weight Weight for the restraint
60  @param max_score Maximum score allowed for the restraint. If
61  max_score is False, there is no limit for the value of the restraint
62  """
63  log.info("Adding restraint %s weight %s max_score %s",
64  name, weight, max_score)
65  if "-" in name:
66  raise ValueError("SQL database does not accept restraint names "\
67  "containing -")
68  r.set_name(name)
69  if max_score != None and max_score != False:
70  r.set_maximum_score(max_score)
71  r.set_weight(weight)
72  self.restraints[name] = r
73  self.model.add_restraint(r)
74 
75 
77  """
78  Aligns the set of structures considered for DOMINO sampling.
79  The function:
80  1) reads the possible reference frames that the
81  rb_states_table stores for each rigid body. This table
82  must be filled before using this function. Usually it is
83  filled with the results from a previous Monte Carlo sampling.
84  If the solutions from Monte Carlo seem to have the same structure
85  but they are not aligned to each other, this function can
86  help setting better starting positions to use with DOMINO.
87  2) Gets the first state for each of the rigid bodies and sets
88  a reference structure using such states.
89  3) Aligns all the rest of the structures respect to the reference
90  4) Replaces the values of the reference frames stored in the
91  rb_states_table with the new values obtained from the alignments.
92  It does it for all states of a rigid body.
93  @note: If this function is applied, the parameters "anchor" and "fixed"
94  are ignored, as they are superseded by the use of the aligments
95  calculated here.
96  """
97  log.debug("Align the configurations read from the database before " \
98  "running DOMINO")
99  rb_states = []
100  n_states = []
101  for rb in self.components_rbs:
102  ps = self.rb_states_table.get_particle_states(rb)
103  n = ps.get_number_of_particle_states()
104  if n == 0:
105  raise ValueError("There are no particle states for %s" % rb.get_name())
106  n_states.append(n)
107  rb_states.append(ps)
108  # coordinates of the first configuration (there is at least one for all the rbs)
109  for rb, states in zip(self.components_rbs, rb_states):
110  states.load_particle_state(0, rb)
111  reference_coords = get_coordinates(self.components_rbs)
112  aligned_transformations = [[] for rb in self.components_rbs]
113  for i in range(1, max(n_states)):
114  log.debug("Aligning rigid bodies configuration %s" % i)
115  # load the configuration
116  for j in range(len(self.components_rbs)):
117  # if there are no more particle states for this rigid body, use the last
118  state_index = min(i,n_states[j] - 1)
119  rb_states[j].load_particle_state(state_index, self.components_rbs[j])
120  coords = get_coordinates(self.components_rbs)
121  T = alg.get_transformation_aligning_first_to_second(coords, reference_coords)
122  for j, rb in enumerate(self.components_rbs):
123  t = rb.get_reference_frame().get_transformation_to()
124  new_t = alg.compose(T, t)
125  aligned_transformations[j].append(new_t)
126  # set the new possible transformations
127  for i, rb in enumerate(self.components_rbs):
128  self.set_rb_states(rb, aligned_transformations[i])
129 
130 
131  def set_xlink_restraint(self, id1, chain1, residue1, id2, chain2, residue2,
132  distance, weight, stddev, max_score=False):
133  """
134  Set a restraint on the maximum distance between 2 residues
135  @param id1 Name of the first component
136  @param chain1
137  @param residue1 Residue number for the aminoacid in the first
138  component.The number is the number in the PDB file, not the
139  number relative to the beginning of the chain
140  @param id2 Name of the second component
141  @param chain2
142  @param residue2 Residue number for the aminoacid in the second
143  component.
144  @param distance Maximum distance tolerated
145  @param weight Weight of the restraint
146  @param stddev Standard deviation used to approximate the
147  HarmonicUpperBound function to a Gaussian
148  @param max_score See help for add_restraint(). If none is given,
149  the maximum score is set to allow a maximum distance of 10 Angstrom
150  greater than the parameter "distance".
151  """
152  xlink = buildxlinks.Xlink(id1, chain1, residue1, id2, chain2, residue2, distance)
153  log.info("Setting cross-linking restraint ")
154  log.info("%s", xlink.show())
155  self.xlinks_dict.add(xlink)
156  # setup restraint
157  A = representation.get_component(self.assembly, xlink.first_id)
158  s1=IMP.atom.Selection(A, chain=xlink.first_chain,
159  residue_index=xlink.first_residue)
160  p1 = s1.get_selected_particles()[0]
161  B = representation.get_component(self.assembly, xlink.second_id)
162  s2=IMP.atom.Selection(B, chain=xlink.second_chain,
163  residue_index=xlink.second_residue)
164  p2 = s2.get_selected_particles()[0]
165  k = core.Harmonic.get_k_from_standard_deviation(stddev)
166  score = core.HarmonicUpperBound(xlink.distance, k)
167  pair_score = IMP.core.DistancePairScore(score)
168  r = IMP.core.PairRestraint(pair_score, IMP.kernel.ParticlePair(p1, p2))
169  if not max_score:
170  error_distance_allowed = 100
171  max_score = weight * score.evaluate(distance + error_distance_allowed)
172  log.info("%s, max_score %s", xlink.show(), max_score)
173  self.add_restraint(r, xlink.get_name(), weight, max_score)
174 
175  def set_complementarity_restraint(self, name1, name2, rname,
176  max_sep_distance, max_penetration,
177  weight,max_score=1e10):
178  """
179  Set a restraint for geometric complementarity between 2 components
180  @param name1 name of
181  @param name2 - The restraint is applied to this components
182  @param rname - The name given to the restraint
183  @param max_sep_distance - maximum distance between molecules
184  tolerated by the restraint
185  @param max_penetration - Maximum penetration allowd (angstrom)
186  @param weight
187  @param max_score
188  """
189  log.info("Setting geometric complementarity restraint %s: %s - %s",
190  rname, name1, name2)
191  A = representation.get_component(self.assembly, name1)
192  B = representation.get_component(self.assembly, name2)
193  restraint = multifit.ComplementarityRestraint(atom.get_leaves(A),
194  atom.get_leaves(B), rname)
195  restraint.set_maximum_separation(max_sep_distance)
196  restraint.set_maximum_penetration(max_penetration)
197  log.debug("Maximum separation: %s Maximum penetration score: %s",
198  max_sep_distance, max_penetration)
199  self.add_restraint(restraint, rname, weight, max_score)
200 
201 
202  def create_coarse_assembly(self, n_residues, write=False):
203  """
204  Simplify the assembly with a coarse representation
205  @param n_residues Number of residues used for a coarse particle
206  @param write If True, write the coarse assembly to a
207  format that Chimera can display
208  """
209  if self.create_coarse:
210  log.info("Creating simplified assembly")
211  self.coarse_assembly = \
212  representation.create_simplified_assembly(self.assembly,
213  self.components_rbs, n_residues)
214  if write:
215  io.write_hierarchy_to_chimera(self.assembly, "coarse_assembly.py")
216  self.create_coarse = False
217 
218 
219  def do_sampling(self, mode="assignments_heap_container", params=None):
220  """
221  Do Domino sampling
222  @param mode The mode used to recover solutions from domino.
223  It can have
224  the value "configuration", "assignments", or
225  "assignments_heap_container". The mode
226  "configuration" recovers all possible configurations
227  (takes a while to generate them).
228  The mode "assignments" only provides the assignments
229  (state numbers) of the solutions. It is faster but requires
230  generating the solutions afterwards
231  The mode "assignments_heap_container" selects the best solutions
232  after ecah merging in DOMINO, discarding the rest.
233  In practice I used the mode "assignments_heap_container"
234  @param params
235  """
236  t0 = time.time()
237  if mode == "configuration":
238  self.configuration_set = self.sampler.get_sample()
239  tf = time.time()
240  log.info("found %s configurations. Time %s",
241  self.configuration_set.get_number_of_configurations(),
242  tf-t0)
243  self.configuration_sampling_done = True
244  elif mode == "assignments":
245  subset = self.rb_states_table.get_subset()
246  log.info("Do sampling with assignments. Subset has %s elements: %s",
247  len(subset), subset)
248  self.solution_assignments = \
249  self.sampler.get_sample_assignments(subset)
250  tf = time.time()
251  log.info("found %s assignments. Time %s",
252  len(self.solution_assignments), tf-t0)
253  self.assignments_sampling_done = True
254  elif mode == "assignments_heap_container":
255  subset = self.rb_states_table.get_subset()
256  log.info("DOMINO sampling an assignments_heap_container. "\
257  "Subset has %s elements: %s", len(subset), subset)
258  # last vertex is the root of the merge tree
259  root = self.merge_tree.get_vertices()[-1]
260  container = self.get_assignments_with_heap(root,
261  params.heap_solutions)
262  self.solution_assignments = container.get_assignments()
263  tf = time.time()
264  log.info("found %s assignments. Time %s",
265  len(self.solution_assignments), tf-t0)
266  self.assignments_sampling_done = True
267  else:
268  raise ValueError("Requested wrong mode for the DOMINO sampling")
269 
270 
271  def get_assignment_text(self, assignment, subset):
272  """
273  Get the formatted text for an assignment
274  @param subset Subset of components of the assembly
275  @param assignment Assignment class with the states for the subset
276  @note: The order in assignment and subset is not always the order
277  of the rigid bodies in self.components_rbs. The function reorders
278  the terms in the assignment so there is correspondence.
279  """
280  ordered_assignment = []
281  for rb in self.components_rbs:
282  for i, particle in enumerate(subset):
283  if rb.get_particle() == particle:
284  ordered_assignment.append( assignment[i])
285  text = "|".join([str(i) for i in ordered_assignment])
286  return text
287 
288 
289  def get_assignment_and_RFs(self, assignment, subset):
290  """
291  Return a line with texts for an assignment and the
292  the reference frames of the RigidBodies in the subset.
293  @param subset Subset of components of the assembly
294  @param assignment Assignment class with the states for the subset
295  @note: see the get_assignment_assignment_text() note.
296  """
297  ordered_assignment = []
298  RFs = []
299  for rb in self.components_rbs:
300  for i, particle in enumerate(subset):
301  if rb.get_particle() == particle:
302  j = assignment[i]
303  ordered_assignment.append(j)
304  pstates = self.rb_states_table.get_particle_states(rb)
305  pstates.load_particle_state(j, rb.get_particle())
306  RFs.append(rb.get_reference_frame())
307  RFs_text = unit_delim.join(
308  [io.ReferenceFrameToText(ref).get_text() for ref in RFs])
309  assignment_numbers = "|".join([str(i) for i in ordered_assignment])
310  return [assignment_numbers, RFs_text]
311 
312  def get_assignment_info(self, assignment, subset):
313  """
314  Info related to an assignment. Returns a list with text for the
315  assignment and all the scores for the restraints
316  @param subset Subset of components of the assembly
317  @param assignment Assignment class with the states for the subset
318  """
319  text = self.get_assignment_and_RFs(assignment, subset)
320  scores, total_score = self.get_assignment_scores(assignment, subset)
321  info = text + [total_score] + scores
322  return info
323 
324  def get_assignment_scores(self, assignment, subset):
325  """
326  Returns a list with the values for the restraints on a subset of
327  the components of the assembly
328  @param subset Subset of components of the assembly
329  @param assignment Assignment class with the states for the subset
330  """
331  restraints_to_evaluate = \
332  self.restraint_cache.get_restraints(subset, [])
333  scores = [self.restraint_cache.get_score(r, subset, assignment)
334  for r in restraints_to_evaluate]
335  total_score = sum(scores)
336  return scores, total_score
337 
338 
339  def load_state(self, assignment):
340  """
341  Load the positions of the components given by the assignment
342  @param assignment Assignment class
343  """
344  subset = self.rb_states_table.get_subset()
345  domino.load_particle_states(subset, assignment, self.rb_states_table)
346 
347 
348  def get_assignments_with_heap(self, vertex, k=0):
349  """
350  Domino sampling that recovers the assignments for the root of the
351  merge tree, but
352  conserving only the best k scores for each vertex of the tree.
353  @param[in] vertex Vertex with the root of the current merge tree. This
354  function is recursive.
355  @param[in] k
356  """
357  if(self.sampler.get_number_of_subset_filter_tables() == 0 ):
358  raise ValueError("No subset filter tables")
359  if(self.merge_tree == None):
360  raise ValueError("No merge tree")
361  # In the merge tree, the names of the vertices are the subsets.
362  # The type of the vertex name is a domino.Subset
363  subset = self.merge_tree.get_vertex_name(vertex)
364  log.info("computing assignments for vertex %s",subset)
365  t0 = time.time()
366  assignments_container = domino.HeapAssignmentContainer(subset, k,
367  self.restraint_cache, "my_heap_assignments_container" )
368  neighbors = self.merge_tree.get_out_neighbors(vertex)
369  if len(neighbors) == 0: # A leaf
370  # Fill the container with the assignments for the leaf
371  self.sampler.load_vertex_assignments(vertex, assignments_container)
372  else:
373  if neighbors[0] > neighbors[1]:
374  # get_vertex_assignments() methods in domino
375  # expects the children in sorted order
376  neighbors =[neighbors[1], neighbors[0]]
377  # recurse on the two children
378  container_child0 = self.get_assignments_with_heap(neighbors[0], k)
379  container_child1 = self.get_assignments_with_heap(neighbors[1], k)
380  self.sampler.load_vertex_assignments(vertex,
381  container_child0,
382  container_child1,
383  assignments_container)
384  tf = time.time() - t0
385  log.info("Merge tree vertex: %s assignments: %s Time %s sec.", subset,
386  assignments_container.get_number_of_assignments(), tf)
387  return assignments_container
388 
389  def get_restraint_value_for_assignment(self, assignment, name):
390  """
391  Recover the value of a restraint
392  @param name of the restraint
393  @param assignment Assignment class containing the assignment for
394  the solution desired
395  """
396  if not self.assignments_sampling_done:
397  raise ValueError("DominoModel: sampling not done")
398  log.debug("Computing restraint value for assignment %s",assignment)
399  self.load_state(assignment)
400  for rb in self.components_rbs:
401  log.debug("rb - %s",
402  rb.get_reference_frame().get_transformation_to())
403  val = self.restraints[name].evaulate(False)
404  log.debug("restraint %s = %s",restraint.get_name(), val)
405  return val
406 
408  """
409  Sets the native model for benchmark, by reading the native
410  structure and set the rigid bodies.
411  """
412  self.measure_models = True
413  self.native_model = IMP.kernel.Model()
414  if hasattr(params.benchmark, "fn_pdb_native"):
415  self.native_assembly = \
416  representation.create_assembly_from_pdb(self.native_model,
417  params.benchmark.fn_pdb_native, params.names)
418  elif hasattr(params.benchmark, "fn_pdbs_native"):
419  self.native_assembly = \
420  representation.create_assembly(self.native_model,
421  params.benchmark.fn_pdbs_native, params.names)
422  else:
423  raise ValueError("set_native_assembly_for_benchmark: Requested " \
424  "to set a native assembly for benchmark but did not provide " \
425  "its pdb, or the pdbs of the components" )
426  self.native_rbs = representation.create_rigid_bodies(self.native_assembly)
427  anchor_assembly(self.native_rbs, params.anchor)
428 
429 
430  def set_rb_states(self, rb, transformations):
431  """
432  Add a set of reference frames as possible states for a rigid body
433  @param rb The rigid body
434  @param transformations Transformations are used to build the
435  reference frames for the rigid body.
436  """
437  log.info("Set rigid body states for %s",rb.get_name())
438  RFs = [ alg.ReferenceFrame3D(T) for T in transformations ]
439  for st in RFs:
440  log.debug("%s",st)
441  rb_states = domino.RigidBodyStates(RFs)
442  self.rb_states_table.set_particle_states(rb, rb_states)
443  st = self.rb_states_table.get_particle_states(rb)
444  log.info("RigidBodyStates created %s",
445  st.get_number_of_particle_states())
446 
448  """
449  Add a RestraintScoreSubsetFilterTable to DOMINO that allows to
450  reject assignments that have score worse than the thresholds for
451  the restraints
452  """
453  log.info("Adding RestraintScoreSubsetFilterTable")
454  # Restraint Cache is a centralized container of the restraints and
455  # their scores
456  self.restraint_cache = domino.RestraintCache(self.rb_states_table)
457  self.restraint_cache.add_restraints(self.model.get_restraints())
458  rssft = domino.RestraintScoreSubsetFilterTable(self.restraint_cache)
459  rssft.set_name('myRestraintScoreSubsetFilterTable')
460  self.sampler.add_subset_filter_table(rssft)
461 
462  def set_assembly_components(self, fn_pdbs, names):
463  """
464  Sets the components of an assembly, each one given as a separate
465  PDB file.
466  @param fn_pdbs List with the names of the pdb files
467  @param names List with the names of the components of the assembly.
468  """
469  if len(fn_pdbs) != len(names):
470  raise ValueError("Each PDB file needs a name")
471  self.names = names
472  self.assembly = representation.create_assembly(self.model, fn_pdbs,
473  names)
474  self.components_rbs = representation.create_rigid_bodies(self.assembly)
475  self.not_optimized_rbs = []
476 
477  def set_assembly(self, fn_pdb, names):
478  """
479  Sets an assembly from a single PDB file. The function assumes that
480  the components of the assembly are the chains of the PDB file.
481  @param fn_pdb PDB with the file for the asembly
482  @param names Names for each of the components (chains)
483  of the asembly
484  """
485  self.assembly = representation.create_assembly_from_pdb(self.model,
486  fn_pdb,
487  names)
488  self.components_rbs = representation.create_rigid_bodies(self.assembly)
489  self.not_optimized_rbs = []
490 
491 
493  """
494  BranchAndBoundAssignmentsTable enumerate states based on provided
495  filtered using the provided the subset filter tables
496  """
497  log.info("Setting BranchAndBoundAssignmentsTable")
498  fts = []
499  for i in range(self.sampler.get_number_of_subset_filter_tables()):
500  ft = self.sampler.get_subset_filter_table(i)
501  fts.append(ft)
502  at = domino.BranchAndBoundAssignmentsTable(self.rb_states_table, fts)
503  self.sampler.set_assignments_table(at)
504  self.assignments_table = at
505 
507  """
508  ExclusionSubsetFilterTable ensures that two particles are not given
509  the same state at the same time
510  """
511  log.info("Setting ExclusionSubsetFilterTable")
512  ft = domino.ExclusionSubsetFilterTable(self.rb_states_table)
513  self.sampler.add_subset_filter_table(ft)
514 
516  """
517  Creates a DOMINO sampler and adds the required filter tables
518  """
519  if self.merge_tree == None:
520  raise ValueError("Merge tree for DOMINO not set. It is required " \
521  "to setup the sampler")
522  log.info("Domino sampler")
523  self.sampler = domino.DominoSampler(self.model, self.rb_states_table)
524  self.sampler.set_log_level(IMP.base.TERSE)
525  self.sampler.set_merge_tree(self.merge_tree)
529 
530  def read_merge_tree(self, fn):
531  """
532  Read and create a merge tree from a file.
533  The format of the file is the format written by write merge_tree()
534  It does not matter the order of the indices in the file, as
535  they are sorted by this function.
536  The only condition is that the index for the vertex that is the
537  root of the tree MUST be the largest. This is guaranteed when
538  creating a merge tree with create_merge_tree()
539  @param fn File containing the merge tree
540  """
541  log.info("Reading merge tree from %s", fn)
542  ps = self.rb_states_table.get_particles()
543  log.debug("particles")
544  for p in ps:
545  log.debug("%s", p.get_name())
546  rows = csv_related.read_csv(fn, delimiter = field_delim)
547  for i in range(len(rows)):
548  row = rows[i]
549  rows[i] = [int(row[0]), int(row[1]), int(row[2]), row[3]]
550  # Sort rows by vertice index
551  rows.sort()
552  subsets = []
553  # build subsets from the rows text
554  for row in rows:
555  names = row[3].split(unit_delim)
556  log.debug("row %s names %s", row, names)
557  # get particles in the subset
558  particles = []
559  for name in names:
560  l = filter(lambda p: p.get_name() == name, ps)
561  if (len(l) > 1):
562  ValueError("More than one particle with same name" % names)
563  particles.extend(l)
564  s = domino.Subset(particles)
565  subset_names = [p.get_name() for p in particles]
566  log.debug("Merge tree Subset %s. Particles %s ",s, subset_names)
567  subsets.append(s)
568  # The vertex with the largest index is the root.
569  # trick: get the merge tree object from a tree with only one node ...
570  jt = domino.SubsetGraph()
571  jt.add_vertex(subsets[0])
572  mt = domino.get_merge_tree(jt)
573  # ... and add the rest of the vertices
574  for i in range(1,len(subsets)):
575  mt.add_vertex(subsets[i]) # the name of the vertex is a subset
576  # set edges
577  for row in rows:
578  vid = row[0]
579  child_left = row[1]
580  child_right = row[2]
581  if child_left != -1:
582  mt.add_edge(vid, child_left)
583  if child_right != -1:
584  mt.add_edge(vid, child_right)
585  self.merge_tree = mt
586  log.info("%s",self.merge_tree.show_graphviz() )
587 
588 
589  def create_merge_tree(self):
590  """
591  Creates a merge tree from the restraints that are set already
592  """
593  rs = self.model.get_restraints()
594  ig = domino.get_interaction_graph(rs, self.rb_states_table)
595 # pruned_dep = IMP.get_pruned_dependency_graph(self.model)
596 # IMP.base.show_graphviz(pruned_dep)
597 # IMP.base.show_graphviz(ig)
598  jt = domino.get_junction_tree(ig)
599 # IMP.base.show_graphviz(jt)
600  self.merge_tree = domino.get_balanced_merge_tree(jt)
601 # IMP.base.show_graphviz(self.merge_tree)
602  log.info("Balanced merge tree created")
603  log.info("%s",self.merge_tree.show_graphviz() )
604 
605 
606  def write_merge_tree(self, fn):
607  """
608  Writes the current merge tree to file. The format is:
609  parent | child_left | child_right | labels
610  @param fn File for storing the merge tree
611  """
612  log.info("Writing merge to %s", fn)
613  if self.merge_tree == None:
614  raise ValueError("No merge tree")
615  f = open(fn, "w")
616  f.write(
617  "# Vertex index | Child0 | Child1 | label (names of the particles)\n")
618  w = csv.writer(f, delimiter = field_delim)
619  root = self.merge_tree.get_vertices()[-1]
620  self.write_subset(root, w)
621  f.close()
622 
623 
624  def write_subset(self, v, w):
625  """
626  Writes lines describing a subset (vertex of a merge tree)
627  @param w A csv.writer
628  @param v Vertex index
629  """
630  no_child_index = -1
631  subset = self.merge_tree.get_vertex_name(v)
632  names = [p.get_name() for p in subset]
633  reps = filter(lambda x: names.count(x) > 1, names) # repeated names
634  if(len(reps)):
635  raise ValueError("The names of the "\
636  "particles in the subset %s are not unique" % subset)
637  names = unit_delim.join(names)
638  neighbors = self.merge_tree.get_out_neighbors(v)
639  if len(neighbors) == 0: # A leaf
640  # Fill the container with the assignments for the leaf
641  w.writerow([v,no_child_index,no_child_index, names])
642  else:
643  if neighbors[0] > neighbors[1]:
644  neighbors =[neighbors[1], neighbors[0]]
645  w.writerow([v,neighbors[0],neighbors[1], names])
646  self.write_subset(neighbors[0], w)
647  self.write_subset(neighbors[1], w)
648 
649 
650  def set_connectivity_restraint(self, names, rname,
651  distance=10,
652  weight=1.0, max_score=None, n_pairs=1,
653  stddev=2):
654  """
655  Set a connectivity restraint between the elements of the assembly
656  @param names List with all the elements to be connected
657  @param distance Maximum distance tolerated by the restraint
658  @param rname Name for the restraint
659  @param weight Weight for the restraint
660  @param max_score Maximum score for the restraint
661  @param n_pairs Number of pairs of particles used in the
662  KClosePairScore of the connecitivity restraint
663  @param stddev Standard deviation used to approximate the
664  HarmonicUpperBound function to a Gaussian
665  """
666  components = []
667  for name in names:
668  c = representation.get_component(self.coarse_assembly,name)
669  components.append(c)
670  log.info("Connectivity restraint for %s",components)
671  spring_constant = core.Harmonic.get_k_from_standard_deviation(stddev)
672  if max_score == None:
673  max_score = weight * spring_constant * n_pairs * (stddev)**2
674  r = restraints.get_connectivity_restraint(components, distance, n_pairs,
675  spring_constant)
676  self.add_restraint(r, rname, weight, max_score)
677 
678  def set_em2d_restraint(self, name, fn_images_sel, pixel_size, resolution,
679  n_projections, weight, max_score=False,
680  mode="fast", n_optimized=1):
681  """
682  Set a Restraint computing the em2d score.
683  @param name Name for the restraint
684  @param fn_images_sel Selection file with the names of the images
685  used for the restraint
686  @param pixel_size - pixel size in the images
687  @param resolution - resolution used to downsample the projections
688  of the model when projecting
689  @param weight Weight of the restraint
690  @param max_score - Maximum value tolerated for the restraint
691  @param n_projections - Number of projections to generate
692  when projecting the model to do the coarse alignments
693  @param mode - Mode used for computing the restraints.
694  @param n_optimized - number of results from the coarse
695  alignment that are refined with the Simplex algorithm
696  to get a more accurate value for the restraint
697  """
698  log.info("Adding a em2D restraint: %s", fn_images_sel)
699  restraint_params = em2d.Em2DRestraintParameters(pixel_size,
700  resolution,
701  n_projections)
702  r = restraints.get_em2d_restraint( self.assembly,
703  fn_images_sel,
704  restraint_params,
705  mode, n_optimized)
706  self.add_restraint(r, name, weight, max_score)
707 
708 
709  def set_not_optimized(self, name):
710  """
711  Set a part of the model as not optimized (it does not move during
712  the model optimization)
713  @param name of the component to optimized
714  """
715  if name not in self.names:
716  raise ValueError("DominoModel: There is not component " \
717  "in the assembly with this name")
718  rb_name = representation.get_rb_name(name)
719  self.not_optimized_rbs.append(rb_name)
720 
721 
722  def set_close_pairs_excluded_volume_restraint(self, distance=10,
723  weight=1.0, ratio=0.05, stddev=2,
724  max_score=False ):
725  """
726  Creates an excluded volume restraint between all the components
727  of the coarse assembly.See help for
728  @param distance Maximum distance tolerated between particles
729  @param weight Weight for the restraint
730  @param stddev
731  @param max_score Maximum value for the restraint. If the parameter
732  is None, an automatic value is computed (using the ratio).
733  @param ratio Fraction of the number of possible pairs of
734  particles that are tolerated to overlap. The maximum
735  score is modified according to this ratio.
736  I have found that ratios of 0.05-0.1 work well allowing for
737  some interpenetration
738  @param stddev Standard deviation used to approximate the
739  HarmonicUpperBound function to a Gaussian
740  """
741  k = core.Harmonic.get_k_from_standard_deviation(stddev)
742  for i, c1 in enumerate(self.coarse_assembly.get_children()):
743  for j, c2 in enumerate(self.coarse_assembly.get_children()):
744  if j > i:
745  name = "exc_vol_%s_%s" % (c1.get_name(), c2.get_name())
746 
747  ls1 = atom.get_leaves(c1)
748  ls2 = atom.get_leaves(c2)
749  possible_pairs = len(ls1) * len(ls2)
750  n_pairs = possible_pairs * ratio
751 
752  marker1 = IMP.kernel.Particle(self.model, "marker1 " + name)
753  marker2 = IMP.kernel.Particle(self.model, "marker2 " + name)
754  table_refiner = core.TableRefiner()
755  table_refiner.add_particle(marker1, ls1)
756  table_refiner.add_particle(marker2, ls2)
757  score = core.HarmonicLowerBound(distance, k)
758  pair_score = core.SphereDistancePairScore(score)
759  close_pair_score = core.ClosePairsPairScore(pair_score,
760  table_refiner,
761  distance)
762  r = core.PairRestraint(close_pair_score,
763  IMP.kernel.ParticlePair(marker1,marker2))
764 
765  if not max_score:
766  minimum_distance_allowed = 0
767  max_score = weight * n_pairs * \
768  score.evaluate(minimum_distance_allowed)
769  log.debug("Setting close_pairs_excluded_volume_restraint(), " \
770  "max_score %s", max_score)
771  self.add_restraint(r, name, weight, max_score)
772 
773  def set_pair_score_restraint(self, name1, name2,
774  restraint_name, distance=10,
775  weight=1.0, n_pairs = 1, stddev=2, max_score=None):
776  """
777  Set a pair_score restraint between the coarse representation
778  of two components
779  @param name1 Name of the first component
780  @param name2 Name of the second component
781  @param restraint_name Name for the restraint
782  @param distance Maximum distance tolerated between particles
783  @param weight Weight of the restraint
784  @param n_pairs
785  @param max_score Maximum value tolerated for the restraint
786  @param stddev Standard deviation used to approximate the
787  HarmonicUpperBound function to a Gaussian
788  """
789  table_refiner = core.TableRefiner()
790 
791  # The particles in A are attached to a marker particle via a refiner.
792  # When the refiner gets a request for marker1, it returns the attached
793  # particles
794  A = representation.get_component(self.coarse_assembly, name1)
795  marker1 = IMP.kernel.Particle(self.model, "marker1 "+restraint_name)
796  table_refiner.add_particle(marker1, atom.get_leaves(A))
797  # same for B
798  B = representation.get_component(self.coarse_assembly, name2)
799  marker2 = IMP.kernel.Particle(self.model, "marker2 "+restraint_name)
800  table_refiner.add_particle(marker2, atom.get_leaves(B))
801 
802  k = core.Harmonic.get_k_from_standard_deviation(stddev)
804  # The score is set for the n_pairs closest pairs of particles
805  pair_score = core.KClosePairsPairScore(score, table_refiner, n_pairs)
806  # When KClosePairsPairScore is called, the refiner will provide the
807  # particles for A and B
808  if not max_score:
809  # Build a maximum score based on the function type that is used,
810  # an HarmonicUpperBound
811  temp_score = core.HarmonicUpperBound(distance, k)
812  error_distance_allowed = 10
813  max_score = weight * n_pairs * \
814  temp_score.evaluate(distance + error_distance_allowed)
815 
816  log.info("Setting pair score restraint for %s %s. k = %s, max_score " \
817  "= %s, stddev %s", name1, name2, k, max_score,stddev)
818  r = core.PairRestraint(pair_score, IMP.kernel.ParticlePair( marker1, marker2 ) )
819  self.add_restraint(r, restraint_name, weight, max_score)
820 
821 
822  def write_solutions_database(self, fn_database, max_number=None):
823  """
824  Write the results of the DOMINO sampling to a SQLite database.
825  @param fn_database
826  @param max_number Maximum number of results to write
827  """
828  log.info("Creating the database of solutions")
829  if self.measure_models == True and not hasattr(self, "native_model"):
830  raise ValueError("The native model has not been set")
832  db.create(fn_database, overwrite=True)
833  db.connect(fn_database)
834  subset = self.rb_states_table.get_subset()
835  restraints_names = self.get_restraints_names_used(subset)
836  db.add_results_table(restraints_names, self.measure_models)
837  n = len(self.solution_assignments)
838  if max_number != None:
839  n = min(n, max_number)
840  for sol_id, assignment in enumerate(self.solution_assignments[0:n]):
841  txt = self.get_assignment_text(assignment, subset)
842  # load the positions of the components in the state
843  self.load_state(assignment)
844  RFs = [rb.get_reference_frame() for rb in self.components_rbs]
845  scores, total_score = self.get_assignment_scores(assignment, subset)
846  measures = None
847  if(self.measure_models):
848  measures = measure_model(self.assembly, self.native_assembly,
849  self.components_rbs, self.native_rbs)
850  db.add_record(sol_id, txt, RFs, total_score, scores, measures)
851  # Add native state if doing benchmarking
852  if self.measure_models:
853  assignment = "native"
854  RFs = [rb.get_reference_frame() for rb in self.native_rbs]
855  scores, total_score = self.get_restraints_for_native(
856  restraints_names)
857  db.add_native_record(assignment, RFs, total_score, scores)
858  db.save_records()
859  db.close()
860 
861  def get_restraints_names_used(self, subset):
862  """ Get the names of the restraints used for a subset
863  @param subset Subset of components of the assembly
864  """
865  return [r.get_name() for r
866  in self.restraint_cache.get_restraints(subset, [])]
867 
868  def get_restraints_for_native(self, restraints_names):
869  """
870  Get values of the restraints for the native structure
871  @param restraints_names Names of the restraints requested
872  @return a list with the values of all scores, and the total score
873  """
874  saved = [rb.get_reference_frame() for rb in self.components_rbs]
875  # Set the state of the model to native
876  for rb,rn in zip(self.components_rbs, self.native_rbs):
877  # remove sub-rigid bodies
878  rb_members = filter(
879  lambda m: not core.RigidBody.get_is_setup(
880  m.get_particle()), rb.get_members())
881  rn_members = filter(
882  lambda m: not core.RigidBody.get_is_setup(
883  m.get_particle()), rn.get_members())
884  rb_coords = [m.get_coordinates() for m in rb_members]
885  rn_coords = [m.get_coordinates() for m in rn_members]
886 
887  # align and put rb in the position of rn
888  if len(rn_coords) != len(rb_coords) :
889  raise ValueError("Mismatch in the number of members. "
890  "Reference %d Aligned %d " % (len(rn_coords), len(rb_coords)))
891  T = alg.get_transformation_aligning_first_to_second(rb_coords,
892  rn_coords)
893  t = rb.get_reference_frame().get_transformation_to()
894  new_t = alg.compose(T, t)
895  rb.set_reference_frame(alg.ReferenceFrame3D(new_t))
896  scores = []
897  for name in restraints_names:
898  scores.append( self.restraints[name].evaluate(False))
899  total_score = sum(scores)
900  representation.set_reference_frames(self.components_rbs, saved)
901  return scores, total_score
902 
903  def write_pdb_for_assignment(self, assignment, fn_pdb):
904  """
905  Write the solution in the assignment
906  @param assignment Assignment class with the states for the subset
907  @param fn_pdb File to write the model
908  """
909  if not self.assignments_sampling_done:
910  raise ValueError("DominoModel: sampling not done")
911  log.info("Writting PDB %s for assignment %s",fn_pdb, assignment)
912  self.load_state(assignment)
913  atom.write_pdb(self.assembly, fn_pdb)
914 
915  def write_pdb_for_configuration(self, n, fn_pdb):
916  """
917  Write a file with a configuration for the model (configuration
918  here means a configuration in DOMINO)
919  @param n Index of the configuration desired.
920  @param fn_pdb
921  """
922  if not self.configuration_sampling_done:
923  raise ValueError("DominoModel: sampling not done")
924  log.debug("Writting PDB %s for configuration %s", fn_pdb, n)
925  configuration_set.load_configuration(n)
926  atom.write_pdb(self.assembly, fn_pdb)
927 
928  def write_pdb_for_reference_frames(self,RFs, fn_pdb):
929  """
930  Write a PDB file with a solution given by a set of reference frames
931  @param RFs Reference frames for the elements of the complex
932  @param fn_pdb File to write the solution
933  """
934  log.debug("Writting PDB %s for reference frames %s",fn_pdb, RFs)
935  representation.set_reference_frames(self.components_rbs,RFs)
936  atom.write_pdb(self.assembly, fn_pdb)
937 
938  def write_pdbs_for_reference_frames(self,RFs, fn_base):
939  """
940  Write a separate PDB for each of the elements
941  @param RFs Reference frames for the elements of the complex
942  @param fn_base base string to buid the names of the PDBs files
943  """
944  log.debug("Writting PDBs with basename %s",fn_base)
945  representation.set_reference_frames(self.components_rbs,RFs)
946  for i,ch in enumerate(self.assembly.get_children()):
947  atom.write_pdb(ch, fn_base + "component-%02d.pdb" % i )
948 
949  def write_pdb_for_component(self, component_index, ref, fn_pdb):
950  """
951  Write one component of the assembly
952  @param component_index Position of the component in the assembly
953  @param ref Reference frame of the component
954  @param fn_pdb Output PDB
955  """
956  ("Writting PDB %s for component %s",fn_pdb, component_index)
957  self.components_rbs[component_index].set_reference_frame(ref)
958  hierarchy_component = self.assembly.get_child(component_index)
959  atom.write_pdb(hierarchy_component, fn_pdb)
960 
961 
962  def write_monte_carlo_solution(self, fn_database):
963  """
964  Write the solution of a MonteCarlo run
965  @param fn_database Database file with the solution.
966  The database will contain only one record
967  """
968  total_score = 0
969  rnames = []
970  scores = []
971  for i in range(self.model.get_number_of_restraints()):
972  r = self.model.get_restraint(i)
973  score = r.evaluate(False)
974  rnames.append(r.get_name())
975  scores.append(score)
976  total_score += score
978  db.create(fn_database, overwrite=True)
979  db.connect(fn_database)
980  db.add_results_table(rnames, self.measure_models)
981  RFs = [ rb.get_reference_frame() for rb in self.components_rbs]
982  measures = None
983  if(self.measure_models):
984  measures = measure_model(self.assembly, self.native_assembly,
985  self.components_rbs, self.native_rbs)
986  solution_id = 0
987  db.add_record(solution_id, "", RFs, total_score, scores, measures)
988  db.save_records()
989  db.close()
990 
991 def print_restraints_values(model):
992  print "Restraints: Name, weight, value, maximum_value"
993  total_score = 0
994  for i in range(model.get_number_of_restraints()):
995  r = model.get_restraint(i)
996  score = r.evaluate(False)
997 # print "%20s %18f %18f %18f" % (r.get_name(), r.get_weight(),
998 # score, r.get_maximum_score())
999  print "%20s %18f %18f" % (r.get_name(), r.get_weight(), score)
1000  total_score += score
1001  print "total_score:",total_score
1002 
1003 
1004 def anchor_assembly(components_rbs, anchored):
1005  """
1006  "Anchor" a set of rigid bodies, by setting the position of one of them
1007  at the origin (0,0,0).
1008  @param components_rbs Rigid bodies of the components
1009  @param anchored - List of True/False values indicating if the components
1010  of the assembly are anchored. The function sets the FIRST anchored
1011  component in the (0,0,0) coordinate and moves the other
1012  components with the same translation. If all the values are False, the
1013  function does not do anything
1014  """
1015  if True in anchored:
1016  anchored_rb = None
1017  for a, rb in zip(anchored, components_rbs) :
1018  if a:
1019  anchored_rb = rb
1020  break
1021  center = anchored_rb.get_coordinates()
1022  log.info("Anchoring assembly at (0,0,0)")
1023  for rb in components_rbs:
1024  T = rb.get_reference_frame().get_transformation_to()
1025  t = T.get_translation()
1026  R = T.get_rotation()
1027  log.debug("%s: Rerefence frame BEFORE %s",
1028  rb.get_name(),rb.get_reference_frame())
1029  T2 = alg.Transformation3D(R, t - center)
1030  rb.set_reference_frame(alg.ReferenceFrame3D(T2))
1031  log.debug("%s Rerefence frame AFTER %s",
1032  rb.get_name(),rb.get_reference_frame())
1033 
1034 def measure_model(assembly, native_assembly, rbs, native_rbs):
1035  """
1036  Get the drms, cdrms and crmsd for a model
1037  @param assembly Hierachy for an assembly
1038  @param native_assembly Hierarchy of the native structure
1039  @param rbs Rigid bodies for the components of the assembly
1040  @param native_rbs Rigid bodies for the components of the native
1041  structure
1042  @return cdrms, cdrms, crmsd
1043  - drms - typical drms measure
1044  - cdrms - drms for the centroids of the rigid bodies of the components
1045  of the assembly
1046  - crmsd - rmsd for the centroids
1047  """
1048  log.debug("Measure model")
1049  drms = comparisons.get_drms_for_backbone(assembly, native_assembly)
1050  RFs = [rb.get_reference_frame() for rb in rbs]
1051  vs = [r.get_transformation_to().get_translation() for r in RFs]
1052  nRFs = [rb.get_reference_frame() for rb in native_rbs]
1053  nvs = [r.get_transformation_to().get_translation() for r in nRFs]
1054  cdrms = atom.get_drms(vs, nvs)
1055  crmsd, RFs = alignments.align_centroids_using_pca(RFs, nRFs)
1056  log.debug("drms %s cdrms %s crmsd %s",drms,cdrms,crmsd)
1057  return [drms, cdrms, crmsd]
1058 
1059 
1060 
1061 def get_coordinates(rigid_bodies):
1062  """
1063  Return a list of the coordinates of all the members of the rigid bodies
1064  """
1065  if len(rigid_bodies) == 0:
1066  raise ValueError("get_coordinates: There are not rigid bodies to get coordinates from")
1067  coords = []
1068  for rb in rigid_bodies:
1069  # remove sub-rigid bodies
1070  rb_members = filter(
1071  lambda m: not core.RigidBody.get_is_setup(
1072  m.get_particle()), rb.get_members())
1073  coords.extend([m.get_coordinates() for m in rb_members])
1074  return coords
A harmonic upper bound on the distance between two spheres.
Lower bound harmonic function (non-zero when feature < mean)
Compute the complementarity between two molecules.
def get_assignments_with_heap
Domino sampling that recovers the assignments for the root of the merge tree, but conserving only the...
Definition: DominoModel.py:348
See IMP.em2d for more information.
Definition: align2D.h:18
def set_not_optimized
Set a part of the model as not optimized (it does not move during the model optimization) ...
Definition: DominoModel.py:709
See IMP.container for more information.
Upper bound harmonic function (non-zero when feature > mean)
def add_restraint
Adds a restraint to the model.
Definition: DominoModel.py:54
def write_pdb_for_assignment
Write the solution in the assignment.
Definition: DominoModel.py:903
def add_branch_and_bound_assignments_table
BranchAndBoundAssignmentsTable enumerate states based on provided filtered using the provided the sub...
Definition: DominoModel.py:492
def set_assembly
Sets an assembly from a single PDB file.
Definition: DominoModel.py:477
See IMP.base for more information.
Definition: base/Array.h:20
def set_em2d_restraint
Set a Restraint computing the em2d score.
Definition: DominoModel.py:678
Sample best solutions using Domino.
Definition: DominoSampler.h:32
def create_coarse_assembly
Simplify the assembly with a coarse representation.
Definition: DominoModel.py:202
def add_exclusion_filter_table
ExclusionSubsetFilterTable ensures that two particles are not given the same state at the same time...
Definition: DominoModel.py:506
Filter a configuration of the subset using the kernel::Model thresholds.
Represent a subset of the particles being optimized.
Definition: Subset.h:33
def set_pair_score_restraint
Set a pair_score restraint between the coarse representation of two components.
Definition: DominoModel.py:773
A score on the distance between the surfaces of two spheres.
def write_pdbs_for_reference_frames
Write a separate PDB for each of the elements.
Definition: DominoModel.py:938
def create_merge_tree
Creates a merge tree from the restraints that are set already.
Definition: DominoModel.py:589
def write_solutions_database
Write the results of the DOMINO sampling to a SQLite database.
Definition: DominoModel.py:822
def do_sampling
Do Domino sampling.
Definition: DominoModel.py:219
def get_assignment_scores
Returns a list with the values for the restraints on a subset of the components of the assembly...
Definition: DominoModel.py:324
A class to store an fixed array of same-typed values.
Definition: base/Array.h:33
Do not allow two particles to be in the same state.
def read_merge_tree
Read and create a merge tree from a file.
Definition: DominoModel.py:530
def write_monte_carlo_solution
Write the solution of a MonteCarlo run.
Definition: DominoModel.py:962
def get_restraint_value_for_assignment
Recover the value of a restraint.
Definition: DominoModel.py:389
A lookup based particle refiner.
Definition: TableRefiner.h:22
def set_complementarity_restraint
Set a restraint for geometric complementarity between 2 components.
Definition: DominoModel.py:175
def add_restraint_score_filter_table
Add a RestraintScoreSubsetFilterTable to DOMINO that allows to reject assignments that have score wor...
Definition: DominoModel.py:447
def set_xlink_restraint
Set a restraint on the maximum distance between 2 residues.
Definition: DominoModel.py:131
Utility functions to handle restraints.
Definition: restraints.py:1
def write_merge_tree
Writes the current merge tree to file.
Definition: DominoModel.py:606
Management of a model using DOMINO.
Definition: DominoModel.py:35
See IMP.multifit for more information.
def get_restraints_for_native
Get values of the restraints for the native structure.
Definition: DominoModel.py:868
Utility functions to store and retrieve solution information.
Definition: solutions_io.py:1
Class to handle individual model particles.
Class for managing the results of the experiments.
def set_native_assembly_for_benchmark
Sets the native model for benchmark, by reading the native structure and set the rigid bodies...
Definition: DominoModel.py:407
def setup_domino_sampler
Creates a DOMINO sampler and adds the required filter tables.
Definition: DominoModel.py:515
def get_assignment_info
Info related to an assignment.
Definition: DominoModel.py:312
def get_assignment_text
Get the formatted text for an assignment.
Definition: DominoModel.py:271
def write_pdb_for_component
Write one component of the assembly.
Definition: DominoModel.py:949
def write_pdb_for_reference_frames
Write a PDB file with a solution given by a set of reference frames.
Definition: DominoModel.py:928
def load_state
Load the positions of the components given by the assignment.
Definition: DominoModel.py:339
See IMP.core for more information.
See IMP.algebra for more information.
def write_pdb_for_configuration
Write a file with a configuration for the model (configuration here means a configuration in DOMINO) ...
Definition: DominoModel.py:915
def set_connectivity_restraint
Set a connectivity restraint between the elements of the assembly.
Definition: DominoModel.py:650
def align_rigid_bodies_states
Aligns the set of structures considered for DOMINO sampling.
Definition: DominoModel.py:76
Applies a PairScore to a Pair.
Definition: PairRestraint.h:30
def set_rb_states
Add a set of reference frames as possible states for a rigid body.
Definition: DominoModel.py:430
See IMP.display for more information.
Definition: BildWriter.h:20
See IMP.atom for more information.
def set_close_pairs_excluded_volume_restraint
Creates an excluded volume restraint between all the components of the coarse assembly.See help for.
Definition: DominoModel.py:725
def get_restraints_names_used
Get the names of the restraints used for a subset.
Definition: DominoModel.py:861
def set_assembly_components
Sets the components of an assembly, each one given as a separate PDB file.
Definition: DominoModel.py:462
See IMP.domino for more information.
Definition: analysis.h:15
Class for storing model, its restraints, constraints, and particles.
def write_subset
Writes lines describing a subset (vertex of a merge tree)
Definition: DominoModel.py:624
def get_assignment_and_RFs
Return a line with texts for an assignment and the the reference frames of the RigidBodies in the sub...
Definition: DominoModel.py:289