IMP  2.0.0
The Integrative Modeling Platform
DominoModel.py
1 import sys
2 import time
3 import csv
4 import logging
5 log = logging.getLogger("DominoModel")
6 
7 import IMP
8 import IMP.domino as domino
9 import IMP.core as core
10 import IMP.container as container
11 import IMP.display as display
12 import IMP.atom as atom
13 import IMP.algebra as alg
14 import IMP.em2d as em2d
15 import IMP.base
16 import IMP.multifit as multifit
17 
18 import IMP.em2d.imp_general.comparisons as comparisons
19 import IMP.em2d.imp_general.alignments as alignments
20 import IMP.em2d.imp_general.io as io
21 import IMP.em2d.imp_general.representation as representation
22 
23 import IMP.em2d.restraints as restraints
24 import IMP.em2d.buildxlinks as buildxlinks
25 import IMP.em2d.solutions_io as solutions_io
26 import IMP.em2d.csv_related as csv_related
27 
28 
29 field_delim = "," # separate fields in the output text file
30 unit_delim = "/" # separate units within a field (eg, reference frames).
31  # It is used in output text files and in databse files
32 
33 
34 
35 class DominoModel:
36  """
37  Management of a model using DOMINO
38  """
39  def __init__(self, name="my model"):
40  self.model = IMP.Model()
41  self.model.set_name(name)
42  self.configuration_sampling_done = False
43  self.assignments_sampling_done = False
44  self.rb_states_table = domino.ParticleStatesTable()
45  self.merge_tree = None
46  # measure_models is True only when doing benchmark
47  self.measure_models = False
48  # The first time that create_coarse is found and its value is True
49  # a coarse version of the assembly is built
50  self.create_coarse = True
51  self.restraints = dict()
52  self.xlinks_dict = buildxlinks.XlinksDict()
53 
54  def add_restraint(self, r, name, weight, max_score=False):
55  """
56  Adds a restraint to the model
57  @param r An IMP.Restraint object
58  @param name Name for the restraint
59  @param weight Weight for the restraint
60  @param max_score Maximum score allowed for the restraint. If
61  max_score is False, there is no limit for the value of the restraint
62  """
63  log.info("Adding restraint %s weight %s max_score %s",
64  name, weight, max_score)
65  if "-" in name:
66  raise ValueError("SQL database does not accept restraint names "\
67  "containing -")
68  r.set_name(name)
69  if max_score != None and max_score != False:
70  r.set_maximum_score(max_score)
71  r.set_weight(weight)
72  self.restraints[name] = r
73  self.model.add_restraint(r)
74 
75 
76  def align_rigid_bodies_states(self):
77  """
78  Aligns the set of structures considered for DOMINO sampling.
79  The function:
80  1) reads the possible reference frames that the
81  rb_states_table stores for each rigid body. This table
82  must be filled before using this function. Usually it is
83  filled with the results from a previous Monte Carlo sampling.
84  If the solutions from Monte Carlo seem to have the same structure
85  but they are not aligned to each other, this function can
86  help setting better starting positions to use with DOMINO.
87  2) Gets the first state for each of the rigid bodies and sets
88  a reference structure using such states.
89  3) Aligns all the rest of the structures respect to the reference
90  4) Replaces the values of the reference frames stored in the
91  rb_states_table with the new values obtained from the alignments.
92  It does it for all states of a rigid body.
93  @note: If this function is applied, the parameters "anchor" and "fixed"
94  are ignored, as they are superseded by the use of the aligments
95  calculated here.
96  """
97  log.debug("Align the configurations read from the database before " \
98  "running DOMINO")
99  rb_states = []
100  n_states = []
101  for rb in self.components_rbs:
102  ps = self.rb_states_table.get_particle_states(rb)
103  n = ps.get_number_of_particle_states()
104  if n == 0:
105  raise ValueError("There are no particle states for %s" % rb.get_name())
106  n_states.append(n)
107  rb_states.append(ps)
108  # coordinates of the first configuration (there is at least one for all the rbs)
109  for rb, states in zip(self.components_rbs, rb_states):
110  states.load_particle_state(0, rb)
111  reference_coords = get_coordinates(self.components_rbs)
112  aligned_transformations = [[] for rb in self.components_rbs]
113  for i in range(1, max(n_states)):
114  log.debug("Aligning rigid bodies configuration %s" % i)
115  # load the configuration
116  for j in range(len(self.components_rbs)):
117  # if there are no more particle states for this rigid body, use the last
118  state_index = min(i,n_states[j] - 1)
119  rb_states[j].load_particle_state(state_index, self.components_rbs[j])
120  coords = get_coordinates(self.components_rbs)
121  T = alg.get_transformation_aligning_first_to_second(coords, reference_coords)
122  for j, rb in enumerate(self.components_rbs):
123  t = rb.get_reference_frame().get_transformation_to()
124  new_t = alg.compose(T, t)
125  aligned_transformations[j].append(new_t)
126  # set the new possible transformations
127  for i, rb in enumerate(self.components_rbs):
128  self.set_rb_states(rb, aligned_transformations[i])
129 
130 
131  def set_xlink_restraint(self, id1, chain1, residue1, id2, chain2, residue2,
132  distance, weight, stddev, max_score=False):
133  """
134  Set a restraint on the maximum distance between 2 residues
135  @param id1 Name of the first component
136  @param residue1 Residue number for the aminoacid in the first
137  component.The number is the number in the PDB file, not the
138  number relative to the beginning of the chain
139  @param id2 Name of the second component
140  @param residue2 Residue number for the aminoacid in the second
141  component.
142  @param distance Maximum distance tolerated
143  @param weight Weight of the restraint
144  @param stddev Standard deviation used to approximate the
145  HarmonicUpperBound function to a Gaussian
146  @param max_score See help for add_restraint(). If none is given,
147  the maximum score is set to allow a maximum distance of 10 Angstrom
148  greater than the parameter "distance".
149  """
150  xlink = buildxlinks.Xlink(id1, chain1, residue1, id2, chain2, residue2, distance)
151  log.info("Setting cross-linking restraint ")
152  log.info("%s", xlink.show())
153  self.xlinks_dict.add(xlink)
154  # setup restraint
155  A = representation.get_component(self.assembly, xlink.first_id)
156  s1=IMP.atom.Selection(A, chain=xlink.first_chain,
157  residue_index=xlink.first_residue)
158  p1 = s1.get_selected_particles()[0]
159  B = representation.get_component(self.assembly, xlink.second_id)
160  s2=IMP.atom.Selection(B, chain=xlink.second_chain,
161  residue_index=xlink.second_residue)
162  p2 = s2.get_selected_particles()[0]
163  k = core.Harmonic.get_k_from_standard_deviation(stddev)
164  score = core.HarmonicUpperBound(xlink.distance, k)
165  pair_score = IMP.core.DistancePairScore(score)
166  r = IMP.core.PairRestraint(pair_score, IMP.ParticlePair(p1, p2))
167  if not max_score:
168  error_distance_allowed = 100
169  max_score = weight * score.evaluate(distance + error_distance_allowed)
170  log.info("%s, max_score %s", xlink.show(), max_score)
171  self.add_restraint(r, xlink.get_name(), weight, max_score)
173  def set_complementarity_restraint(self, name1, name2, rname,
174  max_sep_distance, max_penetration,
175  weight,max_score=1e10):
176  """
177  Set a restraint for geometric complementarity between 2 components
178  @param name1 name of
179  @param name2 - The restraint is applied to this components
180  - rname - The name given to the restraint
181  - max_sep_distance - maximum distance between molecules
182  tolerated by the restraint
183  - max_penetration - Maximum penetration allowd (angstrom)
184  """
185  log.info("Setting geometric complementarity restraint %s: %s - %s",
186  rname, name1, name2)
187  A = representation.get_component(self.assembly, name1)
188  B = representation.get_component(self.assembly, name2)
189  restraint = multifit.ComplementarityRestraint(atom.get_leaves(A),
190  atom.get_leaves(B), rname)
191  restraint.set_maximum_separation(max_sep_distance)
192  restraint.set_maximum_penetration(max_penetration)
193  log.debug("Maximum separation: %s Maximum penetration score: %s",
194  max_sep_distance, max_penetration)
195  self.add_restraint(restraint, rname, weight, max_score)
196 
198  def create_coarse_assembly(self, n_residues, write=False):
199  """
200  Simplify the assembly with a coarse representation
201  @param n_residues Number of residues used for a coarse particle
202  @param write If True, write the coarse assembly to a
203  format that Chimera can display
204  """
205  if self.create_coarse:
206  log.info("Creating simplified assembly")
207  self.coarse_assembly = \
208  representation.create_simplified_assembly(self.assembly,
209  self.components_rbs, n_residues)
210  if write:
211  io.write_hierarchy_to_chimera(self.assembly, "coarse_assembly.py")
212  self.create_coarse = False
214 
215  def do_sampling(self, mode="assignments_heap_container", params=None):
216  """
217  Do Domino sampling
218  @param mode The mode used to recover solutions from domino.
219  It can have
220  the value "configuration", "assignments", or
221  "assignments_heap_container". The mode
222  "configuration" recovers all possible configurations
223  (takes a while to generate them).
224  The mode "assignments" only provides the assignments
225  (state numbers) of the solutions. It is faster but requires
226  generating the solutions afterwards
227  The mode "assignments_heap_container" selects the best solutions
228  after ecah merging in DOMINO, discarding the rest.
229  In practice I used the mode "assignments_heap_container"
230  """
231  t0 = time.time()
232  if mode == "configuration":
233  self.configuration_set = self.sampler.get_sample()
234  tf = time.time()
235  log.info("found %s configurations. Time %s",
236  self.configuration_set.get_number_of_configurations(),
237  tf-t0)
238  self.configuration_sampling_done = True
239  elif mode == "assignments":
240  subset = self.rb_states_table.get_subset()
241  log.info("Do sampling with assignments. Subset has %s elements: %s",
242  len(subset), subset)
243  self.solution_assignments = \
244  self.sampler.get_sample_assignments(subset)
245  tf = time.time()
246  log.info("found %s assignments. Time %s",
247  len(self.solution_assignments), tf-t0)
248  self.assignments_sampling_done = True
249  elif mode == "assignments_heap_container":
250  subset = self.rb_states_table.get_subset()
251  log.info("DOMINO sampling an assignments_heap_container. "\
252  "Subset has %s elements: %s", len(subset), subset)
253  # last vertex is the root of the merge tree
254  root = self.merge_tree.get_vertices()[-1]
255  container = self.get_assignments_with_heap(root,
256  params.heap_solutions)
257  self.solution_assignments = container.get_assignments()
258  tf = time.time()
259  log.info("found %s assignments. Time %s",
260  len(self.solution_assignments), tf-t0)
261  self.assignments_sampling_done = True
262  else:
263  raise ValueError("Requested wrong mode for the DOMINO sampling")
264 
265 
266  def get_assignment_text(self, assignment, subset):
267  """
268  Get the formatted text for an assignment
269  @param subset Subset of components of the assembly
270  @param assignment Assignment class with the states for the subset
271  @note: The order in assignment and subset is not always the order
272  of the rigid bodies in self.components_rbs. The function reorders
273  the terms in the assignment so there is correspondence.
274  """
275  ordered_assignment = []
276  for rb in self.components_rbs:
277  for i, particle in enumerate(subset):
278  if rb.get_particle() == particle:
279  ordered_assignment.append( assignment[i])
280  text = "|".join([str(i) for i in ordered_assignment])
281  return text
282 
283 
284  def get_assignment_and_RFs(self, assignment, subset):
285  """
286  Return a line with texts for an assignment and the
287  the reference frames of the RigidBodies in the subset.
288  @param subset Subset of components of the assembly
289  @param assignment Assignment class with the states for the subset
290  @note: see the get_assignment_assignment_text() note.
291  """
292  ordered_assignment = []
293  RFs = []
294  for rb in self.components_rbs:
295  for i, particle in enumerate(subset):
296  if rb.get_particle() == particle:
297  j = assignment[i]
298  ordered_assignment.append(j)
299  pstates = self.rb_states_table.get_particle_states(rb)
300  pstates.load_particle_state(j, rb.get_particle())
301  RFs.append(rb.get_reference_frame())
302  RFs_text = unit_delim.join(
303  [io.ReferenceFrameToText(ref).get_text() for ref in RFs])
304  assignment_numbers = "|".join([str(i) for i in ordered_assignment])
305  return [assignment_numbers, RFs_text]
306 
307  def get_assignment_info(self, assignment, subset):
308  """
309  Info related to an assignment. Returns a list with text for the
310  assignment and all the scores for the restraints
311  @param subset Subset of components of the assembly
312  @param assignment Assignment class with the states for the subset
313  """
314  text = self.get_assignment_and_RFs(assignment, subset)
315  scores, total_score = self.get_assignment_scores(assignment, subset)
316  info = text + [total_score] + scores
317  return info
318 
319  def get_assignment_scores(self, assignment, subset):
320  """
321  Returns a list with the values for the restraints on a subset of
322  the components of the assembly
323  @param subset Subset of components of the assembly
324  @param assignment Assignment class with the states for the subset
325  """
326  restraints_to_evaluate = \
327  self.restraint_cache.get_restraints(subset, [])
328  scores = [self.restraint_cache.get_score(r, subset, assignment)
329  for r in restraints_to_evaluate]
330  total_score = sum(scores)
331  return scores, total_score
333 
334  def load_state(self, assignment):
335  """
336  Load the positions of the components given by the assignment
337  @param assignment Assignment class
338  """
339  subset = self.rb_states_table.get_subset()
340  domino.load_particle_states(subset, assignment, self.rb_states_table)
341 
342 
343  def get_assignments_with_heap(self, vertex, k=0):
344  """
345  Domino sampling that recovers the assignments for the root of the
346  merge tree, but
347  conserving only the best k scores for each vertex of the tree.
348  @param vertex Vertex with the root of the current merge tree. This
349  function is recursive.
350  """
351  if(self.sampler.get_number_of_subset_filter_tables() == 0 ):
352  raise ValueError("No subset filter tables")
353  if(self.merge_tree == None):
354  raise ValueError("No merge tree")
355  # In the merge tree, the names of the vertices are the subsets.
356  # The type of the vertex name is a domino.Subset
357  subset = self.merge_tree.get_vertex_name(vertex)
358  log.info("computing assignments for vertex %s",subset)
359  t0 = time.time()
360  assignments_container = domino.HeapAssignmentContainer(subset, k,
361  self.restraint_cache, "my_heap_assignments_container" )
362  neighbors = self.merge_tree.get_out_neighbors(vertex)
363  if len(neighbors) == 0: # A leaf
364  # Fill the container with the assignments for the leaf
365  self.sampler.load_vertex_assignments(vertex, assignments_container)
366  else:
367  if neighbors[0] > neighbors[1]:
368  # get_vertex_assignments() methods in domino
369  # expects the children in sorted order
370  neighbors =[neighbors[1], neighbors[0]]
371  # recurse on the two children
372  container_child0 = self.get_assignments_with_heap(neighbors[0], k)
373  container_child1 = self.get_assignments_with_heap(neighbors[1], k)
374  self.sampler.load_vertex_assignments(vertex,
375  container_child0,
376  container_child1,
377  assignments_container)
378  tf = time.time() - t0
379  log.info("Merge tree vertex: %s assignments: %s Time %s sec.", subset,
380  assignments_container.get_number_of_assignments(), tf)
381  return assignments_container
382 
383  def get_restraint_value_for_assignment(self, assignment, name):
384  """
385  Recover the value of a restraint
386  @param name of the restraint
387  @param assignment Assignment class containing the assignment for
388  the solution desired
389  """
390  if not self.assignments_sampling_done:
391  raise ValueError("DominoModel: sampling not done")
392  log.debug("Computing restraint value for assignment %s",assignment)
393  self.load_state(assignment)
394  for rb in self.components_rbs:
395  log.debug("rb - %s",
396  rb.get_reference_frame().get_transformation_to())
397  val = self.restraints[name].evaulate(False)
398  log.debug("restraint %s = %s",restraint.get_name(), val)
399  return val
400 
401  def set_native_assembly_for_benchmark(self, params):
402  """
403  Sets the native model for benchmark, by reading the native
404  structure and set the rigid bodies.
405  @param fn_pdb_native PDB with the native structure
406  @param anchored List of True/False values indicating
407  if the components of the assembly are anchored. The function
408  sets the FIRST anchored component in the (0,0,0) coordinate
409  and moves the other components with the same translation
410  """
411  self.measure_models = True
412  self.native_model = IMP.Model()
413  if hasattr(params.benchmark, "fn_pdb_native"):
414  self.native_assembly = \
415  representation.create_assembly_from_pdb(self.native_model,
416  params.benchmark.fn_pdb_native, params.names)
417  elif hasattr(params.benchmark, "fn_pdbs_native"):
418  self.native_assembly = \
419  representation.create_assembly(self.native_model,
420  params.benchmark.fn_pdbs_native, params.names)
421  else:
422  raise ValueError("set_native_assembly_for_benchmark: Requested " \
423  "to set a native assembly for benchmark but did not provide " \
424  "its pdb, or the pdbs of the components" )
425  self.native_rbs = representation.create_rigid_bodies(self.native_assembly)
426  anchor_assembly(self.native_rbs, params.anchor)
427 
428 
429  def set_rb_states(self, rb, transformations):
430  """
431  Add a set of reference frames as possible states for a rigid body
432  @param rb The rigid body
433  @param transformations Transformations are used to build the
434  reference frames for the rigid body.
435  """
436  log.info("Set rigid body states for %s",rb.get_name())
437  RFs = [ alg.ReferenceFrame3D(T) for T in transformations ]
438  for st in RFs:
439  log.debug("%s",st)
440  rb_states = domino.RigidBodyStates(RFs)
441  self.rb_states_table.set_particle_states(rb, rb_states)
442  st = self.rb_states_table.get_particle_states(rb)
443  log.info("RigidBodyStates created %s",
444  st.get_number_of_particle_states())
445 
447  """
448  Add a RestraintScoreSubsetFilterTable to DOMINO that allows to
449  reject assignments that have score worse than the thresholds for
450  the restraints
451  """
452  log.info("Adding RestraintScoreSubsetFilterTable")
453  # Restraint Cache is a centralized container of the restraints and
454  # their scores
455  self.restraint_cache = domino.RestraintCache(self.rb_states_table)
456  self.restraint_cache.add_restraints(self.model.get_restraints())
457  rssft = domino.RestraintScoreSubsetFilterTable(self.restraint_cache)
458  rssft.set_name('myRestraintScoreSubsetFilterTable')
459  self.sampler.add_subset_filter_table(rssft)
460 
461  def set_assembly_components(self, fn_pdbs, names):
462  """
463  Sets the components of an assembly, each one given as a separate
464  PDB file.
465  @param fn_pdbs List with the names of the pdb files
466  @param names List with the names of the components of the assembly.
467  """
468  if len(fn_pdbs) != len(names):
469  raise ValueError("Each PDB file needs a name")
470  self.names = names
471  self.assembly = representation.create_assembly(self.model, fn_pdbs,
472  names)
473  self.components_rbs = representation.create_rigid_bodies(self.assembly)
474  self.not_optimized_rbs = []
475 
476  def set_assembly(self, fn_pdb, names):
477  """
478  Sets an assembly from a single PDB file. The function assumes that
479  the components of the assembly are the chains of the PDB file.
480  @param fn_pdb PDB with the file for the asembly
481  @param names Names for each of the components (chains)
482  of the asembly
483  """
484  self.assembly = representation.create_assembly_from_pdb(self.model,
485  fn_pdb,
486  names)
487  self.components_rbs = representation.create_rigid_bodies(self.assembly)
488  self.not_optimized_rbs = []
489 
490 
492  """
493  BranchAndBoundAssignmentsTable enumerate states based on provided
494  filtered using the provided the subset filter tables
495  """
496  log.info("Setting BranchAndBoundAssignmentsTable")
497  fts = []
498  for i in range(self.sampler.get_number_of_subset_filter_tables()):
499  ft = self.sampler.get_subset_filter_table(i)
500  fts.append(ft)
501  at = domino.BranchAndBoundAssignmentsTable(self.rb_states_table, fts)
502  self.sampler.set_assignments_table(at)
503  self.assignments_table = at
504 
505  def add_exclusion_filter_table(self):
506  """
507  ExclusionSubsetFilterTable ensures that two particles are not given
508  the same state at the same time
509  """
510  log.info("Setting ExclusionSubsetFilterTable")
511  ft = domino.ExclusionSubsetFilterTable(self.rb_states_table)
512  self.sampler.add_subset_filter_table(ft)
513 
514  def setup_domino_sampler(self):
515  """
516  Creates a DOMINO sampler and adds the required filter tables
517  """
518  if self.merge_tree == None:
519  raise ValueError("Merge tree for DOMINO not set. It is required " \
520  "to setup the sampler")
521  log.info("Domino sampler")
522  self.sampler = domino.DominoSampler(self.model, self.rb_states_table)
523  self.sampler.set_log_level(IMP.base.TERSE)
524  self.sampler.set_merge_tree(self.merge_tree)
528 
529  def read_merge_tree(self, fn):
530  """
531  Read and create a merge tree from a file.
532  The format of the file is the format written by write merge_tree()
533  It does not matter the order of the indices in the file, as
534  they are sorted by this function.
535  The only condition is that the index for the vertex that is the
536  root of the tree MUST be the largest. This is guaranteed when
537  creating a merge tree with create_merge_tree()
538  @param fn File containing the merge tree
539  """
540  log.info("Reading merge tree from %s", fn)
541  ps = self.rb_states_table.get_particles()
542  log.debug("particles")
543  for p in ps:
544  log.debug("%s", p.get_name())
545  rows = csv_related.read_csv(fn, delimiter = field_delim)
546  for i in range(len(rows)):
547  row = rows[i]
548  rows[i] = [int(row[0]), int(row[1]), int(row[2]), row[3]]
549  # Sort rows by vertice index
550  rows.sort()
551  subsets = []
552  # build subsets from the rows text
553  for row in rows:
554  names = row[3].split(unit_delim)
555  log.debug("row %s names %s", row, names)
556  # get particles in the subset
557  particles = []
558  for name in names:
559  l = filter(lambda p: p.get_name() == name, ps)
560  if (len(l) > 1):
561  ValueError("More than one particle with same name" % names)
562  particles.extend(l)
563  s = domino.Subset(particles)
564  subset_names = [p.get_name() for p in particles]
565  log.debug("Merge tree Subset %s. Particles %s ",s, subset_names)
566  subsets.append(s)
567  # The vertex with the largest index is the root.
568  # trick: get the merge tree object from a tree with only one node ...
569  jt = domino.SubsetGraph()
570  jt.add_vertex(subsets[0])
571  mt = domino.get_merge_tree(jt)
572  # ... and add the rest of the vertices
573  for i in range(1,len(subsets)):
574  mt.add_vertex(subsets[i]) # the name of the vertex is a subset
575  # set edges
576  for row in rows:
577  vid = row[0]
578  child_left = row[1]
579  child_right = row[2]
580  if child_left != -1:
581  mt.add_edge(vid, child_left)
582  if child_right != -1:
583  mt.add_edge(vid, child_right)
584  self.merge_tree = mt
585  log.info("%s",self.merge_tree.show_graphviz() )
586 
587 
588  def create_merge_tree(self):
589  """
590  Creates a merge tree from the restraints that are set already
591  """
592  rs = self.model.get_restraints()
593  ig = domino.get_interaction_graph(rs, self.rb_states_table)
594 # pruned_dep = IMP.get_pruned_dependency_graph(self.model)
595 # IMP.base.show_graphviz(pruned_dep)
596 # IMP.base.show_graphviz(ig)
597  jt = domino.get_junction_tree(ig)
598 # IMP.base.show_graphviz(jt)
599  self.merge_tree = domino.get_balanced_merge_tree(jt)
600 # IMP.base.show_graphviz(self.merge_tree)
601  log.info("Balanced merge tree created")
602  log.info("%s",self.merge_tree.show_graphviz() )
603 
604 
605  def write_merge_tree(self, fn):
606  """
607  Writes the current merge tree to file. The format is:
608  parent | child_left | child_right | labels
609  @param fn File for storing the merge tree
610  """
611  log.info("Writing merge to %s", fn)
612  if self.merge_tree == None:
613  raise ValueError("No merge tree")
614  f = open(fn, "w")
615  f.write(
616  "# Vertex index | Child0 | Child1 | label (names of the particles)\n")
617  w = csv.writer(f, delimiter = field_delim)
618  root = self.merge_tree.get_vertices()[-1]
619  self.write_subset(root, w)
620  f.close()
621 
622 
623  def write_subset(self, v, w):
624  """
625  Writes lines describing a subset (vertex of a merge tree)
626  @param w A csv.writer
627  @param v Vertex index
628  """
629  no_child_index = -1
630  subset = self.merge_tree.get_vertex_name(v)
631  names = [p.get_name() for p in subset]
632  reps = filter(lambda x: names.count(x) > 1, names) # repeated names
633  if(len(reps)):
634  raise ValueError("The names of the "\
635  "particles in the subset %s are not unique" % subset)
636  names = unit_delim.join(names)
637  neighbors = self.merge_tree.get_out_neighbors(v)
638  if len(neighbors) == 0: # A leaf
639  # Fill the container with the assignments for the leaf
640  w.writerow([v,no_child_index,no_child_index, names])
641  else:
642  if neighbors[0] > neighbors[1]:
643  neighbors =[neighbors[1], neighbors[0]]
644  w.writerow([v,neighbors[0],neighbors[1], names])
645  self.write_subset(neighbors[0], w)
646  self.write_subset(neighbors[1], w)
647 
648 
649  def set_connectivity_restraint(self, names, rname,
650  distance=10,
651  weight=1.0, max_score=None, n_pairs=1,
652  stddev=2):
653  """
654  Set a connectivity restraint between the elements of the assembly
655  @param names List with all the elements to be connected
656  @param distance Maximum distance tolerated by the restraint
657  @param rname Name for the restraint
658  @param weight Weight for the restraint
659  @param max_score Maximum score for the restraint
660  @param n_pairs Number of pairs of particles used in the
661  KClosePairScore of the connecitivity restraint
662  @param stddev Standard deviation used to approximate the
663  HarmonicUpperBound function to a Gaussian
664  """
665  components = []
666  for name in names:
667  c = representation.get_component(self.coarse_assembly,name)
668  components.append(c)
669  log.info("Connectivity restraint for %s",components)
670  spring_constant = core.Harmonic.get_k_from_standard_deviation(stddev)
671  if max_score == None:
672  max_score = weight * spring_constant * n_pairs * (stddev)**2
673  r = restraints.get_connectivity_restraint(components, distance, n_pairs,
674  spring_constant)
675  self.add_restraint(r, rname, weight, max_score)
676 
677  def set_em2d_restraint(self, name, fn_images_sel, pixel_size, resolution,
678  n_projections, weight, max_score=False,
679  mode="fast", n_optimized=1):
680  """
681  Set a Restraint computing the em2d score.
682  @param name Name for the restraint
683  @param fn_images_sel Selection file with the names of the images
684  used for the restraint
685  @param pixel_size - pixel size in the images
686  @param resolution - resolution used to downsample the projections
687  of the model when projecting
688  @param weight Weight of the restraint
689  @param max_score - Maximum value tolerated for the restraint
690  @param n_projections - Number of projections to generate
691  when projecting the model to do the coarse alignments
692  @param mode - Mode used for computing the restraints.
693  @param n_optimized - number of results from the coarse
694  alignment that are refined with the Simplex algorithm
695  to get a more accurate value for the restraint
696  """
697  log.info("Adding a em2D restraint: %s", fn_images_sel)
698  restraint_params = em2d.Em2DRestraintParameters(pixel_size,
699  resolution,
700  n_projections)
701  r = restraints.get_em2d_restraint( self.assembly,
702  fn_images_sel,
703  restraint_params,
704  mode, n_optimized)
705  self.add_restraint(r, name, weight, max_score)
706 
707 
708  def set_not_optimized(self, name):
709  """
710  Set a part of the model as not optimized (it does not move during
711  the model optimization)
712  @param Name of the component to optimized
713  """
714  if name not in self.names:
715  raise ValueError("DominoModel: There is not component " \
716  "in the assembly with this name")
717  rb_name = representation.get_rb_name(name)
718  self.not_optimized_rbs.append(rb_name)
719 
720 
721  def set_close_pairs_excluded_volume_restraint(self, distance=10,
722  weight=1.0, ratio=0.05, stddev=2,
723  max_score=False ):
724  """
725  Creates an excluded volume restraint between all the components
726  of the coarse assembly.See help for
727  @param distance Maximum distance tolerated between particles
728  @param weight Weight for the restraint
729  @param max_score Maximum value for the restraint. If the parameter
730  is None, an automatic value is computed (using the ratio).
731  @param ratio Fraction of the number of possible pairs of
732  particles that are tolerated to overlap. The maximum
733  score is modified according to this ratio.
734  I have found that ratios of 0.05-0.1 work well allowing for
735  some interpenetration
736  @param stddev Standard deviation used to approximate the
737  HarmonicUpperBound function to a Gaussian
738  """
739  k = core.Harmonic.get_k_from_standard_deviation(stddev)
740  for i, c1 in enumerate(self.coarse_assembly.get_children()):
741  for j, c2 in enumerate(self.coarse_assembly.get_children()):
742  if j > i:
743  name = "exc_vol_%s_%s" % (c1.get_name(), c2.get_name())
744 
745  ls1 = atom.get_leaves(c1)
746  ls2 = atom.get_leaves(c2)
747  possible_pairs = len(ls1) * len(ls2)
748  n_pairs = possible_pairs * ratio
749 
750  marker1 = IMP.Particle(self.model, "marker1 " + name)
751  marker2 = IMP.Particle(self.model, "marker2 " + name)
752  table_refiner = core.TableRefiner()
753  table_refiner.add_particle(marker1, ls1)
754  table_refiner.add_particle(marker2, ls2)
755  score = core.HarmonicLowerBound(distance, k)
756  pair_score = core.SphereDistancePairScore(score)
757  close_pair_score = core.ClosePairsPairScore(pair_score,
758  table_refiner,
759  distance)
760  r = core.PairRestraint(close_pair_score,
761  IMP.ParticlePair(marker1,marker2))
762 
763  if not max_score:
764  minimum_distance_allowed = 0
765  max_score = weight * n_pairs * \
766  score.evaluate(minimum_distance_allowed)
767  log.debug("Setting close_pairs_excluded_volume_restraint(), " \
768  "max_score %s", max_score)
769  self.add_restraint(r, name, weight, max_score)
770 
771  def set_pair_score_restraint(self, name1, name2,
772  restraint_name, distance=10,
773  weight=1.0, n_pairs = 1, stddev=2, max_score=None):
774  """
775  Set a pair_score restraint between the coarse representation
776  of two components
777  @param name1 Name of the first component
778  @param name2 Name of the second component
779  @param restraint_name Name for the restraint
780  @param distance Maximum distance tolerated between particles
781  @param weight. Weight of the restraint
782  @param max_score Maximum value tolerated for the restraint
783  @param stddev Standard deviation used to approximate the
784  HarmonicUpperBound function to a Gaussian
785  """
786  table_refiner = core.TableRefiner()
787 
788  # The particles in A are attached to a marker particle via a refiner.
789  # When the refiner gets a request for marker1, it returns the attached
790  # particles
791  A = representation.get_component(self.coarse_assembly, name1)
792  marker1 = IMP.Particle(self.model, "marker1 "+restraint_name)
793  table_refiner.add_particle(marker1, atom.get_leaves(A))
794  # same for B
795  B = representation.get_component(self.coarse_assembly, name2)
796  marker2 = IMP.Particle(self.model, "marker2 "+restraint_name)
797  table_refiner.add_particle(marker2, atom.get_leaves(B))
798 
799  k = core.Harmonic.get_k_from_standard_deviation(stddev)
801  # The score is set for the n_pairs closest pairs of particles
802  pair_score = core.KClosePairsPairScore(score, table_refiner, n_pairs)
803  # When KClosePairsPairScore is called, the refiner will provide the
804  # particles for A and B
805  if not max_score:
806  # Build a maximum score based on the function type that is used,
807  # an HarmonicUpperBound
808  temp_score = core.HarmonicUpperBound(distance, k)
809  error_distance_allowed = 10
810  max_score = weight * n_pairs * \
811  temp_score.evaluate(distance + error_distance_allowed)
812 
813  log.info("Setting pair score restraint for %s %s. k = %s, max_score " \
814  "= %s, stddev %s", name1, name2, k, max_score,stddev)
815  r = core.PairRestraint(pair_score, IMP.ParticlePair( marker1, marker2 ) )
816  self.add_restraint(r, restraint_name, weight, max_score)
817 
818 
819  def write_solutions_database(self, fn_database, max_number=None):
820  """
821  Write the results of the DOMINO sampling to a SQLite database.
822  @param max_number Maximum number of results to write
823  """
824  log.info("Creating the database of solutions")
825  if self.measure_models == True and not hasattr(self, "native_model"):
826  raise ValueError("The native model has not been set")
828  db.create(fn_database, overwrite=True)
829  db.connect(fn_database)
830  subset = self.rb_states_table.get_subset()
831  restraints_names = self.get_restraints_names_used(subset)
832  db.add_results_table(restraints_names, self.measure_models)
833  n = len(self.solution_assignments)
834  if max_number != None:
835  n = min(n, max_number)
836  for sol_id, assignment in enumerate(self.solution_assignments[0:n]):
837  txt = self.get_assignment_text(assignment, subset)
838  # load the positions of the components in the state
839  self.load_state(assignment)
840  RFs = [rb.get_reference_frame() for rb in self.components_rbs]
841  scores, total_score = self.get_assignment_scores(assignment, subset)
842  measures = None
843  if(self.measure_models):
844  measures = measure_model(self.assembly, self.native_assembly,
845  self.components_rbs, self.native_rbs)
846  db.add_record(sol_id, txt, RFs, total_score, scores, measures)
847  # Add native state if doing benchmarking
848  if self.measure_models:
849  assignment = "native"
850  RFs = [rb.get_reference_frame() for rb in self.native_rbs]
851  scores, total_score = self.get_restraints_for_native(
852  restraints_names)
853  db.add_native_record(assignment, RFs, total_score, scores)
854  db.save_records()
855  db.close()
856 
857  def get_restraints_names_used(self, subset):
858  """ Get the names of the restraints used for a subset
859  @param subset Subset of components of the assembly
860  """
861  return [r.get_name() for r
862  in self.restraint_cache.get_restraints(subset, [])]
863 
864  def get_restraints_for_native(self, restraints_names):
865  """
866  Get values of the restraints for the native structure
867  @param restraints_names Names of the restraints requested
868  @return a list with the values of all scores, and the total score
869  """
870  saved = [rb.get_reference_frame() for rb in self.components_rbs]
871  # Set the state of the model to native
872  for rb,rn in zip(self.components_rbs, self.native_rbs):
873  # remove sub-rigid bodies
874  rb_members = filter(
875  lambda m: not core.RigidBody.particle_is_instance(
876  m.get_particle()), rb.get_members())
877  rn_members = filter(
878  lambda m: not core.RigidBody.particle_is_instance(
879  m.get_particle()), rn.get_members())
880  rb_coords = [m.get_coordinates() for m in rb_members]
881  rn_coords = [m.get_coordinates() for m in rn_members]
882 
883  # align and put rb in the position of rn
884  if len(rn_coords) != len(rb_coords) :
885  raise ValueError("Mismatch in the number of members. "
886  "Reference %d Aligned %d " % (len(rn_coords), len(rb_coords)))
887  T = alg.get_transformation_aligning_first_to_second(rb_coords,
888  rn_coords)
889  t = rb.get_reference_frame().get_transformation_to()
890  new_t = alg.compose(T, t)
891  rb.set_reference_frame(alg.ReferenceFrame3D(new_t))
892  scores = []
893  for name in restraints_names:
894  scores.append( self.restraints[name].evaluate(False))
895  total_score = sum(scores)
896  representation.set_reference_frames(self.components_rbs, saved)
897  return scores, total_score
898 
899  def write_pdb_for_assignment(self, assignment, fn_pdb):
900  """
901  Write the solution in the assignment
902  @param assignment Assignment class with the states for the subset
903  @param fn_pdb File to write the model
904  """
905  if not self.assignments_sampling_done:
906  raise ValueError("DominoModel: sampling not done")
907  log.info("Writting PDB %s for assignment %s",fn_pdb, assignment)
908  self.load_state(assignment)
909  atom.write_pdb(self.assembly, fn_pdb)
910 
911  def write_pdb_for_configuration(self, n, fn_pdb):
912  """
913  Write a file with a configuration for the model (configuration
914  here means a configuration in DOMINO)
915  @param n Index of the configuration desired.
916  """
917  if not self.configuration_sampling_done:
918  raise ValueError("DominoModel: sampling not done")
919  log.debug("Writting PDB %s for configuration %s", fn_pdb, n)
920  configuration_set.load_configuration(n)
921  atom.write_pdb(self.assembly, fn_pdb)
922 
923  def write_pdb_for_reference_frames(self,RFs, fn_pdb):
924  """
925  Write a PDB file with a solution given by a set of reference frames
926  @param RFs Reference frames for the elements of the complex
927  @param fn_pdb File to write the solution
928  """
929  log.debug("Writting PDB %s for reference frames %s",fn_pdb, RFs)
930  representation.set_reference_frames(self.components_rbs,RFs)
931  atom.write_pdb(self.assembly, fn_pdb)
932 
933  def write_pdbs_for_reference_frames(self,RFs, fn_base):
934  """
935  Write a separate PDB for each of the elements
936  @param RFs Reference frames for the elements of the complex
937  @param fn_base base string to buid the names of the PDBs files
938  """
939  log.debug("Writting PDBs with basename %s",fn_base)
940  representation.set_reference_frames(self.components_rbs,RFs)
941  for i,ch in enumerate(self.assembly.get_children()):
942  atom.write_pdb(ch, fn_base + "component-%02d.pdb" % i )
943 
944  def write_pdb_for_component(self, component_index, ref, fn_pdb):
945  """
946  Write one component of the assembly
947  @param component_index Position of the component in the assembly
948  @param ref Reference frame of the component
949  @param fn_pdb Output PDB
950  """
951  ("Writting PDB %s for component %s",fn_pdb, component_index)
952  self.components_rbs[component_index].set_reference_frame(ref)
953  hierarchy_component = self.assembly.get_child(component_index)
954  atom.write_pdb(hierarchy_component, fn_pdb)
955 
956 
957  def write_monte_carlo_solution(self, fn_database):
958  """
959  Write the solution of a MonteCarlo run
960  @param fn_database Database file with the solution.
961  The database will contain only one record
962  """
963  total_score = 0
964  rnames = []
965  scores = []
966  for i in range(self.model.get_number_of_restraints()):
967  r = self.model.get_restraint(i)
968  score = r.evaluate(False)
969  rnames.append(r.get_name())
970  scores.append(score)
971  total_score += score
973  db.create(fn_database, overwrite=True)
974  db.connect(fn_database)
975  db.add_results_table(rnames, self.measure_models)
976  RFs = [ rb.get_reference_frame() for rb in self.components_rbs]
977  measures = None
978  if(self.measure_models):
979  measures = measure_model(self.assembly, self.native_assembly,
980  self.components_rbs, self.native_rbs)
981  solution_id = 0
982  db.add_record(solution_id, "", RFs, total_score, scores, measures)
983  db.save_records()
984  db.close()
985 
986 def print_restraints_values(model):
987  print "Restraints: Name, weight, value, maximum_value"
988  total_score = 0
989  for i in range(model.get_number_of_restraints()):
990  r = model.get_restraint(i)
991  score = r.evaluate(False)
992 # print "%20s %18f %18f %18f" % (r.get_name(), r.get_weight(),
993 # score, r.get_maximum_score())
994  print "%20s %18f %18f" % (r.get_name(), r.get_weight(), score)
995  total_score += score
996  print "total_score:",total_score
997 
998 
999 def anchor_assembly(components_rbs, anchored):
1000  """
1001  "Anchor" a set of rigid bodies, by setting the position of one of them
1002  at the origin (0,0,0).
1003  @param components_rbs Rigid bodies of the components
1004  @param anchored - List of True/False values indicating if the components
1005  of the assembly are anchored. The function sets the FIRST anchored
1006  component in the (0,0,0) coordinate and moves the other
1007  components with the same translation. If all the values are False, the
1008  function does not do anything
1009  """
1010  if True in anchored:
1011  anchored_rb = None
1012  for a, rb in zip(anchored, components_rbs) :
1013  if a:
1014  anchored_rb = rb
1015  break
1016  center = anchored_rb.get_coordinates()
1017  log.info("Anchoring assembly at (0,0,0)")
1018  for rb in components_rbs:
1019  T = rb.get_reference_frame().get_transformation_to()
1020  t = T.get_translation()
1021  R = T.get_rotation()
1022  log.debug("%s: Rerefence frame BEFORE %s",
1023  rb.get_name(),rb.get_reference_frame())
1024  T2 = alg.Transformation3D(R, t - center)
1025  rb.set_reference_frame(alg.ReferenceFrame3D(T2))
1026  log.debug("%s Rerefence frame AFTER %s",
1027  rb.get_name(),rb.get_reference_frame())
1028 
1029 def measure_model(assembly, native_assembly, rbs, native_rbs):
1030  """
1031  Get the drms, cdrms and crmsd for a model
1032  @param assembly Hierachy for an assembly
1033  @param native_assembly Hierarchy of the native structure
1034  @param rbs Rigid bodies for the components of the assembly
1035  @param native_rbs Rigid bodies for the components of the native
1036  structure
1037  @return cdrms, cdrms, crmsd
1038  - drms - typical drms measure
1039  - cdrms - drms for the centroids of the rigid bodies of the components
1040  of the assembly
1041  - crmsd - rmsd for the centroids
1042  """
1043  log.debug("Measure model")
1044  drms = comparisons.get_drms_for_backbone(assembly, native_assembly)
1045  RFs = [rb.get_reference_frame() for rb in rbs]
1046  vs = [r.get_transformation_to().get_translation() for r in RFs]
1047  nRFs = [rb.get_reference_frame() for rb in native_rbs]
1048  nvs = [r.get_transformation_to().get_translation() for r in nRFs]
1049  cdrms = atom.get_drms(vs, nvs)
1050  crmsd, RFs = alignments.align_centroids_using_pca(RFs, nRFs)
1051  log.debug("drms %s cdrms %s crmsd %s",drms,cdrms,crmsd)
1052  return [drms, cdrms, crmsd]
1053 
1054 
1055 
1056 def get_coordinates(rigid_bodies):
1057  """
1058  Return a list of the coordinates of all the members of the rigid bodies
1059  """
1060  if len(rigid_bodies) == 0:
1061  raise ValueError("get_coordinates: There are not rigid bodies to get coordinates from")
1062  coords = []
1063  for rb in rigid_bodies:
1064  # remove sub-rigid bodies
1065  rb_members = filter(
1066  lambda m: not core.RigidBody.particle_is_instance(
1067  m.get_particle()), rb.get_members())
1068  coords.extend([m.get_coordinates() for m in rb_members])
1069  return coords