IMP  2.0.0
The Integrative Modeling Platform
emagefit.py
1 #!/usr/bin/env python
2 
3 import ConfigParser
4 import sys
5 import os
6 import time
7 import logging
8 log = logging.getLogger("domino_model")
9 
10 import IMP
11 import IMP.atom as atom
12 import IMP.algebra as alg
13 import IMP.em2d as em2d
14 
15 
16 import IMP.em2d.Database as Database
17 import IMP.em2d.imp_general.io as io
18 import IMP.em2d.imp_general.alignments as alignments
19 import IMP.em2d.imp_general.representation as representation
20 import IMP.em2d.DominoModel as DominoModel
21 import IMP.em2d.MonteCarloRelativeMoves as MonteCarloRelativeMoves
22 import IMP.em2d.sampling as sampling
23 import IMP.em2d.solutions_io as solutions_io
24 import IMP.em2d.utility as utility
25 
26 
27 def setup_sampling_schema(model, params):
28  """
29  Set the discrete states for sampling for DOMINO.
30  @param model A DominoModel object
31  @param params See the help for the script
32  """
33  n_rbs = len(model.components_rbs)
34  sampling_schema = sampling.SamplingSchema(n_rbs, params.fixed, params.anchor)
35 
36  sampling_schema.read_from_database( params.sampling_positions.read,
37  ["reference_frames"],
38  params.sampling_positions.max_number,
39  params.sampling_positions.orderby)
40  for i, rb in enumerate(model.components_rbs):
41  model.set_rb_states(rb, sampling_schema.get_sampling_transformations(i))
42 
43 
44 def write_pdbs_for_solutions(params, fn_database, n=10, orderby="em2d",
45  split_components=False):
46  """
47  Write PDBs for the solutions in the database
48  @param params class with the parameters for the experiment
49  @param fn_database File containing the database of solutions
50  @param n Number of solutions to write
51  @param orderby Restraint used for sorting the solutions
52  @param split_components If true, each component of the assembly in a
53  solution is written as a separate PDB file
54  """
55  m = DominoModel.DominoModel()
56  m.set_assembly_components(params.fn_pdbs, params.names)
57  db = solutions_io.ResultsDB()
58  db.connect(fn_database)
59  data = db.get_solutions(["reference_frames"], n, orderby)
60  db.close()
61  for i, d in enumerate(data):
62  texts = d[0].split("/") # each row has only one value
63  log.debug("Reference frames to generate pdb %s",texts)
64  RFs = [io.TextToReferenceFrame(t).get_reference_frame() for t in texts]
65  if not split_components:
66  fn = "solution-%03d.pdb" % i
67  m.write_pdb_for_reference_frames(RFs,fn)
68  else:
69  fn = "solution-%03d-" % i
70  m.write_pdbs_for_reference_frames(RFs,fn)
71 
72 
73 def write_nth_largest_cluster(params, fn_database, fn_db_clusters,
74  position, table_name="clusters"):
75  """
76  @param params class with the parameters for the experiment
77  @param fn_database Database file with the reference frames of the
78  solutions
79  @param fn_db_clusters Database file for the clusters.
80  It can the same as fn_database or a different one.
81  @param table_name Table with the info for the clusters in
82  fn_db_clusters
83  @param position Cluster position (by number of elements) requested.
84  The index of largest cluster is 1.
85  """
86  m = DominoModel.DominoModel()
87  m.set_assembly_components(params.fn_pdbs, params.names)
88  # get cluster
89  db_clusters = solutions_io.ResultsDB()
90  db_clusters.connect(fn_db_clusters)
91  cl_record = db_clusters.get_nth_largest_cluster(position)
92  log.info("Size of %s largest cluster: %s",position, cl_record.n_elements)
93  # get solutions
94  db = solutions_io.ResultsDB()
95  db.connect(fn_database)
96  fields = ("reference_frames",)
97  solutions_ids = map(int, cl_record.solutions_ids.split("|"))
98  solutions = db.get_solutions_from_list(fields, solutions_ids)
99  db.close()
100  for i, d in zip(solutions_ids, solutions):
101  texts = d[0].split("/")
102  RFs = [io.TextToReferenceFrame(t).get_reference_frame() for t in texts]
103  fn = "cluster-%02d-solution-%03d.pdb" % (position, i)
104  log.debug(fn)
105  m.write_pdb_for_reference_frames(RFs, fn)
106 
107 def generate_domino_model(params, fn_database, fn_log = None):
108  """
109  Generate a model for an assembly using DOMINO.
110  @param params Class with the parameters for the experiment
111  @param fn_database Databse file that will contain the solutions
112  SQLite format
113  """
114  log.info(io.imp_info([IMP, em2d]))
115  t0 = time.time()
116  m = DominoModel.DominoModel()
117  if hasattr(params, "test_opts") and params.test_opts.do_test:
118  m.set_assembly(params.test_opts.test_fn_assembly, params.names)
119  else:
120  m.set_assembly_components(params.fn_pdbs, params.names)
121  setup_sampling_schema(m, params)
122  if hasattr(params.sampling_positions, "align_before_domino") and \
123  params.sampling_positions.align_before_domino:
124  m.align_rigid_bodies_states()
125 
126  if hasattr(params, "benchmark"):
127  m.set_native_assembly_for_benchmark(params)
128  set_pair_score_restraints(params, m)
129  set_xlink_restraints(params, m)
130  set_geometric_complementarity_restraints(params, m)
131  if hasattr(params.domino_params,"fn_merge_tree"):
132  fn = params.domino_params.fn_merge_tree
133  if not os.path.exists(fn):
134  raise IOError("merge tree file not found: %s" % fn)
135  m.read_merge_tree(fn)
136  else:
137  m.create_merge_tree()
138  set_connectivity_restraints(params, m)
139  set_pairs_excluded_restraint(params, m)
140  set_em2d_restraints(params, m)
141  m.setup_domino_sampler()
142  m.do_sampling("assignments_heap_container", params.domino_params)
143  tf = time.time()
144  log.info("Total time for the sampling (non-parallel): %s",tf-t0)
145  m.write_solutions_database(fn_database, params.n_solutions)
146 
147 
148 
149 def print_restraints(params):
150  """
151  Generate a model for an assembly using DOMINO.
152  @param params Class with the parameters for the experiment
153  @param fn_database Databse file that will contain the solutions
154  SQLite format
155  """
156  log.info(io.imp_info([IMP, em2d]))
157  t0 = time.time()
158  m = DominoModel.DominoModel()
159  if hasattr(params, "test_opts") and params.test_opts.do_test:
160  m.set_assembly(params.test_opts.test_fn_assembly, params.names)
161  else:
162  m.set_assembly_components(params.fn_pdbs, params.names)
163  if hasattr(params, "benchmark"):
164  m.set_native_assembly_for_benchmark(params)
165  set_pair_score_restraints(params, m)
166  set_xlink_restraints(params, m)
167  set_geometric_complementarity_restraints(params, m)
168  set_connectivity_restraints(params, m)
169  set_pairs_excluded_restraint(params, m)
170  set_em2d_restraints(params, m)
171  log.debug("RESTRAINTS FOR THE INPUT CONFIGURATION")
172  DominoModel.print_restraints_values(m.model)
173 
174 
175 def set_pair_score_restraints(params, model):
176  """ Set the pair score restraints in the DominoModel
177  @param model DominoModel class
178  @param params See the help for the script
179  """
180  if hasattr(params, "pair_score_restraints"):
181  model.create_coarse_assembly(params.n_residues)
182  for r_params in params.pair_score_restraints:
183  model.set_pair_score_restraint(*r_params)
184 
185 def set_xlink_restraints(params, model):
186  """ Set the cross-linking restraints in the DominoModel
187  @param model DominoModel class
188  @param params See the help for the script
189  """
190  if hasattr(params, "xlink_restraints"):
191  for params in params.xlink_restraints:
192  model.set_xlink_restraint(*params)
193 
194 def set_geometric_complementarity_restraints(params, model):
195  """ Set the geometric complementarity restraints in the DominoModel
196  @param model DominoModel class
197  @param params See the help for the script
198  """
199  if hasattr(params, "complementarity_restraints"):
200  model.create_coarse_assembly(params.n_residues)
201  for r_params in params.complementarity_restraints:
202  model.set_complementarity_restraint(*r_params)
203 
204 def set_connectivity_restraints(params, model):
205  """ Set the connectivity restraints in the DominoModel
206  @param model DominoModel class
207  @param params See the help for the script
208  """
209  if hasattr(params,"connectivity_restraints"):
210  model.create_coarse_assembly(params.n_residues)
211  for r_params in params.connectivity_restraints:
212  model.set_connectivity_restraint(*args)
213 
214 def set_pairs_excluded_restraint(params, model) :
215  """
216  All against all excluded volume restraints
217  @param model DominoModel class
218  @param params See the help for the script
219  """
220  if hasattr(params,"pairs_excluded_restraint"):
221  model.create_coarse_assembly(params.n_residues)
222  model.set_close_pairs_excluded_volume_restraint(
223  *params.pairs_excluded_restraint)
224 
225 def set_em2d_restraints(params, model):
226  """
227  Restraints related to the match to EM images
228  @param model DominoModel class
229  @param params See the help for the script
230  """
231  if hasattr(params,"em2d_restraints"):
232  for r_params in params.em2d_restraints:
233  model.set_em2d_restraint(*r_params)
234 
235 
236 def generate_monte_carlo_model(params, fn_database, seed,
237  write_solution=False, fn_log = None):
238  """
239  Generate a model for an assembly using MonteCarlo optimization
240  @param params Class with the parameters for the experiment
241  @param fn_database Datbase file where the solutions will be written.
242  SQLite format.
243  @param write_solution If True, writes a PDB with the final model
244  @param fn_log File for logging. If the value is None, no file is written
245  """
246  log.info(io.imp_info([IMP, em2d]))
247  m = DominoModel.DominoModel()
248  m.set_assembly_components(params.fn_pdbs, params.names)
249  set_pair_score_restraints(params, m)
250  set_xlink_restraints(params, m)
251  set_geometric_complementarity_restraints(params, m)
252  set_connectivity_restraints(params, m)
253  set_pairs_excluded_restraint(params, m)
254  set_em2d_restraints(params, m )
255  if hasattr(params, "benchmark"):
256  m.set_native_assembly_for_benchmark(params)
257 
258  MonteCarloRelativeMoves.set_random_seed(seed)
259  mc = MonteCarloRelativeMoves.MonteCarloRelativeMoves(m.model,
260  m.components_rbs, params.anchor)
261  mc.set_temperature_pattern(params.monte_carlo.temperatures,
262  params.monte_carlo.iterations,
263  params.monte_carlo.cycles)
264  mc.set_moving_parameters(params.monte_carlo.max_translations,
265  params.monte_carlo.max_rotations)
266  if not hasattr(params,"dock_transforms"):
267  mc.dock_transforms = []
268  else:
269  mc.dock_transforms = params.dock_transforms
270 
271  # Probability for a component of the assembly of doing random movement
272  # of doing a relative movement respect to another component
273  mc.non_relative_move_prob = params.monte_carlo.non_relative_move_prob
274  mc.run_monte_carlo_with_relative_movers()
275  m.write_monte_carlo_solution(fn_database)
276  if write_solution:
277  atom.write_pdb(m.assembly, fn_database + ".pdb")
278 
279 
280 
281 def create_dockings_from_xlinks(params):
282  """
283  Perform dockings that satisfy the cross-linking restraints.
284  1) Based on the number of restraints, creates an order for the
285  docking between pairs of subunits, favouring the subunits with
286  more crosslinks to be the "receptors"
287  2) Moves the subunits that are going to be docked to a position that
288  satisfies the x-linking restraints. There is no guarantee that this
289  position is correct. Its purpose is to help the docking
290  algorithm with a clue of the proximity/orientation between subunits
291  3) Performs docking between the subunits
292  4) Filters the results of the docking that are not consistent with
293  the cross-linking restraints
294  5) Computes the relative transformations between the rigid bodies
295  of the subunits that have been docked
296  @param params Class with the parameters for the experiment
297  """
298  log.info("Creating initial assembly from xlinks and docking")
299  import emagefit_dock as dock
300  import IMP.em2d.buildxlinks as bx
301  m = DominoModel.DominoModel()
302  m.set_assembly_components(params.fn_pdbs, params.names)
303  set_xlink_restraints(params, m)
304  order = bx.DockOrder()
305  order.set_xlinks(m.xlinks_dict)
306  docking_pairs = order.get_docking_order()
307 
308  if hasattr(params, "have_hexdock"):
309  if not params.have_hexdock:
310  return
311 
312  for rec, lig in docking_pairs:
313  xlinks_list = m.xlinks_dict.get_xlinks_for_pair((rec,lig))
314  log.debug("Xlinks for the pair %s %s",rec, lig)
315  for xl in xlinks_list:
316  log.debug("%s", xl.show())
317 
318  h_receptor = representation.get_component(m.assembly, rec)
319  h_ligand = representation.get_component(m.assembly, lig)
320  rb_receptor = representation.get_rigid_body(m.components_rbs,
321  representation.get_rb_name(rec))
322  rb_ligand = representation.get_rigid_body(m.components_rbs,
323  representation.get_rb_name(lig))
324  initial_ref = rb_ligand.get_reference_frame()
325  # move to the initial docking position
326  mv = bx.InitialDockingFromXlinks()
327  mv.set_xlinks(xlinks_list)
328  mv.set_hierarchies(h_receptor, h_ligand)
329  mv.set_rigid_bodies(rb_receptor, rb_ligand)
330  mv.move_ligand()
331  fn_initial_docking = "%s-%s_initial_docking.pdb" % (rec,lig)
332  mv.write_ligand(fn_initial_docking)
333  # dock
334  dock.check_for_hexdock()
335  hex_docking = dock.HexDocking()
336  receptor_index = params.names.index(rec)
337  fn_transforms = "hex_solutions_%s-%s.txt" % (rec, lig)
338  fn_docked = "%s-%s_hexdock.pdb" % (rec, lig)
339  hex_docking.dock(params.fn_pdbs[receptor_index],
340  fn_initial_docking, fn_transforms, fn_docked, False)
341 
342  sel = atom.ATOMPDBSelector()
343  new_m = IMP.Model()
344  # After reading the file with the initial solution, the reference frame
345  # for the rigid body of the ligand is not necessarily the same one
346  # that it had when saved.
347  # Thus reading the file again ensures consisten results when
348  # using the HEXDOCK transforms
349  new_h_ligand = atom.read_pdb(fn_initial_docking, new_m, sel)
350  new_rb_ligand = atom.create_rigid_body(new_h_ligand)
351  Tlig = new_rb_ligand.get_reference_frame().get_transformation_to()
352  fn_filtered = "hex_solutions_%s-%s_filtered.txt" % (rec, lig)
353  # new_h_ligand contains the coordinates of the ligand after moving it
354  # to the initial position for the docking
355  dock.filter_docking_results(h_receptor, new_h_ligand, xlinks_list,
356  fn_transforms, fn_filtered)
357  # transforms to apply to the ligand as it is in the file
358  # fn_initial_docking
359  transformations_hexdock = dock.read_hex_transforms(fn_filtered)
360 
361  # The initial position the ligand, by construction, is a "docking"
362  # solution, very bad probably, but is an starting point for Monte Carlo
363  # refinement. Hexdock therefore would "apply" an identity transformation
364  log.info("Adding the initial approximate transformation to the " \
365  " set of possible transformations")
366  transformations_hexdock.append(alg.get_identity_transformation_3d())
367 
368  Trec = rb_receptor.get_reference_frame().get_transformation_to()
369  Tinternal = []
370  for i,T in enumerate(transformations_hexdock):
371  Tdock = alg.compose(T, Tlig)
372  ref = alg.ReferenceFrame3D(Tdock)
373  new_rb_ligand.set_reference_frame(ref)
374  # internal transformation. The relationship is Tdock = Trec * Ti
375  Ti = alg.compose(Trec.get_inverse(), Tdock)
376  Tinternal.append(Ti)
377  fn_relative = "relative_positions_%s-%s.txt" % (rec, lig)
378  io.write_transforms(Tinternal, fn_relative)
379  rb_ligand.set_reference_frame(initial_ref)
380  #os.remove(fn_initial_docking)
381 
382 if __name__ == "__main__":
383  import ConfigParser
384  cparser = ConfigParser.SafeConfigParser()
385 
386  parser = IMP.OptionParser(description="Domino grid sampling",
387  imp_module=em2d)
388  parser.add_option("--o",
389  dest="fn_database",
390  default = False,
391  help="SQLite file. This file is an output when modeling. " \
392  " It is an input parameter when querying the database.")
393  parser.add_option("--exp",
394  dest="fn_params",
395  default=None,
396  help="Experiment file. This should be a Python file "
397  "containing a class called Experiment containing "
398  "all parameters required to do the modeling.")
399  parser.add_option("--w",
400  type=int,
401  dest="write",
402  default=None,
403  help="write the best scored solutions to PDB files.")
404  parser.add_option("--wcl",
405  dest="write_cluster",
406  metavar="fn_db_clusters position",
407  nargs = 2,
408  default=None,
409  help="Write the n-th largest cluster. This option has " \
410  "two arguments. The first one is the database file " \
411  "containing the clusters. The second one is the " \
412  "number (position) of the cluster to write. " \
413  "The index of the first cluster is 1.")
414  parser.add_option("--orderby",
415  dest="orderby",
416  default=None,
417  help="Measure used to sort solutions when saving them " \
418  "to PDB")
419  parser.add_option("--monte_carlo", dest="monte_carlo",
420  metavar = "number",
421  type=int,
422  default=False,
423  help="Run a MonteCarlo optimization. There is one " \
424  "parameter passed to the option. When using this " \
425  "script in a cluster and sending a lot of them at the "\
426  "same time, the effective random number obtained by " \
427  "seeding using the time is the same for all scripts. " \
428  "To avoid that situation, you can pass an integer. " \
429  "If the number is -1, the time is " \
430  "used as the seed (useful for one core)")
431  parser.add_option("--dock", dest="dock",
432  action="store_true",
433  default=False,
434  help="Perform docking of the components of the assembly "\
435  "guided by cross-linking restraints")
436  parser.add_option("--merge", dest="merge",
437  action="callback",
438  callback=utility.vararg_callback,
439  help="Merge the database files")
440  parser.add_option("--gather", dest="gather",
441  action="callback",
442  callback=utility.vararg_callback,
443  help="Gather database files and store then in a single " \
444  "file. The arguments are the names of the databases "\
445  "to gather.")
446  parser.add_option("--log",
447  dest="log",
448  default=None,
449  help="File for logging.")
450  parser.add_option("--cdrms",
451  dest="cdrms",
452  type=int,
453  default=False,
454  metavar="number",
455  help="Align the best solutions to the " \
456  "native using the DRMS of the centroids.")
457 
458  parser.add_option("--pr",
459  action="store_true",
460  dest="pr_rest",
461  default=False,
462  help="Print restraints of model and native")
463 
464 
465  args = parser.parse_args()
466  args = args[0]
467  if len(sys.argv) == 1:
468  parser.print_help()
469  quit()
470  if args.log:
471  logging.basicConfig(filename=args.log, filemode="w")
472  else:
473  logging.basicConfig(stream=sys.stdout)
474  logging.root.setLevel(logging.INFO)
475 
476  if args.merge:
477  max_number_of_results = 100000
478  t0 = time.time()
479  if not args.fn_database or not args.orderby:
480  raise ValueError("Merging databases requires the output database " \
481  "results and the name of the restraint to order by")
482  solutions_io.gather_best_solution_results(args.merge, args.fn_database,
483  max_number=max_number_of_results, orderby=args.orderby)
484  log.info("Merging done. Time %s", time.time() - t0)
485  quit()
486  if args.gather:
487  t0 = time.time()
488  solutions_io.gather_solution_results(args.gather, args.fn_database)
489  log.info("Gathering done. Time %s", time.time() - t0)
490  quit()
491 
492  if args.write:
493  if not args.fn_database or not args.orderby:
494  raise ValueError("Writing solutions requires the database of " \
495  "results and the name of the restraint to order by")
496  if args.orderby == "False":
497  args.orderby = False
498  params = utility.get_experiment_params(args.fn_params)
499  write_pdbs_for_solutions(params, args.fn_database,
500  args.write, args.orderby, )
501  quit()
502 
503  if args.write_cluster:
504  if not args.fn_database:
505  raise ValueError("Writing clusters requires the database file")
506  fn_db_clusters = args.write_cluster[0]
507  position = int(args.write_cluster[1])
508  params = utility.get_experiment_params(args.fn_params)
509  write_nth_largest_cluster(params, args.fn_database,
510  fn_db_clusters, position )
511  quit()
512 
513  if args.cdrms:
514  if not args.fn_database or not args.orderby:
515  raise ValueError("Writing models requires the database of " \
516  "results and the name of the restraint to order by")
517  params = utility.get_experiment_params(args.fn_params)
518 
519  write_pdbs_aligned_by_cdrms(params, args.fn_database,
520  args.cdrms, args.orderby)
521  quit()
522 
523  if args.monte_carlo:
524  params = utility.get_experiment_params(args.fn_params)
525  generate_monte_carlo_model(params,
526  args.fn_database, args.monte_carlo)
527  quit()
528 
529  if args.dock:
530  params = utility.get_experiment_params(args.fn_params)
531  create_dockings_from_xlinks(params)
532  quit()
533 
534 
535  if(args.pr_rest):
536  params = utility.get_experiment_params(args.fn_params)
537  print_restraints(params)
538  quit()
539 
540 
541  params = utility.get_experiment_params(args.fn_params)
542  generate_domino_model(params, args.fn_database)