8 log = logging.getLogger(
"domino_model")
17 import IMP.em2d.imp_general.io
as io
18 import IMP.em2d.imp_general.alignments
as alignments
19 import IMP.em2d.imp_general.representation
as representation
27 def setup_sampling_schema(model, params):
29 Set the discrete states for sampling for DOMINO.
30 @param model A DominoModel object
31 @param params See the help for the script
33 n_rbs = len(model.components_rbs)
34 sampling_schema = sampling.SamplingSchema(n_rbs, params.fixed, params.anchor)
36 sampling_schema.read_from_database( params.sampling_positions.read,
38 params.sampling_positions.max_number,
39 params.sampling_positions.orderby)
40 for i, rb
in enumerate(model.components_rbs):
41 model.set_rb_states(rb, sampling_schema.get_sampling_transformations(i))
44 def write_pdbs_for_solutions(params, fn_database, n=10, orderby="em2d",
45 split_components=
False):
47 Write PDBs for the solutions in the database
48 @param params class with the parameters for the experiment
49 @param fn_database File containing the database of solutions
50 @param n Number of solutions to write
51 @param orderby Restraint used for sorting the solutions
52 @param split_components If true, each component of the assembly in a
53 solution is written as a separate PDB file
55 m = DominoModel.DominoModel()
56 m.set_assembly_components(params.fn_pdbs, params.names)
57 db = solutions_io.ResultsDB()
58 db.connect(fn_database)
59 data = db.get_solutions([
"reference_frames"], n, orderby)
61 for i, d
in enumerate(data):
62 texts = d[0].split(
"/")
63 log.debug(
"Reference frames to generate pdb %s",texts)
64 RFs = [io.TextToReferenceFrame(t).get_reference_frame()
for t
in texts]
65 if not split_components:
66 fn =
"solution-%03d.pdb" % i
67 m.write_pdb_for_reference_frames(RFs,fn)
69 fn =
"solution-%03d-" % i
70 m.write_pdbs_for_reference_frames(RFs,fn)
73 def write_nth_largest_cluster(params, fn_database, fn_db_clusters,
74 position, table_name=
"clusters"):
76 @param params class with the parameters for the experiment
77 @param fn_database Database file with the reference frames of the
79 @param fn_db_clusters Database file for the clusters.
80 It can the same as fn_database or a different one.
81 @param table_name Table with the info for the clusters in
83 @param position Cluster position (by number of elements) requested.
84 The index of largest cluster is 1.
86 m = DominoModel.DominoModel()
87 m.set_assembly_components(params.fn_pdbs, params.names)
89 db_clusters = solutions_io.ResultsDB()
90 db_clusters.connect(fn_db_clusters)
91 cl_record = db_clusters.get_nth_largest_cluster(position)
92 log.info(
"Size of %s largest cluster: %s",position, cl_record.n_elements)
94 db = solutions_io.ResultsDB()
95 db.connect(fn_database)
96 fields = (
"reference_frames",)
97 solutions_ids = map(int, cl_record.solutions_ids.split(
"|"))
98 solutions = db.get_solutions_from_list(fields, solutions_ids)
100 for i, d
in zip(solutions_ids, solutions):
101 texts = d[0].split(
"/")
102 RFs = [io.TextToReferenceFrame(t).get_reference_frame()
for t
in texts]
103 fn =
"cluster-%02d-solution-%03d.pdb" % (position, i)
105 m.write_pdb_for_reference_frames(RFs, fn)
107 def generate_domino_model(params, fn_database, fn_log = None):
109 Generate a model for an assembly using DOMINO.
110 @param params Class with the parameters for the experiment
111 @param fn_database Databse file that will contain the solutions
114 log.info(io.imp_info([IMP, em2d]))
116 m = DominoModel.DominoModel()
117 if hasattr(params,
"test_opts")
and params.test_opts.do_test:
118 m.set_assembly(params.test_opts.test_fn_assembly, params.names)
120 m.set_assembly_components(params.fn_pdbs, params.names)
121 setup_sampling_schema(m, params)
122 if hasattr(params.sampling_positions,
"align_before_domino")
and \
123 params.sampling_positions.align_before_domino:
124 m.align_rigid_bodies_states()
126 if hasattr(params,
"benchmark"):
127 m.set_native_assembly_for_benchmark(params)
128 set_pair_score_restraints(params, m)
129 set_xlink_restraints(params, m)
130 set_geometric_complementarity_restraints(params, m)
131 if hasattr(params.domino_params,
"fn_merge_tree"):
132 fn = params.domino_params.fn_merge_tree
133 if not os.path.exists(fn):
134 raise IOError(
"merge tree file not found: %s" % fn)
135 m.read_merge_tree(fn)
137 m.create_merge_tree()
138 set_connectivity_restraints(params, m)
139 set_pairs_excluded_restraint(params, m)
140 set_em2d_restraints(params, m)
141 m.setup_domino_sampler()
142 m.do_sampling(
"assignments_heap_container", params.domino_params)
144 log.info(
"Total time for the sampling (non-parallel): %s",tf-t0)
145 m.write_solutions_database(fn_database, params.n_solutions)
149 def print_restraints(params):
151 Generate a model for an assembly using DOMINO.
152 @param params Class with the parameters for the experiment
153 @param fn_database Databse file that will contain the solutions
156 log.info(io.imp_info([IMP, em2d]))
158 m = DominoModel.DominoModel()
159 if hasattr(params,
"test_opts")
and params.test_opts.do_test:
160 m.set_assembly(params.test_opts.test_fn_assembly, params.names)
162 m.set_assembly_components(params.fn_pdbs, params.names)
163 if hasattr(params,
"benchmark"):
164 m.set_native_assembly_for_benchmark(params)
165 set_pair_score_restraints(params, m)
166 set_xlink_restraints(params, m)
167 set_geometric_complementarity_restraints(params, m)
168 set_connectivity_restraints(params, m)
169 set_pairs_excluded_restraint(params, m)
170 set_em2d_restraints(params, m)
171 log.debug(
"RESTRAINTS FOR THE INPUT CONFIGURATION")
172 DominoModel.print_restraints_values(m.model)
175 def set_pair_score_restraints(params, model):
176 """ Set the pair score restraints in the DominoModel
177 @param model DominoModel class
178 @param params See the help for the script
180 if hasattr(params,
"pair_score_restraints"):
181 model.create_coarse_assembly(params.n_residues)
182 for r_params
in params.pair_score_restraints:
183 model.set_pair_score_restraint(*r_params)
185 def set_xlink_restraints(params, model):
186 """ Set the cross-linking restraints in the DominoModel
187 @param model DominoModel class
188 @param params See the help for the script
190 if hasattr(params,
"xlink_restraints"):
191 for params
in params.xlink_restraints:
192 model.set_xlink_restraint(*params)
194 def set_geometric_complementarity_restraints(params, model):
195 """ Set the geometric complementarity restraints in the DominoModel
196 @param model DominoModel class
197 @param params See the help for the script
199 if hasattr(params,
"complementarity_restraints"):
200 model.create_coarse_assembly(params.n_residues)
201 for r_params
in params.complementarity_restraints:
202 model.set_complementarity_restraint(*r_params)
204 def set_connectivity_restraints(params, model):
205 """ Set the connectivity restraints in the DominoModel
206 @param model DominoModel class
207 @param params See the help for the script
209 if hasattr(params,
"connectivity_restraints"):
210 model.create_coarse_assembly(params.n_residues)
211 for r_params
in params.connectivity_restraints:
212 model.set_connectivity_restraint(*args)
214 def set_pairs_excluded_restraint(params, model) :
216 All against all excluded volume restraints
217 @param model DominoModel class
218 @param params See the help for the script
220 if hasattr(params,
"pairs_excluded_restraint"):
221 model.create_coarse_assembly(params.n_residues)
222 model.set_close_pairs_excluded_volume_restraint(
223 *params.pairs_excluded_restraint)
225 def set_em2d_restraints(params, model):
227 Restraints related to the match to EM images
228 @param model DominoModel class
229 @param params See the help for the script
231 if hasattr(params,
"em2d_restraints"):
232 for r_params
in params.em2d_restraints:
233 model.set_em2d_restraint(*r_params)
236 def generate_monte_carlo_model(params, fn_database, seed,
237 write_solution=
False, fn_log =
None):
239 Generate a model for an assembly using MonteCarlo optimization
240 @param params Class with the parameters for the experiment
241 @param fn_database Datbase file where the solutions will be written.
243 @param write_solution If True, writes a PDB with the final model
244 @param fn_log File for logging. If the value is None, no file is written
246 log.info(io.imp_info([IMP, em2d]))
247 m = DominoModel.DominoModel()
248 m.set_assembly_components(params.fn_pdbs, params.names)
249 set_pair_score_restraints(params, m)
250 set_xlink_restraints(params, m)
251 set_geometric_complementarity_restraints(params, m)
252 set_connectivity_restraints(params, m)
253 set_pairs_excluded_restraint(params, m)
254 set_em2d_restraints(params, m )
255 if hasattr(params,
"benchmark"):
256 m.set_native_assembly_for_benchmark(params)
258 MonteCarloRelativeMoves.set_random_seed(seed)
259 mc = MonteCarloRelativeMoves.MonteCarloRelativeMoves(m.model,
260 m.components_rbs, params.anchor)
261 mc.set_temperature_pattern(params.monte_carlo.temperatures,
262 params.monte_carlo.iterations,
263 params.monte_carlo.cycles)
264 mc.set_moving_parameters(params.monte_carlo.max_translations,
265 params.monte_carlo.max_rotations)
266 if not hasattr(params,
"dock_transforms"):
267 mc.dock_transforms = []
269 mc.dock_transforms = params.dock_transforms
273 mc.non_relative_move_prob = params.monte_carlo.non_relative_move_prob
274 mc.run_monte_carlo_with_relative_movers()
275 m.write_monte_carlo_solution(fn_database)
277 atom.write_pdb(m.assembly, fn_database +
".pdb")
281 def create_dockings_from_xlinks(params):
283 Perform dockings that satisfy the cross-linking restraints.
284 1) Based on the number of restraints, creates an order for the
285 docking between pairs of subunits, favouring the subunits with
286 more crosslinks to be the "receptors"
287 2) Moves the subunits that are going to be docked to a position that
288 satisfies the x-linking restraints. There is no guarantee that this
289 position is correct. Its purpose is to help the docking
290 algorithm with a clue of the proximity/orientation between subunits
291 3) Performs docking between the subunits
292 4) Filters the results of the docking that are not consistent with
293 the cross-linking restraints
294 5) Computes the relative transformations between the rigid bodies
295 of the subunits that have been docked
296 @param params Class with the parameters for the experiment
298 log.info(
"Creating initial assembly from xlinks and docking")
299 import emagefit_dock
as dock
301 m = DominoModel.DominoModel()
302 m.set_assembly_components(params.fn_pdbs, params.names)
303 set_xlink_restraints(params, m)
304 order = bx.DockOrder()
305 order.set_xlinks(m.xlinks_dict)
306 docking_pairs = order.get_docking_order()
308 if hasattr(params,
"have_hexdock"):
309 if not params.have_hexdock:
312 for rec, lig
in docking_pairs:
313 xlinks_list = m.xlinks_dict.get_xlinks_for_pair((rec,lig))
314 log.debug(
"Xlinks for the pair %s %s",rec, lig)
315 for xl
in xlinks_list:
316 log.debug(
"%s", xl.show())
318 h_receptor = representation.get_component(m.assembly, rec)
319 h_ligand = representation.get_component(m.assembly, lig)
320 rb_receptor = representation.get_rigid_body(m.components_rbs,
321 representation.get_rb_name(rec))
322 rb_ligand = representation.get_rigid_body(m.components_rbs,
323 representation.get_rb_name(lig))
324 initial_ref = rb_ligand.get_reference_frame()
326 mv = bx.InitialDockingFromXlinks()
327 mv.set_xlinks(xlinks_list)
328 mv.set_hierarchies(h_receptor, h_ligand)
329 mv.set_rigid_bodies(rb_receptor, rb_ligand)
331 fn_initial_docking =
"%s-%s_initial_docking.pdb" % (rec,lig)
332 mv.write_ligand(fn_initial_docking)
334 dock.check_for_hexdock()
335 hex_docking = dock.HexDocking()
336 receptor_index = params.names.index(rec)
337 fn_transforms =
"hex_solutions_%s-%s.txt" % (rec, lig)
338 fn_docked =
"%s-%s_hexdock.pdb" % (rec, lig)
339 hex_docking.dock(params.fn_pdbs[receptor_index],
340 fn_initial_docking, fn_transforms, fn_docked,
False)
342 sel = atom.ATOMPDBSelector()
349 new_h_ligand = atom.read_pdb(fn_initial_docking, new_m, sel)
350 new_rb_ligand = atom.create_rigid_body(new_h_ligand)
351 Tlig = new_rb_ligand.get_reference_frame().get_transformation_to()
352 fn_filtered =
"hex_solutions_%s-%s_filtered.txt" % (rec, lig)
355 dock.filter_docking_results(h_receptor, new_h_ligand, xlinks_list,
356 fn_transforms, fn_filtered)
359 transformations_hexdock = dock.read_hex_transforms(fn_filtered)
364 log.info(
"Adding the initial approximate transformation to the " \
365 " set of possible transformations")
366 transformations_hexdock.append(alg.get_identity_transformation_3d())
368 Trec = rb_receptor.get_reference_frame().get_transformation_to()
370 for i,T
in enumerate(transformations_hexdock):
371 Tdock = alg.compose(T, Tlig)
372 ref = alg.ReferenceFrame3D(Tdock)
373 new_rb_ligand.set_reference_frame(ref)
375 Ti = alg.compose(Trec.get_inverse(), Tdock)
377 fn_relative =
"relative_positions_%s-%s.txt" % (rec, lig)
378 io.write_transforms(Tinternal, fn_relative)
379 rb_ligand.set_reference_frame(initial_ref)
382 if __name__ ==
"__main__":
384 cparser = ConfigParser.SafeConfigParser()
388 parser.add_option(
"--o",
391 help=
"SQLite file. This file is an output when modeling. " \
392 " It is an input parameter when querying the database.")
393 parser.add_option(
"--exp",
396 help=
"Experiment file. This should be a Python file "
397 "containing a class called Experiment containing "
398 "all parameters required to do the modeling.")
399 parser.add_option(
"--w",
403 help=
"write the best scored solutions to PDB files.")
404 parser.add_option(
"--wcl",
405 dest=
"write_cluster",
406 metavar=
"fn_db_clusters position",
409 help=
"Write the n-th largest cluster. This option has " \
410 "two arguments. The first one is the database file " \
411 "containing the clusters. The second one is the " \
412 "number (position) of the cluster to write. " \
413 "The index of the first cluster is 1.")
414 parser.add_option(
"--orderby",
417 help=
"Measure used to sort solutions when saving them " \
419 parser.add_option(
"--monte_carlo", dest=
"monte_carlo",
423 help=
"Run a MonteCarlo optimization. There is one " \
424 "parameter passed to the option. When using this " \
425 "script in a cluster and sending a lot of them at the "\
426 "same time, the effective random number obtained by " \
427 "seeding using the time is the same for all scripts. " \
428 "To avoid that situation, you can pass an integer. " \
429 "If the number is -1, the time is " \
430 "used as the seed (useful for one core)")
431 parser.add_option(
"--dock", dest=
"dock",
434 help=
"Perform docking of the components of the assembly "\
435 "guided by cross-linking restraints")
436 parser.add_option(
"--merge", dest=
"merge",
438 callback=utility.vararg_callback,
439 help=
"Merge the database files")
440 parser.add_option(
"--gather", dest=
"gather",
442 callback=utility.vararg_callback,
443 help=
"Gather database files and store then in a single " \
444 "file. The arguments are the names of the databases "\
446 parser.add_option(
"--log",
449 help=
"File for logging.")
450 parser.add_option(
"--cdrms",
455 help=
"Align the best solutions to the " \
456 "native using the DRMS of the centroids.")
458 parser.add_option(
"--pr",
462 help=
"Print restraints of model and native")
465 args = parser.parse_args()
467 if len(sys.argv) == 1:
471 logging.basicConfig(filename=args.log, filemode=
"w")
473 logging.basicConfig(stream=sys.stdout)
474 logging.root.setLevel(logging.INFO)
477 max_number_of_results = 100000
479 if not args.fn_database
or not args.orderby:
480 raise ValueError(
"Merging databases requires the output database " \
481 "results and the name of the restraint to order by")
482 solutions_io.gather_best_solution_results(args.merge, args.fn_database,
483 max_number=max_number_of_results, orderby=args.orderby)
484 log.info(
"Merging done. Time %s", time.time() - t0)
488 solutions_io.gather_solution_results(args.gather, args.fn_database)
489 log.info(
"Gathering done. Time %s", time.time() - t0)
493 if not args.fn_database
or not args.orderby:
494 raise ValueError(
"Writing solutions requires the database of " \
495 "results and the name of the restraint to order by")
496 if args.orderby ==
"False":
498 params = utility.get_experiment_params(args.fn_params)
499 write_pdbs_for_solutions(params, args.fn_database,
500 args.write, args.orderby, )
503 if args.write_cluster:
504 if not args.fn_database:
505 raise ValueError(
"Writing clusters requires the database file")
506 fn_db_clusters = args.write_cluster[0]
507 position = int(args.write_cluster[1])
508 params = utility.get_experiment_params(args.fn_params)
509 write_nth_largest_cluster(params, args.fn_database,
510 fn_db_clusters, position )
514 if not args.fn_database
or not args.orderby:
515 raise ValueError(
"Writing models requires the database of " \
516 "results and the name of the restraint to order by")
517 params = utility.get_experiment_params(args.fn_params)
519 write_pdbs_aligned_by_cdrms(params, args.fn_database,
520 args.cdrms, args.orderby)
524 params = utility.get_experiment_params(args.fn_params)
525 generate_monte_carlo_model(params,
526 args.fn_database, args.monte_carlo)
530 params = utility.get_experiment_params(args.fn_params)
531 create_dockings_from_xlinks(params)
536 params = utility.get_experiment_params(args.fn_params)
537 print_restraints(params)
541 params = utility.get_experiment_params(args.fn_params)
542 generate_domino_model(params, args.fn_database)