1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
19 from operator
import itemgetter
20 from collections
import defaultdict
25 """A macro to help setup and run replica exchange.
26 Supports Monte Carlo and molecular dynamics.
27 Produces trajectory RMF files, best PDB structures,
28 and output stat files.
34 monte_carlo_sample_objects=
None,
35 molecular_dynamics_sample_objects=
None,
37 crosslink_restraints=
None,
38 monte_carlo_temperature=1.0,
39 simulated_annealing=
False,
40 simulated_annealing_minimum_temperature=1.0,
41 simulated_annealing_maximum_temperature=2.5,
42 simulated_annealing_minimum_temperature_nframes=100,
43 simulated_annealing_maximum_temperature_nframes=100,
44 replica_exchange_minimum_temperature=1.0,
45 replica_exchange_maximum_temperature=2.5,
46 replica_exchange_swap=
True,
48 number_of_best_scoring_models=500,
51 molecular_dynamics_steps=10,
52 molecular_dynamics_max_time_step=1.0,
53 number_of_frames=1000,
54 save_coordinates_mode=
"lowest_temperature",
55 nframes_write_coordinates=1,
56 write_initial_rmf=
True,
57 initial_rmf_name_suffix=
"initial",
58 stat_file_name_suffix=
"stat",
59 best_pdb_name_suffix=
"model",
61 do_create_directories=
True,
62 global_output_directory=
"./",
65 replica_stat_file_suffix=
"stat_replica",
66 em_object_for_rmf=
None,
68 replica_exchange_object=
None,
71 @param model The IMP model
72 @param representation PMI.representation.Representation object
73 (or list of them, for multi-state modeling)
74 @param root_hier Instead of passing Representation, pass the System hierarchy
75 @param monte_carlo_sample_objects Objects for MC sampling; a list of
76 structural components (generally the representation) that will
77 be moved and restraints with parameters that need to
79 For PMI2: just pass flat list of movers
80 @param molecular_dynamics_sample_objects Objects for MD sampling
81 For PMI2: just pass flat list of particles
82 @param output_objects A list of structural objects and restraints
83 that will be included in output (statistics files). Any object
84 that provides a get_output() method can be used here.
85 @param crosslink_restraints List of cross-link restraints that will
86 be included in output RMF files (for visualization).
87 @param monte_carlo_temperature MC temp (may need to be optimized
88 based on post-sampling analysis)
89 @param simulated_annealing If True, perform simulated annealing
90 @param simulated_annealing_minimum_temperature Should generally be
91 the same as monte_carlo_temperature.
92 @param simulated_annealing_minimum_temperature_nframes Number of
93 frames to compute at minimum temperature.
94 @param simulated_annealing_maximum_temperature_nframes Number of
96 temps > simulated_annealing_maximum_temperature.
97 @param replica_exchange_minimum_temperature Low temp for REX; should
98 generally be the same as monte_carlo_temperature.
99 @param replica_exchange_maximum_temperature High temp for REX
100 @param replica_exchange_swap Boolean, enable disable temperature
102 @param num_sample_rounds Number of rounds of MC/MD per cycle
103 @param number_of_best_scoring_models Number of top-scoring PDB models
104 to keep around for analysis
105 @param monte_carlo_steps Number of MC steps per round
106 @param self_adaptive self adaptive scheme for monte carlo movers
107 @param molecular_dynamics_steps Number of MD steps per round
108 @param molecular_dynamics_max_time_step Max time step for MD
109 @param number_of_frames Number of REX frames to run
110 @param save_coordinates_mode string: how to save coordinates.
111 "lowest_temperature" (default) only the lowest temperatures is saved
112 "25th_score" all replicas whose score is below the 25th percentile
113 "50th_score" all replicas whose score is below the 50th percentile
114 "75th_score" all replicas whose score is below the 75th percentile
115 @param nframes_write_coordinates How often to write the coordinates
117 @param write_initial_rmf Write the initial configuration
118 @param global_output_directory Folder that will be created to house
120 @param test_mode Set to True to avoid writing any files, just test one frame.
127 self.output_objects = output_objects
128 self.representation = representation
130 if type(representation) == list:
131 self.is_multi_state =
True
132 self.root_hiers = [r.prot
for r
in representation]
133 self.vars[
"number_of_states"] = len(representation)
135 self.is_multi_state =
False
136 self.root_hier = representation.prot
137 self.vars[
"number_of_states"] = 1
138 elif root_hier
and type(root_hier) ==
IMP.atom.Hierarchy and root_hier.get_name()==
'System':
141 self.root_hier = root_hier
142 states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
143 self.vars[
"number_of_states"] = len(states)
145 self.root_hiers = states
146 self.is_multi_state =
True
148 self.root_hier = root_hier
149 self.is_multi_state =
False
151 raise Exception(
"Must provide representation or System hierarchy (root_hier)")
153 self.crosslink_restraints = crosslink_restraints
154 self.em_object_for_rmf = em_object_for_rmf
155 self.monte_carlo_sample_objects = monte_carlo_sample_objects
156 self.vars[
"self_adaptive"]=self_adaptive
157 if sample_objects
is not None:
158 self.monte_carlo_sample_objects+=sample_objects
159 self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
160 self.replica_exchange_object = replica_exchange_object
161 self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
162 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
164 "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
166 "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
167 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
168 self.vars[
"simulated_annealing"]=\
170 self.vars[
"simulated_annealing_minimum_temperature"]=\
171 simulated_annealing_minimum_temperature
172 self.vars[
"simulated_annealing_maximum_temperature"]=\
173 simulated_annealing_maximum_temperature
174 self.vars[
"simulated_annealing_minimum_temperature_nframes"]=\
175 simulated_annealing_minimum_temperature_nframes
176 self.vars[
"simulated_annealing_maximum_temperature_nframes"]=\
177 simulated_annealing_maximum_temperature_nframes
179 self.vars[
"num_sample_rounds"] = num_sample_rounds
181 "number_of_best_scoring_models"] = number_of_best_scoring_models
182 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
183 self.vars[
"molecular_dynamics_steps"]=molecular_dynamics_steps
184 self.vars[
"number_of_frames"] = number_of_frames
185 if not save_coordinates_mode
in [
"lowest_temperature",
"25th_score",
"50th_score",
"75th_score"]:
186 raise Exception(
"save_coordinates_mode has unrecognized value")
188 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
189 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
190 self.vars[
"write_initial_rmf"] = write_initial_rmf
191 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
192 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
193 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
194 self.vars[
"do_clean_first"] = do_clean_first
195 self.vars[
"do_create_directories"] = do_create_directories
196 self.vars[
"global_output_directory"] = global_output_directory
197 self.vars[
"rmf_dir"] = rmf_dir
198 self.vars[
"best_pdb_dir"] = best_pdb_dir
199 self.vars[
"atomistic"] = atomistic
200 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
201 self.vars[
"geometries"] =
None
202 self.test_mode = test_mode
205 if self.vars[
"geometries"]
is None:
206 self.vars[
"geometries"] = list(geometries)
208 self.vars[
"geometries"].extend(geometries)
211 print(
"ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
212 print(
"--- it stores the best scoring pdb models in pdbs/")
213 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
214 print(
"--- variables:")
215 keys = list(self.vars.keys())
218 print(
"------", v.ljust(30), self.vars[v])
220 def get_replica_exchange_object(self):
221 return self.replica_exchange_object
223 def execute_macro(self):
224 temp_index_factor = 100000.0
228 if self.monte_carlo_sample_objects
is not None:
229 print(
"Setting up MonteCarlo")
231 self.monte_carlo_sample_objects,
232 self.vars[
"monte_carlo_temperature"])
233 if self.vars[
"simulated_annealing"]:
234 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
235 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
236 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
237 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
238 sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
239 if self.vars[
"self_adaptive"]:
240 sampler_mc.set_self_adaptive(isselfadaptive=self.vars[
"self_adaptive"])
241 self.output_objects.append(sampler_mc)
242 samplers.append(sampler_mc)
245 if self.molecular_dynamics_sample_objects
is not None:
246 print(
"Setting up MolecularDynamics")
248 self.molecular_dynamics_sample_objects,
249 self.vars[
"monte_carlo_temperature"],
250 maximum_time_step=self.molecular_dynamics_max_time_step)
251 if self.vars[
"simulated_annealing"]:
252 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
253 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
254 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
255 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
256 sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
257 self.output_objects.append(sampler_md)
258 samplers.append(sampler_md)
261 print(
"Setting up ReplicaExchange")
264 "replica_exchange_minimum_temperature"],
266 "replica_exchange_maximum_temperature"],
268 replica_exchange_object=self.replica_exchange_object)
269 self.replica_exchange_object = rex.rem
271 myindex = rex.get_my_index()
272 self.output_objects.append(rex)
277 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
281 globaldir = self.vars[
"global_output_directory"] +
"/"
282 rmf_dir = globaldir + self.vars[
"rmf_dir"]
283 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
285 if not self.test_mode:
286 if self.vars[
"do_clean_first"]:
289 if self.vars[
"do_create_directories"]:
292 os.makedirs(globaldir)
300 if not self.is_multi_state:
306 for n
in range(self.vars[
"number_of_states"]):
308 os.makedirs(pdb_dir +
"/" + str(n))
315 self.output_objects.append(sw)
317 print(
"Setting up stat file")
319 low_temp_stat_file = globaldir + \
320 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
321 if not self.test_mode:
322 output.init_stat2(low_temp_stat_file,
324 extralabels=[
"rmf_file",
"rmf_frame_index"])
326 print(
"Setting up replica stat file")
327 replica_stat_file = globaldir + \
328 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
329 if not self.test_mode:
330 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
332 if not self.test_mode:
333 print(
"Setting up best pdb files")
334 if not self.is_multi_state:
335 if self.vars[
"number_of_best_scoring_models"] > 0:
336 output.init_pdb_best_scoring(pdb_dir +
"/" +
337 self.vars[
"best_pdb_name_suffix"],
340 "number_of_best_scoring_models"],
341 replica_exchange=
True)
342 output.write_psf(pdb_dir +
"/" +
"model.psf",pdb_dir +
"/" +
343 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
345 if self.vars[
"number_of_best_scoring_models"] > 0:
346 for n
in range(self.vars[
"number_of_states"]):
347 output.init_pdb_best_scoring(pdb_dir +
"/" + str(n) +
"/" +
348 self.vars[
"best_pdb_name_suffix"],
351 "number_of_best_scoring_models"],
352 replica_exchange=
True)
353 output.write_psf(pdb_dir +
"/" + str(n) +
"/" +
"model.psf",pdb_dir +
"/" + str(n) +
"/" +
354 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
357 if self.em_object_for_rmf
is not None:
358 if not self.is_multi_state
or self.pmi2:
359 output_hierarchies = [
361 self.em_object_for_rmf.get_density_as_hierarchy(
364 output_hierarchies = self.root_hiers
365 output_hierarchies.append(
366 self.em_object_for_rmf.get_density_as_hierarchy())
368 if not self.is_multi_state
or self.pmi2:
369 output_hierarchies = [self.root_hier]
371 output_hierarchies = self.root_hiers
374 if not self.test_mode:
375 print(
"Setting up and writing initial rmf coordinate file")
376 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
377 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
379 if self.crosslink_restraints:
380 output.add_restraints_to_rmf(
381 init_suffix +
"." + str(myindex) +
".rmf3",
382 self.crosslink_restraints)
383 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
384 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
388 if not self.test_mode:
389 mpivs=IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
393 if not self.test_mode:
394 print(
"Setting up production rmf files")
395 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
396 output.init_rmf(rmfname, output_hierarchies, geometries=self.vars[
"geometries"])
398 if self.crosslink_restraints:
399 output.add_restraints_to_rmf(rmfname, self.crosslink_restraints)
401 ntimes_at_low_temp = 0
405 self.replica_exchange_object.set_was_used(
True)
406 nframes = self.vars[
"number_of_frames"]
409 for i
in range(nframes):
413 for nr
in range(self.vars[
"num_sample_rounds"]):
414 if sampler_md
is not None:
416 self.vars[
"molecular_dynamics_steps"])
417 if sampler_mc
is not None:
418 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
420 self.model).evaluate(
False)
421 mpivs.set_value(
"score",score)
422 output.set_output_entry(
"score", score)
426 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
428 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
429 save_frame=(min_temp_index == my_temp_index)
430 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
431 score_perc=mpivs.get_percentile(
"score")
432 save_frame=(score_perc*100.0<=25.0)
433 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
434 score_perc=mpivs.get_percentile(
"score")
435 save_frame=(score_perc*100.0<=50.0)
436 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
437 score_perc=mpivs.get_percentile(
"score")
438 save_frame=(score_perc*100.0<=75.0)
441 print(
"--- frame %s score %s " % (str(i), str(score)))
443 if not self.test_mode:
444 if i % self.vars[
"nframes_write_coordinates"]==0:
445 print(
'--- writing coordinates')
446 if self.vars[
"number_of_best_scoring_models"] > 0:
447 output.write_pdb_best_scoring(score)
448 output.write_rmf(rmfname)
449 output.set_output_entry(
"rmf_file", rmfname)
450 output.set_output_entry(
"rmf_frame_index", ntimes_at_low_temp)
452 output.set_output_entry(
"rmf_file", rmfname)
453 output.set_output_entry(
"rmf_frame_index",
'-1')
454 output.write_stat2(low_temp_stat_file)
455 ntimes_at_low_temp += 1
457 if not self.test_mode:
458 output.write_stat2(replica_stat_file)
459 if self.vars[
"replica_exchange_swap"]:
460 rex.swap_temp(i, score)
461 if self.representation:
462 for p, state
in self.representation._protocol_output:
463 p.add_replica_exchange(state, self)
465 if not self.test_mode:
466 print(
"closing production rmf files")
467 output.close_rmf(rmfname)
473 """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
474 Easily create multi-state systems by calling this macro
475 repeatedly with different TopologyReader objects!
476 A useful function is get_molecules() which returns the PMI Molecules grouped by state
477 as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
478 Quick multi-state system:
481 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
482 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
483 bs = IMP.pmi.macros.BuildSystem(mdl)
484 bs.add_state(reader1)
485 bs.add_state(reader2)
486 bs.execute_macro() # build everything including degrees of freedom
487 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
488 ### now you have a two state system, you add restraints etc
490 \note The "domain name" entry of the topology reader is not used.
491 All molecules are set up by the component name, but split into rigid bodies
496 sequence_connectivity_scale=4.0,
497 force_create_gmm_files=
False,
500 @param mdl An IMP Model
501 @param sequence_connectivity_scale For scaling the connectivity restraint
502 @param force_create_gmm_files If True, will sample and create GMMs
503 no matter what. If False, will only only sample if the
504 files don't exist. If number of Gaussians is zero, won't
506 @param resolutions The resolutions to build for structured regions
511 self._domain_res = []
513 self.force_create_gmm_files = force_create_gmm_files
514 self.resolutions = resolutions
518 keep_chain_id=
False, fasta_name_map=
None):
519 """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
520 When you are done adding states, call execute_macro()
521 @param reader The TopologyReader object
522 @param fasta_name_map dictionary for converting protein names found in the fasta file
524 state = self.system.create_state()
525 self._readers.append(reader)
526 these_domain_res = {}
528 chain_ids = string.ascii_uppercase+string.ascii_lowercase+
'0123456789'
533 for molname
in reader.get_molecules():
534 copies = reader.get_molecules()[molname].domains
535 for nc,copyname
in enumerate(copies):
536 print(
"BuildSystem.add_state: setting up molecule %s copy number %s" % (molname,str(nc)))
537 copy = copies[copyname]
539 if keep_chain_id ==
True:
540 chain_id = copy[0].chain
542 chain_id = chain_ids[numchain]
545 fasta_flag=copy[0].fasta_flag
546 if fasta_flag ==
"RNA" or fasta_flag ==
"DNA": is_nucleic=
True
548 print(
"BuildSystem.add_state: molecule %s sequence has %s residues" % (molname,len(seq)))
549 orig_mol = state.create_molecule(molname,
555 print(
"BuildSystem.add_state: creating a copy for molecule %s" % molname)
556 mol = orig_mol.create_copy(chain_id)
559 for domainnumber,domain
in enumerate(copy):
560 print(
"BuildSystem.add_state: ---- setting up domain %s of molecule %s" % (domainnumber,molname))
563 these_domains[domain.get_unique_name()] = domain
564 if domain.residue_range==[]
or domain.residue_range
is None:
565 domain_res = mol.get_residues()
567 start = domain.residue_range[0]+domain.pdb_offset
568 if domain.residue_range[1]==
'END':
569 end = len(mol.sequence)
571 end = domain.residue_range[1]+domain.pdb_offset
572 domain_res = mol.residue_range(start-1,end-1)
573 print(
"BuildSystem.add_state: -------- domain %s of molecule %s extends from residue %s to residue %s " % (domainnumber,molname,start,end))
574 if domain.pdb_file==
"BEADS":
575 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by BEADS " % (domainnumber,molname))
576 mol.add_representation(
578 resolutions=[domain.bead_size],
579 setup_particles_as_densities=(
580 domain.em_residues_per_gaussian!=0),
581 color = domain.color)
582 these_domain_res[domain.get_unique_name()] = (set(),domain_res)
583 elif domain.pdb_file==
"IDEAL_HELIX":
584 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by IDEAL_HELIX " % (domainnumber,molname))
585 mol.add_representation(
587 resolutions=self.resolutions,
589 density_residues_per_component=domain.em_residues_per_gaussian,
590 density_prefix=domain.density_prefix,
591 density_force_compute=self.force_create_gmm_files,
592 color = domain.color)
593 these_domain_res[domain.get_unique_name()] = (domain_res,set())
595 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by pdb file %s " % (domainnumber,molname,domain.pdb_file))
596 domain_atomic = mol.add_structure(domain.pdb_file,
598 domain.residue_range,
601 domain_non_atomic = domain_res - domain_atomic
602 if not domain.em_residues_per_gaussian:
603 mol.add_representation(domain_atomic,
604 resolutions=self.resolutions,
605 color = domain.color)
606 if len(domain_non_atomic)>0:
607 mol.add_representation(domain_non_atomic,
608 resolutions=[domain.bead_size],
609 color = domain.color)
611 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by gaussians " % (domainnumber,molname))
612 mol.add_representation(
614 resolutions=self.resolutions,
615 density_residues_per_component=domain.em_residues_per_gaussian,
616 density_prefix=domain.density_prefix,
617 density_force_compute=self.force_create_gmm_files,
618 color = domain.color)
619 if len(domain_non_atomic)>0:
620 mol.add_representation(domain_non_atomic,
621 resolutions=[domain.bead_size],
622 setup_particles_as_densities=
True,
623 color = domain.color)
624 these_domain_res[domain.get_unique_name()] = (
625 domain_atomic,domain_non_atomic)
626 self._domain_res.append(these_domain_res)
627 self._domains.append(these_domains)
628 print(
'BuildSystem.add_state: State',len(self.system.states),
'added')
631 """Return list of all molecules grouped by state.
632 For each state, it's a dictionary of Molecules where key is the molecule name
634 return [s.get_molecules()
for s
in self.system.get_states()]
636 def get_molecule(self,molname,copy_index=0,state_index=0):
637 return self.system.get_states()[state_index].
get_molecules()[molname][copy_index]
639 def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04, max_bead_trans=4.0, max_srb_trans=4.0,max_srb_rot=0.04):
640 """Builds representations and sets up degrees of freedom"""
641 print(
"BuildSystem.execute_macro: building representations")
642 self.root_hier = self.system.build()
644 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
646 for nstate,reader
in enumerate(self._readers):
647 rbs = reader.get_rigid_bodies()
648 srbs = reader.get_super_rigid_bodies()
649 csrbs = reader.get_chains_of_super_rigid_bodies()
652 domains_in_rbs = set()
654 print(
"BuildSystem.execute_macro: -------- building rigid body %s" % (str(rblist)))
655 all_res = IMP.pmi.tools.OrderedSet()
656 bead_res = IMP.pmi.tools.OrderedSet()
658 domain = self._domains[nstate][dname]
659 print(
"BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
660 all_res|=self._domain_res[nstate][dname][0]
661 bead_res|=self._domain_res[nstate][dname][1]
662 domains_in_rbs.add(dname)
664 print(
"BuildSystem.execute_macro: -------- \
665 creating rigid body with max_trans %s \
666 max_rot %s non_rigid_max_trans %s" \
667 % (str(max_rb_trans),str(max_rb_rot),str(max_bead_trans)))
668 self.dof.create_rigid_body(all_res,
669 nonrigid_parts=bead_res,
670 max_trans=max_rb_trans,
672 nonrigid_max_trans=max_bead_trans)
675 for dname
in self._domains[nstate]:
676 domain = self._domains[nstate][dname]
677 if domain.pdb_file==
"BEADS" and dname
not in domains_in_rbs:
678 self.dof.create_flexible_beads(
679 self._domain_res[nstate][dname][1],max_trans=max_bead_trans)
683 print(
"BuildSystem.execute_macro: -------- building super rigid body %s" % (str(srblist)))
684 all_res = IMP.pmi.tools.OrderedSet()
685 for dname
in srblist:
686 print(
"BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
687 all_res|=self._domain_res[nstate][dname][0]
688 all_res|=self._domain_res[nstate][dname][1]
690 print(
"BuildSystem.execute_macro: -------- \
691 creating super rigid body with max_trans %s max_rot %s " \
692 % (str(max_srb_trans),str(max_srb_rot)))
693 self.dof.create_super_rigid_body(all_res,max_trans=max_srb_trans,max_rot=max_srb_rot)
696 for csrblist
in csrbs:
697 all_res = IMP.pmi.tools.OrderedSet()
698 for dname
in csrblist:
699 all_res|=self._domain_res[nstate][dname][0]
700 all_res|=self._domain_res[nstate][dname][1]
701 all_res = list(all_res)
702 all_res.sort(key=
lambda r:r.get_index())
704 self.dof.create_main_chain_mover(all_res)
705 return self.root_hier,self.dof
709 """A macro to build a Representation based on a Topology and lists of movers
710 DEPRECATED - Use BuildSystem instead.
714 component_topologies,
715 list_of_rigid_bodies=[],
716 list_of_super_rigid_bodies=[],
717 chain_of_super_rigid_bodies=[],
718 sequence_connectivity_scale=4.0,
719 add_each_domain_as_rigid_body=
False,
720 force_create_gmm_files=
False):
722 @param model The IMP model
723 @param component_topologies List of
724 IMP.pmi.topology.ComponentTopology items
725 @param list_of_rigid_bodies List of lists of domain names that will
726 be moved as rigid bodies.
727 @param list_of_super_rigid_bodies List of lists of domain names
728 that will move together in an additional Monte Carlo move.
729 @param chain_of_super_rigid_bodies List of lists of domain names
730 (choices can only be from the same molecule). Each of these
731 groups will be moved rigidly. This helps to sample more
732 efficiently complex topologies, made of several rigid bodies,
733 connected by flexible linkers.
734 @param sequence_connectivity_scale For scaling the connectivity
736 @param add_each_domain_as_rigid_body That way you don't have to
737 put all of them in the list
738 @param force_create_gmm_files If True, will sample and create GMMs
739 no matter what. If False, will only only sample if the
740 files don't exist. If number of Gaussians is zero, won't
746 disorderedlength=
False)
748 data=component_topologies
749 if list_of_rigid_bodies==[]:
750 print(
"WARNING: No list of rigid bodies inputted to build_model()")
751 if list_of_super_rigid_bodies==[]:
752 print(
"WARNING: No list of super rigid bodies inputted to build_model()")
753 if chain_of_super_rigid_bodies==[]:
754 print(
"WARNING: No chain of super rigid bodies inputted to build_model()")
755 all_dnames = set([d
for sublist
in list_of_rigid_bodies+list_of_super_rigid_bodies\
756 +chain_of_super_rigid_bodies
for d
in sublist])
757 all_available = set([c._domain_name
for c
in component_topologies])
758 if not all_dnames <= all_available:
759 raise ValueError(
"All requested movers must reference domain "
760 "names in the component topologies")
764 super_rigid_bodies={}
765 chain_super_rigid_bodies={}
769 comp_name = c.molname
770 hier_name = c._domain_name
772 fasta_file = c.fasta_file
773 fasta_id = c.fasta_id
774 pdb_name = c.pdb_file
776 res_range = c.residue_range
777 offset = c.pdb_offset
778 bead_size = c.bead_size
779 em_num_components = c.em_residues_per_gaussian
780 em_txt_file_name = c.gmm_file
781 em_mrc_file_name = c.mrc_file
783 if comp_name
not in self.simo.get_component_names():
784 self.simo.create_component(comp_name,color=0.0)
785 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
788 if em_num_components==0:
792 if (
not os.path.isfile(em_txt_file_name))
or force_create_gmm_files:
799 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
800 res_range,include_res0,beadsize=bead_size,
801 color=color,offset=offset)
802 if em_num_components!=0:
804 print(
"will read GMM files")
806 print(
"will calculate GMMs")
808 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
809 em_mrc_file_name,em_num_components,read_em_files)
810 self.simo.add_all_atom_densities(comp_name, particles=beads)
815 self.resdensities[hier_name]=dens_hier
816 self.domain_dict[hier_name]=outhier+dens_hier
819 for c
in self.simo.get_component_names():
820 self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
821 self.simo.setup_component_geometry(c)
824 for rblist
in list_of_rigid_bodies:
826 for rbmember
in rblist:
827 rb+=[h
for h
in self.domain_dict[rbmember]]
828 self.simo.set_rigid_body_from_hierarchies(rb)
829 for srblist
in list_of_super_rigid_bodies:
831 for srbmember
in rblist:
832 srb+=[h
for h
in self.domain_dict[srbmember]]
833 self.simo.set_super_rigid_body_from_hierarchies(srb)
834 for clist
in chain_of_super_rigid_bodies:
836 for crbmember
in rblist:
837 crb+=[h
for h
in self.domain_dict[crbmember]]
838 self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
840 self.simo.set_floppy_bodies()
841 self.simo.setup_bonds()
844 '''Return the Representation object'''
847 def get_density_hierarchies(self,hier_name_list):
851 for hn
in hier_name_list:
853 dens_hier_list+=self.resdensities[hn]
854 return dens_hier_list
856 def set_gmm_models_directory(self,directory_name):
857 self.gmm_models_directory=directory_name
859 def get_pdb_bead_bits(self,hierarchy):
864 if "_pdb" in h.get_name():pdbbits.append(h)
865 if "_bead" in h.get_name():beadbits.append(h)
866 if "_helix" in h.get_name():helixbits.append(h)
867 return (pdbbits,beadbits,helixbits)
869 def scale_bead_radii(self,nresidues,scale):
871 for h
in self.domain_dict:
872 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
873 slope=(1.0-scale)/(1.0-float(nresidues))
878 if b
not in scaled_beads:
884 scale_factor=slope*float(num_residues)+1.0
886 new_radius=scale_factor*radius
889 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
892 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
894 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
897 for pb
in pdbbits+helixbits:
901 number_of_residues=len(set(res_ind))
905 outhier+=simo.add_component_density(compname,
907 num_components=num_components,
909 inputfile=txtfilename)
910 if len(helixbits)!=0:
911 outhier+=simo.add_component_density(compname,
913 num_components=num_components,
915 inputfile=txtfilename)
920 num_components=number_of_residues//abs(num_components)+1
921 outhier+=simo.add_component_density(compname,
923 num_components=num_components,
925 outputfile=txtfilename,
926 outputmap=mrcfilename,
927 multiply_by_total_mass=
True)
929 if len(helixbits)!=0:
930 num_components=number_of_residues//abs(num_components)+1
931 outhier+=simo.add_component_density(compname,
933 num_components=num_components,
935 outputfile=txtfilename,
936 outputmap=mrcfilename,
937 multiply_by_total_mass=
True)
939 return outhier,beadbits
941 def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
942 beadsize=5,color=0.0,offset=0):
943 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" :
945 outhier=simo.autobuild_model(comname,
949 resolutions=[0,1,10],
952 missingbeadsize=beadsize)
954 outhier=simo.autobuild_model(comname,
961 missingbeadsize=beadsize)
964 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" :
965 outhier=simo.add_component_ideal_helix(comname,
971 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" :
972 outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
976 seq_len=len(simo.sequence_dict[comname])
977 outhier=simo.add_component_necklace(comname,
987 """Deprecated building macro - use BuildSystem()"""
991 @param representation The PMI representation
993 self.simo=representation
994 self.gmm_models_directory=
"."
996 self.rmf_frame_number={}
997 self.rmf_names_map={}
998 self.rmf_component_name={}
1000 def set_gmm_models_directory(self,directory_name):
1001 self.gmm_models_directory=directory_name
1004 sequence_connectivity_resolution=10,
1005 rmf_file=
None,rmf_frame_number=0,rmf_file_map=
None,
1006 skip_connectivity_these_domains=
None,
1007 skip_gaussian_in_rmf=
False, skip_gaussian_in_representation=
False):
1009 @param data_structure List of lists containing these entries:
1010 comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
1011 res_range, read_em_files, bead_size, rb, super_rb,
1012 em_num_components, em_txt_file_name, em_mrc_file_name, chain_of_super_rb,
1013 keep_gaussian_flexible_beads (optional)
1014 @param sequence_connectivity_scale
1016 @param rmf_frame_number
1017 @param rmf_file_map : a dictionary that map key=component_name:value=(rmf_file_name,
1022 self.resdensities={}
1023 super_rigid_bodies={}
1024 chain_super_rigid_bodies={}
1027 for d
in data_structure:
1035 res_range = d[7][0:2]
1040 read_em_files = d[8]
1044 em_num_components = d[12]
1045 em_txt_file_name = d[13]
1046 em_mrc_file_name = d[14]
1047 chain_of_super_rb = d[15]
1049 keep_gaussian_flexible_beads = d[16]
1051 keep_gaussian_flexible_beads =
True
1053 if comp_name
not in self.simo.get_component_names():
1054 self.simo.create_component(comp_name,color=0.0)
1055 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
1056 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
1059 if not read_em_files
is None:
1060 if em_txt_file_name
is " ": em_txt_file_name=self.gmm_models_directory+
"/"+hier_name+
".txt"
1061 if em_mrc_file_name
is " ": em_mrc_file_name=self.gmm_models_directory+
"/"+hier_name+
".mrc"
1064 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
1066 if (keep_gaussian_flexible_beads):
1067 self.simo.add_all_atom_densities(comp_name, particles=beads)
1072 self.resdensities[hier_name]=dens_hier
1073 self.domain_dict[hier_name]=outhier+dens_hier
1076 if rb
not in rigid_bodies:
1077 rigid_bodies[rb]=[h
for h
in self.domain_dict[hier_name]]
1079 rigid_bodies[rb]+=[h
for h
in self.domain_dict[hier_name]]
1082 if super_rb
is not None:
1084 if k
not in super_rigid_bodies:
1085 super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
1087 super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
1089 if chain_of_super_rb
is not None:
1090 for k
in chain_of_super_rb:
1091 if k
not in chain_super_rigid_bodies:
1092 chain_super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
1094 chain_super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
1098 self.rigid_bodies=rigid_bodies
1100 for c
in self.simo.get_component_names():
1101 if rmf_file
is not None:
1103 rfn=rmf_frame_number
1104 self.simo.set_coordinates_from_rmf(c, rf,rfn,
1105 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1107 for k
in rmf_file_map:
1109 rf=rmf_file_map[k][0]
1110 rfn=rmf_file_map[k][1]
1111 rcname=rmf_file_map[k][2]
1112 self.simo.set_coordinates_from_rmf(cname, rf,rfn,rcname,
1113 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1115 if c
in self.rmf_file:
1117 rfn=self.rmf_frame_number[c]
1118 rfm=self.rmf_names_map[c]
1119 rcname=self.rmf_component_name[c]
1120 self.simo.set_coordinates_from_rmf(c, rf,rfn,representation_name_to_rmf_name_map=rfm,
1121 rmf_component_name=rcname,
1122 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1123 if (
not skip_connectivity_these_domains)
or (c
not in skip_connectivity_these_domains):
1124 self.simo.setup_component_sequence_connectivity(c,
1125 resolution=sequence_connectivity_resolution,
1126 scale=sequence_connectivity_scale)
1127 self.simo.setup_component_geometry(c)
1129 for rb
in rigid_bodies:
1130 self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
1132 for k
in super_rigid_bodies:
1133 self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
1135 for k
in chain_super_rigid_bodies:
1136 self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
1138 self.simo.set_floppy_bodies()
1139 self.simo.setup_bonds()
1141 def set_main_chain_mover(self,hier_name,lengths=[5,10,20,30]):
1142 hiers=self.domain_dict[hier_name]
1143 for length
in lengths:
1144 for n
in range(len(hiers)-length):
1145 hs=hiers[n+1:n+length-1]
1146 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n+length].get_particle()),min_size=3)
1147 for n
in range(1,len(hiers)-1,5):
1149 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1151 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1154 def set_rmf_file(self,component_name,rmf_file,rmf_frame_number,rmf_names_map=None,rmf_component_name=None):
1155 self.rmf_file[component_name]=rmf_file
1156 self.rmf_frame_number[component_name]=rmf_frame_number
1157 self.rmf_names_map[component_name]=rmf_names_map
1158 self.rmf_component_name[component_name]=rmf_component_name
1160 def get_density_hierarchies(self,hier_name_list):
1164 for hn
in hier_name_list:
1166 dens_hier_list+=self.resdensities[hn]
1167 return dens_hier_list
1169 def get_pdb_bead_bits(self,hierarchy):
1174 if "_pdb" in h.get_name():pdbbits.append(h)
1175 if "_bead" in h.get_name():beadbits.append(h)
1176 if "_helix" in h.get_name():helixbits.append(h)
1177 return (pdbbits,beadbits,helixbits)
1179 def scale_bead_radii(self,nresidues,scale):
1181 for h
in self.domain_dict:
1182 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
1183 slope=(1.0-scale)/(1.0-float(nresidues))
1188 if b
not in scaled_beads:
1194 scale_factor=slope*float(num_residues)+1.0
1196 new_radius=scale_factor*radius
1199 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
1202 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
1204 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
1208 for pb
in pdbbits+helixbits:
1212 number_of_residues=len(set(res_ind))
1216 outhier+=simo.add_component_density(compname,
1218 num_components=num_components,
1220 inputfile=txtfilename)
1221 if len(helixbits)!=0:
1222 outhier+=simo.add_component_density(compname,
1224 num_components=num_components,
1226 inputfile=txtfilename)
1231 if num_components<0:
1234 num_components=number_of_residues/abs(num_components)
1235 outhier+=simo.add_component_density(compname,
1237 num_components=num_components,
1239 outputfile=txtfilename,
1240 outputmap=mrcfilename,
1241 multiply_by_total_mass=
True)
1243 if len(helixbits)!=0:
1244 if num_components<0:
1247 num_components=number_of_residues/abs(num_components)
1248 outhier+=simo.add_component_density(compname,
1250 num_components=num_components,
1252 outputfile=txtfilename,
1253 outputmap=mrcfilename,
1254 multiply_by_total_mass=
True)
1256 return outhier,beadbits
1258 def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
1260 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is not "DENSITY" :
1261 if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
1263 outhier=simo.autobuild_model(comname,
1267 resolutions=[0,1,10],
1270 missingbeadsize=beadsize)
1272 outhier=simo.autobuild_model(comname,
1279 missingbeadsize=beadsize)
1281 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is not "DENSITY" :
1282 outhier=simo.add_component_ideal_helix(comname,
1288 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" and pdbname
is not "DENSITY" :
1291 seq_len=len(simo.sequence_dict[comname])
1292 outhier=simo.add_component_necklace(comname,resrange[0],seq_len,beadsize,color=color)
1294 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is "DENSITY" :
1299 seq_len=len(simo.sequence_dict[comname])
1300 outhier=simo.add_component_necklace(comname,
1307 def set_coordinates(self,hier_name,xyz_tuple):
1308 hier=self.domain_dict[hier_name]
1316 def save_rmf(self,rmfname):
1319 o.init_rmf(rmfname,[self.simo.prot])
1320 o.write_rmf(rmfname)
1321 o.close_rmf(rmfname)
1330 missing_bead_size=20,
1331 residue_per_gaussian=
None):
1333 Construct a component for each subunit (no splitting, nothing fancy).
1334 You can pass the resolutions and the bead size for the missing residue regions.
1335 To use this macro, you must provide the following data structure:
1336 Component pdbfile chainid rgb color fastafile sequence id
1338 data = [("Rpb1", pdbfile, "A", 0.00000000, (fastafile, 0)),
1339 ("Rpb2", pdbfile, "B", 0.09090909, (fastafile, 1)),
1340 ("Rpb3", pdbfile, "C", 0.18181818, (fastafile, 2)),
1341 ("Rpb4", pdbfile, "D", 0.27272727, (fastafile, 3)),
1342 ("Rpb5", pdbfile, "E", 0.36363636, (fastafile, 4)),
1343 ("Rpb6", pdbfile, "F", 0.45454545, (fastafile, 5)),
1344 ("Rpb7", pdbfile, "G", 0.54545455, (fastafile, 6)),
1345 ("Rpb8", pdbfile, "H", 0.63636364, (fastafile, 7)),
1346 ("Rpb9", pdbfile, "I", 0.72727273, (fastafile, 8)),
1347 ("Rpb10", pdbfile, "L", 0.81818182, (fastafile, 9)),
1348 ("Rpb11", pdbfile, "J", 0.90909091, (fastafile, 10)),
1349 ("Rpb12", pdbfile, "K", 1.00000000, (fastafile, 11))]
1359 component_name = d[0]
1363 fasta_file = d[4][0]
1365 fastids = IMP.pmi.tools.get_ids_from_fasta_file(fasta_file)
1366 fasta_file_id = d[4][1]
1368 r.create_component(component_name,
1371 r.add_component_sequence(component_name,
1373 id=fastids[fasta_file_id])
1375 hierarchies = r.autobuild_model(component_name,
1378 resolutions=resolutions,
1379 missingbeadsize=missing_bead_size)
1381 r.show_component_table(component_name)
1383 r.set_rigid_bodies([component_name])
1385 r.set_chain_of_super_rigid_bodies(
1390 r.setup_component_sequence_connectivity(component_name, resolution=1)
1391 r.setup_component_geometry(component_name)
1395 r.set_floppy_bodies()
1398 r.set_current_coordinates_as_reference_for_rmsd(
"Reference")
1405 """A macro for running all the basic operations of analysis.
1406 Includes clustering, precision analysis, and making ensemble density maps.
1407 A number of plots are also supported.
1410 merge_directories=[
"./"],
1411 stat_file_name_suffix=
"stat",
1412 best_pdb_name_suffix=
"model",
1413 do_clean_first=
True,
1414 do_create_directories=
True,
1415 global_output_directory=
"output/",
1416 replica_stat_file_suffix=
"stat_replica",
1417 global_analysis_result_directory=
"./analysis/",
1420 @param model The IMP model
1421 @param stat_file_name_suffix
1422 @param merge_directories The directories containing output files
1423 @param best_pdb_name_suffix
1424 @param do_clean_first
1425 @param do_create_directories
1426 @param global_output_directory Where everything is
1427 @param replica_stat_file_suffix
1428 @param global_analysis_result_directory
1429 @param test_mode If True, nothing is changed on disk
1433 from mpi4py
import MPI
1434 self.comm = MPI.COMM_WORLD
1435 self.rank = self.comm.Get_rank()
1436 self.number_of_processes = self.comm.size
1439 self.number_of_processes = 1
1441 self.test_mode = test_mode
1442 self._protocol_output = []
1443 self.cluster_obj =
None
1445 stat_dir = global_output_directory
1446 self.stat_files = []
1448 for rd
in merge_directories:
1449 stat_files = glob.glob(os.path.join(rd,stat_dir,
"stat.*.out"))
1450 if len(stat_files)==0:
1451 print(
"WARNING: no stat files found in",os.path.join(rd,stat_dir))
1452 self.stat_files += stat_files
1455 """Capture details of the modeling protocol.
1456 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
1459 self._protocol_output.append((p, p._last_state))
1462 score_key=
"SimplifiedModel_Total_Score_None",
1463 rmf_file_key=
"rmf_file",
1464 rmf_file_frame_key=
"rmf_frame_index",
1467 nframes_trajectory=10000):
1468 """ Get a trajectory of the modeling run, for generating demonstrative movies
1469 @param score_key The score for ranking models
1470 @param rmf_file_key Key pointing to RMF filename
1471 @param rmf_file_frame_key Key pointing to RMF frame number
1472 @param outputdir The local output directory used in the run
1473 @param get_every Extract every nth frame
1474 @param nframes_trajectory Total number of frames of the trajectory
1476 from operator
import itemgetter
1484 rmf_file_list=trajectory_models[0]
1485 rmf_file_frame_list=trajectory_models[1]
1486 score_list=list(map(float, trajectory_models[2]))
1488 max_score=max(score_list)
1489 min_score=min(score_list)
1492 bins=[(max_score-min_score)*math.exp(-float(i))+min_score
for i
in range(nframes_trajectory)]
1493 binned_scores=[
None]*nframes_trajectory
1494 binned_model_indexes=[-1]*nframes_trajectory
1496 for model_index,s
in enumerate(score_list):
1497 bins_score_diffs=[abs(s-b)
for b
in bins]
1498 bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1499 if binned_scores[bin_index]==
None:
1500 binned_scores[bin_index]=s
1501 binned_model_indexes[bin_index]=model_index
1503 old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1504 new_diff=abs(s-bins[bin_index])
1505 if new_diff < old_diff:
1506 binned_scores[bin_index]=s
1507 binned_model_indexes[bin_index]=model_index
1509 print(binned_scores)
1510 print(binned_model_indexes)
1513 def _expand_ambiguity(self,prot,d):
1514 """If using PMI2, expand the dictionary to include copies as ambiguous options
1515 This also keeps the states separate.
1520 if '..' in key
or (type(val)
is tuple
and len(val)>=3):
1523 states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
1524 if type(val)
is tuple:
1532 for nst
in range(len(states)):
1534 copies = sel.get_selected_particles(with_representation=
False)
1536 for nc
in range(len(copies)):
1538 newdict[
'%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
1540 newdict[
'%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
1547 score_key=
"SimplifiedModel_Total_Score_None",
1548 rmf_file_key=
"rmf_file",
1549 rmf_file_frame_key=
"rmf_frame_index",
1551 prefiltervalue=
None,
1554 alignment_components=
None,
1555 number_of_best_scoring_models=10,
1556 rmsd_calculation_components=
None,
1557 distance_matrix_file=
'distances.mat',
1558 load_distance_matrix_file=
False,
1559 skip_clustering=
False,
1560 number_of_clusters=1,
1562 exit_after_display=
True,
1564 first_and_last_frames=
None,
1565 density_custom_ranges=
None,
1566 write_pdb_with_centered_coordinates=
False,
1568 """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
1569 Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
1570 Can pass None for copy or state to ignore that field.
1571 If you don't pass a specific copy number
1572 @param score_key The score for ranking models. Use "Total_Score"
1573 instead of default for PMI2.
1574 @param rmf_file_key Key pointing to RMF filename
1575 @param rmf_file_frame_key Key pointing to RMF frame number
1576 @param state_number State number to analyze
1577 @param prefiltervalue Only include frames where the
1578 score key is below this value
1579 @param feature_keys Keywords for which you want to
1580 calculate average, medians, etc.
1581 If you pass "Keyname" it'll include everything that matches "*Keyname*"
1582 @param outputdir The local output directory used in the run
1583 @param alignment_components Dictionary with keys=groupname, values are tuples
1584 for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1585 @param number_of_best_scoring_models Num models to keep per run
1586 @param rmsd_calculation_components For calculating RMSD
1587 (same format as alignment_components)
1588 @param distance_matrix_file Where to store/read the distance matrix
1589 @param load_distance_matrix_file Try to load the distance matrix file
1590 @param skip_clustering Just extract the best scoring models
1592 @param number_of_clusters Number of k-means clusters
1593 @param display_plot Display the distance matrix
1594 @param exit_after_display Exit after displaying distance matrix
1595 @param get_every Extract every nth frame
1596 @param first_and_last_frames A tuple with the first and last frames to be
1597 analyzed. Values are percentages!
1598 Default: get all frames
1599 @param density_custom_ranges For density calculation
1600 (same format as alignment_components)
1601 @param write_pdb_with_centered_coordinates
1602 @param voxel_size Used for the density output
1604 self._outputdir = os.path.abspath(outputdir)
1605 self._number_of_clusters = number_of_clusters
1606 for p, state
in self._protocol_output:
1607 p.add_replica_exchange_analysis(state, self)
1618 if not load_distance_matrix_file:
1619 if len(self.stat_files)==0: print(
"ERROR: no stat file found in the given path");
return
1620 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1621 self.stat_files,self.number_of_processes)[self.rank]
1625 orig_score_key = score_key
1626 if score_key
not in po.get_keys():
1627 if 'Total_Score' in po.get_keys():
1628 score_key =
'Total_Score'
1629 print(
"WARNING: Using 'Total_Score' instead of "
1630 "'SimplifiedModel_Total_Score_None' for the score key")
1631 for k
in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1632 if k
in feature_keys:
1633 print(
"WARNING: no need to pass " +k+
" to feature_keys.")
1634 feature_keys.remove(k)
1643 rmf_file_list=best_models[0]
1644 rmf_file_frame_list=best_models[1]
1645 score_list=best_models[2]
1646 feature_keyword_list_dict=best_models[3]
1652 if self.number_of_processes > 1:
1656 rmf_file_frame_list)
1657 for k
in feature_keyword_list_dict:
1659 feature_keyword_list_dict[k])
1662 score_rmf_tuples = list(zip(score_list,
1664 rmf_file_frame_list,
1665 list(range(len(score_list)))))
1667 if density_custom_ranges:
1668 for k
in density_custom_ranges:
1669 if type(density_custom_ranges[k])
is not list:
1670 raise Exception(
"Density custom ranges: values must be lists of tuples")
1673 if first_and_last_frames
is not None:
1674 nframes = len(score_rmf_tuples)
1675 first_frame = int(first_and_last_frames[0] * nframes)
1676 last_frame = int(first_and_last_frames[1] * nframes)
1677 if last_frame > len(score_rmf_tuples):
1679 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1682 best_score_rmf_tuples = sorted(score_rmf_tuples,
1683 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1684 best_score_rmf_tuples=[t+(n,)
for n,t
in enumerate(best_score_rmf_tuples)]
1686 best_score_feature_keyword_list_dict = defaultdict(list)
1687 for tpl
in best_score_rmf_tuples:
1689 for f
in feature_keyword_list_dict:
1690 best_score_feature_keyword_list_dict[f].append(
1691 feature_keyword_list_dict[f][index])
1692 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1693 best_score_rmf_tuples,
1694 self.number_of_processes)[self.rank]
1697 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(self.model,
1699 my_best_score_rmf_tuples[0][1])[0]
1701 if rmsd_calculation_components
is not None:
1702 tmp = self._expand_ambiguity(prot_ahead,rmsd_calculation_components)
1703 if tmp!=rmsd_calculation_components:
1704 print(
'Detected ambiguity, expand rmsd components to',tmp)
1705 rmsd_calculation_components = tmp
1706 if alignment_components
is not None:
1707 tmp = self._expand_ambiguity(prot_ahead,alignment_components)
1708 if tmp!=alignment_components:
1709 print(
'Detected ambiguity, expand alignment components to',tmp)
1710 alignment_components = tmp
1716 my_best_score_rmf_tuples[0],
1717 rmsd_calculation_components,
1718 state_number=state_number)
1720 my_best_score_rmf_tuples,
1721 alignment_components,
1722 rmsd_calculation_components,
1723 state_number=state_number)
1728 all_coordinates=got_coords[0]
1729 alignment_coordinates=got_coords[1]
1730 rmsd_coordinates=got_coords[2]
1731 rmf_file_name_index_dict=got_coords[3]
1732 all_rmf_file_names=got_coords[4]
1738 if density_custom_ranges:
1740 density_custom_ranges,
1743 dircluster=os.path.join(outputdir,
"all_models."+str(n))
1749 os.mkdir(dircluster)
1752 clusstat=open(os.path.join(dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1753 for cnt,tpl
in enumerate(my_best_score_rmf_tuples):
1755 rmf_frame_number=tpl[2]
1758 for key
in best_score_feature_keyword_list_dict:
1759 tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1762 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1767 linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1773 if not linking_successful:
1780 states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1781 prot = states[state_number]
1783 prot = prots[state_number]
1788 coords_f1=alignment_coordinates[cnt]
1790 coords_f2=alignment_coordinates[cnt]
1793 transformation = Ali.align()[1]
1814 out_pdb_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".pdb")
1815 out_rmf_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".rmf3")
1816 o.init_pdb(out_pdb_fn,prot)
1817 o.write_pdb(out_pdb_fn,
1818 translate_to_geometric_center=write_pdb_with_centered_coordinates)
1820 tmp_dict[
"local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1821 tmp_dict[
"rmf_file_full_path"]=rmf_name
1822 tmp_dict[
"local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1823 tmp_dict[
"local_rmf_frame_number"]=0
1825 clusstat.write(str(tmp_dict)+
"\n")
1830 h.set_name(
"System")
1832 o.init_rmf(out_rmf_fn, [h], rs)
1834 o.init_rmf(out_rmf_fn, [prot],rs)
1836 o.write_rmf(out_rmf_fn)
1837 o.close_rmf(out_rmf_fn)
1839 if density_custom_ranges:
1840 DensModule.add_subunits_density(prot)
1842 if density_custom_ranges:
1843 DensModule.write_mrc(path=dircluster)
1850 if self.number_of_processes > 1:
1856 rmf_file_name_index_dict)
1858 alignment_coordinates)
1865 [best_score_feature_keyword_list_dict,
1866 rmf_file_name_index_dict],
1872 print(
"setup clustering class")
1875 for n, model_coordinate_dict
in enumerate(all_coordinates):
1876 template_coordinate_dict = {}
1878 if alignment_components
is not None and len(self.cluster_obj.all_coords) == 0:
1880 self.cluster_obj.set_template(alignment_coordinates[n])
1881 self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1882 print(
"Global calculating the distance matrix")
1885 self.cluster_obj.dist_matrix()
1889 self.cluster_obj.do_cluster(number_of_clusters)
1892 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1893 if exit_after_display:
1895 self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1902 print(
"setup clustering class")
1904 self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1905 print(
"clustering with %s clusters" % str(number_of_clusters))
1906 self.cluster_obj.do_cluster(number_of_clusters)
1907 [best_score_feature_keyword_list_dict,
1908 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1911 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1912 if exit_after_display:
1914 if self.number_of_processes > 1:
1922 print(self.cluster_obj.get_cluster_labels())
1923 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1924 print(
"rank %s " % str(self.rank))
1925 print(
"cluster %s " % str(n))
1926 print(
"cluster label %s " % str(cl))
1927 print(self.cluster_obj.get_cluster_label_names(cl))
1930 if density_custom_ranges:
1932 density_custom_ranges,
1935 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1937 os.mkdir(dircluster)
1941 rmsd_dict = {
"AVERAGE_RMSD":
1942 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1943 clusstat = open(dircluster +
"stat.out",
"w")
1944 for k, structure_name
in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1947 tmp_dict.update(rmsd_dict)
1948 index = rmf_file_name_index_dict[structure_name]
1949 for key
in best_score_feature_keyword_list_dict:
1951 key] = best_score_feature_keyword_list_dict[
1957 rmf_name = structure_name.split(
"|")[0]
1958 rmf_frame_number = int(structure_name.split(
"|")[1])
1959 clusstat.write(str(tmp_dict) +
"\n")
1963 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1968 linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1974 if not linking_successful:
1980 states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1981 prot = states[state_number]
1983 prot = prots[state_number]
1987 model_index = self.cluster_obj.get_model_index_from_name(
1989 transformation = self.cluster_obj.get_transformation_to_first_member(
2009 if density_custom_ranges:
2010 DensModule.add_subunits_density(prot)
2014 o.init_pdb(dircluster + str(k) +
".pdb", prot)
2015 o.write_pdb(dircluster + str(k) +
".pdb")
2020 h.set_name(
"System")
2022 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
2024 o.init_rmf(dircluster + str(k) +
".rmf3", [prot],rs)
2025 o.write_rmf(dircluster + str(k) +
".rmf3")
2026 o.close_rmf(dircluster + str(k) +
".rmf3")
2031 if density_custom_ranges:
2032 DensModule.write_mrc(path=dircluster)
2035 if self.number_of_processes>1:
2038 def get_cluster_rmsd(self,cluster_num):
2039 if self.cluster_obj
is None:
2041 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
2043 def save_objects(self, objects, file_name):
2045 outf = open(file_name,
'wb')
2046 pickle.dump(objects, outf)
2049 def load_objects(self, file_name):
2051 inputf = open(file_name,
'rb')
2052 objects = pickle.load(inputf)
A class to simplify create of constraints and movers for an IMP Hierarchy.
A macro for running all the basic operations of analysis.
Sample using molecular dynamics.
A class for reading stat files.
A member of a rigid body, it has internal (local) coordinates.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def get_molecules
Return list of all molecules grouped by state.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
This class initializes the root node of the global IMP.atom.Hierarchy.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Representation of the system.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def build_model
Create model.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
A decorator for a particle with x,y,z coordinates.
def BuildModel0
Construct a component for each subunit (no splitting, nothing fancy).
Class for easy writing of PDBs, RMFs, and stat files.
A macro to build a Representation based on a Topology and lists of movers DEPRECATED - Use BuildSyste...
Tools for clustering and cluster analysis.
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and setup constraints for PMI objects.
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Deprecated building macro - use BuildSystem()
The general base class for IMP exceptions.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
Set up the representation of all proteins and nucleic acid macromolecules.
A macro to help setup and run replica exchange.
A dictionary-like wrapper for reading and storing sequence data.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
Support for the RMF file format for storing hierarchical molecular data and markup.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Sample using replica exchange.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
def get_representation
Return the Representation object.
A decorator for a particle with x,y,z coordinates and a radius.