1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
19 from operator
import itemgetter
20 from collections
import defaultdict
25 """A macro to help setup and run replica exchange.
26 Supports Monte Carlo and molecular dynamics.
27 Produces trajectory RMF files, best PDB structures,
28 and output stat files.
34 monte_carlo_sample_objects=
None,
35 molecular_dynamics_sample_objects=
None,
37 crosslink_restraints=
None,
38 monte_carlo_temperature=1.0,
39 simulated_annealing=
False,
40 simulated_annealing_minimum_temperature=1.0,
41 simulated_annealing_maximum_temperature=2.5,
42 simulated_annealing_minimum_temperature_nframes=100,
43 simulated_annealing_maximum_temperature_nframes=100,
44 replica_exchange_minimum_temperature=1.0,
45 replica_exchange_maximum_temperature=2.5,
47 number_of_best_scoring_models=500,
49 molecular_dynamics_steps=10,
50 molecular_dynamics_max_time_step=1.0,
51 number_of_frames=1000,
52 nframes_write_coordinates=1,
53 write_initial_rmf=
True,
54 initial_rmf_name_suffix=
"initial",
55 stat_file_name_suffix=
"stat",
56 best_pdb_name_suffix=
"model",
58 do_create_directories=
True,
59 global_output_directory=
"./",
62 replica_stat_file_suffix=
"stat_replica",
63 em_object_for_rmf=
None,
65 replica_exchange_object=
None,
68 @param model The IMP model
69 @param representation PMI.representation.Representation object
70 (or list of them, for multi-state modeling)
71 @param root_hier Instead of passing Representation, pass the System hierarchy
72 @param monte_carlo_sample_objects Objects for MC sampling; a list of
73 structural components (generally the representation) that will
74 be moved and restraints with parameters that need to
76 For PMI2: just pass flat list of movers
77 @param molecular_dynamics_sample_objects Objects for MD sampling
78 For PMI2: just pass flat list of particles
79 @param output_objects A list of structural objects and restraints
80 that will be included in output (statistics files). Any object
81 that provides a get_output() method can be used here.
82 @param crosslink_restraints List of cross-link restraints that will
83 be included in output RMF files (for visualization).
84 @param monte_carlo_temperature MC temp (may need to be optimized
85 based on post-sampling analysis)
86 @param simulated_annealing If True, perform simulated annealing
87 @param simulated_annealing_minimum_temperature Should generally be
88 the same as monte_carlo_temperature.
89 @param simulated_annealing_minimum_temperature_nframes Number of
90 frames to compute at minimum temperature.
91 @param simulated_annealing_maximum_temperature_nframes Number of
93 temps > simulated_annealing_maximum_temperature.
94 @param replica_exchange_minimum_temperature Low temp for REX; should
95 generally be the same as monte_carlo_temperature.
96 @param replica_exchange_maximum_temperature High temp for REX
97 @param num_sample_rounds Number of rounds of MC/MD per cycle
98 @param number_of_best_scoring_models Number of top-scoring PDB models
99 to keep around for analysis
100 @param monte_carlo_steps Number of MC steps per round
101 @param molecular_dynamics_steps Number of MD steps per round
102 @param molecular_dynamics_max_time_step Max time step for MD
103 @param number_of_frames Number of REX frames to run
104 @param nframes_write_coordinates How often to write the coordinates
106 @param write_initial_rmf Write the initial configuration
107 @param global_output_directory Folder that will be created to house
109 @param test_mode Set to True to avoid writing any files, just test one frame.
116 self.output_objects = output_objects
117 self.representation = representation
119 if type(representation) == list:
120 self.is_multi_state =
True
121 self.root_hiers = [r.prot
for r
in representation]
122 self.vars[
"number_of_states"] = len(representation)
124 self.is_multi_state =
False
125 self.root_hier = representation.prot
126 self.vars[
"number_of_states"] = 1
127 elif root_hier
and type(root_hier) ==
IMP.atom.Hierarchy and root_hier.get_name()==
'System':
130 self.root_hier = root_hier
131 states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
132 self.vars[
"number_of_states"] = len(states)
134 self.root_hiers = states
135 self.is_multi_state =
True
137 self.root_hier = root_hier
138 self.is_multi_state =
False
140 raise Exception(
"Must provide representation or System hierarchy (root_hier)")
142 self.crosslink_restraints = crosslink_restraints
143 self.em_object_for_rmf = em_object_for_rmf
144 self.monte_carlo_sample_objects = monte_carlo_sample_objects
145 if sample_objects
is not None:
146 self.monte_carlo_sample_objects+=sample_objects
147 self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
148 self.replica_exchange_object = replica_exchange_object
149 self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
150 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
152 "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
154 "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
156 self.vars[
"simulated_annealing"]=\
158 self.vars[
"simulated_annealing_minimum_temperature"]=\
159 simulated_annealing_minimum_temperature
160 self.vars[
"simulated_annealing_maximum_temperature"]=\
161 simulated_annealing_maximum_temperature
162 self.vars[
"simulated_annealing_minimum_temperature_nframes"]=\
163 simulated_annealing_minimum_temperature_nframes
164 self.vars[
"simulated_annealing_maximum_temperature_nframes"]=\
165 simulated_annealing_maximum_temperature_nframes
167 self.vars[
"num_sample_rounds"] = num_sample_rounds
169 "number_of_best_scoring_models"] = number_of_best_scoring_models
170 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
171 self.vars[
"molecular_dynamics_steps"]=molecular_dynamics_steps
172 self.vars[
"number_of_frames"] = number_of_frames
173 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
174 self.vars[
"write_initial_rmf"] = write_initial_rmf
175 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
176 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
177 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
178 self.vars[
"do_clean_first"] = do_clean_first
179 self.vars[
"do_create_directories"] = do_create_directories
180 self.vars[
"global_output_directory"] = global_output_directory
181 self.vars[
"rmf_dir"] = rmf_dir
182 self.vars[
"best_pdb_dir"] = best_pdb_dir
183 self.vars[
"atomistic"] = atomistic
184 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
185 self.vars[
"geometries"] =
None
186 self.test_mode = test_mode
189 if self.vars[
"geometries"]
is None:
190 self.vars[
"geometries"] = list(geometries)
192 self.vars[
"geometries"].extend(geometries)
195 print(
"ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
196 print(
"--- it stores the best scoring pdb models in pdbs/")
197 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
198 print(
"--- variables:")
199 keys = list(self.vars.keys())
202 print(
"------", v.ljust(30), self.vars[v])
204 def get_replica_exchange_object(self):
205 return self.replica_exchange_object
207 def execute_macro(self):
208 temp_index_factor = 100000.0
212 if self.monte_carlo_sample_objects
is not None:
213 print(
"Setting up MonteCarlo")
215 self.monte_carlo_sample_objects,
216 self.vars[
"monte_carlo_temperature"])
217 if self.vars[
"simulated_annealing"]:
218 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
219 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
220 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
221 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
222 sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
223 self.output_objects.append(sampler_mc)
224 samplers.append(sampler_mc)
227 if self.molecular_dynamics_sample_objects
is not None:
228 print(
"Setting up MolecularDynamics")
230 self.molecular_dynamics_sample_objects,
231 self.vars[
"monte_carlo_temperature"],
232 maximum_time_step=self.molecular_dynamics_max_time_step)
233 if self.vars[
"simulated_annealing"]:
234 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
235 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
236 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
237 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
238 sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
239 self.output_objects.append(sampler_md)
240 samplers.append(sampler_md)
243 print(
"Setting up ReplicaExchange")
246 "replica_exchange_minimum_temperature"],
248 "replica_exchange_maximum_temperature"],
250 replica_exchange_object=self.replica_exchange_object)
251 self.replica_exchange_object = rex.rem
253 myindex = rex.get_my_index()
254 self.output_objects.append(rex)
259 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
263 globaldir = self.vars[
"global_output_directory"] +
"/"
264 rmf_dir = globaldir + self.vars[
"rmf_dir"]
265 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
267 if not self.test_mode:
268 if self.vars[
"do_clean_first"]:
271 if self.vars[
"do_create_directories"]:
274 os.makedirs(globaldir)
282 if not self.is_multi_state:
288 for n
in range(self.vars[
"number_of_states"]):
290 os.makedirs(pdb_dir +
"/" + str(n))
296 sw = IMP.pmi.tools.Stopwatch()
297 self.output_objects.append(sw)
299 print(
"Setting up stat file")
301 low_temp_stat_file = globaldir + \
302 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
303 if not self.test_mode:
304 output.init_stat2(low_temp_stat_file,
306 extralabels=[
"rmf_file",
"rmf_frame_index"])
308 print(
"Setting up replica stat file")
309 replica_stat_file = globaldir + \
310 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
311 if not self.test_mode:
312 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
314 if not self.test_mode:
315 print(
"Setting up best pdb files")
316 if not self.is_multi_state:
317 if self.vars[
"number_of_best_scoring_models"] > 0:
318 output.init_pdb_best_scoring(pdb_dir +
"/" +
319 self.vars[
"best_pdb_name_suffix"],
322 "number_of_best_scoring_models"],
323 replica_exchange=
True)
324 output.write_psf(pdb_dir +
"/" +
"model.psf",pdb_dir +
"/" +
325 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
327 if self.vars[
"number_of_best_scoring_models"] > 0:
328 for n
in range(self.vars[
"number_of_states"]):
329 output.init_pdb_best_scoring(pdb_dir +
"/" + str(n) +
"/" +
330 self.vars[
"best_pdb_name_suffix"],
333 "number_of_best_scoring_models"],
334 replica_exchange=
True)
335 output.write_psf(pdb_dir +
"/" + str(n) +
"/" +
"model.psf",pdb_dir +
"/" + str(n) +
"/" +
336 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
339 if self.em_object_for_rmf
is not None:
340 if not self.is_multi_state
or self.pmi2:
341 output_hierarchies = [
343 self.em_object_for_rmf.get_density_as_hierarchy(
346 output_hierarchies = self.root_hiers
347 output_hierarchies.append(
348 self.em_object_for_rmf.get_density_as_hierarchy())
350 if not self.is_multi_state
or self.pmi2:
351 output_hierarchies = [self.root_hier]
353 output_hierarchies = self.root_hiers
356 if not self.test_mode:
357 print(
"Setting up and writing initial rmf coordinate file")
358 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
359 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
361 if self.crosslink_restraints:
362 output.add_restraints_to_rmf(
363 init_suffix +
"." + str(myindex) +
".rmf3",
364 self.crosslink_restraints)
365 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
366 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
370 if not self.test_mode:
371 print(
"Setting up production rmf files")
372 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
373 output.init_rmf(rmfname, output_hierarchies, geometries=self.vars[
"geometries"])
375 if self.crosslink_restraints:
376 output.add_restraints_to_rmf(rmfname, self.crosslink_restraints)
378 ntimes_at_low_temp = 0
382 self.replica_exchange_object.set_was_used(
True)
383 nframes = self.vars[
"number_of_frames"]
386 for i
in range(nframes):
390 for nr
in range(self.vars[
"num_sample_rounds"]):
391 if sampler_md
is not None:
393 self.vars[
"molecular_dynamics_steps"])
394 if sampler_mc
is not None:
395 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
397 self.model).evaluate(
False)
398 output.set_output_entry(
"score", score)
400 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
402 if min_temp_index == my_temp_index:
403 print(
"--- frame %s score %s " % (str(i), str(score)))
405 if not self.test_mode:
406 if i % self.vars[
"nframes_write_coordinates"]==0:
407 print(
'--- writing coordinates')
408 if self.vars[
"number_of_best_scoring_models"] > 0:
409 output.write_pdb_best_scoring(score)
410 output.write_rmf(rmfname)
411 output.set_output_entry(
"rmf_file", rmfname)
412 output.set_output_entry(
"rmf_frame_index", ntimes_at_low_temp)
414 output.set_output_entry(
"rmf_file", rmfname)
415 output.set_output_entry(
"rmf_frame_index",
'-1')
416 output.write_stat2(low_temp_stat_file)
417 ntimes_at_low_temp += 1
419 if not self.test_mode:
420 output.write_stat2(replica_stat_file)
421 rex.swap_temp(i, score)
422 if self.representation:
423 for p
in self.representation._protocol_output:
424 p.add_replica_exchange(self)
426 if not self.test_mode:
427 print(
"closing production rmf files")
428 output.close_rmf(rmfname)
434 """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
435 Easily create multi-state systems by calling this macro
436 repeatedly with different TopologyReader objects!
437 A useful function is get_molecules() which returns the PMI Molecules grouped by state
438 as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
439 Quick multi-state system:
442 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
443 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
444 bs = IMP.pmi.macros.BuildSystem(mdl)
445 bs.add_state(reader1)
446 bs.add_state(reader2)
447 bs.execute_macro() # build everything including degrees of freedom
448 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
449 ### now you have a two state system, you add restraints etc
451 \note The "domain name" entry of the topology reader is not used.
452 All molecules are set up by the component name, but split into rigid bodies
457 sequence_connectivity_scale=4.0,
458 force_create_gmm_files=
False,
461 @param mdl An IMP Model
462 @param sequence_connectivity_scale For scaling the connectivity restraint
463 @param force_create_gmm_files If True, will sample and create GMMs
464 no matter what. If False, will only only sample if the
465 files don't exist. If number of Gaussians is zero, won't
467 @param resolutions The resolutions to build for structured regions
472 self._domain_res = []
474 self.force_create_gmm_files = force_create_gmm_files
475 self.resolutions = resolutions
479 keep_chain_id=
False):
480 """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
481 When you are done adding states, call execute_macro()
482 @param reader The TopologyReader object
484 state = self.system.create_state()
485 self._readers.append(reader)
486 these_domain_res = {}
488 chain_ids = string.ascii_uppercase+string.ascii_lowercase+
'0123456789'
493 for molname
in reader.get_molecules():
494 copies = reader.get_molecules()[molname].domains
495 for nc,copyname
in enumerate(copies):
496 print(
"BuildSystem.add_state: setting up molecule %s copy number %s" % (molname,str(nc)))
497 copy = copies[copyname]
499 if keep_chain_id ==
True:
500 chain_id = copy[0].chain
502 chain_id = chain_ids[numchain]
505 fasta_flag=copy[0].fasta_flag
506 if fasta_flag ==
"RNA" or fasta_flag ==
"DNA": is_nucleic=
True
509 print(
"BuildSystem.add_state: molecule %s sequence has %s residues" % (molname,len(seq)))
510 orig_mol = state.create_molecule(molname,
516 print(
"BuildSystem.add_state: creating a copy for molecule %s" % molname)
517 mol = orig_mol.create_copy(chain_id)
520 for domainnumber,domain
in enumerate(copy):
521 print(
"BuildSystem.add_state: ---- setting up domain %s of molecule %s" % (domainnumber,molname))
524 these_domains[domain.get_unique_name()] = domain
525 if domain.residue_range==[]
or domain.residue_range
is None:
526 domain_res = mol.get_residues()
528 start = domain.residue_range[0]+domain.pdb_offset
529 if domain.residue_range[1]==
'END':
530 end = len(mol.sequence)
532 end = domain.residue_range[1]+domain.pdb_offset
533 domain_res = mol.residue_range(start-1,end-1)
534 print(
"BuildSystem.add_state: -------- domain %s of molecule %s extends from residue %s to residue %s " % (domainnumber,molname,start,end))
535 if domain.pdb_file==
"BEADS":
536 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by BEADS " % (domainnumber,molname))
537 mol.add_representation(
539 resolutions=[domain.bead_size],
540 setup_particles_as_densities=(
541 domain.em_residues_per_gaussian!=0),
542 color = domain.color)
543 these_domain_res[domain.get_unique_name()] = (set(),domain_res)
544 elif domain.pdb_file==
"IDEAL_HELIX":
545 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by IDEAL_HELIX " % (domainnumber,molname))
546 mol.add_representation(
548 resolutions=self.resolutions,
550 density_residues_per_component=domain.em_residues_per_gaussian,
551 density_prefix=domain.density_prefix,
552 density_force_compute=self.force_create_gmm_files,
553 color = domain.color)
554 these_domain_res[domain.get_unique_name()] = (domain_res,set())
556 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by pdb file %s " % (domainnumber,molname,domain.pdb_file))
557 domain_atomic = mol.add_structure(domain.pdb_file,
559 domain.residue_range,
562 domain_non_atomic = domain_res - domain_atomic
563 if not domain.em_residues_per_gaussian:
564 mol.add_representation(domain_atomic,
565 resolutions=self.resolutions,
566 color = domain.color)
567 if len(domain_non_atomic)>0:
568 mol.add_representation(domain_non_atomic,
569 resolutions=[domain.bead_size],
570 color = domain.color)
572 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by gaussians " % (domainnumber,molname))
573 mol.add_representation(
575 resolutions=self.resolutions,
576 density_residues_per_component=domain.em_residues_per_gaussian,
577 density_prefix=domain.density_prefix,
578 density_force_compute=self.force_create_gmm_files,
579 color = domain.color)
580 if len(domain_non_atomic)>0:
581 mol.add_representation(domain_non_atomic,
582 resolutions=[domain.bead_size],
583 setup_particles_as_densities=
True,
584 color = domain.color)
585 these_domain_res[domain.get_unique_name()] = (
586 domain_atomic,domain_non_atomic)
587 self._domain_res.append(these_domain_res)
588 self._domains.append(these_domains)
589 print(
'BuildSystem.add_state: State',len(self.system.states),
'added')
592 """Return list of all molecules grouped by state.
593 For each state, it's a dictionary of Molecules where key is the molecule name
595 return [s.get_molecules()
for s
in self.system.get_states()]
597 def get_molecule(self,molname,copy_index=0,state_index=0):
598 return self.system.get_states()[state_index].
get_molecules()[molname][copy_index]
600 def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04, max_bead_trans=4.0, max_srb_trans=4.0,max_srb_rot=0.04):
601 """Builds representations and sets up degrees of freedom"""
602 print(
"BuildSystem.execute_macro: building representations")
603 self.root_hier = self.system.build()
605 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
607 for nstate,reader
in enumerate(self._readers):
608 rbs = reader.get_rigid_bodies()
609 srbs = reader.get_super_rigid_bodies()
610 csrbs = reader.get_chains_of_super_rigid_bodies()
613 domains_in_rbs = set()
615 print(
"BuildSystem.execute_macro: -------- building rigid body %s" % (str(rblist)))
616 all_res = IMP.pmi.tools.OrderedSet()
617 bead_res = IMP.pmi.tools.OrderedSet()
619 domain = self._domains[nstate][dname]
620 print(
"BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
621 all_res|=self._domain_res[nstate][dname][0]
622 bead_res|=self._domain_res[nstate][dname][1]
623 domains_in_rbs.add(dname)
625 print(
"BuildSystem.execute_macro: -------- \
626 creating rigid body with max_trans %s \
627 max_rot %s non_rigid_max_trans %s" \
628 % (str(max_rb_trans),str(max_rb_rot),str(max_bead_trans)))
629 self.dof.create_rigid_body(all_res,
630 nonrigid_parts=bead_res,
631 max_trans=max_rb_trans,
633 nonrigid_max_trans=max_bead_trans)
636 for dname
in self._domains[nstate]:
637 domain = self._domains[nstate][dname]
638 if domain.pdb_file==
"BEADS" and dname
not in domains_in_rbs:
639 self.dof.create_flexible_beads(
640 self._domain_res[nstate][dname][1],max_trans=max_bead_trans)
644 print(
"BuildSystem.execute_macro: -------- building super rigid body %s" % (str(srblist)))
645 all_res = IMP.pmi.tools.OrderedSet()
646 for dname
in srblist:
647 print(
"BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
648 all_res|=self._domain_res[nstate][dname][0]
649 all_res|=self._domain_res[nstate][dname][1]
651 print(
"BuildSystem.execute_macro: -------- \
652 creating super rigid body with max_trans %s max_rot %s " \
653 % (str(max_srb_trans),str(max_srb_rot)))
654 self.dof.create_super_rigid_body(all_res,max_trans=max_srb_trans,max_rot=max_srb_rot)
657 for csrblist
in csrbs:
658 all_res = IMP.pmi.tools.OrderedSet()
659 for dname
in csrblist:
660 all_res|=self._domain_res[nstate][dname][0]
661 all_res|=self._domain_res[nstate][dname][1]
662 all_res = list(all_res)
663 all_res.sort(key=
lambda r:r.get_index())
665 self.dof.create_main_chain_mover(all_res)
666 return self.root_hier,self.dof
670 """A macro to build a Representation based on a Topology and lists of movers
671 DEPRECATED - Use BuildSystem instead.
675 component_topologies,
676 list_of_rigid_bodies=[],
677 list_of_super_rigid_bodies=[],
678 chain_of_super_rigid_bodies=[],
679 sequence_connectivity_scale=4.0,
680 add_each_domain_as_rigid_body=
False,
681 force_create_gmm_files=
False):
683 @param model The IMP model
684 @param component_topologies List of
685 IMP.pmi.topology.ComponentTopology items
686 @param list_of_rigid_bodies List of lists of domain names that will
687 be moved as rigid bodies.
688 @param list_of_super_rigid_bodies List of lists of domain names
689 that will move together in an additional Monte Carlo move.
690 @param chain_of_super_rigid_bodies List of lists of domain names
691 (choices can only be from the same molecule). Each of these
692 groups will be moved rigidly. This helps to sample more
693 efficiently complex topologies, made of several rigid bodies,
694 connected by flexible linkers.
695 @param sequence_connectivity_scale For scaling the connectivity
697 @param add_each_domain_as_rigid_body That way you don't have to
698 put all of them in the list
699 @param force_create_gmm_files If True, will sample and create GMMs
700 no matter what. If False, will only only sample if the
701 files don't exist. If number of Gaussians is zero, won't
707 disorderedlength=
False)
709 data=component_topologies
710 if list_of_rigid_bodies==[]:
711 print(
"WARNING: No list of rigid bodies inputted to build_model()")
712 if list_of_super_rigid_bodies==[]:
713 print(
"WARNING: No list of super rigid bodies inputted to build_model()")
714 if chain_of_super_rigid_bodies==[]:
715 print(
"WARNING: No chain of super rigid bodies inputted to build_model()")
716 all_dnames = set([d
for sublist
in list_of_rigid_bodies+list_of_super_rigid_bodies\
717 +chain_of_super_rigid_bodies
for d
in sublist])
718 all_available = set([c._domain_name
for c
in component_topologies])
719 if not all_dnames <= all_available:
720 raise ValueError(
"All requested movers must reference domain "
721 "names in the component topologies")
725 super_rigid_bodies={}
726 chain_super_rigid_bodies={}
730 comp_name = c.molname
731 hier_name = c._domain_name
733 fasta_file = c.fasta_file
734 fasta_id = c.fasta_id
735 pdb_name = c.pdb_file
737 res_range = c.residue_range
738 offset = c.pdb_offset
739 bead_size = c.bead_size
740 em_num_components = c.em_residues_per_gaussian
741 em_txt_file_name = c.gmm_file
742 em_mrc_file_name = c.mrc_file
744 if comp_name
not in self.simo.get_component_names():
745 self.simo.create_component(comp_name,color=0.0)
746 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
749 if em_num_components==0:
753 if (
not os.path.isfile(em_txt_file_name))
or force_create_gmm_files:
760 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
761 res_range,include_res0,beadsize=bead_size,
762 color=color,offset=offset)
763 if em_num_components!=0:
765 print(
"will read GMM files")
767 print(
"will calculate GMMs")
769 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
770 em_mrc_file_name,em_num_components,read_em_files)
771 self.simo.add_all_atom_densities(comp_name, particles=beads)
776 self.resdensities[hier_name]=dens_hier
777 self.domain_dict[hier_name]=outhier+dens_hier
780 for c
in self.simo.get_component_names():
781 self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
782 self.simo.setup_component_geometry(c)
785 for rblist
in list_of_rigid_bodies:
787 for rbmember
in rblist:
788 rb+=[h
for h
in self.domain_dict[rbmember]]
789 self.simo.set_rigid_body_from_hierarchies(rb)
790 for srblist
in list_of_super_rigid_bodies:
792 for srbmember
in rblist:
793 srb+=[h
for h
in self.domain_dict[srbmember]]
794 self.simo.set_super_rigid_body_from_hierarchies(srb)
795 for clist
in chain_of_super_rigid_bodies:
797 for crbmember
in rblist:
798 crb+=[h
for h
in self.domain_dict[crbmember]]
799 self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
801 self.simo.set_floppy_bodies()
802 self.simo.setup_bonds()
805 '''Return the Representation object'''
808 def get_density_hierarchies(self,hier_name_list):
812 for hn
in hier_name_list:
814 dens_hier_list+=self.resdensities[hn]
815 return dens_hier_list
817 def set_gmm_models_directory(self,directory_name):
818 self.gmm_models_directory=directory_name
820 def get_pdb_bead_bits(self,hierarchy):
825 if "_pdb" in h.get_name():pdbbits.append(h)
826 if "_bead" in h.get_name():beadbits.append(h)
827 if "_helix" in h.get_name():helixbits.append(h)
828 return (pdbbits,beadbits,helixbits)
830 def scale_bead_radii(self,nresidues,scale):
832 for h
in self.domain_dict:
833 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
834 slope=(1.0-scale)/(1.0-float(nresidues))
839 if b
not in scaled_beads:
845 scale_factor=slope*float(num_residues)+1.0
847 new_radius=scale_factor*radius
850 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
853 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
855 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
858 for pb
in pdbbits+helixbits:
862 number_of_residues=len(set(res_ind))
866 outhier+=simo.add_component_density(compname,
868 num_components=num_components,
870 inputfile=txtfilename)
871 if len(helixbits)!=0:
872 outhier+=simo.add_component_density(compname,
874 num_components=num_components,
876 inputfile=txtfilename)
881 num_components=number_of_residues//abs(num_components)+1
882 outhier+=simo.add_component_density(compname,
884 num_components=num_components,
886 outputfile=txtfilename,
887 outputmap=mrcfilename,
888 multiply_by_total_mass=
True)
890 if len(helixbits)!=0:
891 num_components=number_of_residues//abs(num_components)+1
892 outhier+=simo.add_component_density(compname,
894 num_components=num_components,
896 outputfile=txtfilename,
897 outputmap=mrcfilename,
898 multiply_by_total_mass=
True)
900 return outhier,beadbits
902 def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
903 beadsize=5,color=0.0,offset=0):
904 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" :
906 outhier=simo.autobuild_model(comname,
910 resolutions=[0,1,10],
913 missingbeadsize=beadsize)
915 outhier=simo.autobuild_model(comname,
922 missingbeadsize=beadsize)
925 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" :
926 outhier=simo.add_component_ideal_helix(comname,
932 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" :
933 outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
937 seq_len=len(simo.sequence_dict[comname])
938 outhier=simo.add_component_necklace(comname,
948 """Deprecated building macro - use BuildSystem()"""
952 @param representation The PMI representation
954 self.simo=representation
955 self.gmm_models_directory=
"."
957 self.rmf_frame_number={}
958 self.rmf_names_map={}
959 self.rmf_component_name={}
961 def set_gmm_models_directory(self,directory_name):
962 self.gmm_models_directory=directory_name
964 def build_model(self,data_structure,sequence_connectivity_scale=4.0,
965 sequence_connectivity_resolution=10,
966 rmf_file=
None,rmf_frame_number=0,rmf_file_map=
None,
967 skip_connectivity_these_domains=
None,
968 skip_gaussian_in_rmf=
False, skip_gaussian_in_representation=
False):
970 @param data_structure List of lists containing these entries:
971 comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
972 res_range, read_em_files, bead_size, rb, super_rb,
973 em_num_components, em_txt_file_name, em_mrc_file_name, chain_of_super_rb,
974 keep_gaussian_flexible_beads (optional)
975 @param sequence_connectivity_scale
977 @param rmf_frame_number
978 @param rmf_file_map : a dictionary that map key=component_name:value=(rmf_file_name,
984 super_rigid_bodies={}
985 chain_super_rigid_bodies={}
988 for d
in data_structure:
996 res_range = d[7][0:2]
1001 read_em_files = d[8]
1005 em_num_components = d[12]
1006 em_txt_file_name = d[13]
1007 em_mrc_file_name = d[14]
1008 chain_of_super_rb = d[15]
1010 keep_gaussian_flexible_beads = d[16]
1012 keep_gaussian_flexible_beads =
True
1014 if comp_name
not in self.simo.get_component_names():
1015 self.simo.create_component(comp_name,color=0.0)
1016 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
1017 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
1020 if not read_em_files
is None:
1021 if em_txt_file_name
is " ": em_txt_file_name=self.gmm_models_directory+
"/"+hier_name+
".txt"
1022 if em_mrc_file_name
is " ": em_mrc_file_name=self.gmm_models_directory+
"/"+hier_name+
".mrc"
1025 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
1027 if (keep_gaussian_flexible_beads):
1028 self.simo.add_all_atom_densities(comp_name, particles=beads)
1033 self.resdensities[hier_name]=dens_hier
1034 self.domain_dict[hier_name]=outhier+dens_hier
1037 if rb
not in rigid_bodies:
1038 rigid_bodies[rb]=[h
for h
in self.domain_dict[hier_name]]
1040 rigid_bodies[rb]+=[h
for h
in self.domain_dict[hier_name]]
1043 if super_rb
is not None:
1045 if k
not in super_rigid_bodies:
1046 super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
1048 super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
1050 if chain_of_super_rb
is not None:
1051 for k
in chain_of_super_rb:
1052 if k
not in chain_super_rigid_bodies:
1053 chain_super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
1055 chain_super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
1059 self.rigid_bodies=rigid_bodies
1061 for c
in self.simo.get_component_names():
1062 if rmf_file
is not None:
1064 rfn=rmf_frame_number
1065 self.simo.set_coordinates_from_rmf(c, rf,rfn,
1066 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1068 for k
in rmf_file_map:
1070 rf=rmf_file_map[k][0]
1071 rfn=rmf_file_map[k][1]
1072 rcname=rmf_file_map[k][2]
1073 self.simo.set_coordinates_from_rmf(cname, rf,rfn,rcname,
1074 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1076 if c
in self.rmf_file:
1078 rfn=self.rmf_frame_number[c]
1079 rfm=self.rmf_names_map[c]
1080 rcname=self.rmf_component_name[c]
1081 self.simo.set_coordinates_from_rmf(c, rf,rfn,representation_name_to_rmf_name_map=rfm,
1082 rmf_component_name=rcname,
1083 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1084 if (
not skip_connectivity_these_domains)
or (c
not in skip_connectivity_these_domains):
1085 self.simo.setup_component_sequence_connectivity(c,
1086 resolution=sequence_connectivity_resolution,
1087 scale=sequence_connectivity_scale)
1088 self.simo.setup_component_geometry(c)
1090 for rb
in rigid_bodies:
1091 self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
1093 for k
in super_rigid_bodies:
1094 self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
1096 for k
in chain_super_rigid_bodies:
1097 self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
1099 self.simo.set_floppy_bodies()
1100 self.simo.setup_bonds()
1102 def set_main_chain_mover(self,hier_name,lengths=[5,10,20,30]):
1103 hiers=self.domain_dict[hier_name]
1104 for length
in lengths:
1105 for n
in range(len(hiers)-length):
1106 hs=hiers[n+1:n+length-1]
1107 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n+length].get_particle()),min_size=3)
1108 for n
in range(1,len(hiers)-1,5):
1110 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1112 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1115 def set_rmf_file(self,component_name,rmf_file,rmf_frame_number,rmf_names_map=None,rmf_component_name=None):
1116 self.rmf_file[component_name]=rmf_file
1117 self.rmf_frame_number[component_name]=rmf_frame_number
1118 self.rmf_names_map[component_name]=rmf_names_map
1119 self.rmf_component_name[component_name]=rmf_component_name
1121 def get_density_hierarchies(self,hier_name_list):
1125 for hn
in hier_name_list:
1127 dens_hier_list+=self.resdensities[hn]
1128 return dens_hier_list
1130 def get_pdb_bead_bits(self,hierarchy):
1135 if "_pdb" in h.get_name():pdbbits.append(h)
1136 if "_bead" in h.get_name():beadbits.append(h)
1137 if "_helix" in h.get_name():helixbits.append(h)
1138 return (pdbbits,beadbits,helixbits)
1140 def scale_bead_radii(self,nresidues,scale):
1142 for h
in self.domain_dict:
1143 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
1144 slope=(1.0-scale)/(1.0-float(nresidues))
1149 if b
not in scaled_beads:
1155 scale_factor=slope*float(num_residues)+1.0
1157 new_radius=scale_factor*radius
1160 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
1163 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
1165 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
1169 for pb
in pdbbits+helixbits:
1173 number_of_residues=len(set(res_ind))
1177 outhier+=simo.add_component_density(compname,
1179 num_components=num_components,
1181 inputfile=txtfilename)
1182 if len(helixbits)!=0:
1183 outhier+=simo.add_component_density(compname,
1185 num_components=num_components,
1187 inputfile=txtfilename)
1192 if num_components<0:
1195 num_components=number_of_residues/abs(num_components)
1196 outhier+=simo.add_component_density(compname,
1198 num_components=num_components,
1200 outputfile=txtfilename,
1201 outputmap=mrcfilename,
1202 multiply_by_total_mass=
True)
1204 if len(helixbits)!=0:
1205 if num_components<0:
1208 num_components=number_of_residues/abs(num_components)
1209 outhier+=simo.add_component_density(compname,
1211 num_components=num_components,
1213 outputfile=txtfilename,
1214 outputmap=mrcfilename,
1215 multiply_by_total_mass=
True)
1217 return outhier,beadbits
1219 def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
1221 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is not "DENSITY" :
1222 if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
1224 outhier=simo.autobuild_model(comname,
1228 resolutions=[0,1,10],
1231 missingbeadsize=beadsize)
1233 outhier=simo.autobuild_model(comname,
1240 missingbeadsize=beadsize)
1242 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is not "DENSITY" :
1243 outhier=simo.add_component_ideal_helix(comname,
1249 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" and pdbname
is not "DENSITY" :
1252 seq_len=len(simo.sequence_dict[comname])
1253 outhier=simo.add_component_necklace(comname,resrange[0],seq_len,beadsize,color=color)
1255 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is "DENSITY" :
1260 seq_len=len(simo.sequence_dict[comname])
1261 outhier=simo.add_component_necklace(comname,
1268 def set_coordinates(self,hier_name,xyz_tuple):
1269 hier=self.domain_dict[hier_name]
1277 def save_rmf(self,rmfname):
1280 o.init_rmf(rmfname,[self.simo.prot])
1281 o.write_rmf(rmfname)
1282 o.close_rmf(rmfname)
1291 missing_bead_size=20,
1292 residue_per_gaussian=
None):
1294 Construct a component for each subunit (no splitting, nothing fancy).
1295 You can pass the resolutions and the bead size for the missing residue regions.
1296 To use this macro, you must provide the following data structure:
1297 Component pdbfile chainid rgb color fastafile sequence id
1299 data = [("Rpb1", pdbfile, "A", 0.00000000, (fastafile, 0)),
1300 ("Rpb2", pdbfile, "B", 0.09090909, (fastafile, 1)),
1301 ("Rpb3", pdbfile, "C", 0.18181818, (fastafile, 2)),
1302 ("Rpb4", pdbfile, "D", 0.27272727, (fastafile, 3)),
1303 ("Rpb5", pdbfile, "E", 0.36363636, (fastafile, 4)),
1304 ("Rpb6", pdbfile, "F", 0.45454545, (fastafile, 5)),
1305 ("Rpb7", pdbfile, "G", 0.54545455, (fastafile, 6)),
1306 ("Rpb8", pdbfile, "H", 0.63636364, (fastafile, 7)),
1307 ("Rpb9", pdbfile, "I", 0.72727273, (fastafile, 8)),
1308 ("Rpb10", pdbfile, "L", 0.81818182, (fastafile, 9)),
1309 ("Rpb11", pdbfile, "J", 0.90909091, (fastafile, 10)),
1310 ("Rpb12", pdbfile, "K", 1.00000000, (fastafile, 11))]
1320 component_name = d[0]
1324 fasta_file = d[4][0]
1326 fastids = IMP.pmi.tools.get_ids_from_fasta_file(fasta_file)
1327 fasta_file_id = d[4][1]
1329 r.create_component(component_name,
1332 r.add_component_sequence(component_name,
1334 id=fastids[fasta_file_id])
1336 hierarchies = r.autobuild_model(component_name,
1339 resolutions=resolutions,
1340 missingbeadsize=missing_bead_size)
1342 r.show_component_table(component_name)
1344 r.set_rigid_bodies([component_name])
1346 r.set_chain_of_super_rigid_bodies(
1351 r.setup_component_sequence_connectivity(component_name, resolution=1)
1352 r.setup_component_geometry(component_name)
1356 r.set_floppy_bodies()
1359 r.set_current_coordinates_as_reference_for_rmsd(
"Reference")
1366 """A macro for running all the basic operations of analysis.
1367 Includes clustering, precision analysis, and making ensemble density maps.
1368 A number of plots are also supported.
1371 merge_directories=[
"./"],
1372 stat_file_name_suffix=
"stat",
1373 best_pdb_name_suffix=
"model",
1374 do_clean_first=
True,
1375 do_create_directories=
True,
1376 global_output_directory=
"output/",
1377 replica_stat_file_suffix=
"stat_replica",
1378 global_analysis_result_directory=
"./analysis/",
1381 @param model The IMP model
1382 @param stat_file_name_suffix
1383 @param merge_directories The directories containing output files
1384 @param best_pdb_name_suffix
1385 @param do_clean_first
1386 @param do_create_directories
1387 @param global_output_directory Where everything is
1388 @param replica_stat_file_suffix
1389 @param global_analysis_result_directory
1390 @param test_mode If True, nothing is changed on disk
1394 from mpi4py
import MPI
1395 self.comm = MPI.COMM_WORLD
1396 self.rank = self.comm.Get_rank()
1397 self.number_of_processes = self.comm.size
1400 self.number_of_processes = 1
1402 self.test_mode = test_mode
1403 self._protocol_output = []
1404 self.cluster_obj =
None
1406 stat_dir = global_output_directory
1407 self.stat_files = []
1409 for rd
in merge_directories:
1410 stat_files = glob.glob(os.path.join(rd,stat_dir,
"stat.*.out"))
1411 if len(stat_files)==0:
1412 print(
"WARNING: no stat files found in",os.path.join(rd,stat_dir))
1413 self.stat_files += stat_files
1416 """Capture details of the modeling protocol.
1417 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
1419 self._protocol_output.append(p)
1422 score_key=
"SimplifiedModel_Total_Score_None",
1423 rmf_file_key=
"rmf_file",
1424 rmf_file_frame_key=
"rmf_frame_index",
1427 nframes_trajectory=10000):
1428 """ Get a trajectory of the modeling run, for generating demonstrative movies
1429 @param score_key The score for ranking models
1430 @param rmf_file_key Key pointing to RMF filename
1431 @param rmf_file_frame_key Key pointing to RMF frame number
1432 @param outputdir The local output directory used in the run
1433 @param get_every Extract every nth frame
1434 @param nframes_trajectory Total number of frames of the trajectory
1436 from operator
import itemgetter
1444 rmf_file_list=trajectory_models[0]
1445 rmf_file_frame_list=trajectory_models[1]
1446 score_list=list(map(float, trajectory_models[2]))
1448 max_score=max(score_list)
1449 min_score=min(score_list)
1452 bins=[(max_score-min_score)*math.exp(-float(i))+min_score
for i
in range(nframes_trajectory)]
1453 binned_scores=[
None]*nframes_trajectory
1454 binned_model_indexes=[-1]*nframes_trajectory
1456 for model_index,s
in enumerate(score_list):
1457 bins_score_diffs=[abs(s-b)
for b
in bins]
1458 bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1459 if binned_scores[bin_index]==
None:
1460 binned_scores[bin_index]=s
1461 binned_model_indexes[bin_index]=model_index
1463 old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1464 new_diff=abs(s-bins[bin_index])
1465 if new_diff < old_diff:
1466 binned_scores[bin_index]=s
1467 binned_model_indexes[bin_index]=model_index
1469 print(binned_scores)
1470 print(binned_model_indexes)
1473 def _expand_ambiguity(self,prot,d):
1474 """If using PMI2, expand the dictionary to include copies as ambiguous options
1475 This also keeps the states separate.
1480 if '..' in key
or (type(val)
is tuple
and len(val)>=3):
1483 states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
1484 if type(val)
is tuple:
1492 for nst
in range(len(states)):
1494 copies = sel.get_selected_particles(with_representation=
False)
1496 for nc
in range(len(copies)):
1498 newdict[
'%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
1500 newdict[
'%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
1507 score_key=
"SimplifiedModel_Total_Score_None",
1508 rmf_file_key=
"rmf_file",
1509 rmf_file_frame_key=
"rmf_frame_index",
1511 prefiltervalue=
None,
1514 alignment_components=
None,
1515 number_of_best_scoring_models=10,
1516 rmsd_calculation_components=
None,
1517 distance_matrix_file=
'distances.mat',
1518 load_distance_matrix_file=
False,
1519 skip_clustering=
False,
1520 number_of_clusters=1,
1522 exit_after_display=
True,
1524 first_and_last_frames=
None,
1525 density_custom_ranges=
None,
1526 write_pdb_with_centered_coordinates=
False,
1528 """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
1529 Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
1530 Can pass None for copy or state to ignore that field.
1531 If you don't pass a specific copy number
1532 @param score_key The score for ranking models. Use "Total_Score"
1533 instead of default for PMI2.
1534 @param rmf_file_key Key pointing to RMF filename
1535 @param rmf_file_frame_key Key pointing to RMF frame number
1536 @param state_number State number to analyze
1537 @param prefiltervalue Only include frames where the
1538 score key is below this value
1539 @param feature_keys Keywords for which you want to
1540 calculate average, medians, etc.
1541 If you pass "Keyname" it'll include everything that matches "*Keyname*"
1542 @param outputdir The local output directory used in the run
1543 @param alignment_components Dictionary with keys=groupname, values are tuples
1544 for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1545 @param number_of_best_scoring_models Num models to keep per run
1546 @param rmsd_calculation_components For calculating RMSD
1547 (same format as alignment_components)
1548 @param distance_matrix_file Where to store/read the distance matrix
1549 @param load_distance_matrix_file Try to load the distance matrix file
1550 @param skip_clustering Just extract the best scoring models
1552 @param number_of_clusters Number of k-means clusters
1553 @param display_plot Display the distance matrix
1554 @param exit_after_display Exit after displaying distance matrix
1555 @param get_every Extract every nth frame
1556 @param first_and_last_frames A tuple with the first and last frames to be
1557 analyzed. Values are percentages!
1558 Default: get all frames
1559 @param density_custom_ranges For density calculation
1560 (same format as alignment_components)
1561 @param write_pdb_with_centered_coordinates
1562 @param voxel_size Used for the density output
1564 self._outputdir = outputdir
1565 self._number_of_clusters = number_of_clusters
1566 for p
in self._protocol_output:
1567 p.add_replica_exchange_analysis(self)
1578 if not load_distance_matrix_file:
1579 if len(self.stat_files)==0: print(
"ERROR: no stat file found in the given path");
return
1580 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1581 self.stat_files,self.number_of_processes)[self.rank]
1585 orig_score_key = score_key
1586 if score_key
not in po.get_keys():
1587 if 'Total_Score' in po.get_keys():
1588 score_key =
'Total_Score'
1589 print(
"WARNING: Using 'Total_Score' instead of "
1590 "'SimplifiedModel_Total_Score_None' for the score key")
1591 for k
in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1592 if k
in feature_keys:
1593 print(
"WARNING: no need to pass " +k+
" to feature_keys.")
1594 feature_keys.remove(k)
1603 rmf_file_list=best_models[0]
1604 rmf_file_frame_list=best_models[1]
1605 score_list=best_models[2]
1606 feature_keyword_list_dict=best_models[3]
1612 if self.number_of_processes > 1:
1616 rmf_file_frame_list)
1617 for k
in feature_keyword_list_dict:
1619 feature_keyword_list_dict[k])
1622 score_rmf_tuples = list(zip(score_list,
1624 rmf_file_frame_list,
1625 list(range(len(score_list)))))
1627 if density_custom_ranges:
1628 for k
in density_custom_ranges:
1629 if type(density_custom_ranges[k])
is not list:
1630 raise Exception(
"Density custom ranges: values must be lists of tuples")
1633 if first_and_last_frames
is not None:
1634 nframes = len(score_rmf_tuples)
1635 first_frame = int(first_and_last_frames[0] * nframes)
1636 last_frame = int(first_and_last_frames[1] * nframes)
1637 if last_frame > len(score_rmf_tuples):
1639 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1642 best_score_rmf_tuples = sorted(score_rmf_tuples,
1643 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1644 best_score_rmf_tuples=[t+(n,)
for n,t
in enumerate(best_score_rmf_tuples)]
1646 best_score_feature_keyword_list_dict = defaultdict(list)
1647 for tpl
in best_score_rmf_tuples:
1649 for f
in feature_keyword_list_dict:
1650 best_score_feature_keyword_list_dict[f].append(
1651 feature_keyword_list_dict[f][index])
1652 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1653 best_score_rmf_tuples,
1654 self.number_of_processes)[self.rank]
1657 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(self.model,
1659 my_best_score_rmf_tuples[0][1])[0]
1661 if rmsd_calculation_components
is not None:
1662 tmp = self._expand_ambiguity(prot_ahead,rmsd_calculation_components)
1663 if tmp!=rmsd_calculation_components:
1664 print(
'Detected ambiguity, expand rmsd components to',tmp)
1665 rmsd_calculation_components = tmp
1666 if alignment_components
is not None:
1667 tmp = self._expand_ambiguity(prot_ahead,alignment_components)
1668 if tmp!=alignment_components:
1669 print(
'Detected ambiguity, expand alignment components to',tmp)
1670 alignment_components = tmp
1676 my_best_score_rmf_tuples[0],
1677 rmsd_calculation_components,
1678 state_number=state_number)
1680 my_best_score_rmf_tuples,
1681 alignment_components,
1682 rmsd_calculation_components,
1683 state_number=state_number)
1688 all_coordinates=got_coords[0]
1689 alignment_coordinates=got_coords[1]
1690 rmsd_coordinates=got_coords[2]
1691 rmf_file_name_index_dict=got_coords[3]
1692 all_rmf_file_names=got_coords[4]
1698 if density_custom_ranges:
1700 density_custom_ranges,
1703 dircluster=os.path.join(outputdir,
"all_models."+str(n))
1709 os.mkdir(dircluster)
1712 clusstat=open(os.path.join(dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1713 for cnt,tpl
in enumerate(my_best_score_rmf_tuples):
1715 rmf_frame_number=tpl[2]
1718 for key
in best_score_feature_keyword_list_dict:
1719 tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1722 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1727 linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1733 if not linking_successful:
1740 states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1741 prot = states[state_number]
1743 prot = prots[state_number]
1748 coords_f1=alignment_coordinates[cnt]
1750 coords_f2=alignment_coordinates[cnt]
1753 transformation = Ali.align()[1]
1774 out_pdb_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".pdb")
1775 out_rmf_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".rmf3")
1776 o.init_pdb(out_pdb_fn,prot)
1777 o.write_pdb(out_pdb_fn,
1778 translate_to_geometric_center=write_pdb_with_centered_coordinates)
1780 tmp_dict[
"local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1781 tmp_dict[
"rmf_file_full_path"]=rmf_name
1782 tmp_dict[
"local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1783 tmp_dict[
"local_rmf_frame_number"]=0
1785 clusstat.write(str(tmp_dict)+
"\n")
1790 h.set_name(
"System")
1792 o.init_rmf(out_rmf_fn, [h], rs)
1794 o.init_rmf(out_rmf_fn, [prot],rs)
1796 o.write_rmf(out_rmf_fn)
1797 o.close_rmf(out_rmf_fn)
1799 if density_custom_ranges:
1800 DensModule.add_subunits_density(prot)
1802 if density_custom_ranges:
1803 DensModule.write_mrc(path=dircluster)
1810 if self.number_of_processes > 1:
1816 rmf_file_name_index_dict)
1818 alignment_coordinates)
1825 [best_score_feature_keyword_list_dict,
1826 rmf_file_name_index_dict],
1832 print(
"setup clustering class")
1835 for n, model_coordinate_dict
in enumerate(all_coordinates):
1836 template_coordinate_dict = {}
1838 if alignment_components
is not None and len(self.cluster_obj.all_coords) == 0:
1840 self.cluster_obj.set_template(alignment_coordinates[n])
1841 self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1842 print(
"Global calculating the distance matrix")
1845 self.cluster_obj.dist_matrix()
1849 self.cluster_obj.do_cluster(number_of_clusters)
1852 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1853 if exit_after_display:
1855 self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1862 print(
"setup clustering class")
1864 self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1865 print(
"clustering with %s clusters" % str(number_of_clusters))
1866 self.cluster_obj.do_cluster(number_of_clusters)
1867 [best_score_feature_keyword_list_dict,
1868 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1871 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1872 if exit_after_display:
1874 if self.number_of_processes > 1:
1882 print(self.cluster_obj.get_cluster_labels())
1883 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1884 print(
"rank %s " % str(self.rank))
1885 print(
"cluster %s " % str(n))
1886 print(
"cluster label %s " % str(cl))
1887 print(self.cluster_obj.get_cluster_label_names(cl))
1890 if density_custom_ranges:
1892 density_custom_ranges,
1895 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1897 os.mkdir(dircluster)
1901 rmsd_dict = {
"AVERAGE_RMSD":
1902 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1903 clusstat = open(dircluster +
"stat.out",
"w")
1904 for k, structure_name
in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1907 tmp_dict.update(rmsd_dict)
1908 index = rmf_file_name_index_dict[structure_name]
1909 for key
in best_score_feature_keyword_list_dict:
1911 key] = best_score_feature_keyword_list_dict[
1917 rmf_name = structure_name.split(
"|")[0]
1918 rmf_frame_number = int(structure_name.split(
"|")[1])
1919 clusstat.write(str(tmp_dict) +
"\n")
1923 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1928 linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1934 if not linking_successful:
1940 states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1941 prot = states[state_number]
1943 prot = prots[state_number]
1947 model_index = self.cluster_obj.get_model_index_from_name(
1949 transformation = self.cluster_obj.get_transformation_to_first_member(
1969 if density_custom_ranges:
1970 DensModule.add_subunits_density(prot)
1974 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1975 o.write_pdb(dircluster + str(k) +
".pdb")
1980 h.set_name(
"System")
1982 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1984 o.init_rmf(dircluster + str(k) +
".rmf3", [prot],rs)
1985 o.write_rmf(dircluster + str(k) +
".rmf3")
1986 o.close_rmf(dircluster + str(k) +
".rmf3")
1991 if density_custom_ranges:
1992 DensModule.write_mrc(path=dircluster)
1995 if self.number_of_processes>1:
1998 def get_cluster_rmsd(self,cluster_num):
1999 if self.cluster_obj
is None:
2001 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
2003 def save_objects(self, objects, file_name):
2005 outf = open(file_name,
'wb')
2006 pickle.dump(objects, outf)
2009 def load_objects(self, file_name):
2011 inputf = open(file_name,
'rb')
2012 objects = pickle.load(inputf)
A class to simplify create of constraints and movers for an IMP Hierarchy.
A macro for running all the basic operations of analysis.
Sample using molecular dynamics.
A class for reading stat files.
A member of a rigid body, it has internal (local) coordinates.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def get_molecules
Return list of all molecules grouped by state.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
This class initializes the root node of the global IMP.atom.Hierarchy.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
Representation of the system.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def build_model
Create model.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
A decorator for a particle with x,y,z coordinates.
def BuildModel0
Construct a component for each subunit (no splitting, nothing fancy).
Class for easy writing of PDBs, RMFs, and stat files.
A macro to build a Representation based on a Topology and lists of movers DEPRECATED - Use BuildSyste...
Tools for clustering and cluster analysis.
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and setup constraints for PMI objects.
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Deprecated building macro - use BuildSystem()
The general base class for IMP exceptions.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
Set up the representation of all proteins and nucleic acid macromolecules.
A macro to help setup and run replica exchange.
A dictionary-like wrapper for reading and storing sequence data.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
Support for the RMF file format for storing hierarchical molecular data and markup.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Sample using replica exchange.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
def get_representation
Return the Representation object.
A decorator for a particle with x,y,z coordinates and a radius.