1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function
16 from operator
import itemgetter
17 from collections
import defaultdict
21 """A macro to help setup and run replica exchange.
22 Supports Monte Carlo and molecular dynamics.
23 Produces trajectory RMF files, best PDB structures,
24 and output stat files.
30 monte_carlo_sample_objects=
None,
31 molecular_dynamics_sample_objects=
None,
33 crosslink_restraints=
None,
34 monte_carlo_temperature=1.0,
35 simulated_annealing=
False,
36 simulated_annealing_minimum_temperature=1.0,
37 simulated_annealing_maximum_temperature=2.5,
38 simulated_annealing_minimum_temperature_nframes=100,
39 simulated_annealing_maximum_temperature_nframes=100,
40 replica_exchange_minimum_temperature=1.0,
41 replica_exchange_maximum_temperature=2.5,
43 number_of_best_scoring_models=500,
45 molecular_dynamics_steps=10,
46 molecular_dynamics_max_time_step=1.0,
47 number_of_frames=1000,
48 nframes_write_coordinates=1,
49 write_initial_rmf=
True,
50 initial_rmf_name_suffix=
"initial",
51 stat_file_name_suffix=
"stat",
52 best_pdb_name_suffix=
"model",
54 do_create_directories=
True,
55 global_output_directory=
"./",
58 replica_stat_file_suffix=
"stat_replica",
59 em_object_for_rmf=
None,
61 replica_exchange_object=
None):
63 @param model The IMP model
64 @param representation PMI.representation.Representation object
65 (or list of them, for multi-state modeling)
66 @param root_hier Instead of passing Representation, just pass
68 @param monte_carlo_sample_objects Objects for MC sampling; a list of
69 structural components (generally the representation) that will
70 be moved and restraints with parameters that need to
71 be sampled. OR a DegreesOfFreedom object!
72 @param molecular_dynamics_sample_objects Objects for MD sampling
73 @param output_objects A list of structural objects and restraints
74 that will be included in output (statistics files). Any object
75 that provides a get_output() method can be used here.
76 @param crosslink_restraints List of cross-link restraints that will
77 be included in output RMF files (for visualization).
78 @param monte_carlo_temperature MC temp (may need to be optimized
79 based on post-sampling analysis)
80 @param simulated_annealing If True, perform simulated annealing
81 @param simulated_annealing_minimum_temperature Should generally be
82 the same as monte_carlo_temperature.
83 @param simulated_annealing_minimum_temperature_nframes Number of
84 frames to compute at minimum temperature.
85 @param simulated_annealing_maximum_temperature_nframes Number of
87 temps > simulated_annealing_maximum_temperature.
88 @param replica_exchange_minimum_temperature Low temp for REX; should
89 generally be the same as monte_carlo_temperature.
90 @param replica_exchange_maximum_temperature High temp for REX
91 @param num_sample_rounds Number of rounds of MC/MD per cycle
92 @param number_of_best_scoring_models Number of top-scoring PDB models
93 to keep around for analysis
94 @param monte_carlo_steps Number of MC steps per round
95 @param molecular_dynamics_steps Number of MD steps per round
96 @param molecular_dynamics_max_time_step Max time step for MD
97 @param number_of_frames Number of REX frames to run
98 @param nframes_write_coordinates How often to write the coordinates
100 @param write_initial_rmf Write the initial configuration
101 @param global_output_directory Folder that will be created to house
109 if type(representation) == list:
110 self.is_multi_state =
True
111 self.root_hiers = [r.prot
for r
in representation]
112 self.vars[
"number_of_states"] = len(representation)
114 self.is_multi_state =
False
115 self.root_hier = representation.prot
116 self.vars[
"number_of_states"] = 1
119 self.vars[
"number_of_states"] = len(states)
121 self.root_hiers = states
122 self.is_multi_state =
True
124 self.root_hier = root_hier
125 self.is_multi_state =
False
127 print(
"ERROR: Must provide representation or root_hier")
130 self.crosslink_restraints = crosslink_restraints
131 self.em_object_for_rmf = em_object_for_rmf
132 self.monte_carlo_sample_objects = monte_carlo_sample_objects
133 if sample_objects
is not None:
134 self.monte_carlo_sample_objects+=sample_objects
135 self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
136 self.output_objects = output_objects
137 self.replica_exchange_object = replica_exchange_object
138 self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
139 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
141 "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
143 "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
145 self.vars[
"simulated_annealing"]=\
147 self.vars[
"simulated_annealing_minimum_temperature"]=\
148 simulated_annealing_minimum_temperature
149 self.vars[
"simulated_annealing_maximum_temperature"]=\
150 simulated_annealing_maximum_temperature
151 self.vars[
"simulated_annealing_minimum_temperature_nframes"]=\
152 simulated_annealing_minimum_temperature_nframes
153 self.vars[
"simulated_annealing_maximum_temperature_nframes"]=\
154 simulated_annealing_maximum_temperature_nframes
156 self.vars[
"num_sample_rounds"] = num_sample_rounds
158 "number_of_best_scoring_models"] = number_of_best_scoring_models
159 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
160 self.vars[
"molecular_dynamics_steps"]=molecular_dynamics_steps
161 self.vars[
"number_of_frames"] = number_of_frames
162 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
163 self.vars[
"write_initial_rmf"] = write_initial_rmf
164 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
165 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
166 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
167 self.vars[
"do_clean_first"] = do_clean_first
168 self.vars[
"do_create_directories"] = do_create_directories
169 self.vars[
"global_output_directory"] = global_output_directory
170 self.vars[
"rmf_dir"] = rmf_dir
171 self.vars[
"best_pdb_dir"] = best_pdb_dir
172 self.vars[
"atomistic"] = atomistic
173 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
176 print(
"ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
177 print(
"--- it stores the best scoring pdb models in pdbs/")
178 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
179 print(
"--- variables:")
180 keys = list(self.vars.keys())
183 print(
"------", v.ljust(30), self.vars[v])
185 def get_replica_exchange_object(self):
186 return self.replica_exchange_object
188 def execute_macro(self):
190 temp_index_factor = 100000.0
194 if self.monte_carlo_sample_objects
is not None:
195 print(
"Setting up MonteCarlo")
197 self.monte_carlo_sample_objects,
198 self.vars[
"monte_carlo_temperature"])
199 if self.vars[
"simulated_annealing"]:
200 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
201 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
202 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
203 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
204 sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
205 self.output_objects.append(sampler_mc)
206 samplers.append(sampler_mc)
209 if self.molecular_dynamics_sample_objects
is not None:
210 print(
"Setting up MolecularDynamics")
212 self.molecular_dynamics_sample_objects,
213 self.vars[
"monte_carlo_temperature"],
214 maximum_time_step=self.molecular_dynamics_max_time_step)
215 if self.vars[
"simulated_annealing"]:
216 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
217 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
218 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
219 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
220 sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
221 self.output_objects.append(sampler_md)
222 samplers.append(sampler_md)
225 print(
"Setting up ReplicaExchange")
228 "replica_exchange_minimum_temperature"],
230 "replica_exchange_maximum_temperature"],
232 replica_exchange_object=self.replica_exchange_object)
233 self.replica_exchange_object = rex.rem
235 myindex = rex.get_my_index()
236 self.output_objects.append(rex)
241 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
245 globaldir = self.vars[
"global_output_directory"] +
"/"
246 rmf_dir = globaldir + self.vars[
"rmf_dir"]
247 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
249 if self.vars[
"do_clean_first"]:
252 if self.vars[
"do_create_directories"]:
255 os.makedirs(globaldir)
263 if not self.is_multi_state:
269 for n
in range(self.vars[
"number_of_states"]):
271 os.makedirs(pdb_dir +
"/" + str(n))
277 sw = IMP.pmi.tools.Stopwatch()
278 self.output_objects.append(sw)
280 print(
"Setting up stat file")
282 low_temp_stat_file = globaldir + \
283 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
284 output.init_stat2(low_temp_stat_file,
286 extralabels=[
"rmf_file",
"rmf_frame_index"])
288 print(
"Setting up replica stat file")
289 replica_stat_file = globaldir + \
290 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
291 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
293 print(
"Setting up best pdb files")
294 if not self.is_multi_state:
295 if self.vars[
"number_of_best_scoring_models"] > 0:
296 output.init_pdb_best_scoring(pdb_dir +
"/" +
297 self.vars[
"best_pdb_name_suffix"],
300 "number_of_best_scoring_models"],
301 replica_exchange=
True)
302 output.write_psf(pdb_dir +
"/" +
"model.psf",pdb_dir +
"/" +
303 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
305 if self.vars[
"number_of_best_scoring_models"] > 0:
306 for n
in range(self.vars[
"number_of_states"]):
307 output.init_pdb_best_scoring(pdb_dir +
"/" + str(n) +
"/" +
308 self.vars[
"best_pdb_name_suffix"],
311 "number_of_best_scoring_models"],
312 replica_exchange=
True)
313 output.write_psf(pdb_dir +
"/" + str(n) +
"/" +
"model.psf",pdb_dir +
"/" + str(n) +
"/" +
314 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
317 if not self.em_object_for_rmf
is None:
318 if not self.is_multi_state:
319 output_hierarchies = [
321 self.em_object_for_rmf.get_density_as_hierarchy(
324 output_hierarchies = self.root_hiers
325 output_hierarchies.append(
326 self.em_object_for_rmf.get_density_as_hierarchy())
328 if not self.is_multi_state:
329 output_hierarchies = [self.root_hier]
331 output_hierarchies = self.root_hiers
334 print(
"Setting up and writing initial rmf coordinate file")
335 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
336 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
338 if self.crosslink_restraints:
339 output.add_restraints_to_rmf(
340 init_suffix +
"." + str(myindex) +
".rmf3",
341 self.crosslink_restraints)
342 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
343 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
347 print(
"Setting up production rmf files")
349 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
350 output.init_rmf(rmfname, output_hierarchies)
352 if self.crosslink_restraints:
353 output.add_restraints_to_rmf(rmfname, self.crosslink_restraints)
355 ntimes_at_low_temp = 0
360 for i
in range(self.vars[
"number_of_frames"]):
361 for nr
in range(self.vars[
"num_sample_rounds"]):
362 if sampler_md
is not None:
363 sampler_md.optimize(self.vars[
"molecular_dynamics_steps"])
364 if sampler_mc
is not None:
365 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
367 output.set_output_entry(
"score", score)
369 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
371 if min_temp_index == my_temp_index:
372 print(
"--- frame %s score %s " % (str(i), str(score)))
374 if i % self.vars[
"nframes_write_coordinates"]==0:
375 print(
'--- writing coordinates')
376 if self.vars[
"number_of_best_scoring_models"] > 0:
377 output.write_pdb_best_scoring(score)
378 output.write_rmf(rmfname)
379 output.set_output_entry(
"rmf_file", rmfname)
380 output.set_output_entry(
"rmf_frame_index", ntimes_at_low_temp)
382 output.set_output_entry(
"rmf_file", rmfname)
383 output.set_output_entry(
"rmf_frame_index",
'-1')
384 output.write_stat2(low_temp_stat_file)
385 ntimes_at_low_temp += 1
387 output.write_stat2(replica_stat_file)
388 rex.swap_temp(i, score)
399 missing_bead_size=20,
400 residue_per_gaussian=
None):
402 Construct a component for each subunit (no splitting, nothing fancy).
404 You can pass the resolutions and the bead size for the missing residue regions.
405 To use this macro, you must provide the following data structure:
407 Component pdbfile chainid rgb color fastafile sequence id
410 data = [("Rpb1", pdbfile, "A", 0.00000000, (fastafile, 0)),
411 ("Rpb2", pdbfile, "B", 0.09090909, (fastafile, 1)),
412 ("Rpb3", pdbfile, "C", 0.18181818, (fastafile, 2)),
413 ("Rpb4", pdbfile, "D", 0.27272727, (fastafile, 3)),
414 ("Rpb5", pdbfile, "E", 0.36363636, (fastafile, 4)),
415 ("Rpb6", pdbfile, "F", 0.45454545, (fastafile, 5)),
416 ("Rpb7", pdbfile, "G", 0.54545455, (fastafile, 6)),
417 ("Rpb8", pdbfile, "H", 0.63636364, (fastafile, 7)),
418 ("Rpb9", pdbfile, "I", 0.72727273, (fastafile, 8)),
419 ("Rpb10", pdbfile, "L", 0.81818182, (fastafile, 9)),
420 ("Rpb11", pdbfile, "J", 0.90909091, (fastafile, 10)),
421 ("Rpb12", pdbfile, "K", 1.00000000, (fastafile, 11))]
432 component_name = d[0]
438 fastids = IMP.pmi.tools.get_ids_from_fasta_file(fasta_file)
439 fasta_file_id = d[4][1]
441 r.create_component(component_name,
444 r.add_component_sequence(component_name,
446 id=fastids[fasta_file_id])
448 hierarchies = r.autobuild_model(component_name,
451 resolutions=resolutions,
452 missingbeadsize=missing_bead_size)
454 r.show_component_table(component_name)
456 r.set_rigid_bodies([component_name])
458 r.set_chain_of_super_rigid_bodies(
463 r.setup_component_sequence_connectivity(component_name, resolution=1)
464 r.setup_component_geometry(component_name)
468 r.set_floppy_bodies()
471 r.set_current_coordinates_as_reference_for_rmsd(
"Reference")
479 """A macro to build a Representation based on a Topology and lists of movers
483 component_topologies,
484 list_of_rigid_bodies=[],
485 list_of_super_rigid_bodies=[],
486 chain_of_super_rigid_bodies=[],
487 sequence_connectivity_scale=4.0,
488 add_each_domain_as_rigid_body=
False,
489 force_create_gmm_files=
False):
491 @param model The IMP model
492 @param component_topologies List of
493 IMP.pmi.topology.ComponentTopology items
494 @param list_of_rigid_bodies List of lists of domain names that will
495 be moved as rigid bodies.
496 @param list_of_super_rigid_bodies List of lists of domain names
497 that will move together in an additional Monte Carlo move.
498 @param chain_of_super_rigid_bodies List of lists of domain names
499 (choices can only be from the same molecule). Each of these
500 groups will be moved rigidly. This helps to sample more
501 efficiently complex topologies, made of several rigid bodies,
502 connected by flexible linkers.
503 @param sequence_connectivity_scale For scaling the connectivity
505 @param add_each_domain_as_rigid_body That way you don't have to
506 put all of them in the list
507 @param force_create_gmm_files If True, will sample and create GMMs
508 no matter what. If False, will only only sample if the
509 files don't exist. If number of Gaussians is zero, won't
515 disorderedlength=
False)
517 data=component_topologies
518 if list_of_rigid_bodies==[]:
519 print(
"WARNING: No list of rigid bodies inputted to build_model()")
520 if list_of_super_rigid_bodies==[]:
521 print(
"WARNING: No list of super rigid bodies inputted to build_model()")
522 if chain_of_super_rigid_bodies==[]:
523 print(
"WARNING: No chain of super rigid bodies inputted to build_model()")
524 all_dnames = set([d
for sublist
in list_of_rigid_bodies+list_of_super_rigid_bodies\
525 +chain_of_super_rigid_bodies
for d
in sublist])
526 all_available = set([c.domain_name
for c
in component_topologies])
527 if not all_dnames <= all_available:
528 raise ValueError(
"All requested movers must reference domain "
529 "names in the component topologies")
533 super_rigid_bodies={}
534 chain_super_rigid_bodies={}
539 hier_name = c.domain_name
541 fasta_file = c.fasta_file
542 fasta_id = c.fasta_id
543 pdb_name = c.pdb_file
545 res_range = c.residue_range
546 offset = c.pdb_offset
547 bead_size = c.bead_size
548 em_num_components = c.em_residues_per_gaussian
549 em_txt_file_name = c.gmm_file
550 em_mrc_file_name = c.mrc_file
552 if comp_name
not in self.simo.get_component_names():
553 self.simo.create_component(comp_name,color=0.0)
554 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
557 if em_num_components==0:
561 if (
not os.path.isfile(em_txt_file_name))
or force_create_gmm_files:
568 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
569 res_range,include_res0,beadsize=bead_size,
570 color=color,offset=offset)
571 if em_num_components!=0:
573 print(
"will read GMM files")
575 print(
"will calculate GMMs")
577 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
578 em_mrc_file_name,em_num_components,read_em_files)
579 self.simo.add_all_atom_densities(comp_name, hierarchies=beads)
584 self.resdensities[hier_name]=dens_hier
585 self.domain_dict[hier_name]=outhier+dens_hier
588 for c
in self.simo.get_component_names():
589 self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
590 self.simo.setup_component_geometry(c)
593 for rblist
in list_of_rigid_bodies:
595 for rbmember
in rblist:
596 rb+=[h
for h
in self.domain_dict[rbmember]]
597 self.simo.set_rigid_body_from_hierarchies(rb)
598 for srblist
in list_of_super_rigid_bodies:
600 for srbmember
in rblist:
601 srb+=[h
for h
in self.domain_dict[srbmember]]
602 self.simo.set_super_rigid_body_from_hierarchies(srb)
603 for clist
in chain_of_super_rigid_bodies:
605 for crbmember
in rblist:
606 crb+=[h
for h
in self.domain_dict[crbmember]]
607 self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
609 self.simo.set_floppy_bodies()
610 self.simo.setup_bonds()
613 '''Return the Representation object'''
616 def get_density_hierarchies(self,hier_name_list):
620 for hn
in hier_name_list:
622 dens_hier_list+=self.resdensities[hn]
623 return dens_hier_list
625 def set_gmm_models_directory(self,directory_name):
626 self.gmm_models_directory=directory_name
628 def get_pdb_bead_bits(self,hierarchy):
633 if "_pdb" in h.get_name():pdbbits.append(h)
634 if "_bead" in h.get_name():beadbits.append(h)
635 if "_helix" in h.get_name():helixbits.append(h)
636 return (pdbbits,beadbits,helixbits)
638 def scale_bead_radii(self,nresidues,scale):
640 for h
in self.domain_dict:
641 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
642 slope=(1.0-scale)/(1.0-float(nresidues))
647 if b
not in scaled_beads:
653 scale_factor=slope*float(num_residues)+1.0
655 new_radius=scale_factor*radius
658 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
661 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
663 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
666 for pb
in pdbbits+helixbits:
670 number_of_residues=len(set(res_ind))
674 outhier+=simo.add_component_density(compname,
676 num_components=num_components,
678 inputfile=txtfilename)
679 if len(helixbits)!=0:
680 outhier+=simo.add_component_density(compname,
682 num_components=num_components,
684 inputfile=txtfilename)
689 num_components=number_of_residues/abs(num_components)
690 outhier+=simo.add_component_density(compname,
692 num_components=num_components,
694 outputfile=txtfilename,
695 outputmap=mrcfilename,
696 multiply_by_total_mass=
True)
698 if len(helixbits)!=0:
699 num_components=number_of_residues/abs(num_components)
700 outhier+=simo.add_component_density(compname,
702 num_components=num_components,
704 outputfile=txtfilename,
705 outputmap=mrcfilename,
706 multiply_by_total_mass=
True)
708 return outhier,beadbits
710 def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
711 beadsize=5,color=0.0,offset=0):
712 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" :
714 resrange=(resrange[0],len(simo.sequence_dict[comname]))
716 outhier=simo.autobuild_model(comname,
720 resolutions=[0,1,10],
723 missingbeadsize=beadsize)
725 outhier=simo.autobuild_model(comname,
732 missingbeadsize=beadsize)
735 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" :
736 outhier=simo.add_component_ideal_helix(comname,
742 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" :
743 outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
747 seq_len=len(simo.sequence_dict[comname])
748 outhier=simo.add_component_necklace(comname,
758 """Deprecated building macro - use BuildModel()"""
762 @param representation The PMI representation
764 self.simo=representation
765 self.gmm_models_directory=
"."
767 self.rmf_frame_number={}
769 def set_gmm_models_directory(self,directory_name):
770 self.gmm_models_directory=directory_name
772 def build_model(self,data_structure,sequence_connectivity_scale=4.0,
773 sequence_connectivity_resolution=10,rmf_file=
None,rmf_frame_number=0):
775 @param data_structure List of lists containing these entries:
776 comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
777 res_range, read_em_files, bead_size, rb, super_rb,
778 em_num_components, em_txt_file_name, em_mrc_file_name
779 @param sequence_connectivity_scale
781 @param rmf_frame_number
785 super_rigid_bodies={}
786 chain_super_rigid_bodies={}
789 for d
in data_structure:
797 res_range = d[7][0:2]
806 em_num_components = d[12]
807 em_txt_file_name = d[13]
808 em_mrc_file_name = d[14]
809 chain_of_super_rb = d[15]
811 if comp_name
not in self.simo.get_component_names():
812 self.simo.create_component(comp_name,color=0.0)
813 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
814 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
817 if not read_em_files
is None:
818 if em_txt_file_name
is " ": em_txt_file_name=self.gmm_models_directory+
"/"+hier_name+
".txt"
819 if em_mrc_file_name
is " ": em_mrc_file_name=self.gmm_models_directory+
"/"+hier_name+
".mrc"
822 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
823 self.simo.add_all_atom_densities(comp_name, hierarchies=beads)
829 self.resdensities[hier_name]=dens_hier
830 self.domain_dict[hier_name]=outhier+dens_hier
833 if rb
not in rigid_bodies:
834 rigid_bodies[rb]=[h
for h
in self.domain_dict[hier_name]]
836 rigid_bodies[rb]+=[h
for h
in self.domain_dict[hier_name]]
839 if super_rb
is not None:
841 if k
not in super_rigid_bodies:
842 super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
844 super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
846 if chain_of_super_rb
is not None:
847 for k
in chain_of_super_rb:
848 if k
not in chain_super_rigid_bodies:
849 chain_super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
851 chain_super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
855 self.rigid_bodies=rigid_bodies
857 for c
in self.simo.get_component_names():
858 if rmf_file
is not None:
861 self.simo.set_coordinates_from_rmf(c, rf,rfn)
863 if c
in self.rmf_file:
865 rfn=self.rmf_frame_number[c]
866 self.simo.set_coordinates_from_rmf(c, rf,rfn)
867 self.simo.setup_component_sequence_connectivity(c,resolution=sequence_connectivity_resolution,scale=sequence_connectivity_scale)
868 self.simo.setup_component_geometry(c)
870 for rb
in rigid_bodies:
871 self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
873 for k
in super_rigid_bodies:
874 self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
876 for k
in chain_super_rigid_bodies:
877 self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
879 self.simo.set_floppy_bodies()
880 self.simo.setup_bonds()
884 def set_rmf_file(self,component_name,rmf_file,rmf_frame_number):
885 self.rmf_file[component_name]=rmf_file
886 self.rmf_frame_number[component_name]=rmf_frame_number
888 def get_density_hierarchies(self,hier_name_list):
892 for hn
in hier_name_list:
894 dens_hier_list+=self.resdensities[hn]
895 return dens_hier_list
897 def get_pdb_bead_bits(self,hierarchy):
902 if "_pdb" in h.get_name():pdbbits.append(h)
903 if "_bead" in h.get_name():beadbits.append(h)
904 if "_helix" in h.get_name():helixbits.append(h)
905 return (pdbbits,beadbits,helixbits)
907 def scale_bead_radii(self,nresidues,scale):
909 for h
in self.domain_dict:
910 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
911 slope=(1.0-scale)/(1.0-float(nresidues))
916 if b
not in scaled_beads:
922 scale_factor=slope*float(num_residues)+1.0
924 new_radius=scale_factor*radius
927 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
930 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
932 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
936 for pb
in pdbbits+helixbits:
940 number_of_residues=len(set(res_ind))
944 outhier+=simo.add_component_density(compname,
946 num_components=num_components,
948 inputfile=txtfilename)
949 if len(helixbits)!=0:
950 outhier+=simo.add_component_density(compname,
952 num_components=num_components,
954 inputfile=txtfilename)
962 num_components=number_of_residues/abs(num_components)
963 outhier+=simo.add_component_density(compname,
965 num_components=num_components,
967 outputfile=txtfilename,
968 outputmap=mrcfilename,
969 multiply_by_total_mass=
True)
971 if len(helixbits)!=0:
975 num_components=number_of_residues/abs(num_components)
976 outhier+=simo.add_component_density(compname,
978 num_components=num_components,
980 outputfile=txtfilename,
981 outputmap=mrcfilename,
982 multiply_by_total_mass=
True)
984 return outhier,beadbits
986 def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
988 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is not "DENSITY" :
989 if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
991 outhier=simo.autobuild_model(comname,
995 resolutions=[0,1,10],
998 missingbeadsize=beadsize)
1000 outhier=simo.autobuild_model(comname,
1007 missingbeadsize=beadsize)
1010 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is not "DENSITY" :
1012 outhier=simo.add_component_ideal_helix(comname,
1018 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" and pdbname
is not "DENSITY" :
1019 outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
1021 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is "DENSITY" :
1026 seq_len=len(simo.sequence_dict[comname])
1027 outhier=simo.add_component_necklace(comname,
1034 def set_coordinates(self,hier_name,xyz_tuple):
1035 hier=self.domain_dict[hier_name]
1043 def save_rmf(self,rmfname):
1046 o.init_rmf(rmfname,[self.simo.prot])
1047 o.write_rmf(rmfname)
1048 o.close_rmf(rmfname)
1053 """A macro for running all the basic operations of analysis.
1054 Includes clustering, precision analysis, and making ensemble density maps.
1055 A number of plots are also supported.
1058 merge_directories=[
"./"],
1059 stat_file_name_suffix=
"stat",
1060 best_pdb_name_suffix=
"model",
1061 do_clean_first=
True,
1062 do_create_directories=
True,
1063 global_output_directory=
"output/",
1064 replica_stat_file_suffix=
"stat_replica",
1065 global_analysis_result_directory=
"./analysis/"):
1067 @param model The IMP model
1068 @param stat_file_name_suffix
1069 @param merge_directories The directories containing output files
1070 @param best_pdb_name_suffix
1071 @param do_clean_first
1072 @param do_create_directories
1073 @param global_output_directory Where everything is
1074 @param replica_stat_file_suffix
1075 @param global_analysis_result_directory
1079 from mpi4py
import MPI
1080 self.comm = MPI.COMM_WORLD
1081 self.rank = self.comm.Get_rank()
1082 self.number_of_processes = self.comm.size
1085 self.number_of_processes = 1
1088 stat_dir = global_output_directory
1089 self.stat_files = []
1091 for rd
in merge_directories:
1092 stat_files = glob.glob(rd +
"/" + stat_dir +
"/stat.*.out")
1093 if len(stat_files)==0:
1094 print(
"WARNING: no stat files found in",rd +
"/" + stat_dir +
"/stat.*.out")
1095 self.stat_files += stat_files
1099 score_key=
"SimplifiedModel_Total_Score_None",
1100 rmf_file_key=
"rmf_file",
1101 rmf_file_frame_key=
"rmf_frame_index",
1104 nframes_trajectory=10000):
1105 """ Get a trajectory of the modeling run, for generating demonstrative movies
1106 @param score_key The score for ranking models
1107 @param rmf_file_key Key pointing to RMF filename
1108 @param rmf_file_frame_key Key pointing to RMF frame number
1109 @param outputdir The local output directory used in the run
1110 @param get_every Extract every nth frame
1111 @param nframes_trajectory Total number of frames of the trajectory
1113 from operator
import itemgetter
1121 rmf_file_list=trajectory_models[0]
1122 rmf_file_frame_list=trajectory_models[1]
1123 score_list=list(map(float, trajectory_models[2]))
1125 max_score=max(score_list)
1126 min_score=min(score_list)
1129 bins=[(max_score-min_score)*math.exp(-float(i))+min_score
for i
in range(nframes_trajectory)]
1130 binned_scores=[
None]*nframes_trajectory
1131 binned_model_indexes=[-1]*nframes_trajectory
1133 for model_index,s
in enumerate(score_list):
1134 bins_score_diffs=[abs(s-b)
for b
in bins]
1135 bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1136 if binned_scores[bin_index]==
None:
1137 binned_scores[bin_index]=s
1138 binned_model_indexes[bin_index]=model_index
1140 old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1141 new_diff=abs(s-bins[bin_index])
1142 if new_diff < old_diff:
1143 binned_scores[bin_index]=s
1144 binned_model_indexes[bin_index]=model_index
1146 print(binned_scores)
1147 print(binned_model_indexes)
1152 score_key=
"SimplifiedModel_Total_Score_None",
1153 rmf_file_key=
"rmf_file",
1154 rmf_file_frame_key=
"rmf_frame_index",
1156 prefiltervalue=
None,
1159 alignment_components=
None,
1160 number_of_best_scoring_models=10,
1161 rmsd_calculation_components=
None,
1162 distance_matrix_file=
None,
1163 load_distance_matrix_file=
False,
1164 skip_clustering=
False,
1165 number_of_clusters=1,
1167 exit_after_display=
True,
1169 first_and_last_frames=
None,
1170 density_custom_ranges=
None,
1171 write_pdb_with_centered_coordinates=
False,
1173 """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps
1174 @param score_key The score for ranking models
1175 @param rmf_file_key Key pointing to RMF filename
1176 @param rmf_file_frame_key Key pointing to RMF frame number
1177 @param state_number State number to analyse
1178 @param prefiltervalue Only include frames where the score key is below this value
1179 @param feature_keys Keywords for which you want to calculate average,
1181 @param outputdir The local output directory used in the run
1182 @param alignment_components List of tuples for aligning the structures
1183 e.g. ["Rpb1", (20,100,"Rpb2"), .....]
1184 @param number_of_best_scoring_models Num models to keep per run
1185 @param rmsd_calculation_components List of tuples for calculating RMSD
1186 e.g. ["Rpb1", (20,100,"Rpb2"), .....]
1187 @param distance_matrix_file Where to store/read the distance matrix
1188 @param load_distance_matrix_file Try to load the distance matrix file
1189 @param skip_clustering Just extract the best scoring models and save the pdbs
1190 @param number_of_clusters Number of k-means clusters
1191 @param display_plot Display the distance matrix
1192 @param exit_after_display Exit after displaying distance matrix
1193 @param get_every Extract every nth frame
1194 @param first_and_last_frames A tuple with the first and last frames to be
1195 analyzed. Values are percentages!
1196 Default: get all frames
1197 @param density_custom_ranges List of tuples or strings for density calculation
1198 e.g. ["Rpb1", (20,100,"Rpb2"), .....]
1199 @param write_pdb_with_centered_coordinates
1200 @param voxel_size Used for the density output
1211 if not load_distance_matrix_file:
1212 if len(self.stat_files)==0: print(
"ERROR: no stat file found in the given path");
return
1213 my_stat_files=IMP.pmi.tools.chunk_list_into_segments(
1214 self.stat_files,self.number_of_processes)[self.rank]
1222 rmf_file_list=best_models[0]
1223 rmf_file_frame_list=best_models[1]
1224 score_list=best_models[2]
1225 feature_keyword_list_dict=best_models[3]
1231 if self.number_of_processes > 1:
1235 rmf_file_frame_list)
1236 for k
in feature_keyword_list_dict:
1238 feature_keyword_list_dict[k])
1241 score_rmf_tuples = list(zip(score_list,
1243 rmf_file_frame_list,
1244 list(range(len(score_list)))))
1247 if first_and_last_frames
is not None:
1248 nframes = len(score_rmf_tuples)
1249 first_frame = int(first_and_last_frames[0] * nframes)
1250 last_frame = int(first_and_last_frames[1] * nframes)
1251 if last_frame > len(score_rmf_tuples):
1253 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1256 best_score_rmf_tuples = sorted(score_rmf_tuples,
1257 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1258 best_score_rmf_tuples=[t+(n,)
for n,t
in enumerate(best_score_rmf_tuples)]
1261 best_score_feature_keyword_list_dict = defaultdict(list)
1262 for tpl
in best_score_rmf_tuples:
1264 for f
in feature_keyword_list_dict:
1265 best_score_feature_keyword_list_dict[f].append(
1266 feature_keyword_list_dict[f][index])
1268 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1269 best_score_rmf_tuples,
1270 self.number_of_processes)[self.rank]
1277 my_best_score_rmf_tuples[0],
1278 rmsd_calculation_components,
1279 state_number=state_number)
1281 my_best_score_rmf_tuples,
1282 alignment_components,
1283 rmsd_calculation_components,
1284 state_number=state_number)
1289 all_coordinates=got_coords[0]
1290 alignment_coordinates=got_coords[1]
1291 rmsd_coordinates=got_coords[2]
1292 rmf_file_name_index_dict=got_coords[3]
1293 all_rmf_file_names=got_coords[4]
1299 if density_custom_ranges:
1301 density_custom_ranges,
1304 dircluster=os.path.join(outputdir,
"all_models."+str(n))
1310 os.mkdir(dircluster)
1313 clusstat=open(os.path.join(dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1314 for cnt,tpl
in enumerate(my_best_score_rmf_tuples):
1316 rmf_frame_number=tpl[2]
1319 for key
in best_score_feature_keyword_list_dict:
1320 tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1323 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1328 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1338 prot=prots[state_number]
1341 coords_f1=alignment_coordinates[cnt]
1343 coords_f2=alignment_coordinates[cnt]
1345 transformation = Ali.align()[1]
1363 out_pdb_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".pdb")
1364 out_rmf_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".rmf3")
1365 o.init_pdb(out_pdb_fn,prot)
1366 o.write_pdb(out_pdb_fn,
1367 translate_to_geometric_center=write_pdb_with_centered_coordinates)
1369 tmp_dict[
"local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1370 tmp_dict[
"rmf_file_full_path"]=rmf_name
1371 tmp_dict[
"local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1372 tmp_dict[
"local_rmf_frame_number"]=0
1374 clusstat.write(str(tmp_dict)+
"\n")
1375 o.init_rmf(out_rmf_fn,[prot],rs)
1376 o.write_rmf(out_rmf_fn)
1377 o.close_rmf(out_rmf_fn)
1379 if density_custom_ranges:
1380 DensModule.add_subunits_density(prot)
1382 if density_custom_ranges:
1383 DensModule.write_mrc(path=dircluster)
1390 if self.number_of_processes > 1:
1396 rmf_file_name_index_dict)
1398 alignment_coordinates)
1405 [best_score_feature_keyword_list_dict,
1406 rmf_file_name_index_dict],
1412 print(
"setup clustering class")
1415 for n, model_coordinate_dict
in enumerate(all_coordinates):
1416 template_coordinate_dict = {}
1418 if alignment_components
is not None and len(Clusters.all_coords) == 0:
1420 Clusters.set_template(alignment_coordinates[n])
1421 Clusters.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1423 print(
"Global calculating the distance matrix")
1426 Clusters.dist_matrix()
1430 Clusters.do_cluster(number_of_clusters)
1433 Clusters.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1434 if exit_after_display:
1436 Clusters.save_distance_matrix_file(file_name=distance_matrix_file)
1443 print(
"setup clustering class")
1445 Clusters.load_distance_matrix_file(file_name=distance_matrix_file)
1446 print(
"clustering with %s clusters" % str(number_of_clusters))
1447 Clusters.do_cluster(number_of_clusters)
1448 [best_score_feature_keyword_list_dict,
1449 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1452 Clusters.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1453 if exit_after_display:
1455 if self.number_of_processes > 1:
1463 print(Clusters.get_cluster_labels())
1464 for n, cl
in enumerate(Clusters.get_cluster_labels()):
1465 print(
"rank %s " % str(self.rank))
1466 print(
"cluster %s " % str(n))
1467 print(
"cluster label %s " % str(cl))
1468 print(Clusters.get_cluster_label_names(cl))
1472 if density_custom_ranges:
1474 density_custom_ranges,
1477 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1479 os.mkdir(dircluster)
1483 rmsd_dict = {
"AVERAGE_RMSD":
1484 str(Clusters.get_cluster_label_average_rmsd(cl))}
1485 clusstat = open(dircluster +
"stat.out",
"w")
1486 for k, structure_name
in enumerate(Clusters.get_cluster_label_names(cl)):
1490 tmp_dict.update(rmsd_dict)
1491 index = rmf_file_name_index_dict[structure_name]
1492 for key
in best_score_feature_keyword_list_dict:
1494 key] = best_score_feature_keyword_list_dict[
1499 rmf_name = structure_name.split(
"|")[0]
1500 rmf_frame_number = int(structure_name.split(
"|")[1])
1502 clusstat.write(str(tmp_dict) +
"\n")
1505 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1510 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1520 prot=prots[state_number]
1523 model_index = Clusters.get_model_index_from_name(
1525 transformation = Clusters.get_transformation_to_first_member(
1545 if density_custom_ranges:
1546 DensModule.add_subunits_density(prot)
1550 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1551 o.write_pdb(dircluster + str(k) +
".pdb")
1553 o.init_rmf(dircluster + str(k) +
".rmf3", [prot],rs)
1554 o.write_rmf(dircluster + str(k) +
".rmf3")
1555 o.close_rmf(dircluster + str(k) +
".rmf3")
1560 if density_custom_ranges:
1561 DensModule.write_mrc(path=dircluster)
1564 if self.number_of_processes>1:
1567 def save_objects(self, objects, file_name):
1569 outf = open(file_name,
'wb')
1570 pickle.dump(objects, outf)
1573 def load_objects(self, file_name):
1575 inputf = open(file_name,
'rb')
1576 objects = pickle.load(inputf)
A macro for running all the basic operations of analysis.
Sample using molecular dynamics.
A member of a rigid body, it has internal (local) coordinates.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
A class to cluster structures.
Representation of the system.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def build_model
Create model.
Performs alignment and RMSD calculation for two sets of coordinates.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
Gather all the molecular particles of a certain level in the hierarchy.
A decorator for a particle with x,y,z coordinates.
def BuildModel0
Construct a component for each subunit (no splitting, nothing fancy).
Class for easy writing of PDBs, RMFs, and stat files.
A macro to build a Representation based on a Topology and lists of movers.
Tools for clustering and cluster analysis.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Deprecated building macro - use BuildModel()
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
Set up the representation of all proteins and nucleic acid macromolecules.
A macro to help setup and run replica exchange.
Hierarchies get_leaves(const Selection &h)
Compute mean density maps from structures.
Support for the RMF file format for storing hierarchical molecular data and markup.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Sample using replica exchange.
def get_representation
Return the Representation object.
A decorator for a particle with x,y,z coordinates and a radius.