1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
10 import IMP.pmi.io.input
15 from operator
import itemgetter
16 from collections
import defaultdict
19 class ReplicaExchange0(object):
20 """A macro to help setup and run replica exchange.
21 Supports monte carlo and molecular dynamics.
22 Produces trajectory RMF files, best PDB structures,
23 and output stat files.
24 @param model The IMP model
25 @param representation PMI.Representation() (or list of them, for multi-state modeling)
26 @param root_hier Instead of passing Representation, just pass a hierarchy
27 @param mote_carlo_sample_objcts Objects for MC sampling
28 @param molecular_dynamics_sample_objects Objects for MD sampling
29 @param output_objects Objects with get_output() for packing into stat files
30 @param crosslink_restraints Harmonic restraints to go in output RMF files
31 @param monte_carlo_temperature MC temp
32 @param replica_exchange_minimum_temperature Low temp for REX
33 @param replica_exchange_maximum_temperature High temp for REX
34 @param num_sample_rounds Number of rounds of MC/MD per cycle
35 @param number_of_best_scoring_models Number of top-scoring PDB models to keep around
36 @param monte_carlo_steps Number of MC steps per round
37 @param molecular_dynamics_steps Number of MD steps per round
38 @param number_of_frames Number of REX frames to run
39 @param nframes_write_coordinates How often to write the coordinates of a frame
40 @param write_initial_rmf Write the initial configuration
42 def __init__(self, model,
46 monte_carlo_sample_objects=
None,
47 molecular_dynamics_sample_objects=
None,
49 crosslink_restraints=
None,
50 monte_carlo_temperature=1.0,
51 simulated_annealing=
False,
52 simulated_annealing_minimum_temperature=1.0,
53 simulated_annealing_maximum_temperature=2.5,
54 simulated_annealing_minimum_temperature_nframes=100,
55 simulated_annealing_maximum_temperature_nframes=100,
56 replica_exchange_minimum_temperature=1.0,
57 replica_exchange_maximum_temperature=2.5,
59 number_of_best_scoring_models=500,
61 molecular_dynamics_steps=10,
62 number_of_frames=1000,
63 nframes_write_coordinates=1,
64 write_initial_rmf=
True,
65 initial_rmf_name_suffix=
"initial",
66 stat_file_name_suffix=
"stat",
67 best_pdb_name_suffix=
"model",
69 do_create_directories=
True,
70 global_output_directory=
"./",
73 replica_stat_file_suffix=
"stat_replica",
74 em_object_for_rmf=
None,
76 replica_exchange_object=
None):
83 if type(representation) == list:
84 self.is_multi_state =
True
85 self.root_hiers = [r.prot
for r
in representation]
86 self.vars[
"number_of_states"] = len(representation)
88 self.is_multi_state =
False
89 self.root_hier = representation.prot
90 self.vars[
"number_of_states"] = 1
93 self.vars[
"number_of_states"] = len(states)
95 self.root_hiers = states
96 self.is_multi_state =
True
98 self.root_hier = root_hier
99 self.is_multi_state =
False
101 print "ERROR: Must provide representation or root_hier"
104 self.crosslink_restraints = crosslink_restraints
105 self.em_object_for_rmf = em_object_for_rmf
106 self.monte_carlo_sample_objects = monte_carlo_sample_objects
107 if sample_objects
is not None:
108 self.monte_carlo_sample_objects+=sample_objects
109 self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
110 self.output_objects = output_objects
111 self.replica_exchange_object = replica_exchange_object
112 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
114 "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
116 "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
118 self.vars[
"simulated_annealing"]=\
120 self.vars[
"simulated_annealing_minimum_temperature"]=\
121 simulated_annealing_minimum_temperature
122 self.vars[
"simulated_annealing_maximum_temperature"]=\
123 simulated_annealing_maximum_temperature
124 self.vars[
"simulated_annealing_minimum_temperature_nframes"]=\
125 simulated_annealing_minimum_temperature_nframes
126 self.vars[
"simulated_annealing_maximum_temperature_nframes"]=\
127 simulated_annealing_maximum_temperature_nframes
129 self.vars[
"num_sample_rounds"] = num_sample_rounds
131 "number_of_best_scoring_models"] = number_of_best_scoring_models
132 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
133 self.vars[
"molecular_dynamics_steps"]=molecular_dynamics_steps
134 self.vars[
"number_of_frames"] = number_of_frames
135 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
136 self.vars[
"write_initial_rmf"] = write_initial_rmf
137 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
138 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
139 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
140 self.vars[
"do_clean_first"] = do_clean_first
141 self.vars[
"do_create_directories"] = do_create_directories
142 self.vars[
"global_output_directory"] = global_output_directory
143 self.vars[
"rmf_dir"] = rmf_dir
144 self.vars[
"best_pdb_dir"] = best_pdb_dir
145 self.vars[
"atomistic"] = atomistic
146 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
149 print "ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica "
150 print "--- it stores the best scoring pdb models in pdbs/"
151 print "--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature"
152 print "--- variables:"
153 keys = self.vars.keys()
156 print "------", v.ljust(30), self.vars[v]
158 def get_replica_exchange_object(self):
159 return self.replica_exchange_object
161 def execute_macro(self):
163 temp_index_factor = 100000.0
167 if self.monte_carlo_sample_objects
is not None:
168 print "Setting up MonteCarlo"
169 sampler_mc = IMP.pmi.samplers.MonteCarlo(self.model,
170 self.monte_carlo_sample_objects,
171 self.vars[
"monte_carlo_temperature"])
172 if self.vars[
"simulated_annealing"]:
173 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
174 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
175 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
176 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
177 sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
178 self.output_objects.append(sampler_mc)
179 samplers.append(sampler_mc)
182 if self.molecular_dynamics_sample_objects
is not None:
183 print "Setting up MolecularDynamics"
184 sampler_md = IMP.pmi.samplers.MolecularDynamics(self.model,
185 self.molecular_dynamics_sample_objects,
186 self.vars[
"monte_carlo_temperature"])
187 if self.vars[
"simulated_annealing"]:
188 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
189 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
190 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
191 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
192 sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
193 self.output_objects.append(sampler_md)
194 samplers.append(sampler_md)
197 print "Setting up ReplicaExchange"
198 rex = IMP.pmi.samplers.ReplicaExchange(self.model,
200 "replica_exchange_minimum_temperature"],
202 "replica_exchange_maximum_temperature"],
204 replica_exchange_object=self.replica_exchange_object)
205 self.replica_exchange_object = rex.rem
207 myindex = rex.get_my_index()
208 self.output_objects.append(rex)
213 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
217 globaldir = self.vars[
"global_output_directory"] +
"/"
218 rmf_dir = globaldir + self.vars[
"rmf_dir"]
219 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
221 if self.vars[
"do_clean_first"]:
224 if self.vars[
"do_create_directories"]:
227 os.makedirs(globaldir)
235 if not self.is_multi_state:
241 for n
in range(self.vars[
"number_of_states"]):
243 os.makedirs(pdb_dir +
"/" + str(n))
249 sw = IMP.pmi.tools.Stopwatch()
250 self.output_objects.append(sw)
252 print "Setting up stat file"
254 low_temp_stat_file = globaldir + \
255 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
256 output.init_stat2(low_temp_stat_file,
258 extralabels=[
"rmf_file",
"rmf_frame_index"])
260 print "Setting up replica stat file"
261 replica_stat_file = globaldir + \
262 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
263 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
265 print "Setting up best pdb files"
266 if not self.is_multi_state:
267 if self.vars[
"number_of_best_scoring_models"] > 0:
268 output.init_pdb_best_scoring(pdb_dir +
"/" +
269 self.vars[
"best_pdb_name_suffix"],
272 "number_of_best_scoring_models"],
273 replica_exchange=
True)
275 if self.vars[
"number_of_best_scoring_models"] > 0:
276 for n
in range(self.vars[
"number_of_states"]):
277 output.init_pdb_best_scoring(pdb_dir +
"/" + str(n) +
"/" +
278 self.vars[
"best_pdb_name_suffix"],
281 "number_of_best_scoring_models"],
282 replica_exchange=
True)
286 if not self.em_object_for_rmf
is None:
287 if not self.is_multi_state:
288 output_hierarchies = [
290 self.em_object_for_rmf.get_density_as_hierarchy(
293 output_hierarchies = self.root_hiers
294 output_hierarchies.append(
295 self.em_object_for_rmf.get_density_as_hierarchy())
297 if not self.is_multi_state:
298 output_hierarchies = [self.root_hier]
300 output_hierarchies = self.root_hiers
303 print "Setting up and writing initial rmf coordinate file"
304 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
305 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
307 if self.crosslink_restraints:
308 output.add_restraints_to_rmf(
309 init_suffix +
"." + str(myindex) +
".rmf3",
310 self.crosslink_restraints)
311 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
312 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
316 print "Setting up production rmf files"
318 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
319 output.init_rmf(rmfname, output_hierarchies)
321 if self.crosslink_restraints:
322 output.add_restraints_to_rmf(rmfname, self.crosslink_restraints)
324 ntimes_at_low_temp = 0
329 for i
in range(self.vars[
"number_of_frames"]):
330 for nr
in range(self.vars[
"num_sample_rounds"]):
331 if sampler_mc
is not None:
332 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
333 if sampler_md
is not None:
334 sampler_md.optimize(self.vars[
"molecular_dynamics_steps"])
335 score = self.model.evaluate(
False)
336 output.set_output_entry(
"score", score)
338 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
340 if min_temp_index == my_temp_index:
341 print "--- frame %s score %s " % (str(i), str(score))
343 if i % self.vars[
"nframes_write_coordinates"]==0:
344 print '--- writing coordinates'
345 if self.vars[
"number_of_best_scoring_models"] > 0:
346 output.write_pdb_best_scoring(score)
347 output.write_rmf(rmfname)
348 output.set_output_entry(
"rmf_file", rmfname)
349 output.set_output_entry(
"rmf_frame_index", ntimes_at_low_temp)
351 output.set_output_entry(
"rmf_file", rmfname)
352 output.set_output_entry(
"rmf_frame_index",
'-1')
353 output.write_stat2(low_temp_stat_file)
354 ntimes_at_low_temp += 1
356 output.write_stat2(replica_stat_file)
357 rex.swap_temp(i, score)
368 missing_bead_size=20,
369 residue_per_gaussian=
None):
371 The macro construct a component for each subunit (no splitting, nothing fancy)
372 You can pass the resolutions and the bead size for the missing residue regions.
373 To use this macro, you must provide the following data structure:
375 Component pdbfile chainid rgb color fastafile sequence id
378 data = [("Rpb1", pdbfile, "A", 0.00000000, (fastafile, 0)),
379 ("Rpb2", pdbfile, "B", 0.09090909, (fastafile, 1)),
380 ("Rpb3", pdbfile, "C", 0.18181818, (fastafile, 2)),
381 ("Rpb4", pdbfile, "D", 0.27272727, (fastafile, 3)),
382 ("Rpb5", pdbfile, "E", 0.36363636, (fastafile, 4)),
383 ("Rpb6", pdbfile, "F", 0.45454545, (fastafile, 5)),
384 ("Rpb7", pdbfile, "G", 0.54545455, (fastafile, 6)),
385 ("Rpb8", pdbfile, "H", 0.63636364, (fastafile, 7)),
386 ("Rpb9", pdbfile, "I", 0.72727273, (fastafile, 8)),
387 ("Rpb10", pdbfile, "L", 0.81818182, (fastafile, 9)),
388 ("Rpb11", pdbfile, "J", 0.90909091, (fastafile, 10)),
389 ("Rpb12", pdbfile, "K", 1.00000000, (fastafile, 11))]
393 r = IMP.pmi.representation.Representation(m)
400 component_name = d[0]
406 fastids = IMP.pmi.tools.get_ids_from_fasta_file(fasta_file)
407 fasta_file_id = d[4][1]
409 r.create_component(component_name,
412 r.add_component_sequence(component_name,
414 id=fastids[fasta_file_id])
416 hierarchies = r.autobuild_model(component_name,
419 resolutions=resolutions,
420 missingbeadsize=missing_bead_size)
422 r.show_component_table(component_name)
424 r.set_rigid_bodies([component_name])
426 r.set_chain_of_super_rigid_bodies(
431 r.setup_component_sequence_connectivity(component_name, resolution=1)
432 r.setup_component_geometry(component_name)
436 r.set_floppy_bodies()
439 r.set_current_coordinates_as_reference_for_rmsd(
"Reference")
447 """A macro to build a Representation based on a Topology and lists of movers
448 @param model The IMP model
449 @param component_topologies List of IMP.pmi.topology.ComponentTopology items
450 @param list_of_rigid_bodies List of lists of domain names from the components.
451 @param list_of_super_rigid_bodies List of lists of domain names from the components.
452 @param chain_of_super_rigid_bodies List of lists of domain names from the components
453 Choices can only be from the same molecule
454 @param sequence_connectivity_scale For scaling the connectivity restraint
455 @param add_each_domain_as_rigid_body That way you don't have to put all of them in the list
456 @param force_create_gmm_files If True, will sample and create GMMs no matter what.
457 If False, will only only sample if the files don't exist.
458 If number of Gaussians is zero, won't do anything.
462 component_topologies,
463 list_of_rigid_bodies=[],
464 list_of_super_rigid_bodies=[],
465 chain_of_super_rigid_bodies=[],
466 sequence_connectivity_scale=4.0,
467 add_each_domain_as_rigid_body=
False,
468 force_create_gmm_files=
False):
470 self.simo = IMP.pmi.representation.Representation(self.m,
472 disorderedlength=
False)
474 data=component_topologies
475 if list_of_rigid_bodies==[]:
476 print "WARNING: No list of rigid bodies inputted to build_model()"
477 if list_of_super_rigid_bodies==[]:
478 print "WARNING: No list of super rigid bodies inputted to build_model()"
479 if chain_of_super_rigid_bodies==[]:
480 print "WARNING: No chain of super rigid bodies inputted to build_model()"
481 all_dnames = set([d
for sublist
in list_of_rigid_bodies+list_of_super_rigid_bodies\
482 +chain_of_super_rigid_bodies
for d
in sublist])
483 all_available = set([c.domain_name
for c
in component_topologies])
484 if not all_dnames <= all_available:
485 raise ValueError(
"All requested movers must reference domain "
486 "names in the component topologies")
490 super_rigid_bodies={}
491 chain_super_rigid_bodies={}
496 hier_name = c.domain_name
498 fasta_file = c.fasta_file
499 fasta_id = c.fasta_id
500 pdb_name = c.pdb_file
502 res_range = c.residue_range
503 offset = c.pdb_offset
504 bead_size = c.bead_size
505 em_num_components = c.em_residues_per_gaussian
506 em_txt_file_name = c.gmm_file
507 em_mrc_file_name = c.mrc_file
509 if comp_name
not in self.simo.get_component_names():
510 self.simo.create_component(comp_name,color=0.0)
511 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
514 if em_num_components==0:
518 if (
not os.path.isfile(em_txt_file_name))
or force_create_gmm_files:
525 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
526 res_range,include_res0,beadsize=bead_size,
527 color=color,offset=offset)
528 if em_num_components!=0:
530 print "will read GMM files"
532 print "will calculate GMMs"
534 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
535 em_mrc_file_name,em_num_components,read_em_files)
536 self.simo.add_all_atom_densities(comp_name, hierarchies=beads)
541 self.resdensities[hier_name]=dens_hier
542 self.domain_dict[hier_name]=outhier+dens_hier
545 for c
in self.simo.get_component_names():
546 self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
547 self.simo.setup_component_geometry(c)
550 for rblist
in list_of_rigid_bodies:
552 for rbmember
in rblist:
553 rb+=[h
for h
in self.domain_dict[rbmember]]
554 self.simo.set_rigid_body_from_hierarchies(rb)
555 for srblist
in list_of_super_rigid_bodies:
557 for srbmember
in rblist:
558 srb+=[h
for h
in self.domain_dict[srbmember]]
559 self.simo.set_super_rigid_body_from_hierarchies(srb)
560 for clist
in chain_of_super_rigid_bodies:
562 for crbmember
in rblist:
563 crb+=[h
for h
in self.domain_dict[crbmember]]
564 self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
566 self.simo.set_floppy_bodies()
567 self.simo.setup_bonds()
570 '''Return the Representation object'''
573 def get_density_hierarchies(self,hier_name_list):
577 for hn
in hier_name_list:
579 dens_hier_list+=self.resdensities[hn]
580 return dens_hier_list
582 def set_gmm_models_directory(self,directory_name):
583 self.gmm_models_directory=directory_name
585 def get_pdb_bead_bits(self,hierarchy):
590 if "_pdb" in h.get_name():pdbbits.append(h)
591 if "_bead" in h.get_name():beadbits.append(h)
592 if "_helix" in h.get_name():helixbits.append(h)
593 return (pdbbits,beadbits,helixbits)
595 def scale_bead_radii(self,nresidues,scale):
597 for h
in self.domain_dict:
598 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
599 slope=(1.0-scale)/(1.0-float(nresidues))
604 if b
not in scaled_beads:
610 scale_factor=slope*float(num_residues)+1.0
612 new_radius=scale_factor*radius
615 print "particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius)
618 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
620 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
623 for pb
in pdbbits+helixbits:
627 number_of_residues=len(set(res_ind))
631 outhier+=simo.add_component_density(compname,
633 num_components=num_components,
635 inputfile=txtfilename)
636 if len(helixbits)!=0:
637 outhier+=simo.add_component_density(compname,
639 num_components=num_components,
641 inputfile=txtfilename)
646 num_components=number_of_residues/abs(num_components)
647 outhier+=simo.add_component_density(compname,
649 num_components=num_components,
651 outputfile=txtfilename,
652 outputmap=mrcfilename,
653 multiply_by_total_mass=
True)
655 if len(helixbits)!=0:
656 num_components=number_of_residues/abs(num_components)
657 outhier+=simo.add_component_density(compname,
659 num_components=num_components,
661 outputfile=txtfilename,
662 outputmap=mrcfilename,
663 multiply_by_total_mass=
True)
665 return outhier,beadbits
667 def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
668 beadsize=5,color=0.0,offset=0):
669 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" :
671 resrange=(resrange[0],len(simo.sequence_dict[comname]))
673 outhier=simo.autobuild_model(comname,
677 resolutions=[0,1,10],
680 missingbeadsize=beadsize)
682 outhier=simo.autobuild_model(comname,
689 missingbeadsize=beadsize)
692 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" :
693 outhier=simo.add_component_ideal_helix(comname,
699 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" :
700 outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
704 seq_len=len(simo.sequence_dict[comname])
705 outhier=simo.add_component_necklace(comname,
714 """Deprecated building macro - use BuildModel()
715 @param representation The PMI representation"""
717 def __init__(self, representation):
718 self.simo=representation
719 self.gmm_models_directory=
"."
721 def set_gmm_models_directory(self,directory_name):
722 self.gmm_models_directory=directory_name
724 def build_model(self,data_structure,sequence_connectivity_scale=4.0):
726 @param data_structure List of lists containing these entries:
727 comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
728 res_range, read_em_files, bead_size, rb, super_rb,
729 em_num_components, em_txt_file_name, em_mrc_file_name
730 @param sequence_connectivity_scale
734 super_rigid_bodies={}
735 chain_super_rigid_bodies={}
738 for d
in data_structure:
746 res_range = d[7][0:2]
755 em_num_components = d[12]
756 em_txt_file_name = d[13]
757 em_mrc_file_name = d[14]
758 chain_of_super_rb = d[15]
760 if comp_name
not in self.simo.get_component_names():
761 self.simo.create_component(comp_name,color=0.0)
762 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
763 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
766 if not read_em_files
is None:
767 if em_txt_file_name
is " ": em_txt_file_name=self.gmm_models_directory+
"/"+hier_name+
".txt"
768 if em_mrc_file_name
is " ": em_mrc_file_name=self.gmm_models_directory+
"/"+hier_name+
".mrc"
771 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
772 self.simo.add_all_atom_densities(comp_name, hierarchies=beads)
778 self.resdensities[hier_name]=dens_hier
779 self.domain_dict[hier_name]=outhier+dens_hier
782 if rb
not in rigid_bodies:
783 rigid_bodies[rb]=[h
for h
in self.domain_dict[hier_name]]
785 rigid_bodies[rb]+=[h
for h
in self.domain_dict[hier_name]]
788 if super_rb
is not None:
790 if k
not in super_rigid_bodies:
791 super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
793 super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
795 if chain_of_super_rb
is not None:
796 for k
in chain_of_super_rb:
797 if k
not in chain_super_rigid_bodies:
798 chain_super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
800 chain_super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
802 self.rigid_bodies=rigid_bodies
805 for c
in self.simo.get_component_names():
806 self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
807 self.simo.setup_component_geometry(c)
810 for rb
in rigid_bodies:
811 self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
813 for k
in super_rigid_bodies:
814 self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
816 for k
in chain_super_rigid_bodies:
817 self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
819 self.simo.set_floppy_bodies()
820 self.simo.setup_bonds()
822 def get_density_hierarchies(self,hier_name_list):
826 for hn
in hier_name_list:
828 dens_hier_list+=self.resdensities[hn]
829 return dens_hier_list
831 def get_pdb_bead_bits(self,hierarchy):
836 if "_pdb" in h.get_name():pdbbits.append(h)
837 if "_bead" in h.get_name():beadbits.append(h)
838 if "_helix" in h.get_name():helixbits.append(h)
839 return (pdbbits,beadbits,helixbits)
841 def scale_bead_radii(self,nresidues,scale):
843 for h
in self.domain_dict:
844 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
845 slope=(1.0-scale)/(1.0-float(nresidues))
850 if b
not in scaled_beads:
856 scale_factor=slope*float(num_residues)+1.0
858 new_radius=scale_factor*radius
861 print "particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius)
864 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
866 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
870 for pb
in pdbbits+helixbits:
874 number_of_residues=len(set(res_ind))
878 outhier+=simo.add_component_density(compname,
880 num_components=num_components,
882 inputfile=txtfilename)
883 if len(helixbits)!=0:
884 outhier+=simo.add_component_density(compname,
886 num_components=num_components,
888 inputfile=txtfilename)
896 num_components=number_of_residues/abs(num_components)
897 outhier+=simo.add_component_density(compname,
899 num_components=num_components,
901 outputfile=txtfilename,
902 outputmap=mrcfilename,
903 multiply_by_total_mass=
True)
905 if len(helixbits)!=0:
909 num_components=number_of_residues/abs(num_components)
910 outhier+=simo.add_component_density(compname,
912 num_components=num_components,
914 outputfile=txtfilename,
915 outputmap=mrcfilename,
916 multiply_by_total_mass=
True)
918 return outhier,beadbits
920 def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
922 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" :
923 if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
925 outhier=simo.autobuild_model(comname,
929 resolutions=[0,1,10],
932 missingbeadsize=beadsize)
934 outhier=simo.autobuild_model(comname,
941 missingbeadsize=beadsize)
944 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" :
946 outhier=simo.add_component_ideal_helix(comname,
952 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" :
953 outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
957 seq_len=len(simo.sequence_dict[comname])
958 outhier=simo.add_component_necklace(comname,
970 """A macro for running all the basic operations of analysis.
971 Includes clustering, precision analysis, and making ensemble density maps.
972 A number of plots are also supported.
973 @param model The IMP model
974 @param stat_file_name_suffix
975 @param merge_directories The directories containing output files
976 @param best_pdb_name_suffix
977 @param do_clean_first
978 @param do_create_directories
979 @param global_output_directory Where everything is
980 @param replica_stat_file_suffix
981 @param global_analysis_result_directory
983 def __init__(self, model,
984 merge_directories=[
"./"],
985 stat_file_name_suffix=
"stat",
986 best_pdb_name_suffix=
"model",
988 do_create_directories=
True,
989 global_output_directory=
"output/",
990 replica_stat_file_suffix=
"stat_replica",
991 global_analysis_result_directory=
"./analysis/"):
995 from mpi4py
import MPI
996 self.comm = MPI.COMM_WORLD
997 self.rank = self.comm.Get_rank()
998 self.number_of_processes = self.comm.size
1001 self.number_of_processes = 1
1004 stat_dir = global_output_directory
1005 self.stat_files = []
1007 for rd
in merge_directories:
1008 stat_files = glob.glob(rd +
"/" + stat_dir +
"/stat.*.out")
1009 if len(stat_files)==0:
1010 print "WARNING: no stat files found in",rd +
"/" + stat_dir +
"/stat.*.out"
1011 self.stat_files += stat_files
1015 score_key=
"SimplifiedModel_Total_Score_None",
1016 rmf_file_key=
"rmf_file",
1017 rmf_file_frame_key=
"rmf_frame_index",
1020 nframes_trajectory=10000):
1021 """ Get a trajectory of the modeling run, for generating demonstrative movies
1022 @param score_key The score for ranking models
1023 @param rmf_file_key Key pointing to RMF filename
1024 @param rmf_file_frame_key Key pointing to RMF frame number
1025 @param outputdir The local output directory used in the run
1026 @param get_every Extract every nth frame
1027 @param nframes_trajectory Total number of frames of the trajectory
1029 from operator
import itemgetter
1032 trajectory_models = IMP.pmi.io.get_trajectory_models(self.stat_files,
1037 rmf_file_list=trajectory_models[0]
1038 rmf_file_frame_list=trajectory_models[1]
1039 score_list=map(float, trajectory_models[2])
1041 max_score=max(score_list)
1042 min_score=min(score_list)
1045 bins=[(max_score-min_score)*math.exp(-float(i))+min_score
for i
in range(nframes_trajectory)]
1046 binned_scores=[
None]*nframes_trajectory
1047 binned_model_indexes=[-1]*nframes_trajectory
1049 for model_index,s
in enumerate(score_list):
1050 bins_score_diffs=[abs(s-b)
for b
in bins]
1051 bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1052 if binned_scores[bin_index]==
None:
1053 binned_scores[bin_index]=s
1054 binned_model_indexes[bin_index]=model_index
1056 old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1057 new_diff=abs(s-bins[bin_index])
1058 if new_diff < old_diff:
1059 binned_scores[bin_index]=s
1060 binned_model_indexes[bin_index]=model_index
1063 print binned_model_indexes
1068 score_key=
"SimplifiedModel_Total_Score_None",
1069 rmf_file_key=
"rmf_file",
1070 rmf_file_frame_key=
"rmf_frame_index",
1071 prefiltervalue=
None,
1074 alignment_components=
None,
1075 number_of_best_scoring_models=10,
1076 rmsd_calculation_components=
None,
1077 distance_matrix_file=
None,
1078 load_distance_matrix_file=
False,
1079 skip_clustering=
False,
1080 number_of_clusters=1,
1082 exit_after_display=
True,
1084 first_and_last_frames=
None,
1085 density_custom_ranges=
None,
1086 write_pdb_with_centered_coordinates=
False,
1088 """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps
1089 @param score_key The score for ranking models
1090 @param rmf_file_key Key pointing to RMF filename
1091 @param rmf_file_frame_key Key pointing to RMF frame number
1092 @param prefiltervalue Only include frames where the score key is below this value
1093 @param feature_keys Keywords for which you want to calculate average,
1095 @param outputdir The local output directory used in the run
1096 @param alignment_components List of tuples for aligning the structures
1097 e.g. ["Rpb1", (20,100,"Rpb2"), .....]
1098 @param number_of_best_scoring_models Num models to keep per run
1099 @param rmsd_calculation_components List of tuples for calculating RMSD
1100 e.g. ["Rpb1", (20,100,"Rpb2"), .....]
1101 @param distance_matrix_file Where to store/read the distance matrix
1102 @param load_distance_matrix_file Try to load the distance matrix file
1103 @param skip_clustering Just extract the best scoring models and save the pdbs
1104 @param number_of_clusters Number of k-means clusters
1105 @param display_plot Display the distance matrix
1106 @param exit_after_display Exit after displaying distance matrix
1107 @param get_every Extract every nth frame
1108 @param first_and_last_frames A tuple with the first and last frames to be
1109 analyzed. Values are percentages!
1110 Default: get all frames
1111 @param density_custom_ranges List of tuples or strings for density calculation
1112 e.g. ["Rpb1", (20,100,"Rpb2"), .....]
1113 @param write_pdb_with_centered_coordinates
1114 @param voxel_size Used for the density output
1125 if not load_distance_matrix_file:
1126 if len(self.stat_files)==0:
print "ERROR: no stat file found in the given path";
return
1127 my_stat_files=IMP.pmi.tools.chunk_list_into_segments(
1128 self.stat_files,self.number_of_processes)[self.rank]
1129 best_models = IMP.pmi.io.get_best_models(my_stat_files,
1136 rmf_file_list=best_models[0]
1137 rmf_file_frame_list=best_models[1]
1138 score_list=best_models[2]
1139 feature_keyword_list_dict=best_models[3]
1145 if self.number_of_processes > 1:
1149 rmf_file_frame_list)
1150 for k
in feature_keyword_list_dict:
1152 feature_keyword_list_dict[k])
1155 score_rmf_tuples = zip(score_list,
1157 rmf_file_frame_list,
1158 range(len(score_list)))
1161 if first_and_last_frames
is not None:
1162 nframes = len(score_rmf_tuples)
1163 first_frame = int(first_and_last_frames[0] * nframes)
1164 last_frame = int(first_and_last_frames[1] * nframes)
1165 if last_frame > len(score_rmf_tuples):
1167 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1170 best_score_rmf_tuples = sorted(score_rmf_tuples,
1171 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1172 best_score_rmf_tuples=[t+(n,)
for n,t
in enumerate(best_score_rmf_tuples)]
1175 best_score_feature_keyword_list_dict = defaultdict(list)
1176 for tpl
in best_score_rmf_tuples:
1178 for f
in feature_keyword_list_dict:
1179 best_score_feature_keyword_list_dict[f].append(
1180 feature_keyword_list_dict[f][index])
1182 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1183 best_score_rmf_tuples,
1184 self.number_of_processes)[self.rank]
1190 dircluster=os.path.join(outputdir,
"all_models."+str(n))
1196 os.mkdir(dircluster)
1199 clusstat=open(os.path.join(dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1200 for cnt,tpl
in enumerate(my_best_score_rmf_tuples):
1202 rmf_frame_number=tpl[2]
1205 for key
in best_score_feature_keyword_list_dict:
1206 tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1208 prot=IMP.pmi.analysis.get_hier_from_rmf(self.model,rmf_frame_number,rmf_name)
1213 out_pdb_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".pdb")
1214 out_rmf_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".rmf")
1215 o.init_pdb(out_pdb_fn,prot)
1216 o.write_pdb(out_pdb_fn,
1217 translate_to_geometric_center=write_pdb_with_centered_coordinates)
1219 tmp_dict[
"local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1220 tmp_dict[
"rmf_file_full_path"]=rmf_name
1221 tmp_dict[
"local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1222 tmp_dict[
"local_rmf_frame_number"]=0
1224 clusstat.write(str(tmp_dict)+
"\n")
1225 o.init_rmf(out_rmf_fn,[prot])
1226 o.write_rmf(out_rmf_fn)
1227 o.close_rmf(out_rmf_fn)
1234 rmsd_weights = IMP.pmi.io.get_bead_sizes(self.model,
1235 my_best_score_rmf_tuples[0],
1236 rmsd_calculation_components)
1237 got_coords = IMP.pmi.io.read_coordinates_of_rmfs(self.model,
1238 my_best_score_rmf_tuples,
1239 alignment_components,
1240 rmsd_calculation_components)
1245 all_coordinates=got_coords[0]
1246 alignment_coordinates=got_coords[1]
1247 rmsd_coordinates=got_coords[2]
1248 rmf_file_name_index_dict=got_coords[3]
1249 all_rmf_file_names=got_coords[4]
1253 if self.number_of_processes > 1:
1259 rmf_file_name_index_dict)
1261 alignment_coordinates)
1268 [best_score_feature_keyword_list_dict,
1269 rmf_file_name_index_dict],
1275 print "setup clustering class"
1278 for n, model_coordinate_dict
in enumerate(all_coordinates):
1279 template_coordinate_dict = {}
1281 if alignment_components
is not None and len(Clusters.all_coords) == 0:
1283 Clusters.set_template(alignment_coordinates[n])
1284 Clusters.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1286 print "Global calculating the distance matrix"
1289 Clusters.dist_matrix()
1293 Clusters.do_cluster(number_of_clusters)
1296 Clusters.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1297 if exit_after_display:
1299 Clusters.save_distance_matrix_file(file_name=distance_matrix_file)
1306 print "setup clustering class"
1308 Clusters.load_distance_matrix_file(file_name=distance_matrix_file)
1309 print "clustering with %s clusters" % str(number_of_clusters)
1310 Clusters.do_cluster(number_of_clusters)
1311 [best_score_feature_keyword_list_dict,
1312 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1315 Clusters.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1316 if exit_after_display:
1318 if self.number_of_processes > 1:
1326 print Clusters.get_cluster_labels()
1327 for n, cl
in enumerate(Clusters.get_cluster_labels()):
1328 print "rank %s " % str(self.rank)
1329 print "cluster %s " % str(n)
1330 print "cluster label %s " % str(cl)
1331 print Clusters.get_cluster_label_names(cl)
1335 if density_custom_ranges:
1337 density_custom_ranges,
1340 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1342 os.mkdir(dircluster)
1346 rmsd_dict = {
"AVERAGE_RMSD":
1347 str(Clusters.get_cluster_label_average_rmsd(cl))}
1348 clusstat = open(dircluster +
"stat.out",
"w")
1349 for k, structure_name
in enumerate(Clusters.get_cluster_label_names(cl)):
1353 tmp_dict.update(rmsd_dict)
1354 index = rmf_file_name_index_dict[structure_name]
1355 for key
in best_score_feature_keyword_list_dict:
1357 key] = best_score_feature_keyword_list_dict[
1362 rmf_name = structure_name.split(
"|")[0]
1363 rmf_frame_number = int(structure_name.split(
"|")[1])
1365 clusstat.write(str(tmp_dict) +
"\n")
1366 prot,rs = IMP.pmi.analysis.get_hier_and_restraints_from_rmf(
1374 model_index = Clusters.get_model_index_from_name(
1376 transformation = Clusters.get_transformation_to_first_member(
1397 if density_custom_ranges:
1398 DensModule.add_subunits_density(prot)
1402 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1403 o.write_pdb(dircluster + str(k) +
".pdb")
1405 o.init_rmf(dircluster + str(k) +
".rmf3", [prot],rs)
1407 o.write_rmf(dircluster + str(k) +
".rmf3")
1408 o.close_rmf(dircluster + str(k) +
".rmf3")
1413 if density_custom_ranges:
1414 DensModule.write_mrc(path=dircluster)
1417 if self.number_of_processes>1:
1420 def save_objects(self, objects, file_name):
1422 outf = open(file_name,
'w')
1423 pickle.dump(objects, outf)
1426 def load_objects(self, file_name):
1428 inputf = open(file_name,
'r')
1429 objects = pickle.load(inputf)
A macro for running all the basic operations of analysis.
A member of a rigid body, it has internal (local) coordinates.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
A class to cluster structures.
Representation of the system.
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
static XYZR setup_particle(kernel::Model *m, ParticleIndex pi)
def build_model
Create model.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
A decorator for a particle with x,y,z coordinates.
def BuildModel0
The macro construct a component for each subunit (no splitting, nothing fancy) You can pass the resol...
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
Class for easy writing of PDBs, RMFs, and stat files.
A macro to build a Representation based on a Topology and lists of movers.
Tools for clustering and cluster analysis.
Classes for writing output files and processing them.
Deprecated building macro - use BuildModel()
Hierarchies get_leaves(const Selection &h)
A class to compute mean density maps from structures Keeps a dictionary of density maps...
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_representation
Return the Representation object.
A decorator for a particle with x,y,z coordinates and a radius.