1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
10 import IMP.pmi.io.input
15 from operator
import itemgetter
16 from collections
import defaultdict
20 """ A macro to help setup and run replica exchange,
21 supporting monte carlo and molecular dynamics.
22 Produces trajectory RMF files, best PDB structures,
26 def __init__(self, model,
30 monte_carlo_sample_objects=
None,
31 molecular_dynamics_sample_objects=
None,
33 crosslink_restraints=
None,
34 monte_carlo_temperature=1.0,
35 replica_exchange_minimum_temperature=1.0,
36 replica_exchange_maximum_temperature=2.5,
38 number_of_best_scoring_models=500,
40 molecular_dynamics_steps=10,
41 number_of_frames=1000,
42 nframes_write_coordinates=1,
43 write_initial_rmf=
True,
44 initial_rmf_name_suffix=
"initial",
45 stat_file_name_suffix=
"stat",
46 best_pdb_name_suffix=
"model",
48 do_create_directories=
True,
49 global_output_directory=
"./",
52 replica_stat_file_suffix=
"stat_replica",
53 em_object_for_rmf=
None,
55 replica_exchange_object=
None):
57 Setup replica exchange.
58 @param model The IMP model
59 @param representation PMI.Representation() (or list of them, for multi-state modeling)
60 @param root_hier Instead of passing Representation, just pass a hierarchy
61 @param mote_carlo_sample_objcts Objects for MC sampling
62 @param molecular_dynamics_sample_objects Objects for MD sampling
63 @param output_objects Objects with get_output() for packing into stat files
64 @param crosslink_restraints Harmonic restraints to go in output RMF files
65 @param monte_carlo_temperature MC temp
66 @param replica_exchange_minimum_temperature Low temp for REX
67 @param replica_exchange_maximum_temperature High temp for REX
68 @param num_sample_rounds Number of rounds of MC/MD per cycle
69 @param number_of_best_scoring_models Number of top-scoring PDB models to keep around
70 @param monte_carlo_steps Number of MC steps per round
71 @param molecular_dynamics_steps Number of MD steps per round
72 @param number_of_frames Number of REX frames to run
73 @param nframes_write_coordinates How often to write the coordinates of a frame
74 @param write_initial_rmf Write the initial configuration
82 if type(representation) == list:
83 self.is_multi_state =
True
84 self.root_hiers = [r.prot
for r
in representation]
85 self.vars[
"number_of_states"] = len(representation)
87 self.is_multi_state =
False
88 self.root_hier = representation.prot
89 self.vars[
"number_of_states"] = 1
92 self.vars[
"number_of_states"] = len(states)
94 self.root_hiers = states
95 self.is_multi_state =
True
97 self.root_hier = root_hier
98 self.is_multi_state =
False
100 print "ERROR: Must provide representation or root_hier"
103 self.crosslink_restraints = crosslink_restraints
104 self.em_object_for_rmf = em_object_for_rmf
105 self.monte_carlo_sample_objects = monte_carlo_sample_objects
106 if sample_objects
is not None:
107 self.monte_carlo_sample_objects+=sample_objects
108 self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
109 self.output_objects = output_objects
110 self.replica_exchange_object = replica_exchange_object
111 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
113 "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
115 "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
116 self.vars[
"num_sample_rounds"] = num_sample_rounds
118 "number_of_best_scoring_models"] = number_of_best_scoring_models
119 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
120 self.vars[
"molecular_dynamics_steps"]=molecular_dynamics_steps
121 self.vars[
"number_of_frames"] = number_of_frames
122 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
123 self.vars[
"write_initial_rmf"] = write_initial_rmf
124 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
125 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
126 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
127 self.vars[
"do_clean_first"] = do_clean_first
128 self.vars[
"do_create_directories"] = do_create_directories
129 self.vars[
"global_output_directory"] = global_output_directory
130 self.vars[
"rmf_dir"] = rmf_dir
131 self.vars[
"best_pdb_dir"] = best_pdb_dir
132 self.vars[
"atomistic"] = atomistic
133 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
136 print "ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica "
137 print "--- it stores the best scoring pdb models in pdbs/"
138 print "--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature"
139 print "--- variables:"
140 keys = self.vars.keys()
143 print "------", v.ljust(30), self.vars[v]
145 def get_replica_exchange_object(self):
146 return self.replica_exchange_object
148 def execute_macro(self):
150 temp_index_factor = 100000.0
154 if self.monte_carlo_sample_objects
is not None:
155 print "Setting up MonteCarlo"
156 sampler_mc = IMP.pmi.samplers.MonteCarlo(self.model,
157 self.monte_carlo_sample_objects,
158 self.vars[
"monte_carlo_temperature"])
159 self.output_objects.append(sampler_mc)
160 samplers.append(sampler_mc)
161 if self.molecular_dynamics_sample_objects
is not None:
162 print "Setting up MolecularDynamics"
163 sampler_md = IMP.pmi.samplers.MolecularDynamics(self.model,
164 self.molecular_dynamics_sample_objects,
165 self.vars[
"monte_carlo_temperature"])
166 self.output_objects.append(sampler_md)
167 samplers.append(sampler_md)
170 print "Setting up ReplicaExchange"
171 rex = IMP.pmi.samplers.ReplicaExchange(self.model,
173 "replica_exchange_minimum_temperature"],
175 "replica_exchange_maximum_temperature"],
177 replica_exchange_object=self.replica_exchange_object)
178 self.replica_exchange_object = rex.rem
180 myindex = rex.get_my_index()
181 self.output_objects.append(rex)
186 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
190 globaldir = self.vars[
"global_output_directory"] +
"/"
191 rmf_dir = globaldir + self.vars[
"rmf_dir"]
192 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
194 if self.vars[
"do_clean_first"]:
197 if self.vars[
"do_create_directories"]:
200 os.makedirs(globaldir)
208 if not self.is_multi_state:
214 for n
in range(self.vars[
"number_of_states"]):
216 os.makedirs(pdb_dir +
"/" + str(n))
222 sw = IMP.pmi.tools.Stopwatch()
223 self.output_objects.append(sw)
225 print "Setting up stat file"
227 low_temp_stat_file = globaldir + \
228 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
229 output.init_stat2(low_temp_stat_file,
231 extralabels=[
"rmf_file",
"rmf_frame_index"])
233 print "Setting up replica stat file"
234 replica_stat_file = globaldir + \
235 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
236 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
238 print "Setting up best pdb files"
239 if not self.is_multi_state:
240 output.init_pdb_best_scoring(pdb_dir +
"/" +
241 self.vars[
"best_pdb_name_suffix"],
244 "number_of_best_scoring_models"],
245 replica_exchange=
True)
247 for n
in range(self.vars[
"number_of_states"]):
248 output.init_pdb_best_scoring(pdb_dir +
"/" + str(n) +
"/" +
249 self.vars[
"best_pdb_name_suffix"],
252 "number_of_best_scoring_models"],
253 replica_exchange=
True)
257 if not self.em_object_for_rmf
is None:
258 if not self.is_multi_state:
259 output_hierarchies = [
261 self.em_object_for_rmf.get_density_as_hierarchy(
264 output_hierarchies = self.root_hiers
265 output_hierarchies.append(
266 self.em_object_for_rmf.get_density_as_hierarchy())
268 if not self.is_multi_state:
269 output_hierarchies = [self.root_hier]
271 output_hierarchies = self.root_hiers
274 print "Setting up and writing initial rmf coordinate file"
275 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
276 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
278 if self.crosslink_restraints:
279 output.add_restraints_to_rmf(
280 init_suffix +
"." + str(myindex) +
".rmf3",
281 self.crosslink_restraints)
282 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
283 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
287 print "Setting up production rmf files"
289 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
290 output.init_rmf(rmfname, output_hierarchies)
292 if self.crosslink_restraints:
293 output.add_restraints_to_rmf(rmfname, self.crosslink_restraints)
295 ntimes_at_low_temp = 0
300 for i
in range(self.vars[
"number_of_frames"]):
301 for nr
in range(self.vars[
"num_sample_rounds"]):
302 if sampler_mc
is not None:
303 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
304 if sampler_md
is not None:
305 sampler_md.optimize(self.vars[
"molecular_dynamics_steps"])
306 score = self.model.evaluate(
False)
307 output.set_output_entry(
"score", score)
309 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
311 if min_temp_index == my_temp_index:
312 print "--- frame %s score %s " % (str(i), str(score))
314 if i % self.vars[
"nframes_write_coordinates"]==0:
315 print '--- writing coordinates'
316 output.write_pdb_best_scoring(score)
317 output.write_rmf(rmfname)
318 output.set_output_entry(
"rmf_file", rmfname)
319 output.set_output_entry(
"rmf_frame_index", ntimes_at_low_temp)
321 output.set_output_entry(
"rmf_file", rmfname)
322 output.set_output_entry(
"rmf_frame_index",
'-1')
323 output.write_stat2(low_temp_stat_file)
324 ntimes_at_low_temp += 1
326 output.write_stat2(replica_stat_file)
327 rex.swap_temp(i, score)
338 missing_bead_size=20,
339 residue_per_gaussian=
None):
341 The macro construct a component for each subunit (no splitting, nothing fancy)
342 You can pass the resolutions and the bead size for the missing residue regions.
343 To use this macro, you must provide the following data structure:
345 Component pdbfile chainid rgb color fastafile sequence id
348 data = [("Rpb1", pdbfile, "A", 0.00000000, (fastafile, 0)),
349 ("Rpb2", pdbfile, "B", 0.09090909, (fastafile, 1)),
350 ("Rpb3", pdbfile, "C", 0.18181818, (fastafile, 2)),
351 ("Rpb4", pdbfile, "D", 0.27272727, (fastafile, 3)),
352 ("Rpb5", pdbfile, "E", 0.36363636, (fastafile, 4)),
353 ("Rpb6", pdbfile, "F", 0.45454545, (fastafile, 5)),
354 ("Rpb7", pdbfile, "G", 0.54545455, (fastafile, 6)),
355 ("Rpb8", pdbfile, "H", 0.63636364, (fastafile, 7)),
356 ("Rpb9", pdbfile, "I", 0.72727273, (fastafile, 8)),
357 ("Rpb10", pdbfile, "L", 0.81818182, (fastafile, 9)),
358 ("Rpb11", pdbfile, "J", 0.90909091, (fastafile, 10)),
359 ("Rpb12", pdbfile, "K", 1.00000000, (fastafile, 11))]
363 r = IMP.pmi.representation.Representation(m)
370 component_name = d[0]
376 fastids = IMP.pmi.tools.get_ids_from_fasta_file(fasta_file)
377 fasta_file_id = d[4][1]
379 r.create_component(component_name,
382 r.add_component_sequence(component_name,
384 id=fastids[fasta_file_id])
386 hierarchies = r.autobuild_model(component_name,
389 resolutions=resolutions,
390 missingbeadsize=missing_bead_size)
392 r.show_component_table(component_name)
394 r.set_rigid_bodies([component_name])
396 r.set_chain_of_super_rigid_bodies(
401 r.setup_component_sequence_connectivity(component_name, resolution=1)
402 r.setup_component_geometry(component_name)
406 r.set_floppy_bodies()
409 r.set_current_coordinates_as_reference_for_rmsd(
"Reference")
418 ''' this building scheme needs a data structure with the following fields
436 def __init__(self, representation):
437 self.simo=representation
438 self.gmm_models_directory=
"."
440 def set_gmm_models_directory(self,directory_name):
441 self.gmm_models_directory=directory_name
443 def build_model(self,data_structure,sequence_connectivity_scale=4.0):
447 super_rigid_bodies={}
448 chain_super_rigid_bodies={}
451 for d
in data_structure:
459 res_range = d[7][0:2]
468 em_num_components = d[12]
469 em_txt_file_name = d[13]
470 em_mrc_file_name = d[14]
471 chain_of_super_rb = d[15]
473 if comp_name
not in self.simo.get_component_names():
474 self.simo.create_component(comp_name,color=0.0)
475 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
476 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
478 if not read_em_files
is None:
479 if em_txt_file_name
is " ": em_txt_file_name=self.gmm_models_directory+
"/"+hier_name+
".txt"
480 if em_mrc_file_name
is " ": em_mrc_file_name=self.gmm_models_directory+
"/"+hier_name+
".mrc"
483 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
484 self.simo.add_all_atom_densities(comp_name, hierarchies=beads)
490 self.resdensities[hier_name]=dens_hier
491 self.domain_dict[hier_name]=outhier+dens_hier
494 if rb
not in rigid_bodies:
495 rigid_bodies[rb]=[h
for h
in self.domain_dict[hier_name]]
497 rigid_bodies[rb]+=[h
for h
in self.domain_dict[hier_name]]
500 if super_rb
is not None:
502 if k
not in super_rigid_bodies:
503 super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
505 super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
507 if chain_of_super_rb
is not None:
508 for k
in chain_of_super_rb:
509 if k
not in chain_super_rigid_bodies:
510 chain_super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
512 chain_super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
514 self.rigid_bodies=rigid_bodies
517 for c
in self.simo.get_component_names():
518 self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
519 self.simo.setup_component_geometry(c)
522 for rb
in rigid_bodies:
523 self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
525 for k
in super_rigid_bodies:
526 self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
528 for k
in chain_super_rigid_bodies:
529 self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
531 self.simo.set_floppy_bodies()
532 self.simo.setup_bonds()
534 def get_density_hierarchies(self,hier_name_list):
538 for hn
in hier_name_list:
540 dens_hier_list+=self.resdensities[hn]
541 return dens_hier_list
543 def get_pdb_bead_bits(self,hierarchy):
548 if "_pdb" in h.get_name():pdbbits.append(h)
549 if "_bead" in h.get_name():beadbits.append(h)
550 if "_helix" in h.get_name():helixbits.append(h)
551 return (pdbbits,beadbits,helixbits)
553 def scale_bead_radii(self,nresidues,scale):
555 for h
in self.domain_dict:
556 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
557 slope=(1.0-scale)/(1.0-float(nresidues))
562 if b
not in scaled_beads:
568 scale_factor=slope*float(num_residues)+1.0
570 new_radius=scale_factor*radius
573 print "particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius)
576 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
578 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
584 outhier+=simo.add_component_density(compname,
586 num_components=num_components,
588 inputfile=txtfilename)
589 if len(helixbits)!=0:
590 outhier+=simo.add_component_density(compname,
592 num_components=num_components,
594 inputfile=txtfilename)
599 outhier+=simo.add_component_density(compname,
601 num_components=num_components,
603 outputfile=txtfilename,
604 outputmap=mrcfilename,
605 multiply_by_total_mass=
True)
607 if len(helixbits)!=0:
608 outhier+=simo.add_component_density(compname,
610 num_components=num_components,
612 outputfile=txtfilename,
613 outputmap=mrcfilename,
614 multiply_by_total_mass=
True)
616 return outhier,beadbits
618 def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
620 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" :
621 if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
623 outhier=simo.autobuild_model(comname,
627 resolutions=[0,1,10],
630 missingbeadsize=beadsize)
632 outhier=simo.autobuild_model(comname,
639 missingbeadsize=beadsize)
642 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" :
644 outhier=simo.add_component_ideal_helix(comname,
650 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" :
651 outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
655 seq_len=len(simo.sequence_dict[comname])
656 outhier=simo.add_component_necklace(comname,
667 """A macro for running all the basic operations of analysis.
668 Including clustering, precision analysis, and making ensemble density maps.
669 A number of plots are also supported.
671 def __init__(self, model,
672 stat_file_name_suffix=
"stat",
674 merge_directories=[
"./"],
675 best_pdb_name_suffix=
"model",
677 do_create_directories=
True,
678 global_output_directory=
"./",
679 replica_stat_file_suffix=
"stat_replica",
680 global_analysis_result_directory=
"./analysis/",
685 @param model The IMP model
686 @param stat_file_name_suffix
687 @param merge_directories The directories containing output files
688 @param best_pdb_name_suffix
689 @param do_clean_first
690 @param do_create_directories
691 @param global_output_directory Where everything is
692 @param replica_stat_file_suffix
693 @param global_analysis_result_directory
697 stat_dir = global_output_directory
700 for rd
in merge_directories:
701 stat_files = glob.glob(rd +
"/" + stat_dir +
"/stat.*.out")
702 self.stat_files += stat_files
705 score_key=
"SimplifiedModel_Total_Score_None",
706 rmf_file_key=
"rmf_file",
707 rmf_file_frame_key=
"rmf_frame_index",
711 alignment_components=
None,
712 number_of_best_scoring_models=10,
713 rmsd_calculation_components=
None,
714 distance_matrix_file=
None,
715 load_distance_matrix_file=
False,
717 skip_clustering=
False,
718 number_of_clusters=1,
720 exit_after_display=
True,
722 first_and_last_frames=
None,
723 density_custom_ranges=
None,
724 write_pdb_with_centered_coordinates=
False,
726 """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps
727 @param score_key The score for ranking models
728 @param rmf_file_key Key pointing to RMF filename
729 @param rmf_file_frame_key Key pointing to RMF frame number
730 @param prefiltervalue Only include frames where the score key is below this value
731 @param feature_keys Keywords for which you want to calculate average,
733 @param outputdir The local output directory used in the run
734 @param alignment_components List of tuples for aligning the structures
735 e.g. ["Rpb1", (20,100,"Rpb2"), .....]
736 @param number_of_best_scoring_models Num models to keep per run
737 @param rmsd_calculation_components List of tuples for calculating RMSD
738 e.g. ["Rpb1", (20,100,"Rpb2"), .....]
739 @param distance_matrix_file Where to store/read the distance matrix
740 @param load_distance_matrix_file Try to load the distance matrix file
741 @param is_mpi Enable MPI
742 @param skip_clustering Just extract the best scoring models and save the pdbs
743 @param number_of_clusters Number of k-means clusters
744 @param display_plot Display the distance matrix
745 @param exit_after_display Exit after displaying distance matrix
746 @param get_every Extract every nth frame
747 @param first_and_last_frames A tuple with the first and last frames to be
748 analyzed. Values are percentages!
749 Default: get all frames
750 @param density_custom_ranges List of tuples or strings for density calculation
751 e.g. ["Rpb1", (20,100,"Rpb2"), .....]
752 @param write_pdb_with_centered_coordinates
753 @param voxel_size Used for the density output
756 from mpi4py
import MPI
757 comm = MPI.COMM_WORLD
758 rank = comm.Get_rank()
759 number_of_processes = comm.size
762 number_of_processes = 1
766 if not load_distance_matrix_file:
767 if len(self.stat_files)==0:
print "ERROR: no stat file found in the given path";
return
768 my_stat_files=IMP.pmi.tools.chunk_list_into_segments(self.stat_files,number_of_processes)[rank]
769 best_models = IMP.pmi.io.input.get_best_models(my_stat_files,
776 rmf_file_list=best_models[0]
777 rmf_file_frame_list=best_models[1]
778 score_list=best_models[2]
779 feature_keyword_list_dict=best_models[3]
785 if number_of_processes > 1:
790 for k
in feature_keyword_list_dict:
792 feature_keyword_list_dict[k])
795 score_rmf_tuples = zip(score_list,
798 range(len(score_list)))
801 if first_and_last_frames
is not None:
802 nframes = len(score_rmf_tuples)
803 first_frame = int(first_and_last_frames[0] * nframes)
804 last_frame = int(first_and_last_frames[1] * nframes)
805 if last_frame > len(score_rmf_tuples):
807 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
810 best_score_rmf_tuples = sorted(score_rmf_tuples,
811 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
812 best_score_rmf_tuples=[t+(n,)
for n,t
in enumerate(best_score_rmf_tuples)]
815 best_score_feature_keyword_list_dict = defaultdict(list)
816 for tpl
in best_score_rmf_tuples:
818 for f
in feature_keyword_list_dict:
819 best_score_feature_keyword_list_dict[f].append(
820 feature_keyword_list_dict[f][index])
822 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
823 best_score_rmf_tuples,
824 number_of_processes)[rank]
830 dircluster=os.path.join(outputdir,
"all_models."+str(n))
839 clusstat=open(os.path.join(dircluster,
"stat."+str(rank)+
".out"),
"w")
840 for cnt,tpl
in enumerate(my_best_score_rmf_tuples):
842 rmf_frame_number=tpl[2]
845 for key
in best_score_feature_keyword_list_dict:
846 tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
848 prot=IMP.pmi.analysis.get_hier_from_rmf(self.model,rmf_frame_number,rmf_name)
853 out_pdb_fn=os.path.join(dircluster,str(cnt)+
"."+str(rank)+
".pdb")
854 out_rmf_fn=os.path.join(dircluster,str(cnt)+
"."+str(rank)+
".rmf")
855 o.init_pdb(out_pdb_fn,prot)
856 o.write_pdb(out_pdb_fn,
857 translate_to_geometric_center=write_pdb_with_centered_coordinates)
859 tmp_dict[
"local_pdb_file_name"]=os.path.basename(out_pdb_fn)
860 tmp_dict[
"rmf_file_full_path"]=rmf_name
861 tmp_dict[
"local_rmf_file_name"]=os.path.basename(out_rmf_fn)
862 tmp_dict[
"local_rmf_frame_number"]=0
864 clusstat.write(str(tmp_dict)+
"\n")
865 o.init_rmf(out_rmf_fn,[prot])
866 o.write_rmf(out_rmf_fn)
867 o.close_rmf(out_rmf_fn)
874 rmsd_weights = IMP.pmi.io.input.get_bead_sizes(self.model,
875 my_best_score_rmf_tuples[0],
876 rmsd_calculation_components)
877 got_coords = IMP.pmi.io.input.read_coordinates_of_rmfs(self.model,
878 my_best_score_rmf_tuples,
879 alignment_components,
880 rmsd_calculation_components)
881 all_coordinates=got_coords[0]
882 alignment_coordinates=got_coords[1]
883 rmsd_coordinates=got_coords[2]
884 rmf_file_name_index_dict=got_coords[3]
885 all_rmf_file_names=got_coords[4]
889 if number_of_processes > 1:
895 rmf_file_name_index_dict)
897 alignment_coordinates)
904 [best_score_feature_keyword_list_dict,
905 rmf_file_name_index_dict],
911 print "setup clustering class"
914 for n, model_coordinate_dict
in enumerate(all_coordinates):
915 template_coordinate_dict = {}
917 if alignment_components
is not None and len(Clusters.all_coords) == 0:
919 Clusters.set_template(alignment_coordinates[n])
920 Clusters.fill(all_rmf_file_names[n], rmsd_coordinates[n])
922 print "Global calculating the distance matrix"
925 Clusters.dist_matrix(is_mpi=is_mpi)
929 Clusters.do_cluster(number_of_clusters)
932 Clusters.plot_matrix()
933 if number_of_processes > 1:
935 if exit_after_display:
937 Clusters.save_distance_matrix_file(file_name=distance_matrix_file)
944 print "setup clustering class"
946 Clusters.load_distance_matrix_file(file_name=distance_matrix_file)
947 print "clustering with %s clusters" % str(number_of_clusters)
948 Clusters.do_cluster(number_of_clusters)
949 [best_score_feature_keyword_list_dict,
950 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
953 Clusters.plot_matrix()
954 if number_of_processes > 1:
956 if exit_after_display:
964 print Clusters.get_cluster_labels()
965 for n, cl
in enumerate(Clusters.get_cluster_labels()):
966 print "rank %s " % str(rank)
967 print "cluster %s " % str(n)
968 print "cluster label %s " % str(cl)
969 print Clusters.get_cluster_label_names(cl)
973 if density_custom_ranges:
975 density_custom_ranges,
978 dircluster = outputdir +
"/cluster." + str(n) +
"/"
988 rmsd_dict = {
"AVERAGE_RMSD":
989 str(Clusters.get_cluster_label_average_rmsd(cl))}
990 clusstat = open(dircluster +
"stat.out",
"w")
991 for k, structure_name
in enumerate(Clusters.get_cluster_label_names(cl)):
995 tmp_dict.update(rmsd_dict)
996 index = rmf_file_name_index_dict[structure_name]
997 for key
in best_score_feature_keyword_list_dict:
999 key] = best_score_feature_keyword_list_dict[
1005 rmf_name = structure_name.split(
"|")[0]
1006 rmf_frame_number = int(structure_name.split(
"|")[1])
1008 clusstat.write(str(tmp_dict) +
"\n")
1009 prot,rs = IMP.pmi.analysis.get_hier_and_restraints_from_rmf(
1017 model_index = Clusters.get_model_index_from_name(
1019 transformation = Clusters.get_transformation_to_first_member(
1040 if density_custom_ranges:
1041 DensModule.add_subunits_density(prot)
1044 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1045 o.write_pdb(dircluster + str(k) +
".pdb")
1047 o.init_rmf(dircluster + str(k) +
".rmf3", [prot],rs)
1049 o.write_rmf(dircluster + str(k) +
".rmf3")
1050 o.close_rmf(dircluster + str(k) +
".rmf3")
1055 if density_custom_ranges:
1056 DensModule.write_mrc(path=dircluster)
1062 def save_objects(self, objects, file_name):
1064 outf = open(file_name,
'w')
1065 pickle.dump(objects, outf)
1068 def load_objects(self, file_name):
1070 inputf = open(file_name,
'r')
1071 objects = pickle.load(inputf)
A macro for running all the basic operations of analysis.
A member of a rigid body, it has internal (local) coordinates.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
A class to cluster structures.
Representation of the system.
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
static XYZR setup_particle(kernel::Model *m, ParticleIndex pi)
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
A decorator for a particle with x,y,z coordinates.
def BuildModel0
The macro construct a component for each subunit (no splitting, nothing fancy) You can pass the resol...
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
Class for easy writing of PDBs, RMFs, and stat files.
Classes for writing output files and processing them.
this building scheme needs a data structure with the following fields comp_name hier_name color fasta...
A macro to help setup and run replica exchange, supporting monte carlo and molecular dynamics...
Hierarchies get_leaves(const Selection &h)
A class to compute mean density maps from structures.
Support for the RMF file format for storing hierarchical molecular data and markup.
A decorator for a particle with x,y,z coordinates and a radius.