1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
19 from operator
import itemgetter
20 from collections
import defaultdict
24 """A macro to help setup and run replica exchange.
25 Supports Monte Carlo and molecular dynamics.
26 Produces trajectory RMF files, best PDB structures,
27 and output stat files.
33 monte_carlo_sample_objects=
None,
34 molecular_dynamics_sample_objects=
None,
36 crosslink_restraints=
None,
37 monte_carlo_temperature=1.0,
38 simulated_annealing=
False,
39 simulated_annealing_minimum_temperature=1.0,
40 simulated_annealing_maximum_temperature=2.5,
41 simulated_annealing_minimum_temperature_nframes=100,
42 simulated_annealing_maximum_temperature_nframes=100,
43 replica_exchange_minimum_temperature=1.0,
44 replica_exchange_maximum_temperature=2.5,
46 number_of_best_scoring_models=500,
48 molecular_dynamics_steps=10,
49 molecular_dynamics_max_time_step=1.0,
50 number_of_frames=1000,
51 nframes_write_coordinates=1,
52 write_initial_rmf=
True,
53 initial_rmf_name_suffix=
"initial",
54 stat_file_name_suffix=
"stat",
55 best_pdb_name_suffix=
"model",
57 do_create_directories=
True,
58 global_output_directory=
"./",
61 replica_stat_file_suffix=
"stat_replica",
62 em_object_for_rmf=
None,
64 replica_exchange_object=
None,
67 @param model The IMP model
68 @param representation PMI.representation.Representation object
69 (or list of them, for multi-state modeling)
70 @param root_hier Instead of passing Representation, just pass
72 @param monte_carlo_sample_objects Objects for MC sampling; a list of
73 structural components (generally the representation) that will
74 be moved and restraints with parameters that need to
76 For PMI2: just pass flat list of movers
77 @param molecular_dynamics_sample_objects Objects for MD sampling
78 For PMI2: just pass flat list of particles
79 @param output_objects A list of structural objects and restraints
80 that will be included in output (statistics files). Any object
81 that provides a get_output() method can be used here.
82 @param crosslink_restraints List of cross-link restraints that will
83 be included in output RMF files (for visualization).
84 @param monte_carlo_temperature MC temp (may need to be optimized
85 based on post-sampling analysis)
86 @param simulated_annealing If True, perform simulated annealing
87 @param simulated_annealing_minimum_temperature Should generally be
88 the same as monte_carlo_temperature.
89 @param simulated_annealing_minimum_temperature_nframes Number of
90 frames to compute at minimum temperature.
91 @param simulated_annealing_maximum_temperature_nframes Number of
93 temps > simulated_annealing_maximum_temperature.
94 @param replica_exchange_minimum_temperature Low temp for REX; should
95 generally be the same as monte_carlo_temperature.
96 @param replica_exchange_maximum_temperature High temp for REX
97 @param num_sample_rounds Number of rounds of MC/MD per cycle
98 @param number_of_best_scoring_models Number of top-scoring PDB models
99 to keep around for analysis
100 @param monte_carlo_steps Number of MC steps per round
101 @param molecular_dynamics_steps Number of MD steps per round
102 @param molecular_dynamics_max_time_step Max time step for MD
103 @param number_of_frames Number of REX frames to run
104 @param nframes_write_coordinates How often to write the coordinates
106 @param write_initial_rmf Write the initial configuration
107 @param global_output_directory Folder that will be created to house
109 @param test_mode Set to True to avoid writing any files, just test one frame.
116 if type(representation) == list:
117 self.is_multi_state =
True
118 self.root_hiers = [r.prot
for r
in representation]
119 self.vars[
"number_of_states"] = len(representation)
121 self.is_multi_state =
False
122 self.root_hier = representation.prot
123 self.vars[
"number_of_states"] = 1
125 states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
126 self.vars[
"number_of_states"] = len(states)
128 self.root_hiers = states
129 self.is_multi_state =
True
131 self.root_hier = root_hier
132 self.is_multi_state =
False
134 print(
"ERROR: Must provide representation or root_hier")
137 self.crosslink_restraints = crosslink_restraints
138 self.em_object_for_rmf = em_object_for_rmf
139 self.monte_carlo_sample_objects = monte_carlo_sample_objects
140 if sample_objects
is not None:
141 self.monte_carlo_sample_objects+=sample_objects
142 self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
143 self.output_objects = output_objects
144 self.replica_exchange_object = replica_exchange_object
145 self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
146 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
148 "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
150 "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
152 self.vars[
"simulated_annealing"]=\
154 self.vars[
"simulated_annealing_minimum_temperature"]=\
155 simulated_annealing_minimum_temperature
156 self.vars[
"simulated_annealing_maximum_temperature"]=\
157 simulated_annealing_maximum_temperature
158 self.vars[
"simulated_annealing_minimum_temperature_nframes"]=\
159 simulated_annealing_minimum_temperature_nframes
160 self.vars[
"simulated_annealing_maximum_temperature_nframes"]=\
161 simulated_annealing_maximum_temperature_nframes
163 self.vars[
"num_sample_rounds"] = num_sample_rounds
165 "number_of_best_scoring_models"] = number_of_best_scoring_models
166 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
167 self.vars[
"molecular_dynamics_steps"]=molecular_dynamics_steps
168 self.vars[
"number_of_frames"] = number_of_frames
169 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
170 self.vars[
"write_initial_rmf"] = write_initial_rmf
171 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
172 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
173 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
174 self.vars[
"do_clean_first"] = do_clean_first
175 self.vars[
"do_create_directories"] = do_create_directories
176 self.vars[
"global_output_directory"] = global_output_directory
177 self.vars[
"rmf_dir"] = rmf_dir
178 self.vars[
"best_pdb_dir"] = best_pdb_dir
179 self.vars[
"atomistic"] = atomistic
180 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
181 self.vars[
"geometries"] =
None
182 self.test_mode = test_mode
186 if self.vars[
"geometries"]
is None:
187 self.vars[
"geometries"] = list(geometries)
189 self.vars[
"geometries"].extend(geometries)
192 print(
"ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
193 print(
"--- it stores the best scoring pdb models in pdbs/")
194 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
195 print(
"--- variables:")
196 keys = list(self.vars.keys())
199 print(
"------", v.ljust(30), self.vars[v])
201 def get_replica_exchange_object(self):
202 return self.replica_exchange_object
204 def execute_macro(self):
205 temp_index_factor = 100000.0
209 if self.monte_carlo_sample_objects
is not None:
210 print(
"Setting up MonteCarlo")
212 self.monte_carlo_sample_objects,
213 self.vars[
"monte_carlo_temperature"])
214 if self.vars[
"simulated_annealing"]:
215 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
216 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
217 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
218 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
219 sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
220 self.output_objects.append(sampler_mc)
221 samplers.append(sampler_mc)
224 if self.molecular_dynamics_sample_objects
is not None:
225 print(
"Setting up MolecularDynamics")
227 self.molecular_dynamics_sample_objects,
228 self.vars[
"monte_carlo_temperature"],
229 maximum_time_step=self.molecular_dynamics_max_time_step)
230 if self.vars[
"simulated_annealing"]:
231 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
232 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
233 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
234 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
235 sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
236 self.output_objects.append(sampler_md)
237 samplers.append(sampler_md)
240 print(
"Setting up ReplicaExchange")
243 "replica_exchange_minimum_temperature"],
245 "replica_exchange_maximum_temperature"],
247 replica_exchange_object=self.replica_exchange_object)
248 self.replica_exchange_object = rex.rem
250 myindex = rex.get_my_index()
251 self.output_objects.append(rex)
256 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
260 globaldir = self.vars[
"global_output_directory"] +
"/"
261 rmf_dir = globaldir + self.vars[
"rmf_dir"]
262 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
264 if not self.test_mode:
265 if self.vars[
"do_clean_first"]:
268 if self.vars[
"do_create_directories"]:
271 os.makedirs(globaldir)
279 if not self.is_multi_state:
285 for n
in range(self.vars[
"number_of_states"]):
287 os.makedirs(pdb_dir +
"/" + str(n))
293 sw = IMP.pmi.tools.Stopwatch()
294 self.output_objects.append(sw)
296 print(
"Setting up stat file")
298 low_temp_stat_file = globaldir + \
299 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
300 if not self.test_mode:
301 output.init_stat2(low_temp_stat_file,
303 extralabels=[
"rmf_file",
"rmf_frame_index"])
305 print(
"Setting up replica stat file")
306 replica_stat_file = globaldir + \
307 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
308 if not self.test_mode:
309 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
311 if not self.test_mode:
312 print(
"Setting up best pdb files")
313 if not self.is_multi_state:
314 if self.vars[
"number_of_best_scoring_models"] > 0:
315 output.init_pdb_best_scoring(pdb_dir +
"/" +
316 self.vars[
"best_pdb_name_suffix"],
319 "number_of_best_scoring_models"],
320 replica_exchange=
True)
321 output.write_psf(pdb_dir +
"/" +
"model.psf",pdb_dir +
"/" +
322 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
324 if self.vars[
"number_of_best_scoring_models"] > 0:
325 for n
in range(self.vars[
"number_of_states"]):
326 output.init_pdb_best_scoring(pdb_dir +
"/" + str(n) +
"/" +
327 self.vars[
"best_pdb_name_suffix"],
330 "number_of_best_scoring_models"],
331 replica_exchange=
True)
332 output.write_psf(pdb_dir +
"/" + str(n) +
"/" +
"model.psf",pdb_dir +
"/" + str(n) +
"/" +
333 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
336 if not self.em_object_for_rmf
is None:
337 if not self.is_multi_state:
338 output_hierarchies = [
340 self.em_object_for_rmf.get_density_as_hierarchy(
343 output_hierarchies = self.root_hiers
344 output_hierarchies.append(
345 self.em_object_for_rmf.get_density_as_hierarchy())
347 if not self.is_multi_state:
348 output_hierarchies = [self.root_hier]
350 output_hierarchies = self.root_hiers
353 if not self.test_mode:
354 print(
"Setting up and writing initial rmf coordinate file")
355 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
356 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
358 if self.crosslink_restraints:
359 output.add_restraints_to_rmf(
360 init_suffix +
"." + str(myindex) +
".rmf3",
361 self.crosslink_restraints)
362 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
363 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
367 if not self.test_mode:
368 print(
"Setting up production rmf files")
369 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
370 output.init_rmf(rmfname, output_hierarchies, geometries=self.vars[
"geometries"])
372 if self.crosslink_restraints:
373 output.add_restraints_to_rmf(rmfname, self.crosslink_restraints)
375 ntimes_at_low_temp = 0
379 self.replica_exchange_object.set_was_used(
True)
380 for i
in range(self.vars[
"number_of_frames"]):
381 for nr
in range(self.vars[
"num_sample_rounds"]):
382 if sampler_md
is not None:
383 sampler_md.optimize(self.vars[
"molecular_dynamics_steps"])
384 if sampler_mc
is not None:
385 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
387 output.set_output_entry(
"score", score)
389 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
391 if min_temp_index == my_temp_index:
392 print(
"--- frame %s score %s " % (str(i), str(score)))
394 if not self.test_mode:
395 if i % self.vars[
"nframes_write_coordinates"]==0:
396 print(
'--- writing coordinates')
397 if self.vars[
"number_of_best_scoring_models"] > 0:
398 output.write_pdb_best_scoring(score)
399 output.write_rmf(rmfname)
400 output.set_output_entry(
"rmf_file", rmfname)
401 output.set_output_entry(
"rmf_frame_index", ntimes_at_low_temp)
403 output.set_output_entry(
"rmf_file", rmfname)
404 output.set_output_entry(
"rmf_frame_index",
'-1')
405 output.write_stat2(low_temp_stat_file)
406 ntimes_at_low_temp += 1
408 if not self.test_mode:
409 output.write_stat2(replica_stat_file)
410 rex.swap_temp(i, score)
414 """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
415 Easily create multi-state systems by calling this macro
416 repeatedly with different TopologyReader objects!
417 A useful function is get_molecules() which returns the PMI Molecules grouped by state
418 as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
420 Quick multi-state system:
423 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
424 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
426 bs = IMP.pmi.macros.BuildSystem(mdl)
427 bs.add_state(reader1)
428 bs.add_state(reader2)
429 bs.execute_macro() # build everything including degrees of freedom
431 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
432 ### now you have a two state system, you add restraints etc
435 \note The "domain name" entry of the topology reader is not used.
436 All molecules are set up by the component name, but split into rigid bodies
441 sequence_connectivity_scale=4.0,
442 force_create_gmm_files=
False):
444 @param mdl An IMP Model
445 @param sequence_connectivity_scale For scaling the connectivity restraint
446 @param force_create_gmm_files If True, will sample and create GMMs
447 no matter what. If False, will only only sample if the
448 files don't exist. If number of Gaussians is zero, won't
455 self.force_create_gmm_files = force_create_gmm_files
457 """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
458 When you are done adding states, call execute_macro()
459 @param reader The TopologyReader object
461 state = self.system.create_state()
462 self._readers.append(reader)
467 for molname
in reader.get_unique_molecules():
468 mlist = reader.get_unique_molecules()[molname]
470 mol = state.create_molecule(molname,seq[mlist[0].fasta_id],mlist[0].chain)
473 if domain.residue_range==[]
or domain.residue_range
is None:
474 domain_res = mol.get_residues()
476 start = domain.residue_range[0]+domain.pdb_offset
477 if domain.residue_range[1]==-1:
480 end = domain.residue_range[1]+domain.pdb_offset
481 domain_res = mol.residue_range(start,end)
482 if domain.pdb_file==
"BEADS":
483 mol.add_representation(domain_res,
484 resolutions=[domain.bead_size],
485 setup_particles_as_densities=(domain.em_residues_per_gaussian!=0))
486 these_domains[domain.domain_name] = (set(),domain_res)
487 elif domain.pdb_file==
"IDEAL_HELIX":
488 mol.add_representation(domain_res,
489 resolutions=reader.resolutions,
491 density_residues_per_component=domain.em_residues_per_gaussian,
492 density_prefix=domain.density_prefix,
493 density_force_compute=self.force_create_gmm_files)
494 these_domains[domain.domain_name] = (domain_res,set())
496 domain_atomic = mol.add_structure(domain.pdb_file,
498 domain.residue_range,
501 domain_non_atomic = domain_res - domain_atomic
502 if domain.em_residues_per_gaussian==0:
503 mol.add_representation(domain_atomic,
504 resolutions=reader.resolutions)
505 if len(domain_non_atomic)>0:
506 mol.add_representation(domain_non_atomic,
507 resolutions=[domain.bead_size])
509 mol.add_representation(domain_atomic,
510 resolutions=reader.resolutions,
511 density_residues_per_component=domain.em_residues_per_gaussian,
512 density_prefix=domain.density_prefix,
513 density_force_compute=self.force_create_gmm_files)
514 if len(domain_non_atomic)>0:
515 mol.add_representation(domain_non_atomic,
516 resolutions=[domain.bead_size],
517 setup_particles_as_densities=
True)
518 these_domains[domain.domain_name] = (domain_atomic,domain_non_atomic)
519 self._domains.append(these_domains)
520 print(
'State',len(self.system.states),
'added')
523 """Return list of all molecules grouped by state.
524 For each state, it's a dictionary of Molecules where key is the molecule name
526 return [s.get_molecules()
for s
in self.system.get_states()]
529 """Builds representations and sets up degrees of freedom"""
530 print(
"BuildSystem: building representations")
531 self.root_hier = self.system.build()
533 print(
"BuildSystem: setting up degrees of freedom")
535 for nstate,reader
in enumerate(self._readers):
536 rbs = reader.get_rigid_bodies()
537 srbs = reader.get_super_rigid_bodies()
538 csrbs = reader.get_chains_of_super_rigid_bodies()
541 domains_in_rbs = set()
543 all_res = IMP.pmi.tools.OrderedSet()
544 bead_res = IMP.pmi.tools.OrderedSet()
545 for domain
in rblist:
546 all_res|=self._domains[nstate][domain][0]
547 bead_res|=self._domains[nstate][domain][1]
548 domains_in_rbs.add(domain)
550 self.dof.create_rigid_body(all_res,
551 nonrigid_parts=bead_res)
554 for molname
in reader.get_unique_molecules():
555 mlist = reader.get_unique_molecules()[molname]
557 if domain.pdb_file==
"BEADS" and domain
not in domains_in_rbs:
558 self.dof.create_flexible_beads(self._domains[nstate][domain.domain_name][1])
562 all_res = IMP.pmi.tools.OrderedSet()
563 for domain
in srblist:
564 all_res|=self._domains[nstate][domain][0]
565 self.dof.create_super_rigid_body(all_res)
568 for csrblist
in csrbs:
569 all_res = IMP.pmi.tools.OrderedSet()
570 for domain
in csrblist:
571 all_res|=self._domains[nstate][domain][0]
572 all_res = list(all_res)
573 all_res.sort(key=
lambda r:r.get_index())
574 self.dof.create_super_rigid_body(all_res,chain_min_length=2,chain_max_length=3)
575 return self.root_hier,self.dof
579 """A macro to build a Representation based on a Topology and lists of movers
580 DEPRECATED - Use BuildSystem instead.
584 component_topologies,
585 list_of_rigid_bodies=[],
586 list_of_super_rigid_bodies=[],
587 chain_of_super_rigid_bodies=[],
588 sequence_connectivity_scale=4.0,
589 add_each_domain_as_rigid_body=
False,
590 force_create_gmm_files=
False):
592 @param model The IMP model
593 @param component_topologies List of
594 IMP.pmi.topology.ComponentTopology items
595 @param list_of_rigid_bodies List of lists of domain names that will
596 be moved as rigid bodies.
597 @param list_of_super_rigid_bodies List of lists of domain names
598 that will move together in an additional Monte Carlo move.
599 @param chain_of_super_rigid_bodies List of lists of domain names
600 (choices can only be from the same molecule). Each of these
601 groups will be moved rigidly. This helps to sample more
602 efficiently complex topologies, made of several rigid bodies,
603 connected by flexible linkers.
604 @param sequence_connectivity_scale For scaling the connectivity
606 @param add_each_domain_as_rigid_body That way you don't have to
607 put all of them in the list
608 @param force_create_gmm_files If True, will sample and create GMMs
609 no matter what. If False, will only only sample if the
610 files don't exist. If number of Gaussians is zero, won't
616 disorderedlength=
False)
618 data=component_topologies
619 if list_of_rigid_bodies==[]:
620 print(
"WARNING: No list of rigid bodies inputted to build_model()")
621 if list_of_super_rigid_bodies==[]:
622 print(
"WARNING: No list of super rigid bodies inputted to build_model()")
623 if chain_of_super_rigid_bodies==[]:
624 print(
"WARNING: No chain of super rigid bodies inputted to build_model()")
625 all_dnames = set([d
for sublist
in list_of_rigid_bodies+list_of_super_rigid_bodies\
626 +chain_of_super_rigid_bodies
for d
in sublist])
627 all_available = set([c.domain_name
for c
in component_topologies])
628 if not all_dnames <= all_available:
629 raise ValueError(
"All requested movers must reference domain "
630 "names in the component topologies")
634 super_rigid_bodies={}
635 chain_super_rigid_bodies={}
640 hier_name = c.domain_name
642 fasta_file = c.fasta_file
643 fasta_id = c.fasta_id
644 pdb_name = c.pdb_file
646 res_range = c.residue_range
647 offset = c.pdb_offset
648 bead_size = c.bead_size
649 em_num_components = c.em_residues_per_gaussian
650 em_txt_file_name = c.gmm_file
651 em_mrc_file_name = c.mrc_file
653 if comp_name
not in self.simo.get_component_names():
654 self.simo.create_component(comp_name,color=0.0)
655 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
658 if em_num_components==0:
662 if (
not os.path.isfile(em_txt_file_name))
or force_create_gmm_files:
669 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
670 res_range,include_res0,beadsize=bead_size,
671 color=color,offset=offset)
672 if em_num_components!=0:
674 print(
"will read GMM files")
676 print(
"will calculate GMMs")
678 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
679 em_mrc_file_name,em_num_components,read_em_files)
680 self.simo.add_all_atom_densities(comp_name, particles=beads)
685 self.resdensities[hier_name]=dens_hier
686 self.domain_dict[hier_name]=outhier+dens_hier
689 for c
in self.simo.get_component_names():
690 self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
691 self.simo.setup_component_geometry(c)
694 for rblist
in list_of_rigid_bodies:
696 for rbmember
in rblist:
697 rb+=[h
for h
in self.domain_dict[rbmember]]
698 self.simo.set_rigid_body_from_hierarchies(rb)
699 for srblist
in list_of_super_rigid_bodies:
701 for srbmember
in rblist:
702 srb+=[h
for h
in self.domain_dict[srbmember]]
703 self.simo.set_super_rigid_body_from_hierarchies(srb)
704 for clist
in chain_of_super_rigid_bodies:
706 for crbmember
in rblist:
707 crb+=[h
for h
in self.domain_dict[crbmember]]
708 self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
710 self.simo.set_floppy_bodies()
711 self.simo.setup_bonds()
714 '''Return the Representation object'''
717 def get_density_hierarchies(self,hier_name_list):
721 for hn
in hier_name_list:
723 dens_hier_list+=self.resdensities[hn]
724 return dens_hier_list
726 def set_gmm_models_directory(self,directory_name):
727 self.gmm_models_directory=directory_name
729 def get_pdb_bead_bits(self,hierarchy):
734 if "_pdb" in h.get_name():pdbbits.append(h)
735 if "_bead" in h.get_name():beadbits.append(h)
736 if "_helix" in h.get_name():helixbits.append(h)
737 return (pdbbits,beadbits,helixbits)
739 def scale_bead_radii(self,nresidues,scale):
741 for h
in self.domain_dict:
742 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
743 slope=(1.0-scale)/(1.0-float(nresidues))
748 if b
not in scaled_beads:
754 scale_factor=slope*float(num_residues)+1.0
756 new_radius=scale_factor*radius
759 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
762 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
764 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
767 for pb
in pdbbits+helixbits:
771 number_of_residues=len(set(res_ind))
775 outhier+=simo.add_component_density(compname,
777 num_components=num_components,
779 inputfile=txtfilename)
780 if len(helixbits)!=0:
781 outhier+=simo.add_component_density(compname,
783 num_components=num_components,
785 inputfile=txtfilename)
790 num_components=number_of_residues//abs(num_components)+1
791 outhier+=simo.add_component_density(compname,
793 num_components=num_components,
795 outputfile=txtfilename,
796 outputmap=mrcfilename,
797 multiply_by_total_mass=
True)
799 if len(helixbits)!=0:
800 num_components=number_of_residues//abs(num_components)+1
801 outhier+=simo.add_component_density(compname,
803 num_components=num_components,
805 outputfile=txtfilename,
806 outputmap=mrcfilename,
807 multiply_by_total_mass=
True)
809 return outhier,beadbits
811 def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
812 beadsize=5,color=0.0,offset=0):
813 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" :
815 outhier=simo.autobuild_model(comname,
819 resolutions=[0,1,10],
822 missingbeadsize=beadsize)
824 outhier=simo.autobuild_model(comname,
831 missingbeadsize=beadsize)
834 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" :
835 outhier=simo.add_component_ideal_helix(comname,
841 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" :
842 outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
846 seq_len=len(simo.sequence_dict[comname])
847 outhier=simo.add_component_necklace(comname,
857 """Deprecated building macro - use BuildSystem()"""
861 @param representation The PMI representation
863 self.simo=representation
864 self.gmm_models_directory=
"."
866 self.rmf_frame_number={}
867 self.rmf_names_map={}
869 def set_gmm_models_directory(self,directory_name):
870 self.gmm_models_directory=directory_name
872 def build_model(self,data_structure,sequence_connectivity_scale=4.0,
873 sequence_connectivity_resolution=10,rmf_file=
None,rmf_frame_number=0,rmf_file_map=
None):
875 @param data_structure List of lists containing these entries:
876 comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
877 res_range, read_em_files, bead_size, rb, super_rb,
878 em_num_components, em_txt_file_name, em_mrc_file_name
879 @param sequence_connectivity_scale
881 @param rmf_frame_number
882 @param rmf_file_map : a dictionary that map key=component_name:value=(rmf_file_name,
888 super_rigid_bodies={}
889 chain_super_rigid_bodies={}
892 for d
in data_structure:
900 res_range = d[7][0:2]
909 em_num_components = d[12]
910 em_txt_file_name = d[13]
911 em_mrc_file_name = d[14]
912 chain_of_super_rb = d[15]
914 if comp_name
not in self.simo.get_component_names():
915 self.simo.create_component(comp_name,color=0.0)
916 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
917 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
920 if not read_em_files
is None:
921 if em_txt_file_name
is " ": em_txt_file_name=self.gmm_models_directory+
"/"+hier_name+
".txt"
922 if em_mrc_file_name
is " ": em_mrc_file_name=self.gmm_models_directory+
"/"+hier_name+
".mrc"
925 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
927 self.simo.add_all_atom_densities(comp_name, particles=beads)
933 self.resdensities[hier_name]=dens_hier
934 self.domain_dict[hier_name]=outhier+dens_hier
937 if rb
not in rigid_bodies:
938 rigid_bodies[rb]=[h
for h
in self.domain_dict[hier_name]]
940 rigid_bodies[rb]+=[h
for h
in self.domain_dict[hier_name]]
943 if super_rb
is not None:
945 if k
not in super_rigid_bodies:
946 super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
948 super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
950 if chain_of_super_rb
is not None:
951 for k
in chain_of_super_rb:
952 if k
not in chain_super_rigid_bodies:
953 chain_super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
955 chain_super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
959 self.rigid_bodies=rigid_bodies
961 for c
in self.simo.get_component_names():
962 if rmf_file
is not None:
965 self.simo.set_coordinates_from_rmf(c, rf,rfn)
967 for k
in rmf_file_map:
969 rf=rmf_file_map[k][0]
970 rfn=rmf_file_map[k][1]
971 rcname=rmf_file_map[k][2]
972 self.simo.set_coordinates_from_rmf(cname, rf,rfn,rcname)
974 if c
in self.rmf_file:
976 rfn=self.rmf_frame_number[c]
977 rfm=self.rmf_names_map[c]
978 self.simo.set_coordinates_from_rmf(c, rf,rfn,representation_name_to_rmf_name_map=rfm)
979 self.simo.setup_component_sequence_connectivity(c,resolution=sequence_connectivity_resolution,scale=sequence_connectivity_scale)
980 self.simo.setup_component_geometry(c)
982 for rb
in rigid_bodies:
983 self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
985 for k
in super_rigid_bodies:
986 self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
988 for k
in chain_super_rigid_bodies:
989 self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
991 self.simo.set_floppy_bodies()
992 self.simo.setup_bonds()
996 def set_rmf_file(self,component_name,rmf_file,rmf_frame_number,rmf_names_map=None):
997 self.rmf_file[component_name]=rmf_file
998 self.rmf_frame_number[component_name]=rmf_frame_number
999 self.rmf_names_map[component_name]=rmf_names_map
1001 def get_density_hierarchies(self,hier_name_list):
1005 for hn
in hier_name_list:
1007 dens_hier_list+=self.resdensities[hn]
1008 return dens_hier_list
1010 def get_pdb_bead_bits(self,hierarchy):
1015 if "_pdb" in h.get_name():pdbbits.append(h)
1016 if "_bead" in h.get_name():beadbits.append(h)
1017 if "_helix" in h.get_name():helixbits.append(h)
1018 return (pdbbits,beadbits,helixbits)
1020 def scale_bead_radii(self,nresidues,scale):
1022 for h
in self.domain_dict:
1023 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
1024 slope=(1.0-scale)/(1.0-float(nresidues))
1029 if b
not in scaled_beads:
1035 scale_factor=slope*float(num_residues)+1.0
1037 new_radius=scale_factor*radius
1040 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
1043 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
1045 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
1049 for pb
in pdbbits+helixbits:
1053 number_of_residues=len(set(res_ind))
1057 outhier+=simo.add_component_density(compname,
1059 num_components=num_components,
1061 inputfile=txtfilename)
1062 if len(helixbits)!=0:
1063 outhier+=simo.add_component_density(compname,
1065 num_components=num_components,
1067 inputfile=txtfilename)
1072 if num_components<0:
1075 num_components=number_of_residues/abs(num_components)
1076 outhier+=simo.add_component_density(compname,
1078 num_components=num_components,
1080 outputfile=txtfilename,
1081 outputmap=mrcfilename,
1082 multiply_by_total_mass=
True)
1084 if len(helixbits)!=0:
1085 if num_components<0:
1088 num_components=number_of_residues/abs(num_components)
1089 outhier+=simo.add_component_density(compname,
1091 num_components=num_components,
1093 outputfile=txtfilename,
1094 outputmap=mrcfilename,
1095 multiply_by_total_mass=
True)
1097 return outhier,beadbits
1099 def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
1101 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is not "DENSITY" :
1102 if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
1104 outhier=simo.autobuild_model(comname,
1108 resolutions=[0,1,10],
1111 missingbeadsize=beadsize)
1113 outhier=simo.autobuild_model(comname,
1120 missingbeadsize=beadsize)
1122 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is not "DENSITY" :
1123 outhier=simo.add_component_ideal_helix(comname,
1129 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" and pdbname
is not "DENSITY" :
1132 seq_len=len(simo.sequence_dict[comname])
1133 outhier=simo.add_component_necklace(comname,resrange[0],seq_len,beadsize,color=color)
1135 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is "DENSITY" :
1140 seq_len=len(simo.sequence_dict[comname])
1141 outhier=simo.add_component_necklace(comname,
1148 def set_coordinates(self,hier_name,xyz_tuple):
1149 hier=self.domain_dict[hier_name]
1157 def save_rmf(self,rmfname):
1160 o.init_rmf(rmfname,[self.simo.prot])
1161 o.write_rmf(rmfname)
1162 o.close_rmf(rmfname)
1171 missing_bead_size=20,
1172 residue_per_gaussian=
None):
1174 Construct a component for each subunit (no splitting, nothing fancy).
1176 You can pass the resolutions and the bead size for the missing residue regions.
1177 To use this macro, you must provide the following data structure:
1179 Component pdbfile chainid rgb color fastafile sequence id
1182 data = [("Rpb1", pdbfile, "A", 0.00000000, (fastafile, 0)),
1183 ("Rpb2", pdbfile, "B", 0.09090909, (fastafile, 1)),
1184 ("Rpb3", pdbfile, "C", 0.18181818, (fastafile, 2)),
1185 ("Rpb4", pdbfile, "D", 0.27272727, (fastafile, 3)),
1186 ("Rpb5", pdbfile, "E", 0.36363636, (fastafile, 4)),
1187 ("Rpb6", pdbfile, "F", 0.45454545, (fastafile, 5)),
1188 ("Rpb7", pdbfile, "G", 0.54545455, (fastafile, 6)),
1189 ("Rpb8", pdbfile, "H", 0.63636364, (fastafile, 7)),
1190 ("Rpb9", pdbfile, "I", 0.72727273, (fastafile, 8)),
1191 ("Rpb10", pdbfile, "L", 0.81818182, (fastafile, 9)),
1192 ("Rpb11", pdbfile, "J", 0.90909091, (fastafile, 10)),
1193 ("Rpb12", pdbfile, "K", 1.00000000, (fastafile, 11))]
1204 component_name = d[0]
1208 fasta_file = d[4][0]
1210 fastids = IMP.pmi.tools.get_ids_from_fasta_file(fasta_file)
1211 fasta_file_id = d[4][1]
1213 r.create_component(component_name,
1216 r.add_component_sequence(component_name,
1218 id=fastids[fasta_file_id])
1220 hierarchies = r.autobuild_model(component_name,
1223 resolutions=resolutions,
1224 missingbeadsize=missing_bead_size)
1226 r.show_component_table(component_name)
1228 r.set_rigid_bodies([component_name])
1230 r.set_chain_of_super_rigid_bodies(
1235 r.setup_component_sequence_connectivity(component_name, resolution=1)
1236 r.setup_component_geometry(component_name)
1240 r.set_floppy_bodies()
1243 r.set_current_coordinates_as_reference_for_rmsd(
"Reference")
1250 """A macro for running all the basic operations of analysis.
1251 Includes clustering, precision analysis, and making ensemble density maps.
1252 A number of plots are also supported.
1255 merge_directories=[
"./"],
1256 stat_file_name_suffix=
"stat",
1257 best_pdb_name_suffix=
"model",
1258 do_clean_first=
True,
1259 do_create_directories=
True,
1260 global_output_directory=
"output/",
1261 replica_stat_file_suffix=
"stat_replica",
1262 global_analysis_result_directory=
"./analysis/"):
1264 @param model The IMP model
1265 @param stat_file_name_suffix
1266 @param merge_directories The directories containing output files
1267 @param best_pdb_name_suffix
1268 @param do_clean_first
1269 @param do_create_directories
1270 @param global_output_directory Where everything is
1271 @param replica_stat_file_suffix
1272 @param global_analysis_result_directory
1276 from mpi4py
import MPI
1277 self.comm = MPI.COMM_WORLD
1278 self.rank = self.comm.Get_rank()
1279 self.number_of_processes = self.comm.size
1282 self.number_of_processes = 1
1285 stat_dir = global_output_directory
1286 self.stat_files = []
1288 for rd
in merge_directories:
1289 stat_files = glob.glob(os.path.join(rd,stat_dir,
"stat.*.out"))
1290 if len(stat_files)==0:
1291 print(
"WARNING: no stat files found in",os.path.join(rd,stat_dir))
1292 self.stat_files += stat_files
1296 score_key=
"SimplifiedModel_Total_Score_None",
1297 rmf_file_key=
"rmf_file",
1298 rmf_file_frame_key=
"rmf_frame_index",
1301 nframes_trajectory=10000):
1302 """ Get a trajectory of the modeling run, for generating demonstrative movies
1303 @param score_key The score for ranking models
1304 @param rmf_file_key Key pointing to RMF filename
1305 @param rmf_file_frame_key Key pointing to RMF frame number
1306 @param outputdir The local output directory used in the run
1307 @param get_every Extract every nth frame
1308 @param nframes_trajectory Total number of frames of the trajectory
1310 from operator
import itemgetter
1318 rmf_file_list=trajectory_models[0]
1319 rmf_file_frame_list=trajectory_models[1]
1320 score_list=list(map(float, trajectory_models[2]))
1322 max_score=max(score_list)
1323 min_score=min(score_list)
1326 bins=[(max_score-min_score)*math.exp(-float(i))+min_score
for i
in range(nframes_trajectory)]
1327 binned_scores=[
None]*nframes_trajectory
1328 binned_model_indexes=[-1]*nframes_trajectory
1330 for model_index,s
in enumerate(score_list):
1331 bins_score_diffs=[abs(s-b)
for b
in bins]
1332 bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1333 if binned_scores[bin_index]==
None:
1334 binned_scores[bin_index]=s
1335 binned_model_indexes[bin_index]=model_index
1337 old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1338 new_diff=abs(s-bins[bin_index])
1339 if new_diff < old_diff:
1340 binned_scores[bin_index]=s
1341 binned_model_indexes[bin_index]=model_index
1343 print(binned_scores)
1344 print(binned_model_indexes)
1349 score_key=
"SimplifiedModel_Total_Score_None",
1350 rmf_file_key=
"rmf_file",
1351 rmf_file_frame_key=
"rmf_frame_index",
1353 prefiltervalue=
None,
1356 alignment_components=
None,
1357 number_of_best_scoring_models=10,
1358 rmsd_calculation_components=
None,
1359 distance_matrix_file=
'distances.mat',
1360 load_distance_matrix_file=
False,
1361 skip_clustering=
False,
1362 number_of_clusters=1,
1364 exit_after_display=
True,
1366 first_and_last_frames=
None,
1367 density_custom_ranges=
None,
1368 write_pdb_with_centered_coordinates=
False,
1370 """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps
1371 @param score_key The score for ranking models
1372 @param rmf_file_key Key pointing to RMF filename
1373 @param rmf_file_frame_key Key pointing to RMF frame number
1374 @param state_number State number to analyze
1375 @param prefiltervalue Only include frames where the score key is below this value
1376 @param feature_keys Keywords for which you want to calculate average,
1378 @param outputdir The local output directory used in the run
1379 @param alignment_components List of tuples for aligning the structures
1380 e.g. ["Rpb1", (20,100,"Rpb2"), .....]
1381 @param number_of_best_scoring_models Num models to keep per run
1382 @param rmsd_calculation_components List of tuples for calculating RMSD
1383 e.g. ["Rpb1", (20,100,"Rpb2"), .....]
1384 @param distance_matrix_file Where to store/read the distance matrix
1385 @param load_distance_matrix_file Try to load the distance matrix file
1386 @param skip_clustering Just extract the best scoring models and save the pdbs
1387 @param number_of_clusters Number of k-means clusters
1388 @param display_plot Display the distance matrix
1389 @param exit_after_display Exit after displaying distance matrix
1390 @param get_every Extract every nth frame
1391 @param first_and_last_frames A tuple with the first and last frames to be
1392 analyzed. Values are percentages!
1393 Default: get all frames
1394 @param density_custom_ranges List of tuples or strings for density calculation
1395 e.g. ["Rpb1", (20,100,"Rpb2"), .....]
1396 @param write_pdb_with_centered_coordinates
1397 @param voxel_size Used for the density output
1408 if not load_distance_matrix_file:
1409 if len(self.stat_files)==0: print(
"ERROR: no stat file found in the given path");
return
1410 my_stat_files=IMP.pmi.tools.chunk_list_into_segments(
1411 self.stat_files,self.number_of_processes)[self.rank]
1419 rmf_file_list=best_models[0]
1420 rmf_file_frame_list=best_models[1]
1421 score_list=best_models[2]
1422 feature_keyword_list_dict=best_models[3]
1428 if self.number_of_processes > 1:
1432 rmf_file_frame_list)
1433 for k
in feature_keyword_list_dict:
1435 feature_keyword_list_dict[k])
1438 score_rmf_tuples = list(zip(score_list,
1440 rmf_file_frame_list,
1441 list(range(len(score_list)))))
1443 if density_custom_ranges:
1444 for k
in density_custom_ranges:
1445 if type(density_custom_ranges[k])
is not list:
1446 raise Exception(
"Density custom ranges: values must be lists of tuples")
1449 if first_and_last_frames
is not None:
1450 nframes = len(score_rmf_tuples)
1451 first_frame = int(first_and_last_frames[0] * nframes)
1452 last_frame = int(first_and_last_frames[1] * nframes)
1453 if last_frame > len(score_rmf_tuples):
1455 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1458 best_score_rmf_tuples = sorted(score_rmf_tuples,
1459 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1460 best_score_rmf_tuples=[t+(n,)
for n,t
in enumerate(best_score_rmf_tuples)]
1463 best_score_feature_keyword_list_dict = defaultdict(list)
1464 for tpl
in best_score_rmf_tuples:
1466 for f
in feature_keyword_list_dict:
1467 best_score_feature_keyword_list_dict[f].append(
1468 feature_keyword_list_dict[f][index])
1470 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1471 best_score_rmf_tuples,
1472 self.number_of_processes)[self.rank]
1479 my_best_score_rmf_tuples[0],
1480 rmsd_calculation_components,
1481 state_number=state_number)
1483 my_best_score_rmf_tuples,
1484 alignment_components,
1485 rmsd_calculation_components,
1486 state_number=state_number)
1491 all_coordinates=got_coords[0]
1492 alignment_coordinates=got_coords[1]
1493 rmsd_coordinates=got_coords[2]
1494 rmf_file_name_index_dict=got_coords[3]
1495 all_rmf_file_names=got_coords[4]
1501 if density_custom_ranges:
1503 density_custom_ranges,
1506 dircluster=os.path.join(outputdir,
"all_models."+str(n))
1512 os.mkdir(dircluster)
1515 clusstat=open(os.path.join(dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1516 for cnt,tpl
in enumerate(my_best_score_rmf_tuples):
1518 rmf_frame_number=tpl[2]
1521 for key
in best_score_feature_keyword_list_dict:
1522 tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1525 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1530 linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1536 if not linking_successful:
1543 states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1544 prot = states[state_number]
1546 prot = prots[state_number]
1550 coords_f1=alignment_coordinates[cnt]
1552 coords_f2=alignment_coordinates[cnt]
1555 transformation = Ali.align()[1]
1576 out_pdb_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".pdb")
1577 out_rmf_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".rmf3")
1578 o.init_pdb(out_pdb_fn,prot)
1579 o.write_pdb(out_pdb_fn,
1580 translate_to_geometric_center=write_pdb_with_centered_coordinates)
1582 tmp_dict[
"local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1583 tmp_dict[
"rmf_file_full_path"]=rmf_name
1584 tmp_dict[
"local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1585 tmp_dict[
"local_rmf_frame_number"]=0
1587 clusstat.write(str(tmp_dict)+
"\n")
1588 o.init_rmf(out_rmf_fn,[prot],rs)
1589 o.write_rmf(out_rmf_fn)
1590 o.close_rmf(out_rmf_fn)
1592 if density_custom_ranges:
1593 DensModule.add_subunits_density(prot)
1595 if density_custom_ranges:
1596 DensModule.write_mrc(path=dircluster)
1603 if self.number_of_processes > 1:
1609 rmf_file_name_index_dict)
1611 alignment_coordinates)
1618 [best_score_feature_keyword_list_dict,
1619 rmf_file_name_index_dict],
1625 print(
"setup clustering class")
1628 for n, model_coordinate_dict
in enumerate(all_coordinates):
1629 template_coordinate_dict = {}
1631 if alignment_components
is not None and len(Clusters.all_coords) == 0:
1633 Clusters.set_template(alignment_coordinates[n])
1634 Clusters.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1635 print(
"Global calculating the distance matrix")
1638 Clusters.dist_matrix()
1642 Clusters.do_cluster(number_of_clusters)
1645 Clusters.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1646 if exit_after_display:
1648 Clusters.save_distance_matrix_file(file_name=distance_matrix_file)
1655 print(
"setup clustering class")
1657 Clusters.load_distance_matrix_file(file_name=distance_matrix_file)
1658 print(
"clustering with %s clusters" % str(number_of_clusters))
1659 Clusters.do_cluster(number_of_clusters)
1660 [best_score_feature_keyword_list_dict,
1661 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1664 Clusters.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1665 if exit_after_display:
1667 if self.number_of_processes > 1:
1675 print(Clusters.get_cluster_labels())
1676 for n, cl
in enumerate(Clusters.get_cluster_labels()):
1677 print(
"rank %s " % str(self.rank))
1678 print(
"cluster %s " % str(n))
1679 print(
"cluster label %s " % str(cl))
1680 print(Clusters.get_cluster_label_names(cl))
1683 if density_custom_ranges:
1685 density_custom_ranges,
1688 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1690 os.mkdir(dircluster)
1694 rmsd_dict = {
"AVERAGE_RMSD":
1695 str(Clusters.get_cluster_label_average_rmsd(cl))}
1696 clusstat = open(dircluster +
"stat.out",
"w")
1697 for k, structure_name
in enumerate(Clusters.get_cluster_label_names(cl)):
1700 tmp_dict.update(rmsd_dict)
1701 index = rmf_file_name_index_dict[structure_name]
1702 for key
in best_score_feature_keyword_list_dict:
1704 key] = best_score_feature_keyword_list_dict[
1710 rmf_name = structure_name.split(
"|")[0]
1711 rmf_frame_number = int(structure_name.split(
"|")[1])
1712 clusstat.write(str(tmp_dict) +
"\n")
1716 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1721 linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1727 if not linking_successful:
1733 states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1734 prot = states[state_number]
1736 prot = prots[state_number]
1740 model_index = Clusters.get_model_index_from_name(
1742 transformation = Clusters.get_transformation_to_first_member(
1762 if density_custom_ranges:
1763 DensModule.add_subunits_density(prot)
1767 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1768 o.write_pdb(dircluster + str(k) +
".pdb")
1773 h.set_name(
"System")
1775 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1777 o.init_rmf(dircluster + str(k) +
".rmf3", [prot],rs)
1778 o.write_rmf(dircluster + str(k) +
".rmf3")
1779 o.close_rmf(dircluster + str(k) +
".rmf3")
1784 if density_custom_ranges:
1785 DensModule.write_mrc(path=dircluster)
1788 if self.number_of_processes>1:
1791 def save_objects(self, objects, file_name):
1793 outf = open(file_name,
'wb')
1794 pickle.dump(objects, outf)
1797 def load_objects(self, file_name):
1799 inputf = open(file_name,
'rb')
1800 objects = pickle.load(inputf)
A class to simplify create of constraints and movers for an IMP Hierarchy.
A macro for running all the basic operations of analysis.
Sample using molecular dynamics.
A member of a rigid body, it has internal (local) coordinates.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def get_molecules
Return list of all molecules grouped by state.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
This class initializes the root node of the global IMP.atom.Hierarchy.
A class to cluster structures.
Representation of the system.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def build_model
Create model.
Performs alignment and RMSD calculation for two sets of coordinates.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
A decorator for a particle with x,y,z coordinates.
def BuildModel0
Construct a component for each subunit (no splitting, nothing fancy).
Class for easy writing of PDBs, RMFs, and stat files.
A macro to build a Representation based on a Topology and lists of movers DEPRECATED - Use BuildSyste...
Tools for clustering and cluster analysis.
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and setup constraints for PMI objects.
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Deprecated building macro - use BuildSystem()
The general base class for IMP exceptions.
Class to handle individual model particles.
def execute_macro
Builds representations and sets up degrees of freedom.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
Set up the representation of all proteins and nucleic acid macromolecules.
A macro to help setup and run replica exchange.
A dictionary-like wrapper for reading and storing sequence data.
Hierarchies get_leaves(const Selection &h)
Compute mean density maps from structures.
Support for the RMF file format for storing hierarchical molecular data and markup.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Sample using replica exchange.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
def get_representation
Return the Representation object.
A decorator for a particle with x,y,z coordinates and a radius.