1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
19 from operator
import itemgetter
20 from collections
import defaultdict
28 class _RMFRestraints(object):
29 """All restraints that are written out to the RMF file"""
30 def __init__(self, model, user_restraints):
32 self._user_restraints = user_restraints
if user_restraints
else []
35 return (len(self._user_restraints)
36 + self._rmf_rs.get_number_of_restraints())
40 __nonzero__ = __bool__
42 def __getitem__(self, i):
43 class FakePMIWrapper(object):
44 def __init__(self, r):
45 self.r = IMP.RestraintSet.get_from(r)
46 get_restraint =
lambda self: self.r
47 lenuser = len(self._user_restraints)
49 return self._user_restraints[i]
50 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
51 r = self._rmf_rs.get_restraint(i - lenuser)
52 return FakePMIWrapper(r)
54 raise IndexError(
"Out of range")
58 """A macro to help setup and run replica exchange.
59 Supports Monte Carlo and molecular dynamics.
60 Produces trajectory RMF files, best PDB structures,
61 and output stat files.
65 monte_carlo_sample_objects=
None,
66 molecular_dynamics_sample_objects=
None,
68 rmf_output_objects=
None,
69 crosslink_restraints=
None,
70 monte_carlo_temperature=1.0,
71 simulated_annealing=
False,
72 simulated_annealing_minimum_temperature=1.0,
73 simulated_annealing_maximum_temperature=2.5,
74 simulated_annealing_minimum_temperature_nframes=100,
75 simulated_annealing_maximum_temperature_nframes=100,
76 replica_exchange_minimum_temperature=1.0,
77 replica_exchange_maximum_temperature=2.5,
78 replica_exchange_swap=
True,
80 number_of_best_scoring_models=500,
83 molecular_dynamics_steps=10,
84 molecular_dynamics_max_time_step=1.0,
85 number_of_frames=1000,
86 save_coordinates_mode=
"lowest_temperature",
87 nframes_write_coordinates=1,
88 write_initial_rmf=
True,
89 initial_rmf_name_suffix=
"initial",
90 stat_file_name_suffix=
"stat",
91 best_pdb_name_suffix=
"model",
93 do_create_directories=
True,
94 global_output_directory=
"./",
97 replica_stat_file_suffix=
"stat_replica",
98 em_object_for_rmf=
None,
100 replica_exchange_object=
None,
103 @param model The IMP model
104 @param root_hier Top-level (System)hierarchy
105 @param monte_carlo_sample_objects Objects for MC sampling; a list of
106 structural components (generally the representation) that will
107 be moved and restraints with parameters that need to
109 For PMI2: just pass flat list of movers
110 @param molecular_dynamics_sample_objects Objects for MD sampling
111 For PMI2: just pass flat list of particles
112 @param output_objects A list of structural objects and restraints
113 that will be included in output (ie, statistics "stat" files). Any object
114 that provides a get_output() method can be used here. If None is passed
115 the macro will not write stat files.
116 @param rmf_output_objects A list of structural objects and restraints
117 that will be included in rmf. Any object
118 that provides a get_output() method can be used here.
119 @param monte_carlo_temperature MC temp (may need to be optimized
120 based on post-sampling analysis)
121 @param simulated_annealing If True, perform simulated annealing
122 @param simulated_annealing_minimum_temperature Should generally be
123 the same as monte_carlo_temperature.
124 @param simulated_annealing_minimum_temperature_nframes Number of
125 frames to compute at minimum temperature.
126 @param simulated_annealing_maximum_temperature_nframes Number of
128 temps > simulated_annealing_maximum_temperature.
129 @param replica_exchange_minimum_temperature Low temp for REX; should
130 generally be the same as monte_carlo_temperature.
131 @param replica_exchange_maximum_temperature High temp for REX
132 @param replica_exchange_swap Boolean, enable disable temperature
134 @param num_sample_rounds Number of rounds of MC/MD per cycle
135 @param number_of_best_scoring_models Number of top-scoring PDB models
136 to keep around for analysis
137 @param monte_carlo_steps Number of MC steps per round
138 @param self_adaptive self adaptive scheme for monte carlo movers
139 @param molecular_dynamics_steps Number of MD steps per round
140 @param molecular_dynamics_max_time_step Max time step for MD
141 @param number_of_frames Number of REX frames to run
142 @param save_coordinates_mode string: how to save coordinates.
143 "lowest_temperature" (default) only the lowest temperatures is saved
144 "25th_score" all replicas whose score is below the 25th percentile
145 "50th_score" all replicas whose score is below the 50th percentile
146 "75th_score" all replicas whose score is below the 75th percentile
147 @param nframes_write_coordinates How often to write the coordinates
149 @param write_initial_rmf Write the initial configuration
150 @param global_output_directory Folder that will be created to house
152 @param test_mode Set to True to avoid writing any files, just test one frame.
158 self.output_objects = output_objects
159 self.rmf_output_objects=rmf_output_objects
161 and root_hier.get_name() ==
'System'):
162 if self.output_objects
is not None:
164 if self.rmf_output_objects
is not None:
166 self.root_hier = root_hier
167 states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
168 self.vars[
"number_of_states"] = len(states)
170 self.root_hiers = states
171 self.is_multi_state =
True
173 self.root_hier = root_hier
174 self.is_multi_state =
False
176 raise TypeError(
"Must provide System hierarchy (root_hier)")
178 if crosslink_restraints:
180 "crosslink_restraints is deprecated and is ignored; "
181 "all cross-link restraints should be automatically "
182 "added to output RMF files")
183 self._rmf_restraints = _RMFRestraints(model,
None)
184 self.em_object_for_rmf = em_object_for_rmf
185 self.monte_carlo_sample_objects = monte_carlo_sample_objects
186 self.vars[
"self_adaptive"]=self_adaptive
187 if sample_objects
is not None:
189 "sample_objects is deprecated; use monte_carlo_sample_objects "
190 "(or molecular_dynamics_sample_objects) instead")
191 self.monte_carlo_sample_objects+=sample_objects
192 self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
193 self.replica_exchange_object = replica_exchange_object
194 self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
195 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
197 "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
199 "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
200 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
201 self.vars[
"simulated_annealing"]=\
203 self.vars[
"simulated_annealing_minimum_temperature"]=\
204 simulated_annealing_minimum_temperature
205 self.vars[
"simulated_annealing_maximum_temperature"]=\
206 simulated_annealing_maximum_temperature
207 self.vars[
"simulated_annealing_minimum_temperature_nframes"]=\
208 simulated_annealing_minimum_temperature_nframes
209 self.vars[
"simulated_annealing_maximum_temperature_nframes"]=\
210 simulated_annealing_maximum_temperature_nframes
212 self.vars[
"num_sample_rounds"] = num_sample_rounds
214 "number_of_best_scoring_models"] = number_of_best_scoring_models
215 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
216 self.vars[
"molecular_dynamics_steps"]=molecular_dynamics_steps
217 self.vars[
"number_of_frames"] = number_of_frames
218 if not save_coordinates_mode
in [
"lowest_temperature",
"25th_score",
"50th_score",
"75th_score"]:
219 raise Exception(
"save_coordinates_mode has unrecognized value")
221 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
222 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
223 self.vars[
"write_initial_rmf"] = write_initial_rmf
224 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
225 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
226 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
227 self.vars[
"do_clean_first"] = do_clean_first
228 self.vars[
"do_create_directories"] = do_create_directories
229 self.vars[
"global_output_directory"] = global_output_directory
230 self.vars[
"rmf_dir"] = rmf_dir
231 self.vars[
"best_pdb_dir"] = best_pdb_dir
232 self.vars[
"atomistic"] = atomistic
233 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
234 self.vars[
"geometries"] =
None
235 self.test_mode = test_mode
238 if self.vars[
"geometries"]
is None:
239 self.vars[
"geometries"] = list(geometries)
241 self.vars[
"geometries"].extend(geometries)
244 print(
"ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
245 print(
"--- it stores the best scoring pdb models in pdbs/")
246 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
247 print(
"--- variables:")
248 keys = list(self.vars.keys())
251 print(
"------", v.ljust(30), self.vars[v])
253 def get_replica_exchange_object(self):
254 return self.replica_exchange_object
256 def _add_provenance(self, sampler_md, sampler_mc):
257 """Record details about the sampling in the IMP Hierarchies"""
260 method =
"Molecular Dynamics"
261 iterations += self.vars[
"molecular_dynamics_steps"]
263 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
264 iterations += self.vars[
"monte_carlo_steps"]
266 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
268 iterations *= self.vars[
"num_sample_rounds"]
270 pi = self.model.add_particle(
"sampling")
272 self.model, pi, method, self.vars[
"number_of_frames"],
274 p.set_number_of_replicas(
275 self.replica_exchange_object.get_number_of_replicas())
276 IMP.pmi.tools._add_pmi_provenance(self.root_hier)
279 def execute_macro(self):
280 temp_index_factor = 100000.0
284 if self.monte_carlo_sample_objects
is not None:
285 print(
"Setting up MonteCarlo")
287 self.monte_carlo_sample_objects,
288 self.vars[
"monte_carlo_temperature"])
289 if self.vars[
"simulated_annealing"]:
290 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
291 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
292 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
293 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
294 sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
295 if self.vars[
"self_adaptive"]:
296 sampler_mc.set_self_adaptive(isselfadaptive=self.vars[
"self_adaptive"])
297 if self.output_objects
is not None:
298 self.output_objects.append(sampler_mc)
299 if self.rmf_output_objects
is not None:
300 self.rmf_output_objects.append(sampler_mc)
301 samplers.append(sampler_mc)
304 if self.molecular_dynamics_sample_objects
is not None:
305 print(
"Setting up MolecularDynamics")
307 self.molecular_dynamics_sample_objects,
308 self.vars[
"monte_carlo_temperature"],
309 maximum_time_step=self.molecular_dynamics_max_time_step)
310 if self.vars[
"simulated_annealing"]:
311 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
312 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
313 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
314 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
315 sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
316 if self.output_objects
is not None:
317 self.output_objects.append(sampler_md)
318 if self.rmf_output_objects
is not None:
319 self.rmf_output_objects.append(sampler_md)
320 samplers.append(sampler_md)
323 print(
"Setting up ReplicaExchange")
326 "replica_exchange_minimum_temperature"],
328 "replica_exchange_maximum_temperature"],
330 replica_exchange_object=self.replica_exchange_object)
331 self.replica_exchange_object = rex.rem
333 myindex = rex.get_my_index()
334 if self.output_objects
is not None:
335 self.output_objects.append(rex)
336 if self.rmf_output_objects
is not None:
337 self.rmf_output_objects.append(rex)
341 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
345 globaldir = self.vars[
"global_output_directory"] +
"/"
346 rmf_dir = globaldir + self.vars[
"rmf_dir"]
347 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
349 if not self.test_mode:
350 if self.vars[
"do_clean_first"]:
353 if self.vars[
"do_create_directories"]:
356 os.makedirs(globaldir)
364 if not self.is_multi_state:
370 for n
in range(self.vars[
"number_of_states"]):
372 os.makedirs(pdb_dir +
"/" + str(n))
379 if self.output_objects
is not None:
380 self.output_objects.append(sw)
381 if self.rmf_output_objects
is not None:
382 self.rmf_output_objects.append(sw)
384 print(
"Setting up stat file")
386 low_temp_stat_file = globaldir + \
387 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
390 if not self.test_mode:
393 if not self.test_mode:
394 if self.output_objects
is not None:
395 output.init_stat2(low_temp_stat_file,
397 extralabels=[
"rmf_file",
"rmf_frame_index"])
399 print(
"Stat file writing is disabled")
401 if self.rmf_output_objects
is not None:
402 print(
"Stat info being written in the rmf file")
404 print(
"Setting up replica stat file")
405 replica_stat_file = globaldir + \
406 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
407 if not self.test_mode:
408 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
410 if not self.test_mode:
411 print(
"Setting up best pdb files")
412 if not self.is_multi_state:
413 if self.vars[
"number_of_best_scoring_models"] > 0:
414 output.init_pdb_best_scoring(pdb_dir +
"/" +
415 self.vars[
"best_pdb_name_suffix"],
418 "number_of_best_scoring_models"],
419 replica_exchange=
True)
420 output.write_psf(pdb_dir +
"/" +
"model.psf",pdb_dir +
"/" +
421 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
423 if self.vars[
"number_of_best_scoring_models"] > 0:
424 for n
in range(self.vars[
"number_of_states"]):
425 output.init_pdb_best_scoring(pdb_dir +
"/" + str(n) +
"/" +
426 self.vars[
"best_pdb_name_suffix"],
429 "number_of_best_scoring_models"],
430 replica_exchange=
True)
431 output.write_psf(pdb_dir +
"/" + str(n) +
"/" +
"model.psf",pdb_dir +
"/" + str(n) +
"/" +
432 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
435 if self.em_object_for_rmf
is not None:
436 output_hierarchies = [
438 self.em_object_for_rmf.get_density_as_hierarchy(
441 output_hierarchies = [self.root_hier]
444 if not self.test_mode:
445 print(
"Setting up and writing initial rmf coordinate file")
446 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
447 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
448 output_hierarchies,listofobjects=self.rmf_output_objects)
449 if self._rmf_restraints:
450 output.add_restraints_to_rmf(
451 init_suffix +
"." + str(myindex) +
".rmf3",
452 self._rmf_restraints)
453 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
454 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
458 if not self.test_mode:
459 mpivs=IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
463 self._add_provenance(sampler_md, sampler_mc)
465 if not self.test_mode:
466 print(
"Setting up production rmf files")
467 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
468 output.init_rmf(rmfname, output_hierarchies, geometries=self.vars[
"geometries"],
469 listofobjects = self.rmf_output_objects)
471 if self._rmf_restraints:
472 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
474 ntimes_at_low_temp = 0
478 self.replica_exchange_object.set_was_used(
True)
479 nframes = self.vars[
"number_of_frames"]
482 for i
in range(nframes):
486 for nr
in range(self.vars[
"num_sample_rounds"]):
487 if sampler_md
is not None:
489 self.vars[
"molecular_dynamics_steps"])
490 if sampler_mc
is not None:
491 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
493 self.model).evaluate(
False)
494 mpivs.set_value(
"score",score)
495 output.set_output_entry(
"score", score)
499 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
501 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
502 save_frame=(min_temp_index == my_temp_index)
503 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
504 score_perc=mpivs.get_percentile(
"score")
505 save_frame=(score_perc*100.0<=25.0)
506 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
507 score_perc=mpivs.get_percentile(
"score")
508 save_frame=(score_perc*100.0<=50.0)
509 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
510 score_perc=mpivs.get_percentile(
"score")
511 save_frame=(score_perc*100.0<=75.0)
514 if save_frame
and not self.test_mode:
518 print(
"--- frame %s score %s " % (str(i), str(score)))
520 if not self.test_mode:
521 if i % self.vars[
"nframes_write_coordinates"]==0:
522 print(
'--- writing coordinates')
523 if self.vars[
"number_of_best_scoring_models"] > 0:
524 output.write_pdb_best_scoring(score)
525 output.write_rmf(rmfname)
526 output.set_output_entry(
"rmf_file", rmfname)
527 output.set_output_entry(
"rmf_frame_index", ntimes_at_low_temp)
529 output.set_output_entry(
"rmf_file", rmfname)
530 output.set_output_entry(
"rmf_frame_index",
'-1')
531 if self.output_objects
is not None:
532 output.write_stat2(low_temp_stat_file)
533 ntimes_at_low_temp += 1
535 if not self.test_mode:
536 output.write_stat2(replica_stat_file)
537 if self.vars[
"replica_exchange_swap"]:
538 rex.swap_temp(i, score)
539 for p, state
in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
540 p.add_replica_exchange(state, self)
542 if not self.test_mode:
543 print(
"closing production rmf files")
544 output.close_rmf(rmfname)
550 """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
551 Easily create multi-state systems by calling this macro
552 repeatedly with different TopologyReader objects!
553 A useful function is get_molecules() which returns the PMI Molecules grouped by state
554 as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
555 Quick multi-state system:
558 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
559 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
560 bs = IMP.pmi.macros.BuildSystem(model)
561 bs.add_state(reader1)
562 bs.add_state(reader2)
563 bs.execute_macro() # build everything including degrees of freedom
564 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
565 ### now you have a two state system, you add restraints etc
567 @note The "domain name" entry of the topology reader is not used.
568 All molecules are set up by the component name, but split into rigid bodies
572 _alphabets = {
'DNA': IMP.pmi.alphabets.dna,
573 'RNA': IMP.pmi.alphabets.rna}
577 sequence_connectivity_scale=4.0,
578 force_create_gmm_files=
False,
581 @param model An IMP Model
582 @param sequence_connectivity_scale For scaling the connectivity restraint
583 @param force_create_gmm_files If True, will sample and create GMMs
584 no matter what. If False, will only sample if the
585 files don't exist. If number of Gaussians is zero, won't
587 @param resolutions The resolutions to build for structured regions
592 self._domain_res = []
594 self.force_create_gmm_files = force_create_gmm_files
595 self.resolutions = resolutions
599 keep_chain_id=
False, fasta_name_map=
None):
600 """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
601 When you are done adding states, call execute_macro()
602 @param reader The TopologyReader object
603 @param fasta_name_map dictionary for converting protein names found in the fasta file
605 state = self.system.create_state()
606 self._readers.append(reader)
607 these_domain_res = {}
609 chain_ids = string.ascii_uppercase+string.ascii_lowercase+
'0123456789'
614 for molname
in reader.get_molecules():
615 copies = reader.get_molecules()[molname].domains
616 for nc,copyname
in enumerate(copies):
617 print(
"BuildSystem.add_state: setting up molecule %s copy number %s" % (molname,str(nc)))
618 copy = copies[copyname]
620 if keep_chain_id ==
True:
621 chain_id = copy[0].chain
623 chain_id = chain_ids[numchain]
625 alphabet = IMP.pmi.alphabets.amino_acid
626 fasta_flag=copy[0].fasta_flag
627 if fasta_flag
in self._alphabets:
628 alphabet = self._alphabets[fasta_flag]
630 print(
"BuildSystem.add_state: molecule %s sequence has %s residues" % (molname,len(seq)))
631 orig_mol = state.create_molecule(molname, seq, chain_id,
636 print(
"BuildSystem.add_state: creating a copy for molecule %s" % molname)
637 mol = orig_mol.create_copy(chain_id)
640 for domainnumber,domain
in enumerate(copy):
641 print(
"BuildSystem.add_state: ---- setting up domain %s of molecule %s" % (domainnumber,molname))
644 these_domains[domain.get_unique_name()] = domain
645 if domain.residue_range==[]
or domain.residue_range
is None:
646 domain_res = mol.get_residues()
648 start = domain.residue_range[0]+domain.pdb_offset
649 if domain.residue_range[1]==
'END':
650 end = len(mol.sequence)
652 end = domain.residue_range[1]+domain.pdb_offset
653 domain_res = mol.residue_range(start-1,end-1)
654 print(
"BuildSystem.add_state: -------- domain %s of molecule %s extends from residue %s to residue %s " % (domainnumber,molname,start,end))
655 if domain.pdb_file==
"BEADS":
656 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by BEADS " % (domainnumber,molname))
657 mol.add_representation(
659 resolutions=[domain.bead_size],
660 setup_particles_as_densities=(
661 domain.em_residues_per_gaussian!=0),
662 color = domain.color)
663 these_domain_res[domain.get_unique_name()] = (set(),domain_res)
664 elif domain.pdb_file==
"IDEAL_HELIX":
665 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by IDEAL_HELIX " % (domainnumber,molname))
666 mol.add_representation(
668 resolutions=self.resolutions,
670 density_residues_per_component=domain.em_residues_per_gaussian,
671 density_prefix=domain.density_prefix,
672 density_force_compute=self.force_create_gmm_files,
673 color = domain.color)
674 these_domain_res[domain.get_unique_name()] = (domain_res,set())
676 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by pdb file %s " % (domainnumber,molname,domain.pdb_file))
677 domain_atomic = mol.add_structure(domain.pdb_file,
679 domain.residue_range,
682 domain_non_atomic = domain_res - domain_atomic
683 if not domain.em_residues_per_gaussian:
684 mol.add_representation(domain_atomic,
685 resolutions=self.resolutions,
686 color = domain.color)
687 if len(domain_non_atomic)>0:
688 mol.add_representation(domain_non_atomic,
689 resolutions=[domain.bead_size],
690 color = domain.color)
692 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by gaussians " % (domainnumber,molname))
693 mol.add_representation(
695 resolutions=self.resolutions,
696 density_residues_per_component=domain.em_residues_per_gaussian,
697 density_prefix=domain.density_prefix,
698 density_force_compute=self.force_create_gmm_files,
699 color = domain.color)
700 if len(domain_non_atomic)>0:
701 mol.add_representation(domain_non_atomic,
702 resolutions=[domain.bead_size],
703 setup_particles_as_densities=
True,
704 color = domain.color)
705 these_domain_res[domain.get_unique_name()] = (
706 domain_atomic,domain_non_atomic)
707 self._domain_res.append(these_domain_res)
708 self._domains.append(these_domains)
709 print(
'BuildSystem.add_state: State',len(self.system.states),
'added')
712 """Return list of all molecules grouped by state.
713 For each state, it's a dictionary of Molecules where key is the molecule name
715 return [s.get_molecules()
for s
in self.system.get_states()]
717 def get_molecule(self,molname,copy_index=0,state_index=0):
718 return self.system.get_states()[state_index].
get_molecules()[molname][copy_index]
720 def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04, max_bead_trans=4.0, max_srb_trans=4.0,max_srb_rot=0.04):
721 """Builds representations and sets up degrees of freedom"""
722 print(
"BuildSystem.execute_macro: building representations")
723 self.root_hier = self.system.build()
725 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
727 for nstate,reader
in enumerate(self._readers):
728 rbs = reader.get_rigid_bodies()
729 srbs = reader.get_super_rigid_bodies()
730 csrbs = reader.get_chains_of_super_rigid_bodies()
733 domains_in_rbs = set()
735 print(
"BuildSystem.execute_macro: -------- building rigid body %s" % (str(rblist)))
736 all_res = IMP.pmi.tools.OrderedSet()
737 bead_res = IMP.pmi.tools.OrderedSet()
739 domain = self._domains[nstate][dname]
740 print(
"BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
741 all_res|=self._domain_res[nstate][dname][0]
742 bead_res|=self._domain_res[nstate][dname][1]
743 domains_in_rbs.add(dname)
745 print(
"BuildSystem.execute_macro: -------- \
746 creating rigid body with max_trans %s \
747 max_rot %s non_rigid_max_trans %s" \
748 % (str(max_rb_trans),str(max_rb_rot),str(max_bead_trans)))
749 self.dof.create_rigid_body(all_res,
750 nonrigid_parts=bead_res,
751 max_trans=max_rb_trans,
753 nonrigid_max_trans=max_bead_trans)
756 for dname, domain
in self._domains[nstate].items():
757 if dname
not in domains_in_rbs:
758 if domain.pdb_file !=
"BEADS":
760 "No rigid bodies set for %s. Residues read from "
761 "the PDB file will not be sampled - only regions "
762 "missing from the PDB will be treated flexibly. "
763 "To sample the entire sequence, use BEADS instead "
764 "of a PDB file name" % dname,
766 self.dof.create_flexible_beads(
767 self._domain_res[nstate][dname][1],
768 max_trans=max_bead_trans)
772 print(
"BuildSystem.execute_macro: -------- building super rigid body %s" % (str(srblist)))
773 all_res = IMP.pmi.tools.OrderedSet()
774 for dname
in srblist:
775 print(
"BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
776 all_res|=self._domain_res[nstate][dname][0]
777 all_res|=self._domain_res[nstate][dname][1]
779 print(
"BuildSystem.execute_macro: -------- \
780 creating super rigid body with max_trans %s max_rot %s " \
781 % (str(max_srb_trans),str(max_srb_rot)))
782 self.dof.create_super_rigid_body(all_res,max_trans=max_srb_trans,max_rot=max_srb_rot)
785 for csrblist
in csrbs:
786 all_res = IMP.pmi.tools.OrderedSet()
787 for dname
in csrblist:
788 all_res|=self._domain_res[nstate][dname][0]
789 all_res|=self._domain_res[nstate][dname][1]
790 all_res = list(all_res)
791 all_res.sort(key=
lambda r:r.get_index())
793 self.dof.create_main_chain_mover(all_res)
794 return self.root_hier,self.dof
799 """A macro for running all the basic operations of analysis.
800 Includes clustering, precision analysis, and making ensemble density maps.
801 A number of plots are also supported.
804 merge_directories=[
"./"],
805 stat_file_name_suffix=
"stat",
806 best_pdb_name_suffix=
"model",
808 do_create_directories=
True,
809 global_output_directory=
"output/",
810 replica_stat_file_suffix=
"stat_replica",
811 global_analysis_result_directory=
"./analysis/",
814 @param model The IMP model
815 @param stat_file_name_suffix
816 @param merge_directories The directories containing output files
817 @param best_pdb_name_suffix
818 @param do_clean_first
819 @param do_create_directories
820 @param global_output_directory Where everything is
821 @param replica_stat_file_suffix
822 @param global_analysis_result_directory
823 @param test_mode If True, nothing is changed on disk
827 from mpi4py
import MPI
828 self.comm = MPI.COMM_WORLD
829 self.rank = self.comm.Get_rank()
830 self.number_of_processes = self.comm.size
833 self.number_of_processes = 1
835 self.test_mode = test_mode
836 self._protocol_output = []
837 self.cluster_obj =
None
839 stat_dir = global_output_directory
842 for rd
in merge_directories:
843 stat_files = glob.glob(os.path.join(rd,stat_dir,
"stat.*.out"))
844 if len(stat_files)==0:
845 warnings.warn(
"no stat files found in %s"
846 % os.path.join(rd, stat_dir),
848 self.stat_files += stat_files
851 """Capture details of the modeling protocol.
852 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
855 self._protocol_output.append((p, p._last_state))
858 score_key=
"SimplifiedModel_Total_Score_None",
859 rmf_file_key=
"rmf_file",
860 rmf_file_frame_key=
"rmf_frame_index",
863 nframes_trajectory=10000):
864 """ Get a trajectory of the modeling run, for generating demonstrative movies
865 @param score_key The score for ranking models
866 @param rmf_file_key Key pointing to RMF filename
867 @param rmf_file_frame_key Key pointing to RMF frame number
868 @param outputdir The local output directory used in the run
869 @param get_every Extract every nth frame
870 @param nframes_trajectory Total number of frames of the trajectory
872 from operator
import itemgetter
880 rmf_file_list=trajectory_models[0]
881 rmf_file_frame_list=trajectory_models[1]
882 score_list=list(map(float, trajectory_models[2]))
884 max_score=max(score_list)
885 min_score=min(score_list)
888 bins=[(max_score-min_score)*math.exp(-float(i))+min_score
for i
in range(nframes_trajectory)]
889 binned_scores=[
None]*nframes_trajectory
890 binned_model_indexes=[-1]*nframes_trajectory
892 for model_index,s
in enumerate(score_list):
893 bins_score_diffs=[abs(s-b)
for b
in bins]
894 bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
895 if binned_scores[bin_index]==
None:
896 binned_scores[bin_index]=s
897 binned_model_indexes[bin_index]=model_index
899 old_diff=abs(binned_scores[bin_index]-bins[bin_index])
900 new_diff=abs(s-bins[bin_index])
901 if new_diff < old_diff:
902 binned_scores[bin_index]=s
903 binned_model_indexes[bin_index]=model_index
906 print(binned_model_indexes)
909 def _expand_ambiguity(self,prot,d):
910 """If using PMI2, expand the dictionary to include copies as ambiguous options
911 This also keeps the states separate.
916 if '..' in key
or (isinstance(val, tuple)
and len(val) >= 3):
919 states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
920 if isinstance(val, tuple):
928 for nst
in range(len(states)):
930 copies = sel.get_selected_particles(with_representation=
False)
932 for nc
in range(len(copies)):
934 newdict[
'%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
936 newdict[
'%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
943 score_key=
"SimplifiedModel_Total_Score_None",
944 rmf_file_key=
"rmf_file",
945 rmf_file_frame_key=
"rmf_frame_index",
950 alignment_components=
None,
951 number_of_best_scoring_models=10,
952 rmsd_calculation_components=
None,
953 distance_matrix_file=
'distances.mat',
954 load_distance_matrix_file=
False,
955 skip_clustering=
False,
956 number_of_clusters=1,
958 exit_after_display=
True,
960 first_and_last_frames=
None,
961 density_custom_ranges=
None,
962 write_pdb_with_centered_coordinates=
False,
964 """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
965 Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
966 Can pass None for copy or state to ignore that field.
967 If you don't pass a specific copy number
968 @param score_key The score for ranking models. Use "Total_Score"
969 instead of default for PMI2.
970 @param rmf_file_key Key pointing to RMF filename
971 @param rmf_file_frame_key Key pointing to RMF frame number
972 @param state_number State number to analyze
973 @param prefiltervalue Only include frames where the
974 score key is below this value
975 @param feature_keys Keywords for which you want to
976 calculate average, medians, etc.
977 If you pass "Keyname" it'll include everything that matches "*Keyname*"
978 @param outputdir The local output directory used in the run
979 @param alignment_components Dictionary with keys=groupname, values are tuples
980 for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
981 @param number_of_best_scoring_models Num models to keep per run
982 @param rmsd_calculation_components For calculating RMSD
983 (same format as alignment_components)
984 @param distance_matrix_file Where to store/read the distance matrix
985 @param load_distance_matrix_file Try to load the distance matrix file
986 @param skip_clustering Just extract the best scoring models
988 @param number_of_clusters Number of k-means clusters
989 @param display_plot Display the distance matrix
990 @param exit_after_display Exit after displaying distance matrix
991 @param get_every Extract every nth frame
992 @param first_and_last_frames A tuple with the first and last frames to be
993 analyzed. Values are percentages!
994 Default: get all frames
995 @param density_custom_ranges For density calculation
996 (same format as alignment_components)
997 @param write_pdb_with_centered_coordinates
998 @param voxel_size Used for the density output
1002 self._outputdir = os.path.abspath(outputdir)
1003 self._number_of_clusters = number_of_clusters
1004 for p, state
in self._protocol_output:
1005 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1016 if not load_distance_matrix_file:
1017 if len(self.stat_files)==0: print(
"ERROR: no stat file found in the given path");
return
1018 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1019 self.stat_files,self.number_of_processes)[self.rank]
1023 orig_score_key = score_key
1024 if score_key
not in po.get_keys():
1025 if 'Total_Score' in po.get_keys():
1026 score_key =
'Total_Score'
1028 "Using 'Total_Score' instead of "
1029 "'SimplifiedModel_Total_Score_None' for the score key",
1031 for k
in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1032 if k
in feature_keys:
1034 "no need to pass " + k +
" to feature_keys.",
1036 feature_keys.remove(k)
1044 get_every, provenance=prov)
1045 rmf_file_list=best_models[0]
1046 rmf_file_frame_list=best_models[1]
1047 score_list=best_models[2]
1048 feature_keyword_list_dict=best_models[3]
1054 if self.number_of_processes > 1:
1058 rmf_file_frame_list)
1059 for k
in feature_keyword_list_dict:
1061 feature_keyword_list_dict[k])
1064 score_rmf_tuples = list(zip(score_list,
1066 rmf_file_frame_list,
1067 list(range(len(score_list)))))
1069 if density_custom_ranges:
1070 for k
in density_custom_ranges:
1071 if not isinstance(density_custom_ranges[k], list):
1072 raise Exception(
"Density custom ranges: values must be lists of tuples")
1075 if first_and_last_frames
is not None:
1076 nframes = len(score_rmf_tuples)
1077 first_frame = int(first_and_last_frames[0] * nframes)
1078 last_frame = int(first_and_last_frames[1] * nframes)
1079 if last_frame > len(score_rmf_tuples):
1081 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1084 best_score_rmf_tuples = sorted(score_rmf_tuples,
1085 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1086 best_score_rmf_tuples=[t+(n,)
for n,t
in enumerate(best_score_rmf_tuples)]
1088 prov.append(IMP.pmi.io.FilterProvenance(
"Best scoring",
1089 0, number_of_best_scoring_models))
1091 best_score_feature_keyword_list_dict = defaultdict(list)
1092 for tpl
in best_score_rmf_tuples:
1094 for f
in feature_keyword_list_dict:
1095 best_score_feature_keyword_list_dict[f].append(
1096 feature_keyword_list_dict[f][index])
1097 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1098 best_score_rmf_tuples,
1099 self.number_of_processes)[self.rank]
1102 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(self.model,
1104 my_best_score_rmf_tuples[0][1])[0]
1106 if rmsd_calculation_components
is not None:
1107 tmp = self._expand_ambiguity(prot_ahead,rmsd_calculation_components)
1108 if tmp!=rmsd_calculation_components:
1109 print(
'Detected ambiguity, expand rmsd components to',tmp)
1110 rmsd_calculation_components = tmp
1111 if alignment_components
is not None:
1112 tmp = self._expand_ambiguity(prot_ahead,alignment_components)
1113 if tmp!=alignment_components:
1114 print(
'Detected ambiguity, expand alignment components to',tmp)
1115 alignment_components = tmp
1121 my_best_score_rmf_tuples[0],
1122 rmsd_calculation_components,
1123 state_number=state_number)
1125 my_best_score_rmf_tuples,
1126 alignment_components,
1127 rmsd_calculation_components,
1128 state_number=state_number)
1133 all_coordinates=got_coords[0]
1134 alignment_coordinates=got_coords[1]
1135 rmsd_coordinates=got_coords[2]
1136 rmf_file_name_index_dict=got_coords[3]
1137 all_rmf_file_names=got_coords[4]
1143 if density_custom_ranges:
1145 density_custom_ranges,
1148 dircluster=os.path.join(outputdir,
"all_models."+str(n))
1154 os.mkdir(dircluster)
1157 clusstat=open(os.path.join(dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1158 for cnt,tpl
in enumerate(my_best_score_rmf_tuples):
1160 rmf_frame_number=tpl[2]
1163 for key
in best_score_feature_keyword_list_dict:
1164 tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1167 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1172 linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1178 if not linking_successful:
1185 states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1186 prot = states[state_number]
1188 prot = prots[state_number]
1193 coords_f1=alignment_coordinates[cnt]
1195 coords_f2=alignment_coordinates[cnt]
1198 transformation = Ali.align()[1]
1220 out_pdb_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".pdb")
1221 out_rmf_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".rmf3")
1222 o.init_pdb(out_pdb_fn,prot)
1223 o.write_pdb(out_pdb_fn,
1224 translate_to_geometric_center=write_pdb_with_centered_coordinates)
1226 tmp_dict[
"local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1227 tmp_dict[
"rmf_file_full_path"]=rmf_name
1228 tmp_dict[
"local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1229 tmp_dict[
"local_rmf_frame_number"]=0
1231 clusstat.write(str(tmp_dict)+
"\n")
1236 h.set_name(
"System")
1238 o.init_rmf(out_rmf_fn, [h], rs)
1240 o.init_rmf(out_rmf_fn, [prot],rs)
1242 o.write_rmf(out_rmf_fn)
1243 o.close_rmf(out_rmf_fn)
1245 if density_custom_ranges:
1246 DensModule.add_subunits_density(prot)
1248 if density_custom_ranges:
1249 DensModule.write_mrc(path=dircluster)
1256 if self.number_of_processes > 1:
1262 rmf_file_name_index_dict)
1264 alignment_coordinates)
1271 [best_score_feature_keyword_list_dict,
1272 rmf_file_name_index_dict],
1278 print(
"setup clustering class")
1281 for n, model_coordinate_dict
in enumerate(all_coordinates):
1282 template_coordinate_dict = {}
1284 if alignment_components
is not None and len(self.cluster_obj.all_coords) == 0:
1286 self.cluster_obj.set_template(alignment_coordinates[n])
1287 self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1288 print(
"Global calculating the distance matrix")
1291 self.cluster_obj.dist_matrix()
1295 self.cluster_obj.do_cluster(number_of_clusters)
1298 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1299 if exit_after_display:
1301 self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1308 print(
"setup clustering class")
1310 self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1311 print(
"clustering with %s clusters" % str(number_of_clusters))
1312 self.cluster_obj.do_cluster(number_of_clusters)
1313 [best_score_feature_keyword_list_dict,
1314 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1317 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1318 if exit_after_display:
1320 if self.number_of_processes > 1:
1328 print(self.cluster_obj.get_cluster_labels())
1329 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1330 print(
"rank %s " % str(self.rank))
1331 print(
"cluster %s " % str(n))
1332 print(
"cluster label %s " % str(cl))
1333 print(self.cluster_obj.get_cluster_label_names(cl))
1334 cluster_size = len(self.cluster_obj.get_cluster_label_names(cl))
1335 cluster_prov = prov + \
1336 [IMP.pmi.io.ClusterProvenance(cluster_size)]
1339 if density_custom_ranges:
1341 density_custom_ranges,
1344 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1346 os.mkdir(dircluster)
1350 rmsd_dict = {
"AVERAGE_RMSD":
1351 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1352 clusstat = open(dircluster +
"stat.out",
"w")
1353 for k, structure_name
in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1356 tmp_dict.update(rmsd_dict)
1357 index = rmf_file_name_index_dict[structure_name]
1358 for key
in best_score_feature_keyword_list_dict:
1360 key] = best_score_feature_keyword_list_dict[
1366 rmf_name = structure_name.split(
"|")[0]
1367 rmf_frame_number = int(structure_name.split(
"|")[1])
1368 clusstat.write(str(tmp_dict) +
"\n")
1372 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1377 linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1383 if not linking_successful:
1389 states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1390 prot = states[state_number]
1392 prot = prots[state_number]
1398 model_index = self.cluster_obj.get_model_index_from_name(
1400 transformation = self.cluster_obj.get_transformation_to_first_member(
1420 if density_custom_ranges:
1421 DensModule.add_subunits_density(prot)
1426 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1427 o.write_pdb(dircluster + str(k) +
".pdb")
1432 h.set_name(
"System")
1434 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1436 o.init_rmf(dircluster + str(k) +
".rmf3", [prot],rs)
1437 o.write_rmf(dircluster + str(k) +
".rmf3")
1438 o.close_rmf(dircluster + str(k) +
".rmf3")
1443 if density_custom_ranges:
1444 DensModule.write_mrc(path=dircluster)
1447 if self.number_of_processes>1:
1450 def get_cluster_rmsd(self,cluster_num):
1451 if self.cluster_obj
is None:
1453 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1455 def save_objects(self, objects, file_name):
1457 outf = open(file_name,
'wb')
1458 pickle.dump(objects, outf)
1461 def load_objects(self, file_name):
1463 inputf = open(file_name,
'rb')
1464 objects = pickle.load(inputf)
1471 This class contains analysis utilities to investigate ReplicaExchange results.
1484 Construction of the Class.
1485 @param model IMP.Model()
1486 @param stat_files list of string. Can be ascii stat files, rmf files names
1487 @param best_models Integer. Number of best scoring models, if None: all models will be read
1488 @param alignment boolean (Default=True). Align before computing the rmsd.
1492 self.best_models=best_models
1505 self.clusters.append(c)
1506 for n0
in range(len(self.stath0)):
1508 self.pairwise_rmsd={}
1509 self.pairwise_molecular_assignment={}
1510 self.alignment=alignment
1511 self.symmetric_molecules={}
1512 self.issymmetricsel={}
1514 self.molcopydict0=IMP.pmi.tools.get_molecules_dictionary_by_copy(
IMP.atom.get_leaves(self.stath0))
1515 self.molcopydict1=IMP.pmi.tools.get_molecules_dictionary_by_copy(
IMP.atom.get_leaves(self.stath1))
1519 Setup the selection onto which the rmsd is computed
1520 @param kwargs use IMP.atom.Selection keywords
1528 Store names of symmetric molecules
1530 self.symmetric_molecules[molecule_name]=0
1535 Setup the selection onto which the alignment is computed
1536 @param kwargs use IMP.atom.Selection keywords
1544 def clean_clusters(self):
1545 for c
in self.clusters: del c
1549 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1551 Cluster the models based on RMSD.
1552 @param rmsd_cutoff Float the distance cutoff in Angstrom
1553 @param metric (Default=IMP.atom.get_rmsd) the metric that will be used to compute rmsds
1555 self.clean_clusters()
1556 not_clustered=set(range(len(self.stath1)))
1557 while len(not_clustered)>0:
1558 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1564 Refine the clusters by merging the ones whose centers are close
1565 @param rmsd_cutoff cutoff distance in Angstorms
1568 clusters_copy=self.clusters
1569 for c0,c1
in itertools.combinations(self.clusters,2):
1570 if c0.center_index
is None:
1572 if c1.center_index
is None:
1574 d0=self.stath0[c0.center_index]
1575 d1=self.stath1[c1.center_index]
1576 rmsd, molecular_assignment = self.
rmsd()
1577 if rmsd <= rmsd_cutoff:
1578 if c1
in self.clusters:
1579 clusters_copy.remove(c1)
1581 self.clusters=clusters_copy
1588 def set_cluster_assignments(self, cluster_ids):
1589 if len(cluster_ids)!=len(self.stath0):
1590 raise ValueError(
'cluster ids has to be same length as number of frames')
1593 for i
in sorted(list(set(cluster_ids))):
1595 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
1596 self.clusters[idx].add_member(i,d)
1600 Return the model data from a cluster
1601 @param cluster IMP.pmi.output.Cluster object
1610 Save the data for the whole models into a pickle file
1611 @param filename string
1613 self.stath0.save_data(filename)
1617 Set the data from an external IMP.pmi.output.Data
1618 @param data IMP.pmi.output.Data
1620 self.stath0.data=data
1621 self.stath1.data=data
1625 Load the data from an external pickled file
1626 @param filename string
1628 self.stath0.load_data(filename)
1629 self.stath1.load_data(filename)
1630 self.best_models=len(self.stath0)
1632 def add_cluster(self,rmf_name_list):
1634 print(
"creating cluster index "+str(len(self.clusters)))
1635 self.clusters.append(c)
1636 current_len=len(self.stath0)
1638 for rmf
in rmf_name_list:
1639 print(
"adding rmf "+rmf)
1640 self.stath0.add_stat_file(rmf)
1641 self.stath1.add_stat_file(rmf)
1643 for n0
in range(current_len,len(self.stath0)):
1650 Save the clusters into a pickle file
1651 @param filename string
1654 import cPickle
as pickle
1657 fl=open(filename,
'wb')
1658 pickle.dump(self.clusters,fl)
1662 Load the clusters from a pickle file
1663 @param filename string
1664 @param append bool (Default=False), if True. append the clusters to the ones currently present
1667 import cPickle
as pickle
1670 fl=open(filename,
'rb')
1671 self.clean_clusters()
1673 self.clusters+=pickle.load(fl)
1675 self.clusters=pickle.load(fl)
1684 Compute the cluster center for a given cluster
1686 member_distance=defaultdict(float)
1688 for n0,n1
in itertools.combinations(cluster.members,2):
1691 rmsd, _ = self.
rmsd()
1692 member_distance[n0]+=rmsd
1694 if len(member_distance)>0:
1695 cluster.center_index=min(member_distance, key=member_distance.get)
1697 cluster.center_index=cluster.members[0]
1701 Save the coordinates of the current cluster a single rmf file
1703 print(
"saving coordinates",cluster)
1706 if rmf_name
is None:
1707 rmf_name=prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
1709 d1=self.stath1[cluster.members[0]]
1711 o.init_rmf(rmf_name, [self.stath1])
1712 for n1
in cluster.members:
1716 if self.alignment: self.align()
1717 o.write_rmf(rmf_name)
1719 o.close_rmf(rmf_name)
1723 remove structures that are similar
1724 append it to a new cluster
1726 print(
"pruning models")
1729 remaining=range(1,len(self.stath1),10)
1731 while len(remaining)>0:
1732 d0=self.stath0[selected]
1734 for n1
in remaining:
1736 if self.alignment: self.align()
1740 print(
"pruning model %s, similar to model %s, rmsd %s"%(str(n1),str(selected),str(d)))
1741 remaining=[x
for x
in remaining
if x
not in rm]
1742 if len(remaining)==0:
break
1743 selected=remaining[0]
1744 filtered.append(selected)
1747 self.clusters.append(c)
1757 Compute the precision of a cluster
1763 if not cluster.center_index
is None:
1764 members1=[cluster.center_index]
1766 members1=cluster.members
1770 for n1
in cluster.members:
1775 tmp_rmsd, _ = self.
rmsd()
1780 precision=rmsd/npairs
1781 cluster.precision=precision
1786 Compute the bipartite precision (ie the cross-precision)
1787 between two clusters
1791 for cn0,n0
in enumerate(cluster1.members):
1793 for cn1,n1
in enumerate(cluster2.members):
1795 tmp_rmsd, _ =self.
rmsd()
1796 if verbose: print(
"--- rmsd between structure %s and structure %s is %s"%(str(cn0),str(cn1),str(tmp_rmsd)))
1799 precision=rmsd/npairs
1802 def rmsf(self,cluster,molecule,copy_index=0,state_index=0,cluster_ref=None,step=1):
1804 Compute the Root mean square fluctuations
1805 of a molecule in a cluster
1806 Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf
1808 rmsf=IMP.pmi.tools.OrderedDict()
1811 if cluster_ref
is not None:
1812 if not cluster_ref.center_index
is None:
1813 members0 = [cluster_ref.center_index]
1815 members0 = cluster_ref.members
1817 if not cluster.center_index
is None:
1818 members0 = [cluster.center_index]
1820 members0 = cluster.members
1823 copy_index=copy_index,state_index=state_index)
1824 ps0=s0.get_selected_particles()
1838 for n1
in cluster.members[::step]:
1840 print(
"--- rmsf %s %s"%(str(n0),str(n1)))
1843 s1=
IMP.atom.Selection(self.stath1,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
1844 copy_index=copy_index,state_index=state_index)
1845 ps1 = s1.get_selected_particles()
1848 if self.alignment: self.align()
1849 for n,(p0,p1)
in enumerate(zip(ps0,ps1)):
1850 r=residue_indexes[n]
1862 for stath
in [self.stath0,self.stath1]:
1863 if molecule
not in self.symmetric_molecules:
1865 copy_index=copy_index,state_index=state_index)
1868 state_index=state_index)
1870 ps = s.get_selected_particles()
1879 def save_densities(self,cluster,density_custom_ranges,voxel_size=5,reference="Absolute", prefix="./",step=1):
1884 for n1
in cluster.members[::step]:
1885 print(
"density "+str(n1))
1888 if self.alignment: self.align()
1889 dens.add_subunits_density(self.stath1)
1891 dens.write_mrc(path=prefix+
'/',suffix=str(cluster.cluster_id))
1894 def contact_map(self,cluster,contact_threshold=15,log_scale=False,consolidate=False,molecules=None,prefix='./',reference="Absolute"):
1897 import matplotlib.pyplot
as plt
1898 import matplotlib.cm
as cm
1899 from scipy.spatial.distance
import cdist
1901 if molecules
is None:
1905 unique_copies=[mol
for mol
in mols
if mol.get_copy_index() == 0]
1906 mol_names_unique=dict((mol.get_name(),mol)
for mol
in unique_copies)
1907 total_len_unique=sum(max(mol.get_residue_indexes())
for mol
in unique_copies)
1916 seqlen=max(mol.get_residue_indexes())
1917 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
1921 for mol
in unique_copies:
1922 seqlen=max(mol.get_residue_indexes())
1923 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
1927 for ncl,n1
in enumerate(cluster.members):
1931 coord_dict=IMP.pmi.tools.OrderedDict()
1933 mol_name=mol.get_name()
1934 copy_index=mol.get_copy_index()
1935 rindexes = mol.get_residue_indexes()
1936 coords = np.ones((max(rindexes),3))
1937 for rnum
in rindexes:
1939 selpart = sel.get_selected_particles()
1940 if len(selpart) == 0:
continue
1941 selpart = selpart[0]
1942 coords[rnum - 1, :] =
IMP.core.XYZ(selpart).get_coordinates()
1943 coord_dict[mol]=coords
1946 coords=np.concatenate(list(coord_dict.values()))
1947 dists = cdist(coords, coords)
1948 binary_dists = np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
1950 binary_dists_dict={}
1952 len1 = max(mol1.get_residue_indexes())
1954 name1=mol1.get_name()
1955 name2=mol2.get_name()
1956 dists = cdist(coord_dict[mol1], coord_dict[mol2])
1957 if (name1, name2)
not in binary_dists_dict:
1958 binary_dists_dict[(name1, name2)] = np.zeros((len1,len1))
1959 binary_dists_dict[(name1, name2)] += np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
1960 binary_dists=np.zeros((total_len_unique,total_len_unique))
1962 for name1,name2
in binary_dists_dict:
1963 r1=index_dict[mol_names_unique[name1]]
1964 r2=index_dict[mol_names_unique[name2]]
1965 binary_dists[min(r1):max(r1)+1,min(r2):max(r2)+1] = np.where((binary_dists_dict[(name1, name2)]>=1.0),1.0,0.0)
1970 contact_freqs = binary_dists
1972 dist_maps.append(dists)
1973 av_dist_map += dists
1974 contact_freqs += binary_dists
1979 contact_freqs =-np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
1981 contact_freqs =1.0/len(cluster)*contact_freqs
1982 av_dist_map=1.0/len(cluster)*contact_freqs
1984 fig = plt.figure(figsize=(100, 100))
1985 ax = fig.add_subplot(111)
1988 gap_between_components=50
2000 prot_list=list(zip(*sorted_tuple))[1]
2003 prot_list=list(zip(*sorted_tuple))[1]
2005 prot_listx = prot_list
2006 nresx = gap_between_components + \
2007 sum([max(mol.get_residue_indexes())
2008 + gap_between_components
for mol
in prot_listx])
2011 prot_listy = prot_list
2012 nresy = gap_between_components + \
2013 sum([max(mol.get_residue_indexes())
2014 + gap_between_components
for mol
in prot_listy])
2019 res = gap_between_components
2020 for mol
in prot_listx:
2021 resoffsetx[mol] = res
2022 res += max(mol.get_residue_indexes())
2024 res += gap_between_components
2028 res = gap_between_components
2029 for mol
in prot_listy:
2030 resoffsety[mol] = res
2031 res += max(mol.get_residue_indexes())
2033 res += gap_between_components
2035 resoffsetdiagonal = {}
2036 res = gap_between_components
2037 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2038 resoffsetdiagonal[mol] = res
2039 res += max(mol.get_residue_indexes())
2040 res += gap_between_components
2045 for n, prot
in enumerate(prot_listx):
2046 res = resoffsetx[prot]
2048 for proty
in prot_listy:
2049 resy = resoffsety[proty]
2050 endy = resendy[proty]
2051 ax.plot([res, res], [resy, endy], linestyle=
'-',color=
'gray', lw=0.4)
2052 ax.plot([end, end], [resy, endy], linestyle=
'-',color=
'gray', lw=0.4)
2053 xticks.append((float(res) + float(end)) / 2)
2058 for n, prot
in enumerate(prot_listy):
2059 res = resoffsety[prot]
2061 for protx
in prot_listx:
2062 resx = resoffsetx[protx]
2063 endx = resendx[protx]
2064 ax.plot([resx, endx], [res, res], linestyle=
'-',color=
'gray', lw=0.4)
2065 ax.plot([resx, endx], [end, end], linestyle=
'-',color=
'gray', lw=0.4)
2066 yticks.append((float(res) + float(end)) / 2)
2071 tmp_array = np.zeros((nresx, nresy))
2073 for px
in prot_listx:
2074 for py
in prot_listy:
2075 resx = resoffsetx[px]
2076 lengx = resendx[px] - 1
2077 resy = resoffsety[py]
2078 lengy = resendy[py] - 1
2079 indexes_x = index_dict[px]
2080 minx = min(indexes_x)
2081 maxx = max(indexes_x)
2082 indexes_y = index_dict[py]
2083 miny = min(indexes_y)
2084 maxy = max(indexes_y)
2085 tmp_array[resx:lengx,resy:lengy] = contact_freqs[minx:maxx,miny:maxy]
2086 ret[(px,py)]=np.argwhere(contact_freqs[minx:maxx,miny:maxy] == 1.0)+1
2088 cax = ax.imshow(tmp_array,
2093 interpolation=
'nearest')
2095 ax.set_xticks(xticks)
2096 ax.set_xticklabels(xlabels, rotation=90)
2097 ax.set_yticks(yticks)
2098 ax.set_yticklabels(ylabels)
2099 plt.setp(ax.get_xticklabels(), fontsize=6)
2100 plt.setp(ax.get_yticklabels(), fontsize=6)
2103 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2104 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2109 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf", dpi=300,transparent=
"False")
2113 def plot_rmsd_matrix(self,filename):
2115 self.compute_all_pairwise_rmsd()
2116 distance_matrix = np.zeros(
2117 (len(self.stath0), len(self.stath1)))
2118 for (n0,n1)
in self.pairwise_rmsd:
2119 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0,n1)]
2121 import matplotlib
as mpl
2123 import matplotlib.pylab
as pl
2124 from scipy.cluster
import hierarchy
as hrc
2126 fig = pl.figure(figsize=(10,8))
2127 ax = fig.add_subplot(212)
2128 dendrogram = hrc.dendrogram(
2129 hrc.linkage(distance_matrix),
2132 leaves_order = dendrogram[
'leaves']
2133 ax.set_xlabel(
'Model')
2134 ax.set_ylabel(
'RMSD [Angstroms]')
2136 ax2 = fig.add_subplot(221)
2138 distance_matrix[leaves_order,
2141 interpolation=
'nearest')
2142 cb = fig.colorbar(cax)
2143 cb.set_label(
'RMSD [Angstroms]')
2144 ax2.set_xlabel(
'Model')
2145 ax2.set_ylabel(
'Model')
2147 pl.savefig(filename, dpi=300)
2156 Update the cluster id numbers
2158 for n,c
in enumerate(self.clusters):
2161 def get_molecule(self, hier, name, copy):
2169 self.seldict0=IMP.pmi.tools.get_selections_dictionary(self.sel0_rmsd.get_selected_particles())
2170 self.seldict1=IMP.pmi.tools.get_selections_dictionary(self.sel1_rmsd.get_selected_particles())
2171 for mol
in self.seldict0:
2172 for sel
in self.seldict0[mol]:
2173 self.issymmetricsel[sel]=
False
2174 for mol
in self.symmetric_molecules:
2175 self.symmetric_molecules[mol]=len(self.seldict0[mol])
2176 for sel
in self.seldict0[mol]:
2177 self.issymmetricsel[sel]=
True
2183 for rb
in self.rbs1:
2186 for bead
in self.beads1:
2195 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2197 initial filling of the clusters.
2200 print(
"clustering model "+str(n0))
2201 d0 = self.stath0[n0]
2203 print(
"creating cluster index "+str(len(self.clusters)))
2204 self.clusters.append(c)
2206 clustered = set([n0])
2208 print(
"--- trying to add model "+str(n1)+
" to cluster "+str(len(self.clusters)))
2209 d1 = self.stath1[n1]
2210 if self.alignment: self.align()
2211 rmsd, _ = self.
rmsd(metric=metric)
2212 if rmsd<rmsd_cutoff:
2213 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2217 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2222 merge the clusters that have close members
2223 @param rmsd_cutoff cutoff distance in Angstorms
2229 for c0, c1
in filter(
lambda x: len(x[0].members)>1, itertools.combinations(self.clusters, 2)):
2230 n0, n1 = [c.members[0]
for c
in (c0,c1)]
2231 d0 = self.stath0[n0]
2232 d1 = self.stath1[n1]
2233 rmsd, _ = self.
rmsd()
2235 to_merge.append((c0,c1))
2237 for c0, c
in reversed(to_merge):
2241 self.clusters = [c
for c
in filter(
lambda x: len(x.members)>0, self.clusters)]
2246 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2248 print(
"check close members for clusters "+str(c0.cluster_id)+
" and "+str(c1.cluster_id))
2249 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2250 d0 = self.stath0[n0]
2251 d1 = self.stath1[n1]
2252 rmsd, _ = self.
rmsd(metric=metric)
2253 if rmsd<rmsd_cutoff:
2269 a function that returns the permutation best_sel of sels0 that minimizes metric
2271 best_rmsd2 = float(
'inf')
2273 if self.issymmetricsel[sels0[0]]:
2276 for offset
in range(N):
2277 order=[(offset+i)%N
for i
in range(N)]
2278 sels = [sels0[(offset+i)%N]
for i
in range(N)]
2281 r=metric(sel0, sel1)
2284 if rmsd2 < best_rmsd2:
2289 for sels
in itertools.permutations(sels0):
2291 for sel0, sel1
in itertools.takewhile(
lambda x: rmsd2<best_rmsd2, zip(sels, sels1)):
2292 r=metric(sel0, sel1)
2294 if rmsd2 < best_rmsd2:
2308 return best_sel, best_rmsd2
2310 def compute_all_pairwise_rmsd(self):
2311 for d0
in self.stath0:
2312 for d1
in self.stath1:
2313 rmsd, _ = self.
rmsd()
2315 def rmsd(self,metric=IMP.atom.get_rmsd):
2317 Computes the RMSD. Resolves ambiguous pairs assignments
2320 n0=self.stath0.current_index
2321 n1=self.stath1.current_index
2322 if ((n0,n1)
in self.pairwise_rmsd)
and ((n0,n1)
in self.pairwise_molecular_assignment):
2323 return self.pairwise_rmsd[(n0,n1)], self.pairwise_molecular_assignment[(n0,n1)]
2331 seldict_best_order={}
2332 molecular_assignment={}
2333 for molname, sels0
in self.seldict0.items():
2334 sels_best_order, best_rmsd2 = self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2336 Ncoords = len(sels_best_order[0].get_selected_particles())
2337 Ncopies = len(self.seldict1[molname])
2338 total_rmsd += Ncoords*best_rmsd2
2339 total_N += Ncoords*Ncopies
2341 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2342 p0 = sel0.get_selected_particles()[0]
2343 p1 = sel1.get_selected_particles()[0]
2349 molecular_assignment[(molname,c0)]=(molname,c1)
2351 total_rmsd = math.sqrt(total_rmsd/total_N)
2353 self.pairwise_rmsd[(n0,n1)]=total_rmsd
2354 self.pairwise_molecular_assignment[(n0,n1)]=molecular_assignment
2355 self.pairwise_rmsd[(n1,n0)]=total_rmsd
2356 self.pairwise_molecular_assignment[(n1,n0)]=molecular_assignment
2358 return total_rmsd, molecular_assignment
2362 Fix the reference structure for structural alignment, rmsd and chain assignemnt
2363 @param reference can be either "Absolute" (cluster center of the first cluster)
2364 or Relative (cluster center of the current cluster)
2366 if reference==
"Absolute":
2368 elif reference==
"Relative":
2369 if cluster.center_index:
2370 n0=cluster.center_index
2372 n0=cluster.members[0]
2377 compute the molecular assignemnts between multiple copies
2378 of the same sequence. It changes the Copy index of Molecules
2381 _, molecular_assignment = self.
rmsd()
2382 for (m0, c0), (m1,c1)
in molecular_assignment.items():
2383 mol0 = self.molcopydict0[m0][c0]
2384 mol1 = self.molcopydict1[m1][c1]
2387 p1.set_value(cik0,c0)
2391 Undo the Copy index assignment
2394 _, molecular_assignment = self.
rmsd()
2396 for (m0, c0), (m1,c1)
in molecular_assignment.items():
2397 mol0 = self.molcopydict0[m0][c0]
2398 mol1 = self.molcopydict1[m1][c1]
2401 p1.set_value(cik0,c1)
2408 s=
"AnalysisReplicaExchange\n"
2409 s+=
"---- number of clusters %s \n"%str(len(self.clusters))
2410 s+=
"---- number of models %s \n"%str(len(self.stath0))
2413 def __getitem__(self,int_slice_adaptor):
2414 if isinstance(int_slice_adaptor, int):
2415 return self.clusters[int_slice_adaptor]
2416 elif isinstance(int_slice_adaptor, slice):
2417 return self.__iter__(int_slice_adaptor)
2419 raise TypeError(
"Unknown Type")
2422 return len(self.clusters)
2424 def __iter__(self,slice_key=None):
2425 if slice_key
is None:
2426 for i
in range(len(self)):
2429 for i
in range(len(self))[slice_key]:
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A class for reading stat files (either rmf or ascii v1 and v2)
A member of a rigid body, it has internal (local) coordinates.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
void handle_use_deprecated(std::string message)
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
def merge_aggregates
merge the clusters that have close members
This class initializes the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
Mapping between FASTA one-letter codes and residue types.
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
A macro to help setup and run replica exchange.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
Sample using replica exchange.
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.