1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
18 from operator
import itemgetter
19 from collections
import defaultdict
27 class _RMFRestraints(object):
28 """All restraints that are written out to the RMF file"""
29 def __init__(self, model, user_restraints):
31 self._user_restraints = user_restraints
if user_restraints
else []
34 return (len(self._user_restraints)
35 + self._rmf_rs.get_number_of_restraints())
39 __nonzero__ = __bool__
41 def __getitem__(self, i):
42 class FakePMIWrapper(object):
43 def __init__(self, r):
44 self.r = IMP.RestraintSet.get_from(r)
46 def get_restraint(self):
49 lenuser = len(self._user_restraints)
51 return self._user_restraints[i]
52 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
53 r = self._rmf_rs.get_restraint(i - lenuser)
54 return FakePMIWrapper(r)
56 raise IndexError(
"Out of range")
60 """A macro to help setup and run replica exchange.
61 Supports Monte Carlo and molecular dynamics.
62 Produces trajectory RMF files, best PDB structures,
63 and output stat files.
67 monte_carlo_sample_objects=
None,
68 molecular_dynamics_sample_objects=
None,
70 rmf_output_objects=
None,
71 crosslink_restraints=
None,
72 monte_carlo_temperature=1.0,
73 simulated_annealing=
False,
74 simulated_annealing_minimum_temperature=1.0,
75 simulated_annealing_maximum_temperature=2.5,
76 simulated_annealing_minimum_temperature_nframes=100,
77 simulated_annealing_maximum_temperature_nframes=100,
78 replica_exchange_minimum_temperature=1.0,
79 replica_exchange_maximum_temperature=2.5,
80 replica_exchange_swap=
True,
82 number_of_best_scoring_models=500,
85 molecular_dynamics_steps=10,
86 molecular_dynamics_max_time_step=1.0,
87 number_of_frames=1000,
88 save_coordinates_mode=
"lowest_temperature",
89 nframes_write_coordinates=1,
90 write_initial_rmf=
True,
91 initial_rmf_name_suffix=
"initial",
92 stat_file_name_suffix=
"stat",
93 best_pdb_name_suffix=
"model",
95 do_create_directories=
True,
96 global_output_directory=
"./",
99 replica_stat_file_suffix=
"stat_replica",
100 em_object_for_rmf=
None,
102 replica_exchange_object=
None,
105 @param model The IMP model
106 @param root_hier Top-level (System)hierarchy
107 @param monte_carlo_sample_objects Objects for MC sampling; a list of
108 structural components (generally the representation) that
109 will be moved and restraints with parameters that need to
111 For PMI2: just pass flat list of movers
112 @param molecular_dynamics_sample_objects Objects for MD sampling
113 For PMI2: just pass flat list of particles
114 @param output_objects A list of structural objects and restraints
115 that will be included in output (ie, statistics "stat"
116 files). Any object that provides a get_output() method
117 can be used here. If None is passed
118 the macro will not write stat files.
119 @param rmf_output_objects A list of structural objects and
120 restraints that will be included in rmf. Any object
121 that provides a get_output() method can be used here.
122 @param monte_carlo_temperature MC temp (may need to be optimized
123 based on post-sampling analysis)
124 @param simulated_annealing If True, perform simulated annealing
125 @param simulated_annealing_minimum_temperature Should generally be
126 the same as monte_carlo_temperature.
127 @param simulated_annealing_minimum_temperature_nframes Number of
128 frames to compute at minimum temperature.
129 @param simulated_annealing_maximum_temperature_nframes Number of
131 temps > simulated_annealing_maximum_temperature.
132 @param replica_exchange_minimum_temperature Low temp for REX; should
133 generally be the same as monte_carlo_temperature.
134 @param replica_exchange_maximum_temperature High temp for REX
135 @param replica_exchange_swap Boolean, enable disable temperature
137 @param num_sample_rounds Number of rounds of MC/MD per cycle
138 @param number_of_best_scoring_models Number of top-scoring PDB
139 models to keep around for analysis
140 @param monte_carlo_steps Number of MC steps per round
141 @param self_adaptive self adaptive scheme for Monte Carlo movers
142 @param molecular_dynamics_steps Number of MD steps per round
143 @param molecular_dynamics_max_time_step Max time step for MD
144 @param number_of_frames Number of REX frames to run
145 @param save_coordinates_mode string: how to save coordinates.
146 "lowest_temperature" (default) only the lowest temperatures
148 "25th_score" all replicas whose score is below the 25th
150 "50th_score" all replicas whose score is below the 50th
152 "75th_score" all replicas whose score is below the 75th
154 @param nframes_write_coordinates How often to write the coordinates
156 @param write_initial_rmf Write the initial configuration
157 @param global_output_directory Folder that will be created to house
159 @param test_mode Set to True to avoid writing any files, just test
166 self.output_objects = output_objects
167 self.rmf_output_objects = rmf_output_objects
169 and root_hier.get_name() ==
'System'):
170 if self.output_objects
is not None:
171 self.output_objects.append(
173 if self.rmf_output_objects
is not None:
174 self.rmf_output_objects.append(
176 self.root_hier = root_hier
177 states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
178 self.vars[
"number_of_states"] = len(states)
180 self.root_hiers = states
181 self.is_multi_state =
True
183 self.root_hier = root_hier
184 self.is_multi_state =
False
186 raise TypeError(
"Must provide System hierarchy (root_hier)")
188 if crosslink_restraints:
190 "crosslink_restraints is deprecated and is ignored; "
191 "all cross-link restraints should be automatically "
192 "added to output RMF files")
193 self._rmf_restraints = _RMFRestraints(model,
None)
194 self.em_object_for_rmf = em_object_for_rmf
195 self.monte_carlo_sample_objects = monte_carlo_sample_objects
196 self.vars[
"self_adaptive"] = self_adaptive
197 if sample_objects
is not None:
199 "sample_objects is deprecated; use monte_carlo_sample_objects "
200 "(or molecular_dynamics_sample_objects) instead")
201 self.monte_carlo_sample_objects += sample_objects
202 self.molecular_dynamics_sample_objects = \
203 molecular_dynamics_sample_objects
204 self.replica_exchange_object = replica_exchange_object
205 self.molecular_dynamics_max_time_step = \
206 molecular_dynamics_max_time_step
207 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
208 self.vars[
"replica_exchange_minimum_temperature"] = \
209 replica_exchange_minimum_temperature
210 self.vars[
"replica_exchange_maximum_temperature"] = \
211 replica_exchange_maximum_temperature
212 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
213 self.vars[
"simulated_annealing"] = simulated_annealing
214 self.vars[
"simulated_annealing_minimum_temperature"] = \
215 simulated_annealing_minimum_temperature
216 self.vars[
"simulated_annealing_maximum_temperature"] = \
217 simulated_annealing_maximum_temperature
218 self.vars[
"simulated_annealing_minimum_temperature_nframes"] = \
219 simulated_annealing_minimum_temperature_nframes
220 self.vars[
"simulated_annealing_maximum_temperature_nframes"] = \
221 simulated_annealing_maximum_temperature_nframes
223 self.vars[
"num_sample_rounds"] = num_sample_rounds
225 "number_of_best_scoring_models"] = number_of_best_scoring_models
226 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
227 self.vars[
"molecular_dynamics_steps"] = molecular_dynamics_steps
228 self.vars[
"number_of_frames"] = number_of_frames
229 if save_coordinates_mode
not in (
"lowest_temperature",
"25th_score",
230 "50th_score",
"75th_score"):
231 raise Exception(
"save_coordinates_mode has unrecognized value")
233 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
234 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
235 self.vars[
"write_initial_rmf"] = write_initial_rmf
236 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
237 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
238 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
239 self.vars[
"do_clean_first"] = do_clean_first
240 self.vars[
"do_create_directories"] = do_create_directories
241 self.vars[
"global_output_directory"] = global_output_directory
242 self.vars[
"rmf_dir"] = rmf_dir
243 self.vars[
"best_pdb_dir"] = best_pdb_dir
244 self.vars[
"atomistic"] = atomistic
245 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
246 self.vars[
"geometries"] =
None
247 self.test_mode = test_mode
250 if self.vars[
"geometries"]
is None:
251 self.vars[
"geometries"] = list(geometries)
253 self.vars[
"geometries"].extend(geometries)
256 print(
"ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, "
257 "rmfs/*.rmf3 for each replica ")
258 print(
"--- it stores the best scoring pdb models in pdbs/")
259 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
260 "lowest temperature")
261 print(
"--- variables:")
262 keys = list(self.vars.keys())
265 print(
"------", v.ljust(30), self.vars[v])
267 def get_replica_exchange_object(self):
268 return self.replica_exchange_object
270 def _add_provenance(self, sampler_md, sampler_mc):
271 """Record details about the sampling in the IMP Hierarchies"""
274 method =
"Molecular Dynamics"
275 iterations += self.vars[
"molecular_dynamics_steps"]
277 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
278 iterations += self.vars[
"monte_carlo_steps"]
280 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
282 iterations *= self.vars[
"num_sample_rounds"]
284 pi = self.model.add_particle(
"sampling")
286 self.model, pi, method, self.vars[
"number_of_frames"],
288 p.set_number_of_replicas(
289 self.replica_exchange_object.get_number_of_replicas())
290 IMP.pmi.tools._add_pmi_provenance(self.root_hier)
293 def execute_macro(self):
294 temp_index_factor = 100000.0
298 if self.monte_carlo_sample_objects
is not None:
299 print(
"Setting up MonteCarlo")
301 self.model, self.monte_carlo_sample_objects,
302 self.vars[
"monte_carlo_temperature"])
303 if self.vars[
"simulated_annealing"]:
304 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
305 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
307 "simulated_annealing_minimum_temperature_nframes"]
309 "simulated_annealing_maximum_temperature_nframes"]
310 sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
311 if self.vars[
"self_adaptive"]:
312 sampler_mc.set_self_adaptive(
313 isselfadaptive=self.vars[
"self_adaptive"])
314 if self.output_objects
is not None:
315 self.output_objects.append(sampler_mc)
316 if self.rmf_output_objects
is not None:
317 self.rmf_output_objects.append(sampler_mc)
318 samplers.append(sampler_mc)
320 if self.molecular_dynamics_sample_objects
is not None:
321 print(
"Setting up MolecularDynamics")
323 self.model, self.molecular_dynamics_sample_objects,
324 self.vars[
"monte_carlo_temperature"],
325 maximum_time_step=self.molecular_dynamics_max_time_step)
326 if self.vars[
"simulated_annealing"]:
327 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
328 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
330 "simulated_annealing_minimum_temperature_nframes"]
332 "simulated_annealing_maximum_temperature_nframes"]
333 sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
334 if self.output_objects
is not None:
335 self.output_objects.append(sampler_md)
336 if self.rmf_output_objects
is not None:
337 self.rmf_output_objects.append(sampler_md)
338 samplers.append(sampler_md)
341 print(
"Setting up ReplicaExchange")
343 self.model, self.vars[
"replica_exchange_minimum_temperature"],
344 self.vars[
"replica_exchange_maximum_temperature"], samplers,
345 replica_exchange_object=self.replica_exchange_object)
346 self.replica_exchange_object = rex.rem
348 myindex = rex.get_my_index()
349 if self.output_objects
is not None:
350 self.output_objects.append(rex)
351 if self.rmf_output_objects
is not None:
352 self.rmf_output_objects.append(rex)
356 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
360 globaldir = self.vars[
"global_output_directory"] +
"/"
361 rmf_dir = globaldir + self.vars[
"rmf_dir"]
362 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
364 if not self.test_mode:
365 if self.vars[
"do_clean_first"]:
368 if self.vars[
"do_create_directories"]:
371 os.makedirs(globaldir)
379 if not self.is_multi_state:
385 for n
in range(self.vars[
"number_of_states"]):
387 os.makedirs(pdb_dir +
"/" + str(n))
394 if self.output_objects
is not None:
395 self.output_objects.append(sw)
396 if self.rmf_output_objects
is not None:
397 self.rmf_output_objects.append(sw)
399 print(
"Setting up stat file")
401 low_temp_stat_file = globaldir + \
402 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
405 if not self.test_mode:
408 if not self.test_mode:
409 if self.output_objects
is not None:
410 output.init_stat2(low_temp_stat_file,
412 extralabels=[
"rmf_file",
"rmf_frame_index"])
414 print(
"Stat file writing is disabled")
416 if self.rmf_output_objects
is not None:
417 print(
"Stat info being written in the rmf file")
419 print(
"Setting up replica stat file")
420 replica_stat_file = globaldir + \
421 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
422 if not self.test_mode:
423 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
425 if not self.test_mode:
426 print(
"Setting up best pdb files")
427 if not self.is_multi_state:
428 if self.vars[
"number_of_best_scoring_models"] > 0:
429 output.init_pdb_best_scoring(
430 pdb_dir +
"/" + self.vars[
"best_pdb_name_suffix"],
432 self.vars[
"number_of_best_scoring_models"],
433 replica_exchange=
True)
435 pdb_dir +
"/" +
"model.psf",
437 self.vars[
"best_pdb_name_suffix"] +
".0.pdb")
439 if self.vars[
"number_of_best_scoring_models"] > 0:
440 for n
in range(self.vars[
"number_of_states"]):
441 output.init_pdb_best_scoring(
442 pdb_dir +
"/" + str(n) +
"/" +
443 self.vars[
"best_pdb_name_suffix"],
445 self.vars[
"number_of_best_scoring_models"],
446 replica_exchange=
True)
448 pdb_dir +
"/" + str(n) +
"/" +
"model.psf",
449 pdb_dir +
"/" + str(n) +
"/" +
450 self.vars[
"best_pdb_name_suffix"] +
".0.pdb")
453 if self.em_object_for_rmf
is not None:
454 output_hierarchies = [
456 self.em_object_for_rmf.get_density_as_hierarchy(
459 output_hierarchies = [self.root_hier]
461 if not self.test_mode:
462 print(
"Setting up and writing initial rmf coordinate file")
463 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
464 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
466 listofobjects=self.rmf_output_objects)
467 if self._rmf_restraints:
468 output.add_restraints_to_rmf(
469 init_suffix +
"." + str(myindex) +
".rmf3",
470 self._rmf_restraints)
471 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
472 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
474 if not self.test_mode:
475 mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
477 self._add_provenance(sampler_md, sampler_mc)
479 if not self.test_mode:
480 print(
"Setting up production rmf files")
481 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
482 output.init_rmf(rmfname, output_hierarchies,
483 geometries=self.vars[
"geometries"],
484 listofobjects=self.rmf_output_objects)
486 if self._rmf_restraints:
487 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
489 ntimes_at_low_temp = 0
493 self.replica_exchange_object.set_was_used(
True)
494 nframes = self.vars[
"number_of_frames"]
497 for i
in range(nframes):
501 for nr
in range(self.vars[
"num_sample_rounds"]):
502 if sampler_md
is not None:
504 self.vars[
"molecular_dynamics_steps"])
505 if sampler_mc
is not None:
506 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
508 self.model).evaluate(
False)
509 mpivs.set_value(
"score", score)
510 output.set_output_entry(
"score", score)
512 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
514 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
515 save_frame = (min_temp_index == my_temp_index)
516 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
517 score_perc = mpivs.get_percentile(
"score")
518 save_frame = (score_perc*100.0 <= 25.0)
519 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
520 score_perc = mpivs.get_percentile(
"score")
521 save_frame = (score_perc*100.0 <= 50.0)
522 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
523 score_perc = mpivs.get_percentile(
"score")
524 save_frame = (score_perc*100.0 <= 75.0)
527 if save_frame
and not self.test_mode:
531 print(
"--- frame %s score %s " % (str(i), str(score)))
533 if not self.test_mode:
534 if i % self.vars[
"nframes_write_coordinates"] == 0:
535 print(
'--- writing coordinates')
536 if self.vars[
"number_of_best_scoring_models"] > 0:
537 output.write_pdb_best_scoring(score)
538 output.write_rmf(rmfname)
539 output.set_output_entry(
"rmf_file", rmfname)
540 output.set_output_entry(
"rmf_frame_index",
543 output.set_output_entry(
"rmf_file", rmfname)
544 output.set_output_entry(
"rmf_frame_index",
'-1')
545 if self.output_objects
is not None:
546 output.write_stat2(low_temp_stat_file)
547 ntimes_at_low_temp += 1
549 if not self.test_mode:
550 output.write_stat2(replica_stat_file)
551 if self.vars[
"replica_exchange_swap"]:
552 rex.swap_temp(i, score)
553 for p, state
in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
554 p.add_replica_exchange(state, self)
556 if not self.test_mode:
557 print(
"closing production rmf files")
558 output.close_rmf(rmfname)
562 """A macro to build a IMP::pmi::topology::System based on a
563 TopologyReader object.
565 Easily create multi-state systems by calling this macro
566 repeatedly with different TopologyReader objects!
567 A useful function is get_molecules() which returns the PMI Molecules
568 grouped by state as a dictionary with key = (molecule name),
569 value = IMP.pmi.topology.Molecule
570 Quick multi-state system:
573 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
574 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
575 bs = IMP.pmi.macros.BuildSystem(model)
576 bs.add_state(reader1)
577 bs.add_state(reader2)
578 bs.execute_macro() # build everything including degrees of freedom
579 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
580 ### now you have a two state system, you add restraints etc
582 @note The "domain name" entry of the topology reader is not used.
583 All molecules are set up by the component name, but split into rigid bodies
587 _alphabets = {
'DNA': IMP.pmi.alphabets.dna,
588 'RNA': IMP.pmi.alphabets.rna}
590 def __init__(self, model, sequence_connectivity_scale=4.0,
591 force_create_gmm_files=
False, resolutions=[1, 10]):
593 @param model An IMP Model
594 @param sequence_connectivity_scale For scaling the connectivity
596 @param force_create_gmm_files If True, will sample and create GMMs
597 no matter what. If False, will only sample if the
598 files don't exist. If number of Gaussians is zero, won't
600 @param resolutions The resolutions to build for structured regions
607 self._domain_res = []
609 self.force_create_gmm_files = force_create_gmm_files
610 self.resolutions = resolutions
612 def add_state(self, reader, keep_chain_id=False, fasta_name_map=None):
613 """Add a state using the topology info in a
614 IMP::pmi::topology::TopologyReader object.
615 When you are done adding states, call execute_macro()
616 @param reader The TopologyReader object
617 @param fasta_name_map dictionary for converting protein names
618 found in the fasta file
620 state = self.system.create_state()
621 self._readers.append(reader)
623 these_domain_res = {}
625 chain_ids = string.ascii_uppercase+string.ascii_lowercase+
'0123456789'
630 for molname
in reader.get_molecules():
631 copies = reader.get_molecules()[molname].domains
632 for nc, copyname
in enumerate(copies):
633 print(
"BuildSystem.add_state: setting up molecule %s copy "
634 "number %s" % (molname, str(nc)))
635 copy = copies[copyname]
638 all_chains = [c
for c
in copy
if c.chain
is not None]
640 chain_id = all_chains[0].chain
642 chain_id = chain_ids[numchain]
644 "No PDBs specified for %s, so keep_chain_id has "
645 "no effect; using default chain ID '%s'"
648 chain_id = chain_ids[numchain]
650 alphabet = IMP.pmi.alphabets.amino_acid
651 fasta_flag = copy[0].fasta_flag
652 if fasta_flag
in self._alphabets:
653 alphabet = self._alphabets[fasta_flag]
655 copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
656 print(
"BuildSystem.add_state: molecule %s sequence has "
657 "%s residues" % (molname, len(seq)))
658 orig_mol = state.create_molecule(molname, seq, chain_id,
663 print(
"BuildSystem.add_state: creating a copy for "
664 "molecule %s" % molname)
665 mol = orig_mol.create_copy(chain_id)
668 for domainnumber, domain
in enumerate(copy):
669 print(
"BuildSystem.add_state: ---- setting up domain %s "
670 "of molecule %s" % (domainnumber, molname))
673 these_domains[domain.get_unique_name()] = domain
674 if domain.residue_range == []
or \
675 domain.residue_range
is None:
676 domain_res = mol.get_residues()
678 start = domain.residue_range[0]+domain.pdb_offset
679 if domain.residue_range[1] ==
'END':
680 end = len(mol.sequence)
682 end = domain.residue_range[1]+domain.pdb_offset
683 domain_res = mol.residue_range(start-1, end-1)
684 print(
"BuildSystem.add_state: -------- domain %s of "
685 "molecule %s extends from residue %s to "
687 % (domainnumber, molname, start, end))
688 if domain.pdb_file ==
"BEADS":
689 print(
"BuildSystem.add_state: -------- domain %s of "
690 "molecule %s represented by BEADS "
691 % (domainnumber, molname))
692 mol.add_representation(
694 resolutions=[domain.bead_size],
695 setup_particles_as_densities=(
696 domain.em_residues_per_gaussian != 0),
698 these_domain_res[domain.get_unique_name()] = \
700 elif domain.pdb_file ==
"IDEAL_HELIX":
701 print(
"BuildSystem.add_state: -------- domain %s of "
702 "molecule %s represented by IDEAL_HELIX "
703 % (domainnumber, molname))
704 emper = domain.em_residues_per_gaussian
705 mol.add_representation(
707 resolutions=self.resolutions,
709 density_residues_per_component=emper,
710 density_prefix=domain.density_prefix,
711 density_force_compute=self.force_create_gmm_files,
713 these_domain_res[domain.get_unique_name()] = \
716 print(
"BuildSystem.add_state: -------- domain %s of "
717 "molecule %s represented by pdb file %s "
718 % (domainnumber, molname, domain.pdb_file))
719 domain_atomic = mol.add_structure(domain.pdb_file,
721 domain.residue_range,
724 domain_non_atomic = domain_res - domain_atomic
725 if not domain.em_residues_per_gaussian:
726 mol.add_representation(
727 domain_atomic, resolutions=self.resolutions,
729 if len(domain_non_atomic) > 0:
730 mol.add_representation(
732 resolutions=[domain.bead_size],
735 print(
"BuildSystem.add_state: -------- domain %s "
736 "of molecule %s represented by gaussians "
737 % (domainnumber, molname))
738 emper = domain.em_residues_per_gaussian
739 creategmm = self.force_create_gmm_files
740 mol.add_representation(
742 resolutions=self.resolutions,
743 density_residues_per_component=emper,
744 density_prefix=domain.density_prefix,
745 density_force_compute=creategmm,
747 if len(domain_non_atomic) > 0:
748 mol.add_representation(
750 resolutions=[domain.bead_size],
751 setup_particles_as_densities=
True,
753 these_domain_res[domain.get_unique_name()] = (
754 domain_atomic, domain_non_atomic)
755 self._domain_res.append(these_domain_res)
756 self._domains.append(these_domains)
757 print(
'BuildSystem.add_state: State', len(self.system.states),
'added')
760 """Return list of all molecules grouped by state.
761 For each state, it's a dictionary of Molecules where key is the
764 return [s.get_molecules()
for s
in self.system.get_states()]
766 def get_molecule(self, molname, copy_index=0, state_index=0):
767 return self.system.get_states()[state_index].
get_molecules()[
771 max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
772 """Builds representations and sets up degrees of freedom"""
773 print(
"BuildSystem.execute_macro: building representations")
774 self.root_hier = self.system.build()
776 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
778 for nstate, reader
in enumerate(self._readers):
779 rbs = reader.get_rigid_bodies()
780 srbs = reader.get_super_rigid_bodies()
781 csrbs = reader.get_chains_of_super_rigid_bodies()
784 domains_in_rbs = set()
786 print(
"BuildSystem.execute_macro: -------- building rigid "
787 "body %s" % (str(rblist)))
788 all_res = IMP.pmi.tools.OrderedSet()
789 bead_res = IMP.pmi.tools.OrderedSet()
791 domain = self._domains[nstate][dname]
792 print(
"BuildSystem.execute_macro: -------- adding %s"
794 all_res |= self._domain_res[nstate][dname][0]
795 bead_res |= self._domain_res[nstate][dname][1]
796 domains_in_rbs.add(dname)
798 print(
"BuildSystem.execute_macro: -------- creating rigid "
799 "body with max_trans %s max_rot %s "
800 "non_rigid_max_trans %s"
801 % (str(max_rb_trans), str(max_rb_rot),
802 str(max_bead_trans)))
803 self.dof.create_rigid_body(all_res,
804 nonrigid_parts=bead_res,
805 max_trans=max_rb_trans,
807 nonrigid_max_trans=max_bead_trans)
810 for dname, domain
in self._domains[nstate].items():
811 if dname
not in domains_in_rbs:
812 if domain.pdb_file !=
"BEADS":
814 "No rigid bodies set for %s. Residues read from "
815 "the PDB file will not be sampled - only regions "
816 "missing from the PDB will be treated flexibly. "
817 "To sample the entire sequence, use BEADS instead "
818 "of a PDB file name" % dname,
820 self.dof.create_flexible_beads(
821 self._domain_res[nstate][dname][1],
822 max_trans=max_bead_trans)
826 print(
"BuildSystem.execute_macro: -------- building "
827 "super rigid body %s" % (str(srblist)))
828 all_res = IMP.pmi.tools.OrderedSet()
829 for dname
in srblist:
830 print(
"BuildSystem.execute_macro: -------- adding %s"
832 all_res |= self._domain_res[nstate][dname][0]
833 all_res |= self._domain_res[nstate][dname][1]
835 print(
"BuildSystem.execute_macro: -------- creating super "
836 "rigid body with max_trans %s max_rot %s "
837 % (str(max_srb_trans), str(max_srb_rot)))
838 self.dof.create_super_rigid_body(
839 all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
842 for csrblist
in csrbs:
843 all_res = IMP.pmi.tools.OrderedSet()
844 for dname
in csrblist:
845 all_res |= self._domain_res[nstate][dname][0]
846 all_res |= self._domain_res[nstate][dname][1]
847 all_res = list(all_res)
848 all_res.sort(key=
lambda r: r.get_index())
849 self.dof.create_main_chain_mover(all_res)
850 return self.root_hier, self.dof
855 """A macro for running all the basic operations of analysis.
856 Includes clustering, precision analysis, and making ensemble density maps.
857 A number of plots are also supported.
860 merge_directories=[
"./"],
861 stat_file_name_suffix=
"stat",
862 best_pdb_name_suffix=
"model",
864 do_create_directories=
True,
865 global_output_directory=
"output/",
866 replica_stat_file_suffix=
"stat_replica",
867 global_analysis_result_directory=
"./analysis/",
870 @param model The IMP model
871 @param stat_file_name_suffix
872 @param merge_directories The directories containing output files
873 @param best_pdb_name_suffix
874 @param do_clean_first
875 @param do_create_directories
876 @param global_output_directory Where everything is
877 @param replica_stat_file_suffix
878 @param global_analysis_result_directory
879 @param test_mode If True, nothing is changed on disk
883 from mpi4py
import MPI
884 self.comm = MPI.COMM_WORLD
885 self.rank = self.comm.Get_rank()
886 self.number_of_processes = self.comm.size
889 self.number_of_processes = 1
891 self.test_mode = test_mode
892 self._protocol_output = []
893 self.cluster_obj =
None
895 stat_dir = global_output_directory
898 for rd
in merge_directories:
899 stat_files = glob.glob(os.path.join(rd, stat_dir,
"stat.*.out"))
900 if len(stat_files) == 0:
901 warnings.warn(
"no stat files found in %s"
902 % os.path.join(rd, stat_dir),
904 self.stat_files += stat_files
907 """Capture details of the modeling protocol.
908 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
911 self._protocol_output.append((p, p._last_state))
914 score_key=
"SimplifiedModel_Total_Score_None",
915 rmf_file_key=
"rmf_file",
916 rmf_file_frame_key=
"rmf_frame_index",
919 nframes_trajectory=10000):
920 """ Get a trajectory of the modeling run, for generating
923 @param score_key The score for ranking models
924 @param rmf_file_key Key pointing to RMF filename
925 @param rmf_file_frame_key Key pointing to RMF frame number
926 @param outputdir The local output directory used in the run
927 @param get_every Extract every nth frame
928 @param nframes_trajectory Total number of frames of the trajectory
933 self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
935 score_list = list(map(float, trajectory_models[2]))
937 max_score = max(score_list)
938 min_score = min(score_list)
940 bins = [(max_score-min_score)*math.exp(-float(i))+min_score
941 for i
in range(nframes_trajectory)]
942 binned_scores = [
None]*nframes_trajectory
943 binned_model_indexes = [-1]*nframes_trajectory
945 for model_index, s
in enumerate(score_list):
946 bins_score_diffs = [abs(s-b)
for b
in bins]
947 bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
948 if binned_scores[bin_index]
is None:
949 binned_scores[bin_index] = s
950 binned_model_indexes[bin_index] = model_index
952 old_diff = abs(binned_scores[bin_index]-bins[bin_index])
953 new_diff = abs(s-bins[bin_index])
954 if new_diff < old_diff:
955 binned_scores[bin_index] = s
956 binned_model_indexes[bin_index] = model_index
959 print(binned_model_indexes)
961 def _expand_ambiguity(self, prot, d):
962 """If using PMI2, expand the dictionary to include copies as
965 This also keeps the states separate.
970 if '..' in key
or (isinstance(val, tuple)
and len(val) >= 3):
973 states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
974 if isinstance(val, tuple):
982 for nst
in range(len(states)):
984 copies = sel.get_selected_particles(with_representation=
False)
986 for nc
in range(len(copies)):
988 newdict[
'%s.%i..%i' % (name, nst, nc)] = \
989 (start, stop, name, nc, nst)
991 newdict[
'%s..%i' % (name, nc)] = \
992 (start, stop, name, nc, nst)
998 score_key=
"SimplifiedModel_Total_Score_None",
999 rmf_file_key=
"rmf_file",
1000 rmf_file_frame_key=
"rmf_frame_index",
1002 prefiltervalue=
None,
1005 alignment_components=
None,
1006 number_of_best_scoring_models=10,
1007 rmsd_calculation_components=
None,
1008 distance_matrix_file=
'distances.mat',
1009 load_distance_matrix_file=
False,
1010 skip_clustering=
False,
1011 number_of_clusters=1,
1013 exit_after_display=
True,
1015 first_and_last_frames=
None,
1016 density_custom_ranges=
None,
1017 write_pdb_with_centered_coordinates=
False,
1019 """Get the best scoring models, compute a distance matrix,
1020 cluster them, and create density maps.
1022 Tuple format: "molname" just the molecule,
1023 or (start,stop,molname,copy_num(optional),state_num(optional)
1024 Can pass None for copy or state to ignore that field.
1025 If you don't pass a specific copy number
1027 @param score_key The score for ranking models.
1028 Use "Total_Score" instead of default for PMI2.
1029 @param rmf_file_key Key pointing to RMF filename
1030 @param rmf_file_frame_key Key pointing to RMF frame number
1031 @param state_number State number to analyze
1032 @param prefiltervalue Only include frames where the
1033 score key is below this value
1034 @param feature_keys Keywords for which you want to
1035 calculate average, medians, etc.
1036 If you pass "Keyname" it'll include everything that matches
1038 @param outputdir The local output directory used in
1040 @param alignment_components Dictionary with keys=groupname,
1041 values are tuples for aligning the structures
1042 e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1043 @param number_of_best_scoring_models Num models to keep per run
1044 @param rmsd_calculation_components For calculating RMSD
1045 (same format as alignment_components)
1046 @param distance_matrix_file Where to store/read the
1048 @param load_distance_matrix_file Try to load the distance
1050 @param skip_clustering Just extract the best scoring
1051 models and save the pdbs
1052 @param number_of_clusters Number of k-means clusters
1053 @param display_plot Display the distance matrix
1054 @param exit_after_display Exit after displaying distance
1056 @param get_every Extract every nth frame
1057 @param first_and_last_frames A tuple with the first and last
1058 frames to be analyzed. Values are percentages!
1059 Default: get all frames
1060 @param density_custom_ranges For density calculation
1061 (same format as alignment_components)
1062 @param write_pdb_with_centered_coordinates
1063 @param voxel_size Used for the density output
1067 self._outputdir = os.path.abspath(outputdir)
1068 self._number_of_clusters = number_of_clusters
1069 for p, state
in self._protocol_output:
1070 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1081 if not load_distance_matrix_file:
1082 if len(self.stat_files) == 0:
1083 print(
"ERROR: no stat file found in the given path")
1085 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1086 self.stat_files, self.number_of_processes)[self.rank]
1090 orig_score_key = score_key
1091 if score_key
not in po.get_keys():
1092 if 'Total_Score' in po.get_keys():
1093 score_key =
'Total_Score'
1095 "Using 'Total_Score' instead of "
1096 "'SimplifiedModel_Total_Score_None' for the score key",
1098 for k
in [orig_score_key, score_key, rmf_file_key,
1099 rmf_file_frame_key]:
1100 if k
in feature_keys:
1102 "no need to pass " + k +
" to feature_keys.",
1104 feature_keys.remove(k)
1107 my_stat_files, score_key, feature_keys, rmf_file_key,
1108 rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1109 rmf_file_list = best_models[0]
1110 rmf_file_frame_list = best_models[1]
1111 score_list = best_models[2]
1112 feature_keyword_list_dict = best_models[3]
1118 if self.number_of_processes > 1:
1122 rmf_file_frame_list)
1123 for k
in feature_keyword_list_dict:
1124 feature_keyword_list_dict[k] = \
1126 feature_keyword_list_dict[k])
1129 score_rmf_tuples = list(zip(score_list,
1131 rmf_file_frame_list,
1132 list(range(len(score_list)))))
1134 if density_custom_ranges:
1135 for k
in density_custom_ranges:
1136 if not isinstance(density_custom_ranges[k], list):
1137 raise Exception(
"Density custom ranges: values must "
1138 "be lists of tuples")
1141 if first_and_last_frames
is not None:
1142 nframes = len(score_rmf_tuples)
1143 first_frame = int(first_and_last_frames[0] * nframes)
1144 last_frame = int(first_and_last_frames[1] * nframes)
1145 if last_frame > len(score_rmf_tuples):
1147 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1150 best_score_rmf_tuples = sorted(
1152 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1153 best_score_rmf_tuples = [t+(n,)
for n, t
in
1154 enumerate(best_score_rmf_tuples)]
1156 prov.append(IMP.pmi.io.FilterProvenance(
1157 "Best scoring", 0, number_of_best_scoring_models))
1159 best_score_feature_keyword_list_dict = defaultdict(list)
1160 for tpl
in best_score_rmf_tuples:
1162 for f
in feature_keyword_list_dict:
1163 best_score_feature_keyword_list_dict[f].append(
1164 feature_keyword_list_dict[f][index])
1165 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1166 best_score_rmf_tuples,
1167 self.number_of_processes)[self.rank]
1170 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1171 self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1173 if rmsd_calculation_components
is not None:
1174 tmp = self._expand_ambiguity(
1175 prot_ahead, rmsd_calculation_components)
1176 if tmp != rmsd_calculation_components:
1177 print(
'Detected ambiguity, expand rmsd components to',
1179 rmsd_calculation_components = tmp
1180 if alignment_components
is not None:
1181 tmp = self._expand_ambiguity(prot_ahead,
1182 alignment_components)
1183 if tmp != alignment_components:
1184 print(
'Detected ambiguity, expand alignment '
1185 'components to', tmp)
1186 alignment_components = tmp
1192 self.model, my_best_score_rmf_tuples[0],
1193 rmsd_calculation_components, state_number=state_number)
1195 self.model, my_best_score_rmf_tuples, alignment_components,
1196 rmsd_calculation_components, state_number=state_number)
1204 all_coordinates = got_coords[0]
1207 alignment_coordinates = got_coords[1]
1210 rmsd_coordinates = got_coords[2]
1213 rmf_file_name_index_dict = got_coords[3]
1216 all_rmf_file_names = got_coords[4]
1222 if density_custom_ranges:
1224 density_custom_ranges, voxel=voxel_size)
1226 dircluster = os.path.join(outputdir,
1227 "all_models."+str(self.rank))
1233 os.mkdir(dircluster)
1236 clusstat = open(os.path.join(
1237 dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1238 for cnt, tpl
in enumerate(my_best_score_rmf_tuples):
1240 rmf_frame_number = tpl[2]
1243 for key
in best_score_feature_keyword_list_dict:
1245 best_score_feature_keyword_list_dict[key][index]
1249 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1250 self.model, rmf_frame_number, rmf_name)
1252 linking_successful = \
1253 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1254 self.model, prots, rs, rmf_frame_number,
1256 if not linking_successful:
1263 states = IMP.atom.get_by_type(
1264 prots[0], IMP.atom.STATE_TYPE)
1265 prot = states[state_number]
1267 prot = prots[state_number]
1272 coords_f1 = alignment_coordinates[cnt]
1274 coords_f2 = alignment_coordinates[cnt]
1277 coords_f1, coords_f2)
1278 transformation = Ali.align()[1]
1292 rb = rbm.get_rigid_body()
1302 out_pdb_fn = os.path.join(
1303 dircluster, str(cnt)+
"."+str(self.rank)+
".pdb")
1304 out_rmf_fn = os.path.join(
1305 dircluster, str(cnt)+
"."+str(self.rank)+
".rmf3")
1306 o.init_pdb(out_pdb_fn, prot)
1307 tc = write_pdb_with_centered_coordinates
1308 o.write_pdb(out_pdb_fn,
1309 translate_to_geometric_center=tc)
1311 tmp_dict[
"local_pdb_file_name"] = \
1312 os.path.basename(out_pdb_fn)
1313 tmp_dict[
"rmf_file_full_path"] = rmf_name
1314 tmp_dict[
"local_rmf_file_name"] = \
1315 os.path.basename(out_rmf_fn)
1316 tmp_dict[
"local_rmf_frame_number"] = 0
1318 clusstat.write(str(tmp_dict)+
"\n")
1324 h.set_name(
"System")
1326 o.init_rmf(out_rmf_fn, [h], rs)
1328 o.init_rmf(out_rmf_fn, [prot], rs)
1330 o.write_rmf(out_rmf_fn)
1331 o.close_rmf(out_rmf_fn)
1333 if density_custom_ranges:
1334 DensModule.add_subunits_density(prot)
1336 if density_custom_ranges:
1337 DensModule.write_mrc(path=dircluster)
1342 if self.number_of_processes > 1:
1348 rmf_file_name_index_dict)
1350 alignment_coordinates)
1357 [best_score_feature_keyword_list_dict,
1358 rmf_file_name_index_dict],
1364 print(
"setup clustering class")
1367 for n, model_coordinate_dict
in enumerate(all_coordinates):
1369 if (alignment_components
is not None
1370 and len(self.cluster_obj.all_coords) == 0):
1372 self.cluster_obj.set_template(alignment_coordinates[n])
1373 self.cluster_obj.fill(all_rmf_file_names[n],
1374 rmsd_coordinates[n])
1375 print(
"Global calculating the distance matrix")
1378 self.cluster_obj.dist_matrix()
1382 self.cluster_obj.do_cluster(number_of_clusters)
1385 self.cluster_obj.plot_matrix(
1386 figurename=os.path.join(outputdir,
1388 if exit_after_display:
1390 self.cluster_obj.save_distance_matrix_file(
1391 file_name=distance_matrix_file)
1398 print(
"setup clustering class")
1400 self.cluster_obj.load_distance_matrix_file(
1401 file_name=distance_matrix_file)
1402 print(
"clustering with %s clusters" % str(number_of_clusters))
1403 self.cluster_obj.do_cluster(number_of_clusters)
1404 [best_score_feature_keyword_list_dict,
1405 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1408 self.cluster_obj.plot_matrix(figurename=os.path.join(
1409 outputdir,
'dist_matrix.pdf'))
1410 if exit_after_display:
1412 if self.number_of_processes > 1:
1420 print(self.cluster_obj.get_cluster_labels())
1421 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1422 print(
"rank %s " % str(self.rank))
1423 print(
"cluster %s " % str(n))
1424 print(
"cluster label %s " % str(cl))
1425 print(self.cluster_obj.get_cluster_label_names(cl))
1427 len(self.cluster_obj.get_cluster_label_names(cl))
1429 prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1432 if density_custom_ranges:
1434 density_custom_ranges,
1437 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1439 os.mkdir(dircluster)
1445 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1446 clusstat = open(dircluster +
"stat.out",
"w")
1447 for k, structure_name
in enumerate(
1448 self.cluster_obj.get_cluster_label_names(cl)):
1451 tmp_dict.update(rmsd_dict)
1452 index = rmf_file_name_index_dict[structure_name]
1453 for key
in best_score_feature_keyword_list_dict:
1455 key] = best_score_feature_keyword_list_dict[
1461 rmf_name = structure_name.split(
"|")[0]
1462 rmf_frame_number = int(structure_name.split(
"|")[1])
1463 clusstat.write(str(tmp_dict) +
"\n")
1468 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1469 self.model, rmf_frame_number, rmf_name)
1471 linking_successful = \
1472 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1473 self.model, prots, rs, rmf_frame_number,
1475 if not linking_successful:
1481 states = IMP.atom.get_by_type(
1482 prots[0], IMP.atom.STATE_TYPE)
1483 prot = states[state_number]
1485 prot = prots[state_number]
1491 co = self.cluster_obj
1492 model_index = co.get_model_index_from_name(
1494 transformation = co.get_transformation_to_first_member(
1505 rb = rbm.get_rigid_body()
1514 if density_custom_ranges:
1515 DensModule.add_subunits_density(prot)
1520 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1521 o.write_pdb(dircluster + str(k) +
".pdb")
1527 h.set_name(
"System")
1529 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1531 o.init_rmf(dircluster + str(k) +
".rmf3", [prot], rs)
1532 o.write_rmf(dircluster + str(k) +
".rmf3")
1533 o.close_rmf(dircluster + str(k) +
".rmf3")
1538 if density_custom_ranges:
1539 DensModule.write_mrc(path=dircluster)
1542 if self.number_of_processes > 1:
1545 def get_cluster_rmsd(self, cluster_num):
1546 if self.cluster_obj
is None:
1548 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1550 def save_objects(self, objects, file_name):
1552 outf = open(file_name,
'wb')
1553 pickle.dump(objects, outf)
1556 def load_objects(self, file_name):
1558 inputf = open(file_name,
'rb')
1559 objects = pickle.load(inputf)
1567 This class contains analysis utilities to investigate ReplicaExchange
1575 def __init__(self, model, stat_files, best_models=None, score_key=None,
1578 Construction of the Class.
1579 @param model IMP.Model()
1580 @param stat_files list of string. Can be ascii stat files,
1582 @param best_models Integer. Number of best scoring models,
1583 if None: all models will be read
1584 @param alignment boolean (Default=True). Align before computing
1589 self.best_models = best_models
1591 model, stat_files, self.best_models, score_key, cache=
True)
1593 StatHierarchyHandler=self.stath0)
1606 self.clusters.append(c)
1607 for n0
in range(len(self.stath0)):
1609 self.pairwise_rmsd = {}
1610 self.pairwise_molecular_assignment = {}
1611 self.alignment = alignment
1612 self.symmetric_molecules = {}
1613 self.issymmetricsel = {}
1615 self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1617 self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1622 Setup the selection onto which the rmsd is computed
1623 @param kwargs use IMP.atom.Selection keywords
1631 Store names of symmetric molecules
1633 self.symmetric_molecules[molecule_name] = 0
1638 Setup the selection onto which the alignment is computed
1639 @param kwargs use IMP.atom.Selection keywords
1647 def clean_clusters(self):
1648 for c
in self.clusters:
1652 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1654 Cluster the models based on RMSD.
1655 @param rmsd_cutoff Float the distance cutoff in Angstrom
1656 @param metric (Default=IMP.atom.get_rmsd) the metric that will
1657 be used to compute rmsds
1659 self.clean_clusters()
1660 not_clustered = set(range(len(self.stath1)))
1661 while len(not_clustered) > 0:
1662 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1667 Refine the clusters by merging the ones whose centers are close
1668 @param rmsd_cutoff cutoff distance in Angstorms
1670 clusters_copy = self.clusters
1671 for c0, c1
in itertools.combinations(self.clusters, 2):
1672 if c0.center_index
is None:
1674 if c1.center_index
is None:
1676 _ = self.stath0[c0.center_index]
1677 _ = self.stath1[c1.center_index]
1678 rmsd, molecular_assignment = self.
rmsd()
1679 if rmsd <= rmsd_cutoff:
1680 if c1
in self.clusters:
1681 clusters_copy.remove(c1)
1683 self.clusters = clusters_copy
1690 def set_cluster_assignments(self, cluster_ids):
1691 if len(cluster_ids) != len(self.stath0):
1692 raise ValueError(
'cluster ids has to be same length as '
1696 for i
in sorted(list(set(cluster_ids))):
1698 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
1699 self.clusters[idx].add_member(i, d)
1703 Return the model data from a cluster
1704 @param cluster IMP.pmi.output.Cluster object
1713 Save the data for the whole models into a pickle file
1714 @param filename string
1716 self.stath0.save_data(filename)
1720 Set the data from an external IMP.pmi.output.Data
1721 @param data IMP.pmi.output.Data
1723 self.stath0.data = data
1724 self.stath1.data = data
1728 Load the data from an external pickled file
1729 @param filename string
1731 self.stath0.load_data(filename)
1732 self.stath1.load_data(filename)
1733 self.best_models = len(self.stath0)
1735 def add_cluster(self, rmf_name_list):
1737 print(
"creating cluster index "+str(len(self.clusters)))
1738 self.clusters.append(c)
1739 current_len = len(self.stath0)
1741 for rmf
in rmf_name_list:
1742 print(
"adding rmf "+rmf)
1743 self.stath0.add_stat_file(rmf)
1744 self.stath1.add_stat_file(rmf)
1746 for n0
in range(current_len, len(self.stath0)):
1747 d0 = self.stath0[n0]
1748 c.add_member(n0, d0)
1753 Save the clusters into a pickle file
1754 @param filename string
1757 import cPickle
as pickle
1760 fl = open(filename,
'wb')
1761 pickle.dump(self.clusters, fl)
1765 Load the clusters from a pickle file
1766 @param filename string
1767 @param append bool (Default=False), if True. append the clusters
1768 to the ones currently present
1771 import cPickle
as pickle
1774 fl = open(filename,
'rb')
1775 self.clean_clusters()
1777 self.clusters += pickle.load(fl)
1779 self.clusters = pickle.load(fl)
1788 Compute the cluster center for a given cluster
1790 member_distance = defaultdict(float)
1792 for n0, n1
in itertools.combinations(cluster.members, 2):
1795 rmsd, _ = self.
rmsd()
1796 member_distance[n0] += rmsd
1798 if len(member_distance) > 0:
1799 cluster.center_index = min(member_distance,
1800 key=member_distance.get)
1802 cluster.center_index = cluster.members[0]
1807 Save the coordinates of the current cluster a single rmf file
1809 print(
"saving coordinates", cluster)
1813 if rmf_name
is None:
1814 rmf_name = prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
1816 _ = self.stath1[cluster.members[0]]
1818 o.init_rmf(rmf_name, [self.stath1])
1819 for n1
in cluster.members:
1825 o.write_rmf(rmf_name)
1827 o.close_rmf(rmf_name)
1831 remove structures that are similar
1832 append it to a new cluster
1834 print(
"pruning models")
1836 filtered = [selected]
1837 remaining = range(1, len(self.stath1), 10)
1839 while len(remaining) > 0:
1840 d0 = self.stath0[selected]
1842 for n1
in remaining:
1847 if d <= rmsd_cutoff:
1849 print(
"pruning model %s, similar to model %s, rmsd %s"
1850 % (str(n1), str(selected), str(d)))
1851 remaining = [x
for x
in remaining
if x
not in rm]
1852 if len(remaining) == 0:
1854 selected = remaining[0]
1855 filtered.append(selected)
1858 self.clusters.append(c)
1860 d0 = self.stath0[n0]
1861 c.add_member(n0, d0)
1866 Compute the precision of a cluster
1872 if cluster.center_index
is not None:
1873 members1 = [cluster.center_index]
1875 members1 = cluster.members
1879 for n1
in cluster.members:
1884 tmp_rmsd, _ = self.
rmsd()
1889 precision = rmsd/npairs
1890 cluster.precision = precision
1895 Compute the bipartite precision (ie the cross-precision)
1896 between two clusters
1900 for cn0, n0
in enumerate(cluster1.members):
1902 for cn1, n1
in enumerate(cluster2.members):
1904 tmp_rmsd, _ = self.
rmsd()
1906 print(
"--- rmsd between structure %s and structure "
1907 "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1910 precision = rmsd/npairs
1913 def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1914 cluster_ref=
None, step=1):
1916 Compute the Root mean square fluctuations
1917 of a molecule in a cluster
1918 Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1919 residue indexes and the value is the rmsf
1921 rmsf = IMP.pmi.tools.OrderedDict()
1924 if cluster_ref
is not None:
1925 if cluster_ref.center_index
is not None:
1926 members0 = [cluster_ref.center_index]
1928 members0 = cluster_ref.members
1930 if cluster.center_index
is not None:
1931 members0 = [cluster.center_index]
1933 members0 = cluster.members
1936 copy_index=copy_index, state_index=state_index)
1937 ps0 = s0.get_selected_particles()
1939 residue_indexes = list(IMP.pmi.tools.OrderedSet(
1945 d0 = self.stath0[n0]
1946 for n1
in cluster.members[::step]:
1948 print(
"--- rmsf %s %s" % (str(n0), str(n1)))
1952 self.stath1, molecule=molecule,
1953 residue_indexes=residue_indexes, resolution=1,
1954 copy_index=copy_index, state_index=state_index)
1955 ps1 = s1.get_selected_particles()
1957 d1 = self.stath1[n1]
1960 for n, (p0, p1)
in enumerate(zip(ps0, ps1)):
1961 r = residue_indexes[n]
1973 for stath
in [self.stath0, self.stath1]:
1974 if molecule
not in self.symmetric_molecules:
1976 stath, molecule=molecule, residue_index=r,
1977 resolution=1, copy_index=copy_index,
1978 state_index=state_index)
1981 stath, molecule=molecule, residue_index=r,
1982 resolution=1, state_index=state_index)
1984 ps = s.get_selected_particles()
1993 def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
1994 reference=
"Absolute", prefix=
"./", step=1):
2000 for n1
in cluster.members[::step]:
2001 print(
"density "+str(n1))
2006 dens.add_subunits_density(self.stath1)
2008 dens.write_mrc(path=prefix+
'/', suffix=str(cluster.cluster_id))
2011 def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2012 consolidate=
False, molecules=
None, prefix=
'./',
2013 reference=
"Absolute"):
2017 import matplotlib.pyplot
as plt
2018 import matplotlib.cm
as cm
2019 from scipy.spatial.distance
import cdist
2021 if molecules
is None:
2030 molecules=molecules).get_selected_particles())]
2031 unique_copies = [mol
for mol
in mols
if mol.get_copy_index() == 0]
2032 mol_names_unique = dict((mol.get_name(), mol)
for mol
in unique_copies)
2033 total_len_unique = sum(max(mol.get_residue_indexes())
2034 for mol
in unique_copies)
2041 seqlen = max(mol.get_residue_indexes())
2042 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2046 for mol
in unique_copies:
2047 seqlen = max(mol.get_residue_indexes())
2048 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2051 for ncl, n1
in enumerate(cluster.members):
2054 coord_dict = IMP.pmi.tools.OrderedDict()
2056 rindexes = mol.get_residue_indexes()
2057 coords = np.ones((max(rindexes), 3))
2058 for rnum
in rindexes:
2061 selpart = sel.get_selected_particles()
2062 if len(selpart) == 0:
2064 selpart = selpart[0]
2065 coords[rnum - 1, :] = \
2067 coord_dict[mol] = coords
2070 coords = np.concatenate(list(coord_dict.values()))
2071 dists = cdist(coords, coords)
2072 binary_dists = np.where((dists <= contact_threshold)
2073 & (dists >= 1.0), 1.0, 0.0)
2075 binary_dists_dict = {}
2077 len1 = max(mol1.get_residue_indexes())
2079 name1 = mol1.get_name()
2080 name2 = mol2.get_name()
2081 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2082 if (name1, name2)
not in binary_dists_dict:
2083 binary_dists_dict[(name1, name2)] = \
2084 np.zeros((len1, len1))
2085 binary_dists_dict[(name1, name2)] += \
2086 np.where((dists <= contact_threshold)
2087 & (dists >= 1.0), 1.0, 0.0)
2088 binary_dists = np.zeros((total_len_unique, total_len_unique))
2090 for name1, name2
in binary_dists_dict:
2091 r1 = index_dict[mol_names_unique[name1]]
2092 r2 = index_dict[mol_names_unique[name2]]
2093 binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2094 np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2100 contact_freqs = binary_dists
2102 dist_maps.append(dists)
2103 av_dist_map += dists
2104 contact_freqs += binary_dists
2107 contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2109 contact_freqs = 1.0/len(cluster)*contact_freqs
2110 av_dist_map = 1.0/len(cluster)*contact_freqs
2112 fig = plt.figure(figsize=(100, 100))
2113 ax = fig.add_subplot(111)
2116 gap_between_components = 50
2121 sorted_tuple = sorted(
2123 mol).get_extended_name(), mol)
for mol
in mols)
2124 prot_list = list(zip(*sorted_tuple))[1]
2126 sorted_tuple = sorted(
2128 for mol
in unique_copies)
2129 prot_list = list(zip(*sorted_tuple))[1]
2131 prot_listx = prot_list
2132 nresx = gap_between_components + \
2133 sum([max(mol.get_residue_indexes())
2134 + gap_between_components
for mol
in prot_listx])
2137 prot_listy = prot_list
2138 nresy = gap_between_components + \
2139 sum([max(mol.get_residue_indexes())
2140 + gap_between_components
for mol
in prot_listy])
2145 res = gap_between_components
2146 for mol
in prot_listx:
2147 resoffsetx[mol] = res
2148 res += max(mol.get_residue_indexes())
2150 res += gap_between_components
2154 res = gap_between_components
2155 for mol
in prot_listy:
2156 resoffsety[mol] = res
2157 res += max(mol.get_residue_indexes())
2159 res += gap_between_components
2161 resoffsetdiagonal = {}
2162 res = gap_between_components
2163 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2164 resoffsetdiagonal[mol] = res
2165 res += max(mol.get_residue_indexes())
2166 res += gap_between_components
2171 for n, prot
in enumerate(prot_listx):
2172 res = resoffsetx[prot]
2174 for proty
in prot_listy:
2175 resy = resoffsety[proty]
2176 endy = resendy[proty]
2177 ax.plot([res, res], [resy, endy], linestyle=
'-',
2178 color=
'gray', lw=0.4)
2179 ax.plot([end, end], [resy, endy], linestyle=
'-',
2180 color=
'gray', lw=0.4)
2181 xticks.append((float(res) + float(end)) / 2)
2183 prot).get_extended_name())
2187 for n, prot
in enumerate(prot_listy):
2188 res = resoffsety[prot]
2190 for protx
in prot_listx:
2191 resx = resoffsetx[protx]
2192 endx = resendx[protx]
2193 ax.plot([resx, endx], [res, res], linestyle=
'-',
2194 color=
'gray', lw=0.4)
2195 ax.plot([resx, endx], [end, end], linestyle=
'-',
2196 color=
'gray', lw=0.4)
2197 yticks.append((float(res) + float(end)) / 2)
2199 prot).get_extended_name())
2203 tmp_array = np.zeros((nresx, nresy))
2205 for px
in prot_listx:
2206 for py
in prot_listy:
2207 resx = resoffsetx[px]
2208 lengx = resendx[px] - 1
2209 resy = resoffsety[py]
2210 lengy = resendy[py] - 1
2211 indexes_x = index_dict[px]
2212 minx = min(indexes_x)
2213 maxx = max(indexes_x)
2214 indexes_y = index_dict[py]
2215 miny = min(indexes_y)
2216 maxy = max(indexes_y)
2217 tmp_array[resx:lengx, resy:lengy] = \
2218 contact_freqs[minx:maxx, miny:maxy]
2219 ret[(px, py)] = np.argwhere(
2220 contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2222 ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2223 origin=
'lower', alpha=0.6, interpolation=
'nearest')
2225 ax.set_xticks(xticks)
2226 ax.set_xticklabels(xlabels, rotation=90)
2227 ax.set_yticks(yticks)
2228 ax.set_yticklabels(ylabels)
2229 plt.setp(ax.get_xticklabels(), fontsize=6)
2230 plt.setp(ax.get_yticklabels(), fontsize=6)
2233 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2234 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2236 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf",
2237 dpi=300, transparent=
"False")
2240 def plot_rmsd_matrix(self, filename):
2241 self.compute_all_pairwise_rmsd()
2242 distance_matrix = np.zeros(
2243 (len(self.stath0), len(self.stath1)))
2244 for (n0, n1)
in self.pairwise_rmsd:
2245 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2247 import matplotlib
as mpl
2249 import matplotlib.pylab
as pl
2250 from scipy.cluster
import hierarchy
as hrc
2252 fig = pl.figure(figsize=(10, 8))
2253 ax = fig.add_subplot(212)
2254 dendrogram = hrc.dendrogram(
2255 hrc.linkage(distance_matrix),
2258 leaves_order = dendrogram[
'leaves']
2259 ax.set_xlabel(
'Model')
2260 ax.set_ylabel(
'RMSD [Angstroms]')
2262 ax2 = fig.add_subplot(221)
2264 distance_matrix[leaves_order, :][:, leaves_order],
2265 interpolation=
'nearest')
2266 cb = fig.colorbar(cax)
2267 cb.set_label(
'RMSD [Angstroms]')
2268 ax2.set_xlabel(
'Model')
2269 ax2.set_ylabel(
'Model')
2271 pl.savefig(filename, dpi=300)
2280 Update the cluster id numbers
2282 for n, c
in enumerate(self.clusters):
2285 def get_molecule(self, hier, name, copy):
2293 self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2294 self.sel0_rmsd.get_selected_particles())
2295 self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2296 self.sel1_rmsd.get_selected_particles())
2297 for mol
in self.seldict0:
2298 for sel
in self.seldict0[mol]:
2299 self.issymmetricsel[sel] =
False
2300 for mol
in self.symmetric_molecules:
2301 self.symmetric_molecules[mol] = len(self.seldict0[mol])
2302 for sel
in self.seldict0[mol]:
2303 self.issymmetricsel[sel] =
True
2307 self.sel1_alignment, self.sel0_alignment)
2309 for rb
in self.rbs1:
2312 for bead
in self.beads1:
2320 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2322 initial filling of the clusters.
2325 print(
"clustering model "+str(n0))
2326 d0 = self.stath0[n0]
2328 print(
"creating cluster index "+str(len(self.clusters)))
2329 self.clusters.append(c)
2330 c.add_member(n0, d0)
2331 clustered = set([n0])
2333 print(
"--- trying to add model " + str(n1) +
" to cluster "
2334 + str(len(self.clusters)))
2335 d1 = self.stath1[n1]
2338 rmsd, _ = self.
rmsd(metric=metric)
2339 if rmsd < rmsd_cutoff:
2340 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2341 c.add_member(n1, d1)
2344 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2349 merge the clusters that have close members
2350 @param rmsd_cutoff cutoff distance in Angstorms
2358 for c0, c1
in filter(
lambda x: len(x[0].members) > 1,
2359 itertools.combinations(self.clusters, 2)):
2360 n0, n1 = [c.members[0]
for c
in (c0, c1)]
2363 rmsd, _ = self.
rmsd()
2364 if (rmsd < 2*rmsd_cutoff
and
2366 to_merge.append((c0, c1))
2368 for c0, c
in reversed(to_merge):
2372 self.clusters = [c
for c
in
2373 filter(
lambda x: len(x.members) > 0, self.clusters)]
2377 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2379 print(
"check close members for clusters " + str(c0.cluster_id) +
2380 " and " + str(c1.cluster_id))
2381 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2384 rmsd, _ = self.
rmsd(metric=metric)
2385 if rmsd < rmsd_cutoff:
2400 a function that returns the permutation best_sel of sels0 that
2403 best_rmsd2 = float(
'inf')
2405 if self.issymmetricsel[sels0[0]]:
2408 for offset
in range(N):
2409 sels = [sels0[(offset+i) % N]
for i
in range(N)]
2412 r = metric(sel0, sel1)
2414 if rmsd2 < best_rmsd2:
2418 for sels
in itertools.permutations(sels0):
2420 for sel0, sel1
in itertools.takewhile(
2421 lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2422 r = metric(sel0, sel1)
2424 if rmsd2 < best_rmsd2:
2427 return best_sel, best_rmsd2
2429 def compute_all_pairwise_rmsd(self):
2430 for d0
in self.stath0:
2431 for d1
in self.stath1:
2432 rmsd, _ = self.
rmsd()
2434 def rmsd(self, metric=IMP.atom.get_rmsd):
2436 Computes the RMSD. Resolves ambiguous pairs assignments
2440 n0 = self.stath0.current_index
2441 n1 = self.stath1.current_index
2442 if ((n0, n1)
in self.pairwise_rmsd) \
2443 and ((n0, n1)
in self.pairwise_molecular_assignment):
2444 return (self.pairwise_rmsd[(n0, n1)],
2445 self.pairwise_molecular_assignment[(n0, n1)])
2455 molecular_assignment = {}
2456 for molname, sels0
in self.seldict0.items():
2457 sels_best_order, best_rmsd2 = \
2458 self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2460 Ncoords = len(sels_best_order[0].get_selected_particles())
2461 Ncopies = len(self.seldict1[molname])
2462 total_rmsd += Ncoords*best_rmsd2
2463 total_N += Ncoords*Ncopies
2465 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2466 p0 = sel0.get_selected_particles()[0]
2467 p1 = sel1.get_selected_particles()[0]
2472 molecular_assignment[(molname, c0)] = (molname, c1)
2474 total_rmsd = math.sqrt(total_rmsd/total_N)
2476 self.pairwise_rmsd[(n0, n1)] = total_rmsd
2477 self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2478 self.pairwise_rmsd[(n1, n0)] = total_rmsd
2479 self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2480 return total_rmsd, molecular_assignment
2484 Fix the reference structure for structural alignment, rmsd and
2487 @param reference can be either "Absolute" (cluster center of the
2488 first cluster) or Relative (cluster center of the current
2491 if reference ==
"Absolute":
2493 elif reference ==
"Relative":
2494 if cluster.center_index:
2495 n0 = cluster.center_index
2497 n0 = cluster.members[0]
2502 compute the molecular assignemnts between multiple copies
2503 of the same sequence. It changes the Copy index of Molecules
2506 _, molecular_assignment = self.
rmsd()
2507 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2508 mol0 = self.molcopydict0[m0][c0]
2509 mol1 = self.molcopydict1[m1][c1]
2512 p1.set_value(cik0, c0)
2516 Undo the Copy index assignment
2519 _, molecular_assignment = self.
rmsd()
2520 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2521 mol0 = self.molcopydict0[m0][c0]
2522 mol1 = self.molcopydict1[m1][c1]
2525 p1.set_value(cik0, c1)
2532 s =
"AnalysisReplicaExchange\n"
2533 s +=
"---- number of clusters %s \n" % str(len(self.clusters))
2534 s +=
"---- number of models %s \n" % str(len(self.stath0))
2537 def __getitem__(self, int_slice_adaptor):
2538 if isinstance(int_slice_adaptor, int):
2539 return self.clusters[int_slice_adaptor]
2540 elif isinstance(int_slice_adaptor, slice):
2541 return self.__iter__(int_slice_adaptor)
2543 raise TypeError(
"Unknown Type")
2546 return len(self.clusters)
2548 def __iter__(self, slice_key=None):
2549 if slice_key
is None:
2550 for i
in range(len(self)):
2553 for i
in range(len(self))[slice_key]:
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A class for reading stat files (either rmf or ascii v1 and v2)
A member of a rigid body, it has internal (local) coordinates.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
def merge_aggregates
merge the clusters that have close members
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
Mapping between FASTA one-letter codes and residue types.
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
A macro to help setup and run replica exchange.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
Sample using replica exchange.
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.