1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
18 from pathlib
import Path
20 from IMP._compat_pathlib
import Path
22 from operator
import itemgetter
23 from collections
import defaultdict
30 class _MockMPIValues(object):
31 """Replace samplers.MPI_values when in test mode"""
32 def get_percentile(self, name):
36 class _RMFRestraints(object):
37 """All restraints that are written out to the RMF file"""
38 def __init__(self, model, user_restraints):
40 self._user_restraints = user_restraints
if user_restraints
else []
43 return (len(self._user_restraints)
44 + self._rmf_rs.get_number_of_restraints())
48 __nonzero__ = __bool__
50 def __getitem__(self, i):
51 class FakePMIWrapper(object):
52 def __init__(self, r):
53 self.r = IMP.RestraintSet.get_from(r)
55 def get_restraint(self):
58 lenuser = len(self._user_restraints)
60 return self._user_restraints[i]
61 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
62 r = self._rmf_rs.get_restraint(i - lenuser)
63 return FakePMIWrapper(r)
65 raise IndexError(
"Out of range")
69 """A macro to help setup and run replica exchange.
70 Supports Monte Carlo and molecular dynamics.
71 Produces trajectory RMF files, best PDB structures,
72 and output stat files.
75 monte_carlo_sample_objects=
None,
76 molecular_dynamics_sample_objects=
None,
78 rmf_output_objects=
None,
79 monte_carlo_temperature=1.0,
80 simulated_annealing=
False,
81 simulated_annealing_minimum_temperature=1.0,
82 simulated_annealing_maximum_temperature=2.5,
83 simulated_annealing_minimum_temperature_nframes=100,
84 simulated_annealing_maximum_temperature_nframes=100,
85 replica_exchange_minimum_temperature=1.0,
86 replica_exchange_maximum_temperature=2.5,
87 replica_exchange_swap=
True,
89 number_of_best_scoring_models=500,
92 molecular_dynamics_steps=10,
93 molecular_dynamics_max_time_step=1.0,
94 number_of_frames=1000,
95 save_coordinates_mode=
"lowest_temperature",
96 nframes_write_coordinates=1,
97 write_initial_rmf=
True,
98 initial_rmf_name_suffix=
"initial",
99 stat_file_name_suffix=
"stat",
100 best_pdb_name_suffix=
"model",
103 do_create_directories=
True,
104 global_output_directory=
"./",
106 best_pdb_dir=
"pdbs/",
107 replica_stat_file_suffix=
"stat_replica",
108 em_object_for_rmf=
None,
110 replica_exchange_object=
None,
114 @param model The IMP model
115 @param root_hier Top-level (System)hierarchy
116 @param monte_carlo_sample_objects Objects for MC sampling, which
117 should generally be a simple list of Mover objects, e.g.
118 from DegreesOfFreedom.get_movers().
119 @param molecular_dynamics_sample_objects Objects for MD sampling,
120 which should generally be a simple list of particles.
121 @param output_objects A list of structural objects and restraints
122 that will be included in output (ie, statistics "stat"
123 files). Any object that provides a get_output() method
124 can be used here. If None is passed
125 the macro will not write stat files.
126 @param rmf_output_objects A list of structural objects and
127 restraints that will be included in rmf. Any object
128 that provides a get_output() method can be used here.
129 @param monte_carlo_temperature MC temp (may need to be optimized
130 based on post-sampling analysis)
131 @param simulated_annealing If True, perform simulated annealing
132 @param simulated_annealing_minimum_temperature Should generally be
133 the same as monte_carlo_temperature.
134 @param simulated_annealing_minimum_temperature_nframes Number of
135 frames to compute at minimum temperature.
136 @param simulated_annealing_maximum_temperature_nframes Number of
138 temps > simulated_annealing_maximum_temperature.
139 @param replica_exchange_minimum_temperature Low temp for REX; should
140 generally be the same as monte_carlo_temperature.
141 @param replica_exchange_maximum_temperature High temp for REX
142 @param replica_exchange_swap Boolean, enable disable temperature
144 @param num_sample_rounds Number of rounds of MC/MD per cycle
145 @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
146 models to keep around for analysis.
147 @param mmcif If True, write best scoring models in mmCIF format;
148 if False (the default), write in legacy PDB format.
149 @param best_pdb_dir The directory under `global_output_directory`
150 where best-scoring PDB/mmCIF files are written.
151 @param best_pdb_name_suffix Part of the file name for best-scoring
153 @param monte_carlo_steps Number of MC steps per round
154 @param self_adaptive self adaptive scheme for Monte Carlo movers
155 @param molecular_dynamics_steps Number of MD steps per round
156 @param molecular_dynamics_max_time_step Max time step for MD
157 @param number_of_frames Number of REX frames to run
158 @param save_coordinates_mode string: how to save coordinates.
159 "lowest_temperature" (default) only the lowest temperatures
161 "25th_score" all replicas whose score is below the 25th
163 "50th_score" all replicas whose score is below the 50th
165 "75th_score" all replicas whose score is below the 75th
167 @param nframes_write_coordinates How often to write the coordinates
169 @param write_initial_rmf Write the initial configuration
170 @param global_output_directory Folder that will be created to house
172 @param test_mode Set to True to avoid writing any files, just test
174 @param score_moved If True, attempt to speed up Monte Carlo
175 sampling by caching scoring function terms on particles
182 if output_objects == []:
185 self.output_objects = []
187 self.output_objects = output_objects
188 self.rmf_output_objects = rmf_output_objects
190 and not root_hier.get_parent()):
191 if self.output_objects
is not None:
192 self.output_objects.append(
194 if self.rmf_output_objects
is not None:
195 self.rmf_output_objects.append(
197 self.root_hier = root_hier
198 states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
199 self.vars[
"number_of_states"] = len(states)
201 self.root_hiers = states
202 self.is_multi_state =
True
204 self.root_hier = root_hier
205 self.is_multi_state =
False
207 raise TypeError(
"Must provide System hierarchy (root_hier)")
209 self._rmf_restraints = _RMFRestraints(model,
None)
210 self.em_object_for_rmf = em_object_for_rmf
211 self.monte_carlo_sample_objects = monte_carlo_sample_objects
212 self.vars[
"self_adaptive"] = self_adaptive
213 self.molecular_dynamics_sample_objects = \
214 molecular_dynamics_sample_objects
215 self.replica_exchange_object = replica_exchange_object
216 self.molecular_dynamics_max_time_step = \
217 molecular_dynamics_max_time_step
218 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
219 self.vars[
"replica_exchange_minimum_temperature"] = \
220 replica_exchange_minimum_temperature
221 self.vars[
"replica_exchange_maximum_temperature"] = \
222 replica_exchange_maximum_temperature
223 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
224 self.vars[
"simulated_annealing"] = simulated_annealing
225 self.vars[
"simulated_annealing_minimum_temperature"] = \
226 simulated_annealing_minimum_temperature
227 self.vars[
"simulated_annealing_maximum_temperature"] = \
228 simulated_annealing_maximum_temperature
229 self.vars[
"simulated_annealing_minimum_temperature_nframes"] = \
230 simulated_annealing_minimum_temperature_nframes
231 self.vars[
"simulated_annealing_maximum_temperature_nframes"] = \
232 simulated_annealing_maximum_temperature_nframes
234 self.vars[
"num_sample_rounds"] = num_sample_rounds
236 "number_of_best_scoring_models"] = number_of_best_scoring_models
237 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
238 self.vars[
"molecular_dynamics_steps"] = molecular_dynamics_steps
239 self.vars[
"number_of_frames"] = number_of_frames
240 if save_coordinates_mode
not in (
"lowest_temperature",
"25th_score",
241 "50th_score",
"75th_score"):
242 raise Exception(
"save_coordinates_mode has unrecognized value")
244 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
245 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
246 self.vars[
"write_initial_rmf"] = write_initial_rmf
247 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
248 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
249 self.vars[
"mmcif"] = mmcif
250 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
251 self.vars[
"do_clean_first"] = do_clean_first
252 self.vars[
"do_create_directories"] = do_create_directories
253 self.vars[
"global_output_directory"] = global_output_directory
254 self.vars[
"rmf_dir"] = rmf_dir
255 self.vars[
"best_pdb_dir"] = best_pdb_dir
256 self.vars[
"atomistic"] = atomistic
257 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
258 self.vars[
"geometries"] =
None
259 self.test_mode = test_mode
260 self.score_moved = score_moved
263 if self.vars[
"geometries"]
is None:
264 self.vars[
"geometries"] = list(geometries)
266 self.vars[
"geometries"].extend(geometries)
269 print(
"ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
270 "rmfs/*.rmf3 for each replica ")
271 print(
"--- it stores the best scoring pdb models in pdbs/")
272 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
273 "lowest temperature")
274 print(
"--- variables:")
275 keys = list(self.vars.keys())
278 print(
"------", v.ljust(30), self.vars[v])
280 def get_replica_exchange_object(self):
281 return self.replica_exchange_object
283 def _add_provenance(self, sampler_md, sampler_mc):
284 """Record details about the sampling in the IMP Hierarchies"""
287 method =
"Molecular Dynamics"
288 iterations += self.vars[
"molecular_dynamics_steps"]
290 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
291 iterations += self.vars[
"monte_carlo_steps"]
293 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
295 iterations *= self.vars[
"num_sample_rounds"]
297 pi = self.model.add_particle(
"sampling")
299 self.model, pi, method, self.vars[
"number_of_frames"],
301 p.set_number_of_replicas(
302 self.replica_exchange_object.get_number_of_replicas())
303 IMP.pmi.tools._add_pmi_provenance(self.root_hier)
306 def execute_macro(self):
307 temp_index_factor = 100000.0
311 if self.monte_carlo_sample_objects
is not None:
312 print(
"Setting up MonteCarlo")
314 self.model, self.monte_carlo_sample_objects,
315 self.vars[
"monte_carlo_temperature"],
316 score_moved=self.score_moved)
317 if self.vars[
"simulated_annealing"]:
318 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
319 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
321 "simulated_annealing_minimum_temperature_nframes"]
323 "simulated_annealing_maximum_temperature_nframes"]
324 sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
325 if self.vars[
"self_adaptive"]:
326 sampler_mc.set_self_adaptive(
327 isselfadaptive=self.vars[
"self_adaptive"])
328 if self.output_objects
is not None:
329 self.output_objects.append(sampler_mc)
330 if self.rmf_output_objects
is not None:
331 self.rmf_output_objects.append(sampler_mc)
332 samplers.append(sampler_mc)
334 if self.molecular_dynamics_sample_objects
is not None:
335 print(
"Setting up MolecularDynamics")
337 self.model, self.molecular_dynamics_sample_objects,
338 self.vars[
"monte_carlo_temperature"],
339 maximum_time_step=self.molecular_dynamics_max_time_step)
340 if self.vars[
"simulated_annealing"]:
341 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
342 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
344 "simulated_annealing_minimum_temperature_nframes"]
346 "simulated_annealing_maximum_temperature_nframes"]
347 sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
348 if self.output_objects
is not None:
349 self.output_objects.append(sampler_md)
350 if self.rmf_output_objects
is not None:
351 self.rmf_output_objects.append(sampler_md)
352 samplers.append(sampler_md)
355 print(
"Setting up ReplicaExchange")
357 self.model, self.vars[
"replica_exchange_minimum_temperature"],
358 self.vars[
"replica_exchange_maximum_temperature"], samplers,
359 replica_exchange_object=self.replica_exchange_object)
360 self.replica_exchange_object = rex.rem
362 myindex = rex.get_my_index()
363 if self.output_objects
is not None:
364 self.output_objects.append(rex)
365 if self.rmf_output_objects
is not None:
366 self.rmf_output_objects.append(rex)
370 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
374 globaldir = self.vars[
"global_output_directory"] +
"/"
375 rmf_dir = globaldir + self.vars[
"rmf_dir"]
376 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
378 if not self.test_mode:
379 if self.vars[
"do_clean_first"]:
382 if self.vars[
"do_create_directories"]:
385 os.makedirs(globaldir)
393 if not self.is_multi_state:
399 for n
in range(self.vars[
"number_of_states"]):
401 os.makedirs(pdb_dir +
"/" + str(n))
408 if self.output_objects
is not None:
409 self.output_objects.append(sw)
410 if self.rmf_output_objects
is not None:
411 self.rmf_output_objects.append(sw)
413 print(
"Setting up stat file")
415 low_temp_stat_file = globaldir + \
416 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
419 if not self.test_mode:
422 if not self.test_mode:
423 if self.output_objects
is not None:
424 output.init_stat2(low_temp_stat_file,
426 extralabels=[
"rmf_file",
"rmf_frame_index"])
428 print(
"Stat file writing is disabled")
430 if self.rmf_output_objects
is not None:
431 print(
"Stat info being written in the rmf file")
433 print(
"Setting up replica stat file")
434 replica_stat_file = globaldir + \
435 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
436 if not self.test_mode:
437 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
439 if not self.test_mode:
440 print(
"Setting up best pdb files")
441 if not self.is_multi_state:
442 if self.vars[
"number_of_best_scoring_models"] > 0:
443 output.init_pdb_best_scoring(
444 pdb_dir +
"/" + self.vars[
"best_pdb_name_suffix"],
446 self.vars[
"number_of_best_scoring_models"],
447 replica_exchange=
True,
448 mmcif=self.vars[
'mmcif'],
449 best_score_file=globaldir +
"best.scores.rex.py")
450 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
452 pdb_dir +
"/" +
"model.psf",
454 self.vars[
"best_pdb_name_suffix"] + pdbext)
456 if self.vars[
"number_of_best_scoring_models"] > 0:
457 for n
in range(self.vars[
"number_of_states"]):
458 output.init_pdb_best_scoring(
459 pdb_dir +
"/" + str(n) +
"/" +
460 self.vars[
"best_pdb_name_suffix"],
462 self.vars[
"number_of_best_scoring_models"],
463 replica_exchange=
True,
464 mmcif=self.vars[
'mmcif'],
465 best_score_file=globaldir +
"best.scores.rex.py")
466 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
468 pdb_dir +
"/" + str(n) +
"/" +
"model.psf",
469 pdb_dir +
"/" + str(n) +
"/" +
470 self.vars[
"best_pdb_name_suffix"] + pdbext)
473 if self.em_object_for_rmf
is not None:
474 output_hierarchies = [
476 self.em_object_for_rmf.get_density_as_hierarchy(
479 output_hierarchies = [self.root_hier]
481 if not self.test_mode:
482 print(
"Setting up and writing initial rmf coordinate file")
483 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
484 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
486 listofobjects=self.rmf_output_objects)
487 if self._rmf_restraints:
488 output.add_restraints_to_rmf(
489 init_suffix +
"." + str(myindex) +
".rmf3",
490 self._rmf_restraints)
491 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
492 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
494 if not self.test_mode:
495 mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
497 mpivs = _MockMPIValues()
499 self._add_provenance(sampler_md, sampler_mc)
501 if not self.test_mode:
502 print(
"Setting up production rmf files")
503 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
504 output.init_rmf(rmfname, output_hierarchies,
505 geometries=self.vars[
"geometries"],
506 listofobjects=self.rmf_output_objects)
508 if self._rmf_restraints:
509 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
511 ntimes_at_low_temp = 0
515 self.replica_exchange_object.set_was_used(
True)
516 nframes = self.vars[
"number_of_frames"]
519 for i
in range(nframes):
523 for nr
in range(self.vars[
"num_sample_rounds"]):
524 if sampler_md
is not None:
526 self.vars[
"molecular_dynamics_steps"])
527 if sampler_mc
is not None:
528 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
530 self.model).evaluate(
False)
531 mpivs.set_value(
"score", score)
532 output.set_output_entry(
"score", score)
534 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
536 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
537 save_frame = (min_temp_index == my_temp_index)
538 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
539 score_perc = mpivs.get_percentile(
"score")
540 save_frame = (score_perc*100.0 <= 25.0)
541 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
542 score_perc = mpivs.get_percentile(
"score")
543 save_frame = (score_perc*100.0 <= 50.0)
544 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
545 score_perc = mpivs.get_percentile(
"score")
546 save_frame = (score_perc*100.0 <= 75.0)
549 if save_frame
and not self.test_mode:
553 print(
"--- frame %s score %s " % (str(i), str(score)))
555 if not self.test_mode:
556 if i % self.vars[
"nframes_write_coordinates"] == 0:
557 print(
'--- writing coordinates')
558 if self.vars[
"number_of_best_scoring_models"] > 0:
559 output.write_pdb_best_scoring(score)
560 output.write_rmf(rmfname)
561 output.set_output_entry(
"rmf_file", rmfname)
562 output.set_output_entry(
"rmf_frame_index",
565 output.set_output_entry(
"rmf_file", rmfname)
566 output.set_output_entry(
"rmf_frame_index",
'-1')
567 if self.output_objects
is not None:
568 output.write_stat2(low_temp_stat_file)
569 ntimes_at_low_temp += 1
571 if not self.test_mode:
572 output.write_stat2(replica_stat_file)
573 if self.vars[
"replica_exchange_swap"]:
574 rex.swap_temp(i, score)
575 for p, state
in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
576 p.add_replica_exchange(state, self)
578 if not self.test_mode:
579 print(
"closing production rmf files")
580 output.close_rmf(rmfname)
584 """A macro to build a IMP::pmi::topology::System based on a
585 TopologyReader object.
587 Easily create multi-state systems by calling this macro
588 repeatedly with different TopologyReader objects!
589 A useful function is get_molecules() which returns the PMI Molecules
590 grouped by state as a dictionary with key = (molecule name),
591 value = IMP.pmi.topology.Molecule
592 Quick multi-state system:
595 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
596 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
597 bs = IMP.pmi.macros.BuildSystem(model)
598 bs.add_state(reader1)
599 bs.add_state(reader2)
600 bs.execute_macro() # build everything including degrees of freedom
601 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
602 ### now you have a two state system, you add restraints etc
604 @note The "domain name" entry of the topology reader is not used.
605 All molecules are set up by the component name, but split into rigid bodies
609 _alphabets = {
'DNA': IMP.pmi.alphabets.dna,
610 'RNA': IMP.pmi.alphabets.rna}
612 def __init__(self, model, sequence_connectivity_scale=4.0,
613 force_create_gmm_files=
False, resolutions=[1, 10],
616 @param model An IMP Model
617 @param sequence_connectivity_scale For scaling the connectivity
619 @param force_create_gmm_files If True, will sample and create GMMs
620 no matter what. If False, will only sample if the
621 files don't exist. If number of Gaussians is zero, won't
623 @param resolutions The resolutions to build for structured regions
624 @param name The name of the top-level hierarchy node.
631 self._domain_res = []
633 self.force_create_gmm_files = force_create_gmm_files
634 self.resolutions = resolutions
636 def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
638 """Add a state using the topology info in a
639 IMP::pmi::topology::TopologyReader object.
640 When you are done adding states, call execute_macro()
641 @param reader The TopologyReader object
642 @param fasta_name_map dictionary for converting protein names
643 found in the fasta file
644 @param chain_ids A list or string of chain IDs for assigning to
645 newly-created molecules, e.g.
646 `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
647 If not specified, chain IDs A through Z are assigned, then
648 AA through AZ, then BA through BZ, and so on, in the same
651 state = self.system.create_state()
652 self._readers.append(reader)
654 these_domain_res = {}
656 if chain_ids
is None:
657 chain_ids = IMP.pmi.output._ChainIDs()
662 for molname
in reader.get_molecules():
663 copies = reader.get_molecules()[molname].domains
664 for nc, copyname
in enumerate(copies):
665 print(
"BuildSystem.add_state: setting up molecule %s copy "
666 "number %s" % (molname, str(nc)))
667 copy = copies[copyname]
670 all_chains = [c
for c
in copy
if c.chain
is not None]
672 chain_id = all_chains[0].chain
674 chain_id = chain_ids[numchain]
676 "No PDBs specified for %s, so keep_chain_id has "
677 "no effect; using default chain ID '%s'"
680 chain_id = chain_ids[numchain]
682 alphabet = IMP.pmi.alphabets.amino_acid
683 fasta_flag = copy[0].fasta_flag
684 if fasta_flag
in self._alphabets:
685 alphabet = self._alphabets[fasta_flag]
687 copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
688 print(
"BuildSystem.add_state: molecule %s sequence has "
689 "%s residues" % (molname, len(seq)))
690 orig_mol = state.create_molecule(molname, seq, chain_id,
695 print(
"BuildSystem.add_state: creating a copy for "
696 "molecule %s" % molname)
697 mol = orig_mol.create_copy(chain_id)
700 for domainnumber, domain
in enumerate(copy):
701 print(
"BuildSystem.add_state: ---- setting up domain %s "
702 "of molecule %s" % (domainnumber, molname))
705 these_domains[domain.get_unique_name()] = domain
706 if domain.residue_range == []
or \
707 domain.residue_range
is None:
708 domain_res = mol.get_residues()
710 start = domain.residue_range[0]+domain.pdb_offset
711 if domain.residue_range[1] ==
'END':
712 end = len(mol.sequence)
714 end = domain.residue_range[1]+domain.pdb_offset
715 domain_res = mol.residue_range(start-1, end-1)
716 print(
"BuildSystem.add_state: -------- domain %s of "
717 "molecule %s extends from residue %s to "
719 % (domainnumber, molname, start, end))
720 if domain.pdb_file ==
"BEADS":
721 print(
"BuildSystem.add_state: -------- domain %s of "
722 "molecule %s represented by BEADS "
723 % (domainnumber, molname))
724 mol.add_representation(
726 resolutions=[domain.bead_size],
727 setup_particles_as_densities=(
728 domain.em_residues_per_gaussian != 0),
730 these_domain_res[domain.get_unique_name()] = \
732 elif domain.pdb_file ==
"IDEAL_HELIX":
733 print(
"BuildSystem.add_state: -------- domain %s of "
734 "molecule %s represented by IDEAL_HELIX "
735 % (domainnumber, molname))
736 emper = domain.em_residues_per_gaussian
737 mol.add_representation(
739 resolutions=self.resolutions,
741 density_residues_per_component=emper,
742 density_prefix=domain.density_prefix,
743 density_force_compute=self.force_create_gmm_files,
745 these_domain_res[domain.get_unique_name()] = \
748 print(
"BuildSystem.add_state: -------- domain %s of "
749 "molecule %s represented by pdb file %s "
750 % (domainnumber, molname, domain.pdb_file))
751 domain_atomic = mol.add_structure(domain.pdb_file,
753 domain.residue_range,
756 domain_non_atomic = domain_res - domain_atomic
757 if not domain.em_residues_per_gaussian:
758 mol.add_representation(
759 domain_atomic, resolutions=self.resolutions,
761 if len(domain_non_atomic) > 0:
762 mol.add_representation(
764 resolutions=[domain.bead_size],
767 print(
"BuildSystem.add_state: -------- domain %s "
768 "of molecule %s represented by gaussians "
769 % (domainnumber, molname))
770 emper = domain.em_residues_per_gaussian
771 creategmm = self.force_create_gmm_files
772 mol.add_representation(
774 resolutions=self.resolutions,
775 density_residues_per_component=emper,
776 density_prefix=domain.density_prefix,
777 density_force_compute=creategmm,
779 if len(domain_non_atomic) > 0:
780 mol.add_representation(
782 resolutions=[domain.bead_size],
783 setup_particles_as_densities=
True,
785 these_domain_res[domain.get_unique_name()] = (
786 domain_atomic, domain_non_atomic)
787 self._domain_res.append(these_domain_res)
788 self._domains.append(these_domains)
789 print(
'BuildSystem.add_state: State', len(self.system.states),
'added')
793 """Return list of all molecules grouped by state.
794 For each state, it's a dictionary of Molecules where key is the
797 return [s.get_molecules()
for s
in self.system.get_states()]
799 def get_molecule(self, molname, copy_index=0, state_index=0):
800 return self.system.get_states()[state_index].
get_molecules()[
804 max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
805 """Builds representations and sets up degrees of freedom"""
806 print(
"BuildSystem.execute_macro: building representations")
807 self.root_hier = self.system.build()
809 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
811 for nstate, reader
in enumerate(self._readers):
812 rbs = reader.get_rigid_bodies()
813 srbs = reader.get_super_rigid_bodies()
814 csrbs = reader.get_chains_of_super_rigid_bodies()
817 domains_in_rbs = set()
819 print(
"BuildSystem.execute_macro: -------- building rigid "
820 "body %s" % (str(rblist)))
821 all_res = IMP.pmi.tools.OrderedSet()
822 bead_res = IMP.pmi.tools.OrderedSet()
824 domain = self._domains[nstate][dname]
825 print(
"BuildSystem.execute_macro: -------- adding %s"
827 all_res |= self._domain_res[nstate][dname][0]
828 bead_res |= self._domain_res[nstate][dname][1]
829 domains_in_rbs.add(dname)
831 print(
"BuildSystem.execute_macro: -------- creating rigid "
832 "body with max_trans %s max_rot %s "
833 "non_rigid_max_trans %s"
834 % (str(max_rb_trans), str(max_rb_rot),
835 str(max_bead_trans)))
836 self.dof.create_rigid_body(all_res,
837 nonrigid_parts=bead_res,
838 max_trans=max_rb_trans,
840 nonrigid_max_trans=max_bead_trans,
841 name=
"RigidBody %s" % dname)
844 for dname, domain
in self._domains[nstate].items():
845 if dname
not in domains_in_rbs:
846 if domain.pdb_file !=
"BEADS":
848 "No rigid bodies set for %s. Residues read from "
849 "the PDB file will not be sampled - only regions "
850 "missing from the PDB will be treated flexibly. "
851 "To sample the entire sequence, use BEADS instead "
852 "of a PDB file name" % dname,
854 self.dof.create_flexible_beads(
855 self._domain_res[nstate][dname][1],
856 max_trans=max_bead_trans)
860 print(
"BuildSystem.execute_macro: -------- building "
861 "super rigid body %s" % (str(srblist)))
862 all_res = IMP.pmi.tools.OrderedSet()
863 for dname
in srblist:
864 print(
"BuildSystem.execute_macro: -------- adding %s"
866 all_res |= self._domain_res[nstate][dname][0]
867 all_res |= self._domain_res[nstate][dname][1]
869 print(
"BuildSystem.execute_macro: -------- creating super "
870 "rigid body with max_trans %s max_rot %s "
871 % (str(max_srb_trans), str(max_srb_rot)))
872 self.dof.create_super_rigid_body(
873 all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
876 for csrblist
in csrbs:
877 all_res = IMP.pmi.tools.OrderedSet()
878 for dname
in csrblist:
879 all_res |= self._domain_res[nstate][dname][0]
880 all_res |= self._domain_res[nstate][dname][1]
881 all_res = list(all_res)
882 all_res.sort(key=
lambda r: r.get_index())
883 self.dof.create_main_chain_mover(all_res)
884 return self.root_hier, self.dof
889 """A macro for running all the basic operations of analysis.
890 Includes clustering, precision analysis, and making ensemble density maps.
891 A number of plots are also supported.
894 merge_directories=[
"./"],
895 stat_file_name_suffix=
"stat",
896 best_pdb_name_suffix=
"model",
898 do_create_directories=
True,
899 global_output_directory=
"output/",
900 replica_stat_file_suffix=
"stat_replica",
901 global_analysis_result_directory=
"./analysis/",
904 @param model The IMP model
905 @param stat_file_name_suffix
906 @param merge_directories The directories containing output files
907 @param best_pdb_name_suffix
908 @param do_clean_first
909 @param do_create_directories
910 @param global_output_directory Where everything is
911 @param replica_stat_file_suffix
912 @param global_analysis_result_directory
913 @param test_mode If True, nothing is changed on disk
917 from mpi4py
import MPI
918 self.comm = MPI.COMM_WORLD
919 self.rank = self.comm.Get_rank()
920 self.number_of_processes = self.comm.size
923 self.number_of_processes = 1
925 self.test_mode = test_mode
926 self._protocol_output = []
927 self.cluster_obj =
None
929 stat_dir = global_output_directory
932 for rd
in merge_directories:
933 stat_files = glob.glob(os.path.join(rd, stat_dir,
"stat.*.out"))
934 if len(stat_files) == 0:
935 warnings.warn(
"no stat files found in %s"
936 % os.path.join(rd, stat_dir),
938 self.stat_files += stat_files
941 """Capture details of the modeling protocol.
942 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
945 self._protocol_output.append((p, p._last_state))
948 score_key=
"Total_Score",
949 rmf_file_key=
"rmf_file",
950 rmf_file_frame_key=
"rmf_frame_index",
953 nframes_trajectory=10000):
954 """ Get a trajectory of the modeling run, for generating
957 @param score_key The score for ranking models
958 @param rmf_file_key Key pointing to RMF filename
959 @param rmf_file_frame_key Key pointing to RMF frame number
960 @param outputdir The local output directory used in the run
961 @param get_every Extract every nth frame
962 @param nframes_trajectory Total number of frames of the trajectory
967 self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
969 score_list = list(map(float, trajectory_models[2]))
971 max_score = max(score_list)
972 min_score = min(score_list)
974 bins = [(max_score-min_score)*math.exp(-float(i))+min_score
975 for i
in range(nframes_trajectory)]
976 binned_scores = [
None]*nframes_trajectory
977 binned_model_indexes = [-1]*nframes_trajectory
979 for model_index, s
in enumerate(score_list):
980 bins_score_diffs = [abs(s-b)
for b
in bins]
981 bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
982 if binned_scores[bin_index]
is None:
983 binned_scores[bin_index] = s
984 binned_model_indexes[bin_index] = model_index
986 old_diff = abs(binned_scores[bin_index]-bins[bin_index])
987 new_diff = abs(s-bins[bin_index])
988 if new_diff < old_diff:
989 binned_scores[bin_index] = s
990 binned_model_indexes[bin_index] = model_index
993 print(binned_model_indexes)
995 def _expand_ambiguity(self, prot, d):
996 """If using PMI2, expand the dictionary to include copies as
999 This also keeps the states separate.
1004 if '..' in key
or (isinstance(val, tuple)
and len(val) >= 3):
1007 states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1008 if isinstance(val, tuple):
1016 for nst
in range(len(states)):
1018 copies = sel.get_selected_particles(with_representation=
False)
1020 for nc
in range(len(copies)):
1022 newdict[
'%s.%i..%i' % (name, nst, nc)] = \
1023 (start, stop, name, nc, nst)
1025 newdict[
'%s..%i' % (name, nc)] = \
1026 (start, stop, name, nc, nst)
1032 score_key=
"Total_Score",
1033 rmf_file_key=
"rmf_file",
1034 rmf_file_frame_key=
"rmf_frame_index",
1036 prefiltervalue=
None,
1039 alignment_components=
None,
1040 number_of_best_scoring_models=10,
1041 rmsd_calculation_components=
None,
1042 distance_matrix_file=
'distances.mat',
1043 load_distance_matrix_file=
False,
1044 skip_clustering=
False,
1045 number_of_clusters=1,
1047 exit_after_display=
True,
1049 first_and_last_frames=
None,
1050 density_custom_ranges=
None,
1051 write_pdb_with_centered_coordinates=
False,
1053 """Get the best scoring models, compute a distance matrix,
1054 cluster them, and create density maps.
1056 Tuple format: "molname" just the molecule,
1057 or (start,stop,molname,copy_num(optional),state_num(optional)
1058 Can pass None for copy or state to ignore that field.
1059 If you don't pass a specific copy number
1061 @param score_key The score for ranking models.
1062 @param rmf_file_key Key pointing to RMF filename
1063 @param rmf_file_frame_key Key pointing to RMF frame number
1064 @param state_number State number to analyze
1065 @param prefiltervalue Only include frames where the
1066 score key is below this value
1067 @param feature_keys Keywords for which you want to
1068 calculate average, medians, etc.
1069 If you pass "Keyname" it'll include everything that matches
1071 @param outputdir The local output directory used in
1073 @param alignment_components Dictionary with keys=groupname,
1074 values are tuples for aligning the structures
1075 e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1076 @param number_of_best_scoring_models Num models to keep per run
1077 @param rmsd_calculation_components For calculating RMSD
1078 (same format as alignment_components)
1079 @param distance_matrix_file Where to store/read the
1081 @param load_distance_matrix_file Try to load the distance
1083 @param skip_clustering Just extract the best scoring
1084 models and save the pdbs
1085 @param number_of_clusters Number of k-means clusters
1086 @param display_plot Display the distance matrix
1087 @param exit_after_display Exit after displaying distance
1089 @param get_every Extract every nth frame
1090 @param first_and_last_frames A tuple with the first and last
1091 frames to be analyzed. Values are percentages!
1092 Default: get all frames
1093 @param density_custom_ranges For density calculation
1094 (same format as alignment_components)
1095 @param write_pdb_with_centered_coordinates
1096 @param voxel_size Used for the density output
1100 self._outputdir = Path(outputdir).absolute()
1101 self._number_of_clusters = number_of_clusters
1102 for p, state
in self._protocol_output:
1103 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1114 if not load_distance_matrix_file:
1115 if len(self.stat_files) == 0:
1116 print(
"ERROR: no stat file found in the given path")
1118 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1119 self.stat_files, self.number_of_processes)[self.rank]
1122 for k
in (score_key, rmf_file_key, rmf_file_frame_key):
1123 if k
in feature_keys:
1125 "no need to pass " + k +
" to feature_keys.",
1127 feature_keys.remove(k)
1130 my_stat_files, score_key, feature_keys, rmf_file_key,
1131 rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1132 rmf_file_list = best_models[0]
1133 rmf_file_frame_list = best_models[1]
1134 score_list = best_models[2]
1135 feature_keyword_list_dict = best_models[3]
1141 if self.number_of_processes > 1:
1145 rmf_file_frame_list)
1146 for k
in feature_keyword_list_dict:
1147 feature_keyword_list_dict[k] = \
1149 feature_keyword_list_dict[k])
1152 score_rmf_tuples = list(zip(score_list,
1154 rmf_file_frame_list,
1155 list(range(len(score_list)))))
1157 if density_custom_ranges:
1158 for k
in density_custom_ranges:
1159 if not isinstance(density_custom_ranges[k], list):
1160 raise Exception(
"Density custom ranges: values must "
1161 "be lists of tuples")
1164 if first_and_last_frames
is not None:
1165 nframes = len(score_rmf_tuples)
1166 first_frame = int(first_and_last_frames[0] * nframes)
1167 last_frame = int(first_and_last_frames[1] * nframes)
1168 if last_frame > len(score_rmf_tuples):
1170 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1173 best_score_rmf_tuples = sorted(
1175 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1176 best_score_rmf_tuples = [t+(n,)
for n, t
in
1177 enumerate(best_score_rmf_tuples)]
1179 prov.append(IMP.pmi.io.FilterProvenance(
1180 "Best scoring", 0, number_of_best_scoring_models))
1182 best_score_feature_keyword_list_dict = defaultdict(list)
1183 for tpl
in best_score_rmf_tuples:
1185 for f
in feature_keyword_list_dict:
1186 best_score_feature_keyword_list_dict[f].append(
1187 feature_keyword_list_dict[f][index])
1188 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1189 best_score_rmf_tuples,
1190 self.number_of_processes)[self.rank]
1193 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1194 self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1196 if rmsd_calculation_components
is not None:
1197 tmp = self._expand_ambiguity(
1198 prot_ahead, rmsd_calculation_components)
1199 if tmp != rmsd_calculation_components:
1200 print(
'Detected ambiguity, expand rmsd components to',
1202 rmsd_calculation_components = tmp
1203 if alignment_components
is not None:
1204 tmp = self._expand_ambiguity(prot_ahead,
1205 alignment_components)
1206 if tmp != alignment_components:
1207 print(
'Detected ambiguity, expand alignment '
1208 'components to', tmp)
1209 alignment_components = tmp
1215 self.model, my_best_score_rmf_tuples[0],
1216 rmsd_calculation_components, state_number=state_number)
1218 self.model, my_best_score_rmf_tuples, alignment_components,
1219 rmsd_calculation_components, state_number=state_number)
1227 all_coordinates = got_coords[0]
1230 alignment_coordinates = got_coords[1]
1233 rmsd_coordinates = got_coords[2]
1236 rmf_file_name_index_dict = got_coords[3]
1239 all_rmf_file_names = got_coords[4]
1245 if density_custom_ranges:
1247 density_custom_ranges, voxel=voxel_size)
1249 dircluster = os.path.join(outputdir,
1250 "all_models."+str(self.rank))
1256 os.mkdir(dircluster)
1259 clusstat = open(os.path.join(
1260 dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1261 for cnt, tpl
in enumerate(my_best_score_rmf_tuples):
1263 rmf_frame_number = tpl[2]
1266 for key
in best_score_feature_keyword_list_dict:
1268 best_score_feature_keyword_list_dict[key][index]
1272 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1273 self.model, rmf_frame_number, rmf_name)
1275 linking_successful = \
1276 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1277 self.model, prots, rs, rmf_frame_number,
1279 if not linking_successful:
1286 states = IMP.atom.get_by_type(
1287 prots[0], IMP.atom.STATE_TYPE)
1288 prot = states[state_number]
1290 prot = prots[state_number]
1295 coords_f1 = alignment_coordinates[cnt]
1297 coords_f2 = alignment_coordinates[cnt]
1300 coords_f1, coords_f2)
1301 transformation = Ali.align()[1]
1315 rb = rbm.get_rigid_body()
1325 out_pdb_fn = os.path.join(
1326 dircluster, str(cnt)+
"."+str(self.rank)+
".pdb")
1327 out_rmf_fn = os.path.join(
1328 dircluster, str(cnt)+
"."+str(self.rank)+
".rmf3")
1329 o.init_pdb(out_pdb_fn, prot)
1330 tc = write_pdb_with_centered_coordinates
1331 o.write_pdb(out_pdb_fn,
1332 translate_to_geometric_center=tc)
1334 tmp_dict[
"local_pdb_file_name"] = \
1335 os.path.basename(out_pdb_fn)
1336 tmp_dict[
"rmf_file_full_path"] = rmf_name
1337 tmp_dict[
"local_rmf_file_name"] = \
1338 os.path.basename(out_rmf_fn)
1339 tmp_dict[
"local_rmf_frame_number"] = 0
1341 clusstat.write(str(tmp_dict)+
"\n")
1347 h.set_name(
"System")
1349 o.init_rmf(out_rmf_fn, [h], rs)
1351 o.init_rmf(out_rmf_fn, [prot], rs)
1353 o.write_rmf(out_rmf_fn)
1354 o.close_rmf(out_rmf_fn)
1356 if density_custom_ranges:
1357 DensModule.add_subunits_density(prot)
1359 if density_custom_ranges:
1360 DensModule.write_mrc(path=dircluster)
1365 if self.number_of_processes > 1:
1371 rmf_file_name_index_dict)
1373 alignment_coordinates)
1380 [best_score_feature_keyword_list_dict,
1381 rmf_file_name_index_dict],
1387 print(
"setup clustering class")
1390 for n, model_coordinate_dict
in enumerate(all_coordinates):
1392 if (alignment_components
is not None
1393 and len(self.cluster_obj.all_coords) == 0):
1395 self.cluster_obj.set_template(alignment_coordinates[n])
1396 self.cluster_obj.fill(all_rmf_file_names[n],
1397 rmsd_coordinates[n])
1398 print(
"Global calculating the distance matrix")
1401 self.cluster_obj.dist_matrix()
1405 self.cluster_obj.do_cluster(number_of_clusters)
1408 self.cluster_obj.plot_matrix(
1409 figurename=os.path.join(outputdir,
1411 if exit_after_display:
1413 self.cluster_obj.save_distance_matrix_file(
1414 file_name=distance_matrix_file)
1421 print(
"setup clustering class")
1423 self.cluster_obj.load_distance_matrix_file(
1424 file_name=distance_matrix_file)
1425 print(
"clustering with %s clusters" % str(number_of_clusters))
1426 self.cluster_obj.do_cluster(number_of_clusters)
1427 [best_score_feature_keyword_list_dict,
1428 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1431 self.cluster_obj.plot_matrix(figurename=os.path.join(
1432 outputdir,
'dist_matrix.pdf'))
1433 if exit_after_display:
1435 if self.number_of_processes > 1:
1443 print(self.cluster_obj.get_cluster_labels())
1444 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1445 print(
"rank %s " % str(self.rank))
1446 print(
"cluster %s " % str(n))
1447 print(
"cluster label %s " % str(cl))
1448 print(self.cluster_obj.get_cluster_label_names(cl))
1450 len(self.cluster_obj.get_cluster_label_names(cl))
1452 prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1455 if density_custom_ranges:
1457 density_custom_ranges,
1460 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1462 os.mkdir(dircluster)
1468 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1469 clusstat = open(dircluster +
"stat.out",
"w")
1470 for k, structure_name
in enumerate(
1471 self.cluster_obj.get_cluster_label_names(cl)):
1474 tmp_dict.update(rmsd_dict)
1475 index = rmf_file_name_index_dict[structure_name]
1476 for key
in best_score_feature_keyword_list_dict:
1478 key] = best_score_feature_keyword_list_dict[
1484 rmf_name = structure_name.split(
"|")[0]
1485 rmf_frame_number = int(structure_name.split(
"|")[1])
1486 clusstat.write(str(tmp_dict) +
"\n")
1491 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1492 self.model, rmf_frame_number, rmf_name)
1494 linking_successful = \
1495 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1496 self.model, prots, rs, rmf_frame_number,
1498 if not linking_successful:
1504 states = IMP.atom.get_by_type(
1505 prots[0], IMP.atom.STATE_TYPE)
1506 prot = states[state_number]
1508 prot = prots[state_number]
1514 co = self.cluster_obj
1515 model_index = co.get_model_index_from_name(
1517 transformation = co.get_transformation_to_first_member(
1528 rb = rbm.get_rigid_body()
1537 if density_custom_ranges:
1538 DensModule.add_subunits_density(prot)
1543 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1544 o.write_pdb(dircluster + str(k) +
".pdb")
1550 h.set_name(
"System")
1552 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1554 o.init_rmf(dircluster + str(k) +
".rmf3", [prot], rs)
1555 o.write_rmf(dircluster + str(k) +
".rmf3")
1556 o.close_rmf(dircluster + str(k) +
".rmf3")
1561 if density_custom_ranges:
1562 DensModule.write_mrc(path=dircluster)
1565 if self.number_of_processes > 1:
1568 def get_cluster_rmsd(self, cluster_num):
1569 if self.cluster_obj
is None:
1571 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1573 def save_objects(self, objects, file_name):
1575 outf = open(file_name,
'wb')
1576 pickle.dump(objects, outf)
1579 def load_objects(self, file_name):
1581 inputf = open(file_name,
'rb')
1582 objects = pickle.load(inputf)
1590 This class contains analysis utilities to investigate ReplicaExchange
1598 def __init__(self, model, stat_files, best_models=None, score_key=None,
1601 Construction of the Class.
1602 @param model IMP.Model()
1603 @param stat_files list of string. Can be ascii stat files,
1605 @param best_models Integer. Number of best scoring models,
1606 if None: all models will be read
1607 @param alignment boolean (Default=True). Align before computing
1612 self.best_models = best_models
1614 model, stat_files, self.best_models, score_key, cache=
True)
1616 StatHierarchyHandler=self.stath0)
1629 self.clusters.append(c)
1630 for n0
in range(len(self.stath0)):
1632 self.pairwise_rmsd = {}
1633 self.pairwise_molecular_assignment = {}
1634 self.alignment = alignment
1635 self.symmetric_molecules = {}
1636 self.issymmetricsel = {}
1638 self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1640 self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1645 Setup the selection onto which the rmsd is computed
1646 @param kwargs use IMP.atom.Selection keywords
1654 Store names of symmetric molecules
1656 self.symmetric_molecules[molecule_name] = 0
1661 Setup the selection onto which the alignment is computed
1662 @param kwargs use IMP.atom.Selection keywords
1670 def clean_clusters(self):
1671 for c
in self.clusters:
1675 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1677 Cluster the models based on RMSD.
1678 @param rmsd_cutoff Float the distance cutoff in Angstrom
1679 @param metric (Default=IMP.atom.get_rmsd) the metric that will
1680 be used to compute rmsds
1682 self.clean_clusters()
1683 not_clustered = set(range(len(self.stath1)))
1684 while len(not_clustered) > 0:
1685 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1690 Refine the clusters by merging the ones whose centers are close
1691 @param rmsd_cutoff cutoff distance in Angstorms
1693 clusters_copy = self.clusters
1694 for c0, c1
in itertools.combinations(self.clusters, 2):
1695 if c0.center_index
is None:
1697 if c1.center_index
is None:
1699 _ = self.stath0[c0.center_index]
1700 _ = self.stath1[c1.center_index]
1701 rmsd, molecular_assignment = self.
rmsd()
1702 if rmsd <= rmsd_cutoff:
1703 if c1
in self.clusters:
1704 clusters_copy.remove(c1)
1706 self.clusters = clusters_copy
1713 def set_cluster_assignments(self, cluster_ids):
1714 if len(cluster_ids) != len(self.stath0):
1715 raise ValueError(
'cluster ids has to be same length as '
1719 for i
in sorted(list(set(cluster_ids))):
1721 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
1722 self.clusters[idx].add_member(i, d)
1726 Return the model data from a cluster
1727 @param cluster IMP.pmi.output.Cluster object
1736 Save the data for the whole models into a pickle file
1737 @param filename string
1739 self.stath0.save_data(filename)
1743 Set the data from an external IMP.pmi.output.Data
1744 @param data IMP.pmi.output.Data
1746 self.stath0.data = data
1747 self.stath1.data = data
1751 Load the data from an external pickled file
1752 @param filename string
1754 self.stath0.load_data(filename)
1755 self.stath1.load_data(filename)
1756 self.best_models = len(self.stath0)
1758 def add_cluster(self, rmf_name_list):
1760 print(
"creating cluster index "+str(len(self.clusters)))
1761 self.clusters.append(c)
1762 current_len = len(self.stath0)
1764 for rmf
in rmf_name_list:
1765 print(
"adding rmf "+rmf)
1766 self.stath0.add_stat_file(rmf)
1767 self.stath1.add_stat_file(rmf)
1769 for n0
in range(current_len, len(self.stath0)):
1770 d0 = self.stath0[n0]
1771 c.add_member(n0, d0)
1776 Save the clusters into a pickle file
1777 @param filename string
1780 import cPickle
as pickle
1783 fl = open(filename,
'wb')
1784 pickle.dump(self.clusters, fl)
1788 Load the clusters from a pickle file
1789 @param filename string
1790 @param append bool (Default=False), if True. append the clusters
1791 to the ones currently present
1794 import cPickle
as pickle
1797 fl = open(filename,
'rb')
1798 self.clean_clusters()
1800 self.clusters += pickle.load(fl)
1802 self.clusters = pickle.load(fl)
1811 Compute the cluster center for a given cluster
1813 member_distance = defaultdict(float)
1815 for n0, n1
in itertools.combinations(cluster.members, 2):
1818 rmsd, _ = self.
rmsd()
1819 member_distance[n0] += rmsd
1821 if len(member_distance) > 0:
1822 cluster.center_index = min(member_distance,
1823 key=member_distance.get)
1825 cluster.center_index = cluster.members[0]
1830 Save the coordinates of the current cluster a single rmf file
1832 print(
"saving coordinates", cluster)
1836 if rmf_name
is None:
1837 rmf_name = prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
1839 _ = self.stath1[cluster.members[0]]
1841 o.init_rmf(rmf_name, [self.stath1])
1842 for n1
in cluster.members:
1848 o.write_rmf(rmf_name)
1850 o.close_rmf(rmf_name)
1854 remove structures that are similar
1855 append it to a new cluster
1857 print(
"pruning models")
1859 filtered = [selected]
1860 remaining = range(1, len(self.stath1), 10)
1862 while len(remaining) > 0:
1863 d0 = self.stath0[selected]
1865 for n1
in remaining:
1870 if d <= rmsd_cutoff:
1872 print(
"pruning model %s, similar to model %s, rmsd %s"
1873 % (str(n1), str(selected), str(d)))
1874 remaining = [x
for x
in remaining
if x
not in rm]
1875 if len(remaining) == 0:
1877 selected = remaining[0]
1878 filtered.append(selected)
1881 self.clusters.append(c)
1883 d0 = self.stath0[n0]
1884 c.add_member(n0, d0)
1889 Compute the precision of a cluster
1895 if cluster.center_index
is not None:
1896 members1 = [cluster.center_index]
1898 members1 = cluster.members
1902 for n1
in cluster.members:
1907 tmp_rmsd, _ = self.
rmsd()
1912 precision = rmsd/npairs
1913 cluster.precision = precision
1918 Compute the bipartite precision (ie the cross-precision)
1919 between two clusters
1923 for cn0, n0
in enumerate(cluster1.members):
1925 for cn1, n1
in enumerate(cluster2.members):
1927 tmp_rmsd, _ = self.
rmsd()
1929 print(
"--- rmsd between structure %s and structure "
1930 "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1933 precision = rmsd/npairs
1936 def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1937 cluster_ref=
None, step=1):
1939 Compute the Root mean square fluctuations
1940 of a molecule in a cluster
1941 Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1942 residue indexes and the value is the rmsf
1944 rmsf = IMP.pmi.tools.OrderedDict()
1947 if cluster_ref
is not None:
1948 if cluster_ref.center_index
is not None:
1949 members0 = [cluster_ref.center_index]
1951 members0 = cluster_ref.members
1953 if cluster.center_index
is not None:
1954 members0 = [cluster.center_index]
1956 members0 = cluster.members
1959 copy_index=copy_index, state_index=state_index)
1960 ps0 = s0.get_selected_particles()
1962 residue_indexes = list(IMP.pmi.tools.OrderedSet(
1968 d0 = self.stath0[n0]
1969 for n1
in cluster.members[::step]:
1971 print(
"--- rmsf %s %s" % (str(n0), str(n1)))
1975 self.stath1, molecule=molecule,
1976 residue_indexes=residue_indexes, resolution=1,
1977 copy_index=copy_index, state_index=state_index)
1978 ps1 = s1.get_selected_particles()
1980 d1 = self.stath1[n1]
1983 for n, (p0, p1)
in enumerate(zip(ps0, ps1)):
1984 r = residue_indexes[n]
1996 for stath
in [self.stath0, self.stath1]:
1997 if molecule
not in self.symmetric_molecules:
1999 stath, molecule=molecule, residue_index=r,
2000 resolution=1, copy_index=copy_index,
2001 state_index=state_index)
2004 stath, molecule=molecule, residue_index=r,
2005 resolution=1, state_index=state_index)
2007 ps = s.get_selected_particles()
2016 def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2017 reference=
"Absolute", prefix=
"./", step=1):
2023 for n1
in cluster.members[::step]:
2024 print(
"density "+str(n1))
2029 dens.add_subunits_density(self.stath1)
2031 dens.write_mrc(path=prefix+
'/', suffix=str(cluster.cluster_id))
2034 def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2035 consolidate=
False, molecules=
None, prefix=
'./',
2036 reference=
"Absolute"):
2040 import matplotlib.pyplot
as plt
2041 import matplotlib.cm
as cm
2042 from scipy.spatial.distance
import cdist
2044 if molecules
is None:
2053 molecules=molecules).get_selected_particles())]
2054 unique_copies = [mol
for mol
in mols
if mol.get_copy_index() == 0]
2055 mol_names_unique = dict((mol.get_name(), mol)
for mol
in unique_copies)
2056 total_len_unique = sum(max(mol.get_residue_indexes())
2057 for mol
in unique_copies)
2064 seqlen = max(mol.get_residue_indexes())
2065 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2069 for mol
in unique_copies:
2070 seqlen = max(mol.get_residue_indexes())
2071 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2074 for ncl, n1
in enumerate(cluster.members):
2077 coord_dict = IMP.pmi.tools.OrderedDict()
2079 rindexes = mol.get_residue_indexes()
2080 coords = np.ones((max(rindexes), 3))
2081 for rnum
in rindexes:
2084 selpart = sel.get_selected_particles()
2085 if len(selpart) == 0:
2087 selpart = selpart[0]
2088 coords[rnum - 1, :] = \
2090 coord_dict[mol] = coords
2093 coords = np.concatenate(list(coord_dict.values()))
2094 dists = cdist(coords, coords)
2095 binary_dists = np.where((dists <= contact_threshold)
2096 & (dists >= 1.0), 1.0, 0.0)
2098 binary_dists_dict = {}
2100 len1 = max(mol1.get_residue_indexes())
2102 name1 = mol1.get_name()
2103 name2 = mol2.get_name()
2104 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2105 if (name1, name2)
not in binary_dists_dict:
2106 binary_dists_dict[(name1, name2)] = \
2107 np.zeros((len1, len1))
2108 binary_dists_dict[(name1, name2)] += \
2109 np.where((dists <= contact_threshold)
2110 & (dists >= 1.0), 1.0, 0.0)
2111 binary_dists = np.zeros((total_len_unique, total_len_unique))
2113 for name1, name2
in binary_dists_dict:
2114 r1 = index_dict[mol_names_unique[name1]]
2115 r2 = index_dict[mol_names_unique[name2]]
2116 binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2117 np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2123 contact_freqs = binary_dists
2125 dist_maps.append(dists)
2126 av_dist_map += dists
2127 contact_freqs += binary_dists
2130 contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2132 contact_freqs = 1.0/len(cluster)*contact_freqs
2133 av_dist_map = 1.0/len(cluster)*contact_freqs
2135 fig = plt.figure(figsize=(100, 100))
2136 ax = fig.add_subplot(111)
2139 gap_between_components = 50
2144 sorted_tuple = sorted(
2146 mol).get_extended_name(), mol)
for mol
in mols)
2147 prot_list = list(zip(*sorted_tuple))[1]
2149 sorted_tuple = sorted(
2151 for mol
in unique_copies)
2152 prot_list = list(zip(*sorted_tuple))[1]
2154 prot_listx = prot_list
2155 nresx = gap_between_components + \
2156 sum([max(mol.get_residue_indexes())
2157 + gap_between_components
for mol
in prot_listx])
2160 prot_listy = prot_list
2161 nresy = gap_between_components + \
2162 sum([max(mol.get_residue_indexes())
2163 + gap_between_components
for mol
in prot_listy])
2168 res = gap_between_components
2169 for mol
in prot_listx:
2170 resoffsetx[mol] = res
2171 res += max(mol.get_residue_indexes())
2173 res += gap_between_components
2177 res = gap_between_components
2178 for mol
in prot_listy:
2179 resoffsety[mol] = res
2180 res += max(mol.get_residue_indexes())
2182 res += gap_between_components
2184 resoffsetdiagonal = {}
2185 res = gap_between_components
2186 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2187 resoffsetdiagonal[mol] = res
2188 res += max(mol.get_residue_indexes())
2189 res += gap_between_components
2194 for n, prot
in enumerate(prot_listx):
2195 res = resoffsetx[prot]
2197 for proty
in prot_listy:
2198 resy = resoffsety[proty]
2199 endy = resendy[proty]
2200 ax.plot([res, res], [resy, endy], linestyle=
'-',
2201 color=
'gray', lw=0.4)
2202 ax.plot([end, end], [resy, endy], linestyle=
'-',
2203 color=
'gray', lw=0.4)
2204 xticks.append((float(res) + float(end)) / 2)
2206 prot).get_extended_name())
2210 for n, prot
in enumerate(prot_listy):
2211 res = resoffsety[prot]
2213 for protx
in prot_listx:
2214 resx = resoffsetx[protx]
2215 endx = resendx[protx]
2216 ax.plot([resx, endx], [res, res], linestyle=
'-',
2217 color=
'gray', lw=0.4)
2218 ax.plot([resx, endx], [end, end], linestyle=
'-',
2219 color=
'gray', lw=0.4)
2220 yticks.append((float(res) + float(end)) / 2)
2222 prot).get_extended_name())
2226 tmp_array = np.zeros((nresx, nresy))
2228 for px
in prot_listx:
2229 for py
in prot_listy:
2230 resx = resoffsetx[px]
2231 lengx = resendx[px] - 1
2232 resy = resoffsety[py]
2233 lengy = resendy[py] - 1
2234 indexes_x = index_dict[px]
2235 minx = min(indexes_x)
2236 maxx = max(indexes_x)
2237 indexes_y = index_dict[py]
2238 miny = min(indexes_y)
2239 maxy = max(indexes_y)
2240 tmp_array[resx:lengx, resy:lengy] = \
2241 contact_freqs[minx:maxx, miny:maxy]
2242 ret[(px, py)] = np.argwhere(
2243 contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2245 ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2246 origin=
'lower', alpha=0.6, interpolation=
'nearest')
2248 ax.set_xticks(xticks)
2249 ax.set_xticklabels(xlabels, rotation=90)
2250 ax.set_yticks(yticks)
2251 ax.set_yticklabels(ylabels)
2252 plt.setp(ax.get_xticklabels(), fontsize=6)
2253 plt.setp(ax.get_yticklabels(), fontsize=6)
2256 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2257 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2259 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf",
2260 dpi=300, transparent=
"False")
2263 def plot_rmsd_matrix(self, filename):
2264 self.compute_all_pairwise_rmsd()
2265 distance_matrix = np.zeros(
2266 (len(self.stath0), len(self.stath1)))
2267 for (n0, n1)
in self.pairwise_rmsd:
2268 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2270 import matplotlib
as mpl
2272 import matplotlib.pylab
as pl
2273 from scipy.cluster
import hierarchy
as hrc
2275 fig = pl.figure(figsize=(10, 8))
2276 ax = fig.add_subplot(212)
2277 dendrogram = hrc.dendrogram(
2278 hrc.linkage(distance_matrix),
2281 leaves_order = dendrogram[
'leaves']
2282 ax.set_xlabel(
'Model')
2283 ax.set_ylabel(
'RMSD [Angstroms]')
2285 ax2 = fig.add_subplot(221)
2287 distance_matrix[leaves_order, :][:, leaves_order],
2288 interpolation=
'nearest')
2289 cb = fig.colorbar(cax)
2290 cb.set_label(
'RMSD [Angstroms]')
2291 ax2.set_xlabel(
'Model')
2292 ax2.set_ylabel(
'Model')
2294 pl.savefig(filename, dpi=300)
2303 Update the cluster id numbers
2305 for n, c
in enumerate(self.clusters):
2308 def get_molecule(self, hier, name, copy):
2316 self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2317 self.sel0_rmsd.get_selected_particles())
2318 self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2319 self.sel1_rmsd.get_selected_particles())
2320 for mol
in self.seldict0:
2321 for sel
in self.seldict0[mol]:
2322 self.issymmetricsel[sel] =
False
2323 for mol
in self.symmetric_molecules:
2324 self.symmetric_molecules[mol] = len(self.seldict0[mol])
2325 for sel
in self.seldict0[mol]:
2326 self.issymmetricsel[sel] =
True
2330 self.sel1_alignment, self.sel0_alignment)
2332 for rb
in self.rbs1:
2335 for bead
in self.beads1:
2343 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2345 initial filling of the clusters.
2348 print(
"clustering model "+str(n0))
2349 d0 = self.stath0[n0]
2351 print(
"creating cluster index "+str(len(self.clusters)))
2352 self.clusters.append(c)
2353 c.add_member(n0, d0)
2354 clustered = set([n0])
2356 print(
"--- trying to add model " + str(n1) +
" to cluster "
2357 + str(len(self.clusters)))
2358 d1 = self.stath1[n1]
2361 rmsd, _ = self.
rmsd(metric=metric)
2362 if rmsd < rmsd_cutoff:
2363 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2364 c.add_member(n1, d1)
2367 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2372 merge the clusters that have close members
2373 @param rmsd_cutoff cutoff distance in Angstorms
2381 for c0, c1
in filter(
lambda x: len(x[0].members) > 1,
2382 itertools.combinations(self.clusters, 2)):
2383 n0, n1 = [c.members[0]
for c
in (c0, c1)]
2386 rmsd, _ = self.
rmsd()
2387 if (rmsd < 2*rmsd_cutoff
and
2389 to_merge.append((c0, c1))
2391 for c0, c
in reversed(to_merge):
2395 self.clusters = [c
for c
in
2396 filter(
lambda x: len(x.members) > 0, self.clusters)]
2400 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2402 print(
"check close members for clusters " + str(c0.cluster_id) +
2403 " and " + str(c1.cluster_id))
2404 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2407 rmsd, _ = self.
rmsd(metric=metric)
2408 if rmsd < rmsd_cutoff:
2423 a function that returns the permutation best_sel of sels0 that
2426 best_rmsd2 = float(
'inf')
2428 if self.issymmetricsel[sels0[0]]:
2431 for offset
in range(N):
2432 sels = [sels0[(offset+i) % N]
for i
in range(N)]
2435 r = metric(sel0, sel1)
2437 if rmsd2 < best_rmsd2:
2441 for sels
in itertools.permutations(sels0):
2443 for sel0, sel1
in itertools.takewhile(
2444 lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2445 r = metric(sel0, sel1)
2447 if rmsd2 < best_rmsd2:
2450 return best_sel, best_rmsd2
2452 def compute_all_pairwise_rmsd(self):
2453 for d0
in self.stath0:
2454 for d1
in self.stath1:
2455 rmsd, _ = self.
rmsd()
2457 def rmsd(self, metric=IMP.atom.get_rmsd):
2459 Computes the RMSD. Resolves ambiguous pairs assignments
2463 n0 = self.stath0.current_index
2464 n1 = self.stath1.current_index
2465 if ((n0, n1)
in self.pairwise_rmsd) \
2466 and ((n0, n1)
in self.pairwise_molecular_assignment):
2467 return (self.pairwise_rmsd[(n0, n1)],
2468 self.pairwise_molecular_assignment[(n0, n1)])
2478 molecular_assignment = {}
2479 for molname, sels0
in self.seldict0.items():
2480 sels_best_order, best_rmsd2 = \
2481 self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2483 Ncoords = len(sels_best_order[0].get_selected_particles())
2484 Ncopies = len(self.seldict1[molname])
2485 total_rmsd += Ncoords*best_rmsd2
2486 total_N += Ncoords*Ncopies
2488 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2489 p0 = sel0.get_selected_particles()[0]
2490 p1 = sel1.get_selected_particles()[0]
2495 molecular_assignment[(molname, c0)] = (molname, c1)
2497 total_rmsd = math.sqrt(total_rmsd/total_N)
2499 self.pairwise_rmsd[(n0, n1)] = total_rmsd
2500 self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2501 self.pairwise_rmsd[(n1, n0)] = total_rmsd
2502 self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2503 return total_rmsd, molecular_assignment
2507 Fix the reference structure for structural alignment, rmsd and
2510 @param reference can be either "Absolute" (cluster center of the
2511 first cluster) or Relative (cluster center of the current
2514 if reference ==
"Absolute":
2516 elif reference ==
"Relative":
2517 if cluster.center_index:
2518 n0 = cluster.center_index
2520 n0 = cluster.members[0]
2525 compute the molecular assignemnts between multiple copies
2526 of the same sequence. It changes the Copy index of Molecules
2529 _, molecular_assignment = self.
rmsd()
2530 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2531 mol0 = self.molcopydict0[m0][c0]
2532 mol1 = self.molcopydict1[m1][c1]
2535 p1.set_value(cik0, c0)
2539 Undo the Copy index assignment
2542 _, molecular_assignment = self.
rmsd()
2543 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2544 mol0 = self.molcopydict0[m0][c0]
2545 mol1 = self.molcopydict1[m1][c1]
2548 p1.set_value(cik0, c1)
2555 s =
"AnalysisReplicaExchange\n"
2556 s +=
"---- number of clusters %s \n" % str(len(self.clusters))
2557 s +=
"---- number of models %s \n" % str(len(self.stath0))
2560 def __getitem__(self, int_slice_adaptor):
2561 if isinstance(int_slice_adaptor, int):
2562 return self.clusters[int_slice_adaptor]
2563 elif isinstance(int_slice_adaptor, slice):
2564 return self.__iter__(int_slice_adaptor)
2566 raise TypeError(
"Unknown Type")
2569 return len(self.clusters)
2571 def __iter__(self, slice_key=None):
2572 if slice_key
is None:
2573 for i
in range(len(self)):
2576 for i
in range(len(self))[slice_key]:
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A member of a rigid body, it has internal (local) coordinates.
A macro to help setup and run replica exchange.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
def merge_aggregates
merge the clusters that have close members
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
Mapping between FASTA one-letter codes and residue types.
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
Sample using replica exchange.
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.