1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
18 from pathlib
import Path
20 from IMP._compat_pathlib
import Path
22 from operator
import itemgetter
23 from collections
import defaultdict
30 class _MockMPIValues(object):
31 """Replace samplers.MPI_values when in test mode"""
32 def get_percentile(self, name):
36 class _RMFRestraints(object):
37 """All restraints that are written out to the RMF file"""
38 def __init__(self, model, user_restraints):
40 self._user_restraints = user_restraints
if user_restraints
else []
43 return (len(self._user_restraints)
44 + self._rmf_rs.get_number_of_restraints())
48 __nonzero__ = __bool__
50 def __getitem__(self, i):
51 class FakePMIWrapper(object):
52 def __init__(self, r):
53 self.r = IMP.RestraintSet.get_from(r)
55 def get_restraint(self):
58 lenuser = len(self._user_restraints)
60 return self._user_restraints[i]
61 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
62 r = self._rmf_rs.get_restraint(i - lenuser)
63 return FakePMIWrapper(r)
65 raise IndexError(
"Out of range")
69 """A macro to help setup and run replica exchange.
70 Supports Monte Carlo and molecular dynamics.
71 Produces trajectory RMF files, best PDB structures,
72 and output stat files.
75 monte_carlo_sample_objects=
None,
76 molecular_dynamics_sample_objects=
None,
78 rmf_output_objects=
None,
79 monte_carlo_temperature=1.0,
80 simulated_annealing=
False,
81 simulated_annealing_minimum_temperature=1.0,
82 simulated_annealing_maximum_temperature=2.5,
83 simulated_annealing_minimum_temperature_nframes=100,
84 simulated_annealing_maximum_temperature_nframes=100,
85 replica_exchange_minimum_temperature=1.0,
86 replica_exchange_maximum_temperature=2.5,
87 replica_exchange_swap=
True,
89 number_of_best_scoring_models=500,
92 molecular_dynamics_steps=10,
93 molecular_dynamics_max_time_step=1.0,
94 number_of_frames=1000,
95 save_coordinates_mode=
"lowest_temperature",
96 nframes_write_coordinates=1,
97 write_initial_rmf=
True,
98 initial_rmf_name_suffix=
"initial",
99 stat_file_name_suffix=
"stat",
100 best_pdb_name_suffix=
"model",
103 do_create_directories=
True,
104 global_output_directory=
"./",
106 best_pdb_dir=
"pdbs/",
107 replica_stat_file_suffix=
"stat_replica",
108 em_object_for_rmf=
None,
110 replica_exchange_object=
None,
114 @param model The IMP model
115 @param root_hier Top-level (System)hierarchy
116 @param monte_carlo_sample_objects Objects for MC sampling, which
117 should generally be a simple list of Mover objects, e.g.
118 from DegreesOfFreedom.get_movers().
119 @param molecular_dynamics_sample_objects Objects for MD sampling,
120 which should generally be a simple list of particles.
121 @param output_objects A list of structural objects and restraints
122 that will be included in output (ie, statistics "stat"
123 files). Any object that provides a get_output() method
124 can be used here. If None is passed
125 the macro will not write stat files.
126 @param rmf_output_objects A list of structural objects and
127 restraints that will be included in rmf. Any object
128 that provides a get_output() method can be used here.
129 @param monte_carlo_temperature MC temp (may need to be optimized
130 based on post-sampling analysis)
131 @param simulated_annealing If True, perform simulated annealing
132 @param simulated_annealing_minimum_temperature Should generally be
133 the same as monte_carlo_temperature.
134 @param simulated_annealing_minimum_temperature_nframes Number of
135 frames to compute at minimum temperature.
136 @param simulated_annealing_maximum_temperature_nframes Number of
138 temps > simulated_annealing_maximum_temperature.
139 @param replica_exchange_minimum_temperature Low temp for REX; should
140 generally be the same as monte_carlo_temperature.
141 @param replica_exchange_maximum_temperature High temp for REX
142 @param replica_exchange_swap Boolean, enable disable temperature
144 @param num_sample_rounds Number of rounds of MC/MD per cycle
145 @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
146 models to keep around for analysis.
147 @param mmcif If True, write best scoring models in mmCIF format;
148 if False (the default), write in legacy PDB format.
149 @param best_pdb_dir The directory under `global_output_directory`
150 where best-scoring PDB/mmCIF files are written.
151 @param best_pdb_name_suffix Part of the file name for best-scoring
153 @param monte_carlo_steps Number of MC steps per round
154 @param self_adaptive self adaptive scheme for Monte Carlo movers
155 @param molecular_dynamics_steps Number of MD steps per round
156 @param molecular_dynamics_max_time_step Max time step for MD
157 @param number_of_frames Number of REX frames to run
158 @param save_coordinates_mode string: how to save coordinates.
159 "lowest_temperature" (default) only the lowest temperatures
161 "25th_score" all replicas whose score is below the 25th
163 "50th_score" all replicas whose score is below the 50th
165 "75th_score" all replicas whose score is below the 75th
167 @param nframes_write_coordinates How often to write the coordinates
169 @param write_initial_rmf Write the initial configuration
170 @param global_output_directory Folder that will be created to house
172 @param test_mode Set to True to avoid writing any files, just test
174 @param score_moved If True, attempt to speed up Monte Carlo
175 sampling by caching scoring function terms on particles
182 if output_objects == []:
185 self.output_objects = []
187 self.output_objects = output_objects
188 self.rmf_output_objects = rmf_output_objects
190 and not root_hier.get_parent()):
191 if self.output_objects
is not None:
192 self.output_objects.append(
194 if self.rmf_output_objects
is not None:
195 self.rmf_output_objects.append(
197 self.root_hier = root_hier
198 states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
199 self.vars[
"number_of_states"] = len(states)
201 self.root_hiers = states
202 self.is_multi_state =
True
204 self.root_hier = root_hier
205 self.is_multi_state =
False
207 raise TypeError(
"Must provide System hierarchy (root_hier)")
209 self._rmf_restraints = _RMFRestraints(model,
None)
210 self.em_object_for_rmf = em_object_for_rmf
211 self.monte_carlo_sample_objects = monte_carlo_sample_objects
212 self.vars[
"self_adaptive"] = self_adaptive
213 self.molecular_dynamics_sample_objects = \
214 molecular_dynamics_sample_objects
215 self.replica_exchange_object = replica_exchange_object
216 self.molecular_dynamics_max_time_step = \
217 molecular_dynamics_max_time_step
218 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
219 self.vars[
"replica_exchange_minimum_temperature"] = \
220 replica_exchange_minimum_temperature
221 self.vars[
"replica_exchange_maximum_temperature"] = \
222 replica_exchange_maximum_temperature
223 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
224 self.vars[
"simulated_annealing"] = simulated_annealing
225 self.vars[
"simulated_annealing_minimum_temperature"] = \
226 simulated_annealing_minimum_temperature
227 self.vars[
"simulated_annealing_maximum_temperature"] = \
228 simulated_annealing_maximum_temperature
229 self.vars[
"simulated_annealing_minimum_temperature_nframes"] = \
230 simulated_annealing_minimum_temperature_nframes
231 self.vars[
"simulated_annealing_maximum_temperature_nframes"] = \
232 simulated_annealing_maximum_temperature_nframes
234 self.vars[
"num_sample_rounds"] = num_sample_rounds
236 "number_of_best_scoring_models"] = number_of_best_scoring_models
237 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
238 self.vars[
"molecular_dynamics_steps"] = molecular_dynamics_steps
239 self.vars[
"number_of_frames"] = number_of_frames
240 if save_coordinates_mode
not in (
"lowest_temperature",
"25th_score",
241 "50th_score",
"75th_score"):
242 raise Exception(
"save_coordinates_mode has unrecognized value")
244 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
245 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
246 self.vars[
"write_initial_rmf"] = write_initial_rmf
247 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
248 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
249 self.vars[
"mmcif"] = mmcif
250 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
251 self.vars[
"do_clean_first"] = do_clean_first
252 self.vars[
"do_create_directories"] = do_create_directories
253 self.vars[
"global_output_directory"] = global_output_directory
254 self.vars[
"rmf_dir"] = rmf_dir
255 self.vars[
"best_pdb_dir"] = best_pdb_dir
256 self.vars[
"atomistic"] = atomistic
257 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
258 self.vars[
"geometries"] =
None
259 self.test_mode = test_mode
260 self.score_moved = score_moved
263 if self.vars[
"geometries"]
is None:
264 self.vars[
"geometries"] = list(geometries)
266 self.vars[
"geometries"].extend(geometries)
269 print(
"ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
270 "rmfs/*.rmf3 for each replica ")
271 print(
"--- it stores the best scoring pdb models in pdbs/")
272 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
273 "lowest temperature")
274 print(
"--- variables:")
275 keys = list(self.vars.keys())
278 print(
"------", v.ljust(30), self.vars[v])
280 def get_replica_exchange_object(self):
281 return self.replica_exchange_object
283 def _add_provenance(self, sampler_md, sampler_mc):
284 """Record details about the sampling in the IMP Hierarchies"""
287 method =
"Molecular Dynamics"
288 iterations += self.vars[
"molecular_dynamics_steps"]
290 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
291 iterations += self.vars[
"monte_carlo_steps"]
293 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
295 iterations *= self.vars[
"num_sample_rounds"]
297 pi = self.model.add_particle(
"sampling")
299 self.model, pi, method, self.vars[
"number_of_frames"],
301 p.set_number_of_replicas(
302 self.replica_exchange_object.get_number_of_replicas())
303 IMP.pmi.tools._add_pmi_provenance(self.root_hier)
306 def execute_macro(self):
307 temp_index_factor = 100000.0
311 if self.monte_carlo_sample_objects
is not None:
312 print(
"Setting up MonteCarlo")
314 self.model, self.monte_carlo_sample_objects,
315 self.vars[
"monte_carlo_temperature"],
316 score_moved=self.score_moved)
317 if self.vars[
"simulated_annealing"]:
318 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
319 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
321 "simulated_annealing_minimum_temperature_nframes"]
323 "simulated_annealing_maximum_temperature_nframes"]
324 sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
325 if self.vars[
"self_adaptive"]:
326 sampler_mc.set_self_adaptive(
327 isselfadaptive=self.vars[
"self_adaptive"])
328 if self.output_objects
is not None:
329 self.output_objects.append(sampler_mc)
330 if self.rmf_output_objects
is not None:
331 self.rmf_output_objects.append(sampler_mc)
332 samplers.append(sampler_mc)
334 if self.molecular_dynamics_sample_objects
is not None:
335 print(
"Setting up MolecularDynamics")
337 self.model, self.molecular_dynamics_sample_objects,
338 self.vars[
"monte_carlo_temperature"],
339 maximum_time_step=self.molecular_dynamics_max_time_step)
340 if self.vars[
"simulated_annealing"]:
341 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
342 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
344 "simulated_annealing_minimum_temperature_nframes"]
346 "simulated_annealing_maximum_temperature_nframes"]
347 sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
348 if self.output_objects
is not None:
349 self.output_objects.append(sampler_md)
350 if self.rmf_output_objects
is not None:
351 self.rmf_output_objects.append(sampler_md)
352 samplers.append(sampler_md)
355 print(
"Setting up ReplicaExchange")
357 self.model, self.vars[
"replica_exchange_minimum_temperature"],
358 self.vars[
"replica_exchange_maximum_temperature"], samplers,
359 replica_exchange_object=self.replica_exchange_object)
360 self.replica_exchange_object = rex.rem
362 myindex = rex.get_my_index()
363 if self.output_objects
is not None:
364 self.output_objects.append(rex)
365 if self.rmf_output_objects
is not None:
366 self.rmf_output_objects.append(rex)
370 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
374 globaldir = self.vars[
"global_output_directory"] +
"/"
375 rmf_dir = globaldir + self.vars[
"rmf_dir"]
376 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
378 if not self.test_mode:
379 if self.vars[
"do_clean_first"]:
382 if self.vars[
"do_create_directories"]:
385 os.makedirs(globaldir)
393 if not self.is_multi_state:
399 for n
in range(self.vars[
"number_of_states"]):
401 os.makedirs(pdb_dir +
"/" + str(n))
408 if self.output_objects
is not None:
409 self.output_objects.append(sw)
410 if self.rmf_output_objects
is not None:
411 self.rmf_output_objects.append(sw)
413 print(
"Setting up stat file")
415 low_temp_stat_file = globaldir + \
416 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
419 if not self.test_mode:
422 if not self.test_mode:
423 if self.output_objects
is not None:
424 output.init_stat2(low_temp_stat_file,
426 extralabels=[
"rmf_file",
"rmf_frame_index"])
428 print(
"Stat file writing is disabled")
430 if self.rmf_output_objects
is not None:
431 print(
"Stat info being written in the rmf file")
433 print(
"Setting up replica stat file")
434 replica_stat_file = globaldir + \
435 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
436 if not self.test_mode:
437 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
439 if not self.test_mode:
440 print(
"Setting up best pdb files")
441 if not self.is_multi_state:
442 if self.vars[
"number_of_best_scoring_models"] > 0:
443 output.init_pdb_best_scoring(
444 pdb_dir +
"/" + self.vars[
"best_pdb_name_suffix"],
446 self.vars[
"number_of_best_scoring_models"],
447 replica_exchange=
True,
448 mmcif=self.vars[
'mmcif'],
449 best_score_file=globaldir +
"best.scores.rex.py")
450 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
452 pdb_dir +
"/" +
"model.psf",
454 self.vars[
"best_pdb_name_suffix"] + pdbext)
456 if self.vars[
"number_of_best_scoring_models"] > 0:
457 for n
in range(self.vars[
"number_of_states"]):
458 output.init_pdb_best_scoring(
459 pdb_dir +
"/" + str(n) +
"/" +
460 self.vars[
"best_pdb_name_suffix"],
462 self.vars[
"number_of_best_scoring_models"],
463 replica_exchange=
True,
464 mmcif=self.vars[
'mmcif'],
465 best_score_file=globaldir +
"best.scores.rex.py")
466 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
468 pdb_dir +
"/" + str(n) +
"/" +
"model.psf",
469 pdb_dir +
"/" + str(n) +
"/" +
470 self.vars[
"best_pdb_name_suffix"] + pdbext)
473 if self.em_object_for_rmf
is not None:
474 output_hierarchies = [
476 self.em_object_for_rmf.get_density_as_hierarchy(
479 output_hierarchies = [self.root_hier]
481 if not self.test_mode:
482 print(
"Setting up and writing initial rmf coordinate file")
483 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
484 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
486 listofobjects=self.rmf_output_objects)
487 if self._rmf_restraints:
488 output.add_restraints_to_rmf(
489 init_suffix +
"." + str(myindex) +
".rmf3",
490 self._rmf_restraints)
491 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
492 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
494 if not self.test_mode:
495 mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
497 mpivs = _MockMPIValues()
499 self._add_provenance(sampler_md, sampler_mc)
501 if not self.test_mode:
502 print(
"Setting up production rmf files")
503 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
504 output.init_rmf(rmfname, output_hierarchies,
505 geometries=self.vars[
"geometries"],
506 listofobjects=self.rmf_output_objects)
508 if self._rmf_restraints:
509 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
511 ntimes_at_low_temp = 0
515 self.replica_exchange_object.set_was_used(
True)
516 nframes = self.vars[
"number_of_frames"]
519 for i
in range(nframes):
523 for nr
in range(self.vars[
"num_sample_rounds"]):
524 if sampler_md
is not None:
526 self.vars[
"molecular_dynamics_steps"])
527 if sampler_mc
is not None:
528 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
530 self.model).evaluate(
False)
531 mpivs.set_value(
"score", score)
532 output.set_output_entry(
"score", score)
534 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
536 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
537 save_frame = (min_temp_index == my_temp_index)
538 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
539 score_perc = mpivs.get_percentile(
"score")
540 save_frame = (score_perc*100.0 <= 25.0)
541 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
542 score_perc = mpivs.get_percentile(
"score")
543 save_frame = (score_perc*100.0 <= 50.0)
544 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
545 score_perc = mpivs.get_percentile(
"score")
546 save_frame = (score_perc*100.0 <= 75.0)
549 if save_frame
and not self.test_mode:
553 print(
"--- frame %s score %s " % (str(i), str(score)))
555 if not self.test_mode:
556 if i % self.vars[
"nframes_write_coordinates"] == 0:
557 print(
'--- writing coordinates')
558 if self.vars[
"number_of_best_scoring_models"] > 0:
559 output.write_pdb_best_scoring(score)
560 output.write_rmf(rmfname)
561 output.set_output_entry(
"rmf_file", rmfname)
562 output.set_output_entry(
"rmf_frame_index",
565 output.set_output_entry(
"rmf_file", rmfname)
566 output.set_output_entry(
"rmf_frame_index",
'-1')
567 if self.output_objects
is not None:
568 output.write_stat2(low_temp_stat_file)
569 ntimes_at_low_temp += 1
571 if not self.test_mode:
572 output.write_stat2(replica_stat_file)
573 if self.vars[
"replica_exchange_swap"]:
574 rex.swap_temp(i, score)
575 for p, state
in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
576 p.add_replica_exchange(state, self)
578 if not self.test_mode:
579 print(
"closing production rmf files")
580 output.close_rmf(rmfname)
584 """A macro to build a IMP::pmi::topology::System based on a
585 TopologyReader object.
587 Easily create multi-state systems by calling this macro
588 repeatedly with different TopologyReader objects!
589 A useful function is get_molecules() which returns the PMI Molecules
590 grouped by state as a dictionary with key = (molecule name),
591 value = IMP.pmi.topology.Molecule
592 Quick multi-state system:
595 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
596 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
597 bs = IMP.pmi.macros.BuildSystem(model)
598 bs.add_state(reader1)
599 bs.add_state(reader2)
600 bs.execute_macro() # build everything including degrees of freedom
601 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
602 ### now you have a two state system, you add restraints etc
604 @note The "domain name" entry of the topology reader is not used.
605 All molecules are set up by the component name, but split into rigid bodies
609 _alphabets = {
'DNA': IMP.pmi.alphabets.dna,
610 'RNA': IMP.pmi.alphabets.rna}
612 def __init__(self, model, sequence_connectivity_scale=4.0,
613 force_create_gmm_files=
False, resolutions=[1, 10],
616 @param model An IMP Model
617 @param sequence_connectivity_scale For scaling the connectivity
619 @param force_create_gmm_files If True, will sample and create GMMs
620 no matter what. If False, will only sample if the
621 files don't exist. If number of Gaussians is zero, won't
623 @param resolutions The resolutions to build for structured regions
624 @param name The name of the top-level hierarchy node.
631 self._domain_res = []
633 self.force_create_gmm_files = force_create_gmm_files
634 self.resolutions = resolutions
636 def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
638 """Add a state using the topology info in a
639 IMP::pmi::topology::TopologyReader object.
640 When you are done adding states, call execute_macro()
641 @param reader The TopologyReader object
642 @param fasta_name_map dictionary for converting protein names
643 found in the fasta file
644 @param chain_ids A list or string of chain IDs for assigning to
645 newly-created molecules, e.g.
646 `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
647 If not specified, chain IDs A through Z are assigned, then
648 AA through AZ, then BA through BZ, and so on, in the same
651 state = self.system.create_state()
652 self._readers.append(reader)
654 these_domain_res = {}
656 if chain_ids
is None:
657 chain_ids = IMP.pmi.output._ChainIDs()
662 for molname
in reader.get_molecules():
663 copies = reader.get_molecules()[molname].domains
664 for nc, copyname
in enumerate(copies):
665 print(
"BuildSystem.add_state: setting up molecule %s copy "
666 "number %s" % (molname, str(nc)))
667 copy = copies[copyname]
670 all_chains = [c
for c
in copy
if c.chain
is not None]
672 chain_id = all_chains[0].chain
674 chain_id = chain_ids[numchain]
676 "No PDBs specified for %s, so keep_chain_id has "
677 "no effect; using default chain ID '%s'"
680 chain_id = chain_ids[numchain]
682 alphabet = IMP.pmi.alphabets.amino_acid
683 fasta_flag = copy[0].fasta_flag
684 if fasta_flag
in self._alphabets:
685 alphabet = self._alphabets[fasta_flag]
687 copy[0].fasta_file, fasta_name_map)
688 seq = seqs[copy[0].fasta_id]
689 print(
"BuildSystem.add_state: molecule %s sequence has "
690 "%s residues" % (molname, len(seq)))
691 orig_mol = state.create_molecule(
692 molname, seq, chain_id, alphabet=alphabet,
693 uniprot=seqs.uniprot.get(copy[0].fasta_id))
697 print(
"BuildSystem.add_state: creating a copy for "
698 "molecule %s" % molname)
699 mol = orig_mol.create_copy(chain_id)
702 for domainnumber, domain
in enumerate(copy):
703 print(
"BuildSystem.add_state: ---- setting up domain %s "
704 "of molecule %s" % (domainnumber, molname))
707 these_domains[domain.get_unique_name()] = domain
708 if domain.residue_range == []
or \
709 domain.residue_range
is None:
710 domain_res = mol.get_residues()
712 start = domain.residue_range[0]+domain.pdb_offset
713 if domain.residue_range[1] ==
'END':
714 end = len(mol.sequence)
716 end = domain.residue_range[1]+domain.pdb_offset
717 domain_res = mol.residue_range(start-1, end-1)
718 print(
"BuildSystem.add_state: -------- domain %s of "
719 "molecule %s extends from residue %s to "
721 % (domainnumber, molname, start, end))
722 if domain.pdb_file ==
"BEADS":
723 print(
"BuildSystem.add_state: -------- domain %s of "
724 "molecule %s represented by BEADS "
725 % (domainnumber, molname))
726 mol.add_representation(
728 resolutions=[domain.bead_size],
729 setup_particles_as_densities=(
730 domain.em_residues_per_gaussian != 0),
732 these_domain_res[domain.get_unique_name()] = \
734 elif domain.pdb_file ==
"IDEAL_HELIX":
735 print(
"BuildSystem.add_state: -------- domain %s of "
736 "molecule %s represented by IDEAL_HELIX "
737 % (domainnumber, molname))
738 emper = domain.em_residues_per_gaussian
739 mol.add_representation(
741 resolutions=self.resolutions,
743 density_residues_per_component=emper,
744 density_prefix=domain.density_prefix,
745 density_force_compute=self.force_create_gmm_files,
747 these_domain_res[domain.get_unique_name()] = \
750 print(
"BuildSystem.add_state: -------- domain %s of "
751 "molecule %s represented by pdb file %s "
752 % (domainnumber, molname, domain.pdb_file))
753 domain_atomic = mol.add_structure(domain.pdb_file,
755 domain.residue_range,
758 domain_non_atomic = domain_res - domain_atomic
759 if not domain.em_residues_per_gaussian:
760 mol.add_representation(
761 domain_atomic, resolutions=self.resolutions,
763 if len(domain_non_atomic) > 0:
764 mol.add_representation(
766 resolutions=[domain.bead_size],
769 print(
"BuildSystem.add_state: -------- domain %s "
770 "of molecule %s represented by gaussians "
771 % (domainnumber, molname))
772 emper = domain.em_residues_per_gaussian
773 creategmm = self.force_create_gmm_files
774 mol.add_representation(
776 resolutions=self.resolutions,
777 density_residues_per_component=emper,
778 density_prefix=domain.density_prefix,
779 density_force_compute=creategmm,
781 if len(domain_non_atomic) > 0:
782 mol.add_representation(
784 resolutions=[domain.bead_size],
785 setup_particles_as_densities=
True,
787 these_domain_res[domain.get_unique_name()] = (
788 domain_atomic, domain_non_atomic)
789 self._domain_res.append(these_domain_res)
790 self._domains.append(these_domains)
791 print(
'BuildSystem.add_state: State', len(self.system.states),
'added')
795 """Return list of all molecules grouped by state.
796 For each state, it's a dictionary of Molecules where key is the
799 return [s.get_molecules()
for s
in self.system.get_states()]
801 def get_molecule(self, molname, copy_index=0, state_index=0):
802 return self.system.get_states()[state_index].
get_molecules()[
806 max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
807 """Builds representations and sets up degrees of freedom"""
808 print(
"BuildSystem.execute_macro: building representations")
809 self.root_hier = self.system.build()
811 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
813 for nstate, reader
in enumerate(self._readers):
814 rbs = reader.get_rigid_bodies()
815 srbs = reader.get_super_rigid_bodies()
816 csrbs = reader.get_chains_of_super_rigid_bodies()
819 domains_in_rbs = set()
821 print(
"BuildSystem.execute_macro: -------- building rigid "
822 "body %s" % (str(rblist)))
823 all_res = IMP.pmi.tools.OrderedSet()
824 bead_res = IMP.pmi.tools.OrderedSet()
826 domain = self._domains[nstate][dname]
827 print(
"BuildSystem.execute_macro: -------- adding %s"
829 all_res |= self._domain_res[nstate][dname][0]
830 bead_res |= self._domain_res[nstate][dname][1]
831 domains_in_rbs.add(dname)
833 print(
"BuildSystem.execute_macro: -------- creating rigid "
834 "body with max_trans %s max_rot %s "
835 "non_rigid_max_trans %s"
836 % (str(max_rb_trans), str(max_rb_rot),
837 str(max_bead_trans)))
838 self.dof.create_rigid_body(all_res,
839 nonrigid_parts=bead_res,
840 max_trans=max_rb_trans,
842 nonrigid_max_trans=max_bead_trans,
843 name=
"RigidBody %s" % dname)
846 for dname, domain
in self._domains[nstate].items():
847 if dname
not in domains_in_rbs:
848 if domain.pdb_file !=
"BEADS":
850 "No rigid bodies set for %s. Residues read from "
851 "the PDB file will not be sampled - only regions "
852 "missing from the PDB will be treated flexibly. "
853 "To sample the entire sequence, use BEADS instead "
854 "of a PDB file name" % dname,
856 self.dof.create_flexible_beads(
857 self._domain_res[nstate][dname][1],
858 max_trans=max_bead_trans)
862 print(
"BuildSystem.execute_macro: -------- building "
863 "super rigid body %s" % (str(srblist)))
864 all_res = IMP.pmi.tools.OrderedSet()
865 for dname
in srblist:
866 print(
"BuildSystem.execute_macro: -------- adding %s"
868 all_res |= self._domain_res[nstate][dname][0]
869 all_res |= self._domain_res[nstate][dname][1]
871 print(
"BuildSystem.execute_macro: -------- creating super "
872 "rigid body with max_trans %s max_rot %s "
873 % (str(max_srb_trans), str(max_srb_rot)))
874 self.dof.create_super_rigid_body(
875 all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
878 for csrblist
in csrbs:
879 all_res = IMP.pmi.tools.OrderedSet()
880 for dname
in csrblist:
881 all_res |= self._domain_res[nstate][dname][0]
882 all_res |= self._domain_res[nstate][dname][1]
883 all_res = list(all_res)
884 all_res.sort(key=
lambda r: r.get_index())
885 self.dof.create_main_chain_mover(all_res)
886 return self.root_hier, self.dof
891 """A macro for running all the basic operations of analysis.
892 Includes clustering, precision analysis, and making ensemble density maps.
893 A number of plots are also supported.
896 merge_directories=[
"./"],
897 stat_file_name_suffix=
"stat",
898 best_pdb_name_suffix=
"model",
900 do_create_directories=
True,
901 global_output_directory=
"output/",
902 replica_stat_file_suffix=
"stat_replica",
903 global_analysis_result_directory=
"./analysis/",
906 @param model The IMP model
907 @param stat_file_name_suffix
908 @param merge_directories The directories containing output files
909 @param best_pdb_name_suffix
910 @param do_clean_first
911 @param do_create_directories
912 @param global_output_directory Where everything is
913 @param replica_stat_file_suffix
914 @param global_analysis_result_directory
915 @param test_mode If True, nothing is changed on disk
919 from mpi4py
import MPI
920 self.comm = MPI.COMM_WORLD
921 self.rank = self.comm.Get_rank()
922 self.number_of_processes = self.comm.size
925 self.number_of_processes = 1
927 self.test_mode = test_mode
928 self._protocol_output = []
929 self.cluster_obj =
None
931 stat_dir = global_output_directory
934 for rd
in merge_directories:
935 stat_files = glob.glob(os.path.join(rd, stat_dir,
"stat.*.out"))
936 if len(stat_files) == 0:
937 warnings.warn(
"no stat files found in %s"
938 % os.path.join(rd, stat_dir),
940 self.stat_files += stat_files
943 """Capture details of the modeling protocol.
944 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
947 self._protocol_output.append((p, p._last_state))
950 score_key=
"Total_Score",
951 rmf_file_key=
"rmf_file",
952 rmf_file_frame_key=
"rmf_frame_index",
955 nframes_trajectory=10000):
956 """ Get a trajectory of the modeling run, for generating
959 @param score_key The score for ranking models
960 @param rmf_file_key Key pointing to RMF filename
961 @param rmf_file_frame_key Key pointing to RMF frame number
962 @param outputdir The local output directory used in the run
963 @param get_every Extract every nth frame
964 @param nframes_trajectory Total number of frames of the trajectory
969 self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
971 score_list = list(map(float, trajectory_models[2]))
973 max_score = max(score_list)
974 min_score = min(score_list)
976 bins = [(max_score-min_score)*math.exp(-float(i))+min_score
977 for i
in range(nframes_trajectory)]
978 binned_scores = [
None]*nframes_trajectory
979 binned_model_indexes = [-1]*nframes_trajectory
981 for model_index, s
in enumerate(score_list):
982 bins_score_diffs = [abs(s-b)
for b
in bins]
983 bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
984 if binned_scores[bin_index]
is None:
985 binned_scores[bin_index] = s
986 binned_model_indexes[bin_index] = model_index
988 old_diff = abs(binned_scores[bin_index]-bins[bin_index])
989 new_diff = abs(s-bins[bin_index])
990 if new_diff < old_diff:
991 binned_scores[bin_index] = s
992 binned_model_indexes[bin_index] = model_index
995 print(binned_model_indexes)
997 def _expand_ambiguity(self, prot, d):
998 """If using PMI2, expand the dictionary to include copies as
1001 This also keeps the states separate.
1006 if '..' in key
or (isinstance(val, tuple)
and len(val) >= 3):
1009 states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1010 if isinstance(val, tuple):
1018 for nst
in range(len(states)):
1020 copies = sel.get_selected_particles(with_representation=
False)
1022 for nc
in range(len(copies)):
1024 newdict[
'%s.%i..%i' % (name, nst, nc)] = \
1025 (start, stop, name, nc, nst)
1027 newdict[
'%s..%i' % (name, nc)] = \
1028 (start, stop, name, nc, nst)
1034 score_key=
"Total_Score",
1035 rmf_file_key=
"rmf_file",
1036 rmf_file_frame_key=
"rmf_frame_index",
1038 prefiltervalue=
None,
1041 alignment_components=
None,
1042 number_of_best_scoring_models=10,
1043 rmsd_calculation_components=
None,
1044 distance_matrix_file=
'distances.mat',
1045 load_distance_matrix_file=
False,
1046 skip_clustering=
False,
1047 number_of_clusters=1,
1049 exit_after_display=
True,
1051 first_and_last_frames=
None,
1052 density_custom_ranges=
None,
1053 write_pdb_with_centered_coordinates=
False,
1055 """Get the best scoring models, compute a distance matrix,
1056 cluster them, and create density maps.
1058 Tuple format: "molname" just the molecule,
1059 or (start,stop,molname,copy_num(optional),state_num(optional)
1060 Can pass None for copy or state to ignore that field.
1061 If you don't pass a specific copy number
1063 @param score_key The score for ranking models.
1064 @param rmf_file_key Key pointing to RMF filename
1065 @param rmf_file_frame_key Key pointing to RMF frame number
1066 @param state_number State number to analyze
1067 @param prefiltervalue Only include frames where the
1068 score key is below this value
1069 @param feature_keys Keywords for which you want to
1070 calculate average, medians, etc.
1071 If you pass "Keyname" it'll include everything that matches
1073 @param outputdir The local output directory used in
1075 @param alignment_components Dictionary with keys=groupname,
1076 values are tuples for aligning the structures
1077 e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1078 @param number_of_best_scoring_models Num models to keep per run
1079 @param rmsd_calculation_components For calculating RMSD
1080 (same format as alignment_components)
1081 @param distance_matrix_file Where to store/read the
1083 @param load_distance_matrix_file Try to load the distance
1085 @param skip_clustering Just extract the best scoring
1086 models and save the pdbs
1087 @param number_of_clusters Number of k-means clusters
1088 @param display_plot Display the distance matrix
1089 @param exit_after_display Exit after displaying distance
1091 @param get_every Extract every nth frame
1092 @param first_and_last_frames A tuple with the first and last
1093 frames to be analyzed. Values are percentages!
1094 Default: get all frames
1095 @param density_custom_ranges For density calculation
1096 (same format as alignment_components)
1097 @param write_pdb_with_centered_coordinates
1098 @param voxel_size Used for the density output
1102 self._outputdir = Path(outputdir).absolute()
1103 self._number_of_clusters = number_of_clusters
1104 for p, state
in self._protocol_output:
1105 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1116 if not load_distance_matrix_file:
1117 if len(self.stat_files) == 0:
1118 print(
"ERROR: no stat file found in the given path")
1120 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1121 self.stat_files, self.number_of_processes)[self.rank]
1124 for k
in (score_key, rmf_file_key, rmf_file_frame_key):
1125 if k
in feature_keys:
1127 "no need to pass " + k +
" to feature_keys.",
1129 feature_keys.remove(k)
1132 my_stat_files, score_key, feature_keys, rmf_file_key,
1133 rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1134 rmf_file_list = best_models[0]
1135 rmf_file_frame_list = best_models[1]
1136 score_list = best_models[2]
1137 feature_keyword_list_dict = best_models[3]
1143 if self.number_of_processes > 1:
1147 rmf_file_frame_list)
1148 for k
in feature_keyword_list_dict:
1149 feature_keyword_list_dict[k] = \
1151 feature_keyword_list_dict[k])
1154 score_rmf_tuples = list(zip(score_list,
1156 rmf_file_frame_list,
1157 list(range(len(score_list)))))
1159 if density_custom_ranges:
1160 for k
in density_custom_ranges:
1161 if not isinstance(density_custom_ranges[k], list):
1162 raise Exception(
"Density custom ranges: values must "
1163 "be lists of tuples")
1166 if first_and_last_frames
is not None:
1167 nframes = len(score_rmf_tuples)
1168 first_frame = int(first_and_last_frames[0] * nframes)
1169 last_frame = int(first_and_last_frames[1] * nframes)
1170 if last_frame > len(score_rmf_tuples):
1172 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1175 best_score_rmf_tuples = sorted(
1177 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1178 best_score_rmf_tuples = [t+(n,)
for n, t
in
1179 enumerate(best_score_rmf_tuples)]
1181 prov.append(IMP.pmi.io.FilterProvenance(
1182 "Best scoring", 0, number_of_best_scoring_models))
1184 best_score_feature_keyword_list_dict = defaultdict(list)
1185 for tpl
in best_score_rmf_tuples:
1187 for f
in feature_keyword_list_dict:
1188 best_score_feature_keyword_list_dict[f].append(
1189 feature_keyword_list_dict[f][index])
1190 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1191 best_score_rmf_tuples,
1192 self.number_of_processes)[self.rank]
1195 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1196 self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1197 if rmsd_calculation_components
is not None:
1198 tmp = self._expand_ambiguity(
1199 prot_ahead, rmsd_calculation_components)
1200 if tmp != rmsd_calculation_components:
1201 print(
'Detected ambiguity, expand rmsd components to',
1203 rmsd_calculation_components = tmp
1204 if alignment_components
is not None:
1205 tmp = self._expand_ambiguity(prot_ahead,
1206 alignment_components)
1207 if tmp != alignment_components:
1208 print(
'Detected ambiguity, expand alignment '
1209 'components to', tmp)
1210 alignment_components = tmp
1216 self.model, my_best_score_rmf_tuples[0],
1217 rmsd_calculation_components, state_number=state_number)
1219 self.model, my_best_score_rmf_tuples, alignment_components,
1220 rmsd_calculation_components, state_number=state_number)
1228 all_coordinates = got_coords[0]
1231 alignment_coordinates = got_coords[1]
1234 rmsd_coordinates = got_coords[2]
1237 rmf_file_name_index_dict = got_coords[3]
1240 all_rmf_file_names = got_coords[4]
1246 if density_custom_ranges:
1248 density_custom_ranges, voxel=voxel_size)
1250 dircluster = os.path.join(outputdir,
1251 "all_models."+str(self.rank))
1257 os.mkdir(dircluster)
1260 clusstat = open(os.path.join(
1261 dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1262 for cnt, tpl
in enumerate(my_best_score_rmf_tuples):
1264 rmf_frame_number = tpl[2]
1267 for key
in best_score_feature_keyword_list_dict:
1269 best_score_feature_keyword_list_dict[key][index]
1273 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1274 self.model, rmf_frame_number, rmf_name)
1276 linking_successful = \
1277 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1278 self.model, prots, rs, rmf_frame_number,
1280 if not linking_successful:
1286 states = IMP.atom.get_by_type(
1287 prots[0], IMP.atom.STATE_TYPE)
1288 prot = states[state_number]
1293 coords_f1 = alignment_coordinates[cnt]
1295 coords_f2 = alignment_coordinates[cnt]
1298 coords_f1, coords_f2)
1299 transformation = Ali.align()[1]
1313 rb = rbm.get_rigid_body()
1323 out_pdb_fn = os.path.join(
1324 dircluster, str(cnt)+
"."+str(self.rank)+
".pdb")
1325 out_rmf_fn = os.path.join(
1326 dircluster, str(cnt)+
"."+str(self.rank)+
".rmf3")
1327 o.init_pdb(out_pdb_fn, prot)
1328 tc = write_pdb_with_centered_coordinates
1329 o.write_pdb(out_pdb_fn,
1330 translate_to_geometric_center=tc)
1332 tmp_dict[
"local_pdb_file_name"] = \
1333 os.path.basename(out_pdb_fn)
1334 tmp_dict[
"rmf_file_full_path"] = rmf_name
1335 tmp_dict[
"local_rmf_file_name"] = \
1336 os.path.basename(out_rmf_fn)
1337 tmp_dict[
"local_rmf_frame_number"] = 0
1339 clusstat.write(str(tmp_dict)+
"\n")
1344 h.set_name(
"System")
1346 o.init_rmf(out_rmf_fn, [h], rs)
1348 o.write_rmf(out_rmf_fn)
1349 o.close_rmf(out_rmf_fn)
1351 if density_custom_ranges:
1352 DensModule.add_subunits_density(prot)
1354 if density_custom_ranges:
1355 DensModule.write_mrc(path=dircluster)
1360 if self.number_of_processes > 1:
1366 rmf_file_name_index_dict)
1368 alignment_coordinates)
1375 [best_score_feature_keyword_list_dict,
1376 rmf_file_name_index_dict],
1382 print(
"setup clustering class")
1385 for n, model_coordinate_dict
in enumerate(all_coordinates):
1387 if (alignment_components
is not None
1388 and len(self.cluster_obj.all_coords) == 0):
1390 self.cluster_obj.set_template(alignment_coordinates[n])
1391 self.cluster_obj.fill(all_rmf_file_names[n],
1392 rmsd_coordinates[n])
1393 print(
"Global calculating the distance matrix")
1396 self.cluster_obj.dist_matrix()
1400 self.cluster_obj.do_cluster(number_of_clusters)
1403 self.cluster_obj.plot_matrix(
1404 figurename=os.path.join(outputdir,
1406 if exit_after_display:
1408 self.cluster_obj.save_distance_matrix_file(
1409 file_name=distance_matrix_file)
1416 print(
"setup clustering class")
1418 self.cluster_obj.load_distance_matrix_file(
1419 file_name=distance_matrix_file)
1420 print(
"clustering with %s clusters" % str(number_of_clusters))
1421 self.cluster_obj.do_cluster(number_of_clusters)
1422 [best_score_feature_keyword_list_dict,
1423 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1426 self.cluster_obj.plot_matrix(figurename=os.path.join(
1427 outputdir,
'dist_matrix.pdf'))
1428 if exit_after_display:
1430 if self.number_of_processes > 1:
1438 print(self.cluster_obj.get_cluster_labels())
1439 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1440 print(
"rank %s " % str(self.rank))
1441 print(
"cluster %s " % str(n))
1442 print(
"cluster label %s " % str(cl))
1443 print(self.cluster_obj.get_cluster_label_names(cl))
1445 len(self.cluster_obj.get_cluster_label_names(cl))
1447 prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1450 if density_custom_ranges:
1452 density_custom_ranges,
1455 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1457 os.mkdir(dircluster)
1463 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1464 clusstat = open(dircluster +
"stat.out",
"w")
1465 for k, structure_name
in enumerate(
1466 self.cluster_obj.get_cluster_label_names(cl)):
1469 tmp_dict.update(rmsd_dict)
1470 index = rmf_file_name_index_dict[structure_name]
1471 for key
in best_score_feature_keyword_list_dict:
1473 key] = best_score_feature_keyword_list_dict[
1479 rmf_name = structure_name.split(
"|")[0]
1480 rmf_frame_number = int(structure_name.split(
"|")[1])
1481 clusstat.write(str(tmp_dict) +
"\n")
1486 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1487 self.model, rmf_frame_number, rmf_name)
1489 linking_successful = \
1490 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1491 self.model, prots, rs, rmf_frame_number,
1493 if not linking_successful:
1498 states = IMP.atom.get_by_type(
1499 prots[0], IMP.atom.STATE_TYPE)
1500 prot = states[state_number]
1506 co = self.cluster_obj
1507 model_index = co.get_model_index_from_name(
1509 transformation = co.get_transformation_to_first_member(
1520 rb = rbm.get_rigid_body()
1529 if density_custom_ranges:
1530 DensModule.add_subunits_density(prot)
1535 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1536 o.write_pdb(dircluster + str(k) +
".pdb")
1541 h.set_name(
"System")
1543 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1544 o.write_rmf(dircluster + str(k) +
".rmf3")
1545 o.close_rmf(dircluster + str(k) +
".rmf3")
1550 if density_custom_ranges:
1551 DensModule.write_mrc(path=dircluster)
1554 if self.number_of_processes > 1:
1557 def get_cluster_rmsd(self, cluster_num):
1558 if self.cluster_obj
is None:
1560 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1562 def save_objects(self, objects, file_name):
1564 outf = open(file_name,
'wb')
1565 pickle.dump(objects, outf)
1568 def load_objects(self, file_name):
1570 inputf = open(file_name,
'rb')
1571 objects = pickle.load(inputf)
1579 This class contains analysis utilities to investigate ReplicaExchange
1587 def __init__(self, model, stat_files, best_models=None, score_key=None,
1590 Construction of the Class.
1591 @param model IMP.Model()
1592 @param stat_files list of string. Can be ascii stat files,
1594 @param best_models Integer. Number of best scoring models,
1595 if None: all models will be read
1596 @param alignment boolean (Default=True). Align before computing
1601 self.best_models = best_models
1603 model, stat_files, self.best_models, score_key, cache=
True)
1605 StatHierarchyHandler=self.stath0)
1618 self.clusters.append(c)
1619 for n0
in range(len(self.stath0)):
1621 self.pairwise_rmsd = {}
1622 self.pairwise_molecular_assignment = {}
1623 self.alignment = alignment
1624 self.symmetric_molecules = {}
1625 self.issymmetricsel = {}
1627 self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1629 self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1634 Setup the selection onto which the rmsd is computed
1635 @param kwargs use IMP.atom.Selection keywords
1643 Store names of symmetric molecules
1645 self.symmetric_molecules[molecule_name] = 0
1650 Setup the selection onto which the alignment is computed
1651 @param kwargs use IMP.atom.Selection keywords
1659 def clean_clusters(self):
1660 for c
in self.clusters:
1664 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1666 Cluster the models based on RMSD.
1667 @param rmsd_cutoff Float the distance cutoff in Angstrom
1668 @param metric (Default=IMP.atom.get_rmsd) the metric that will
1669 be used to compute rmsds
1671 self.clean_clusters()
1672 not_clustered = set(range(len(self.stath1)))
1673 while len(not_clustered) > 0:
1674 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1679 Refine the clusters by merging the ones whose centers are close
1680 @param rmsd_cutoff cutoff distance in Angstorms
1682 clusters_copy = self.clusters
1683 for c0, c1
in itertools.combinations(self.clusters, 2):
1684 if c0.center_index
is None:
1686 if c1.center_index
is None:
1688 _ = self.stath0[c0.center_index]
1689 _ = self.stath1[c1.center_index]
1690 rmsd, molecular_assignment = self.
rmsd()
1691 if rmsd <= rmsd_cutoff:
1692 if c1
in self.clusters:
1693 clusters_copy.remove(c1)
1695 self.clusters = clusters_copy
1702 def set_cluster_assignments(self, cluster_ids):
1703 if len(cluster_ids) != len(self.stath0):
1704 raise ValueError(
'cluster ids has to be same length as '
1708 for i
in sorted(list(set(cluster_ids))):
1710 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
1711 self.clusters[idx].add_member(i, d)
1715 Return the model data from a cluster
1716 @param cluster IMP.pmi.output.Cluster object
1725 Save the data for the whole models into a pickle file
1726 @param filename string
1728 self.stath0.save_data(filename)
1732 Set the data from an external IMP.pmi.output.Data
1733 @param data IMP.pmi.output.Data
1735 self.stath0.data = data
1736 self.stath1.data = data
1740 Load the data from an external pickled file
1741 @param filename string
1743 self.stath0.load_data(filename)
1744 self.stath1.load_data(filename)
1745 self.best_models = len(self.stath0)
1747 def add_cluster(self, rmf_name_list):
1749 print(
"creating cluster index "+str(len(self.clusters)))
1750 self.clusters.append(c)
1751 current_len = len(self.stath0)
1753 for rmf
in rmf_name_list:
1754 print(
"adding rmf "+rmf)
1755 self.stath0.add_stat_file(rmf)
1756 self.stath1.add_stat_file(rmf)
1758 for n0
in range(current_len, len(self.stath0)):
1759 d0 = self.stath0[n0]
1760 c.add_member(n0, d0)
1765 Save the clusters into a pickle file
1766 @param filename string
1769 import cPickle
as pickle
1772 fl = open(filename,
'wb')
1773 pickle.dump(self.clusters, fl)
1777 Load the clusters from a pickle file
1778 @param filename string
1779 @param append bool (Default=False), if True. append the clusters
1780 to the ones currently present
1783 import cPickle
as pickle
1786 fl = open(filename,
'rb')
1787 self.clean_clusters()
1789 self.clusters += pickle.load(fl)
1791 self.clusters = pickle.load(fl)
1800 Compute the cluster center for a given cluster
1802 member_distance = defaultdict(float)
1804 for n0, n1
in itertools.combinations(cluster.members, 2):
1807 rmsd, _ = self.
rmsd()
1808 member_distance[n0] += rmsd
1810 if len(member_distance) > 0:
1811 cluster.center_index = min(member_distance,
1812 key=member_distance.get)
1814 cluster.center_index = cluster.members[0]
1819 Save the coordinates of the current cluster a single rmf file
1821 print(
"saving coordinates", cluster)
1825 if rmf_name
is None:
1826 rmf_name = prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
1828 _ = self.stath1[cluster.members[0]]
1830 o.init_rmf(rmf_name, [self.stath1])
1831 for n1
in cluster.members:
1837 o.write_rmf(rmf_name)
1839 o.close_rmf(rmf_name)
1843 remove structures that are similar
1844 append it to a new cluster
1846 print(
"pruning models")
1848 filtered = [selected]
1849 remaining = range(1, len(self.stath1), 10)
1851 while len(remaining) > 0:
1852 d0 = self.stath0[selected]
1854 for n1
in remaining:
1859 if d <= rmsd_cutoff:
1861 print(
"pruning model %s, similar to model %s, rmsd %s"
1862 % (str(n1), str(selected), str(d)))
1863 remaining = [x
for x
in remaining
if x
not in rm]
1864 if len(remaining) == 0:
1866 selected = remaining[0]
1867 filtered.append(selected)
1870 self.clusters.append(c)
1872 d0 = self.stath0[n0]
1873 c.add_member(n0, d0)
1878 Compute the precision of a cluster
1884 if cluster.center_index
is not None:
1885 members1 = [cluster.center_index]
1887 members1 = cluster.members
1891 for n1
in cluster.members:
1896 tmp_rmsd, _ = self.
rmsd()
1901 precision = rmsd/npairs
1902 cluster.precision = precision
1907 Compute the bipartite precision (ie the cross-precision)
1908 between two clusters
1912 for cn0, n0
in enumerate(cluster1.members):
1914 for cn1, n1
in enumerate(cluster2.members):
1916 tmp_rmsd, _ = self.
rmsd()
1918 print(
"--- rmsd between structure %s and structure "
1919 "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1922 precision = rmsd/npairs
1925 def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1926 cluster_ref=
None, step=1):
1928 Compute the Root mean square fluctuations
1929 of a molecule in a cluster
1930 Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1931 residue indexes and the value is the rmsf
1933 rmsf = IMP.pmi.tools.OrderedDict()
1936 if cluster_ref
is not None:
1937 if cluster_ref.center_index
is not None:
1938 members0 = [cluster_ref.center_index]
1940 members0 = cluster_ref.members
1942 if cluster.center_index
is not None:
1943 members0 = [cluster.center_index]
1945 members0 = cluster.members
1948 copy_index=copy_index, state_index=state_index)
1949 ps0 = s0.get_selected_particles()
1951 residue_indexes = list(IMP.pmi.tools.OrderedSet(
1957 d0 = self.stath0[n0]
1958 for n1
in cluster.members[::step]:
1960 print(
"--- rmsf %s %s" % (str(n0), str(n1)))
1964 self.stath1, molecule=molecule,
1965 residue_indexes=residue_indexes, resolution=1,
1966 copy_index=copy_index, state_index=state_index)
1967 ps1 = s1.get_selected_particles()
1969 d1 = self.stath1[n1]
1972 for n, (p0, p1)
in enumerate(zip(ps0, ps1)):
1973 r = residue_indexes[n]
1985 for stath
in [self.stath0, self.stath1]:
1986 if molecule
not in self.symmetric_molecules:
1988 stath, molecule=molecule, residue_index=r,
1989 resolution=1, copy_index=copy_index,
1990 state_index=state_index)
1993 stath, molecule=molecule, residue_index=r,
1994 resolution=1, state_index=state_index)
1996 ps = s.get_selected_particles()
2005 def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2006 reference=
"Absolute", prefix=
"./", step=1):
2012 for n1
in cluster.members[::step]:
2013 print(
"density "+str(n1))
2018 dens.add_subunits_density(self.stath1)
2020 dens.write_mrc(path=prefix+
'/', suffix=str(cluster.cluster_id))
2023 def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2024 consolidate=
False, molecules=
None, prefix=
'./',
2025 reference=
"Absolute"):
2029 import matplotlib.pyplot
as plt
2030 import matplotlib.cm
as cm
2031 from scipy.spatial.distance
import cdist
2033 if molecules
is None:
2042 molecules=molecules).get_selected_particles())]
2043 unique_copies = [mol
for mol
in mols
if mol.get_copy_index() == 0]
2044 mol_names_unique = dict((mol.get_name(), mol)
for mol
in unique_copies)
2045 total_len_unique = sum(max(mol.get_residue_indexes())
2046 for mol
in unique_copies)
2053 seqlen = max(mol.get_residue_indexes())
2054 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2058 for mol
in unique_copies:
2059 seqlen = max(mol.get_residue_indexes())
2060 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2063 for ncl, n1
in enumerate(cluster.members):
2066 coord_dict = IMP.pmi.tools.OrderedDict()
2068 rindexes = mol.get_residue_indexes()
2069 coords = np.ones((max(rindexes), 3))
2070 for rnum
in rindexes:
2073 selpart = sel.get_selected_particles()
2074 if len(selpart) == 0:
2076 selpart = selpart[0]
2077 coords[rnum - 1, :] = \
2079 coord_dict[mol] = coords
2082 coords = np.concatenate(list(coord_dict.values()))
2083 dists = cdist(coords, coords)
2084 binary_dists = np.where((dists <= contact_threshold)
2085 & (dists >= 1.0), 1.0, 0.0)
2087 binary_dists_dict = {}
2089 len1 = max(mol1.get_residue_indexes())
2091 name1 = mol1.get_name()
2092 name2 = mol2.get_name()
2093 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2094 if (name1, name2)
not in binary_dists_dict:
2095 binary_dists_dict[(name1, name2)] = \
2096 np.zeros((len1, len1))
2097 binary_dists_dict[(name1, name2)] += \
2098 np.where((dists <= contact_threshold)
2099 & (dists >= 1.0), 1.0, 0.0)
2100 binary_dists = np.zeros((total_len_unique, total_len_unique))
2102 for name1, name2
in binary_dists_dict:
2103 r1 = index_dict[mol_names_unique[name1]]
2104 r2 = index_dict[mol_names_unique[name2]]
2105 binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2106 np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2112 contact_freqs = binary_dists
2114 dist_maps.append(dists)
2115 av_dist_map += dists
2116 contact_freqs += binary_dists
2119 contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2121 contact_freqs = 1.0/len(cluster)*contact_freqs
2122 av_dist_map = 1.0/len(cluster)*contact_freqs
2124 fig = plt.figure(figsize=(100, 100))
2125 ax = fig.add_subplot(111)
2128 gap_between_components = 50
2133 sorted_tuple = sorted(
2135 mol).get_extended_name(), mol)
for mol
in mols)
2136 prot_list = list(zip(*sorted_tuple))[1]
2138 sorted_tuple = sorted(
2140 for mol
in unique_copies)
2141 prot_list = list(zip(*sorted_tuple))[1]
2143 prot_listx = prot_list
2144 nresx = gap_between_components + \
2145 sum([max(mol.get_residue_indexes())
2146 + gap_between_components
for mol
in prot_listx])
2149 prot_listy = prot_list
2150 nresy = gap_between_components + \
2151 sum([max(mol.get_residue_indexes())
2152 + gap_between_components
for mol
in prot_listy])
2157 res = gap_between_components
2158 for mol
in prot_listx:
2159 resoffsetx[mol] = res
2160 res += max(mol.get_residue_indexes())
2162 res += gap_between_components
2166 res = gap_between_components
2167 for mol
in prot_listy:
2168 resoffsety[mol] = res
2169 res += max(mol.get_residue_indexes())
2171 res += gap_between_components
2173 resoffsetdiagonal = {}
2174 res = gap_between_components
2175 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2176 resoffsetdiagonal[mol] = res
2177 res += max(mol.get_residue_indexes())
2178 res += gap_between_components
2183 for n, prot
in enumerate(prot_listx):
2184 res = resoffsetx[prot]
2186 for proty
in prot_listy:
2187 resy = resoffsety[proty]
2188 endy = resendy[proty]
2189 ax.plot([res, res], [resy, endy], linestyle=
'-',
2190 color=
'gray', lw=0.4)
2191 ax.plot([end, end], [resy, endy], linestyle=
'-',
2192 color=
'gray', lw=0.4)
2193 xticks.append((float(res) + float(end)) / 2)
2195 prot).get_extended_name())
2199 for n, prot
in enumerate(prot_listy):
2200 res = resoffsety[prot]
2202 for protx
in prot_listx:
2203 resx = resoffsetx[protx]
2204 endx = resendx[protx]
2205 ax.plot([resx, endx], [res, res], linestyle=
'-',
2206 color=
'gray', lw=0.4)
2207 ax.plot([resx, endx], [end, end], linestyle=
'-',
2208 color=
'gray', lw=0.4)
2209 yticks.append((float(res) + float(end)) / 2)
2211 prot).get_extended_name())
2215 tmp_array = np.zeros((nresx, nresy))
2217 for px
in prot_listx:
2218 for py
in prot_listy:
2219 resx = resoffsetx[px]
2220 lengx = resendx[px] - 1
2221 resy = resoffsety[py]
2222 lengy = resendy[py] - 1
2223 indexes_x = index_dict[px]
2224 minx = min(indexes_x)
2225 maxx = max(indexes_x)
2226 indexes_y = index_dict[py]
2227 miny = min(indexes_y)
2228 maxy = max(indexes_y)
2229 tmp_array[resx:lengx, resy:lengy] = \
2230 contact_freqs[minx:maxx, miny:maxy]
2231 ret[(px, py)] = np.argwhere(
2232 contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2234 ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2235 origin=
'lower', alpha=0.6, interpolation=
'nearest')
2237 ax.set_xticks(xticks)
2238 ax.set_xticklabels(xlabels, rotation=90)
2239 ax.set_yticks(yticks)
2240 ax.set_yticklabels(ylabels)
2241 plt.setp(ax.get_xticklabels(), fontsize=6)
2242 plt.setp(ax.get_yticklabels(), fontsize=6)
2245 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2246 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2248 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf",
2249 dpi=300, transparent=
"False")
2252 def plot_rmsd_matrix(self, filename):
2253 self.compute_all_pairwise_rmsd()
2254 distance_matrix = np.zeros(
2255 (len(self.stath0), len(self.stath1)))
2256 for (n0, n1)
in self.pairwise_rmsd:
2257 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2259 import matplotlib
as mpl
2261 import matplotlib.pylab
as pl
2262 from scipy.cluster
import hierarchy
as hrc
2264 fig = pl.figure(figsize=(10, 8))
2265 ax = fig.add_subplot(212)
2266 dendrogram = hrc.dendrogram(
2267 hrc.linkage(distance_matrix),
2270 leaves_order = dendrogram[
'leaves']
2271 ax.set_xlabel(
'Model')
2272 ax.set_ylabel(
'RMSD [Angstroms]')
2274 ax2 = fig.add_subplot(221)
2276 distance_matrix[leaves_order, :][:, leaves_order],
2277 interpolation=
'nearest')
2278 cb = fig.colorbar(cax)
2279 cb.set_label(
'RMSD [Angstroms]')
2280 ax2.set_xlabel(
'Model')
2281 ax2.set_ylabel(
'Model')
2283 pl.savefig(filename, dpi=300)
2292 Update the cluster id numbers
2294 for n, c
in enumerate(self.clusters):
2297 def get_molecule(self, hier, name, copy):
2305 self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2306 self.sel0_rmsd.get_selected_particles())
2307 self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2308 self.sel1_rmsd.get_selected_particles())
2309 for mol
in self.seldict0:
2310 for sel
in self.seldict0[mol]:
2311 self.issymmetricsel[sel] =
False
2312 for mol
in self.symmetric_molecules:
2313 self.symmetric_molecules[mol] = len(self.seldict0[mol])
2314 for sel
in self.seldict0[mol]:
2315 self.issymmetricsel[sel] =
True
2319 self.sel1_alignment, self.sel0_alignment)
2321 for rb
in self.rbs1:
2324 for bead
in self.beads1:
2332 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2334 initial filling of the clusters.
2337 print(
"clustering model "+str(n0))
2338 d0 = self.stath0[n0]
2340 print(
"creating cluster index "+str(len(self.clusters)))
2341 self.clusters.append(c)
2342 c.add_member(n0, d0)
2343 clustered = set([n0])
2345 print(
"--- trying to add model " + str(n1) +
" to cluster "
2346 + str(len(self.clusters)))
2347 d1 = self.stath1[n1]
2350 rmsd, _ = self.
rmsd(metric=metric)
2351 if rmsd < rmsd_cutoff:
2352 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2353 c.add_member(n1, d1)
2356 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2361 merge the clusters that have close members
2362 @param rmsd_cutoff cutoff distance in Angstorms
2370 for c0, c1
in filter(
lambda x: len(x[0].members) > 1,
2371 itertools.combinations(self.clusters, 2)):
2372 n0, n1 = [c.members[0]
for c
in (c0, c1)]
2375 rmsd, _ = self.
rmsd()
2376 if (rmsd < 2*rmsd_cutoff
and
2378 to_merge.append((c0, c1))
2380 for c0, c
in reversed(to_merge):
2384 self.clusters = [c
for c
in
2385 filter(
lambda x: len(x.members) > 0, self.clusters)]
2389 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2391 print(
"check close members for clusters " + str(c0.cluster_id) +
2392 " and " + str(c1.cluster_id))
2393 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2396 rmsd, _ = self.
rmsd(metric=metric)
2397 if rmsd < rmsd_cutoff:
2412 a function that returns the permutation best_sel of sels0 that
2415 best_rmsd2 = float(
'inf')
2417 if self.issymmetricsel[sels0[0]]:
2420 for offset
in range(N):
2421 sels = [sels0[(offset+i) % N]
for i
in range(N)]
2424 r = metric(sel0, sel1)
2426 if rmsd2 < best_rmsd2:
2430 for sels
in itertools.permutations(sels0):
2432 for sel0, sel1
in itertools.takewhile(
2433 lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2434 r = metric(sel0, sel1)
2436 if rmsd2 < best_rmsd2:
2439 return best_sel, best_rmsd2
2441 def compute_all_pairwise_rmsd(self):
2442 for d0
in self.stath0:
2443 for d1
in self.stath1:
2444 rmsd, _ = self.
rmsd()
2446 def rmsd(self, metric=IMP.atom.get_rmsd):
2448 Computes the RMSD. Resolves ambiguous pairs assignments
2452 n0 = self.stath0.current_index
2453 n1 = self.stath1.current_index
2454 if ((n0, n1)
in self.pairwise_rmsd) \
2455 and ((n0, n1)
in self.pairwise_molecular_assignment):
2456 return (self.pairwise_rmsd[(n0, n1)],
2457 self.pairwise_molecular_assignment[(n0, n1)])
2467 molecular_assignment = {}
2468 for molname, sels0
in self.seldict0.items():
2469 sels_best_order, best_rmsd2 = \
2470 self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2472 Ncoords = len(sels_best_order[0].get_selected_particles())
2473 Ncopies = len(self.seldict1[molname])
2474 total_rmsd += Ncoords*best_rmsd2
2475 total_N += Ncoords*Ncopies
2477 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2478 p0 = sel0.get_selected_particles()[0]
2479 p1 = sel1.get_selected_particles()[0]
2484 molecular_assignment[(molname, c0)] = (molname, c1)
2486 total_rmsd = math.sqrt(total_rmsd/total_N)
2488 self.pairwise_rmsd[(n0, n1)] = total_rmsd
2489 self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2490 self.pairwise_rmsd[(n1, n0)] = total_rmsd
2491 self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2492 return total_rmsd, molecular_assignment
2496 Fix the reference structure for structural alignment, rmsd and
2499 @param reference can be either "Absolute" (cluster center of the
2500 first cluster) or Relative (cluster center of the current
2503 if reference ==
"Absolute":
2505 elif reference ==
"Relative":
2506 if cluster.center_index:
2507 n0 = cluster.center_index
2509 n0 = cluster.members[0]
2514 compute the molecular assignments between multiple copies
2515 of the same sequence. It changes the Copy index of Molecules
2518 _, molecular_assignment = self.
rmsd()
2519 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2520 mol0 = self.molcopydict0[m0][c0]
2521 mol1 = self.molcopydict1[m1][c1]
2524 p1.set_value(cik0, c0)
2528 Undo the Copy index assignment
2531 _, molecular_assignment = self.
rmsd()
2532 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2533 mol0 = self.molcopydict0[m0][c0]
2534 mol1 = self.molcopydict1[m1][c1]
2537 p1.set_value(cik0, c1)
2544 s =
"AnalysisReplicaExchange\n"
2545 s +=
"---- number of clusters %s \n" % str(len(self.clusters))
2546 s +=
"---- number of models %s \n" % str(len(self.stath0))
2549 def __getitem__(self, int_slice_adaptor):
2550 if isinstance(int_slice_adaptor, int):
2551 return self.clusters[int_slice_adaptor]
2552 elif isinstance(int_slice_adaptor, slice):
2553 return self.__iter__(int_slice_adaptor)
2555 raise TypeError(
"Unknown Type")
2558 return len(self.clusters)
2560 def __iter__(self, slice_key=None):
2561 if slice_key
is None:
2562 for i
in range(len(self)):
2565 for i
in range(len(self))[slice_key]:
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignment.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A member of a rigid body, it has internal (local) coordinates.
A macro to help setup and run replica exchange.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignments between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
def merge_aggregates
merge the clusters that have close members
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
Mapping between FASTA one-letter codes and residue types.
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
Sample using replica exchange.
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.