1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
18 from pathlib
import Path
20 from IMP._compat_pathlib
import Path
22 from operator
import itemgetter
23 from collections
import defaultdict
30 class _RMFRestraints(object):
31 """All restraints that are written out to the RMF file"""
32 def __init__(self, model, user_restraints):
34 self._user_restraints = user_restraints
if user_restraints
else []
37 return (len(self._user_restraints)
38 + self._rmf_rs.get_number_of_restraints())
42 __nonzero__ = __bool__
44 def __getitem__(self, i):
45 class FakePMIWrapper(object):
46 def __init__(self, r):
47 self.r = IMP.RestraintSet.get_from(r)
49 def get_restraint(self):
52 lenuser = len(self._user_restraints)
54 return self._user_restraints[i]
55 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
56 r = self._rmf_rs.get_restraint(i - lenuser)
57 return FakePMIWrapper(r)
59 raise IndexError(
"Out of range")
63 """A macro to help setup and run replica exchange.
64 Supports Monte Carlo and molecular dynamics.
65 Produces trajectory RMF files, best PDB structures,
66 and output stat files.
70 monte_carlo_sample_objects=
None,
71 molecular_dynamics_sample_objects=
None,
73 rmf_output_objects=
None,
74 crosslink_restraints=
None,
75 monte_carlo_temperature=1.0,
76 simulated_annealing=
False,
77 simulated_annealing_minimum_temperature=1.0,
78 simulated_annealing_maximum_temperature=2.5,
79 simulated_annealing_minimum_temperature_nframes=100,
80 simulated_annealing_maximum_temperature_nframes=100,
81 replica_exchange_minimum_temperature=1.0,
82 replica_exchange_maximum_temperature=2.5,
83 replica_exchange_swap=
True,
85 number_of_best_scoring_models=500,
88 molecular_dynamics_steps=10,
89 molecular_dynamics_max_time_step=1.0,
90 number_of_frames=1000,
91 save_coordinates_mode=
"lowest_temperature",
92 nframes_write_coordinates=1,
93 write_initial_rmf=
True,
94 initial_rmf_name_suffix=
"initial",
95 stat_file_name_suffix=
"stat",
96 best_pdb_name_suffix=
"model",
99 do_create_directories=
True,
100 global_output_directory=
"./",
102 best_pdb_dir=
"pdbs/",
103 replica_stat_file_suffix=
"stat_replica",
104 em_object_for_rmf=
None,
106 replica_exchange_object=
None,
110 @param model The IMP model
111 @param root_hier Top-level (System)hierarchy
112 @param monte_carlo_sample_objects Objects for MC sampling; a list of
113 structural components (generally the representation) that
114 will be moved and restraints with parameters that need to
116 For PMI2: just pass flat list of movers
117 @param molecular_dynamics_sample_objects Objects for MD sampling
118 For PMI2: just pass flat list of particles
119 @param output_objects A list of structural objects and restraints
120 that will be included in output (ie, statistics "stat"
121 files). Any object that provides a get_output() method
122 can be used here. If None is passed
123 the macro will not write stat files.
124 @param rmf_output_objects A list of structural objects and
125 restraints that will be included in rmf. Any object
126 that provides a get_output() method can be used here.
127 @param monte_carlo_temperature MC temp (may need to be optimized
128 based on post-sampling analysis)
129 @param simulated_annealing If True, perform simulated annealing
130 @param simulated_annealing_minimum_temperature Should generally be
131 the same as monte_carlo_temperature.
132 @param simulated_annealing_minimum_temperature_nframes Number of
133 frames to compute at minimum temperature.
134 @param simulated_annealing_maximum_temperature_nframes Number of
136 temps > simulated_annealing_maximum_temperature.
137 @param replica_exchange_minimum_temperature Low temp for REX; should
138 generally be the same as monte_carlo_temperature.
139 @param replica_exchange_maximum_temperature High temp for REX
140 @param replica_exchange_swap Boolean, enable disable temperature
142 @param num_sample_rounds Number of rounds of MC/MD per cycle
143 @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
144 models to keep around for analysis.
145 @param mmcif If True, write best scoring models in mmCIF format;
146 if False (the default), write in legacy PDB format.
147 @param best_pdb_dir The directory under `global_output_directory`
148 where best-scoring PDB/mmCIF files are written.
149 @param best_pdb_name_suffix Part of the file name for best-scoring
151 @param monte_carlo_steps Number of MC steps per round
152 @param self_adaptive self adaptive scheme for Monte Carlo movers
153 @param molecular_dynamics_steps Number of MD steps per round
154 @param molecular_dynamics_max_time_step Max time step for MD
155 @param number_of_frames Number of REX frames to run
156 @param save_coordinates_mode string: how to save coordinates.
157 "lowest_temperature" (default) only the lowest temperatures
159 "25th_score" all replicas whose score is below the 25th
161 "50th_score" all replicas whose score is below the 50th
163 "75th_score" all replicas whose score is below the 75th
165 @param nframes_write_coordinates How often to write the coordinates
167 @param write_initial_rmf Write the initial configuration
168 @param global_output_directory Folder that will be created to house
170 @param test_mode Set to True to avoid writing any files, just test
172 @param score_moved If True, attempt to speed up Monte Carlo
173 sampling by caching scoring function terms on particles
180 self.output_objects = output_objects
181 self.rmf_output_objects = rmf_output_objects
183 and root_hier.get_name() ==
'System'):
184 if self.output_objects
is not None:
185 self.output_objects.append(
187 if self.rmf_output_objects
is not None:
188 self.rmf_output_objects.append(
190 self.root_hier = root_hier
191 states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
192 self.vars[
"number_of_states"] = len(states)
194 self.root_hiers = states
195 self.is_multi_state =
True
197 self.root_hier = root_hier
198 self.is_multi_state =
False
200 raise TypeError(
"Must provide System hierarchy (root_hier)")
202 if crosslink_restraints:
204 "crosslink_restraints is deprecated and is ignored; "
205 "all cross-link restraints should be automatically "
206 "added to output RMF files")
207 self._rmf_restraints = _RMFRestraints(model,
None)
208 self.em_object_for_rmf = em_object_for_rmf
209 self.monte_carlo_sample_objects = monte_carlo_sample_objects
210 self.vars[
"self_adaptive"] = self_adaptive
211 if sample_objects
is not None:
213 "sample_objects is deprecated; use monte_carlo_sample_objects "
214 "(or molecular_dynamics_sample_objects) instead")
215 self.monte_carlo_sample_objects += sample_objects
216 self.molecular_dynamics_sample_objects = \
217 molecular_dynamics_sample_objects
218 self.replica_exchange_object = replica_exchange_object
219 self.molecular_dynamics_max_time_step = \
220 molecular_dynamics_max_time_step
221 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
222 self.vars[
"replica_exchange_minimum_temperature"] = \
223 replica_exchange_minimum_temperature
224 self.vars[
"replica_exchange_maximum_temperature"] = \
225 replica_exchange_maximum_temperature
226 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
227 self.vars[
"simulated_annealing"] = simulated_annealing
228 self.vars[
"simulated_annealing_minimum_temperature"] = \
229 simulated_annealing_minimum_temperature
230 self.vars[
"simulated_annealing_maximum_temperature"] = \
231 simulated_annealing_maximum_temperature
232 self.vars[
"simulated_annealing_minimum_temperature_nframes"] = \
233 simulated_annealing_minimum_temperature_nframes
234 self.vars[
"simulated_annealing_maximum_temperature_nframes"] = \
235 simulated_annealing_maximum_temperature_nframes
237 self.vars[
"num_sample_rounds"] = num_sample_rounds
239 "number_of_best_scoring_models"] = number_of_best_scoring_models
240 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
241 self.vars[
"molecular_dynamics_steps"] = molecular_dynamics_steps
242 self.vars[
"number_of_frames"] = number_of_frames
243 if save_coordinates_mode
not in (
"lowest_temperature",
"25th_score",
244 "50th_score",
"75th_score"):
245 raise Exception(
"save_coordinates_mode has unrecognized value")
247 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
248 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
249 self.vars[
"write_initial_rmf"] = write_initial_rmf
250 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
251 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
252 self.vars[
"mmcif"] = mmcif
253 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
254 self.vars[
"do_clean_first"] = do_clean_first
255 self.vars[
"do_create_directories"] = do_create_directories
256 self.vars[
"global_output_directory"] = global_output_directory
257 self.vars[
"rmf_dir"] = rmf_dir
258 self.vars[
"best_pdb_dir"] = best_pdb_dir
259 self.vars[
"atomistic"] = atomistic
260 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
261 self.vars[
"geometries"] =
None
262 self.test_mode = test_mode
263 self.score_moved = score_moved
266 if self.vars[
"geometries"]
is None:
267 self.vars[
"geometries"] = list(geometries)
269 self.vars[
"geometries"].extend(geometries)
272 print(
"ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, "
273 "rmfs/*.rmf3 for each replica ")
274 print(
"--- it stores the best scoring pdb models in pdbs/")
275 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
276 "lowest temperature")
277 print(
"--- variables:")
278 keys = list(self.vars.keys())
281 print(
"------", v.ljust(30), self.vars[v])
283 def get_replica_exchange_object(self):
284 return self.replica_exchange_object
286 def _add_provenance(self, sampler_md, sampler_mc):
287 """Record details about the sampling in the IMP Hierarchies"""
290 method =
"Molecular Dynamics"
291 iterations += self.vars[
"molecular_dynamics_steps"]
293 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
294 iterations += self.vars[
"monte_carlo_steps"]
296 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
298 iterations *= self.vars[
"num_sample_rounds"]
300 pi = self.model.add_particle(
"sampling")
302 self.model, pi, method, self.vars[
"number_of_frames"],
304 p.set_number_of_replicas(
305 self.replica_exchange_object.get_number_of_replicas())
306 IMP.pmi.tools._add_pmi_provenance(self.root_hier)
309 def execute_macro(self):
310 temp_index_factor = 100000.0
314 if self.monte_carlo_sample_objects
is not None:
315 print(
"Setting up MonteCarlo")
317 self.model, self.monte_carlo_sample_objects,
318 self.vars[
"monte_carlo_temperature"],
319 score_moved=self.score_moved)
320 if self.vars[
"simulated_annealing"]:
321 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
322 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
324 "simulated_annealing_minimum_temperature_nframes"]
326 "simulated_annealing_maximum_temperature_nframes"]
327 sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
328 if self.vars[
"self_adaptive"]:
329 sampler_mc.set_self_adaptive(
330 isselfadaptive=self.vars[
"self_adaptive"])
331 if self.output_objects
is not None:
332 self.output_objects.append(sampler_mc)
333 if self.rmf_output_objects
is not None:
334 self.rmf_output_objects.append(sampler_mc)
335 samplers.append(sampler_mc)
337 if self.molecular_dynamics_sample_objects
is not None:
338 print(
"Setting up MolecularDynamics")
340 self.model, self.molecular_dynamics_sample_objects,
341 self.vars[
"monte_carlo_temperature"],
342 maximum_time_step=self.molecular_dynamics_max_time_step)
343 if self.vars[
"simulated_annealing"]:
344 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
345 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
347 "simulated_annealing_minimum_temperature_nframes"]
349 "simulated_annealing_maximum_temperature_nframes"]
350 sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
351 if self.output_objects
is not None:
352 self.output_objects.append(sampler_md)
353 if self.rmf_output_objects
is not None:
354 self.rmf_output_objects.append(sampler_md)
355 samplers.append(sampler_md)
358 print(
"Setting up ReplicaExchange")
360 self.model, self.vars[
"replica_exchange_minimum_temperature"],
361 self.vars[
"replica_exchange_maximum_temperature"], samplers,
362 replica_exchange_object=self.replica_exchange_object)
363 self.replica_exchange_object = rex.rem
365 myindex = rex.get_my_index()
366 if self.output_objects
is not None:
367 self.output_objects.append(rex)
368 if self.rmf_output_objects
is not None:
369 self.rmf_output_objects.append(rex)
373 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
377 globaldir = self.vars[
"global_output_directory"] +
"/"
378 rmf_dir = globaldir + self.vars[
"rmf_dir"]
379 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
381 if not self.test_mode:
382 if self.vars[
"do_clean_first"]:
385 if self.vars[
"do_create_directories"]:
388 os.makedirs(globaldir)
396 if not self.is_multi_state:
402 for n
in range(self.vars[
"number_of_states"]):
404 os.makedirs(pdb_dir +
"/" + str(n))
411 if self.output_objects
is not None:
412 self.output_objects.append(sw)
413 if self.rmf_output_objects
is not None:
414 self.rmf_output_objects.append(sw)
416 print(
"Setting up stat file")
418 low_temp_stat_file = globaldir + \
419 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
422 if not self.test_mode:
425 if not self.test_mode:
426 if self.output_objects
is not None:
427 output.init_stat2(low_temp_stat_file,
429 extralabels=[
"rmf_file",
"rmf_frame_index"])
431 print(
"Stat file writing is disabled")
433 if self.rmf_output_objects
is not None:
434 print(
"Stat info being written in the rmf file")
436 print(
"Setting up replica stat file")
437 replica_stat_file = globaldir + \
438 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
439 if not self.test_mode:
440 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
442 if not self.test_mode:
443 print(
"Setting up best pdb files")
444 if not self.is_multi_state:
445 if self.vars[
"number_of_best_scoring_models"] > 0:
446 output.init_pdb_best_scoring(
447 pdb_dir +
"/" + self.vars[
"best_pdb_name_suffix"],
449 self.vars[
"number_of_best_scoring_models"],
450 replica_exchange=
True,
451 mmcif=self.vars[
'mmcif'],
452 best_score_file=globaldir +
"best.scores.rex.py")
453 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
455 pdb_dir +
"/" +
"model.psf",
457 self.vars[
"best_pdb_name_suffix"] + pdbext)
459 if self.vars[
"number_of_best_scoring_models"] > 0:
460 for n
in range(self.vars[
"number_of_states"]):
461 output.init_pdb_best_scoring(
462 pdb_dir +
"/" + str(n) +
"/" +
463 self.vars[
"best_pdb_name_suffix"],
465 self.vars[
"number_of_best_scoring_models"],
466 replica_exchange=
True,
467 mmcif=self.vars[
'mmcif'],
468 best_score_file=globaldir +
"best.scores.rex.py")
469 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
471 pdb_dir +
"/" + str(n) +
"/" +
"model.psf",
472 pdb_dir +
"/" + str(n) +
"/" +
473 self.vars[
"best_pdb_name_suffix"] + pdbext)
476 if self.em_object_for_rmf
is not None:
477 output_hierarchies = [
479 self.em_object_for_rmf.get_density_as_hierarchy(
482 output_hierarchies = [self.root_hier]
484 if not self.test_mode:
485 print(
"Setting up and writing initial rmf coordinate file")
486 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
487 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
489 listofobjects=self.rmf_output_objects)
490 if self._rmf_restraints:
491 output.add_restraints_to_rmf(
492 init_suffix +
"." + str(myindex) +
".rmf3",
493 self._rmf_restraints)
494 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
495 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
497 if not self.test_mode:
498 mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
500 self._add_provenance(sampler_md, sampler_mc)
502 if not self.test_mode:
503 print(
"Setting up production rmf files")
504 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
505 output.init_rmf(rmfname, output_hierarchies,
506 geometries=self.vars[
"geometries"],
507 listofobjects=self.rmf_output_objects)
509 if self._rmf_restraints:
510 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
512 ntimes_at_low_temp = 0
516 self.replica_exchange_object.set_was_used(
True)
517 nframes = self.vars[
"number_of_frames"]
520 for i
in range(nframes):
524 for nr
in range(self.vars[
"num_sample_rounds"]):
525 if sampler_md
is not None:
527 self.vars[
"molecular_dynamics_steps"])
528 if sampler_mc
is not None:
529 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
531 self.model).evaluate(
False)
532 mpivs.set_value(
"score", score)
533 output.set_output_entry(
"score", score)
535 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
537 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
538 save_frame = (min_temp_index == my_temp_index)
539 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
540 score_perc = mpivs.get_percentile(
"score")
541 save_frame = (score_perc*100.0 <= 25.0)
542 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
543 score_perc = mpivs.get_percentile(
"score")
544 save_frame = (score_perc*100.0 <= 50.0)
545 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
546 score_perc = mpivs.get_percentile(
"score")
547 save_frame = (score_perc*100.0 <= 75.0)
550 if save_frame
and not self.test_mode:
554 print(
"--- frame %s score %s " % (str(i), str(score)))
556 if not self.test_mode:
557 if i % self.vars[
"nframes_write_coordinates"] == 0:
558 print(
'--- writing coordinates')
559 if self.vars[
"number_of_best_scoring_models"] > 0:
560 output.write_pdb_best_scoring(score)
561 output.write_rmf(rmfname)
562 output.set_output_entry(
"rmf_file", rmfname)
563 output.set_output_entry(
"rmf_frame_index",
566 output.set_output_entry(
"rmf_file", rmfname)
567 output.set_output_entry(
"rmf_frame_index",
'-1')
568 if self.output_objects
is not None:
569 output.write_stat2(low_temp_stat_file)
570 ntimes_at_low_temp += 1
572 if not self.test_mode:
573 output.write_stat2(replica_stat_file)
574 if self.vars[
"replica_exchange_swap"]:
575 rex.swap_temp(i, score)
576 for p, state
in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
577 p.add_replica_exchange(state, self)
579 if not self.test_mode:
580 print(
"closing production rmf files")
581 output.close_rmf(rmfname)
585 """A macro to build a IMP::pmi::topology::System based on a
586 TopologyReader object.
588 Easily create multi-state systems by calling this macro
589 repeatedly with different TopologyReader objects!
590 A useful function is get_molecules() which returns the PMI Molecules
591 grouped by state as a dictionary with key = (molecule name),
592 value = IMP.pmi.topology.Molecule
593 Quick multi-state system:
596 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
597 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
598 bs = IMP.pmi.macros.BuildSystem(model)
599 bs.add_state(reader1)
600 bs.add_state(reader2)
601 bs.execute_macro() # build everything including degrees of freedom
602 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
603 ### now you have a two state system, you add restraints etc
605 @note The "domain name" entry of the topology reader is not used.
606 All molecules are set up by the component name, but split into rigid bodies
610 _alphabets = {
'DNA': IMP.pmi.alphabets.dna,
611 'RNA': IMP.pmi.alphabets.rna}
613 def __init__(self, model, sequence_connectivity_scale=4.0,
614 force_create_gmm_files=
False, resolutions=[1, 10]):
616 @param model An IMP Model
617 @param sequence_connectivity_scale For scaling the connectivity
619 @param force_create_gmm_files If True, will sample and create GMMs
620 no matter what. If False, will only sample if the
621 files don't exist. If number of Gaussians is zero, won't
623 @param resolutions The resolutions to build for structured regions
630 self._domain_res = []
632 self.force_create_gmm_files = force_create_gmm_files
633 self.resolutions = resolutions
635 def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
637 """Add a state using the topology info in a
638 IMP::pmi::topology::TopologyReader object.
639 When you are done adding states, call execute_macro()
640 @param reader The TopologyReader object
641 @param fasta_name_map dictionary for converting protein names
642 found in the fasta file
643 @param chain_ids A list or string of chain IDs for assigning to
644 newly-created molecules, e.g.
645 `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
646 If not specified, chain IDs A through Z are assigned, then
647 AA through AZ, then BA through BZ, and so on, in the same
650 state = self.system.create_state()
651 self._readers.append(reader)
653 these_domain_res = {}
655 if chain_ids
is None:
656 chain_ids = IMP.pmi.output._ChainIDs()
661 for molname
in reader.get_molecules():
662 copies = reader.get_molecules()[molname].domains
663 for nc, copyname
in enumerate(copies):
664 print(
"BuildSystem.add_state: setting up molecule %s copy "
665 "number %s" % (molname, str(nc)))
666 copy = copies[copyname]
669 all_chains = [c
for c
in copy
if c.chain
is not None]
671 chain_id = all_chains[0].chain
673 chain_id = chain_ids[numchain]
675 "No PDBs specified for %s, so keep_chain_id has "
676 "no effect; using default chain ID '%s'"
679 chain_id = chain_ids[numchain]
681 alphabet = IMP.pmi.alphabets.amino_acid
682 fasta_flag = copy[0].fasta_flag
683 if fasta_flag
in self._alphabets:
684 alphabet = self._alphabets[fasta_flag]
686 copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
687 print(
"BuildSystem.add_state: molecule %s sequence has "
688 "%s residues" % (molname, len(seq)))
689 orig_mol = state.create_molecule(molname, seq, chain_id,
694 print(
"BuildSystem.add_state: creating a copy for "
695 "molecule %s" % molname)
696 mol = orig_mol.create_copy(chain_id)
699 for domainnumber, domain
in enumerate(copy):
700 print(
"BuildSystem.add_state: ---- setting up domain %s "
701 "of molecule %s" % (domainnumber, molname))
704 these_domains[domain.get_unique_name()] = domain
705 if domain.residue_range == []
or \
706 domain.residue_range
is None:
707 domain_res = mol.get_residues()
709 start = domain.residue_range[0]+domain.pdb_offset
710 if domain.residue_range[1] ==
'END':
711 end = len(mol.sequence)
713 end = domain.residue_range[1]+domain.pdb_offset
714 domain_res = mol.residue_range(start-1, end-1)
715 print(
"BuildSystem.add_state: -------- domain %s of "
716 "molecule %s extends from residue %s to "
718 % (domainnumber, molname, start, end))
719 if domain.pdb_file ==
"BEADS":
720 print(
"BuildSystem.add_state: -------- domain %s of "
721 "molecule %s represented by BEADS "
722 % (domainnumber, molname))
723 mol.add_representation(
725 resolutions=[domain.bead_size],
726 setup_particles_as_densities=(
727 domain.em_residues_per_gaussian != 0),
729 these_domain_res[domain.get_unique_name()] = \
731 elif domain.pdb_file ==
"IDEAL_HELIX":
732 print(
"BuildSystem.add_state: -------- domain %s of "
733 "molecule %s represented by IDEAL_HELIX "
734 % (domainnumber, molname))
735 emper = domain.em_residues_per_gaussian
736 mol.add_representation(
738 resolutions=self.resolutions,
740 density_residues_per_component=emper,
741 density_prefix=domain.density_prefix,
742 density_force_compute=self.force_create_gmm_files,
744 these_domain_res[domain.get_unique_name()] = \
747 print(
"BuildSystem.add_state: -------- domain %s of "
748 "molecule %s represented by pdb file %s "
749 % (domainnumber, molname, domain.pdb_file))
750 domain_atomic = mol.add_structure(domain.pdb_file,
752 domain.residue_range,
755 domain_non_atomic = domain_res - domain_atomic
756 if not domain.em_residues_per_gaussian:
757 mol.add_representation(
758 domain_atomic, resolutions=self.resolutions,
760 if len(domain_non_atomic) > 0:
761 mol.add_representation(
763 resolutions=[domain.bead_size],
766 print(
"BuildSystem.add_state: -------- domain %s "
767 "of molecule %s represented by gaussians "
768 % (domainnumber, molname))
769 emper = domain.em_residues_per_gaussian
770 creategmm = self.force_create_gmm_files
771 mol.add_representation(
773 resolutions=self.resolutions,
774 density_residues_per_component=emper,
775 density_prefix=domain.density_prefix,
776 density_force_compute=creategmm,
778 if len(domain_non_atomic) > 0:
779 mol.add_representation(
781 resolutions=[domain.bead_size],
782 setup_particles_as_densities=
True,
784 these_domain_res[domain.get_unique_name()] = (
785 domain_atomic, domain_non_atomic)
786 self._domain_res.append(these_domain_res)
787 self._domains.append(these_domains)
788 print(
'BuildSystem.add_state: State', len(self.system.states),
'added')
791 """Return list of all molecules grouped by state.
792 For each state, it's a dictionary of Molecules where key is the
795 return [s.get_molecules()
for s
in self.system.get_states()]
797 def get_molecule(self, molname, copy_index=0, state_index=0):
798 return self.system.get_states()[state_index].
get_molecules()[
802 max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
803 """Builds representations and sets up degrees of freedom"""
804 print(
"BuildSystem.execute_macro: building representations")
805 self.root_hier = self.system.build()
807 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
809 for nstate, reader
in enumerate(self._readers):
810 rbs = reader.get_rigid_bodies()
811 srbs = reader.get_super_rigid_bodies()
812 csrbs = reader.get_chains_of_super_rigid_bodies()
815 domains_in_rbs = set()
817 print(
"BuildSystem.execute_macro: -------- building rigid "
818 "body %s" % (str(rblist)))
819 all_res = IMP.pmi.tools.OrderedSet()
820 bead_res = IMP.pmi.tools.OrderedSet()
822 domain = self._domains[nstate][dname]
823 print(
"BuildSystem.execute_macro: -------- adding %s"
825 all_res |= self._domain_res[nstate][dname][0]
826 bead_res |= self._domain_res[nstate][dname][1]
827 domains_in_rbs.add(dname)
829 print(
"BuildSystem.execute_macro: -------- creating rigid "
830 "body with max_trans %s max_rot %s "
831 "non_rigid_max_trans %s"
832 % (str(max_rb_trans), str(max_rb_rot),
833 str(max_bead_trans)))
834 self.dof.create_rigid_body(all_res,
835 nonrigid_parts=bead_res,
836 max_trans=max_rb_trans,
838 nonrigid_max_trans=max_bead_trans,
839 name=
"RigidBody %s" % dname)
842 for dname, domain
in self._domains[nstate].items():
843 if dname
not in domains_in_rbs:
844 if domain.pdb_file !=
"BEADS":
846 "No rigid bodies set for %s. Residues read from "
847 "the PDB file will not be sampled - only regions "
848 "missing from the PDB will be treated flexibly. "
849 "To sample the entire sequence, use BEADS instead "
850 "of a PDB file name" % dname,
852 self.dof.create_flexible_beads(
853 self._domain_res[nstate][dname][1],
854 max_trans=max_bead_trans)
858 print(
"BuildSystem.execute_macro: -------- building "
859 "super rigid body %s" % (str(srblist)))
860 all_res = IMP.pmi.tools.OrderedSet()
861 for dname
in srblist:
862 print(
"BuildSystem.execute_macro: -------- adding %s"
864 all_res |= self._domain_res[nstate][dname][0]
865 all_res |= self._domain_res[nstate][dname][1]
867 print(
"BuildSystem.execute_macro: -------- creating super "
868 "rigid body with max_trans %s max_rot %s "
869 % (str(max_srb_trans), str(max_srb_rot)))
870 self.dof.create_super_rigid_body(
871 all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
874 for csrblist
in csrbs:
875 all_res = IMP.pmi.tools.OrderedSet()
876 for dname
in csrblist:
877 all_res |= self._domain_res[nstate][dname][0]
878 all_res |= self._domain_res[nstate][dname][1]
879 all_res = list(all_res)
880 all_res.sort(key=
lambda r: r.get_index())
881 self.dof.create_main_chain_mover(all_res)
882 return self.root_hier, self.dof
887 """A macro for running all the basic operations of analysis.
888 Includes clustering, precision analysis, and making ensemble density maps.
889 A number of plots are also supported.
892 merge_directories=[
"./"],
893 stat_file_name_suffix=
"stat",
894 best_pdb_name_suffix=
"model",
896 do_create_directories=
True,
897 global_output_directory=
"output/",
898 replica_stat_file_suffix=
"stat_replica",
899 global_analysis_result_directory=
"./analysis/",
902 @param model The IMP model
903 @param stat_file_name_suffix
904 @param merge_directories The directories containing output files
905 @param best_pdb_name_suffix
906 @param do_clean_first
907 @param do_create_directories
908 @param global_output_directory Where everything is
909 @param replica_stat_file_suffix
910 @param global_analysis_result_directory
911 @param test_mode If True, nothing is changed on disk
915 from mpi4py
import MPI
916 self.comm = MPI.COMM_WORLD
917 self.rank = self.comm.Get_rank()
918 self.number_of_processes = self.comm.size
921 self.number_of_processes = 1
923 self.test_mode = test_mode
924 self._protocol_output = []
925 self.cluster_obj =
None
927 stat_dir = global_output_directory
930 for rd
in merge_directories:
931 stat_files = glob.glob(os.path.join(rd, stat_dir,
"stat.*.out"))
932 if len(stat_files) == 0:
933 warnings.warn(
"no stat files found in %s"
934 % os.path.join(rd, stat_dir),
936 self.stat_files += stat_files
939 """Capture details of the modeling protocol.
940 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
943 self._protocol_output.append((p, p._last_state))
946 score_key=
"SimplifiedModel_Total_Score_None",
947 rmf_file_key=
"rmf_file",
948 rmf_file_frame_key=
"rmf_frame_index",
951 nframes_trajectory=10000):
952 """ Get a trajectory of the modeling run, for generating
955 @param score_key The score for ranking models
956 @param rmf_file_key Key pointing to RMF filename
957 @param rmf_file_frame_key Key pointing to RMF frame number
958 @param outputdir The local output directory used in the run
959 @param get_every Extract every nth frame
960 @param nframes_trajectory Total number of frames of the trajectory
965 self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
967 score_list = list(map(float, trajectory_models[2]))
969 max_score = max(score_list)
970 min_score = min(score_list)
972 bins = [(max_score-min_score)*math.exp(-float(i))+min_score
973 for i
in range(nframes_trajectory)]
974 binned_scores = [
None]*nframes_trajectory
975 binned_model_indexes = [-1]*nframes_trajectory
977 for model_index, s
in enumerate(score_list):
978 bins_score_diffs = [abs(s-b)
for b
in bins]
979 bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
980 if binned_scores[bin_index]
is None:
981 binned_scores[bin_index] = s
982 binned_model_indexes[bin_index] = model_index
984 old_diff = abs(binned_scores[bin_index]-bins[bin_index])
985 new_diff = abs(s-bins[bin_index])
986 if new_diff < old_diff:
987 binned_scores[bin_index] = s
988 binned_model_indexes[bin_index] = model_index
991 print(binned_model_indexes)
993 def _expand_ambiguity(self, prot, d):
994 """If using PMI2, expand the dictionary to include copies as
997 This also keeps the states separate.
1002 if '..' in key
or (isinstance(val, tuple)
and len(val) >= 3):
1005 states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1006 if isinstance(val, tuple):
1014 for nst
in range(len(states)):
1016 copies = sel.get_selected_particles(with_representation=
False)
1018 for nc
in range(len(copies)):
1020 newdict[
'%s.%i..%i' % (name, nst, nc)] = \
1021 (start, stop, name, nc, nst)
1023 newdict[
'%s..%i' % (name, nc)] = \
1024 (start, stop, name, nc, nst)
1030 score_key=
"SimplifiedModel_Total_Score_None",
1031 rmf_file_key=
"rmf_file",
1032 rmf_file_frame_key=
"rmf_frame_index",
1034 prefiltervalue=
None,
1037 alignment_components=
None,
1038 number_of_best_scoring_models=10,
1039 rmsd_calculation_components=
None,
1040 distance_matrix_file=
'distances.mat',
1041 load_distance_matrix_file=
False,
1042 skip_clustering=
False,
1043 number_of_clusters=1,
1045 exit_after_display=
True,
1047 first_and_last_frames=
None,
1048 density_custom_ranges=
None,
1049 write_pdb_with_centered_coordinates=
False,
1051 """Get the best scoring models, compute a distance matrix,
1052 cluster them, and create density maps.
1054 Tuple format: "molname" just the molecule,
1055 or (start,stop,molname,copy_num(optional),state_num(optional)
1056 Can pass None for copy or state to ignore that field.
1057 If you don't pass a specific copy number
1059 @param score_key The score for ranking models.
1060 Use "Total_Score" instead of default for PMI2.
1061 @param rmf_file_key Key pointing to RMF filename
1062 @param rmf_file_frame_key Key pointing to RMF frame number
1063 @param state_number State number to analyze
1064 @param prefiltervalue Only include frames where the
1065 score key is below this value
1066 @param feature_keys Keywords for which you want to
1067 calculate average, medians, etc.
1068 If you pass "Keyname" it'll include everything that matches
1070 @param outputdir The local output directory used in
1072 @param alignment_components Dictionary with keys=groupname,
1073 values are tuples for aligning the structures
1074 e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1075 @param number_of_best_scoring_models Num models to keep per run
1076 @param rmsd_calculation_components For calculating RMSD
1077 (same format as alignment_components)
1078 @param distance_matrix_file Where to store/read the
1080 @param load_distance_matrix_file Try to load the distance
1082 @param skip_clustering Just extract the best scoring
1083 models and save the pdbs
1084 @param number_of_clusters Number of k-means clusters
1085 @param display_plot Display the distance matrix
1086 @param exit_after_display Exit after displaying distance
1088 @param get_every Extract every nth frame
1089 @param first_and_last_frames A tuple with the first and last
1090 frames to be analyzed. Values are percentages!
1091 Default: get all frames
1092 @param density_custom_ranges For density calculation
1093 (same format as alignment_components)
1094 @param write_pdb_with_centered_coordinates
1095 @param voxel_size Used for the density output
1099 self._outputdir = Path(outputdir).absolute()
1100 self._number_of_clusters = number_of_clusters
1101 for p, state
in self._protocol_output:
1102 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1113 if not load_distance_matrix_file:
1114 if len(self.stat_files) == 0:
1115 print(
"ERROR: no stat file found in the given path")
1117 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1118 self.stat_files, self.number_of_processes)[self.rank]
1122 orig_score_key = score_key
1123 if score_key
not in po.get_keys():
1124 if 'Total_Score' in po.get_keys():
1125 score_key =
'Total_Score'
1127 "Using 'Total_Score' instead of "
1128 "'SimplifiedModel_Total_Score_None' for the score key",
1130 for k
in [orig_score_key, score_key, rmf_file_key,
1131 rmf_file_frame_key]:
1132 if k
in feature_keys:
1134 "no need to pass " + k +
" to feature_keys.",
1136 feature_keys.remove(k)
1139 my_stat_files, score_key, feature_keys, rmf_file_key,
1140 rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1141 rmf_file_list = best_models[0]
1142 rmf_file_frame_list = best_models[1]
1143 score_list = best_models[2]
1144 feature_keyword_list_dict = best_models[3]
1150 if self.number_of_processes > 1:
1154 rmf_file_frame_list)
1155 for k
in feature_keyword_list_dict:
1156 feature_keyword_list_dict[k] = \
1158 feature_keyword_list_dict[k])
1161 score_rmf_tuples = list(zip(score_list,
1163 rmf_file_frame_list,
1164 list(range(len(score_list)))))
1166 if density_custom_ranges:
1167 for k
in density_custom_ranges:
1168 if not isinstance(density_custom_ranges[k], list):
1169 raise Exception(
"Density custom ranges: values must "
1170 "be lists of tuples")
1173 if first_and_last_frames
is not None:
1174 nframes = len(score_rmf_tuples)
1175 first_frame = int(first_and_last_frames[0] * nframes)
1176 last_frame = int(first_and_last_frames[1] * nframes)
1177 if last_frame > len(score_rmf_tuples):
1179 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1182 best_score_rmf_tuples = sorted(
1184 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1185 best_score_rmf_tuples = [t+(n,)
for n, t
in
1186 enumerate(best_score_rmf_tuples)]
1188 prov.append(IMP.pmi.io.FilterProvenance(
1189 "Best scoring", 0, number_of_best_scoring_models))
1191 best_score_feature_keyword_list_dict = defaultdict(list)
1192 for tpl
in best_score_rmf_tuples:
1194 for f
in feature_keyword_list_dict:
1195 best_score_feature_keyword_list_dict[f].append(
1196 feature_keyword_list_dict[f][index])
1197 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1198 best_score_rmf_tuples,
1199 self.number_of_processes)[self.rank]
1202 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1203 self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1205 if rmsd_calculation_components
is not None:
1206 tmp = self._expand_ambiguity(
1207 prot_ahead, rmsd_calculation_components)
1208 if tmp != rmsd_calculation_components:
1209 print(
'Detected ambiguity, expand rmsd components to',
1211 rmsd_calculation_components = tmp
1212 if alignment_components
is not None:
1213 tmp = self._expand_ambiguity(prot_ahead,
1214 alignment_components)
1215 if tmp != alignment_components:
1216 print(
'Detected ambiguity, expand alignment '
1217 'components to', tmp)
1218 alignment_components = tmp
1224 self.model, my_best_score_rmf_tuples[0],
1225 rmsd_calculation_components, state_number=state_number)
1227 self.model, my_best_score_rmf_tuples, alignment_components,
1228 rmsd_calculation_components, state_number=state_number)
1236 all_coordinates = got_coords[0]
1239 alignment_coordinates = got_coords[1]
1242 rmsd_coordinates = got_coords[2]
1245 rmf_file_name_index_dict = got_coords[3]
1248 all_rmf_file_names = got_coords[4]
1254 if density_custom_ranges:
1256 density_custom_ranges, voxel=voxel_size)
1258 dircluster = os.path.join(outputdir,
1259 "all_models."+str(self.rank))
1265 os.mkdir(dircluster)
1268 clusstat = open(os.path.join(
1269 dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1270 for cnt, tpl
in enumerate(my_best_score_rmf_tuples):
1272 rmf_frame_number = tpl[2]
1275 for key
in best_score_feature_keyword_list_dict:
1277 best_score_feature_keyword_list_dict[key][index]
1281 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1282 self.model, rmf_frame_number, rmf_name)
1284 linking_successful = \
1285 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1286 self.model, prots, rs, rmf_frame_number,
1288 if not linking_successful:
1295 states = IMP.atom.get_by_type(
1296 prots[0], IMP.atom.STATE_TYPE)
1297 prot = states[state_number]
1299 prot = prots[state_number]
1304 coords_f1 = alignment_coordinates[cnt]
1306 coords_f2 = alignment_coordinates[cnt]
1309 coords_f1, coords_f2)
1310 transformation = Ali.align()[1]
1324 rb = rbm.get_rigid_body()
1334 out_pdb_fn = os.path.join(
1335 dircluster, str(cnt)+
"."+str(self.rank)+
".pdb")
1336 out_rmf_fn = os.path.join(
1337 dircluster, str(cnt)+
"."+str(self.rank)+
".rmf3")
1338 o.init_pdb(out_pdb_fn, prot)
1339 tc = write_pdb_with_centered_coordinates
1340 o.write_pdb(out_pdb_fn,
1341 translate_to_geometric_center=tc)
1343 tmp_dict[
"local_pdb_file_name"] = \
1344 os.path.basename(out_pdb_fn)
1345 tmp_dict[
"rmf_file_full_path"] = rmf_name
1346 tmp_dict[
"local_rmf_file_name"] = \
1347 os.path.basename(out_rmf_fn)
1348 tmp_dict[
"local_rmf_frame_number"] = 0
1350 clusstat.write(str(tmp_dict)+
"\n")
1356 h.set_name(
"System")
1358 o.init_rmf(out_rmf_fn, [h], rs)
1360 o.init_rmf(out_rmf_fn, [prot], rs)
1362 o.write_rmf(out_rmf_fn)
1363 o.close_rmf(out_rmf_fn)
1365 if density_custom_ranges:
1366 DensModule.add_subunits_density(prot)
1368 if density_custom_ranges:
1369 DensModule.write_mrc(path=dircluster)
1374 if self.number_of_processes > 1:
1380 rmf_file_name_index_dict)
1382 alignment_coordinates)
1389 [best_score_feature_keyword_list_dict,
1390 rmf_file_name_index_dict],
1396 print(
"setup clustering class")
1399 for n, model_coordinate_dict
in enumerate(all_coordinates):
1401 if (alignment_components
is not None
1402 and len(self.cluster_obj.all_coords) == 0):
1404 self.cluster_obj.set_template(alignment_coordinates[n])
1405 self.cluster_obj.fill(all_rmf_file_names[n],
1406 rmsd_coordinates[n])
1407 print(
"Global calculating the distance matrix")
1410 self.cluster_obj.dist_matrix()
1414 self.cluster_obj.do_cluster(number_of_clusters)
1417 self.cluster_obj.plot_matrix(
1418 figurename=os.path.join(outputdir,
1420 if exit_after_display:
1422 self.cluster_obj.save_distance_matrix_file(
1423 file_name=distance_matrix_file)
1430 print(
"setup clustering class")
1432 self.cluster_obj.load_distance_matrix_file(
1433 file_name=distance_matrix_file)
1434 print(
"clustering with %s clusters" % str(number_of_clusters))
1435 self.cluster_obj.do_cluster(number_of_clusters)
1436 [best_score_feature_keyword_list_dict,
1437 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1440 self.cluster_obj.plot_matrix(figurename=os.path.join(
1441 outputdir,
'dist_matrix.pdf'))
1442 if exit_after_display:
1444 if self.number_of_processes > 1:
1452 print(self.cluster_obj.get_cluster_labels())
1453 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1454 print(
"rank %s " % str(self.rank))
1455 print(
"cluster %s " % str(n))
1456 print(
"cluster label %s " % str(cl))
1457 print(self.cluster_obj.get_cluster_label_names(cl))
1459 len(self.cluster_obj.get_cluster_label_names(cl))
1461 prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1464 if density_custom_ranges:
1466 density_custom_ranges,
1469 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1471 os.mkdir(dircluster)
1477 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1478 clusstat = open(dircluster +
"stat.out",
"w")
1479 for k, structure_name
in enumerate(
1480 self.cluster_obj.get_cluster_label_names(cl)):
1483 tmp_dict.update(rmsd_dict)
1484 index = rmf_file_name_index_dict[structure_name]
1485 for key
in best_score_feature_keyword_list_dict:
1487 key] = best_score_feature_keyword_list_dict[
1493 rmf_name = structure_name.split(
"|")[0]
1494 rmf_frame_number = int(structure_name.split(
"|")[1])
1495 clusstat.write(str(tmp_dict) +
"\n")
1500 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1501 self.model, rmf_frame_number, rmf_name)
1503 linking_successful = \
1504 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1505 self.model, prots, rs, rmf_frame_number,
1507 if not linking_successful:
1513 states = IMP.atom.get_by_type(
1514 prots[0], IMP.atom.STATE_TYPE)
1515 prot = states[state_number]
1517 prot = prots[state_number]
1523 co = self.cluster_obj
1524 model_index = co.get_model_index_from_name(
1526 transformation = co.get_transformation_to_first_member(
1537 rb = rbm.get_rigid_body()
1546 if density_custom_ranges:
1547 DensModule.add_subunits_density(prot)
1552 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1553 o.write_pdb(dircluster + str(k) +
".pdb")
1559 h.set_name(
"System")
1561 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1563 o.init_rmf(dircluster + str(k) +
".rmf3", [prot], rs)
1564 o.write_rmf(dircluster + str(k) +
".rmf3")
1565 o.close_rmf(dircluster + str(k) +
".rmf3")
1570 if density_custom_ranges:
1571 DensModule.write_mrc(path=dircluster)
1574 if self.number_of_processes > 1:
1577 def get_cluster_rmsd(self, cluster_num):
1578 if self.cluster_obj
is None:
1580 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1582 def save_objects(self, objects, file_name):
1584 outf = open(file_name,
'wb')
1585 pickle.dump(objects, outf)
1588 def load_objects(self, file_name):
1590 inputf = open(file_name,
'rb')
1591 objects = pickle.load(inputf)
1599 This class contains analysis utilities to investigate ReplicaExchange
1607 def __init__(self, model, stat_files, best_models=None, score_key=None,
1610 Construction of the Class.
1611 @param model IMP.Model()
1612 @param stat_files list of string. Can be ascii stat files,
1614 @param best_models Integer. Number of best scoring models,
1615 if None: all models will be read
1616 @param alignment boolean (Default=True). Align before computing
1621 self.best_models = best_models
1623 model, stat_files, self.best_models, score_key, cache=
True)
1625 StatHierarchyHandler=self.stath0)
1638 self.clusters.append(c)
1639 for n0
in range(len(self.stath0)):
1641 self.pairwise_rmsd = {}
1642 self.pairwise_molecular_assignment = {}
1643 self.alignment = alignment
1644 self.symmetric_molecules = {}
1645 self.issymmetricsel = {}
1647 self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1649 self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1654 Setup the selection onto which the rmsd is computed
1655 @param kwargs use IMP.atom.Selection keywords
1663 Store names of symmetric molecules
1665 self.symmetric_molecules[molecule_name] = 0
1670 Setup the selection onto which the alignment is computed
1671 @param kwargs use IMP.atom.Selection keywords
1679 def clean_clusters(self):
1680 for c
in self.clusters:
1684 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1686 Cluster the models based on RMSD.
1687 @param rmsd_cutoff Float the distance cutoff in Angstrom
1688 @param metric (Default=IMP.atom.get_rmsd) the metric that will
1689 be used to compute rmsds
1691 self.clean_clusters()
1692 not_clustered = set(range(len(self.stath1)))
1693 while len(not_clustered) > 0:
1694 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1699 Refine the clusters by merging the ones whose centers are close
1700 @param rmsd_cutoff cutoff distance in Angstorms
1702 clusters_copy = self.clusters
1703 for c0, c1
in itertools.combinations(self.clusters, 2):
1704 if c0.center_index
is None:
1706 if c1.center_index
is None:
1708 _ = self.stath0[c0.center_index]
1709 _ = self.stath1[c1.center_index]
1710 rmsd, molecular_assignment = self.
rmsd()
1711 if rmsd <= rmsd_cutoff:
1712 if c1
in self.clusters:
1713 clusters_copy.remove(c1)
1715 self.clusters = clusters_copy
1722 def set_cluster_assignments(self, cluster_ids):
1723 if len(cluster_ids) != len(self.stath0):
1724 raise ValueError(
'cluster ids has to be same length as '
1728 for i
in sorted(list(set(cluster_ids))):
1730 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
1731 self.clusters[idx].add_member(i, d)
1735 Return the model data from a cluster
1736 @param cluster IMP.pmi.output.Cluster object
1745 Save the data for the whole models into a pickle file
1746 @param filename string
1748 self.stath0.save_data(filename)
1752 Set the data from an external IMP.pmi.output.Data
1753 @param data IMP.pmi.output.Data
1755 self.stath0.data = data
1756 self.stath1.data = data
1760 Load the data from an external pickled file
1761 @param filename string
1763 self.stath0.load_data(filename)
1764 self.stath1.load_data(filename)
1765 self.best_models = len(self.stath0)
1767 def add_cluster(self, rmf_name_list):
1769 print(
"creating cluster index "+str(len(self.clusters)))
1770 self.clusters.append(c)
1771 current_len = len(self.stath0)
1773 for rmf
in rmf_name_list:
1774 print(
"adding rmf "+rmf)
1775 self.stath0.add_stat_file(rmf)
1776 self.stath1.add_stat_file(rmf)
1778 for n0
in range(current_len, len(self.stath0)):
1779 d0 = self.stath0[n0]
1780 c.add_member(n0, d0)
1785 Save the clusters into a pickle file
1786 @param filename string
1789 import cPickle
as pickle
1792 fl = open(filename,
'wb')
1793 pickle.dump(self.clusters, fl)
1797 Load the clusters from a pickle file
1798 @param filename string
1799 @param append bool (Default=False), if True. append the clusters
1800 to the ones currently present
1803 import cPickle
as pickle
1806 fl = open(filename,
'rb')
1807 self.clean_clusters()
1809 self.clusters += pickle.load(fl)
1811 self.clusters = pickle.load(fl)
1820 Compute the cluster center for a given cluster
1822 member_distance = defaultdict(float)
1824 for n0, n1
in itertools.combinations(cluster.members, 2):
1827 rmsd, _ = self.
rmsd()
1828 member_distance[n0] += rmsd
1830 if len(member_distance) > 0:
1831 cluster.center_index = min(member_distance,
1832 key=member_distance.get)
1834 cluster.center_index = cluster.members[0]
1839 Save the coordinates of the current cluster a single rmf file
1841 print(
"saving coordinates", cluster)
1845 if rmf_name
is None:
1846 rmf_name = prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
1848 _ = self.stath1[cluster.members[0]]
1850 o.init_rmf(rmf_name, [self.stath1])
1851 for n1
in cluster.members:
1857 o.write_rmf(rmf_name)
1859 o.close_rmf(rmf_name)
1863 remove structures that are similar
1864 append it to a new cluster
1866 print(
"pruning models")
1868 filtered = [selected]
1869 remaining = range(1, len(self.stath1), 10)
1871 while len(remaining) > 0:
1872 d0 = self.stath0[selected]
1874 for n1
in remaining:
1879 if d <= rmsd_cutoff:
1881 print(
"pruning model %s, similar to model %s, rmsd %s"
1882 % (str(n1), str(selected), str(d)))
1883 remaining = [x
for x
in remaining
if x
not in rm]
1884 if len(remaining) == 0:
1886 selected = remaining[0]
1887 filtered.append(selected)
1890 self.clusters.append(c)
1892 d0 = self.stath0[n0]
1893 c.add_member(n0, d0)
1898 Compute the precision of a cluster
1904 if cluster.center_index
is not None:
1905 members1 = [cluster.center_index]
1907 members1 = cluster.members
1911 for n1
in cluster.members:
1916 tmp_rmsd, _ = self.
rmsd()
1921 precision = rmsd/npairs
1922 cluster.precision = precision
1927 Compute the bipartite precision (ie the cross-precision)
1928 between two clusters
1932 for cn0, n0
in enumerate(cluster1.members):
1934 for cn1, n1
in enumerate(cluster2.members):
1936 tmp_rmsd, _ = self.
rmsd()
1938 print(
"--- rmsd between structure %s and structure "
1939 "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1942 precision = rmsd/npairs
1945 def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1946 cluster_ref=
None, step=1):
1948 Compute the Root mean square fluctuations
1949 of a molecule in a cluster
1950 Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1951 residue indexes and the value is the rmsf
1953 rmsf = IMP.pmi.tools.OrderedDict()
1956 if cluster_ref
is not None:
1957 if cluster_ref.center_index
is not None:
1958 members0 = [cluster_ref.center_index]
1960 members0 = cluster_ref.members
1962 if cluster.center_index
is not None:
1963 members0 = [cluster.center_index]
1965 members0 = cluster.members
1968 copy_index=copy_index, state_index=state_index)
1969 ps0 = s0.get_selected_particles()
1971 residue_indexes = list(IMP.pmi.tools.OrderedSet(
1977 d0 = self.stath0[n0]
1978 for n1
in cluster.members[::step]:
1980 print(
"--- rmsf %s %s" % (str(n0), str(n1)))
1984 self.stath1, molecule=molecule,
1985 residue_indexes=residue_indexes, resolution=1,
1986 copy_index=copy_index, state_index=state_index)
1987 ps1 = s1.get_selected_particles()
1989 d1 = self.stath1[n1]
1992 for n, (p0, p1)
in enumerate(zip(ps0, ps1)):
1993 r = residue_indexes[n]
2005 for stath
in [self.stath0, self.stath1]:
2006 if molecule
not in self.symmetric_molecules:
2008 stath, molecule=molecule, residue_index=r,
2009 resolution=1, copy_index=copy_index,
2010 state_index=state_index)
2013 stath, molecule=molecule, residue_index=r,
2014 resolution=1, state_index=state_index)
2016 ps = s.get_selected_particles()
2025 def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2026 reference=
"Absolute", prefix=
"./", step=1):
2032 for n1
in cluster.members[::step]:
2033 print(
"density "+str(n1))
2038 dens.add_subunits_density(self.stath1)
2040 dens.write_mrc(path=prefix+
'/', suffix=str(cluster.cluster_id))
2043 def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2044 consolidate=
False, molecules=
None, prefix=
'./',
2045 reference=
"Absolute"):
2049 import matplotlib.pyplot
as plt
2050 import matplotlib.cm
as cm
2051 from scipy.spatial.distance
import cdist
2053 if molecules
is None:
2062 molecules=molecules).get_selected_particles())]
2063 unique_copies = [mol
for mol
in mols
if mol.get_copy_index() == 0]
2064 mol_names_unique = dict((mol.get_name(), mol)
for mol
in unique_copies)
2065 total_len_unique = sum(max(mol.get_residue_indexes())
2066 for mol
in unique_copies)
2073 seqlen = max(mol.get_residue_indexes())
2074 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2078 for mol
in unique_copies:
2079 seqlen = max(mol.get_residue_indexes())
2080 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2083 for ncl, n1
in enumerate(cluster.members):
2086 coord_dict = IMP.pmi.tools.OrderedDict()
2088 rindexes = mol.get_residue_indexes()
2089 coords = np.ones((max(rindexes), 3))
2090 for rnum
in rindexes:
2093 selpart = sel.get_selected_particles()
2094 if len(selpart) == 0:
2096 selpart = selpart[0]
2097 coords[rnum - 1, :] = \
2099 coord_dict[mol] = coords
2102 coords = np.concatenate(list(coord_dict.values()))
2103 dists = cdist(coords, coords)
2104 binary_dists = np.where((dists <= contact_threshold)
2105 & (dists >= 1.0), 1.0, 0.0)
2107 binary_dists_dict = {}
2109 len1 = max(mol1.get_residue_indexes())
2111 name1 = mol1.get_name()
2112 name2 = mol2.get_name()
2113 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2114 if (name1, name2)
not in binary_dists_dict:
2115 binary_dists_dict[(name1, name2)] = \
2116 np.zeros((len1, len1))
2117 binary_dists_dict[(name1, name2)] += \
2118 np.where((dists <= contact_threshold)
2119 & (dists >= 1.0), 1.0, 0.0)
2120 binary_dists = np.zeros((total_len_unique, total_len_unique))
2122 for name1, name2
in binary_dists_dict:
2123 r1 = index_dict[mol_names_unique[name1]]
2124 r2 = index_dict[mol_names_unique[name2]]
2125 binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2126 np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2132 contact_freqs = binary_dists
2134 dist_maps.append(dists)
2135 av_dist_map += dists
2136 contact_freqs += binary_dists
2139 contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2141 contact_freqs = 1.0/len(cluster)*contact_freqs
2142 av_dist_map = 1.0/len(cluster)*contact_freqs
2144 fig = plt.figure(figsize=(100, 100))
2145 ax = fig.add_subplot(111)
2148 gap_between_components = 50
2153 sorted_tuple = sorted(
2155 mol).get_extended_name(), mol)
for mol
in mols)
2156 prot_list = list(zip(*sorted_tuple))[1]
2158 sorted_tuple = sorted(
2160 for mol
in unique_copies)
2161 prot_list = list(zip(*sorted_tuple))[1]
2163 prot_listx = prot_list
2164 nresx = gap_between_components + \
2165 sum([max(mol.get_residue_indexes())
2166 + gap_between_components
for mol
in prot_listx])
2169 prot_listy = prot_list
2170 nresy = gap_between_components + \
2171 sum([max(mol.get_residue_indexes())
2172 + gap_between_components
for mol
in prot_listy])
2177 res = gap_between_components
2178 for mol
in prot_listx:
2179 resoffsetx[mol] = res
2180 res += max(mol.get_residue_indexes())
2182 res += gap_between_components
2186 res = gap_between_components
2187 for mol
in prot_listy:
2188 resoffsety[mol] = res
2189 res += max(mol.get_residue_indexes())
2191 res += gap_between_components
2193 resoffsetdiagonal = {}
2194 res = gap_between_components
2195 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2196 resoffsetdiagonal[mol] = res
2197 res += max(mol.get_residue_indexes())
2198 res += gap_between_components
2203 for n, prot
in enumerate(prot_listx):
2204 res = resoffsetx[prot]
2206 for proty
in prot_listy:
2207 resy = resoffsety[proty]
2208 endy = resendy[proty]
2209 ax.plot([res, res], [resy, endy], linestyle=
'-',
2210 color=
'gray', lw=0.4)
2211 ax.plot([end, end], [resy, endy], linestyle=
'-',
2212 color=
'gray', lw=0.4)
2213 xticks.append((float(res) + float(end)) / 2)
2215 prot).get_extended_name())
2219 for n, prot
in enumerate(prot_listy):
2220 res = resoffsety[prot]
2222 for protx
in prot_listx:
2223 resx = resoffsetx[protx]
2224 endx = resendx[protx]
2225 ax.plot([resx, endx], [res, res], linestyle=
'-',
2226 color=
'gray', lw=0.4)
2227 ax.plot([resx, endx], [end, end], linestyle=
'-',
2228 color=
'gray', lw=0.4)
2229 yticks.append((float(res) + float(end)) / 2)
2231 prot).get_extended_name())
2235 tmp_array = np.zeros((nresx, nresy))
2237 for px
in prot_listx:
2238 for py
in prot_listy:
2239 resx = resoffsetx[px]
2240 lengx = resendx[px] - 1
2241 resy = resoffsety[py]
2242 lengy = resendy[py] - 1
2243 indexes_x = index_dict[px]
2244 minx = min(indexes_x)
2245 maxx = max(indexes_x)
2246 indexes_y = index_dict[py]
2247 miny = min(indexes_y)
2248 maxy = max(indexes_y)
2249 tmp_array[resx:lengx, resy:lengy] = \
2250 contact_freqs[minx:maxx, miny:maxy]
2251 ret[(px, py)] = np.argwhere(
2252 contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2254 ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2255 origin=
'lower', alpha=0.6, interpolation=
'nearest')
2257 ax.set_xticks(xticks)
2258 ax.set_xticklabels(xlabels, rotation=90)
2259 ax.set_yticks(yticks)
2260 ax.set_yticklabels(ylabels)
2261 plt.setp(ax.get_xticklabels(), fontsize=6)
2262 plt.setp(ax.get_yticklabels(), fontsize=6)
2265 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2266 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2268 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf",
2269 dpi=300, transparent=
"False")
2272 def plot_rmsd_matrix(self, filename):
2273 self.compute_all_pairwise_rmsd()
2274 distance_matrix = np.zeros(
2275 (len(self.stath0), len(self.stath1)))
2276 for (n0, n1)
in self.pairwise_rmsd:
2277 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2279 import matplotlib
as mpl
2281 import matplotlib.pylab
as pl
2282 from scipy.cluster
import hierarchy
as hrc
2284 fig = pl.figure(figsize=(10, 8))
2285 ax = fig.add_subplot(212)
2286 dendrogram = hrc.dendrogram(
2287 hrc.linkage(distance_matrix),
2290 leaves_order = dendrogram[
'leaves']
2291 ax.set_xlabel(
'Model')
2292 ax.set_ylabel(
'RMSD [Angstroms]')
2294 ax2 = fig.add_subplot(221)
2296 distance_matrix[leaves_order, :][:, leaves_order],
2297 interpolation=
'nearest')
2298 cb = fig.colorbar(cax)
2299 cb.set_label(
'RMSD [Angstroms]')
2300 ax2.set_xlabel(
'Model')
2301 ax2.set_ylabel(
'Model')
2303 pl.savefig(filename, dpi=300)
2312 Update the cluster id numbers
2314 for n, c
in enumerate(self.clusters):
2317 def get_molecule(self, hier, name, copy):
2325 self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2326 self.sel0_rmsd.get_selected_particles())
2327 self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2328 self.sel1_rmsd.get_selected_particles())
2329 for mol
in self.seldict0:
2330 for sel
in self.seldict0[mol]:
2331 self.issymmetricsel[sel] =
False
2332 for mol
in self.symmetric_molecules:
2333 self.symmetric_molecules[mol] = len(self.seldict0[mol])
2334 for sel
in self.seldict0[mol]:
2335 self.issymmetricsel[sel] =
True
2339 self.sel1_alignment, self.sel0_alignment)
2341 for rb
in self.rbs1:
2344 for bead
in self.beads1:
2352 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2354 initial filling of the clusters.
2357 print(
"clustering model "+str(n0))
2358 d0 = self.stath0[n0]
2360 print(
"creating cluster index "+str(len(self.clusters)))
2361 self.clusters.append(c)
2362 c.add_member(n0, d0)
2363 clustered = set([n0])
2365 print(
"--- trying to add model " + str(n1) +
" to cluster "
2366 + str(len(self.clusters)))
2367 d1 = self.stath1[n1]
2370 rmsd, _ = self.
rmsd(metric=metric)
2371 if rmsd < rmsd_cutoff:
2372 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2373 c.add_member(n1, d1)
2376 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2381 merge the clusters that have close members
2382 @param rmsd_cutoff cutoff distance in Angstorms
2390 for c0, c1
in filter(
lambda x: len(x[0].members) > 1,
2391 itertools.combinations(self.clusters, 2)):
2392 n0, n1 = [c.members[0]
for c
in (c0, c1)]
2395 rmsd, _ = self.
rmsd()
2396 if (rmsd < 2*rmsd_cutoff
and
2398 to_merge.append((c0, c1))
2400 for c0, c
in reversed(to_merge):
2404 self.clusters = [c
for c
in
2405 filter(
lambda x: len(x.members) > 0, self.clusters)]
2409 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2411 print(
"check close members for clusters " + str(c0.cluster_id) +
2412 " and " + str(c1.cluster_id))
2413 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2416 rmsd, _ = self.
rmsd(metric=metric)
2417 if rmsd < rmsd_cutoff:
2432 a function that returns the permutation best_sel of sels0 that
2435 best_rmsd2 = float(
'inf')
2437 if self.issymmetricsel[sels0[0]]:
2440 for offset
in range(N):
2441 sels = [sels0[(offset+i) % N]
for i
in range(N)]
2444 r = metric(sel0, sel1)
2446 if rmsd2 < best_rmsd2:
2450 for sels
in itertools.permutations(sels0):
2452 for sel0, sel1
in itertools.takewhile(
2453 lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2454 r = metric(sel0, sel1)
2456 if rmsd2 < best_rmsd2:
2459 return best_sel, best_rmsd2
2461 def compute_all_pairwise_rmsd(self):
2462 for d0
in self.stath0:
2463 for d1
in self.stath1:
2464 rmsd, _ = self.
rmsd()
2466 def rmsd(self, metric=IMP.atom.get_rmsd):
2468 Computes the RMSD. Resolves ambiguous pairs assignments
2472 n0 = self.stath0.current_index
2473 n1 = self.stath1.current_index
2474 if ((n0, n1)
in self.pairwise_rmsd) \
2475 and ((n0, n1)
in self.pairwise_molecular_assignment):
2476 return (self.pairwise_rmsd[(n0, n1)],
2477 self.pairwise_molecular_assignment[(n0, n1)])
2487 molecular_assignment = {}
2488 for molname, sels0
in self.seldict0.items():
2489 sels_best_order, best_rmsd2 = \
2490 self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2492 Ncoords = len(sels_best_order[0].get_selected_particles())
2493 Ncopies = len(self.seldict1[molname])
2494 total_rmsd += Ncoords*best_rmsd2
2495 total_N += Ncoords*Ncopies
2497 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2498 p0 = sel0.get_selected_particles()[0]
2499 p1 = sel1.get_selected_particles()[0]
2504 molecular_assignment[(molname, c0)] = (molname, c1)
2506 total_rmsd = math.sqrt(total_rmsd/total_N)
2508 self.pairwise_rmsd[(n0, n1)] = total_rmsd
2509 self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2510 self.pairwise_rmsd[(n1, n0)] = total_rmsd
2511 self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2512 return total_rmsd, molecular_assignment
2516 Fix the reference structure for structural alignment, rmsd and
2519 @param reference can be either "Absolute" (cluster center of the
2520 first cluster) or Relative (cluster center of the current
2523 if reference ==
"Absolute":
2525 elif reference ==
"Relative":
2526 if cluster.center_index:
2527 n0 = cluster.center_index
2529 n0 = cluster.members[0]
2534 compute the molecular assignemnts between multiple copies
2535 of the same sequence. It changes the Copy index of Molecules
2538 _, molecular_assignment = self.
rmsd()
2539 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2540 mol0 = self.molcopydict0[m0][c0]
2541 mol1 = self.molcopydict1[m1][c1]
2544 p1.set_value(cik0, c0)
2548 Undo the Copy index assignment
2551 _, molecular_assignment = self.
rmsd()
2552 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2553 mol0 = self.molcopydict0[m0][c0]
2554 mol1 = self.molcopydict1[m1][c1]
2557 p1.set_value(cik0, c1)
2564 s =
"AnalysisReplicaExchange\n"
2565 s +=
"---- number of clusters %s \n" % str(len(self.clusters))
2566 s +=
"---- number of models %s \n" % str(len(self.stath0))
2569 def __getitem__(self, int_slice_adaptor):
2570 if isinstance(int_slice_adaptor, int):
2571 return self.clusters[int_slice_adaptor]
2572 elif isinstance(int_slice_adaptor, slice):
2573 return self.__iter__(int_slice_adaptor)
2575 raise TypeError(
"Unknown Type")
2578 return len(self.clusters)
2580 def __iter__(self, slice_key=None):
2581 if slice_key
is None:
2582 for i
in range(len(self)):
2585 for i
in range(len(self))[slice_key]:
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A class for reading stat files (either rmf or ascii v1 and v2)
A member of a rigid body, it has internal (local) coordinates.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
def merge_aggregates
merge the clusters that have close members
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
Mapping between FASTA one-letter codes and residue types.
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
A macro to help setup and run replica exchange.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
Sample using replica exchange.
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.