1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
18 from pathlib
import Path
20 from IMP._compat_pathlib
import Path
22 from operator
import itemgetter
23 from collections
import defaultdict
30 class _MockMPIValues(object):
31 """Replace samplers.MPI_values when in test mode"""
32 def get_percentile(self, name):
36 class _RMFRestraints(object):
37 """All restraints that are written out to the RMF file"""
38 def __init__(self, model, user_restraints):
40 self._user_restraints = user_restraints
if user_restraints
else []
43 return (len(self._user_restraints)
44 + self._rmf_rs.get_number_of_restraints())
48 __nonzero__ = __bool__
50 def __getitem__(self, i):
51 class FakePMIWrapper(object):
52 def __init__(self, r):
53 self.r = IMP.RestraintSet.get_from(r)
55 def get_restraint(self):
58 lenuser = len(self._user_restraints)
60 return self._user_restraints[i]
61 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
62 r = self._rmf_rs.get_restraint(i - lenuser)
63 return FakePMIWrapper(r)
65 raise IndexError(
"Out of range")
69 """A macro to help setup and run replica exchange.
70 Supports Monte Carlo and molecular dynamics.
71 Produces trajectory RMF files, best PDB structures,
72 and output stat files.
75 monte_carlo_sample_objects=
None,
76 molecular_dynamics_sample_objects=
None,
78 rmf_output_objects=
None,
79 monte_carlo_temperature=1.0,
80 simulated_annealing=
False,
81 simulated_annealing_minimum_temperature=1.0,
82 simulated_annealing_maximum_temperature=2.5,
83 simulated_annealing_minimum_temperature_nframes=100,
84 simulated_annealing_maximum_temperature_nframes=100,
85 replica_exchange_minimum_temperature=1.0,
86 replica_exchange_maximum_temperature=2.5,
87 replica_exchange_swap=
True,
89 number_of_best_scoring_models=500,
92 molecular_dynamics_steps=10,
93 molecular_dynamics_max_time_step=1.0,
94 number_of_frames=1000,
95 save_coordinates_mode=
"lowest_temperature",
96 nframes_write_coordinates=1,
97 write_initial_rmf=
True,
98 initial_rmf_name_suffix=
"initial",
99 stat_file_name_suffix=
"stat",
100 best_pdb_name_suffix=
"model",
103 do_create_directories=
True,
104 global_output_directory=
"./",
106 best_pdb_dir=
"pdbs/",
107 replica_stat_file_suffix=
"stat_replica",
108 em_object_for_rmf=
None,
110 replica_exchange_object=
None,
114 @param model The IMP model
115 @param root_hier Top-level (System)hierarchy
116 @param monte_carlo_sample_objects Objects for MC sampling, which
117 should generally be a simple list of Mover objects, e.g.
118 from DegreesOfFreedom.get_movers().
119 @param molecular_dynamics_sample_objects Objects for MD sampling,
120 which should generally be a simple list of particles.
121 @param output_objects A list of structural objects and restraints
122 that will be included in output (ie, statistics "stat"
123 files). Any object that provides a get_output() method
124 can be used here. If None is passed
125 the macro will not write stat files.
126 @param rmf_output_objects A list of structural objects and
127 restraints that will be included in rmf. Any object
128 that provides a get_output() method can be used here.
129 @param monte_carlo_temperature MC temp (may need to be optimized
130 based on post-sampling analysis)
131 @param simulated_annealing If True, perform simulated annealing
132 @param simulated_annealing_minimum_temperature Should generally be
133 the same as monte_carlo_temperature.
134 @param simulated_annealing_minimum_temperature_nframes Number of
135 frames to compute at minimum temperature.
136 @param simulated_annealing_maximum_temperature_nframes Number of
138 temps > simulated_annealing_maximum_temperature.
139 @param replica_exchange_minimum_temperature Low temp for REX; should
140 generally be the same as monte_carlo_temperature.
141 @param replica_exchange_maximum_temperature High temp for REX
142 @param replica_exchange_swap Boolean, enable disable temperature
144 @param num_sample_rounds Number of rounds of MC/MD per cycle
145 @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
146 models to keep around for analysis.
147 @param mmcif If True, write best scoring models in mmCIF format;
148 if False (the default), write in legacy PDB format.
149 @param best_pdb_dir The directory under `global_output_directory`
150 where best-scoring PDB/mmCIF files are written.
151 @param best_pdb_name_suffix Part of the file name for best-scoring
153 @param monte_carlo_steps Number of MC steps per round
154 @param self_adaptive self adaptive scheme for Monte Carlo movers
155 @param molecular_dynamics_steps Number of MD steps per round
156 @param molecular_dynamics_max_time_step Max time step for MD
157 @param number_of_frames Number of REX frames to run
158 @param save_coordinates_mode string: how to save coordinates.
159 "lowest_temperature" (default) only the lowest temperatures
161 "25th_score" all replicas whose score is below the 25th
163 "50th_score" all replicas whose score is below the 50th
165 "75th_score" all replicas whose score is below the 75th
167 @param nframes_write_coordinates How often to write the coordinates
169 @param write_initial_rmf Write the initial configuration
170 @param global_output_directory Folder that will be created to house
172 @param test_mode Set to True to avoid writing any files, just test
174 @param score_moved If True, attempt to speed up Monte Carlo
175 sampling by caching scoring function terms on particles
182 self.output_objects = output_objects
183 self.rmf_output_objects = rmf_output_objects
185 and root_hier.get_name() ==
'System'):
186 if self.output_objects
is not None:
187 self.output_objects.append(
189 if self.rmf_output_objects
is not None:
190 self.rmf_output_objects.append(
192 self.root_hier = root_hier
193 states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
194 self.vars[
"number_of_states"] = len(states)
196 self.root_hiers = states
197 self.is_multi_state =
True
199 self.root_hier = root_hier
200 self.is_multi_state =
False
202 raise TypeError(
"Must provide System hierarchy (root_hier)")
204 self._rmf_restraints = _RMFRestraints(model,
None)
205 self.em_object_for_rmf = em_object_for_rmf
206 self.monte_carlo_sample_objects = monte_carlo_sample_objects
207 self.vars[
"self_adaptive"] = self_adaptive
208 self.molecular_dynamics_sample_objects = \
209 molecular_dynamics_sample_objects
210 self.replica_exchange_object = replica_exchange_object
211 self.molecular_dynamics_max_time_step = \
212 molecular_dynamics_max_time_step
213 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
214 self.vars[
"replica_exchange_minimum_temperature"] = \
215 replica_exchange_minimum_temperature
216 self.vars[
"replica_exchange_maximum_temperature"] = \
217 replica_exchange_maximum_temperature
218 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
219 self.vars[
"simulated_annealing"] = simulated_annealing
220 self.vars[
"simulated_annealing_minimum_temperature"] = \
221 simulated_annealing_minimum_temperature
222 self.vars[
"simulated_annealing_maximum_temperature"] = \
223 simulated_annealing_maximum_temperature
224 self.vars[
"simulated_annealing_minimum_temperature_nframes"] = \
225 simulated_annealing_minimum_temperature_nframes
226 self.vars[
"simulated_annealing_maximum_temperature_nframes"] = \
227 simulated_annealing_maximum_temperature_nframes
229 self.vars[
"num_sample_rounds"] = num_sample_rounds
231 "number_of_best_scoring_models"] = number_of_best_scoring_models
232 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
233 self.vars[
"molecular_dynamics_steps"] = molecular_dynamics_steps
234 self.vars[
"number_of_frames"] = number_of_frames
235 if save_coordinates_mode
not in (
"lowest_temperature",
"25th_score",
236 "50th_score",
"75th_score"):
237 raise Exception(
"save_coordinates_mode has unrecognized value")
239 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
240 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
241 self.vars[
"write_initial_rmf"] = write_initial_rmf
242 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
243 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
244 self.vars[
"mmcif"] = mmcif
245 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
246 self.vars[
"do_clean_first"] = do_clean_first
247 self.vars[
"do_create_directories"] = do_create_directories
248 self.vars[
"global_output_directory"] = global_output_directory
249 self.vars[
"rmf_dir"] = rmf_dir
250 self.vars[
"best_pdb_dir"] = best_pdb_dir
251 self.vars[
"atomistic"] = atomistic
252 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
253 self.vars[
"geometries"] =
None
254 self.test_mode = test_mode
255 self.score_moved = score_moved
258 if self.vars[
"geometries"]
is None:
259 self.vars[
"geometries"] = list(geometries)
261 self.vars[
"geometries"].extend(geometries)
264 print(
"ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
265 "rmfs/*.rmf3 for each replica ")
266 print(
"--- it stores the best scoring pdb models in pdbs/")
267 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
268 "lowest temperature")
269 print(
"--- variables:")
270 keys = list(self.vars.keys())
273 print(
"------", v.ljust(30), self.vars[v])
275 def get_replica_exchange_object(self):
276 return self.replica_exchange_object
278 def _add_provenance(self, sampler_md, sampler_mc):
279 """Record details about the sampling in the IMP Hierarchies"""
282 method =
"Molecular Dynamics"
283 iterations += self.vars[
"molecular_dynamics_steps"]
285 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
286 iterations += self.vars[
"monte_carlo_steps"]
288 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
290 iterations *= self.vars[
"num_sample_rounds"]
292 pi = self.model.add_particle(
"sampling")
294 self.model, pi, method, self.vars[
"number_of_frames"],
296 p.set_number_of_replicas(
297 self.replica_exchange_object.get_number_of_replicas())
298 IMP.pmi.tools._add_pmi_provenance(self.root_hier)
301 def execute_macro(self):
302 temp_index_factor = 100000.0
306 if self.monte_carlo_sample_objects
is not None:
307 print(
"Setting up MonteCarlo")
309 self.model, self.monte_carlo_sample_objects,
310 self.vars[
"monte_carlo_temperature"],
311 score_moved=self.score_moved)
312 if self.vars[
"simulated_annealing"]:
313 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
314 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
316 "simulated_annealing_minimum_temperature_nframes"]
318 "simulated_annealing_maximum_temperature_nframes"]
319 sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
320 if self.vars[
"self_adaptive"]:
321 sampler_mc.set_self_adaptive(
322 isselfadaptive=self.vars[
"self_adaptive"])
323 if self.output_objects
is not None:
324 self.output_objects.append(sampler_mc)
325 if self.rmf_output_objects
is not None:
326 self.rmf_output_objects.append(sampler_mc)
327 samplers.append(sampler_mc)
329 if self.molecular_dynamics_sample_objects
is not None:
330 print(
"Setting up MolecularDynamics")
332 self.model, self.molecular_dynamics_sample_objects,
333 self.vars[
"monte_carlo_temperature"],
334 maximum_time_step=self.molecular_dynamics_max_time_step)
335 if self.vars[
"simulated_annealing"]:
336 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
337 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
339 "simulated_annealing_minimum_temperature_nframes"]
341 "simulated_annealing_maximum_temperature_nframes"]
342 sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
343 if self.output_objects
is not None:
344 self.output_objects.append(sampler_md)
345 if self.rmf_output_objects
is not None:
346 self.rmf_output_objects.append(sampler_md)
347 samplers.append(sampler_md)
350 print(
"Setting up ReplicaExchange")
352 self.model, self.vars[
"replica_exchange_minimum_temperature"],
353 self.vars[
"replica_exchange_maximum_temperature"], samplers,
354 replica_exchange_object=self.replica_exchange_object)
355 self.replica_exchange_object = rex.rem
357 myindex = rex.get_my_index()
358 if self.output_objects
is not None:
359 self.output_objects.append(rex)
360 if self.rmf_output_objects
is not None:
361 self.rmf_output_objects.append(rex)
365 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
369 globaldir = self.vars[
"global_output_directory"] +
"/"
370 rmf_dir = globaldir + self.vars[
"rmf_dir"]
371 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
373 if not self.test_mode:
374 if self.vars[
"do_clean_first"]:
377 if self.vars[
"do_create_directories"]:
380 os.makedirs(globaldir)
388 if not self.is_multi_state:
394 for n
in range(self.vars[
"number_of_states"]):
396 os.makedirs(pdb_dir +
"/" + str(n))
403 if self.output_objects
is not None:
404 self.output_objects.append(sw)
405 if self.rmf_output_objects
is not None:
406 self.rmf_output_objects.append(sw)
408 print(
"Setting up stat file")
410 low_temp_stat_file = globaldir + \
411 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
414 if not self.test_mode:
417 if not self.test_mode:
418 if self.output_objects
is not None:
419 output.init_stat2(low_temp_stat_file,
421 extralabels=[
"rmf_file",
"rmf_frame_index"])
423 print(
"Stat file writing is disabled")
425 if self.rmf_output_objects
is not None:
426 print(
"Stat info being written in the rmf file")
428 print(
"Setting up replica stat file")
429 replica_stat_file = globaldir + \
430 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
431 if not self.test_mode:
432 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
434 if not self.test_mode:
435 print(
"Setting up best pdb files")
436 if not self.is_multi_state:
437 if self.vars[
"number_of_best_scoring_models"] > 0:
438 output.init_pdb_best_scoring(
439 pdb_dir +
"/" + self.vars[
"best_pdb_name_suffix"],
441 self.vars[
"number_of_best_scoring_models"],
442 replica_exchange=
True,
443 mmcif=self.vars[
'mmcif'],
444 best_score_file=globaldir +
"best.scores.rex.py")
445 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
447 pdb_dir +
"/" +
"model.psf",
449 self.vars[
"best_pdb_name_suffix"] + pdbext)
451 if self.vars[
"number_of_best_scoring_models"] > 0:
452 for n
in range(self.vars[
"number_of_states"]):
453 output.init_pdb_best_scoring(
454 pdb_dir +
"/" + str(n) +
"/" +
455 self.vars[
"best_pdb_name_suffix"],
457 self.vars[
"number_of_best_scoring_models"],
458 replica_exchange=
True,
459 mmcif=self.vars[
'mmcif'],
460 best_score_file=globaldir +
"best.scores.rex.py")
461 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
463 pdb_dir +
"/" + str(n) +
"/" +
"model.psf",
464 pdb_dir +
"/" + str(n) +
"/" +
465 self.vars[
"best_pdb_name_suffix"] + pdbext)
468 if self.em_object_for_rmf
is not None:
469 output_hierarchies = [
471 self.em_object_for_rmf.get_density_as_hierarchy(
474 output_hierarchies = [self.root_hier]
476 if not self.test_mode:
477 print(
"Setting up and writing initial rmf coordinate file")
478 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
479 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
481 listofobjects=self.rmf_output_objects)
482 if self._rmf_restraints:
483 output.add_restraints_to_rmf(
484 init_suffix +
"." + str(myindex) +
".rmf3",
485 self._rmf_restraints)
486 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
487 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
489 if not self.test_mode:
490 mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
492 mpivs = _MockMPIValues()
494 self._add_provenance(sampler_md, sampler_mc)
496 if not self.test_mode:
497 print(
"Setting up production rmf files")
498 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
499 output.init_rmf(rmfname, output_hierarchies,
500 geometries=self.vars[
"geometries"],
501 listofobjects=self.rmf_output_objects)
503 if self._rmf_restraints:
504 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
506 ntimes_at_low_temp = 0
510 self.replica_exchange_object.set_was_used(
True)
511 nframes = self.vars[
"number_of_frames"]
514 for i
in range(nframes):
518 for nr
in range(self.vars[
"num_sample_rounds"]):
519 if sampler_md
is not None:
521 self.vars[
"molecular_dynamics_steps"])
522 if sampler_mc
is not None:
523 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
525 self.model).evaluate(
False)
526 mpivs.set_value(
"score", score)
527 output.set_output_entry(
"score", score)
529 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
531 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
532 save_frame = (min_temp_index == my_temp_index)
533 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
534 score_perc = mpivs.get_percentile(
"score")
535 save_frame = (score_perc*100.0 <= 25.0)
536 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
537 score_perc = mpivs.get_percentile(
"score")
538 save_frame = (score_perc*100.0 <= 50.0)
539 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
540 score_perc = mpivs.get_percentile(
"score")
541 save_frame = (score_perc*100.0 <= 75.0)
544 if save_frame
and not self.test_mode:
548 print(
"--- frame %s score %s " % (str(i), str(score)))
550 if not self.test_mode:
551 if i % self.vars[
"nframes_write_coordinates"] == 0:
552 print(
'--- writing coordinates')
553 if self.vars[
"number_of_best_scoring_models"] > 0:
554 output.write_pdb_best_scoring(score)
555 output.write_rmf(rmfname)
556 output.set_output_entry(
"rmf_file", rmfname)
557 output.set_output_entry(
"rmf_frame_index",
560 output.set_output_entry(
"rmf_file", rmfname)
561 output.set_output_entry(
"rmf_frame_index",
'-1')
562 if self.output_objects
is not None:
563 output.write_stat2(low_temp_stat_file)
564 ntimes_at_low_temp += 1
566 if not self.test_mode:
567 output.write_stat2(replica_stat_file)
568 if self.vars[
"replica_exchange_swap"]:
569 rex.swap_temp(i, score)
570 for p, state
in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
571 p.add_replica_exchange(state, self)
573 if not self.test_mode:
574 print(
"closing production rmf files")
575 output.close_rmf(rmfname)
583 class BuildSystem(object):
584 """A macro to build a IMP::pmi::topology::System based on a
585 TopologyReader object.
587 Easily create multi-state systems by calling this macro
588 repeatedly with different TopologyReader objects!
589 A useful function is get_molecules() which returns the PMI Molecules
590 grouped by state as a dictionary with key = (molecule name),
591 value = IMP.pmi.topology.Molecule
592 Quick multi-state system:
595 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
596 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
597 bs = IMP.pmi.macros.BuildSystem(model)
598 bs.add_state(reader1)
599 bs.add_state(reader2)
600 bs.execute_macro() # build everything including degrees of freedom
601 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
602 ### now you have a two state system, you add restraints etc
604 @note The "domain name" entry of the topology reader is not used.
605 All molecules are set up by the component name, but split into rigid bodies
609 _alphabets = {
'DNA': IMP.pmi.alphabets.dna,
610 'RNA': IMP.pmi.alphabets.rna}
612 def __init__(self, model, sequence_connectivity_scale=4.0,
613 force_create_gmm_files=
False, resolutions=[1, 10]):
615 @param model An IMP Model
616 @param sequence_connectivity_scale For scaling the connectivity
618 @param force_create_gmm_files If True, will sample and create GMMs
619 no matter what. If False, will only sample if the
620 files don't exist. If number of Gaussians is zero, won't
622 @param resolutions The resolutions to build for structured regions
629 self._domain_res = []
631 self.force_create_gmm_files = force_create_gmm_files
632 self.resolutions = resolutions
634 def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
636 """Add a state using the topology info in a
637 IMP::pmi::topology::TopologyReader object.
638 When you are done adding states, call execute_macro()
639 @param reader The TopologyReader object
640 @param fasta_name_map dictionary for converting protein names
641 found in the fasta file
642 @param chain_ids A list or string of chain IDs for assigning to
643 newly-created molecules, e.g.
644 `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
645 If not specified, chain IDs A through Z are assigned, then
646 AA through AZ, then BA through BZ, and so on, in the same
649 state = self.system.create_state()
650 self._readers.append(reader)
652 these_domain_res = {}
654 if chain_ids
is None:
655 chain_ids = IMP.pmi.output._ChainIDs()
660 for molname
in reader.get_molecules():
661 copies = reader.get_molecules()[molname].domains
662 for nc, copyname
in enumerate(copies):
663 print(
"BuildSystem.add_state: setting up molecule %s copy "
664 "number %s" % (molname, str(nc)))
665 copy = copies[copyname]
668 all_chains = [c
for c
in copy
if c.chain
is not None]
670 chain_id = all_chains[0].chain
672 chain_id = chain_ids[numchain]
674 "No PDBs specified for %s, so keep_chain_id has "
675 "no effect; using default chain ID '%s'"
678 chain_id = chain_ids[numchain]
680 alphabet = IMP.pmi.alphabets.amino_acid
681 fasta_flag = copy[0].fasta_flag
682 if fasta_flag
in self._alphabets:
683 alphabet = self._alphabets[fasta_flag]
685 copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
686 print(
"BuildSystem.add_state: molecule %s sequence has "
687 "%s residues" % (molname, len(seq)))
688 orig_mol = state.create_molecule(molname, seq, chain_id,
693 print(
"BuildSystem.add_state: creating a copy for "
694 "molecule %s" % molname)
695 mol = orig_mol.create_copy(chain_id)
698 for domainnumber, domain
in enumerate(copy):
699 print(
"BuildSystem.add_state: ---- setting up domain %s "
700 "of molecule %s" % (domainnumber, molname))
703 these_domains[domain.get_unique_name()] = domain
704 if domain.residue_range == []
or \
705 domain.residue_range
is None:
706 domain_res = mol.get_residues()
708 start = domain.residue_range[0]+domain.pdb_offset
709 if domain.residue_range[1] ==
'END':
710 end = len(mol.sequence)
712 end = domain.residue_range[1]+domain.pdb_offset
713 domain_res = mol.residue_range(start-1, end-1)
714 print(
"BuildSystem.add_state: -------- domain %s of "
715 "molecule %s extends from residue %s to "
717 % (domainnumber, molname, start, end))
718 if domain.pdb_file ==
"BEADS":
719 print(
"BuildSystem.add_state: -------- domain %s of "
720 "molecule %s represented by BEADS "
721 % (domainnumber, molname))
722 mol.add_representation(
724 resolutions=[domain.bead_size],
725 setup_particles_as_densities=(
726 domain.em_residues_per_gaussian != 0),
728 these_domain_res[domain.get_unique_name()] = \
730 elif domain.pdb_file ==
"IDEAL_HELIX":
731 print(
"BuildSystem.add_state: -------- domain %s of "
732 "molecule %s represented by IDEAL_HELIX "
733 % (domainnumber, molname))
734 emper = domain.em_residues_per_gaussian
735 mol.add_representation(
737 resolutions=self.resolutions,
739 density_residues_per_component=emper,
740 density_prefix=domain.density_prefix,
741 density_force_compute=self.force_create_gmm_files,
743 these_domain_res[domain.get_unique_name()] = \
746 print(
"BuildSystem.add_state: -------- domain %s of "
747 "molecule %s represented by pdb file %s "
748 % (domainnumber, molname, domain.pdb_file))
749 domain_atomic = mol.add_structure(domain.pdb_file,
751 domain.residue_range,
754 domain_non_atomic = domain_res - domain_atomic
755 if not domain.em_residues_per_gaussian:
756 mol.add_representation(
757 domain_atomic, resolutions=self.resolutions,
759 if len(domain_non_atomic) > 0:
760 mol.add_representation(
762 resolutions=[domain.bead_size],
765 print(
"BuildSystem.add_state: -------- domain %s "
766 "of molecule %s represented by gaussians "
767 % (domainnumber, molname))
768 emper = domain.em_residues_per_gaussian
769 creategmm = self.force_create_gmm_files
770 mol.add_representation(
772 resolutions=self.resolutions,
773 density_residues_per_component=emper,
774 density_prefix=domain.density_prefix,
775 density_force_compute=creategmm,
777 if len(domain_non_atomic) > 0:
778 mol.add_representation(
780 resolutions=[domain.bead_size],
781 setup_particles_as_densities=
True,
783 these_domain_res[domain.get_unique_name()] = (
784 domain_atomic, domain_non_atomic)
785 self._domain_res.append(these_domain_res)
786 self._domains.append(these_domains)
787 print(
'BuildSystem.add_state: State', len(self.system.states),
'added')
790 """Return list of all molecules grouped by state.
791 For each state, it's a dictionary of Molecules where key is the
794 return [s.get_molecules()
for s
in self.system.get_states()]
796 def get_molecule(self, molname, copy_index=0, state_index=0):
797 return self.system.get_states()[state_index].
get_molecules()[
801 max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
802 """Builds representations and sets up degrees of freedom"""
803 print(
"BuildSystem.execute_macro: building representations")
804 self.root_hier = self.system.build()
806 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
808 for nstate, reader
in enumerate(self._readers):
809 rbs = reader.get_rigid_bodies()
810 srbs = reader.get_super_rigid_bodies()
811 csrbs = reader.get_chains_of_super_rigid_bodies()
814 domains_in_rbs = set()
816 print(
"BuildSystem.execute_macro: -------- building rigid "
817 "body %s" % (str(rblist)))
818 all_res = IMP.pmi.tools.OrderedSet()
819 bead_res = IMP.pmi.tools.OrderedSet()
821 domain = self._domains[nstate][dname]
822 print(
"BuildSystem.execute_macro: -------- adding %s"
824 all_res |= self._domain_res[nstate][dname][0]
825 bead_res |= self._domain_res[nstate][dname][1]
826 domains_in_rbs.add(dname)
828 print(
"BuildSystem.execute_macro: -------- creating rigid "
829 "body with max_trans %s max_rot %s "
830 "non_rigid_max_trans %s"
831 % (str(max_rb_trans), str(max_rb_rot),
832 str(max_bead_trans)))
833 self.dof.create_rigid_body(all_res,
834 nonrigid_parts=bead_res,
835 max_trans=max_rb_trans,
837 nonrigid_max_trans=max_bead_trans,
838 name=
"RigidBody %s" % dname)
841 for dname, domain
in self._domains[nstate].items():
842 if dname
not in domains_in_rbs:
843 if domain.pdb_file !=
"BEADS":
845 "No rigid bodies set for %s. Residues read from "
846 "the PDB file will not be sampled - only regions "
847 "missing from the PDB will be treated flexibly. "
848 "To sample the entire sequence, use BEADS instead "
849 "of a PDB file name" % dname,
851 self.dof.create_flexible_beads(
852 self._domain_res[nstate][dname][1],
853 max_trans=max_bead_trans)
857 print(
"BuildSystem.execute_macro: -------- building "
858 "super rigid body %s" % (str(srblist)))
859 all_res = IMP.pmi.tools.OrderedSet()
860 for dname
in srblist:
861 print(
"BuildSystem.execute_macro: -------- adding %s"
863 all_res |= self._domain_res[nstate][dname][0]
864 all_res |= self._domain_res[nstate][dname][1]
866 print(
"BuildSystem.execute_macro: -------- creating super "
867 "rigid body with max_trans %s max_rot %s "
868 % (str(max_srb_trans), str(max_srb_rot)))
869 self.dof.create_super_rigid_body(
870 all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
873 for csrblist
in csrbs:
874 all_res = IMP.pmi.tools.OrderedSet()
875 for dname
in csrblist:
876 all_res |= self._domain_res[nstate][dname][0]
877 all_res |= self._domain_res[nstate][dname][1]
878 all_res = list(all_res)
879 all_res.sort(key=
lambda r: r.get_index())
880 self.dof.create_main_chain_mover(all_res)
881 return self.root_hier, self.dof
886 """A macro for running all the basic operations of analysis.
887 Includes clustering, precision analysis, and making ensemble density maps.
888 A number of plots are also supported.
891 merge_directories=[
"./"],
892 stat_file_name_suffix=
"stat",
893 best_pdb_name_suffix=
"model",
895 do_create_directories=
True,
896 global_output_directory=
"output/",
897 replica_stat_file_suffix=
"stat_replica",
898 global_analysis_result_directory=
"./analysis/",
901 @param model The IMP model
902 @param stat_file_name_suffix
903 @param merge_directories The directories containing output files
904 @param best_pdb_name_suffix
905 @param do_clean_first
906 @param do_create_directories
907 @param global_output_directory Where everything is
908 @param replica_stat_file_suffix
909 @param global_analysis_result_directory
910 @param test_mode If True, nothing is changed on disk
914 from mpi4py
import MPI
915 self.comm = MPI.COMM_WORLD
916 self.rank = self.comm.Get_rank()
917 self.number_of_processes = self.comm.size
920 self.number_of_processes = 1
922 self.test_mode = test_mode
923 self._protocol_output = []
924 self.cluster_obj =
None
926 stat_dir = global_output_directory
929 for rd
in merge_directories:
930 stat_files = glob.glob(os.path.join(rd, stat_dir,
"stat.*.out"))
931 if len(stat_files) == 0:
932 warnings.warn(
"no stat files found in %s"
933 % os.path.join(rd, stat_dir),
935 self.stat_files += stat_files
938 """Capture details of the modeling protocol.
939 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
942 self._protocol_output.append((p, p._last_state))
945 score_key=
"Total_Score",
946 rmf_file_key=
"rmf_file",
947 rmf_file_frame_key=
"rmf_frame_index",
950 nframes_trajectory=10000):
951 """ Get a trajectory of the modeling run, for generating
954 @param score_key The score for ranking models
955 @param rmf_file_key Key pointing to RMF filename
956 @param rmf_file_frame_key Key pointing to RMF frame number
957 @param outputdir The local output directory used in the run
958 @param get_every Extract every nth frame
959 @param nframes_trajectory Total number of frames of the trajectory
964 self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
966 score_list = list(map(float, trajectory_models[2]))
968 max_score = max(score_list)
969 min_score = min(score_list)
971 bins = [(max_score-min_score)*math.exp(-float(i))+min_score
972 for i
in range(nframes_trajectory)]
973 binned_scores = [
None]*nframes_trajectory
974 binned_model_indexes = [-1]*nframes_trajectory
976 for model_index, s
in enumerate(score_list):
977 bins_score_diffs = [abs(s-b)
for b
in bins]
978 bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
979 if binned_scores[bin_index]
is None:
980 binned_scores[bin_index] = s
981 binned_model_indexes[bin_index] = model_index
983 old_diff = abs(binned_scores[bin_index]-bins[bin_index])
984 new_diff = abs(s-bins[bin_index])
985 if new_diff < old_diff:
986 binned_scores[bin_index] = s
987 binned_model_indexes[bin_index] = model_index
990 print(binned_model_indexes)
992 def _expand_ambiguity(self, prot, d):
993 """If using PMI2, expand the dictionary to include copies as
996 This also keeps the states separate.
1001 if '..' in key
or (isinstance(val, tuple)
and len(val) >= 3):
1004 states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1005 if isinstance(val, tuple):
1013 for nst
in range(len(states)):
1015 copies = sel.get_selected_particles(with_representation=
False)
1017 for nc
in range(len(copies)):
1019 newdict[
'%s.%i..%i' % (name, nst, nc)] = \
1020 (start, stop, name, nc, nst)
1022 newdict[
'%s..%i' % (name, nc)] = \
1023 (start, stop, name, nc, nst)
1029 score_key=
"Total_Score",
1030 rmf_file_key=
"rmf_file",
1031 rmf_file_frame_key=
"rmf_frame_index",
1033 prefiltervalue=
None,
1036 alignment_components=
None,
1037 number_of_best_scoring_models=10,
1038 rmsd_calculation_components=
None,
1039 distance_matrix_file=
'distances.mat',
1040 load_distance_matrix_file=
False,
1041 skip_clustering=
False,
1042 number_of_clusters=1,
1044 exit_after_display=
True,
1046 first_and_last_frames=
None,
1047 density_custom_ranges=
None,
1048 write_pdb_with_centered_coordinates=
False,
1050 """Get the best scoring models, compute a distance matrix,
1051 cluster them, and create density maps.
1053 Tuple format: "molname" just the molecule,
1054 or (start,stop,molname,copy_num(optional),state_num(optional)
1055 Can pass None for copy or state to ignore that field.
1056 If you don't pass a specific copy number
1058 @param score_key The score for ranking models.
1059 @param rmf_file_key Key pointing to RMF filename
1060 @param rmf_file_frame_key Key pointing to RMF frame number
1061 @param state_number State number to analyze
1062 @param prefiltervalue Only include frames where the
1063 score key is below this value
1064 @param feature_keys Keywords for which you want to
1065 calculate average, medians, etc.
1066 If you pass "Keyname" it'll include everything that matches
1068 @param outputdir The local output directory used in
1070 @param alignment_components Dictionary with keys=groupname,
1071 values are tuples for aligning the structures
1072 e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1073 @param number_of_best_scoring_models Num models to keep per run
1074 @param rmsd_calculation_components For calculating RMSD
1075 (same format as alignment_components)
1076 @param distance_matrix_file Where to store/read the
1078 @param load_distance_matrix_file Try to load the distance
1080 @param skip_clustering Just extract the best scoring
1081 models and save the pdbs
1082 @param number_of_clusters Number of k-means clusters
1083 @param display_plot Display the distance matrix
1084 @param exit_after_display Exit after displaying distance
1086 @param get_every Extract every nth frame
1087 @param first_and_last_frames A tuple with the first and last
1088 frames to be analyzed. Values are percentages!
1089 Default: get all frames
1090 @param density_custom_ranges For density calculation
1091 (same format as alignment_components)
1092 @param write_pdb_with_centered_coordinates
1093 @param voxel_size Used for the density output
1097 self._outputdir = Path(outputdir).absolute()
1098 self._number_of_clusters = number_of_clusters
1099 for p, state
in self._protocol_output:
1100 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1111 if not load_distance_matrix_file:
1112 if len(self.stat_files) == 0:
1113 print(
"ERROR: no stat file found in the given path")
1115 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1116 self.stat_files, self.number_of_processes)[self.rank]
1119 for k
in (score_key, rmf_file_key, rmf_file_frame_key):
1120 if k
in feature_keys:
1122 "no need to pass " + k +
" to feature_keys.",
1124 feature_keys.remove(k)
1127 my_stat_files, score_key, feature_keys, rmf_file_key,
1128 rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1129 rmf_file_list = best_models[0]
1130 rmf_file_frame_list = best_models[1]
1131 score_list = best_models[2]
1132 feature_keyword_list_dict = best_models[3]
1138 if self.number_of_processes > 1:
1142 rmf_file_frame_list)
1143 for k
in feature_keyword_list_dict:
1144 feature_keyword_list_dict[k] = \
1146 feature_keyword_list_dict[k])
1149 score_rmf_tuples = list(zip(score_list,
1151 rmf_file_frame_list,
1152 list(range(len(score_list)))))
1154 if density_custom_ranges:
1155 for k
in density_custom_ranges:
1156 if not isinstance(density_custom_ranges[k], list):
1157 raise Exception(
"Density custom ranges: values must "
1158 "be lists of tuples")
1161 if first_and_last_frames
is not None:
1162 nframes = len(score_rmf_tuples)
1163 first_frame = int(first_and_last_frames[0] * nframes)
1164 last_frame = int(first_and_last_frames[1] * nframes)
1165 if last_frame > len(score_rmf_tuples):
1167 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1170 best_score_rmf_tuples = sorted(
1172 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1173 best_score_rmf_tuples = [t+(n,)
for n, t
in
1174 enumerate(best_score_rmf_tuples)]
1176 prov.append(IMP.pmi.io.FilterProvenance(
1177 "Best scoring", 0, number_of_best_scoring_models))
1179 best_score_feature_keyword_list_dict = defaultdict(list)
1180 for tpl
in best_score_rmf_tuples:
1182 for f
in feature_keyword_list_dict:
1183 best_score_feature_keyword_list_dict[f].append(
1184 feature_keyword_list_dict[f][index])
1185 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1186 best_score_rmf_tuples,
1187 self.number_of_processes)[self.rank]
1190 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1191 self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1193 if rmsd_calculation_components
is not None:
1194 tmp = self._expand_ambiguity(
1195 prot_ahead, rmsd_calculation_components)
1196 if tmp != rmsd_calculation_components:
1197 print(
'Detected ambiguity, expand rmsd components to',
1199 rmsd_calculation_components = tmp
1200 if alignment_components
is not None:
1201 tmp = self._expand_ambiguity(prot_ahead,
1202 alignment_components)
1203 if tmp != alignment_components:
1204 print(
'Detected ambiguity, expand alignment '
1205 'components to', tmp)
1206 alignment_components = tmp
1212 self.model, my_best_score_rmf_tuples[0],
1213 rmsd_calculation_components, state_number=state_number)
1215 self.model, my_best_score_rmf_tuples, alignment_components,
1216 rmsd_calculation_components, state_number=state_number)
1224 all_coordinates = got_coords[0]
1227 alignment_coordinates = got_coords[1]
1230 rmsd_coordinates = got_coords[2]
1233 rmf_file_name_index_dict = got_coords[3]
1236 all_rmf_file_names = got_coords[4]
1242 if density_custom_ranges:
1244 density_custom_ranges, voxel=voxel_size)
1246 dircluster = os.path.join(outputdir,
1247 "all_models."+str(self.rank))
1253 os.mkdir(dircluster)
1256 clusstat = open(os.path.join(
1257 dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1258 for cnt, tpl
in enumerate(my_best_score_rmf_tuples):
1260 rmf_frame_number = tpl[2]
1263 for key
in best_score_feature_keyword_list_dict:
1265 best_score_feature_keyword_list_dict[key][index]
1269 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1270 self.model, rmf_frame_number, rmf_name)
1272 linking_successful = \
1273 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1274 self.model, prots, rs, rmf_frame_number,
1276 if not linking_successful:
1283 states = IMP.atom.get_by_type(
1284 prots[0], IMP.atom.STATE_TYPE)
1285 prot = states[state_number]
1287 prot = prots[state_number]
1292 coords_f1 = alignment_coordinates[cnt]
1294 coords_f2 = alignment_coordinates[cnt]
1297 coords_f1, coords_f2)
1298 transformation = Ali.align()[1]
1312 rb = rbm.get_rigid_body()
1322 out_pdb_fn = os.path.join(
1323 dircluster, str(cnt)+
"."+str(self.rank)+
".pdb")
1324 out_rmf_fn = os.path.join(
1325 dircluster, str(cnt)+
"."+str(self.rank)+
".rmf3")
1326 o.init_pdb(out_pdb_fn, prot)
1327 tc = write_pdb_with_centered_coordinates
1328 o.write_pdb(out_pdb_fn,
1329 translate_to_geometric_center=tc)
1331 tmp_dict[
"local_pdb_file_name"] = \
1332 os.path.basename(out_pdb_fn)
1333 tmp_dict[
"rmf_file_full_path"] = rmf_name
1334 tmp_dict[
"local_rmf_file_name"] = \
1335 os.path.basename(out_rmf_fn)
1336 tmp_dict[
"local_rmf_frame_number"] = 0
1338 clusstat.write(str(tmp_dict)+
"\n")
1344 h.set_name(
"System")
1346 o.init_rmf(out_rmf_fn, [h], rs)
1348 o.init_rmf(out_rmf_fn, [prot], rs)
1350 o.write_rmf(out_rmf_fn)
1351 o.close_rmf(out_rmf_fn)
1353 if density_custom_ranges:
1354 DensModule.add_subunits_density(prot)
1356 if density_custom_ranges:
1357 DensModule.write_mrc(path=dircluster)
1362 if self.number_of_processes > 1:
1368 rmf_file_name_index_dict)
1370 alignment_coordinates)
1377 [best_score_feature_keyword_list_dict,
1378 rmf_file_name_index_dict],
1384 print(
"setup clustering class")
1387 for n, model_coordinate_dict
in enumerate(all_coordinates):
1389 if (alignment_components
is not None
1390 and len(self.cluster_obj.all_coords) == 0):
1392 self.cluster_obj.set_template(alignment_coordinates[n])
1393 self.cluster_obj.fill(all_rmf_file_names[n],
1394 rmsd_coordinates[n])
1395 print(
"Global calculating the distance matrix")
1398 self.cluster_obj.dist_matrix()
1402 self.cluster_obj.do_cluster(number_of_clusters)
1405 self.cluster_obj.plot_matrix(
1406 figurename=os.path.join(outputdir,
1408 if exit_after_display:
1410 self.cluster_obj.save_distance_matrix_file(
1411 file_name=distance_matrix_file)
1418 print(
"setup clustering class")
1420 self.cluster_obj.load_distance_matrix_file(
1421 file_name=distance_matrix_file)
1422 print(
"clustering with %s clusters" % str(number_of_clusters))
1423 self.cluster_obj.do_cluster(number_of_clusters)
1424 [best_score_feature_keyword_list_dict,
1425 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1428 self.cluster_obj.plot_matrix(figurename=os.path.join(
1429 outputdir,
'dist_matrix.pdf'))
1430 if exit_after_display:
1432 if self.number_of_processes > 1:
1440 print(self.cluster_obj.get_cluster_labels())
1441 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1442 print(
"rank %s " % str(self.rank))
1443 print(
"cluster %s " % str(n))
1444 print(
"cluster label %s " % str(cl))
1445 print(self.cluster_obj.get_cluster_label_names(cl))
1447 len(self.cluster_obj.get_cluster_label_names(cl))
1449 prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1452 if density_custom_ranges:
1454 density_custom_ranges,
1457 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1459 os.mkdir(dircluster)
1465 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1466 clusstat = open(dircluster +
"stat.out",
"w")
1467 for k, structure_name
in enumerate(
1468 self.cluster_obj.get_cluster_label_names(cl)):
1471 tmp_dict.update(rmsd_dict)
1472 index = rmf_file_name_index_dict[structure_name]
1473 for key
in best_score_feature_keyword_list_dict:
1475 key] = best_score_feature_keyword_list_dict[
1481 rmf_name = structure_name.split(
"|")[0]
1482 rmf_frame_number = int(structure_name.split(
"|")[1])
1483 clusstat.write(str(tmp_dict) +
"\n")
1488 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1489 self.model, rmf_frame_number, rmf_name)
1491 linking_successful = \
1492 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1493 self.model, prots, rs, rmf_frame_number,
1495 if not linking_successful:
1501 states = IMP.atom.get_by_type(
1502 prots[0], IMP.atom.STATE_TYPE)
1503 prot = states[state_number]
1505 prot = prots[state_number]
1511 co = self.cluster_obj
1512 model_index = co.get_model_index_from_name(
1514 transformation = co.get_transformation_to_first_member(
1525 rb = rbm.get_rigid_body()
1534 if density_custom_ranges:
1535 DensModule.add_subunits_density(prot)
1540 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1541 o.write_pdb(dircluster + str(k) +
".pdb")
1547 h.set_name(
"System")
1549 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1551 o.init_rmf(dircluster + str(k) +
".rmf3", [prot], rs)
1552 o.write_rmf(dircluster + str(k) +
".rmf3")
1553 o.close_rmf(dircluster + str(k) +
".rmf3")
1558 if density_custom_ranges:
1559 DensModule.write_mrc(path=dircluster)
1562 if self.number_of_processes > 1:
1565 def get_cluster_rmsd(self, cluster_num):
1566 if self.cluster_obj
is None:
1568 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1570 def save_objects(self, objects, file_name):
1572 outf = open(file_name,
'wb')
1573 pickle.dump(objects, outf)
1576 def load_objects(self, file_name):
1578 inputf = open(file_name,
'rb')
1579 objects = pickle.load(inputf)
1587 This class contains analysis utilities to investigate ReplicaExchange
1595 def __init__(self, model, stat_files, best_models=None, score_key=None,
1598 Construction of the Class.
1599 @param model IMP.Model()
1600 @param stat_files list of string. Can be ascii stat files,
1602 @param best_models Integer. Number of best scoring models,
1603 if None: all models will be read
1604 @param alignment boolean (Default=True). Align before computing
1609 self.best_models = best_models
1611 model, stat_files, self.best_models, score_key, cache=
True)
1613 StatHierarchyHandler=self.stath0)
1626 self.clusters.append(c)
1627 for n0
in range(len(self.stath0)):
1629 self.pairwise_rmsd = {}
1630 self.pairwise_molecular_assignment = {}
1631 self.alignment = alignment
1632 self.symmetric_molecules = {}
1633 self.issymmetricsel = {}
1635 self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1637 self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1642 Setup the selection onto which the rmsd is computed
1643 @param kwargs use IMP.atom.Selection keywords
1651 Store names of symmetric molecules
1653 self.symmetric_molecules[molecule_name] = 0
1658 Setup the selection onto which the alignment is computed
1659 @param kwargs use IMP.atom.Selection keywords
1667 def clean_clusters(self):
1668 for c
in self.clusters:
1672 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1674 Cluster the models based on RMSD.
1675 @param rmsd_cutoff Float the distance cutoff in Angstrom
1676 @param metric (Default=IMP.atom.get_rmsd) the metric that will
1677 be used to compute rmsds
1679 self.clean_clusters()
1680 not_clustered = set(range(len(self.stath1)))
1681 while len(not_clustered) > 0:
1682 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1687 Refine the clusters by merging the ones whose centers are close
1688 @param rmsd_cutoff cutoff distance in Angstorms
1690 clusters_copy = self.clusters
1691 for c0, c1
in itertools.combinations(self.clusters, 2):
1692 if c0.center_index
is None:
1694 if c1.center_index
is None:
1696 _ = self.stath0[c0.center_index]
1697 _ = self.stath1[c1.center_index]
1698 rmsd, molecular_assignment = self.
rmsd()
1699 if rmsd <= rmsd_cutoff:
1700 if c1
in self.clusters:
1701 clusters_copy.remove(c1)
1703 self.clusters = clusters_copy
1710 def set_cluster_assignments(self, cluster_ids):
1711 if len(cluster_ids) != len(self.stath0):
1712 raise ValueError(
'cluster ids has to be same length as '
1716 for i
in sorted(list(set(cluster_ids))):
1718 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
1719 self.clusters[idx].add_member(i, d)
1723 Return the model data from a cluster
1724 @param cluster IMP.pmi.output.Cluster object
1733 Save the data for the whole models into a pickle file
1734 @param filename string
1736 self.stath0.save_data(filename)
1740 Set the data from an external IMP.pmi.output.Data
1741 @param data IMP.pmi.output.Data
1743 self.stath0.data = data
1744 self.stath1.data = data
1748 Load the data from an external pickled file
1749 @param filename string
1751 self.stath0.load_data(filename)
1752 self.stath1.load_data(filename)
1753 self.best_models = len(self.stath0)
1755 def add_cluster(self, rmf_name_list):
1757 print(
"creating cluster index "+str(len(self.clusters)))
1758 self.clusters.append(c)
1759 current_len = len(self.stath0)
1761 for rmf
in rmf_name_list:
1762 print(
"adding rmf "+rmf)
1763 self.stath0.add_stat_file(rmf)
1764 self.stath1.add_stat_file(rmf)
1766 for n0
in range(current_len, len(self.stath0)):
1767 d0 = self.stath0[n0]
1768 c.add_member(n0, d0)
1773 Save the clusters into a pickle file
1774 @param filename string
1777 import cPickle
as pickle
1780 fl = open(filename,
'wb')
1781 pickle.dump(self.clusters, fl)
1785 Load the clusters from a pickle file
1786 @param filename string
1787 @param append bool (Default=False), if True. append the clusters
1788 to the ones currently present
1791 import cPickle
as pickle
1794 fl = open(filename,
'rb')
1795 self.clean_clusters()
1797 self.clusters += pickle.load(fl)
1799 self.clusters = pickle.load(fl)
1808 Compute the cluster center for a given cluster
1810 member_distance = defaultdict(float)
1812 for n0, n1
in itertools.combinations(cluster.members, 2):
1815 rmsd, _ = self.
rmsd()
1816 member_distance[n0] += rmsd
1818 if len(member_distance) > 0:
1819 cluster.center_index = min(member_distance,
1820 key=member_distance.get)
1822 cluster.center_index = cluster.members[0]
1827 Save the coordinates of the current cluster a single rmf file
1829 print(
"saving coordinates", cluster)
1833 if rmf_name
is None:
1834 rmf_name = prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
1836 _ = self.stath1[cluster.members[0]]
1838 o.init_rmf(rmf_name, [self.stath1])
1839 for n1
in cluster.members:
1845 o.write_rmf(rmf_name)
1847 o.close_rmf(rmf_name)
1851 remove structures that are similar
1852 append it to a new cluster
1854 print(
"pruning models")
1856 filtered = [selected]
1857 remaining = range(1, len(self.stath1), 10)
1859 while len(remaining) > 0:
1860 d0 = self.stath0[selected]
1862 for n1
in remaining:
1867 if d <= rmsd_cutoff:
1869 print(
"pruning model %s, similar to model %s, rmsd %s"
1870 % (str(n1), str(selected), str(d)))
1871 remaining = [x
for x
in remaining
if x
not in rm]
1872 if len(remaining) == 0:
1874 selected = remaining[0]
1875 filtered.append(selected)
1878 self.clusters.append(c)
1880 d0 = self.stath0[n0]
1881 c.add_member(n0, d0)
1886 Compute the precision of a cluster
1892 if cluster.center_index
is not None:
1893 members1 = [cluster.center_index]
1895 members1 = cluster.members
1899 for n1
in cluster.members:
1904 tmp_rmsd, _ = self.
rmsd()
1909 precision = rmsd/npairs
1910 cluster.precision = precision
1915 Compute the bipartite precision (ie the cross-precision)
1916 between two clusters
1920 for cn0, n0
in enumerate(cluster1.members):
1922 for cn1, n1
in enumerate(cluster2.members):
1924 tmp_rmsd, _ = self.
rmsd()
1926 print(
"--- rmsd between structure %s and structure "
1927 "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1930 precision = rmsd/npairs
1933 def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1934 cluster_ref=
None, step=1):
1936 Compute the Root mean square fluctuations
1937 of a molecule in a cluster
1938 Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1939 residue indexes and the value is the rmsf
1941 rmsf = IMP.pmi.tools.OrderedDict()
1944 if cluster_ref
is not None:
1945 if cluster_ref.center_index
is not None:
1946 members0 = [cluster_ref.center_index]
1948 members0 = cluster_ref.members
1950 if cluster.center_index
is not None:
1951 members0 = [cluster.center_index]
1953 members0 = cluster.members
1956 copy_index=copy_index, state_index=state_index)
1957 ps0 = s0.get_selected_particles()
1959 residue_indexes = list(IMP.pmi.tools.OrderedSet(
1965 d0 = self.stath0[n0]
1966 for n1
in cluster.members[::step]:
1968 print(
"--- rmsf %s %s" % (str(n0), str(n1)))
1972 self.stath1, molecule=molecule,
1973 residue_indexes=residue_indexes, resolution=1,
1974 copy_index=copy_index, state_index=state_index)
1975 ps1 = s1.get_selected_particles()
1977 d1 = self.stath1[n1]
1980 for n, (p0, p1)
in enumerate(zip(ps0, ps1)):
1981 r = residue_indexes[n]
1993 for stath
in [self.stath0, self.stath1]:
1994 if molecule
not in self.symmetric_molecules:
1996 stath, molecule=molecule, residue_index=r,
1997 resolution=1, copy_index=copy_index,
1998 state_index=state_index)
2001 stath, molecule=molecule, residue_index=r,
2002 resolution=1, state_index=state_index)
2004 ps = s.get_selected_particles()
2013 def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2014 reference=
"Absolute", prefix=
"./", step=1):
2020 for n1
in cluster.members[::step]:
2021 print(
"density "+str(n1))
2026 dens.add_subunits_density(self.stath1)
2028 dens.write_mrc(path=prefix+
'/', suffix=str(cluster.cluster_id))
2031 def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2032 consolidate=
False, molecules=
None, prefix=
'./',
2033 reference=
"Absolute"):
2037 import matplotlib.pyplot
as plt
2038 import matplotlib.cm
as cm
2039 from scipy.spatial.distance
import cdist
2041 if molecules
is None:
2050 molecules=molecules).get_selected_particles())]
2051 unique_copies = [mol
for mol
in mols
if mol.get_copy_index() == 0]
2052 mol_names_unique = dict((mol.get_name(), mol)
for mol
in unique_copies)
2053 total_len_unique = sum(max(mol.get_residue_indexes())
2054 for mol
in unique_copies)
2061 seqlen = max(mol.get_residue_indexes())
2062 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2066 for mol
in unique_copies:
2067 seqlen = max(mol.get_residue_indexes())
2068 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2071 for ncl, n1
in enumerate(cluster.members):
2074 coord_dict = IMP.pmi.tools.OrderedDict()
2076 rindexes = mol.get_residue_indexes()
2077 coords = np.ones((max(rindexes), 3))
2078 for rnum
in rindexes:
2081 selpart = sel.get_selected_particles()
2082 if len(selpart) == 0:
2084 selpart = selpart[0]
2085 coords[rnum - 1, :] = \
2087 coord_dict[mol] = coords
2090 coords = np.concatenate(list(coord_dict.values()))
2091 dists = cdist(coords, coords)
2092 binary_dists = np.where((dists <= contact_threshold)
2093 & (dists >= 1.0), 1.0, 0.0)
2095 binary_dists_dict = {}
2097 len1 = max(mol1.get_residue_indexes())
2099 name1 = mol1.get_name()
2100 name2 = mol2.get_name()
2101 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2102 if (name1, name2)
not in binary_dists_dict:
2103 binary_dists_dict[(name1, name2)] = \
2104 np.zeros((len1, len1))
2105 binary_dists_dict[(name1, name2)] += \
2106 np.where((dists <= contact_threshold)
2107 & (dists >= 1.0), 1.0, 0.0)
2108 binary_dists = np.zeros((total_len_unique, total_len_unique))
2110 for name1, name2
in binary_dists_dict:
2111 r1 = index_dict[mol_names_unique[name1]]
2112 r2 = index_dict[mol_names_unique[name2]]
2113 binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2114 np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2120 contact_freqs = binary_dists
2122 dist_maps.append(dists)
2123 av_dist_map += dists
2124 contact_freqs += binary_dists
2127 contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2129 contact_freqs = 1.0/len(cluster)*contact_freqs
2130 av_dist_map = 1.0/len(cluster)*contact_freqs
2132 fig = plt.figure(figsize=(100, 100))
2133 ax = fig.add_subplot(111)
2136 gap_between_components = 50
2141 sorted_tuple = sorted(
2143 mol).get_extended_name(), mol)
for mol
in mols)
2144 prot_list = list(zip(*sorted_tuple))[1]
2146 sorted_tuple = sorted(
2148 for mol
in unique_copies)
2149 prot_list = list(zip(*sorted_tuple))[1]
2151 prot_listx = prot_list
2152 nresx = gap_between_components + \
2153 sum([max(mol.get_residue_indexes())
2154 + gap_between_components
for mol
in prot_listx])
2157 prot_listy = prot_list
2158 nresy = gap_between_components + \
2159 sum([max(mol.get_residue_indexes())
2160 + gap_between_components
for mol
in prot_listy])
2165 res = gap_between_components
2166 for mol
in prot_listx:
2167 resoffsetx[mol] = res
2168 res += max(mol.get_residue_indexes())
2170 res += gap_between_components
2174 res = gap_between_components
2175 for mol
in prot_listy:
2176 resoffsety[mol] = res
2177 res += max(mol.get_residue_indexes())
2179 res += gap_between_components
2181 resoffsetdiagonal = {}
2182 res = gap_between_components
2183 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2184 resoffsetdiagonal[mol] = res
2185 res += max(mol.get_residue_indexes())
2186 res += gap_between_components
2191 for n, prot
in enumerate(prot_listx):
2192 res = resoffsetx[prot]
2194 for proty
in prot_listy:
2195 resy = resoffsety[proty]
2196 endy = resendy[proty]
2197 ax.plot([res, res], [resy, endy], linestyle=
'-',
2198 color=
'gray', lw=0.4)
2199 ax.plot([end, end], [resy, endy], linestyle=
'-',
2200 color=
'gray', lw=0.4)
2201 xticks.append((float(res) + float(end)) / 2)
2203 prot).get_extended_name())
2207 for n, prot
in enumerate(prot_listy):
2208 res = resoffsety[prot]
2210 for protx
in prot_listx:
2211 resx = resoffsetx[protx]
2212 endx = resendx[protx]
2213 ax.plot([resx, endx], [res, res], linestyle=
'-',
2214 color=
'gray', lw=0.4)
2215 ax.plot([resx, endx], [end, end], linestyle=
'-',
2216 color=
'gray', lw=0.4)
2217 yticks.append((float(res) + float(end)) / 2)
2219 prot).get_extended_name())
2223 tmp_array = np.zeros((nresx, nresy))
2225 for px
in prot_listx:
2226 for py
in prot_listy:
2227 resx = resoffsetx[px]
2228 lengx = resendx[px] - 1
2229 resy = resoffsety[py]
2230 lengy = resendy[py] - 1
2231 indexes_x = index_dict[px]
2232 minx = min(indexes_x)
2233 maxx = max(indexes_x)
2234 indexes_y = index_dict[py]
2235 miny = min(indexes_y)
2236 maxy = max(indexes_y)
2237 tmp_array[resx:lengx, resy:lengy] = \
2238 contact_freqs[minx:maxx, miny:maxy]
2239 ret[(px, py)] = np.argwhere(
2240 contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2242 ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2243 origin=
'lower', alpha=0.6, interpolation=
'nearest')
2245 ax.set_xticks(xticks)
2246 ax.set_xticklabels(xlabels, rotation=90)
2247 ax.set_yticks(yticks)
2248 ax.set_yticklabels(ylabels)
2249 plt.setp(ax.get_xticklabels(), fontsize=6)
2250 plt.setp(ax.get_yticklabels(), fontsize=6)
2253 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2254 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2256 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf",
2257 dpi=300, transparent=
"False")
2260 def plot_rmsd_matrix(self, filename):
2261 self.compute_all_pairwise_rmsd()
2262 distance_matrix = np.zeros(
2263 (len(self.stath0), len(self.stath1)))
2264 for (n0, n1)
in self.pairwise_rmsd:
2265 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2267 import matplotlib
as mpl
2269 import matplotlib.pylab
as pl
2270 from scipy.cluster
import hierarchy
as hrc
2272 fig = pl.figure(figsize=(10, 8))
2273 ax = fig.add_subplot(212)
2274 dendrogram = hrc.dendrogram(
2275 hrc.linkage(distance_matrix),
2278 leaves_order = dendrogram[
'leaves']
2279 ax.set_xlabel(
'Model')
2280 ax.set_ylabel(
'RMSD [Angstroms]')
2282 ax2 = fig.add_subplot(221)
2284 distance_matrix[leaves_order, :][:, leaves_order],
2285 interpolation=
'nearest')
2286 cb = fig.colorbar(cax)
2287 cb.set_label(
'RMSD [Angstroms]')
2288 ax2.set_xlabel(
'Model')
2289 ax2.set_ylabel(
'Model')
2291 pl.savefig(filename, dpi=300)
2300 Update the cluster id numbers
2302 for n, c
in enumerate(self.clusters):
2305 def get_molecule(self, hier, name, copy):
2313 self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2314 self.sel0_rmsd.get_selected_particles())
2315 self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2316 self.sel1_rmsd.get_selected_particles())
2317 for mol
in self.seldict0:
2318 for sel
in self.seldict0[mol]:
2319 self.issymmetricsel[sel] =
False
2320 for mol
in self.symmetric_molecules:
2321 self.symmetric_molecules[mol] = len(self.seldict0[mol])
2322 for sel
in self.seldict0[mol]:
2323 self.issymmetricsel[sel] =
True
2327 self.sel1_alignment, self.sel0_alignment)
2329 for rb
in self.rbs1:
2332 for bead
in self.beads1:
2340 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2342 initial filling of the clusters.
2345 print(
"clustering model "+str(n0))
2346 d0 = self.stath0[n0]
2348 print(
"creating cluster index "+str(len(self.clusters)))
2349 self.clusters.append(c)
2350 c.add_member(n0, d0)
2351 clustered = set([n0])
2353 print(
"--- trying to add model " + str(n1) +
" to cluster "
2354 + str(len(self.clusters)))
2355 d1 = self.stath1[n1]
2358 rmsd, _ = self.
rmsd(metric=metric)
2359 if rmsd < rmsd_cutoff:
2360 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2361 c.add_member(n1, d1)
2364 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2369 merge the clusters that have close members
2370 @param rmsd_cutoff cutoff distance in Angstorms
2378 for c0, c1
in filter(
lambda x: len(x[0].members) > 1,
2379 itertools.combinations(self.clusters, 2)):
2380 n0, n1 = [c.members[0]
for c
in (c0, c1)]
2383 rmsd, _ = self.
rmsd()
2384 if (rmsd < 2*rmsd_cutoff
and
2386 to_merge.append((c0, c1))
2388 for c0, c
in reversed(to_merge):
2392 self.clusters = [c
for c
in
2393 filter(
lambda x: len(x.members) > 0, self.clusters)]
2397 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2399 print(
"check close members for clusters " + str(c0.cluster_id) +
2400 " and " + str(c1.cluster_id))
2401 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2404 rmsd, _ = self.
rmsd(metric=metric)
2405 if rmsd < rmsd_cutoff:
2420 a function that returns the permutation best_sel of sels0 that
2423 best_rmsd2 = float(
'inf')
2425 if self.issymmetricsel[sels0[0]]:
2428 for offset
in range(N):
2429 sels = [sels0[(offset+i) % N]
for i
in range(N)]
2432 r = metric(sel0, sel1)
2434 if rmsd2 < best_rmsd2:
2438 for sels
in itertools.permutations(sels0):
2440 for sel0, sel1
in itertools.takewhile(
2441 lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2442 r = metric(sel0, sel1)
2444 if rmsd2 < best_rmsd2:
2447 return best_sel, best_rmsd2
2449 def compute_all_pairwise_rmsd(self):
2450 for d0
in self.stath0:
2451 for d1
in self.stath1:
2452 rmsd, _ = self.
rmsd()
2454 def rmsd(self, metric=IMP.atom.get_rmsd):
2456 Computes the RMSD. Resolves ambiguous pairs assignments
2460 n0 = self.stath0.current_index
2461 n1 = self.stath1.current_index
2462 if ((n0, n1)
in self.pairwise_rmsd) \
2463 and ((n0, n1)
in self.pairwise_molecular_assignment):
2464 return (self.pairwise_rmsd[(n0, n1)],
2465 self.pairwise_molecular_assignment[(n0, n1)])
2475 molecular_assignment = {}
2476 for molname, sels0
in self.seldict0.items():
2477 sels_best_order, best_rmsd2 = \
2478 self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2480 Ncoords = len(sels_best_order[0].get_selected_particles())
2481 Ncopies = len(self.seldict1[molname])
2482 total_rmsd += Ncoords*best_rmsd2
2483 total_N += Ncoords*Ncopies
2485 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2486 p0 = sel0.get_selected_particles()[0]
2487 p1 = sel1.get_selected_particles()[0]
2492 molecular_assignment[(molname, c0)] = (molname, c1)
2494 total_rmsd = math.sqrt(total_rmsd/total_N)
2496 self.pairwise_rmsd[(n0, n1)] = total_rmsd
2497 self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2498 self.pairwise_rmsd[(n1, n0)] = total_rmsd
2499 self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2500 return total_rmsd, molecular_assignment
2504 Fix the reference structure for structural alignment, rmsd and
2507 @param reference can be either "Absolute" (cluster center of the
2508 first cluster) or Relative (cluster center of the current
2511 if reference ==
"Absolute":
2513 elif reference ==
"Relative":
2514 if cluster.center_index:
2515 n0 = cluster.center_index
2517 n0 = cluster.members[0]
2522 compute the molecular assignemnts between multiple copies
2523 of the same sequence. It changes the Copy index of Molecules
2526 _, molecular_assignment = self.
rmsd()
2527 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2528 mol0 = self.molcopydict0[m0][c0]
2529 mol1 = self.molcopydict1[m1][c1]
2532 p1.set_value(cik0, c0)
2536 Undo the Copy index assignment
2539 _, molecular_assignment = self.
rmsd()
2540 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2541 mol0 = self.molcopydict0[m0][c0]
2542 mol1 = self.molcopydict1[m1][c1]
2545 p1.set_value(cik0, c1)
2552 s =
"AnalysisReplicaExchange\n"
2553 s +=
"---- number of clusters %s \n" % str(len(self.clusters))
2554 s +=
"---- number of models %s \n" % str(len(self.stath0))
2557 def __getitem__(self, int_slice_adaptor):
2558 if isinstance(int_slice_adaptor, int):
2559 return self.clusters[int_slice_adaptor]
2560 elif isinstance(int_slice_adaptor, slice):
2561 return self.__iter__(int_slice_adaptor)
2563 raise TypeError(
"Unknown Type")
2566 return len(self.clusters)
2568 def __iter__(self, slice_key=None):
2569 if slice_key
is None:
2570 for i
in range(len(self)):
2573 for i
in range(len(self))[slice_key]:
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A member of a rigid body, it has internal (local) coordinates.
A macro to help setup and run replica exchange.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
def merge_aggregates
merge the clusters that have close members
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
Mapping between FASTA one-letter codes and residue types.
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
Sample using replica exchange.
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.