1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
18 from pathlib
import Path
20 from IMP._compat_pathlib
import Path
22 from operator
import itemgetter
23 from collections
import defaultdict
32 class _MockMPIValues(object):
33 """Replace samplers.MPI_values when in test mode"""
34 def get_percentile(self, name):
38 class _RMFRestraints(object):
39 """All restraints that are written out to the RMF file"""
40 def __init__(self, model, user_restraints):
42 self._user_restraints = user_restraints
if user_restraints
else []
45 return (len(self._user_restraints)
46 + self._rmf_rs.get_number_of_restraints())
50 __nonzero__ = __bool__
52 def __getitem__(self, i):
53 class FakePMIWrapper(object):
54 def __init__(self, r):
55 self.r = IMP.RestraintSet.get_from(r)
57 def get_restraint(self):
60 lenuser = len(self._user_restraints)
62 return self._user_restraints[i]
63 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
64 r = self._rmf_rs.get_restraint(i - lenuser)
65 return FakePMIWrapper(r)
67 raise IndexError(
"Out of range")
71 """A macro to help setup and run replica exchange.
72 Supports Monte Carlo and molecular dynamics.
73 Produces trajectory RMF files, best PDB structures,
74 and output stat files.
77 monte_carlo_sample_objects=
None,
78 molecular_dynamics_sample_objects=
None,
80 rmf_output_objects=
None,
81 monte_carlo_temperature=1.0,
82 simulated_annealing=
False,
83 simulated_annealing_minimum_temperature=1.0,
84 simulated_annealing_maximum_temperature=2.5,
85 simulated_annealing_minimum_temperature_nframes=100,
86 simulated_annealing_maximum_temperature_nframes=100,
87 replica_exchange_minimum_temperature=1.0,
88 replica_exchange_maximum_temperature=2.5,
89 replica_exchange_swap=
True,
91 number_of_best_scoring_models=500,
94 molecular_dynamics_steps=10,
95 molecular_dynamics_max_time_step=1.0,
96 number_of_frames=1000,
97 save_coordinates_mode=
"lowest_temperature",
98 nframes_write_coordinates=1,
99 write_initial_rmf=
True,
100 initial_rmf_name_suffix=
"initial",
101 stat_file_name_suffix=
"stat",
102 best_pdb_name_suffix=
"model",
105 do_create_directories=
True,
106 global_output_directory=
"./",
108 best_pdb_dir=
"pdbs/",
109 replica_stat_file_suffix=
"stat_replica",
110 em_object_for_rmf=
None,
112 replica_exchange_object=
None,
116 nestor_restraints=
None,
117 nestor_rmf_fname_prefix=
"nested",):
119 @param model The IMP model
120 @param root_hier Top-level (System)hierarchy
121 @param monte_carlo_sample_objects Objects for MC sampling, which
122 should generally be a simple list of Mover objects, e.g.
123 from DegreesOfFreedom.get_movers().
124 @param molecular_dynamics_sample_objects Objects for MD sampling,
125 which should generally be a simple list of particles.
126 @param output_objects A list of structural objects and restraints
127 that will be included in output (ie, statistics "stat"
128 files). Any object that provides a get_output() method
129 can be used here. If None is passed
130 the macro will not write stat files.
131 @param rmf_output_objects A list of structural objects and
132 restraints that will be included in rmf. Any object
133 that provides a get_output() method can be used here.
134 @param monte_carlo_temperature MC temp (may need to be optimized
135 based on post-sampling analysis)
136 @param simulated_annealing If True, perform simulated annealing
137 @param simulated_annealing_minimum_temperature Should generally be
138 the same as monte_carlo_temperature.
139 @param simulated_annealing_minimum_temperature_nframes Number of
140 frames to compute at minimum temperature.
141 @param simulated_annealing_maximum_temperature_nframes Number of
143 temps > simulated_annealing_maximum_temperature.
144 @param replica_exchange_minimum_temperature Low temp for REX; should
145 generally be the same as monte_carlo_temperature.
146 @param replica_exchange_maximum_temperature High temp for REX
147 @param replica_exchange_swap Boolean, enable disable temperature
149 @param num_sample_rounds Number of rounds of MC/MD per cycle
150 @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
151 models to keep around for analysis.
152 @param mmcif If True, write best scoring models in mmCIF format;
153 if False (the default), write in legacy PDB format.
154 @param best_pdb_dir The directory under `global_output_directory`
155 where best-scoring PDB/mmCIF files are written.
156 @param best_pdb_name_suffix Part of the file name for best-scoring
158 @param monte_carlo_steps Number of MC steps per round
159 @param self_adaptive self adaptive scheme for Monte Carlo movers
160 @param molecular_dynamics_steps Number of MD steps per round
161 @param molecular_dynamics_max_time_step Max time step for MD
162 @param number_of_frames Number of REX frames to run
163 @param save_coordinates_mode string: how to save coordinates.
164 "lowest_temperature" (default) only the lowest temperatures
166 "25th_score" all replicas whose score is below the 25th
168 "50th_score" all replicas whose score is below the 50th
170 "75th_score" all replicas whose score is below the 75th
172 @param nframes_write_coordinates How often to write the coordinates
174 @param write_initial_rmf Write the initial configuration
175 @param global_output_directory Folder that will be created to house
177 @param test_mode Set to True to avoid writing any files, just test
179 @param score_moved If True, attempt to speed up Monte Carlo
180 sampling by caching scoring function terms on particles
182 @param use_nestor If True, follows the Nested Sampling workflow
183 of the NestOR module and skips writing stat files and
185 @param nestor_restraints A list of restraints for which
186 likelihoods are to be computed for use by NestOR module.
187 @param nestor_rmf_fname_prefix Prefix to be used for storing .rmf3
188 files generated by NestOR .
194 if output_objects == []:
197 self.output_objects = []
199 self.output_objects = output_objects
200 self.rmf_output_objects = rmf_output_objects
202 and not root_hier.get_parent()):
203 if self.output_objects
is not None:
204 self.output_objects.append(
206 if self.rmf_output_objects
is not None:
207 self.rmf_output_objects.append(
209 self.root_hier = root_hier
210 states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
211 self.vars[
"number_of_states"] = len(states)
213 self.root_hiers = states
214 self.is_multi_state =
True
216 self.root_hier = root_hier
217 self.is_multi_state =
False
219 raise TypeError(
"Must provide System hierarchy (root_hier)")
221 self._rmf_restraints = _RMFRestraints(model,
None)
222 self.em_object_for_rmf = em_object_for_rmf
223 self.monte_carlo_sample_objects = monte_carlo_sample_objects
224 self.vars[
"self_adaptive"] = self_adaptive
225 self.molecular_dynamics_sample_objects = \
226 molecular_dynamics_sample_objects
227 self.replica_exchange_object = replica_exchange_object
228 self.molecular_dynamics_max_time_step = \
229 molecular_dynamics_max_time_step
230 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
231 self.vars[
"replica_exchange_minimum_temperature"] = \
232 replica_exchange_minimum_temperature
233 self.vars[
"replica_exchange_maximum_temperature"] = \
234 replica_exchange_maximum_temperature
235 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
236 self.vars[
"simulated_annealing"] = simulated_annealing
237 self.vars[
"simulated_annealing_minimum_temperature"] = \
238 simulated_annealing_minimum_temperature
239 self.vars[
"simulated_annealing_maximum_temperature"] = \
240 simulated_annealing_maximum_temperature
241 self.vars[
"simulated_annealing_minimum_temperature_nframes"] = \
242 simulated_annealing_minimum_temperature_nframes
243 self.vars[
"simulated_annealing_maximum_temperature_nframes"] = \
244 simulated_annealing_maximum_temperature_nframes
246 self.vars[
"num_sample_rounds"] = num_sample_rounds
248 "number_of_best_scoring_models"] = number_of_best_scoring_models
249 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
250 self.vars[
"molecular_dynamics_steps"] = molecular_dynamics_steps
251 self.vars[
"number_of_frames"] = number_of_frames
252 if save_coordinates_mode
not in (
"lowest_temperature",
"25th_score",
253 "50th_score",
"75th_score"):
254 raise Exception(
"save_coordinates_mode has unrecognized value")
256 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
257 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
258 self.vars[
"write_initial_rmf"] = write_initial_rmf
259 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
260 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
261 self.vars[
"mmcif"] = mmcif
262 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
263 self.vars[
"do_clean_first"] = do_clean_first
264 self.vars[
"do_create_directories"] = do_create_directories
265 self.vars[
"global_output_directory"] = global_output_directory
266 self.vars[
"rmf_dir"] = rmf_dir
267 self.vars[
"best_pdb_dir"] = best_pdb_dir
268 self.vars[
"atomistic"] = atomistic
269 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
270 self.vars[
"geometries"] =
None
271 self.test_mode = test_mode
272 self.score_moved = score_moved
273 self.nest = use_nestor
274 self.nestor_restraints = nestor_restraints
275 self.nestor_rmf_fname = nestor_rmf_fname_prefix
278 if self.vars[
"geometries"]
is None:
279 self.vars[
"geometries"] = list(geometries)
281 self.vars[
"geometries"].extend(geometries)
284 print(
"ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
285 "rmfs/*.rmf3 for each replica ")
286 print(
"--- it stores the best scoring pdb models in pdbs/")
287 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
288 "lowest temperature")
289 print(
"--- variables:")
290 keys = list(self.vars.keys())
293 print(
"------", v.ljust(30), self.vars[v])
294 print(
"Use nestor: ", self.nest)
296 def get_replica_exchange_object(self):
297 return self.replica_exchange_object
299 def _add_provenance(self, sampler_md, sampler_mc):
300 """Record details about the sampling in the IMP Hierarchies"""
303 method =
"Molecular Dynamics"
304 iterations += self.vars[
"molecular_dynamics_steps"]
306 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
307 iterations += self.vars[
"monte_carlo_steps"]
309 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
311 iterations *= self.vars[
"num_sample_rounds"]
313 pi = self.model.add_particle(
"sampling")
315 self.model, pi, method, self.vars[
"number_of_frames"],
317 p.set_number_of_replicas(
318 self.replica_exchange_object.get_number_of_replicas())
319 IMP.pmi.tools._add_pmi_provenance(self.root_hier)
322 def execute_macro(self):
323 temp_index_factor = 100000.0
327 if self.monte_carlo_sample_objects
is not None:
328 print(
"Setting up MonteCarlo")
330 self.model, self.monte_carlo_sample_objects,
331 self.vars[
"monte_carlo_temperature"],
332 score_moved=self.score_moved)
333 if self.vars[
"simulated_annealing"]:
334 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
335 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
337 "simulated_annealing_minimum_temperature_nframes"]
339 "simulated_annealing_maximum_temperature_nframes"]
340 sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
341 if self.vars[
"self_adaptive"]:
342 sampler_mc.set_self_adaptive(
343 isselfadaptive=self.vars[
"self_adaptive"])
344 if self.output_objects
is not None:
345 self.output_objects.append(sampler_mc)
346 if self.rmf_output_objects
is not None:
347 self.rmf_output_objects.append(sampler_mc)
348 samplers.append(sampler_mc)
350 if self.molecular_dynamics_sample_objects
is not None:
351 print(
"Setting up MolecularDynamics")
353 self.model, self.molecular_dynamics_sample_objects,
354 self.vars[
"monte_carlo_temperature"],
355 maximum_time_step=self.molecular_dynamics_max_time_step)
356 if self.vars[
"simulated_annealing"]:
357 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
358 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
360 "simulated_annealing_minimum_temperature_nframes"]
362 "simulated_annealing_maximum_temperature_nframes"]
363 sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
364 if self.output_objects
is not None:
365 self.output_objects.append(sampler_md)
366 if self.rmf_output_objects
is not None:
367 self.rmf_output_objects.append(sampler_md)
368 samplers.append(sampler_md)
371 print(
"Setting up ReplicaExchange")
373 self.model, self.vars[
"replica_exchange_minimum_temperature"],
374 self.vars[
"replica_exchange_maximum_temperature"], samplers,
375 replica_exchange_object=self.replica_exchange_object)
376 self.replica_exchange_object = rex.rem
378 myindex = rex.get_my_index()
379 if self.output_objects
is not None:
380 self.output_objects.append(rex)
381 if self.rmf_output_objects
is not None:
382 self.rmf_output_objects.append(rex)
386 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
390 globaldir = self.vars[
"global_output_directory"] +
"/"
391 rmf_dir = globaldir + self.vars[
"rmf_dir"]
392 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
394 if not self.test_mode
and not self.nest:
395 if self.vars[
"do_clean_first"]:
398 if self.vars[
"do_create_directories"]:
401 os.makedirs(globaldir)
409 if not self.is_multi_state:
415 for n
in range(self.vars[
"number_of_states"]):
417 os.makedirs(pdb_dir +
"/" + str(n))
424 if self.output_objects
is not None:
425 self.output_objects.append(sw)
426 if self.rmf_output_objects
is not None:
427 self.rmf_output_objects.append(sw)
432 print(
"Setting up stat file")
433 low_temp_stat_file = globaldir + \
434 self.vars[
"stat_file_name_suffix"] +
"." + \
435 str(myindex) +
".out"
438 if not self.test_mode:
441 if not self.test_mode
and not self.nest:
442 if self.output_objects
is not None:
443 output.init_stat2(low_temp_stat_file,
445 extralabels=[
"rmf_file",
"rmf_frame_index"])
447 print(
"Stat file writing is disabled")
449 if self.rmf_output_objects
is not None and not self.nest:
450 print(
"Stat info being written in the rmf file")
452 if not self.test_mode
and not self.nest:
453 print(
"Setting up replica stat file")
454 replica_stat_file = globaldir + \
455 self.vars[
"replica_stat_file_suffix"] +
"." + \
456 str(myindex) +
".out"
457 if not self.test_mode:
458 output.init_stat2(replica_stat_file, [rex],
459 extralabels=[
"score"])
461 print(
"Setting up best pdb files")
462 if not self.is_multi_state:
463 if self.vars[
"number_of_best_scoring_models"] > 0:
464 output.init_pdb_best_scoring(
465 pdb_dir +
"/" + self.vars[
"best_pdb_name_suffix"],
467 self.vars[
"number_of_best_scoring_models"],
468 replica_exchange=
True,
469 mmcif=self.vars[
'mmcif'],
470 best_score_file=globaldir +
"best.scores.rex.py")
471 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
473 pdb_dir +
"/" +
"model.psf",
475 self.vars[
"best_pdb_name_suffix"] + pdbext)
477 if self.vars[
"number_of_best_scoring_models"] > 0:
478 for n
in range(self.vars[
"number_of_states"]):
479 output.init_pdb_best_scoring(
480 pdb_dir +
"/" + str(n) +
"/" +
481 self.vars[
"best_pdb_name_suffix"],
483 self.vars[
"number_of_best_scoring_models"],
484 replica_exchange=
True,
485 mmcif=self.vars[
'mmcif'],
486 best_score_file=globaldir +
"best.scores.rex.py")
487 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
489 pdb_dir +
"/" + str(n) +
"/" +
"model.psf",
490 pdb_dir +
"/" + str(n) +
"/" +
491 self.vars[
"best_pdb_name_suffix"] + pdbext)
494 if self.em_object_for_rmf
is not None:
495 output_hierarchies = [
497 self.em_object_for_rmf.get_density_as_hierarchy(
500 output_hierarchies = [self.root_hier]
502 if not self.test_mode
and not self.nest:
503 print(
"Setting up and writing initial rmf coordinate file")
504 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
505 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
507 listofobjects=self.rmf_output_objects)
508 if self._rmf_restraints:
509 output.add_restraints_to_rmf(
510 init_suffix +
"." + str(myindex) +
".rmf3",
511 self._rmf_restraints)
512 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
513 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
515 if not self.test_mode:
516 mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
518 mpivs = _MockMPIValues()
520 self._add_provenance(sampler_md, sampler_mc)
522 if not self.test_mode
and not self.nest:
523 print(
"Setting up production rmf files")
524 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
525 output.init_rmf(rmfname, output_hierarchies,
526 geometries=self.vars[
"geometries"],
527 listofobjects=self.rmf_output_objects)
529 if self._rmf_restraints:
530 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
532 if not self.test_mode
and self.nest:
533 print(
"Setting up NestOR rmf files")
534 nestor_rmf_fname = str(self.nestor_rmf_fname) +
'_' + \
535 str(self.replica_exchange_object.get_my_index()) +
'.rmf3'
537 output.init_rmf(nestor_rmf_fname, output_hierarchies,
538 geometries=self.vars[
"geometries"],
539 listofobjects=self.rmf_output_objects)
541 ntimes_at_low_temp = 0
543 if myindex == 0
and not self.nest:
545 self.replica_exchange_object.set_was_used(
True)
546 nframes = self.vars[
"number_of_frames"]
550 sampled_likelihoods = []
551 for i
in range(nframes):
555 for nr
in range(self.vars[
"num_sample_rounds"]):
556 if sampler_md
is not None:
558 self.vars[
"molecular_dynamics_steps"])
559 if sampler_mc
is not None:
560 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
562 self.model).evaluate(
False)
563 mpivs.set_value(
"score", score)
565 output.set_output_entry(
"score", score)
567 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
569 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
570 save_frame = (min_temp_index == my_temp_index)
571 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
572 score_perc = mpivs.get_percentile(
"score")
573 save_frame = (score_perc*100.0 <= 25.0)
574 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
575 score_perc = mpivs.get_percentile(
"score")
576 save_frame = (score_perc*100.0 <= 50.0)
577 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
578 score_perc = mpivs.get_percentile(
"score")
579 save_frame = (score_perc*100.0 <= 75.0)
582 if save_frame
and not self.test_mode:
586 print(
"--- frame %s score %s " % (str(i), str(score)))
589 if math.isnan(score):
590 sampled_likelihoods.append(math.nan)
592 likelihood_for_sample = 1
593 for rstrnt
in self.nestor_restraints:
594 likelihood_for_sample *= rstrnt.get_likelihood()
595 sampled_likelihoods.append(likelihood_for_sample)
596 output.write_rmf(nestor_rmf_fname)
598 if not self.test_mode
and not self.nest:
599 if i % self.vars[
"nframes_write_coordinates"] == 0:
600 print(
'--- writing coordinates')
601 if self.vars[
"number_of_best_scoring_models"] > 0:
602 output.write_pdb_best_scoring(score)
603 output.write_rmf(rmfname)
604 output.set_output_entry(
"rmf_file", rmfname)
605 output.set_output_entry(
"rmf_frame_index",
608 output.set_output_entry(
"rmf_file", rmfname)
609 output.set_output_entry(
"rmf_frame_index",
'-1')
610 if self.output_objects
is not None:
611 output.write_stat2(low_temp_stat_file)
612 ntimes_at_low_temp += 1
614 if not self.test_mode
and not self.nest:
615 output.write_stat2(replica_stat_file)
616 if self.vars[
"replica_exchange_swap"]:
617 rex.swap_temp(i, score)
619 if self.nest
and len(sampled_likelihoods) > 0:
620 with open(
"likelihoods_"
621 + str(self.replica_exchange_object.get_my_index()),
623 pickle.dump(sampled_likelihoods, lif)
625 output.close_rmf(nestor_rmf_fname)
627 for p, state
in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
628 p.add_replica_exchange(state, self)
630 if not self.test_mode
and not self.nest:
631 print(
"closing production rmf files")
632 output.close_rmf(rmfname)
636 """A macro to build a IMP::pmi::topology::System based on a
637 TopologyReader object.
639 Easily create multi-state systems by calling this macro
640 repeatedly with different TopologyReader objects!
641 A useful function is get_molecules() which returns the PMI Molecules
642 grouped by state as a dictionary with key = (molecule name),
643 value = IMP.pmi.topology.Molecule
644 Quick multi-state system:
647 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
648 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
649 bs = IMP.pmi.macros.BuildSystem(model)
650 bs.add_state(reader1)
651 bs.add_state(reader2)
652 bs.execute_macro() # build everything including degrees of freedom
653 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
654 ### now you have a two state system, you add restraints etc
656 @note The "domain name" entry of the topology reader is not used.
657 All molecules are set up by the component name, but split into rigid bodies
661 _alphabets = {
'DNA': IMP.pmi.alphabets.dna,
662 'RNA': IMP.pmi.alphabets.rna}
664 def __init__(self, model, sequence_connectivity_scale=4.0,
665 force_create_gmm_files=
False, resolutions=[1, 10],
668 @param model An IMP Model
669 @param sequence_connectivity_scale For scaling the connectivity
671 @param force_create_gmm_files If True, will sample and create GMMs
672 no matter what. If False, will only sample if the
673 files don't exist. If number of Gaussians is zero, won't
675 @param resolutions The resolutions to build for structured regions
676 @param name The name of the top-level hierarchy node.
683 self._domain_res = []
685 self.force_create_gmm_files = force_create_gmm_files
686 self.resolutions = resolutions
688 def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
690 """Add a state using the topology info in a
691 IMP::pmi::topology::TopologyReader object.
692 When you are done adding states, call execute_macro()
693 @param reader The TopologyReader object
694 @param fasta_name_map dictionary for converting protein names
695 found in the fasta file
696 @param chain_ids A list or string of chain IDs for assigning to
697 newly-created molecules, e.g.
698 `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
699 If not specified, chain IDs A through Z are assigned, then
700 AA through AZ, then BA through BZ, and so on, in the same
703 state = self.system.create_state()
704 self._readers.append(reader)
706 these_domain_res = {}
708 if chain_ids
is None:
709 chain_ids = IMP.pmi.output._ChainIDs()
714 for molname
in reader.get_molecules():
715 copies = reader.get_molecules()[molname].domains
716 for nc, copyname
in enumerate(copies):
717 print(
"BuildSystem.add_state: setting up molecule %s copy "
718 "number %s" % (molname, str(nc)))
719 copy = copies[copyname]
722 all_chains = [c
for c
in copy
if c.chain
is not None]
724 chain_id = all_chains[0].chain
726 chain_id = chain_ids[numchain]
728 "No PDBs specified for %s, so keep_chain_id has "
729 "no effect; using default chain ID '%s'"
732 chain_id = chain_ids[numchain]
734 alphabet = IMP.pmi.alphabets.amino_acid
735 fasta_flag = copy[0].fasta_flag
736 if fasta_flag
in self._alphabets:
737 alphabet = self._alphabets[fasta_flag]
739 copy[0].fasta_file, fasta_name_map)
740 seq = seqs[copy[0].fasta_id]
741 print(
"BuildSystem.add_state: molecule %s sequence has "
742 "%s residues" % (molname, len(seq)))
743 orig_mol = state.create_molecule(
744 molname, seq, chain_id, alphabet=alphabet,
745 uniprot=seqs.uniprot.get(copy[0].fasta_id))
749 print(
"BuildSystem.add_state: creating a copy for "
750 "molecule %s" % molname)
751 mol = orig_mol.create_copy(chain_id)
754 for domainnumber, domain
in enumerate(copy):
755 print(
"BuildSystem.add_state: ---- setting up domain %s "
756 "of molecule %s" % (domainnumber, molname))
759 these_domains[domain.get_unique_name()] = domain
760 if domain.residue_range == []
or \
761 domain.residue_range
is None:
762 domain_res = mol.get_residues()
764 start = domain.residue_range[0]+domain.pdb_offset
765 if domain.residue_range[1] ==
'END':
766 end = len(mol.sequence)
768 end = domain.residue_range[1]+domain.pdb_offset
769 domain_res = mol.residue_range(start-1, end-1)
770 print(
"BuildSystem.add_state: -------- domain %s of "
771 "molecule %s extends from residue %s to "
773 % (domainnumber, molname, start, end))
774 if domain.pdb_file ==
"BEADS":
775 print(
"BuildSystem.add_state: -------- domain %s of "
776 "molecule %s represented by BEADS "
777 % (domainnumber, molname))
778 mol.add_representation(
780 resolutions=[domain.bead_size],
781 setup_particles_as_densities=(
782 domain.em_residues_per_gaussian != 0),
784 these_domain_res[domain.get_unique_name()] = \
786 elif domain.pdb_file ==
"IDEAL_HELIX":
787 print(
"BuildSystem.add_state: -------- domain %s of "
788 "molecule %s represented by IDEAL_HELIX "
789 % (domainnumber, molname))
790 emper = domain.em_residues_per_gaussian
791 mol.add_representation(
793 resolutions=self.resolutions,
795 density_residues_per_component=emper,
796 density_prefix=domain.density_prefix,
797 density_force_compute=self.force_create_gmm_files,
799 these_domain_res[domain.get_unique_name()] = \
802 print(
"BuildSystem.add_state: -------- domain %s of "
803 "molecule %s represented by pdb file %s "
804 % (domainnumber, molname, domain.pdb_file))
805 domain_atomic = mol.add_structure(domain.pdb_file,
807 domain.residue_range,
810 domain_non_atomic = domain_res - domain_atomic
811 if not domain.em_residues_per_gaussian:
812 mol.add_representation(
813 domain_atomic, resolutions=self.resolutions,
815 if len(domain_non_atomic) > 0:
816 mol.add_representation(
818 resolutions=[domain.bead_size],
821 print(
"BuildSystem.add_state: -------- domain %s "
822 "of molecule %s represented by gaussians "
823 % (domainnumber, molname))
824 emper = domain.em_residues_per_gaussian
825 creategmm = self.force_create_gmm_files
826 mol.add_representation(
828 resolutions=self.resolutions,
829 density_residues_per_component=emper,
830 density_prefix=domain.density_prefix,
831 density_force_compute=creategmm,
833 if len(domain_non_atomic) > 0:
834 mol.add_representation(
836 resolutions=[domain.bead_size],
837 setup_particles_as_densities=
True,
839 these_domain_res[domain.get_unique_name()] = (
840 domain_atomic, domain_non_atomic)
841 self._domain_res.append(these_domain_res)
842 self._domains.append(these_domains)
843 print(
'BuildSystem.add_state: State', len(self.system.states),
'added')
847 """Return list of all molecules grouped by state.
848 For each state, it's a dictionary of Molecules where key is the
851 return [s.get_molecules()
for s
in self.system.get_states()]
853 def get_molecule(self, molname, copy_index=0, state_index=0):
854 return self.system.get_states()[state_index].
get_molecules()[
858 max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
859 """Builds representations and sets up degrees of freedom"""
860 print(
"BuildSystem.execute_macro: building representations")
861 self.root_hier = self.system.build()
863 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
865 for nstate, reader
in enumerate(self._readers):
866 rbs = reader.get_rigid_bodies()
867 srbs = reader.get_super_rigid_bodies()
868 csrbs = reader.get_chains_of_super_rigid_bodies()
871 domains_in_rbs = set()
873 print(
"BuildSystem.execute_macro: -------- building rigid "
874 "body %s" % (str(rblist)))
875 all_res = IMP.pmi.tools.OrderedSet()
876 bead_res = IMP.pmi.tools.OrderedSet()
878 domain = self._domains[nstate][dname]
879 print(
"BuildSystem.execute_macro: -------- adding %s"
881 all_res |= self._domain_res[nstate][dname][0]
882 bead_res |= self._domain_res[nstate][dname][1]
883 domains_in_rbs.add(dname)
885 print(
"BuildSystem.execute_macro: -------- creating rigid "
886 "body with max_trans %s max_rot %s "
887 "non_rigid_max_trans %s"
888 % (str(max_rb_trans), str(max_rb_rot),
889 str(max_bead_trans)))
890 self.dof.create_rigid_body(all_res,
891 nonrigid_parts=bead_res,
892 max_trans=max_rb_trans,
894 nonrigid_max_trans=max_bead_trans,
895 name=
"RigidBody %s" % dname)
898 for dname, domain
in self._domains[nstate].items():
899 if dname
not in domains_in_rbs:
900 if domain.pdb_file !=
"BEADS":
902 "No rigid bodies set for %s. Residues read from "
903 "the PDB file will not be sampled - only regions "
904 "missing from the PDB will be treated flexibly. "
905 "To sample the entire sequence, use BEADS instead "
906 "of a PDB file name" % dname,
908 self.dof.create_flexible_beads(
909 self._domain_res[nstate][dname][1],
910 max_trans=max_bead_trans)
914 print(
"BuildSystem.execute_macro: -------- building "
915 "super rigid body %s" % (str(srblist)))
916 all_res = IMP.pmi.tools.OrderedSet()
917 for dname
in srblist:
918 print(
"BuildSystem.execute_macro: -------- adding %s"
920 all_res |= self._domain_res[nstate][dname][0]
921 all_res |= self._domain_res[nstate][dname][1]
923 print(
"BuildSystem.execute_macro: -------- creating super "
924 "rigid body with max_trans %s max_rot %s "
925 % (str(max_srb_trans), str(max_srb_rot)))
926 self.dof.create_super_rigid_body(
927 all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
930 for csrblist
in csrbs:
931 all_res = IMP.pmi.tools.OrderedSet()
932 for dname
in csrblist:
933 all_res |= self._domain_res[nstate][dname][0]
934 all_res |= self._domain_res[nstate][dname][1]
935 all_res = list(all_res)
936 all_res.sort(key=
lambda r: r.get_index())
937 self.dof.create_main_chain_mover(all_res)
938 return self.root_hier, self.dof
943 """A macro for running all the basic operations of analysis.
944 Includes clustering, precision analysis, and making ensemble density maps.
945 A number of plots are also supported.
948 merge_directories=[
"./"],
949 stat_file_name_suffix=
"stat",
950 best_pdb_name_suffix=
"model",
952 do_create_directories=
True,
953 global_output_directory=
"output/",
954 replica_stat_file_suffix=
"stat_replica",
955 global_analysis_result_directory=
"./analysis/",
958 @param model The IMP model
959 @param stat_file_name_suffix
960 @param merge_directories The directories containing output files
961 @param best_pdb_name_suffix
962 @param do_clean_first
963 @param do_create_directories
964 @param global_output_directory Where everything is
965 @param replica_stat_file_suffix
966 @param global_analysis_result_directory
967 @param test_mode If True, nothing is changed on disk
971 from mpi4py
import MPI
972 self.comm = MPI.COMM_WORLD
973 self.rank = self.comm.Get_rank()
974 self.number_of_processes = self.comm.size
977 self.number_of_processes = 1
979 self.test_mode = test_mode
980 self._protocol_output = []
981 self.cluster_obj =
None
983 stat_dir = global_output_directory
986 for rd
in merge_directories:
987 stat_files = glob.glob(os.path.join(rd, stat_dir,
"stat.*.out"))
988 if len(stat_files) == 0:
989 warnings.warn(
"no stat files found in %s"
990 % os.path.join(rd, stat_dir),
992 self.stat_files += stat_files
995 """Capture details of the modeling protocol.
996 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
999 self._protocol_output.append((p, p._last_state))
1002 score_key=
"Total_Score",
1003 rmf_file_key=
"rmf_file",
1004 rmf_file_frame_key=
"rmf_frame_index",
1007 nframes_trajectory=10000):
1008 """ Get a trajectory of the modeling run, for generating
1009 demonstrative movies
1011 @param score_key The score for ranking models
1012 @param rmf_file_key Key pointing to RMF filename
1013 @param rmf_file_frame_key Key pointing to RMF frame number
1014 @param outputdir The local output directory used in the run
1015 @param get_every Extract every nth frame
1016 @param nframes_trajectory Total number of frames of the trajectory
1021 self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
1023 score_list = list(map(float, trajectory_models[2]))
1025 max_score = max(score_list)
1026 min_score = min(score_list)
1028 bins = [(max_score-min_score)*math.exp(-float(i))+min_score
1029 for i
in range(nframes_trajectory)]
1030 binned_scores = [
None]*nframes_trajectory
1031 binned_model_indexes = [-1]*nframes_trajectory
1033 for model_index, s
in enumerate(score_list):
1034 bins_score_diffs = [abs(s-b)
for b
in bins]
1035 bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1036 if binned_scores[bin_index]
is None:
1037 binned_scores[bin_index] = s
1038 binned_model_indexes[bin_index] = model_index
1040 old_diff = abs(binned_scores[bin_index]-bins[bin_index])
1041 new_diff = abs(s-bins[bin_index])
1042 if new_diff < old_diff:
1043 binned_scores[bin_index] = s
1044 binned_model_indexes[bin_index] = model_index
1046 print(binned_scores)
1047 print(binned_model_indexes)
1049 def _expand_ambiguity(self, prot, d):
1050 """If using PMI2, expand the dictionary to include copies as
1053 This also keeps the states separate.
1058 if '..' in key
or (isinstance(val, tuple)
and len(val) >= 3):
1061 states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1062 if isinstance(val, tuple):
1070 for nst
in range(len(states)):
1072 copies = sel.get_selected_particles(with_representation=
False)
1074 for nc
in range(len(copies)):
1076 newdict[
'%s.%i..%i' % (name, nst, nc)] = \
1077 (start, stop, name, nc, nst)
1079 newdict[
'%s..%i' % (name, nc)] = \
1080 (start, stop, name, nc, nst)
1086 score_key=
"Total_Score",
1087 rmf_file_key=
"rmf_file",
1088 rmf_file_frame_key=
"rmf_frame_index",
1090 prefiltervalue=
None,
1093 alignment_components=
None,
1094 number_of_best_scoring_models=10,
1095 rmsd_calculation_components=
None,
1096 distance_matrix_file=
'distances.mat',
1097 load_distance_matrix_file=
False,
1098 skip_clustering=
False,
1099 number_of_clusters=1,
1101 exit_after_display=
True,
1103 first_and_last_frames=
None,
1104 density_custom_ranges=
None,
1105 write_pdb_with_centered_coordinates=
False,
1107 """Get the best scoring models, compute a distance matrix,
1108 cluster them, and create density maps.
1110 Tuple format: "molname" just the molecule,
1111 or (start,stop,molname,copy_num(optional),state_num(optional)
1112 Can pass None for copy or state to ignore that field.
1113 If you don't pass a specific copy number
1115 @param score_key The score for ranking models.
1116 @param rmf_file_key Key pointing to RMF filename
1117 @param rmf_file_frame_key Key pointing to RMF frame number
1118 @param state_number State number to analyze
1119 @param prefiltervalue Only include frames where the
1120 score key is below this value
1121 @param feature_keys Keywords for which you want to
1122 calculate average, medians, etc.
1123 If you pass "Keyname" it'll include everything that matches
1125 @param outputdir The local output directory used in
1127 @param alignment_components Dictionary with keys=groupname,
1128 values are tuples for aligning the structures
1129 e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1130 @param number_of_best_scoring_models Num models to keep per run
1131 @param rmsd_calculation_components For calculating RMSD
1132 (same format as alignment_components)
1133 @param distance_matrix_file Where to store/read the
1135 @param load_distance_matrix_file Try to load the distance
1137 @param skip_clustering Just extract the best scoring
1138 models and save the pdbs
1139 @param number_of_clusters Number of k-means clusters
1140 @param display_plot Display the distance matrix
1141 @param exit_after_display Exit after displaying distance
1143 @param get_every Extract every nth frame
1144 @param first_and_last_frames A tuple with the first and last
1145 frames to be analyzed. Values are percentages!
1146 Default: get all frames
1147 @param density_custom_ranges For density calculation
1148 (same format as alignment_components)
1149 @param write_pdb_with_centered_coordinates
1150 @param voxel_size Used for the density output
1154 self._outputdir = Path(outputdir).absolute()
1155 self._number_of_clusters = number_of_clusters
1156 for p, state
in self._protocol_output:
1157 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1168 if not load_distance_matrix_file:
1169 if len(self.stat_files) == 0:
1170 print(
"ERROR: no stat file found in the given path")
1172 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1173 self.stat_files, self.number_of_processes)[self.rank]
1176 for k
in (score_key, rmf_file_key, rmf_file_frame_key):
1177 if k
in feature_keys:
1179 "no need to pass " + k +
" to feature_keys.",
1181 feature_keys.remove(k)
1184 my_stat_files, score_key, feature_keys, rmf_file_key,
1185 rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1186 rmf_file_list = best_models[0]
1187 rmf_file_frame_list = best_models[1]
1188 score_list = best_models[2]
1189 feature_keyword_list_dict = best_models[3]
1195 if self.number_of_processes > 1:
1199 rmf_file_frame_list)
1200 for k
in feature_keyword_list_dict:
1201 feature_keyword_list_dict[k] = \
1203 feature_keyword_list_dict[k])
1206 score_rmf_tuples = list(zip(score_list,
1208 rmf_file_frame_list,
1209 list(range(len(score_list)))))
1211 if density_custom_ranges:
1212 for k
in density_custom_ranges:
1213 if not isinstance(density_custom_ranges[k], list):
1214 raise Exception(
"Density custom ranges: values must "
1215 "be lists of tuples")
1218 if first_and_last_frames
is not None:
1219 nframes = len(score_rmf_tuples)
1220 first_frame = int(first_and_last_frames[0] * nframes)
1221 last_frame = int(first_and_last_frames[1] * nframes)
1222 if last_frame > len(score_rmf_tuples):
1224 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1227 best_score_rmf_tuples = sorted(
1229 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1230 best_score_rmf_tuples = [t+(n,)
for n, t
in
1231 enumerate(best_score_rmf_tuples)]
1233 prov.append(IMP.pmi.io.FilterProvenance(
1234 "Best scoring", 0, number_of_best_scoring_models))
1236 best_score_feature_keyword_list_dict = defaultdict(list)
1237 for tpl
in best_score_rmf_tuples:
1239 for f
in feature_keyword_list_dict:
1240 best_score_feature_keyword_list_dict[f].append(
1241 feature_keyword_list_dict[f][index])
1242 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1243 best_score_rmf_tuples,
1244 self.number_of_processes)[self.rank]
1247 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1248 self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1249 if rmsd_calculation_components
is not None:
1250 tmp = self._expand_ambiguity(
1251 prot_ahead, rmsd_calculation_components)
1252 if tmp != rmsd_calculation_components:
1253 print(
'Detected ambiguity, expand rmsd components to',
1255 rmsd_calculation_components = tmp
1256 if alignment_components
is not None:
1257 tmp = self._expand_ambiguity(prot_ahead,
1258 alignment_components)
1259 if tmp != alignment_components:
1260 print(
'Detected ambiguity, expand alignment '
1261 'components to', tmp)
1262 alignment_components = tmp
1268 self.model, my_best_score_rmf_tuples[0],
1269 rmsd_calculation_components, state_number=state_number)
1271 self.model, my_best_score_rmf_tuples, alignment_components,
1272 rmsd_calculation_components, state_number=state_number)
1280 all_coordinates = got_coords[0]
1283 alignment_coordinates = got_coords[1]
1286 rmsd_coordinates = got_coords[2]
1289 rmf_file_name_index_dict = got_coords[3]
1292 all_rmf_file_names = got_coords[4]
1298 if density_custom_ranges:
1300 density_custom_ranges, voxel=voxel_size)
1302 dircluster = os.path.join(outputdir,
1303 "all_models."+str(self.rank))
1309 os.mkdir(dircluster)
1312 clusstat = open(os.path.join(
1313 dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1314 for cnt, tpl
in enumerate(my_best_score_rmf_tuples):
1316 rmf_frame_number = tpl[2]
1319 for key
in best_score_feature_keyword_list_dict:
1321 best_score_feature_keyword_list_dict[key][index]
1325 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1326 self.model, rmf_frame_number, rmf_name)
1328 linking_successful = \
1329 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1330 self.model, prots, rs, rmf_frame_number,
1332 if not linking_successful:
1338 states = IMP.atom.get_by_type(
1339 prots[0], IMP.atom.STATE_TYPE)
1340 prot = states[state_number]
1345 coords_f1 = alignment_coordinates[cnt]
1347 coords_f2 = alignment_coordinates[cnt]
1350 coords_f1, coords_f2)
1351 transformation = Ali.align()[1]
1365 rb = rbm.get_rigid_body()
1375 out_pdb_fn = os.path.join(
1376 dircluster, str(cnt)+
"."+str(self.rank)+
".pdb")
1377 out_rmf_fn = os.path.join(
1378 dircluster, str(cnt)+
"."+str(self.rank)+
".rmf3")
1379 o.init_pdb(out_pdb_fn, prot)
1380 tc = write_pdb_with_centered_coordinates
1381 o.write_pdb(out_pdb_fn,
1382 translate_to_geometric_center=tc)
1384 tmp_dict[
"local_pdb_file_name"] = \
1385 os.path.basename(out_pdb_fn)
1386 tmp_dict[
"rmf_file_full_path"] = rmf_name
1387 tmp_dict[
"local_rmf_file_name"] = \
1388 os.path.basename(out_rmf_fn)
1389 tmp_dict[
"local_rmf_frame_number"] = 0
1391 clusstat.write(str(tmp_dict)+
"\n")
1396 h.set_name(
"System")
1398 o.init_rmf(out_rmf_fn, [h], rs)
1400 o.write_rmf(out_rmf_fn)
1401 o.close_rmf(out_rmf_fn)
1403 if density_custom_ranges:
1404 DensModule.add_subunits_density(prot)
1406 if density_custom_ranges:
1407 DensModule.write_mrc(path=dircluster)
1412 if self.number_of_processes > 1:
1418 rmf_file_name_index_dict)
1420 alignment_coordinates)
1427 [best_score_feature_keyword_list_dict,
1428 rmf_file_name_index_dict],
1434 print(
"setup clustering class")
1437 for n, model_coordinate_dict
in enumerate(all_coordinates):
1439 if (alignment_components
is not None
1440 and len(self.cluster_obj.all_coords) == 0):
1442 self.cluster_obj.set_template(alignment_coordinates[n])
1443 self.cluster_obj.fill(all_rmf_file_names[n],
1444 rmsd_coordinates[n])
1445 print(
"Global calculating the distance matrix")
1448 self.cluster_obj.dist_matrix()
1452 self.cluster_obj.do_cluster(number_of_clusters)
1455 self.cluster_obj.plot_matrix(
1456 figurename=os.path.join(outputdir,
1458 if exit_after_display:
1460 self.cluster_obj.save_distance_matrix_file(
1461 file_name=distance_matrix_file)
1468 print(
"setup clustering class")
1470 self.cluster_obj.load_distance_matrix_file(
1471 file_name=distance_matrix_file)
1472 print(
"clustering with %s clusters" % str(number_of_clusters))
1473 self.cluster_obj.do_cluster(number_of_clusters)
1474 [best_score_feature_keyword_list_dict,
1475 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1478 self.cluster_obj.plot_matrix(figurename=os.path.join(
1479 outputdir,
'dist_matrix.pdf'))
1480 if exit_after_display:
1482 if self.number_of_processes > 1:
1490 print(self.cluster_obj.get_cluster_labels())
1491 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1492 print(
"rank %s " % str(self.rank))
1493 print(
"cluster %s " % str(n))
1494 print(
"cluster label %s " % str(cl))
1495 print(self.cluster_obj.get_cluster_label_names(cl))
1497 len(self.cluster_obj.get_cluster_label_names(cl))
1499 prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1502 if density_custom_ranges:
1504 density_custom_ranges,
1507 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1509 os.mkdir(dircluster)
1515 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1516 clusstat = open(dircluster +
"stat.out",
"w")
1517 for k, structure_name
in enumerate(
1518 self.cluster_obj.get_cluster_label_names(cl)):
1521 tmp_dict.update(rmsd_dict)
1522 index = rmf_file_name_index_dict[structure_name]
1523 for key
in best_score_feature_keyword_list_dict:
1525 key] = best_score_feature_keyword_list_dict[
1531 rmf_name = structure_name.split(
"|")[0]
1532 rmf_frame_number = int(structure_name.split(
"|")[1])
1533 clusstat.write(str(tmp_dict) +
"\n")
1538 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1539 self.model, rmf_frame_number, rmf_name)
1541 linking_successful = \
1542 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1543 self.model, prots, rs, rmf_frame_number,
1545 if not linking_successful:
1550 states = IMP.atom.get_by_type(
1551 prots[0], IMP.atom.STATE_TYPE)
1552 prot = states[state_number]
1558 co = self.cluster_obj
1559 model_index = co.get_model_index_from_name(
1561 transformation = co.get_transformation_to_first_member(
1572 rb = rbm.get_rigid_body()
1581 if density_custom_ranges:
1582 DensModule.add_subunits_density(prot)
1587 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1588 o.write_pdb(dircluster + str(k) +
".pdb")
1593 h.set_name(
"System")
1595 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1596 o.write_rmf(dircluster + str(k) +
".rmf3")
1597 o.close_rmf(dircluster + str(k) +
".rmf3")
1602 if density_custom_ranges:
1603 DensModule.write_mrc(path=dircluster)
1606 if self.number_of_processes > 1:
1609 def get_cluster_rmsd(self, cluster_num):
1610 if self.cluster_obj
is None:
1612 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1614 def save_objects(self, objects, file_name):
1616 outf = open(file_name,
'wb')
1617 pickle.dump(objects, outf)
1620 def load_objects(self, file_name):
1622 inputf = open(file_name,
'rb')
1623 objects = pickle.load(inputf)
1631 This class contains analysis utilities to investigate ReplicaExchange
1639 def __init__(self, model, stat_files, best_models=None, score_key=None,
1642 Construction of the Class.
1643 @param model IMP.Model()
1644 @param stat_files list of string. Can be ascii stat files,
1646 @param best_models Integer. Number of best scoring models,
1647 if None: all models will be read
1648 @param alignment boolean (Default=True). Align before computing
1653 self.best_models = best_models
1655 model, stat_files, self.best_models, score_key, cache=
True)
1657 StatHierarchyHandler=self.stath0)
1670 self.clusters.append(c)
1671 for n0
in range(len(self.stath0)):
1673 self.pairwise_rmsd = {}
1674 self.pairwise_molecular_assignment = {}
1675 self.alignment = alignment
1676 self.symmetric_molecules = {}
1677 self.issymmetricsel = {}
1679 self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1681 self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1686 Setup the selection onto which the rmsd is computed
1687 @param kwargs use IMP.atom.Selection keywords
1695 Store names of symmetric molecules
1697 self.symmetric_molecules[molecule_name] = 0
1702 Setup the selection onto which the alignment is computed
1703 @param kwargs use IMP.atom.Selection keywords
1711 def clean_clusters(self):
1712 for c
in self.clusters:
1716 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1718 Cluster the models based on RMSD.
1719 @param rmsd_cutoff Float the distance cutoff in Angstrom
1720 @param metric (Default=IMP.atom.get_rmsd) the metric that will
1721 be used to compute rmsds
1723 self.clean_clusters()
1724 not_clustered = set(range(len(self.stath1)))
1725 while len(not_clustered) > 0:
1726 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1731 Refine the clusters by merging the ones whose centers are close
1732 @param rmsd_cutoff cutoff distance in Angstorms
1734 clusters_copy = self.clusters
1735 for c0, c1
in itertools.combinations(self.clusters, 2):
1736 if c0.center_index
is None:
1738 if c1.center_index
is None:
1740 _ = self.stath0[c0.center_index]
1741 _ = self.stath1[c1.center_index]
1742 rmsd, molecular_assignment = self.
rmsd()
1743 if rmsd <= rmsd_cutoff:
1744 if c1
in self.clusters:
1745 clusters_copy.remove(c1)
1747 self.clusters = clusters_copy
1754 def set_cluster_assignments(self, cluster_ids):
1755 if len(cluster_ids) != len(self.stath0):
1756 raise ValueError(
'cluster ids has to be same length as '
1760 for i
in sorted(list(set(cluster_ids))):
1762 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
1763 self.clusters[idx].add_member(i, d)
1767 Return the model data from a cluster
1768 @param cluster IMP.pmi.output.Cluster object
1777 Save the data for the whole models into a pickle file
1778 @param filename string
1780 self.stath0.save_data(filename)
1784 Set the data from an external IMP.pmi.output.Data
1785 @param data IMP.pmi.output.Data
1787 self.stath0.data = data
1788 self.stath1.data = data
1792 Load the data from an external pickled file
1793 @param filename string
1795 self.stath0.load_data(filename)
1796 self.stath1.load_data(filename)
1797 self.best_models = len(self.stath0)
1799 def add_cluster(self, rmf_name_list):
1801 print(
"creating cluster index "+str(len(self.clusters)))
1802 self.clusters.append(c)
1803 current_len = len(self.stath0)
1805 for rmf
in rmf_name_list:
1806 print(
"adding rmf "+rmf)
1807 self.stath0.add_stat_file(rmf)
1808 self.stath1.add_stat_file(rmf)
1810 for n0
in range(current_len, len(self.stath0)):
1811 d0 = self.stath0[n0]
1812 c.add_member(n0, d0)
1817 Save the clusters into a pickle file
1818 @param filename string
1821 import cPickle
as pickle
1824 fl = open(filename,
'wb')
1825 pickle.dump(self.clusters, fl)
1829 Load the clusters from a pickle file
1830 @param filename string
1831 @param append bool (Default=False), if True. append the clusters
1832 to the ones currently present
1835 import cPickle
as pickle
1838 fl = open(filename,
'rb')
1839 self.clean_clusters()
1841 self.clusters += pickle.load(fl)
1843 self.clusters = pickle.load(fl)
1852 Compute the cluster center for a given cluster
1854 member_distance = defaultdict(float)
1856 for n0, n1
in itertools.combinations(cluster.members, 2):
1859 rmsd, _ = self.
rmsd()
1860 member_distance[n0] += rmsd
1862 if len(member_distance) > 0:
1863 cluster.center_index = min(member_distance,
1864 key=member_distance.get)
1866 cluster.center_index = cluster.members[0]
1871 Save the coordinates of the current cluster a single rmf file
1873 print(
"saving coordinates", cluster)
1877 if rmf_name
is None:
1878 rmf_name = prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
1880 _ = self.stath1[cluster.members[0]]
1882 o.init_rmf(rmf_name, [self.stath1])
1883 for n1
in cluster.members:
1889 o.write_rmf(rmf_name)
1891 o.close_rmf(rmf_name)
1895 remove structures that are similar
1896 append it to a new cluster
1898 print(
"pruning models")
1900 filtered = [selected]
1901 remaining = range(1, len(self.stath1), 10)
1903 while len(remaining) > 0:
1904 d0 = self.stath0[selected]
1906 for n1
in remaining:
1911 if d <= rmsd_cutoff:
1913 print(
"pruning model %s, similar to model %s, rmsd %s"
1914 % (str(n1), str(selected), str(d)))
1915 remaining = [x
for x
in remaining
if x
not in rm]
1916 if len(remaining) == 0:
1918 selected = remaining[0]
1919 filtered.append(selected)
1922 self.clusters.append(c)
1924 d0 = self.stath0[n0]
1925 c.add_member(n0, d0)
1930 Compute the precision of a cluster
1936 if cluster.center_index
is not None:
1937 members1 = [cluster.center_index]
1939 members1 = cluster.members
1943 for n1
in cluster.members:
1948 tmp_rmsd, _ = self.
rmsd()
1953 precision = rmsd/npairs
1954 cluster.precision = precision
1959 Compute the bipartite precision (ie the cross-precision)
1960 between two clusters
1964 for cn0, n0
in enumerate(cluster1.members):
1966 for cn1, n1
in enumerate(cluster2.members):
1968 tmp_rmsd, _ = self.
rmsd()
1970 print(
"--- rmsd between structure %s and structure "
1971 "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1974 precision = rmsd/npairs
1977 def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1978 cluster_ref=
None, step=1):
1980 Compute the Root mean square fluctuations
1981 of a molecule in a cluster
1982 Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1983 residue indexes and the value is the rmsf
1985 rmsf = IMP.pmi.tools.OrderedDict()
1988 if cluster_ref
is not None:
1989 if cluster_ref.center_index
is not None:
1990 members0 = [cluster_ref.center_index]
1992 members0 = cluster_ref.members
1994 if cluster.center_index
is not None:
1995 members0 = [cluster.center_index]
1997 members0 = cluster.members
2000 copy_index=copy_index, state_index=state_index)
2001 ps0 = s0.get_selected_particles()
2003 residue_indexes = list(IMP.pmi.tools.OrderedSet(
2009 d0 = self.stath0[n0]
2010 for n1
in cluster.members[::step]:
2012 print(
"--- rmsf %s %s" % (str(n0), str(n1)))
2016 self.stath1, molecule=molecule,
2017 residue_indexes=residue_indexes, resolution=1,
2018 copy_index=copy_index, state_index=state_index)
2019 ps1 = s1.get_selected_particles()
2021 d1 = self.stath1[n1]
2024 for n, (p0, p1)
in enumerate(zip(ps0, ps1)):
2025 r = residue_indexes[n]
2037 for stath
in [self.stath0, self.stath1]:
2038 if molecule
not in self.symmetric_molecules:
2040 stath, molecule=molecule, residue_index=r,
2041 resolution=1, copy_index=copy_index,
2042 state_index=state_index)
2045 stath, molecule=molecule, residue_index=r,
2046 resolution=1, state_index=state_index)
2048 ps = s.get_selected_particles()
2057 def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2058 reference=
"Absolute", prefix=
"./", step=1):
2064 for n1
in cluster.members[::step]:
2065 print(
"density "+str(n1))
2070 dens.add_subunits_density(self.stath1)
2072 dens.write_mrc(path=prefix+
'/', suffix=str(cluster.cluster_id))
2075 def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2076 consolidate=
False, molecules=
None, prefix=
'./',
2077 reference=
"Absolute"):
2081 import matplotlib.pyplot
as plt
2082 import matplotlib.cm
as cm
2083 from scipy.spatial.distance
import cdist
2085 if molecules
is None:
2094 molecules=molecules).get_selected_particles())]
2095 unique_copies = [mol
for mol
in mols
if mol.get_copy_index() == 0]
2096 mol_names_unique = dict((mol.get_name(), mol)
for mol
in unique_copies)
2097 total_len_unique = sum(max(mol.get_residue_indexes())
2098 for mol
in unique_copies)
2105 seqlen = max(mol.get_residue_indexes())
2106 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2110 for mol
in unique_copies:
2111 seqlen = max(mol.get_residue_indexes())
2112 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2115 for ncl, n1
in enumerate(cluster.members):
2118 coord_dict = IMP.pmi.tools.OrderedDict()
2120 rindexes = mol.get_residue_indexes()
2121 coords = np.ones((max(rindexes), 3))
2122 for rnum
in rindexes:
2125 selpart = sel.get_selected_particles()
2126 if len(selpart) == 0:
2128 selpart = selpart[0]
2129 coords[rnum - 1, :] = \
2131 coord_dict[mol] = coords
2134 coords = np.concatenate(list(coord_dict.values()))
2135 dists = cdist(coords, coords)
2136 binary_dists = np.where((dists <= contact_threshold)
2137 & (dists >= 1.0), 1.0, 0.0)
2139 binary_dists_dict = {}
2141 len1 = max(mol1.get_residue_indexes())
2143 name1 = mol1.get_name()
2144 name2 = mol2.get_name()
2145 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2146 if (name1, name2)
not in binary_dists_dict:
2147 binary_dists_dict[(name1, name2)] = \
2148 np.zeros((len1, len1))
2149 binary_dists_dict[(name1, name2)] += \
2150 np.where((dists <= contact_threshold)
2151 & (dists >= 1.0), 1.0, 0.0)
2152 binary_dists = np.zeros((total_len_unique, total_len_unique))
2154 for name1, name2
in binary_dists_dict:
2155 r1 = index_dict[mol_names_unique[name1]]
2156 r2 = index_dict[mol_names_unique[name2]]
2157 binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2158 np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2164 contact_freqs = binary_dists
2166 dist_maps.append(dists)
2167 av_dist_map += dists
2168 contact_freqs += binary_dists
2171 contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2173 contact_freqs = 1.0/len(cluster)*contact_freqs
2174 av_dist_map = 1.0/len(cluster)*contact_freqs
2176 fig = plt.figure(figsize=(100, 100))
2177 ax = fig.add_subplot(111)
2180 gap_between_components = 50
2185 sorted_tuple = sorted(
2187 mol).get_extended_name(), mol)
for mol
in mols)
2188 prot_list = list(zip(*sorted_tuple))[1]
2190 sorted_tuple = sorted(
2192 for mol
in unique_copies)
2193 prot_list = list(zip(*sorted_tuple))[1]
2195 prot_listx = prot_list
2196 nresx = gap_between_components + \
2197 sum([max(mol.get_residue_indexes())
2198 + gap_between_components
for mol
in prot_listx])
2201 prot_listy = prot_list
2202 nresy = gap_between_components + \
2203 sum([max(mol.get_residue_indexes())
2204 + gap_between_components
for mol
in prot_listy])
2209 res = gap_between_components
2210 for mol
in prot_listx:
2211 resoffsetx[mol] = res
2212 res += max(mol.get_residue_indexes())
2214 res += gap_between_components
2218 res = gap_between_components
2219 for mol
in prot_listy:
2220 resoffsety[mol] = res
2221 res += max(mol.get_residue_indexes())
2223 res += gap_between_components
2225 resoffsetdiagonal = {}
2226 res = gap_between_components
2227 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2228 resoffsetdiagonal[mol] = res
2229 res += max(mol.get_residue_indexes())
2230 res += gap_between_components
2235 for n, prot
in enumerate(prot_listx):
2236 res = resoffsetx[prot]
2238 for proty
in prot_listy:
2239 resy = resoffsety[proty]
2240 endy = resendy[proty]
2241 ax.plot([res, res], [resy, endy], linestyle=
'-',
2242 color=
'gray', lw=0.4)
2243 ax.plot([end, end], [resy, endy], linestyle=
'-',
2244 color=
'gray', lw=0.4)
2245 xticks.append((float(res) + float(end)) / 2)
2247 prot).get_extended_name())
2251 for n, prot
in enumerate(prot_listy):
2252 res = resoffsety[prot]
2254 for protx
in prot_listx:
2255 resx = resoffsetx[protx]
2256 endx = resendx[protx]
2257 ax.plot([resx, endx], [res, res], linestyle=
'-',
2258 color=
'gray', lw=0.4)
2259 ax.plot([resx, endx], [end, end], linestyle=
'-',
2260 color=
'gray', lw=0.4)
2261 yticks.append((float(res) + float(end)) / 2)
2263 prot).get_extended_name())
2267 tmp_array = np.zeros((nresx, nresy))
2269 for px
in prot_listx:
2270 for py
in prot_listy:
2271 resx = resoffsetx[px]
2272 lengx = resendx[px] - 1
2273 resy = resoffsety[py]
2274 lengy = resendy[py] - 1
2275 indexes_x = index_dict[px]
2276 minx = min(indexes_x)
2277 maxx = max(indexes_x)
2278 indexes_y = index_dict[py]
2279 miny = min(indexes_y)
2280 maxy = max(indexes_y)
2281 tmp_array[resx:lengx, resy:lengy] = \
2282 contact_freqs[minx:maxx, miny:maxy]
2283 ret[(px, py)] = np.argwhere(
2284 contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2286 ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2287 origin=
'lower', alpha=0.6, interpolation=
'nearest')
2289 ax.set_xticks(xticks)
2290 ax.set_xticklabels(xlabels, rotation=90)
2291 ax.set_yticks(yticks)
2292 ax.set_yticklabels(ylabels)
2293 plt.setp(ax.get_xticklabels(), fontsize=6)
2294 plt.setp(ax.get_yticklabels(), fontsize=6)
2297 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2298 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2300 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf",
2301 dpi=300, transparent=
"False")
2304 def plot_rmsd_matrix(self, filename):
2305 self.compute_all_pairwise_rmsd()
2306 distance_matrix = np.zeros(
2307 (len(self.stath0), len(self.stath1)))
2308 for (n0, n1)
in self.pairwise_rmsd:
2309 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2311 import matplotlib
as mpl
2313 import matplotlib.pylab
as pl
2314 from scipy.cluster
import hierarchy
as hrc
2316 fig = pl.figure(figsize=(10, 8))
2317 ax = fig.add_subplot(212)
2318 dendrogram = hrc.dendrogram(
2319 hrc.linkage(distance_matrix),
2322 leaves_order = dendrogram[
'leaves']
2323 ax.set_xlabel(
'Model')
2324 ax.set_ylabel(
'RMSD [Angstroms]')
2326 ax2 = fig.add_subplot(221)
2328 distance_matrix[leaves_order, :][:, leaves_order],
2329 interpolation=
'nearest')
2330 cb = fig.colorbar(cax)
2331 cb.set_label(
'RMSD [Angstroms]')
2332 ax2.set_xlabel(
'Model')
2333 ax2.set_ylabel(
'Model')
2335 pl.savefig(filename, dpi=300)
2344 Update the cluster id numbers
2346 for n, c
in enumerate(self.clusters):
2349 def get_molecule(self, hier, name, copy):
2357 self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2358 self.sel0_rmsd.get_selected_particles())
2359 self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2360 self.sel1_rmsd.get_selected_particles())
2361 for mol
in self.seldict0:
2362 for sel
in self.seldict0[mol]:
2363 self.issymmetricsel[sel] =
False
2364 for mol
in self.symmetric_molecules:
2365 self.symmetric_molecules[mol] = len(self.seldict0[mol])
2366 for sel
in self.seldict0[mol]:
2367 self.issymmetricsel[sel] =
True
2371 self.sel1_alignment, self.sel0_alignment)
2373 for rb
in self.rbs1:
2376 for bead
in self.beads1:
2384 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2386 initial filling of the clusters.
2389 print(
"clustering model "+str(n0))
2390 d0 = self.stath0[n0]
2392 print(
"creating cluster index "+str(len(self.clusters)))
2393 self.clusters.append(c)
2394 c.add_member(n0, d0)
2395 clustered = set([n0])
2397 print(
"--- trying to add model " + str(n1) +
" to cluster "
2398 + str(len(self.clusters)))
2399 d1 = self.stath1[n1]
2402 rmsd, _ = self.
rmsd(metric=metric)
2403 if rmsd < rmsd_cutoff:
2404 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2405 c.add_member(n1, d1)
2408 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2413 merge the clusters that have close members
2414 @param rmsd_cutoff cutoff distance in Angstorms
2422 for c0, c1
in filter(
lambda x: len(x[0].members) > 1,
2423 itertools.combinations(self.clusters, 2)):
2424 n0, n1 = [c.members[0]
for c
in (c0, c1)]
2427 rmsd, _ = self.
rmsd()
2428 if (rmsd < 2*rmsd_cutoff
and
2430 to_merge.append((c0, c1))
2432 for c0, c
in reversed(to_merge):
2436 self.clusters = [c
for c
in
2437 filter(
lambda x: len(x.members) > 0, self.clusters)]
2441 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2443 print(
"check close members for clusters " + str(c0.cluster_id) +
2444 " and " + str(c1.cluster_id))
2445 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2448 rmsd, _ = self.
rmsd(metric=metric)
2449 if rmsd < rmsd_cutoff:
2464 a function that returns the permutation best_sel of sels0 that
2467 best_rmsd2 = float(
'inf')
2469 if self.issymmetricsel[sels0[0]]:
2472 for offset
in range(N):
2473 sels = [sels0[(offset+i) % N]
for i
in range(N)]
2476 r = metric(sel0, sel1)
2478 if rmsd2 < best_rmsd2:
2482 for sels
in itertools.permutations(sels0):
2484 for sel0, sel1
in itertools.takewhile(
2485 lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2486 r = metric(sel0, sel1)
2488 if rmsd2 < best_rmsd2:
2491 return best_sel, best_rmsd2
2493 def compute_all_pairwise_rmsd(self):
2494 for d0
in self.stath0:
2495 for d1
in self.stath1:
2496 rmsd, _ = self.
rmsd()
2498 def rmsd(self, metric=IMP.atom.get_rmsd):
2500 Computes the RMSD. Resolves ambiguous pairs assignments
2504 n0 = self.stath0.current_index
2505 n1 = self.stath1.current_index
2506 if ((n0, n1)
in self.pairwise_rmsd) \
2507 and ((n0, n1)
in self.pairwise_molecular_assignment):
2508 return (self.pairwise_rmsd[(n0, n1)],
2509 self.pairwise_molecular_assignment[(n0, n1)])
2519 molecular_assignment = {}
2520 for molname, sels0
in self.seldict0.items():
2521 sels_best_order, best_rmsd2 = \
2522 self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2524 Ncoords = len(sels_best_order[0].get_selected_particles())
2525 Ncopies = len(self.seldict1[molname])
2526 total_rmsd += Ncoords*best_rmsd2
2527 total_N += Ncoords*Ncopies
2529 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2530 p0 = sel0.get_selected_particles()[0]
2531 p1 = sel1.get_selected_particles()[0]
2536 molecular_assignment[(molname, c0)] = (molname, c1)
2538 total_rmsd = math.sqrt(total_rmsd/total_N)
2540 self.pairwise_rmsd[(n0, n1)] = total_rmsd
2541 self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2542 self.pairwise_rmsd[(n1, n0)] = total_rmsd
2543 self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2544 return total_rmsd, molecular_assignment
2548 Fix the reference structure for structural alignment, rmsd and
2551 @param reference can be either "Absolute" (cluster center of the
2552 first cluster) or Relative (cluster center of the current
2555 if reference ==
"Absolute":
2557 elif reference ==
"Relative":
2558 if cluster.center_index:
2559 n0 = cluster.center_index
2561 n0 = cluster.members[0]
2566 compute the molecular assignments between multiple copies
2567 of the same sequence. It changes the Copy index of Molecules
2570 _, molecular_assignment = self.
rmsd()
2571 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2572 mol0 = self.molcopydict0[m0][c0]
2573 mol1 = self.molcopydict1[m1][c1]
2576 p1.set_value(cik0, c0)
2580 Undo the Copy index assignment
2583 _, molecular_assignment = self.
rmsd()
2584 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2585 mol0 = self.molcopydict0[m0][c0]
2586 mol1 = self.molcopydict1[m1][c1]
2589 p1.set_value(cik0, c1)
2596 s =
"AnalysisReplicaExchange\n"
2597 s +=
"---- number of clusters %s \n" % str(len(self.clusters))
2598 s +=
"---- number of models %s \n" % str(len(self.stath0))
2601 def __getitem__(self, int_slice_adaptor):
2602 if isinstance(int_slice_adaptor, int):
2603 return self.clusters[int_slice_adaptor]
2604 elif isinstance(int_slice_adaptor, slice):
2605 return self.__iter__(int_slice_adaptor)
2607 raise TypeError(
"Unknown Type")
2610 return len(self.clusters)
2612 def __iter__(self, slice_key=None):
2613 if slice_key
is None:
2614 for i
in range(len(self)):
2617 for i
in range(len(self))[slice_key]:
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignment.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A member of a rigid body, it has internal (local) coordinates.
A macro to help setup and run replica exchange.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignments between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
def merge_aggregates
merge the clusters that have close members
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
Mapping between FASTA one-letter codes and residue types.
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
Sample using replica exchange.
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.