1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
16 from pathlib
import Path
18 from operator
import itemgetter
19 from collections
import defaultdict
29 """Replace samplers.MPI_values when in test mode"""
30 def get_percentile(self, name):
35 """All restraints that are written out to the RMF file"""
36 def __init__(self, model, user_restraints):
38 self._user_restraints = user_restraints
if user_restraints
else []
41 return (len(self._user_restraints)
42 + self._rmf_rs.get_number_of_restraints())
47 def __getitem__(self, i):
49 def __init__(self, r):
50 self.r = IMP.RestraintSet.get_from(r)
52 def get_restraint(self):
55 lenuser = len(self._user_restraints)
57 return self._user_restraints[i]
58 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
59 r = self._rmf_rs.get_restraint(i - lenuser)
60 return FakePMIWrapper(r)
62 raise IndexError(
"Out of range")
66 """A macro to help setup and run replica exchange.
67 Supports Monte Carlo and molecular dynamics.
68 Produces trajectory RMF files, best PDB structures,
69 and output stat files.
72 monte_carlo_sample_objects=
None,
73 molecular_dynamics_sample_objects=
None,
75 rmf_output_objects=
None,
76 monte_carlo_temperature=1.0,
77 simulated_annealing=
False,
78 simulated_annealing_minimum_temperature=1.0,
79 simulated_annealing_maximum_temperature=2.5,
80 simulated_annealing_minimum_temperature_nframes=100,
81 simulated_annealing_maximum_temperature_nframes=100,
82 replica_exchange_minimum_temperature=1.0,
83 replica_exchange_maximum_temperature=2.5,
84 replica_exchange_swap=
True,
86 number_of_best_scoring_models=500,
89 molecular_dynamics_steps=10,
90 molecular_dynamics_max_time_step=1.0,
91 number_of_frames=1000,
92 save_coordinates_mode=
"lowest_temperature",
93 nframes_write_coordinates=1,
94 write_initial_rmf=
True,
95 initial_rmf_name_suffix=
"initial",
96 stat_file_name_suffix=
"stat",
97 best_pdb_name_suffix=
"model",
100 do_create_directories=
True,
101 global_output_directory=
"./",
103 best_pdb_dir=
"pdbs/",
104 replica_stat_file_suffix=
"stat_replica",
105 em_object_for_rmf=
None,
107 replica_exchange_object=
None,
111 nestor_restraints=
None,
112 nestor_rmf_fname_prefix=
"nested",
115 @param model The IMP model
116 @param root_hier Top-level (System)hierarchy
117 @param monte_carlo_sample_objects Objects for MC sampling, which
118 should generally be a simple list of Mover objects, e.g.
119 from DegreesOfFreedom.get_movers().
120 @param molecular_dynamics_sample_objects Objects for MD sampling,
121 which should generally be a simple list of particles.
122 @param output_objects A list of structural objects and restraints
123 that will be included in output (ie, statistics "stat"
124 files). Any object that provides a get_output() method
125 can be used here. If None is passed
126 the macro will not write stat files.
127 @param rmf_output_objects A list of structural objects and
128 restraints that will be included in rmf. Any object
129 that provides a get_output() method can be used here.
130 @param monte_carlo_temperature MC temp (may need to be optimized
131 based on post-sampling analysis)
132 @param simulated_annealing If True, perform simulated annealing
133 @param simulated_annealing_minimum_temperature Should generally be
134 the same as monte_carlo_temperature.
135 @param simulated_annealing_minimum_temperature_nframes Number of
136 frames to compute at minimum temperature.
137 @param simulated_annealing_maximum_temperature_nframes Number of
139 temps > simulated_annealing_maximum_temperature.
140 @param replica_exchange_minimum_temperature Low temp for REX; should
141 generally be the same as monte_carlo_temperature.
142 @param replica_exchange_maximum_temperature High temp for REX
143 @param replica_exchange_swap Boolean, enable disable temperature
145 @param num_sample_rounds Number of rounds of MC/MD per cycle
146 @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
147 models to keep around for analysis.
148 @param mmcif If True, write best scoring models in mmCIF format;
149 if False (the default), write in legacy PDB format.
150 @param best_pdb_dir The directory under `global_output_directory`
151 where best-scoring PDB/mmCIF files are written.
152 @param best_pdb_name_suffix Part of the file name for best-scoring
154 @param monte_carlo_steps Number of MC steps per round
155 @param self_adaptive self adaptive scheme for Monte Carlo movers
156 @param molecular_dynamics_steps Number of MD steps per round
157 @param molecular_dynamics_max_time_step Max time step for MD
158 @param number_of_frames Number of REX frames to run
159 @param save_coordinates_mode string: how to save coordinates.
160 "lowest_temperature" (default) only the lowest temperatures
162 "25th_score" all replicas whose score is below the 25th
164 "50th_score" all replicas whose score is below the 50th
166 "75th_score" all replicas whose score is below the 75th
168 @param nframes_write_coordinates How often to write the coordinates
170 @param write_initial_rmf Write the initial configuration
171 @param global_output_directory Folder that will be created to house
173 @param test_mode Set to True to avoid writing any files, just test
175 @param score_moved If True, attempt to speed up Monte Carlo
176 sampling by caching scoring function terms on particles
178 @param use_nestor If True, follows the Nested Sampling workflow
179 of the NestOR module and skips writing stat files and
181 @param nestor_restraints A list of restraints for which
182 likelihoods are to be computed for use by NestOR module.
183 @param nestor_rmf_fname_prefix Prefix to be used for storing .rmf3
184 files generated by NestOR .
185 @param use_jax If set to True, sample the scoring function using
186 JAX instead of IMP's internal C++ implementation (requires
187 that all PMI restraints used have a JAX implementation).
193 if output_objects == []:
196 self.output_objects = []
198 self.output_objects = output_objects
199 self.rmf_output_objects = rmf_output_objects
201 and not root_hier.get_parent()):
202 if self.output_objects
is not None:
203 self.output_objects.append(
205 if self.rmf_output_objects
is not None:
206 self.rmf_output_objects.append(
208 self.root_hier = root_hier
209 states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
210 self.vars[
"number_of_states"] = len(states)
212 self.root_hiers = states
213 self.is_multi_state =
True
215 self.root_hier = root_hier
216 self.is_multi_state =
False
218 raise TypeError(
"Must provide System hierarchy (root_hier)")
220 self._rmf_restraints = _RMFRestraints(model,
None)
221 self.em_object_for_rmf = em_object_for_rmf
222 self.monte_carlo_sample_objects = monte_carlo_sample_objects
223 self.vars[
"self_adaptive"] = self_adaptive
224 self.molecular_dynamics_sample_objects = \
225 molecular_dynamics_sample_objects
226 self.replica_exchange_object = replica_exchange_object
227 self.molecular_dynamics_max_time_step = \
228 molecular_dynamics_max_time_step
229 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
230 self.vars[
"replica_exchange_minimum_temperature"] = \
231 replica_exchange_minimum_temperature
232 self.vars[
"replica_exchange_maximum_temperature"] = \
233 replica_exchange_maximum_temperature
234 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
235 self.vars[
"simulated_annealing"] = simulated_annealing
236 self.vars[
"simulated_annealing_minimum_temperature"] = \
237 simulated_annealing_minimum_temperature
238 self.vars[
"simulated_annealing_maximum_temperature"] = \
239 simulated_annealing_maximum_temperature
240 self.vars[
"simulated_annealing_minimum_temperature_nframes"] = \
241 simulated_annealing_minimum_temperature_nframes
242 self.vars[
"simulated_annealing_maximum_temperature_nframes"] = \
243 simulated_annealing_maximum_temperature_nframes
245 self.vars[
"num_sample_rounds"] = num_sample_rounds
247 "number_of_best_scoring_models"] = number_of_best_scoring_models
248 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
249 self.vars[
"molecular_dynamics_steps"] = molecular_dynamics_steps
250 self.vars[
"number_of_frames"] = number_of_frames
251 if save_coordinates_mode
not in (
"lowest_temperature",
"25th_score",
252 "50th_score",
"75th_score"):
253 raise Exception(
"save_coordinates_mode has unrecognized value")
255 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
256 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
257 self.vars[
"write_initial_rmf"] = write_initial_rmf
258 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
259 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
260 self.vars[
"mmcif"] = mmcif
261 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
262 self.vars[
"do_clean_first"] = do_clean_first
263 self.vars[
"do_create_directories"] = do_create_directories
264 self.vars[
"global_output_directory"] = global_output_directory
265 self.vars[
"rmf_dir"] = rmf_dir
266 self.vars[
"best_pdb_dir"] = best_pdb_dir
267 self.vars[
"atomistic"] = atomistic
268 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
269 self.vars[
"geometries"] =
None
270 self.test_mode = test_mode
271 self.score_moved = score_moved
272 self.use_jax = use_jax
273 self.vars[
"use_nestor"] = self.nest = use_nestor
274 self.nestor_restraints = nestor_restraints
275 self.nestor_rmf_fname = nestor_rmf_fname_prefix
278 if self.vars[
"geometries"]
is None:
279 self.vars[
"geometries"] = list(geometries)
281 self.vars[
"geometries"].extend(geometries)
284 print(
"ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
285 "rmfs/*.rmf3 for each replica ")
286 print(
"--- it stores the best scoring pdb models in pdbs/")
287 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
288 "lowest temperature")
289 print(
"--- variables:")
290 for k, v
in sorted(self.vars.items(), key=itemgetter(0)):
291 print(
"------", k.ljust(30), v)
293 def get_replica_exchange_object(self):
294 return self.replica_exchange_object
296 def _add_provenance(self, sampler_md, sampler_mc):
297 """Record details about the sampling in the IMP Hierarchies"""
300 method =
"Molecular Dynamics"
301 iterations += self.vars[
"molecular_dynamics_steps"]
303 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
304 iterations += self.vars[
"monte_carlo_steps"]
306 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
308 iterations *= self.vars[
"num_sample_rounds"]
310 pi = self.model.add_particle(
"sampling")
312 self.model, pi, method, self.vars[
"number_of_frames"],
314 p.set_number_of_replicas(
315 self.replica_exchange_object.get_number_of_replicas())
316 IMP.pmi.tools._add_pmi_provenance(self.root_hier)
319 def _setup_mc_sampler(self):
321 self.model, self.monte_carlo_sample_objects,
322 self.vars[
"monte_carlo_temperature"],
323 score_moved=self.score_moved)
325 sampler_mc.set_use_jax(self.vars[
"monte_carlo_steps"])
326 if self.vars[
"simulated_annealing"]:
327 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
328 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
330 "simulated_annealing_minimum_temperature_nframes"]
332 "simulated_annealing_maximum_temperature_nframes"]
333 sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
334 if self.vars[
"self_adaptive"]:
335 sampler_mc.set_self_adaptive(
336 isselfadaptive=self.vars[
"self_adaptive"])
337 if self.output_objects
is not None:
338 self.output_objects.append(sampler_mc)
339 if self.rmf_output_objects
is not None:
340 self.rmf_output_objects.append(sampler_mc)
343 def _setup_md_sampler(self):
345 self.model, self.molecular_dynamics_sample_objects,
346 self.vars[
"monte_carlo_temperature"],
347 maximum_time_step=self.molecular_dynamics_max_time_step)
349 sampler_md.set_use_jax(self.vars[
"molecular_dynamics_steps"])
350 if self.vars[
"simulated_annealing"]:
351 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
352 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
354 "simulated_annealing_minimum_temperature_nframes"]
356 "simulated_annealing_maximum_temperature_nframes"]
357 sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
358 if self.output_objects
is not None:
359 self.output_objects.append(sampler_md)
360 if self.rmf_output_objects
is not None:
361 self.rmf_output_objects.append(sampler_md)
364 def _get_jax_model(self, sampler_mc):
366 return sampler_mc.get_jax_model()
368 def execute_macro(self):
369 temp_index_factor = 100000.0
373 if self.monte_carlo_sample_objects
is not None:
374 print(
"Setting up MonteCarlo")
375 sampler_mc = self._setup_mc_sampler()
376 samplers.append(sampler_mc)
378 if self.molecular_dynamics_sample_objects
is not None:
379 print(
"Setting up MolecularDynamics")
380 sampler_md = self._setup_md_sampler()
381 samplers.append(sampler_md)
385 print(
"Setting up ReplicaExchange")
387 self.model, self.vars[
"replica_exchange_minimum_temperature"],
388 self.vars[
"replica_exchange_maximum_temperature"], samplers,
389 replica_exchange_object=self.replica_exchange_object)
390 self.replica_exchange_object = rex.rem
392 myindex = rex.get_my_index()
393 if self.output_objects
is not None:
394 self.output_objects.append(rex)
395 if self.rmf_output_objects
is not None:
396 self.rmf_output_objects.append(rex)
400 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
404 globaldir = self.vars[
"global_output_directory"] +
"/"
405 rmf_dir = globaldir + self.vars[
"rmf_dir"]
406 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
408 if not self.test_mode
and not self.nest:
409 if self.vars[
"do_clean_first"]:
412 if self.vars[
"do_create_directories"]:
414 os.makedirs(globaldir, exist_ok=
True)
415 os.makedirs(rmf_dir, exist_ok=
True)
416 if not self.is_multi_state:
417 os.makedirs(pdb_dir, exist_ok=
True)
419 for n
in range(self.vars[
"number_of_states"]):
420 os.makedirs(pdb_dir +
"/" + str(n), exist_ok=
True)
425 if self.output_objects
is not None:
426 self.output_objects.append(sw)
427 if self.rmf_output_objects
is not None:
428 self.rmf_output_objects.append(sw)
433 print(
"Setting up stat file")
434 low_temp_stat_file = globaldir + \
435 self.vars[
"stat_file_name_suffix"] +
"." + \
436 str(myindex) +
".out"
439 if not self.test_mode:
442 if not self.test_mode
and not self.nest:
443 if self.output_objects
is not None:
444 output.init_stat2(low_temp_stat_file,
446 extralabels=[
"rmf_file",
"rmf_frame_index"],
447 jax_model=self._get_jax_model(sampler_mc))
449 print(
"Stat file writing is disabled")
451 if self.rmf_output_objects
is not None and not self.nest:
452 print(
"Stat info being written in the rmf file")
454 if not self.test_mode
and not self.nest:
455 print(
"Setting up replica stat file")
456 replica_stat_file = globaldir + \
457 self.vars[
"replica_stat_file_suffix"] +
"." + \
458 str(myindex) +
".out"
459 if not self.test_mode:
460 output.init_stat2(replica_stat_file, [rex],
461 extralabels=[
"score"],
462 jax_model=self._get_jax_model(sampler_mc))
464 print(
"Setting up best pdb files")
465 if not self.is_multi_state:
466 if self.vars[
"number_of_best_scoring_models"] > 0:
467 output.init_pdb_best_scoring(
468 pdb_dir +
"/" + self.vars[
"best_pdb_name_suffix"],
470 self.vars[
"number_of_best_scoring_models"],
471 replica_exchange=
True,
472 mmcif=self.vars[
'mmcif'],
473 best_score_file=globaldir +
"best.scores.rex.py")
474 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
476 pdb_dir +
"/" +
"model.psf",
478 self.vars[
"best_pdb_name_suffix"] + pdbext)
480 if self.vars[
"number_of_best_scoring_models"] > 0:
481 for n
in range(self.vars[
"number_of_states"]):
482 output.init_pdb_best_scoring(
483 pdb_dir +
"/" + str(n) +
"/" +
484 self.vars[
"best_pdb_name_suffix"],
486 self.vars[
"number_of_best_scoring_models"],
487 replica_exchange=
True,
488 mmcif=self.vars[
'mmcif'],
489 best_score_file=globaldir +
"best.scores.rex.py")
490 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
492 pdb_dir +
"/" + str(n) +
"/" +
"model.psf",
493 pdb_dir +
"/" + str(n) +
"/" +
494 self.vars[
"best_pdb_name_suffix"] + pdbext)
497 if self.em_object_for_rmf
is not None:
498 output_hierarchies = [
500 self.em_object_for_rmf.get_density_as_hierarchy(
503 output_hierarchies = [self.root_hier]
505 if not self.test_mode
and not self.nest:
506 print(
"Setting up and writing initial rmf coordinate file")
507 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
508 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
510 listofobjects=self.rmf_output_objects)
511 if self._rmf_restraints:
512 output.add_restraints_to_rmf(
513 init_suffix +
"." + str(myindex) +
".rmf3",
514 self._rmf_restraints)
515 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
516 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
518 if not self.test_mode:
519 mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
521 mpivs = _MockMPIValues()
523 self._add_provenance(sampler_md, sampler_mc)
525 if not self.test_mode
and not self.nest:
526 print(
"Setting up production rmf files")
527 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
528 output.init_rmf(rmfname, output_hierarchies,
529 geometries=self.vars[
"geometries"],
530 listofobjects=self.rmf_output_objects)
532 if self._rmf_restraints:
533 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
535 if not self.test_mode
and self.nest:
536 print(
"Setting up NestOR rmf files")
537 nestor_rmf_fname = str(self.nestor_rmf_fname) +
'_' + \
538 str(self.replica_exchange_object.get_my_index()) +
'.rmf3'
540 output.init_rmf(nestor_rmf_fname, output_hierarchies,
541 geometries=self.vars[
"geometries"],
542 listofobjects=self.rmf_output_objects)
544 ntimes_at_low_temp = 0
546 if myindex == 0
and not self.nest:
548 self.replica_exchange_object.set_was_used(
True)
549 nframes = self.vars[
"number_of_frames"]
553 sampled_likelihoods = []
554 for i
in range(nframes):
559 for nr
in range(self.vars[
"num_sample_rounds"]):
560 if sampler_md
is not None:
561 score = sampler_md.optimize(
562 self.vars[
"molecular_dynamics_steps"])
563 if sampler_mc
is not None:
564 score = sampler_mc.optimize(
565 self.vars[
"monte_carlo_steps"])
568 self.model).evaluate(
False)
570 and not self.use_jax):
574 self.model).evaluate(
False)
575 assert abs(score - check_score) < 1e-4
576 mpivs.set_value(
"score", score)
578 output.set_output_entry(
"score", score)
580 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
582 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
583 save_frame = (min_temp_index == my_temp_index)
584 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
585 score_perc = mpivs.get_percentile(
"score")
586 save_frame = (score_perc*100.0 <= 25.0)
587 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
588 score_perc = mpivs.get_percentile(
"score")
589 save_frame = (score_perc*100.0 <= 50.0)
590 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
591 score_perc = mpivs.get_percentile(
"score")
592 save_frame = (score_perc*100.0 <= 75.0)
595 if save_frame
and not self.test_mode:
599 print(
"--- frame %s score %s " % (str(i), str(score)))
602 if math.isnan(score):
603 sampled_likelihoods.append(math.nan)
605 likelihood_for_sample = 1
606 for rstrnt
in self.nestor_restraints:
607 likelihood_for_sample *= rstrnt.get_likelihood()
608 sampled_likelihoods.append(likelihood_for_sample)
609 output.write_rmf(nestor_rmf_fname)
611 if not self.test_mode
and not self.nest:
612 if i % self.vars[
"nframes_write_coordinates"] == 0:
613 print(
'--- writing coordinates')
614 if self.vars[
"number_of_best_scoring_models"] > 0:
615 output.write_pdb_best_scoring(score)
616 output.write_rmf(rmfname)
617 output.set_output_entry(
"rmf_file", rmfname)
618 output.set_output_entry(
"rmf_frame_index",
621 output.set_output_entry(
"rmf_file", rmfname)
622 output.set_output_entry(
"rmf_frame_index",
'-1')
623 if self.output_objects
is not None:
626 jax_model=self._get_jax_model(sampler_mc))
627 ntimes_at_low_temp += 1
629 if not self.test_mode
and not self.nest:
632 jax_model=self._get_jax_model(sampler_mc))
633 if self.vars[
"replica_exchange_swap"]:
634 rex.swap_temp(i, score)
636 if self.nest
and len(sampled_likelihoods) > 0:
637 with open(
"likelihoods_"
638 + str(self.replica_exchange_object.get_my_index()),
640 pickle.dump(sampled_likelihoods, lif)
642 output.close_rmf(nestor_rmf_fname)
644 for p, state
in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
645 p.add_replica_exchange(state, self)
647 if not self.test_mode
and not self.nest:
648 print(
"closing production rmf files")
649 output.close_rmf(rmfname)
653 """A macro to build a IMP::pmi::topology::System based on a
654 TopologyReader object.
656 Easily create multi-state systems by calling this macro
657 repeatedly with different TopologyReader objects!
658 A useful function is get_molecules() which returns the PMI Molecules
659 grouped by state as a dictionary with key = (molecule name),
660 value = IMP.pmi.topology.Molecule
661 Quick multi-state system:
664 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
665 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
666 bs = IMP.pmi.macros.BuildSystem(model)
667 bs.add_state(reader1)
668 bs.add_state(reader2)
669 bs.execute_macro() # build everything including degrees of freedom
670 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
671 ### now you have a two state system, you add restraints etc
673 @note The "domain name" entry of the topology reader is not used.
674 All molecules are set up by the component name, but split into rigid bodies
678 _alphabets = {
'DNA': IMP.pmi.alphabets.dna,
679 'RNA': IMP.pmi.alphabets.rna}
681 def __init__(self, model, sequence_connectivity_scale=4.0,
682 force_create_gmm_files=
False, resolutions=[1, 10],
685 @param model An IMP Model
686 @param sequence_connectivity_scale For scaling the connectivity
688 @param force_create_gmm_files If True, will sample and create GMMs
689 no matter what. If False, will only sample if the
690 files don't exist. If number of Gaussians is zero, won't
692 @param resolutions The resolutions to build for structured regions
693 @param name The name of the top-level hierarchy node.
700 self._domain_res = []
702 self.force_create_gmm_files = force_create_gmm_files
703 self.resolutions = resolutions
705 def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
707 """Add a state using the topology info in a
708 IMP::pmi::topology::TopologyReader object.
709 When you are done adding states, call execute_macro()
710 @param reader The TopologyReader object
711 @param keep_chain_id If True, keep the chain IDs from the
712 original PDB files, if available
713 @param fasta_name_map dictionary for converting protein names
714 found in the fasta file
715 @param chain_ids A list or string of chain IDs for assigning to
716 newly-created molecules, e.g.
717 `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
718 If not specified, chain IDs A through Z are assigned, then
719 AA through AZ, then BA through BZ, and so on, in the same
722 state = self.system.create_state()
723 self._readers.append(reader)
725 these_domain_res = {}
727 if chain_ids
is None:
728 chain_ids = IMP.pmi.output._ChainIDs()
733 for molname
in reader.get_molecules():
734 copies = reader.get_molecules()[molname].domains
735 for nc, copyname
in enumerate(copies):
736 print(
"BuildSystem.add_state: setting up molecule %s copy "
737 "number %s" % (molname, str(nc)))
738 copy = copies[copyname]
741 all_chains = [c
for c
in copy
if c.chain
is not None]
743 chain_id = all_chains[0].chain
745 chain_id = chain_ids[numchain]
747 "No PDBs specified for %s, so keep_chain_id has "
748 "no effect; using default chain ID '%s'"
751 chain_id = chain_ids[numchain]
753 alphabet = IMP.pmi.alphabets.amino_acid
754 fasta_flag = copy[0].fasta_flag
755 if fasta_flag
in self._alphabets:
756 alphabet = self._alphabets[fasta_flag]
758 copy[0].fasta_file, fasta_name_map)
759 seq = seqs[copy[0].fasta_id]
760 print(
"BuildSystem.add_state: molecule %s sequence has "
761 "%s residues" % (molname, len(seq)))
762 orig_mol = state.create_molecule(
763 molname, seq, chain_id, alphabet=alphabet,
764 uniprot=seqs.uniprot.get(copy[0].fasta_id))
768 print(
"BuildSystem.add_state: creating a copy for "
769 "molecule %s" % molname)
770 mol = orig_mol.create_copy(chain_id)
773 for domainnumber, domain
in enumerate(copy):
774 print(
"BuildSystem.add_state: ---- setting up domain %s "
775 "of molecule %s" % (domainnumber, molname))
778 these_domains[domain.get_unique_name()] = domain
779 if domain.residue_range == []
or \
780 domain.residue_range
is None:
781 domain_res = mol.get_residues()
783 start = domain.residue_range[0]+domain.pdb_offset
784 if domain.residue_range[1] ==
'END':
785 end = len(mol.sequence)
787 end = domain.residue_range[1]+domain.pdb_offset
788 domain_res = mol.residue_range(start-1, end-1)
789 print(
"BuildSystem.add_state: -------- domain %s of "
790 "molecule %s extends from residue %s to "
792 % (domainnumber, molname, start, end))
793 if domain.pdb_file ==
"BEADS":
794 print(
"BuildSystem.add_state: -------- domain %s of "
795 "molecule %s represented by BEADS "
796 % (domainnumber, molname))
797 mol.add_representation(
799 resolutions=[domain.bead_size],
800 setup_particles_as_densities=(
801 domain.em_residues_per_gaussian != 0),
803 these_domain_res[domain.get_unique_name()] = \
805 elif domain.pdb_file ==
"IDEAL_HELIX":
806 print(
"BuildSystem.add_state: -------- domain %s of "
807 "molecule %s represented by IDEAL_HELIX "
808 % (domainnumber, molname))
809 emper = domain.em_residues_per_gaussian
810 mol.add_representation(
812 resolutions=self.resolutions,
814 density_residues_per_component=emper,
815 density_prefix=domain.density_prefix,
816 density_force_compute=self.force_create_gmm_files,
818 these_domain_res[domain.get_unique_name()] = \
821 print(
"BuildSystem.add_state: -------- domain %s of "
822 "molecule %s represented by pdb file %s "
823 % (domainnumber, molname, domain.pdb_file))
824 domain_atomic = mol.add_structure(domain.pdb_file,
826 domain.residue_range,
829 domain_non_atomic = domain_res - domain_atomic
830 if not domain.em_residues_per_gaussian:
831 mol.add_representation(
832 domain_atomic, resolutions=self.resolutions,
834 if len(domain_non_atomic) > 0:
835 mol.add_representation(
837 resolutions=[domain.bead_size],
840 print(
"BuildSystem.add_state: -------- domain %s "
841 "of molecule %s represented by gaussians "
842 % (domainnumber, molname))
843 emper = domain.em_residues_per_gaussian
844 creategmm = self.force_create_gmm_files
845 mol.add_representation(
847 resolutions=self.resolutions,
848 density_residues_per_component=emper,
849 density_prefix=domain.density_prefix,
850 density_force_compute=creategmm,
852 if len(domain_non_atomic) > 0:
853 mol.add_representation(
855 resolutions=[domain.bead_size],
856 setup_particles_as_densities=
True,
858 these_domain_res[domain.get_unique_name()] = (
859 domain_atomic, domain_non_atomic)
860 self._domain_res.append(these_domain_res)
861 self._domains.append(these_domains)
862 print(
'BuildSystem.add_state: State', len(self.system.states),
'added')
866 """Return list of all molecules grouped by state.
867 For each state, it's a dictionary of Molecules where key is the
870 return [s.get_molecules()
for s
in self.system.get_states()]
872 def get_molecule(self, molname, copy_index=0, state_index=0):
873 return self.system.get_states()[state_index].
get_molecules()[
877 max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
878 """Builds representations and sets up degrees of freedom"""
879 print(
"BuildSystem.execute_macro: building representations")
880 self.root_hier = self.system.build()
882 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
884 for nstate, reader
in enumerate(self._readers):
885 rbs = reader.get_rigid_bodies()
886 srbs = reader.get_super_rigid_bodies()
887 csrbs = reader.get_chains_of_super_rigid_bodies()
890 domains_in_rbs = set()
892 print(
"BuildSystem.execute_macro: -------- building rigid "
893 "body %s" % (str(rblist)))
894 all_res = IMP.pmi.tools.OrderedSet()
895 bead_res = IMP.pmi.tools.OrderedSet()
897 domain = self._domains[nstate][dname]
898 print(
"BuildSystem.execute_macro: -------- adding %s"
900 all_res |= self._domain_res[nstate][dname][0]
901 bead_res |= self._domain_res[nstate][dname][1]
902 domains_in_rbs.add(dname)
904 print(
"BuildSystem.execute_macro: -------- creating rigid "
905 "body with max_trans %s max_rot %s "
906 "non_rigid_max_trans %s"
907 % (str(max_rb_trans), str(max_rb_rot),
908 str(max_bead_trans)))
909 self.dof.create_rigid_body(all_res,
910 nonrigid_parts=bead_res,
911 max_trans=max_rb_trans,
913 nonrigid_max_trans=max_bead_trans,
914 name=
"RigidBody %s" % dname)
917 for dname, domain
in self._domains[nstate].items():
918 if dname
not in domains_in_rbs:
919 if domain.pdb_file !=
"BEADS":
921 "No rigid bodies set for %s. Residues read from "
922 "the PDB file will not be sampled - only regions "
923 "missing from the PDB will be treated flexibly. "
924 "To sample the entire sequence, use BEADS instead "
925 "of a PDB file name" % dname,
927 self.dof.create_flexible_beads(
928 self._domain_res[nstate][dname][1],
929 max_trans=max_bead_trans)
933 print(
"BuildSystem.execute_macro: -------- building "
934 "super rigid body %s" % (str(srblist)))
935 all_res = IMP.pmi.tools.OrderedSet()
936 for dname
in srblist:
937 print(
"BuildSystem.execute_macro: -------- adding %s"
939 all_res |= self._domain_res[nstate][dname][0]
940 all_res |= self._domain_res[nstate][dname][1]
942 print(
"BuildSystem.execute_macro: -------- creating super "
943 "rigid body with max_trans %s max_rot %s "
944 % (str(max_srb_trans), str(max_srb_rot)))
945 self.dof.create_super_rigid_body(
946 all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
949 for csrblist
in csrbs:
950 all_res = IMP.pmi.tools.OrderedSet()
951 for dname
in csrblist:
952 all_res |= self._domain_res[nstate][dname][0]
953 all_res |= self._domain_res[nstate][dname][1]
954 all_res = list(all_res)
955 all_res.sort(key=
lambda r: r.get_index())
956 self.dof.create_main_chain_mover(all_res)
957 return self.root_hier, self.dof
962 """A macro for running all the basic operations of analysis.
963 Includes clustering, precision analysis, and making ensemble density maps.
964 A number of plots are also supported.
967 merge_directories=[
"./"],
968 stat_file_name_suffix=
"stat",
969 best_pdb_name_suffix=
"model",
971 do_create_directories=
True,
972 global_output_directory=
"output/",
973 replica_stat_file_suffix=
"stat_replica",
974 global_analysis_result_directory=
"./analysis/",
977 @param model The IMP model
978 @param stat_file_name_suffix
979 @param merge_directories The directories containing output files
980 @param best_pdb_name_suffix
981 @param do_clean_first
982 @param do_create_directories
983 @param global_output_directory Where everything is
984 @param replica_stat_file_suffix
985 @param global_analysis_result_directory
986 @param test_mode If True, nothing is changed on disk
990 from mpi4py
import MPI
991 self.comm = MPI.COMM_WORLD
992 self.rank = self.comm.Get_rank()
993 self.number_of_processes = self.comm.size
996 self.number_of_processes = 1
998 self.test_mode = test_mode
999 self._protocol_output = []
1000 self.cluster_obj =
None
1002 stat_dir = global_output_directory
1003 self.stat_files = []
1005 for rd
in merge_directories:
1006 stat_files = glob.glob(os.path.join(rd, stat_dir,
"stat.*.out"))
1007 if len(stat_files) == 0:
1008 warnings.warn(
"no stat files found in %s"
1009 % os.path.join(rd, stat_dir),
1011 self.stat_files += stat_files
1014 """Capture details of the modeling protocol.
1015 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
1018 self._protocol_output.append((p, p._last_state))
1021 score_key=
"Total_Score",
1022 rmf_file_key=
"rmf_file",
1023 rmf_file_frame_key=
"rmf_frame_index",
1026 nframes_trajectory=10000):
1027 """ Get a trajectory of the modeling run, for generating
1028 demonstrative movies
1030 @param score_key The score for ranking models
1031 @param rmf_file_key Key pointing to RMF filename
1032 @param rmf_file_frame_key Key pointing to RMF frame number
1033 @param outputdir The local output directory used in the run
1034 @param get_every Extract every nth frame
1035 @param nframes_trajectory Total number of frames of the trajectory
1040 self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
1042 score_list = list(map(float, trajectory_models[2]))
1044 max_score = max(score_list)
1045 min_score = min(score_list)
1047 bins = [(max_score-min_score)*math.exp(-float(i))+min_score
1048 for i
in range(nframes_trajectory)]
1049 binned_scores = [
None]*nframes_trajectory
1050 binned_model_indexes = [-1]*nframes_trajectory
1052 for model_index, s
in enumerate(score_list):
1053 bins_score_diffs = [abs(s-b)
for b
in bins]
1054 bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1055 if binned_scores[bin_index]
is None:
1056 binned_scores[bin_index] = s
1057 binned_model_indexes[bin_index] = model_index
1059 old_diff = abs(binned_scores[bin_index]-bins[bin_index])
1060 new_diff = abs(s-bins[bin_index])
1061 if new_diff < old_diff:
1062 binned_scores[bin_index] = s
1063 binned_model_indexes[bin_index] = model_index
1065 print(binned_scores)
1066 print(binned_model_indexes)
1068 def _expand_ambiguity(self, prot, d):
1069 """If using PMI2, expand the dictionary to include copies as
1072 This also keeps the states separate.
1077 if '..' in key
or (isinstance(val, tuple)
and len(val) >= 3):
1080 states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1081 if isinstance(val, tuple):
1089 for nst
in range(len(states)):
1091 copies = sel.get_selected_particles(with_representation=
False)
1093 for nc
in range(len(copies)):
1095 newdict[
'%s.%i..%i' % (name, nst, nc)] = \
1096 (start, stop, name, nc, nst)
1098 newdict[
'%s..%i' % (name, nc)] = \
1099 (start, stop, name, nc, nst)
1105 score_key=
"Total_Score",
1106 rmf_file_key=
"rmf_file",
1107 rmf_file_frame_key=
"rmf_frame_index",
1109 prefiltervalue=
None,
1112 alignment_components=
None,
1113 number_of_best_scoring_models=10,
1114 rmsd_calculation_components=
None,
1115 distance_matrix_file=
'distances.mat',
1116 load_distance_matrix_file=
False,
1117 skip_clustering=
False,
1118 number_of_clusters=1,
1120 exit_after_display=
True,
1122 first_and_last_frames=
None,
1123 density_custom_ranges=
None,
1124 write_pdb_with_centered_coordinates=
False,
1126 """Get the best scoring models, compute a distance matrix,
1127 cluster them, and create density maps.
1129 Tuple format: "molname" just the molecule,
1130 or (start,stop,molname,copy_num(optional),state_num(optional)
1131 Can pass None for copy or state to ignore that field.
1132 If you don't pass a specific copy number
1134 @param score_key The score for ranking models.
1135 @param rmf_file_key Key pointing to RMF filename
1136 @param rmf_file_frame_key Key pointing to RMF frame number
1137 @param state_number State number to analyze
1138 @param prefiltervalue Only include frames where the
1139 score key is below this value
1140 @param feature_keys Keywords for which you want to
1141 calculate average, medians, etc.
1142 If you pass "Keyname" it'll include everything that matches
1144 @param outputdir The local output directory used in
1146 @param alignment_components Dictionary with keys=groupname,
1147 values are tuples for aligning the structures
1148 e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1149 @param number_of_best_scoring_models Num models to keep per run
1150 @param rmsd_calculation_components For calculating RMSD
1151 (same format as alignment_components)
1152 @param distance_matrix_file Where to store/read the
1154 @param load_distance_matrix_file Try to load the distance
1156 @param skip_clustering Just extract the best scoring
1157 models and save the pdbs
1158 @param number_of_clusters Number of k-means clusters
1159 @param display_plot Display the distance matrix
1160 @param exit_after_display Exit after displaying distance
1162 @param get_every Extract every nth frame
1163 @param first_and_last_frames A tuple with the first and last
1164 frames to be analyzed. Values are percentages!
1165 Default: get all frames
1166 @param density_custom_ranges For density calculation
1167 (same format as alignment_components)
1168 @param write_pdb_with_centered_coordinates
1169 @param voxel_size Used for the density output
1173 self._outputdir = Path(outputdir).absolute()
1174 self._number_of_clusters = number_of_clusters
1175 for p, state
in self._protocol_output:
1176 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1187 if not load_distance_matrix_file:
1188 if len(self.stat_files) == 0:
1189 print(
"ERROR: no stat file found in the given path")
1191 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1192 self.stat_files, self.number_of_processes)[self.rank]
1195 for k
in (score_key, rmf_file_key, rmf_file_frame_key):
1196 if k
in feature_keys:
1198 "no need to pass " + k +
" to feature_keys.",
1200 feature_keys.remove(k)
1203 my_stat_files, score_key, feature_keys, rmf_file_key,
1204 rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1205 rmf_file_list = best_models[0]
1206 rmf_file_frame_list = best_models[1]
1207 score_list = best_models[2]
1208 feature_keyword_list_dict = best_models[3]
1214 if self.number_of_processes > 1:
1218 rmf_file_frame_list)
1219 for k
in feature_keyword_list_dict:
1220 feature_keyword_list_dict[k] = \
1222 feature_keyword_list_dict[k])
1225 score_rmf_tuples = list(zip(score_list,
1227 rmf_file_frame_list,
1228 list(range(len(score_list)))))
1230 if density_custom_ranges:
1231 for k
in density_custom_ranges:
1232 if not isinstance(density_custom_ranges[k], list):
1233 raise Exception(
"Density custom ranges: values must "
1234 "be lists of tuples")
1237 if first_and_last_frames
is not None:
1238 nframes = len(score_rmf_tuples)
1239 first_frame = int(first_and_last_frames[0] * nframes)
1240 last_frame = int(first_and_last_frames[1] * nframes)
1241 if last_frame > len(score_rmf_tuples):
1243 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1246 best_score_rmf_tuples = sorted(
1248 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1249 best_score_rmf_tuples = [t+(n,)
for n, t
in
1250 enumerate(best_score_rmf_tuples)]
1252 prov.append(IMP.pmi.io.FilterProvenance(
1253 "Best scoring", 0, number_of_best_scoring_models))
1255 best_score_feature_keyword_list_dict = defaultdict(list)
1256 for tpl
in best_score_rmf_tuples:
1258 for f
in feature_keyword_list_dict:
1259 best_score_feature_keyword_list_dict[f].append(
1260 feature_keyword_list_dict[f][index])
1261 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1262 best_score_rmf_tuples,
1263 self.number_of_processes)[self.rank]
1266 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1267 self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1268 if rmsd_calculation_components
is not None:
1269 tmp = self._expand_ambiguity(
1270 prot_ahead, rmsd_calculation_components)
1271 if tmp != rmsd_calculation_components:
1272 print(
'Detected ambiguity, expand rmsd components to',
1274 rmsd_calculation_components = tmp
1275 if alignment_components
is not None:
1276 tmp = self._expand_ambiguity(prot_ahead,
1277 alignment_components)
1278 if tmp != alignment_components:
1279 print(
'Detected ambiguity, expand alignment '
1280 'components to', tmp)
1281 alignment_components = tmp
1287 self.model, my_best_score_rmf_tuples[0],
1288 rmsd_calculation_components, state_number=state_number)
1290 self.model, my_best_score_rmf_tuples, alignment_components,
1291 rmsd_calculation_components, state_number=state_number)
1299 all_coordinates = got_coords[0]
1302 alignment_coordinates = got_coords[1]
1305 rmsd_coordinates = got_coords[2]
1308 rmf_file_name_index_dict = got_coords[3]
1311 all_rmf_file_names = got_coords[4]
1317 if density_custom_ranges:
1319 density_custom_ranges, voxel=voxel_size)
1321 dircluster = os.path.join(outputdir,
1322 "all_models."+str(self.rank))
1328 os.mkdir(dircluster)
1331 clusstat = open(os.path.join(
1332 dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1333 for cnt, tpl
in enumerate(my_best_score_rmf_tuples):
1335 rmf_frame_number = tpl[2]
1338 for key
in best_score_feature_keyword_list_dict:
1340 best_score_feature_keyword_list_dict[key][index]
1344 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1345 self.model, rmf_frame_number, rmf_name)
1347 linking_successful = \
1348 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1349 self.model, prots, rs, rmf_frame_number,
1351 if not linking_successful:
1357 states = IMP.atom.get_by_type(
1358 prots[0], IMP.atom.STATE_TYPE)
1359 prot = states[state_number]
1364 coords_f1 = alignment_coordinates[cnt]
1366 coords_f2 = alignment_coordinates[cnt]
1369 coords_f1, coords_f2)
1370 transformation = Ali.align()[1]
1384 rb = rbm.get_rigid_body()
1394 out_pdb_fn = os.path.join(
1395 dircluster, str(cnt)+
"."+str(self.rank)+
".pdb")
1396 out_rmf_fn = os.path.join(
1397 dircluster, str(cnt)+
"."+str(self.rank)+
".rmf3")
1398 o.init_pdb(out_pdb_fn, prot)
1399 tc = write_pdb_with_centered_coordinates
1400 o.write_pdb(out_pdb_fn,
1401 translate_to_geometric_center=tc)
1403 tmp_dict[
"local_pdb_file_name"] = \
1404 os.path.basename(out_pdb_fn)
1405 tmp_dict[
"rmf_file_full_path"] = rmf_name
1406 tmp_dict[
"local_rmf_file_name"] = \
1407 os.path.basename(out_rmf_fn)
1408 tmp_dict[
"local_rmf_frame_number"] = 0
1410 clusstat.write(str(tmp_dict)+
"\n")
1415 h.set_name(
"System")
1417 o.init_rmf(out_rmf_fn, [h], rs)
1419 o.write_rmf(out_rmf_fn)
1420 o.close_rmf(out_rmf_fn)
1422 if density_custom_ranges:
1423 DensModule.add_subunits_density(prot)
1425 if density_custom_ranges:
1426 DensModule.write_mrc(path=dircluster)
1431 if self.number_of_processes > 1:
1437 rmf_file_name_index_dict)
1439 alignment_coordinates)
1446 [best_score_feature_keyword_list_dict,
1447 rmf_file_name_index_dict],
1453 print(
"setup clustering class")
1456 for n, model_coordinate_dict
in enumerate(all_coordinates):
1458 if (alignment_components
is not None
1459 and len(self.cluster_obj.all_coords) == 0):
1461 self.cluster_obj.set_template(alignment_coordinates[n])
1462 self.cluster_obj.fill(all_rmf_file_names[n],
1463 rmsd_coordinates[n])
1464 print(
"Global calculating the distance matrix")
1467 self.cluster_obj.dist_matrix()
1471 self.cluster_obj.do_cluster(number_of_clusters)
1474 self.cluster_obj.plot_matrix(
1475 figurename=os.path.join(outputdir,
1477 if exit_after_display:
1479 self.cluster_obj.save_distance_matrix_file(
1480 file_name=distance_matrix_file)
1487 print(
"setup clustering class")
1489 self.cluster_obj.load_distance_matrix_file(
1490 file_name=distance_matrix_file)
1491 print(
"clustering with %s clusters" % str(number_of_clusters))
1492 self.cluster_obj.do_cluster(number_of_clusters)
1493 [best_score_feature_keyword_list_dict,
1494 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1497 self.cluster_obj.plot_matrix(figurename=os.path.join(
1498 outputdir,
'dist_matrix.pdf'))
1499 if exit_after_display:
1501 if self.number_of_processes > 1:
1509 print(self.cluster_obj.get_cluster_labels())
1510 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1511 print(
"rank %s " % str(self.rank))
1512 print(
"cluster %s " % str(n))
1513 print(
"cluster label %s " % str(cl))
1514 print(self.cluster_obj.get_cluster_label_names(cl))
1516 len(self.cluster_obj.get_cluster_label_names(cl))
1518 prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1521 if density_custom_ranges:
1523 density_custom_ranges,
1526 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1528 os.mkdir(dircluster)
1534 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1535 clusstat = open(dircluster +
"stat.out",
"w")
1536 for k, structure_name
in enumerate(
1537 self.cluster_obj.get_cluster_label_names(cl)):
1540 tmp_dict.update(rmsd_dict)
1541 index = rmf_file_name_index_dict[structure_name]
1542 for key
in best_score_feature_keyword_list_dict:
1544 key] = best_score_feature_keyword_list_dict[
1550 rmf_name = structure_name.split(
"|")[0]
1551 rmf_frame_number = int(structure_name.split(
"|")[1])
1552 clusstat.write(str(tmp_dict) +
"\n")
1557 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1558 self.model, rmf_frame_number, rmf_name)
1560 linking_successful = \
1561 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1562 self.model, prots, rs, rmf_frame_number,
1564 if not linking_successful:
1569 states = IMP.atom.get_by_type(
1570 prots[0], IMP.atom.STATE_TYPE)
1571 prot = states[state_number]
1577 co = self.cluster_obj
1578 model_index = co.get_model_index_from_name(
1580 transformation = co.get_transformation_to_first_member(
1591 rb = rbm.get_rigid_body()
1600 if density_custom_ranges:
1601 DensModule.add_subunits_density(prot)
1606 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1607 o.write_pdb(dircluster + str(k) +
".pdb")
1612 h.set_name(
"System")
1614 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1615 o.write_rmf(dircluster + str(k) +
".rmf3")
1616 o.close_rmf(dircluster + str(k) +
".rmf3")
1621 if density_custom_ranges:
1622 DensModule.write_mrc(path=dircluster)
1625 if self.number_of_processes > 1:
1628 def get_cluster_rmsd(self, cluster_num):
1629 if self.cluster_obj
is None:
1631 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1633 def save_objects(self, objects, file_name):
1635 with open(file_name,
'wb')
as outf:
1636 pickle.dump(objects, outf)
1638 def load_objects(self, file_name):
1640 with open(file_name,
'rb')
as inputf:
1641 objects = pickle.load(inputf)
1648 This class contains analysis utilities to investigate ReplicaExchange
1656 def __init__(self, model, stat_files, best_models=None, score_key=None,
1659 Construction of the Class.
1660 @param model IMP.Model()
1661 @param stat_files list of string. Can be ascii stat files,
1663 @param best_models Integer. Number of best scoring models,
1664 if None: all models will be read
1665 @param score_key Use the provided stat key keyword as the score
1666 (by default, the total score is used)
1667 @param alignment boolean (Default=True). Align before computing
1672 self.best_models = best_models
1674 model, stat_files, self.best_models, score_key, cache=
True)
1676 StatHierarchyHandler=self.stath0)
1689 self.clusters.append(c)
1690 for n0
in range(len(self.stath0)):
1692 self.pairwise_rmsd = {}
1693 self.pairwise_molecular_assignment = {}
1694 self.alignment = alignment
1695 self.symmetric_molecules = {}
1696 self.issymmetricsel = {}
1698 self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1700 self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1705 Setup the selection onto which the rmsd is computed
1706 @param kwargs use IMP.atom.Selection keywords
1714 Store names of symmetric molecules
1716 self.symmetric_molecules[molecule_name] = 0
1721 Setup the selection onto which the alignment is computed
1722 @param kwargs use IMP.atom.Selection keywords
1730 def clean_clusters(self):
1731 for c
in self.clusters:
1735 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1737 Cluster the models based on RMSD.
1738 @param rmsd_cutoff Float the distance cutoff in Angstrom
1739 @param metric (Default=IMP.atom.get_rmsd) the metric that will
1740 be used to compute rmsds
1742 self.clean_clusters()
1743 not_clustered = set(range(len(self.stath1)))
1744 while len(not_clustered) > 0:
1745 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1750 Refine the clusters by merging the ones whose centers are close
1751 @param rmsd_cutoff cutoff distance in Angstorms
1753 clusters_copy = self.clusters
1754 for c0, c1
in itertools.combinations(self.clusters, 2):
1755 if c0.center_index
is None:
1757 if c1.center_index
is None:
1759 _ = self.stath0[c0.center_index]
1760 _ = self.stath1[c1.center_index]
1761 rmsd, molecular_assignment = self.
rmsd()
1762 if rmsd <= rmsd_cutoff:
1763 if c1
in self.clusters:
1764 clusters_copy.remove(c1)
1766 self.clusters = clusters_copy
1773 def set_cluster_assignments(self, cluster_ids):
1774 if len(cluster_ids) != len(self.stath0):
1775 raise ValueError(
'cluster ids has to be same length as '
1779 for i
in sorted(list(set(cluster_ids))):
1781 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
1782 self.clusters[idx].add_member(i, d)
1786 Return the model data from a cluster
1787 @param cluster IMP.pmi.output.Cluster object
1796 Save the data for the whole models into a pickle file
1797 @param filename string
1799 self.stath0.save_data(filename)
1803 Set the data from an external IMP.pmi.output.Data
1804 @param data IMP.pmi.output.Data
1806 self.stath0.data = data
1807 self.stath1.data = data
1811 Load the data from an external pickled file
1812 @param filename string
1814 self.stath0.load_data(filename)
1815 self.stath1.load_data(filename)
1816 self.best_models = len(self.stath0)
1818 def add_cluster(self, rmf_name_list):
1820 print(
"creating cluster index "+str(len(self.clusters)))
1821 self.clusters.append(c)
1822 current_len = len(self.stath0)
1824 for rmf
in rmf_name_list:
1825 print(
"adding rmf "+rmf)
1826 self.stath0.add_stat_file(rmf)
1827 self.stath1.add_stat_file(rmf)
1829 for n0
in range(current_len, len(self.stath0)):
1830 d0 = self.stath0[n0]
1831 c.add_member(n0, d0)
1836 Save the clusters into a pickle file
1837 @param filename string
1840 with open(filename,
'wb')
as fl:
1841 pickle.dump(self.clusters, fl)
1845 Load the clusters from a pickle file
1846 @param filename string
1847 @param append bool (Default=False), if True. append the clusters
1848 to the ones currently present
1851 self.clean_clusters()
1852 with open(filename,
'rb')
as fl:
1854 self.clusters += pickle.load(fl)
1856 self.clusters = pickle.load(fl)
1865 Compute the cluster center for a given cluster
1867 member_distance = defaultdict(float)
1869 for n0, n1
in itertools.combinations(cluster.members, 2):
1872 rmsd, _ = self.
rmsd()
1873 member_distance[n0] += rmsd
1875 if len(member_distance) > 0:
1876 cluster.center_index = min(member_distance,
1877 key=member_distance.get)
1879 cluster.center_index = cluster.members[0]
1884 Save the coordinates of the current cluster a single rmf file
1886 print(
"saving coordinates", cluster)
1890 if rmf_name
is None:
1891 rmf_name = prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
1893 _ = self.stath1[cluster.members[0]]
1895 o.init_rmf(rmf_name, [self.stath1])
1896 for n1
in cluster.members:
1902 o.write_rmf(rmf_name)
1904 o.close_rmf(rmf_name)
1908 remove structures that are similar
1909 append it to a new cluster
1911 print(
"pruning models")
1913 filtered = [selected]
1914 remaining = range(1, len(self.stath1), 10)
1916 while len(remaining) > 0:
1917 d0 = self.stath0[selected]
1919 for n1
in remaining:
1924 if d <= rmsd_cutoff:
1926 print(
"pruning model %s, similar to model %s, rmsd %s"
1927 % (str(n1), str(selected), str(d)))
1928 remaining = [x
for x
in remaining
if x
not in rm]
1929 if len(remaining) == 0:
1931 selected = remaining[0]
1932 filtered.append(selected)
1935 self.clusters.append(c)
1937 d0 = self.stath0[n0]
1938 c.add_member(n0, d0)
1943 Compute the precision of a cluster
1949 if cluster.center_index
is not None:
1950 members1 = [cluster.center_index]
1952 members1 = cluster.members
1956 for n1
in cluster.members:
1961 tmp_rmsd, _ = self.
rmsd()
1966 precision = rmsd/npairs
1967 cluster.precision = precision
1972 Compute the bipartite precision (ie the cross-precision)
1973 between two clusters
1977 for cn0, n0
in enumerate(cluster1.members):
1979 for cn1, n1
in enumerate(cluster2.members):
1981 tmp_rmsd, _ = self.
rmsd()
1983 print(
"--- rmsd between structure %s and structure "
1984 "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1987 precision = rmsd/npairs
1990 def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1991 cluster_ref=
None, step=1):
1993 Compute the Root mean square fluctuations
1994 of a molecule in a cluster
1995 Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1996 residue indexes and the value is the rmsf
1998 rmsf = IMP.pmi.tools.OrderedDict()
2001 if cluster_ref
is not None:
2002 if cluster_ref.center_index
is not None:
2003 members0 = [cluster_ref.center_index]
2005 members0 = cluster_ref.members
2007 if cluster.center_index
is not None:
2008 members0 = [cluster.center_index]
2010 members0 = cluster.members
2013 copy_index=copy_index, state_index=state_index)
2014 ps0 = s0.get_selected_particles()
2016 residue_indexes = list(IMP.pmi.tools.OrderedSet(
2022 d0 = self.stath0[n0]
2023 for n1
in cluster.members[::step]:
2025 print(
"--- rmsf %s %s" % (str(n0), str(n1)))
2029 self.stath1, molecule=molecule,
2030 residue_indexes=residue_indexes, resolution=1,
2031 copy_index=copy_index, state_index=state_index)
2032 ps1 = s1.get_selected_particles()
2034 d1 = self.stath1[n1]
2037 for n, (p0, p1)
in enumerate(zip(ps0, ps1)):
2038 r = residue_indexes[n]
2050 for stath
in [self.stath0, self.stath1]:
2051 if molecule
not in self.symmetric_molecules:
2053 stath, molecule=molecule, residue_index=r,
2054 resolution=1, copy_index=copy_index,
2055 state_index=state_index)
2058 stath, molecule=molecule, residue_index=r,
2059 resolution=1, state_index=state_index)
2061 ps = s.get_selected_particles()
2070 def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2071 reference=
"Absolute", prefix=
"./", step=1):
2077 for n1
in cluster.members[::step]:
2078 print(
"density "+str(n1))
2083 dens.add_subunits_density(self.stath1)
2085 dens.write_mrc(path=prefix+
'/', suffix=str(cluster.cluster_id))
2088 def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2089 consolidate=
False, molecules=
None, prefix=
'./',
2090 reference=
"Absolute"):
2094 import matplotlib.pyplot
as plt
2095 import matplotlib.cm
as cm
2096 from scipy.spatial.distance
import cdist
2098 if molecules
is None:
2107 molecules=molecules).get_selected_particles())]
2108 unique_copies = [mol
for mol
in mols
if mol.get_copy_index() == 0]
2109 mol_names_unique = dict((mol.get_name(), mol)
for mol
in unique_copies)
2110 total_len_unique = sum(max(mol.get_residue_indexes())
2111 for mol
in unique_copies)
2118 seqlen = max(mol.get_residue_indexes())
2119 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2123 for mol
in unique_copies:
2124 seqlen = max(mol.get_residue_indexes())
2125 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2128 for ncl, n1
in enumerate(cluster.members):
2131 coord_dict = IMP.pmi.tools.OrderedDict()
2133 rindexes = mol.get_residue_indexes()
2134 coords = np.ones((max(rindexes), 3))
2135 for rnum
in rindexes:
2138 selpart = sel.get_selected_particles()
2139 if len(selpart) == 0:
2141 selpart = selpart[0]
2142 coords[rnum - 1, :] = \
2144 coord_dict[mol] = coords
2147 coords = np.concatenate(list(coord_dict.values()))
2148 dists = cdist(coords, coords)
2149 binary_dists = np.where((dists <= contact_threshold)
2150 & (dists >= 1.0), 1.0, 0.0)
2152 binary_dists_dict = {}
2154 len1 = max(mol1.get_residue_indexes())
2156 name1 = mol1.get_name()
2157 name2 = mol2.get_name()
2158 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2159 if (name1, name2)
not in binary_dists_dict:
2160 binary_dists_dict[(name1, name2)] = \
2161 np.zeros((len1, len1))
2162 binary_dists_dict[(name1, name2)] += \
2163 np.where((dists <= contact_threshold)
2164 & (dists >= 1.0), 1.0, 0.0)
2165 binary_dists = np.zeros((total_len_unique, total_len_unique))
2167 for name1, name2
in binary_dists_dict:
2168 r1 = index_dict[mol_names_unique[name1]]
2169 r2 = index_dict[mol_names_unique[name2]]
2170 binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2171 np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2177 contact_freqs = binary_dists
2179 dist_maps.append(dists)
2180 av_dist_map += dists
2181 contact_freqs += binary_dists
2184 contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2186 contact_freqs = 1.0/len(cluster)*contact_freqs
2187 av_dist_map = 1.0/len(cluster)*contact_freqs
2189 fig = plt.figure(figsize=(100, 100))
2190 ax = fig.add_subplot(111)
2193 gap_between_components = 50
2198 sorted_tuple = sorted(
2200 mol).get_extended_name(), mol)
for mol
in mols)
2201 prot_list = list(zip(*sorted_tuple))[1]
2203 sorted_tuple = sorted(
2205 for mol
in unique_copies)
2206 prot_list = list(zip(*sorted_tuple))[1]
2208 prot_listx = prot_list
2209 nresx = gap_between_components + \
2210 sum([max(mol.get_residue_indexes())
2211 + gap_between_components
for mol
in prot_listx])
2214 prot_listy = prot_list
2215 nresy = gap_between_components + \
2216 sum([max(mol.get_residue_indexes())
2217 + gap_between_components
for mol
in prot_listy])
2222 res = gap_between_components
2223 for mol
in prot_listx:
2224 resoffsetx[mol] = res
2225 res += max(mol.get_residue_indexes())
2227 res += gap_between_components
2231 res = gap_between_components
2232 for mol
in prot_listy:
2233 resoffsety[mol] = res
2234 res += max(mol.get_residue_indexes())
2236 res += gap_between_components
2238 resoffsetdiagonal = {}
2239 res = gap_between_components
2240 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2241 resoffsetdiagonal[mol] = res
2242 res += max(mol.get_residue_indexes())
2243 res += gap_between_components
2248 for n, prot
in enumerate(prot_listx):
2249 res = resoffsetx[prot]
2251 for proty
in prot_listy:
2252 resy = resoffsety[proty]
2253 endy = resendy[proty]
2254 ax.plot([res, res], [resy, endy], linestyle=
'-',
2255 color=
'gray', lw=0.4)
2256 ax.plot([end, end], [resy, endy], linestyle=
'-',
2257 color=
'gray', lw=0.4)
2258 xticks.append((float(res) + float(end)) / 2)
2260 prot).get_extended_name())
2264 for n, prot
in enumerate(prot_listy):
2265 res = resoffsety[prot]
2267 for protx
in prot_listx:
2268 resx = resoffsetx[protx]
2269 endx = resendx[protx]
2270 ax.plot([resx, endx], [res, res], linestyle=
'-',
2271 color=
'gray', lw=0.4)
2272 ax.plot([resx, endx], [end, end], linestyle=
'-',
2273 color=
'gray', lw=0.4)
2274 yticks.append((float(res) + float(end)) / 2)
2276 prot).get_extended_name())
2280 tmp_array = np.zeros((nresx, nresy))
2282 for px
in prot_listx:
2283 for py
in prot_listy:
2284 resx = resoffsetx[px]
2285 lengx = resendx[px] - 1
2286 resy = resoffsety[py]
2287 lengy = resendy[py] - 1
2288 indexes_x = index_dict[px]
2289 minx = min(indexes_x)
2290 maxx = max(indexes_x)
2291 indexes_y = index_dict[py]
2292 miny = min(indexes_y)
2293 maxy = max(indexes_y)
2294 tmp_array[resx:lengx, resy:lengy] = \
2295 contact_freqs[minx:maxx, miny:maxy]
2296 ret[(px, py)] = np.argwhere(
2297 contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2299 ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2300 origin=
'lower', alpha=0.6, interpolation=
'nearest')
2302 ax.set_xticks(xticks)
2303 ax.set_xticklabels(xlabels, rotation=90)
2304 ax.set_yticks(yticks)
2305 ax.set_yticklabels(ylabels)
2306 plt.setp(ax.get_xticklabels(), fontsize=6)
2307 plt.setp(ax.get_yticklabels(), fontsize=6)
2310 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2311 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2313 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf",
2314 dpi=300, transparent=
"False")
2317 def plot_rmsd_matrix(self, filename):
2318 self.compute_all_pairwise_rmsd()
2319 distance_matrix = np.zeros(
2320 (len(self.stath0), len(self.stath1)))
2321 for (n0, n1)
in self.pairwise_rmsd:
2322 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2324 import matplotlib
as mpl
2326 import matplotlib.pylab
as pl
2327 from scipy.cluster
import hierarchy
as hrc
2329 fig = pl.figure(figsize=(10, 8))
2330 ax = fig.add_subplot(212)
2331 dendrogram = hrc.dendrogram(
2332 hrc.linkage(distance_matrix),
2335 leaves_order = dendrogram[
'leaves']
2336 ax.set_xlabel(
'Model')
2337 ax.set_ylabel(
'RMSD [Angstroms]')
2339 ax2 = fig.add_subplot(221)
2341 distance_matrix[leaves_order, :][:, leaves_order],
2342 interpolation=
'nearest')
2343 cb = fig.colorbar(cax)
2344 cb.set_label(
'RMSD [Angstroms]')
2345 ax2.set_xlabel(
'Model')
2346 ax2.set_ylabel(
'Model')
2348 pl.savefig(filename, dpi=300)
2357 Update the cluster id numbers
2359 for n, c
in enumerate(self.clusters):
2362 def get_molecule(self, hier, name, copy):
2370 self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2371 self.sel0_rmsd.get_selected_particles())
2372 self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2373 self.sel1_rmsd.get_selected_particles())
2374 for mol
in self.seldict0:
2375 for sel
in self.seldict0[mol]:
2376 self.issymmetricsel[sel] =
False
2377 for mol
in self.symmetric_molecules:
2378 self.symmetric_molecules[mol] = len(self.seldict0[mol])
2379 for sel
in self.seldict0[mol]:
2380 self.issymmetricsel[sel] =
True
2384 self.sel1_alignment, self.sel0_alignment)
2386 for rb
in self.rbs1:
2389 for bead
in self.beads1:
2397 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2399 initial filling of the clusters.
2402 print(
"clustering model "+str(n0))
2403 d0 = self.stath0[n0]
2405 print(
"creating cluster index "+str(len(self.clusters)))
2406 self.clusters.append(c)
2407 c.add_member(n0, d0)
2408 clustered = set([n0])
2410 print(
"--- trying to add model " + str(n1) +
" to cluster "
2411 + str(len(self.clusters)))
2412 d1 = self.stath1[n1]
2415 rmsd, _ = self.
rmsd(metric=metric)
2416 if rmsd < rmsd_cutoff:
2417 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2418 c.add_member(n1, d1)
2421 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2426 merge the clusters that have close members
2428 @param rmsd_cutoff cutoff distance in Angstorms
2429 @param metric Function to calculate distance between two Selections
2430 (by default, IMP.atom.get_rmsd is used)
2438 for c0, c1
in filter(
lambda x: len(x[0].members) > 1,
2439 itertools.combinations(self.clusters, 2)):
2440 n0, n1 = [c.members[0]
for c
in (c0, c1)]
2443 rmsd, _ = self.
rmsd()
2444 if (rmsd < 2*rmsd_cutoff
and
2446 to_merge.append((c0, c1))
2448 for c0, c
in reversed(to_merge):
2452 self.clusters = [c
for c
in
2453 filter(
lambda x: len(x.members) > 0, self.clusters)]
2457 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2459 print(
"check close members for clusters " + str(c0.cluster_id) +
2460 " and " + str(c1.cluster_id))
2461 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2464 rmsd, _ = self.
rmsd(metric=metric)
2465 if rmsd < rmsd_cutoff:
2480 a function that returns the permutation best_sel of sels0 that
2483 best_rmsd2 = float(
'inf')
2485 if self.issymmetricsel[sels0[0]]:
2488 for offset
in range(N):
2489 sels = [sels0[(offset+i) % N]
for i
in range(N)]
2492 r = metric(sel0, sel1)
2494 if rmsd2 < best_rmsd2:
2498 for sels
in itertools.permutations(sels0):
2500 for sel0, sel1
in itertools.takewhile(
2501 lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2502 r = metric(sel0, sel1)
2504 if rmsd2 < best_rmsd2:
2507 return best_sel, best_rmsd2
2509 def compute_all_pairwise_rmsd(self):
2510 for d0
in self.stath0:
2511 for d1
in self.stath1:
2512 rmsd, _ = self.
rmsd()
2514 def rmsd(self, metric=IMP.atom.get_rmsd):
2516 Computes the RMSD. Resolves ambiguous pairs assignments
2520 n0 = self.stath0.current_index
2521 n1 = self.stath1.current_index
2522 if ((n0, n1)
in self.pairwise_rmsd) \
2523 and ((n0, n1)
in self.pairwise_molecular_assignment):
2524 return (self.pairwise_rmsd[(n0, n1)],
2525 self.pairwise_molecular_assignment[(n0, n1)])
2535 molecular_assignment = {}
2536 for molname, sels0
in self.seldict0.items():
2537 sels_best_order, best_rmsd2 = \
2538 self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2540 Ncoords = len(sels_best_order[0].get_selected_particles())
2541 Ncopies = len(self.seldict1[molname])
2542 total_rmsd += Ncoords*best_rmsd2
2543 total_N += Ncoords*Ncopies
2545 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2546 p0 = sel0.get_selected_particles()[0]
2547 p1 = sel1.get_selected_particles()[0]
2552 molecular_assignment[(molname, c0)] = (molname, c1)
2554 total_rmsd = math.sqrt(total_rmsd/total_N)
2556 self.pairwise_rmsd[(n0, n1)] = total_rmsd
2557 self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2558 self.pairwise_rmsd[(n1, n0)] = total_rmsd
2559 self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2560 return total_rmsd, molecular_assignment
2564 Fix the reference structure for structural alignment, rmsd and
2567 @param reference can be either "Absolute" (cluster center of the
2568 first cluster) or Relative (cluster center of the current
2570 #param cluster the reference IMP.pmi.output.Cluster object
2572 if reference ==
"Absolute":
2574 elif reference ==
"Relative":
2575 if cluster.center_index:
2576 n0 = cluster.center_index
2578 n0 = cluster.members[0]
2583 compute the molecular assignments between multiple copies
2584 of the same sequence. It changes the Copy index of Molecules
2587 _, molecular_assignment = self.
rmsd()
2588 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2589 mol0 = self.molcopydict0[m0][c0]
2590 mol1 = self.molcopydict1[m1][c1]
2593 p1.set_value(cik0, c0)
2597 Undo the Copy index assignment
2600 _, molecular_assignment = self.
rmsd()
2601 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2602 mol0 = self.molcopydict0[m0][c0]
2603 mol1 = self.molcopydict1[m1][c1]
2606 p1.set_value(cik0, c1)
2613 s =
"AnalysisReplicaExchange\n"
2614 s +=
"---- number of clusters %s \n" % str(len(self.clusters))
2615 s +=
"---- number of models %s \n" % str(len(self.stath0))
2618 def __getitem__(self, int_slice_adaptor):
2619 if isinstance(int_slice_adaptor, int):
2620 return self.clusters[int_slice_adaptor]
2621 elif isinstance(int_slice_adaptor, slice):
2622 return self.__iter__(int_slice_adaptor)
2624 raise TypeError(
"Unknown Type")
2627 return len(self.clusters)
2629 def __iter__(self, slice_key=None):
2630 if slice_key
is None:
2631 for i
in range(len(self)):
2634 for i
in range(len(self))[slice_key]:
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignment.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
CheckLevel get_check_level()
Get the current audit mode.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A member of a rigid body, it has internal (local) coordinates.
A macro to help setup and run replica exchange.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignments between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
def merge_aggregates
merge the clusters that have close members
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
Mapping between FASTA one-letter codes and residue types.
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
Sample using replica exchange.
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.