1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
18 from operator
import itemgetter
19 from collections
import defaultdict
26 class _RMFRestraints(object):
27 """All restraints that are written out to the RMF file"""
28 def __init__(self, model, user_restraints):
30 self._user_restraints = user_restraints
if user_restraints
else []
33 return (len(self._user_restraints)
34 + self._rmf_rs.get_number_of_restraints())
38 __nonzero__ = __bool__
40 def __getitem__(self, i):
41 class FakePMIWrapper(object):
42 def __init__(self, r):
43 self.r = IMP.RestraintSet.get_from(r)
45 def get_restraint(self):
48 lenuser = len(self._user_restraints)
50 return self._user_restraints[i]
51 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
52 r = self._rmf_rs.get_restraint(i - lenuser)
53 return FakePMIWrapper(r)
55 raise IndexError(
"Out of range")
59 """A macro to help setup and run replica exchange.
60 Supports Monte Carlo and molecular dynamics.
61 Produces trajectory RMF files, best PDB structures,
62 and output stat files.
66 monte_carlo_sample_objects=
None,
67 molecular_dynamics_sample_objects=
None,
69 rmf_output_objects=
None,
70 crosslink_restraints=
None,
71 monte_carlo_temperature=1.0,
72 simulated_annealing=
False,
73 simulated_annealing_minimum_temperature=1.0,
74 simulated_annealing_maximum_temperature=2.5,
75 simulated_annealing_minimum_temperature_nframes=100,
76 simulated_annealing_maximum_temperature_nframes=100,
77 replica_exchange_minimum_temperature=1.0,
78 replica_exchange_maximum_temperature=2.5,
79 replica_exchange_swap=
True,
81 number_of_best_scoring_models=500,
84 molecular_dynamics_steps=10,
85 molecular_dynamics_max_time_step=1.0,
86 number_of_frames=1000,
87 save_coordinates_mode=
"lowest_temperature",
88 nframes_write_coordinates=1,
89 write_initial_rmf=
True,
90 initial_rmf_name_suffix=
"initial",
91 stat_file_name_suffix=
"stat",
92 best_pdb_name_suffix=
"model",
94 do_create_directories=
True,
95 global_output_directory=
"./",
98 replica_stat_file_suffix=
"stat_replica",
99 em_object_for_rmf=
None,
101 replica_exchange_object=
None,
105 @param model The IMP model
106 @param root_hier Top-level (System)hierarchy
107 @param monte_carlo_sample_objects Objects for MC sampling; a list of
108 structural components (generally the representation) that
109 will be moved and restraints with parameters that need to
111 For PMI2: just pass flat list of movers
112 @param molecular_dynamics_sample_objects Objects for MD sampling
113 For PMI2: just pass flat list of particles
114 @param output_objects A list of structural objects and restraints
115 that will be included in output (ie, statistics "stat"
116 files). Any object that provides a get_output() method
117 can be used here. If None is passed
118 the macro will not write stat files.
119 @param rmf_output_objects A list of structural objects and
120 restraints that will be included in rmf. Any object
121 that provides a get_output() method can be used here.
122 @param monte_carlo_temperature MC temp (may need to be optimized
123 based on post-sampling analysis)
124 @param simulated_annealing If True, perform simulated annealing
125 @param simulated_annealing_minimum_temperature Should generally be
126 the same as monte_carlo_temperature.
127 @param simulated_annealing_minimum_temperature_nframes Number of
128 frames to compute at minimum temperature.
129 @param simulated_annealing_maximum_temperature_nframes Number of
131 temps > simulated_annealing_maximum_temperature.
132 @param replica_exchange_minimum_temperature Low temp for REX; should
133 generally be the same as monte_carlo_temperature.
134 @param replica_exchange_maximum_temperature High temp for REX
135 @param replica_exchange_swap Boolean, enable disable temperature
137 @param num_sample_rounds Number of rounds of MC/MD per cycle
138 @param number_of_best_scoring_models Number of top-scoring PDB
139 models to keep around for analysis
140 @param monte_carlo_steps Number of MC steps per round
141 @param self_adaptive self adaptive scheme for Monte Carlo movers
142 @param molecular_dynamics_steps Number of MD steps per round
143 @param molecular_dynamics_max_time_step Max time step for MD
144 @param number_of_frames Number of REX frames to run
145 @param save_coordinates_mode string: how to save coordinates.
146 "lowest_temperature" (default) only the lowest temperatures
148 "25th_score" all replicas whose score is below the 25th
150 "50th_score" all replicas whose score is below the 50th
152 "75th_score" all replicas whose score is below the 75th
154 @param nframes_write_coordinates How often to write the coordinates
156 @param write_initial_rmf Write the initial configuration
157 @param global_output_directory Folder that will be created to house
159 @param test_mode Set to True to avoid writing any files, just test
161 @param score_moved If True, attempt to speed up Monte Carlo
162 sampling by caching scoring function terms on particles
169 self.output_objects = output_objects
170 self.rmf_output_objects = rmf_output_objects
172 and root_hier.get_name() ==
'System'):
173 if self.output_objects
is not None:
174 self.output_objects.append(
176 if self.rmf_output_objects
is not None:
177 self.rmf_output_objects.append(
179 self.root_hier = root_hier
180 states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
181 self.vars[
"number_of_states"] = len(states)
183 self.root_hiers = states
184 self.is_multi_state =
True
186 self.root_hier = root_hier
187 self.is_multi_state =
False
189 raise TypeError(
"Must provide System hierarchy (root_hier)")
191 if crosslink_restraints:
193 "crosslink_restraints is deprecated and is ignored; "
194 "all cross-link restraints should be automatically "
195 "added to output RMF files")
196 self._rmf_restraints = _RMFRestraints(model,
None)
197 self.em_object_for_rmf = em_object_for_rmf
198 self.monte_carlo_sample_objects = monte_carlo_sample_objects
199 self.vars[
"self_adaptive"] = self_adaptive
200 if sample_objects
is not None:
202 "sample_objects is deprecated; use monte_carlo_sample_objects "
203 "(or molecular_dynamics_sample_objects) instead")
204 self.monte_carlo_sample_objects += sample_objects
205 self.molecular_dynamics_sample_objects = \
206 molecular_dynamics_sample_objects
207 self.replica_exchange_object = replica_exchange_object
208 self.molecular_dynamics_max_time_step = \
209 molecular_dynamics_max_time_step
210 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
211 self.vars[
"replica_exchange_minimum_temperature"] = \
212 replica_exchange_minimum_temperature
213 self.vars[
"replica_exchange_maximum_temperature"] = \
214 replica_exchange_maximum_temperature
215 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
216 self.vars[
"simulated_annealing"] = simulated_annealing
217 self.vars[
"simulated_annealing_minimum_temperature"] = \
218 simulated_annealing_minimum_temperature
219 self.vars[
"simulated_annealing_maximum_temperature"] = \
220 simulated_annealing_maximum_temperature
221 self.vars[
"simulated_annealing_minimum_temperature_nframes"] = \
222 simulated_annealing_minimum_temperature_nframes
223 self.vars[
"simulated_annealing_maximum_temperature_nframes"] = \
224 simulated_annealing_maximum_temperature_nframes
226 self.vars[
"num_sample_rounds"] = num_sample_rounds
228 "number_of_best_scoring_models"] = number_of_best_scoring_models
229 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
230 self.vars[
"molecular_dynamics_steps"] = molecular_dynamics_steps
231 self.vars[
"number_of_frames"] = number_of_frames
232 if save_coordinates_mode
not in (
"lowest_temperature",
"25th_score",
233 "50th_score",
"75th_score"):
234 raise Exception(
"save_coordinates_mode has unrecognized value")
236 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
237 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
238 self.vars[
"write_initial_rmf"] = write_initial_rmf
239 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
240 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
241 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
242 self.vars[
"do_clean_first"] = do_clean_first
243 self.vars[
"do_create_directories"] = do_create_directories
244 self.vars[
"global_output_directory"] = global_output_directory
245 self.vars[
"rmf_dir"] = rmf_dir
246 self.vars[
"best_pdb_dir"] = best_pdb_dir
247 self.vars[
"atomistic"] = atomistic
248 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
249 self.vars[
"geometries"] =
None
250 self.test_mode = test_mode
251 self.score_moved = score_moved
254 if self.vars[
"geometries"]
is None:
255 self.vars[
"geometries"] = list(geometries)
257 self.vars[
"geometries"].extend(geometries)
260 print(
"ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, "
261 "rmfs/*.rmf3 for each replica ")
262 print(
"--- it stores the best scoring pdb models in pdbs/")
263 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
264 "lowest temperature")
265 print(
"--- variables:")
266 keys = list(self.vars.keys())
269 print(
"------", v.ljust(30), self.vars[v])
271 def get_replica_exchange_object(self):
272 return self.replica_exchange_object
274 def _add_provenance(self, sampler_md, sampler_mc):
275 """Record details about the sampling in the IMP Hierarchies"""
278 method =
"Molecular Dynamics"
279 iterations += self.vars[
"molecular_dynamics_steps"]
281 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
282 iterations += self.vars[
"monte_carlo_steps"]
284 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
286 iterations *= self.vars[
"num_sample_rounds"]
288 pi = self.model.add_particle(
"sampling")
290 self.model, pi, method, self.vars[
"number_of_frames"],
292 p.set_number_of_replicas(
293 self.replica_exchange_object.get_number_of_replicas())
294 IMP.pmi.tools._add_pmi_provenance(self.root_hier)
297 def execute_macro(self):
298 temp_index_factor = 100000.0
302 if self.monte_carlo_sample_objects
is not None:
303 print(
"Setting up MonteCarlo")
305 self.model, self.monte_carlo_sample_objects,
306 self.vars[
"monte_carlo_temperature"],
307 score_moved=self.score_moved)
308 if self.vars[
"simulated_annealing"]:
309 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
310 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
312 "simulated_annealing_minimum_temperature_nframes"]
314 "simulated_annealing_maximum_temperature_nframes"]
315 sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
316 if self.vars[
"self_adaptive"]:
317 sampler_mc.set_self_adaptive(
318 isselfadaptive=self.vars[
"self_adaptive"])
319 if self.output_objects
is not None:
320 self.output_objects.append(sampler_mc)
321 if self.rmf_output_objects
is not None:
322 self.rmf_output_objects.append(sampler_mc)
323 samplers.append(sampler_mc)
325 if self.molecular_dynamics_sample_objects
is not None:
326 print(
"Setting up MolecularDynamics")
328 self.model, self.molecular_dynamics_sample_objects,
329 self.vars[
"monte_carlo_temperature"],
330 maximum_time_step=self.molecular_dynamics_max_time_step)
331 if self.vars[
"simulated_annealing"]:
332 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
333 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
335 "simulated_annealing_minimum_temperature_nframes"]
337 "simulated_annealing_maximum_temperature_nframes"]
338 sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
339 if self.output_objects
is not None:
340 self.output_objects.append(sampler_md)
341 if self.rmf_output_objects
is not None:
342 self.rmf_output_objects.append(sampler_md)
343 samplers.append(sampler_md)
346 print(
"Setting up ReplicaExchange")
348 self.model, self.vars[
"replica_exchange_minimum_temperature"],
349 self.vars[
"replica_exchange_maximum_temperature"], samplers,
350 replica_exchange_object=self.replica_exchange_object)
351 self.replica_exchange_object = rex.rem
353 myindex = rex.get_my_index()
354 if self.output_objects
is not None:
355 self.output_objects.append(rex)
356 if self.rmf_output_objects
is not None:
357 self.rmf_output_objects.append(rex)
361 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
365 globaldir = self.vars[
"global_output_directory"] +
"/"
366 rmf_dir = globaldir + self.vars[
"rmf_dir"]
367 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
369 if not self.test_mode:
370 if self.vars[
"do_clean_first"]:
373 if self.vars[
"do_create_directories"]:
376 os.makedirs(globaldir)
384 if not self.is_multi_state:
390 for n
in range(self.vars[
"number_of_states"]):
392 os.makedirs(pdb_dir +
"/" + str(n))
399 if self.output_objects
is not None:
400 self.output_objects.append(sw)
401 if self.rmf_output_objects
is not None:
402 self.rmf_output_objects.append(sw)
404 print(
"Setting up stat file")
406 low_temp_stat_file = globaldir + \
407 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
410 if not self.test_mode:
413 if not self.test_mode:
414 if self.output_objects
is not None:
415 output.init_stat2(low_temp_stat_file,
417 extralabels=[
"rmf_file",
"rmf_frame_index"])
419 print(
"Stat file writing is disabled")
421 if self.rmf_output_objects
is not None:
422 print(
"Stat info being written in the rmf file")
424 print(
"Setting up replica stat file")
425 replica_stat_file = globaldir + \
426 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
427 if not self.test_mode:
428 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
430 if not self.test_mode:
431 print(
"Setting up best pdb files")
432 if not self.is_multi_state:
433 if self.vars[
"number_of_best_scoring_models"] > 0:
434 output.init_pdb_best_scoring(
435 pdb_dir +
"/" + self.vars[
"best_pdb_name_suffix"],
437 self.vars[
"number_of_best_scoring_models"],
438 replica_exchange=
True)
440 pdb_dir +
"/" +
"model.psf",
442 self.vars[
"best_pdb_name_suffix"] +
".0.pdb")
444 if self.vars[
"number_of_best_scoring_models"] > 0:
445 for n
in range(self.vars[
"number_of_states"]):
446 output.init_pdb_best_scoring(
447 pdb_dir +
"/" + str(n) +
"/" +
448 self.vars[
"best_pdb_name_suffix"],
450 self.vars[
"number_of_best_scoring_models"],
451 replica_exchange=
True)
453 pdb_dir +
"/" + str(n) +
"/" +
"model.psf",
454 pdb_dir +
"/" + str(n) +
"/" +
455 self.vars[
"best_pdb_name_suffix"] +
".0.pdb")
458 if self.em_object_for_rmf
is not None:
459 output_hierarchies = [
461 self.em_object_for_rmf.get_density_as_hierarchy(
464 output_hierarchies = [self.root_hier]
466 if not self.test_mode:
467 print(
"Setting up and writing initial rmf coordinate file")
468 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
469 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
471 listofobjects=self.rmf_output_objects)
472 if self._rmf_restraints:
473 output.add_restraints_to_rmf(
474 init_suffix +
"." + str(myindex) +
".rmf3",
475 self._rmf_restraints)
476 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
477 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
479 if not self.test_mode:
480 mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
482 self._add_provenance(sampler_md, sampler_mc)
484 if not self.test_mode:
485 print(
"Setting up production rmf files")
486 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
487 output.init_rmf(rmfname, output_hierarchies,
488 geometries=self.vars[
"geometries"],
489 listofobjects=self.rmf_output_objects)
491 if self._rmf_restraints:
492 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
494 ntimes_at_low_temp = 0
498 self.replica_exchange_object.set_was_used(
True)
499 nframes = self.vars[
"number_of_frames"]
502 for i
in range(nframes):
506 for nr
in range(self.vars[
"num_sample_rounds"]):
507 if sampler_md
is not None:
509 self.vars[
"molecular_dynamics_steps"])
510 if sampler_mc
is not None:
511 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
513 self.model).evaluate(
False)
514 mpivs.set_value(
"score", score)
515 output.set_output_entry(
"score", score)
517 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
519 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
520 save_frame = (min_temp_index == my_temp_index)
521 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
522 score_perc = mpivs.get_percentile(
"score")
523 save_frame = (score_perc*100.0 <= 25.0)
524 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
525 score_perc = mpivs.get_percentile(
"score")
526 save_frame = (score_perc*100.0 <= 50.0)
527 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
528 score_perc = mpivs.get_percentile(
"score")
529 save_frame = (score_perc*100.0 <= 75.0)
532 if save_frame
and not self.test_mode:
536 print(
"--- frame %s score %s " % (str(i), str(score)))
538 if not self.test_mode:
539 if i % self.vars[
"nframes_write_coordinates"] == 0:
540 print(
'--- writing coordinates')
541 if self.vars[
"number_of_best_scoring_models"] > 0:
542 output.write_pdb_best_scoring(score)
543 output.write_rmf(rmfname)
544 output.set_output_entry(
"rmf_file", rmfname)
545 output.set_output_entry(
"rmf_frame_index",
548 output.set_output_entry(
"rmf_file", rmfname)
549 output.set_output_entry(
"rmf_frame_index",
'-1')
550 if self.output_objects
is not None:
551 output.write_stat2(low_temp_stat_file)
552 ntimes_at_low_temp += 1
554 if not self.test_mode:
555 output.write_stat2(replica_stat_file)
556 if self.vars[
"replica_exchange_swap"]:
557 rex.swap_temp(i, score)
558 for p, state
in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
559 p.add_replica_exchange(state, self)
561 if not self.test_mode:
562 print(
"closing production rmf files")
563 output.close_rmf(rmfname)
567 """A macro to build a IMP::pmi::topology::System based on a
568 TopologyReader object.
570 Easily create multi-state systems by calling this macro
571 repeatedly with different TopologyReader objects!
572 A useful function is get_molecules() which returns the PMI Molecules
573 grouped by state as a dictionary with key = (molecule name),
574 value = IMP.pmi.topology.Molecule
575 Quick multi-state system:
578 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
579 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
580 bs = IMP.pmi.macros.BuildSystem(model)
581 bs.add_state(reader1)
582 bs.add_state(reader2)
583 bs.execute_macro() # build everything including degrees of freedom
584 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
585 ### now you have a two state system, you add restraints etc
587 @note The "domain name" entry of the topology reader is not used.
588 All molecules are set up by the component name, but split into rigid bodies
592 _alphabets = {
'DNA': IMP.pmi.alphabets.dna,
593 'RNA': IMP.pmi.alphabets.rna}
595 def __init__(self, model, sequence_connectivity_scale=4.0,
596 force_create_gmm_files=
False, resolutions=[1, 10]):
598 @param model An IMP Model
599 @param sequence_connectivity_scale For scaling the connectivity
601 @param force_create_gmm_files If True, will sample and create GMMs
602 no matter what. If False, will only sample if the
603 files don't exist. If number of Gaussians is zero, won't
605 @param resolutions The resolutions to build for structured regions
612 self._domain_res = []
614 self.force_create_gmm_files = force_create_gmm_files
615 self.resolutions = resolutions
617 def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
619 """Add a state using the topology info in a
620 IMP::pmi::topology::TopologyReader object.
621 When you are done adding states, call execute_macro()
622 @param reader The TopologyReader object
623 @param fasta_name_map dictionary for converting protein names
624 found in the fasta file
625 @param chain_ids A list or string of chain IDs for assigning to
626 newly-created molecules, e.g.
627 `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
628 If not specified, chain IDs A through Z are assigned, then
629 AA through AZ, then BA through BZ, and so on, in the same
632 state = self.system.create_state()
633 self._readers.append(reader)
635 these_domain_res = {}
637 if chain_ids
is None:
638 chain_ids = IMP.pmi.output._ChainIDs()
643 for molname
in reader.get_molecules():
644 copies = reader.get_molecules()[molname].domains
645 for nc, copyname
in enumerate(copies):
646 print(
"BuildSystem.add_state: setting up molecule %s copy "
647 "number %s" % (molname, str(nc)))
648 copy = copies[copyname]
651 all_chains = [c
for c
in copy
if c.chain
is not None]
653 chain_id = all_chains[0].chain
655 chain_id = chain_ids[numchain]
657 "No PDBs specified for %s, so keep_chain_id has "
658 "no effect; using default chain ID '%s'"
661 chain_id = chain_ids[numchain]
663 alphabet = IMP.pmi.alphabets.amino_acid
664 fasta_flag = copy[0].fasta_flag
665 if fasta_flag
in self._alphabets:
666 alphabet = self._alphabets[fasta_flag]
668 copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
669 print(
"BuildSystem.add_state: molecule %s sequence has "
670 "%s residues" % (molname, len(seq)))
671 orig_mol = state.create_molecule(molname, seq, chain_id,
676 print(
"BuildSystem.add_state: creating a copy for "
677 "molecule %s" % molname)
678 mol = orig_mol.create_copy(chain_id)
681 for domainnumber, domain
in enumerate(copy):
682 print(
"BuildSystem.add_state: ---- setting up domain %s "
683 "of molecule %s" % (domainnumber, molname))
686 these_domains[domain.get_unique_name()] = domain
687 if domain.residue_range == []
or \
688 domain.residue_range
is None:
689 domain_res = mol.get_residues()
691 start = domain.residue_range[0]+domain.pdb_offset
692 if domain.residue_range[1] ==
'END':
693 end = len(mol.sequence)
695 end = domain.residue_range[1]+domain.pdb_offset
696 domain_res = mol.residue_range(start-1, end-1)
697 print(
"BuildSystem.add_state: -------- domain %s of "
698 "molecule %s extends from residue %s to "
700 % (domainnumber, molname, start, end))
701 if domain.pdb_file ==
"BEADS":
702 print(
"BuildSystem.add_state: -------- domain %s of "
703 "molecule %s represented by BEADS "
704 % (domainnumber, molname))
705 mol.add_representation(
707 resolutions=[domain.bead_size],
708 setup_particles_as_densities=(
709 domain.em_residues_per_gaussian != 0),
711 these_domain_res[domain.get_unique_name()] = \
713 elif domain.pdb_file ==
"IDEAL_HELIX":
714 print(
"BuildSystem.add_state: -------- domain %s of "
715 "molecule %s represented by IDEAL_HELIX "
716 % (domainnumber, molname))
717 emper = domain.em_residues_per_gaussian
718 mol.add_representation(
720 resolutions=self.resolutions,
722 density_residues_per_component=emper,
723 density_prefix=domain.density_prefix,
724 density_force_compute=self.force_create_gmm_files,
726 these_domain_res[domain.get_unique_name()] = \
729 print(
"BuildSystem.add_state: -------- domain %s of "
730 "molecule %s represented by pdb file %s "
731 % (domainnumber, molname, domain.pdb_file))
732 domain_atomic = mol.add_structure(domain.pdb_file,
734 domain.residue_range,
737 domain_non_atomic = domain_res - domain_atomic
738 if not domain.em_residues_per_gaussian:
739 mol.add_representation(
740 domain_atomic, resolutions=self.resolutions,
742 if len(domain_non_atomic) > 0:
743 mol.add_representation(
745 resolutions=[domain.bead_size],
748 print(
"BuildSystem.add_state: -------- domain %s "
749 "of molecule %s represented by gaussians "
750 % (domainnumber, molname))
751 emper = domain.em_residues_per_gaussian
752 creategmm = self.force_create_gmm_files
753 mol.add_representation(
755 resolutions=self.resolutions,
756 density_residues_per_component=emper,
757 density_prefix=domain.density_prefix,
758 density_force_compute=creategmm,
760 if len(domain_non_atomic) > 0:
761 mol.add_representation(
763 resolutions=[domain.bead_size],
764 setup_particles_as_densities=
True,
766 these_domain_res[domain.get_unique_name()] = (
767 domain_atomic, domain_non_atomic)
768 self._domain_res.append(these_domain_res)
769 self._domains.append(these_domains)
770 print(
'BuildSystem.add_state: State', len(self.system.states),
'added')
773 """Return list of all molecules grouped by state.
774 For each state, it's a dictionary of Molecules where key is the
777 return [s.get_molecules()
for s
in self.system.get_states()]
779 def get_molecule(self, molname, copy_index=0, state_index=0):
780 return self.system.get_states()[state_index].
get_molecules()[
784 max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
785 """Builds representations and sets up degrees of freedom"""
786 print(
"BuildSystem.execute_macro: building representations")
787 self.root_hier = self.system.build()
789 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
791 for nstate, reader
in enumerate(self._readers):
792 rbs = reader.get_rigid_bodies()
793 srbs = reader.get_super_rigid_bodies()
794 csrbs = reader.get_chains_of_super_rigid_bodies()
797 domains_in_rbs = set()
799 print(
"BuildSystem.execute_macro: -------- building rigid "
800 "body %s" % (str(rblist)))
801 all_res = IMP.pmi.tools.OrderedSet()
802 bead_res = IMP.pmi.tools.OrderedSet()
804 domain = self._domains[nstate][dname]
805 print(
"BuildSystem.execute_macro: -------- adding %s"
807 all_res |= self._domain_res[nstate][dname][0]
808 bead_res |= self._domain_res[nstate][dname][1]
809 domains_in_rbs.add(dname)
811 print(
"BuildSystem.execute_macro: -------- creating rigid "
812 "body with max_trans %s max_rot %s "
813 "non_rigid_max_trans %s"
814 % (str(max_rb_trans), str(max_rb_rot),
815 str(max_bead_trans)))
816 self.dof.create_rigid_body(all_res,
817 nonrigid_parts=bead_res,
818 max_trans=max_rb_trans,
820 nonrigid_max_trans=max_bead_trans,
821 name=
"RigidBody %s" % dname)
824 for dname, domain
in self._domains[nstate].items():
825 if dname
not in domains_in_rbs:
826 if domain.pdb_file !=
"BEADS":
828 "No rigid bodies set for %s. Residues read from "
829 "the PDB file will not be sampled - only regions "
830 "missing from the PDB will be treated flexibly. "
831 "To sample the entire sequence, use BEADS instead "
832 "of a PDB file name" % dname,
834 self.dof.create_flexible_beads(
835 self._domain_res[nstate][dname][1],
836 max_trans=max_bead_trans)
840 print(
"BuildSystem.execute_macro: -------- building "
841 "super rigid body %s" % (str(srblist)))
842 all_res = IMP.pmi.tools.OrderedSet()
843 for dname
in srblist:
844 print(
"BuildSystem.execute_macro: -------- adding %s"
846 all_res |= self._domain_res[nstate][dname][0]
847 all_res |= self._domain_res[nstate][dname][1]
849 print(
"BuildSystem.execute_macro: -------- creating super "
850 "rigid body with max_trans %s max_rot %s "
851 % (str(max_srb_trans), str(max_srb_rot)))
852 self.dof.create_super_rigid_body(
853 all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
856 for csrblist
in csrbs:
857 all_res = IMP.pmi.tools.OrderedSet()
858 for dname
in csrblist:
859 all_res |= self._domain_res[nstate][dname][0]
860 all_res |= self._domain_res[nstate][dname][1]
861 all_res = list(all_res)
862 all_res.sort(key=
lambda r: r.get_index())
863 self.dof.create_main_chain_mover(all_res)
864 return self.root_hier, self.dof
869 """A macro for running all the basic operations of analysis.
870 Includes clustering, precision analysis, and making ensemble density maps.
871 A number of plots are also supported.
874 merge_directories=[
"./"],
875 stat_file_name_suffix=
"stat",
876 best_pdb_name_suffix=
"model",
878 do_create_directories=
True,
879 global_output_directory=
"output/",
880 replica_stat_file_suffix=
"stat_replica",
881 global_analysis_result_directory=
"./analysis/",
884 @param model The IMP model
885 @param stat_file_name_suffix
886 @param merge_directories The directories containing output files
887 @param best_pdb_name_suffix
888 @param do_clean_first
889 @param do_create_directories
890 @param global_output_directory Where everything is
891 @param replica_stat_file_suffix
892 @param global_analysis_result_directory
893 @param test_mode If True, nothing is changed on disk
897 from mpi4py
import MPI
898 self.comm = MPI.COMM_WORLD
899 self.rank = self.comm.Get_rank()
900 self.number_of_processes = self.comm.size
903 self.number_of_processes = 1
905 self.test_mode = test_mode
906 self._protocol_output = []
907 self.cluster_obj =
None
909 stat_dir = global_output_directory
912 for rd
in merge_directories:
913 stat_files = glob.glob(os.path.join(rd, stat_dir,
"stat.*.out"))
914 if len(stat_files) == 0:
915 warnings.warn(
"no stat files found in %s"
916 % os.path.join(rd, stat_dir),
918 self.stat_files += stat_files
921 """Capture details of the modeling protocol.
922 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
925 self._protocol_output.append((p, p._last_state))
928 score_key=
"SimplifiedModel_Total_Score_None",
929 rmf_file_key=
"rmf_file",
930 rmf_file_frame_key=
"rmf_frame_index",
933 nframes_trajectory=10000):
934 """ Get a trajectory of the modeling run, for generating
937 @param score_key The score for ranking models
938 @param rmf_file_key Key pointing to RMF filename
939 @param rmf_file_frame_key Key pointing to RMF frame number
940 @param outputdir The local output directory used in the run
941 @param get_every Extract every nth frame
942 @param nframes_trajectory Total number of frames of the trajectory
947 self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
949 score_list = list(map(float, trajectory_models[2]))
951 max_score = max(score_list)
952 min_score = min(score_list)
954 bins = [(max_score-min_score)*math.exp(-float(i))+min_score
955 for i
in range(nframes_trajectory)]
956 binned_scores = [
None]*nframes_trajectory
957 binned_model_indexes = [-1]*nframes_trajectory
959 for model_index, s
in enumerate(score_list):
960 bins_score_diffs = [abs(s-b)
for b
in bins]
961 bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
962 if binned_scores[bin_index]
is None:
963 binned_scores[bin_index] = s
964 binned_model_indexes[bin_index] = model_index
966 old_diff = abs(binned_scores[bin_index]-bins[bin_index])
967 new_diff = abs(s-bins[bin_index])
968 if new_diff < old_diff:
969 binned_scores[bin_index] = s
970 binned_model_indexes[bin_index] = model_index
973 print(binned_model_indexes)
975 def _expand_ambiguity(self, prot, d):
976 """If using PMI2, expand the dictionary to include copies as
979 This also keeps the states separate.
984 if '..' in key
or (isinstance(val, tuple)
and len(val) >= 3):
987 states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
988 if isinstance(val, tuple):
996 for nst
in range(len(states)):
998 copies = sel.get_selected_particles(with_representation=
False)
1000 for nc
in range(len(copies)):
1002 newdict[
'%s.%i..%i' % (name, nst, nc)] = \
1003 (start, stop, name, nc, nst)
1005 newdict[
'%s..%i' % (name, nc)] = \
1006 (start, stop, name, nc, nst)
1012 score_key=
"SimplifiedModel_Total_Score_None",
1013 rmf_file_key=
"rmf_file",
1014 rmf_file_frame_key=
"rmf_frame_index",
1016 prefiltervalue=
None,
1019 alignment_components=
None,
1020 number_of_best_scoring_models=10,
1021 rmsd_calculation_components=
None,
1022 distance_matrix_file=
'distances.mat',
1023 load_distance_matrix_file=
False,
1024 skip_clustering=
False,
1025 number_of_clusters=1,
1027 exit_after_display=
True,
1029 first_and_last_frames=
None,
1030 density_custom_ranges=
None,
1031 write_pdb_with_centered_coordinates=
False,
1033 """Get the best scoring models, compute a distance matrix,
1034 cluster them, and create density maps.
1036 Tuple format: "molname" just the molecule,
1037 or (start,stop,molname,copy_num(optional),state_num(optional)
1038 Can pass None for copy or state to ignore that field.
1039 If you don't pass a specific copy number
1041 @param score_key The score for ranking models.
1042 Use "Total_Score" instead of default for PMI2.
1043 @param rmf_file_key Key pointing to RMF filename
1044 @param rmf_file_frame_key Key pointing to RMF frame number
1045 @param state_number State number to analyze
1046 @param prefiltervalue Only include frames where the
1047 score key is below this value
1048 @param feature_keys Keywords for which you want to
1049 calculate average, medians, etc.
1050 If you pass "Keyname" it'll include everything that matches
1052 @param outputdir The local output directory used in
1054 @param alignment_components Dictionary with keys=groupname,
1055 values are tuples for aligning the structures
1056 e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1057 @param number_of_best_scoring_models Num models to keep per run
1058 @param rmsd_calculation_components For calculating RMSD
1059 (same format as alignment_components)
1060 @param distance_matrix_file Where to store/read the
1062 @param load_distance_matrix_file Try to load the distance
1064 @param skip_clustering Just extract the best scoring
1065 models and save the pdbs
1066 @param number_of_clusters Number of k-means clusters
1067 @param display_plot Display the distance matrix
1068 @param exit_after_display Exit after displaying distance
1070 @param get_every Extract every nth frame
1071 @param first_and_last_frames A tuple with the first and last
1072 frames to be analyzed. Values are percentages!
1073 Default: get all frames
1074 @param density_custom_ranges For density calculation
1075 (same format as alignment_components)
1076 @param write_pdb_with_centered_coordinates
1077 @param voxel_size Used for the density output
1081 self._outputdir = os.path.abspath(outputdir)
1082 self._number_of_clusters = number_of_clusters
1083 for p, state
in self._protocol_output:
1084 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1095 if not load_distance_matrix_file:
1096 if len(self.stat_files) == 0:
1097 print(
"ERROR: no stat file found in the given path")
1099 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1100 self.stat_files, self.number_of_processes)[self.rank]
1104 orig_score_key = score_key
1105 if score_key
not in po.get_keys():
1106 if 'Total_Score' in po.get_keys():
1107 score_key =
'Total_Score'
1109 "Using 'Total_Score' instead of "
1110 "'SimplifiedModel_Total_Score_None' for the score key",
1112 for k
in [orig_score_key, score_key, rmf_file_key,
1113 rmf_file_frame_key]:
1114 if k
in feature_keys:
1116 "no need to pass " + k +
" to feature_keys.",
1118 feature_keys.remove(k)
1121 my_stat_files, score_key, feature_keys, rmf_file_key,
1122 rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1123 rmf_file_list = best_models[0]
1124 rmf_file_frame_list = best_models[1]
1125 score_list = best_models[2]
1126 feature_keyword_list_dict = best_models[3]
1132 if self.number_of_processes > 1:
1136 rmf_file_frame_list)
1137 for k
in feature_keyword_list_dict:
1138 feature_keyword_list_dict[k] = \
1140 feature_keyword_list_dict[k])
1143 score_rmf_tuples = list(zip(score_list,
1145 rmf_file_frame_list,
1146 list(range(len(score_list)))))
1148 if density_custom_ranges:
1149 for k
in density_custom_ranges:
1150 if not isinstance(density_custom_ranges[k], list):
1151 raise Exception(
"Density custom ranges: values must "
1152 "be lists of tuples")
1155 if first_and_last_frames
is not None:
1156 nframes = len(score_rmf_tuples)
1157 first_frame = int(first_and_last_frames[0] * nframes)
1158 last_frame = int(first_and_last_frames[1] * nframes)
1159 if last_frame > len(score_rmf_tuples):
1161 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1164 best_score_rmf_tuples = sorted(
1166 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1167 best_score_rmf_tuples = [t+(n,)
for n, t
in
1168 enumerate(best_score_rmf_tuples)]
1170 prov.append(IMP.pmi.io.FilterProvenance(
1171 "Best scoring", 0, number_of_best_scoring_models))
1173 best_score_feature_keyword_list_dict = defaultdict(list)
1174 for tpl
in best_score_rmf_tuples:
1176 for f
in feature_keyword_list_dict:
1177 best_score_feature_keyword_list_dict[f].append(
1178 feature_keyword_list_dict[f][index])
1179 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1180 best_score_rmf_tuples,
1181 self.number_of_processes)[self.rank]
1184 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1185 self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1187 if rmsd_calculation_components
is not None:
1188 tmp = self._expand_ambiguity(
1189 prot_ahead, rmsd_calculation_components)
1190 if tmp != rmsd_calculation_components:
1191 print(
'Detected ambiguity, expand rmsd components to',
1193 rmsd_calculation_components = tmp
1194 if alignment_components
is not None:
1195 tmp = self._expand_ambiguity(prot_ahead,
1196 alignment_components)
1197 if tmp != alignment_components:
1198 print(
'Detected ambiguity, expand alignment '
1199 'components to', tmp)
1200 alignment_components = tmp
1206 self.model, my_best_score_rmf_tuples[0],
1207 rmsd_calculation_components, state_number=state_number)
1209 self.model, my_best_score_rmf_tuples, alignment_components,
1210 rmsd_calculation_components, state_number=state_number)
1218 all_coordinates = got_coords[0]
1221 alignment_coordinates = got_coords[1]
1224 rmsd_coordinates = got_coords[2]
1227 rmf_file_name_index_dict = got_coords[3]
1230 all_rmf_file_names = got_coords[4]
1236 if density_custom_ranges:
1238 density_custom_ranges, voxel=voxel_size)
1240 dircluster = os.path.join(outputdir,
1241 "all_models."+str(self.rank))
1247 os.mkdir(dircluster)
1250 clusstat = open(os.path.join(
1251 dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1252 for cnt, tpl
in enumerate(my_best_score_rmf_tuples):
1254 rmf_frame_number = tpl[2]
1257 for key
in best_score_feature_keyword_list_dict:
1259 best_score_feature_keyword_list_dict[key][index]
1263 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1264 self.model, rmf_frame_number, rmf_name)
1266 linking_successful = \
1267 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1268 self.model, prots, rs, rmf_frame_number,
1270 if not linking_successful:
1277 states = IMP.atom.get_by_type(
1278 prots[0], IMP.atom.STATE_TYPE)
1279 prot = states[state_number]
1281 prot = prots[state_number]
1286 coords_f1 = alignment_coordinates[cnt]
1288 coords_f2 = alignment_coordinates[cnt]
1291 coords_f1, coords_f2)
1292 transformation = Ali.align()[1]
1306 rb = rbm.get_rigid_body()
1316 out_pdb_fn = os.path.join(
1317 dircluster, str(cnt)+
"."+str(self.rank)+
".pdb")
1318 out_rmf_fn = os.path.join(
1319 dircluster, str(cnt)+
"."+str(self.rank)+
".rmf3")
1320 o.init_pdb(out_pdb_fn, prot)
1321 tc = write_pdb_with_centered_coordinates
1322 o.write_pdb(out_pdb_fn,
1323 translate_to_geometric_center=tc)
1325 tmp_dict[
"local_pdb_file_name"] = \
1326 os.path.basename(out_pdb_fn)
1327 tmp_dict[
"rmf_file_full_path"] = rmf_name
1328 tmp_dict[
"local_rmf_file_name"] = \
1329 os.path.basename(out_rmf_fn)
1330 tmp_dict[
"local_rmf_frame_number"] = 0
1332 clusstat.write(str(tmp_dict)+
"\n")
1338 h.set_name(
"System")
1340 o.init_rmf(out_rmf_fn, [h], rs)
1342 o.init_rmf(out_rmf_fn, [prot], rs)
1344 o.write_rmf(out_rmf_fn)
1345 o.close_rmf(out_rmf_fn)
1347 if density_custom_ranges:
1348 DensModule.add_subunits_density(prot)
1350 if density_custom_ranges:
1351 DensModule.write_mrc(path=dircluster)
1356 if self.number_of_processes > 1:
1362 rmf_file_name_index_dict)
1364 alignment_coordinates)
1371 [best_score_feature_keyword_list_dict,
1372 rmf_file_name_index_dict],
1378 print(
"setup clustering class")
1381 for n, model_coordinate_dict
in enumerate(all_coordinates):
1383 if (alignment_components
is not None
1384 and len(self.cluster_obj.all_coords) == 0):
1386 self.cluster_obj.set_template(alignment_coordinates[n])
1387 self.cluster_obj.fill(all_rmf_file_names[n],
1388 rmsd_coordinates[n])
1389 print(
"Global calculating the distance matrix")
1392 self.cluster_obj.dist_matrix()
1396 self.cluster_obj.do_cluster(number_of_clusters)
1399 self.cluster_obj.plot_matrix(
1400 figurename=os.path.join(outputdir,
1402 if exit_after_display:
1404 self.cluster_obj.save_distance_matrix_file(
1405 file_name=distance_matrix_file)
1412 print(
"setup clustering class")
1414 self.cluster_obj.load_distance_matrix_file(
1415 file_name=distance_matrix_file)
1416 print(
"clustering with %s clusters" % str(number_of_clusters))
1417 self.cluster_obj.do_cluster(number_of_clusters)
1418 [best_score_feature_keyword_list_dict,
1419 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1422 self.cluster_obj.plot_matrix(figurename=os.path.join(
1423 outputdir,
'dist_matrix.pdf'))
1424 if exit_after_display:
1426 if self.number_of_processes > 1:
1434 print(self.cluster_obj.get_cluster_labels())
1435 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1436 print(
"rank %s " % str(self.rank))
1437 print(
"cluster %s " % str(n))
1438 print(
"cluster label %s " % str(cl))
1439 print(self.cluster_obj.get_cluster_label_names(cl))
1441 len(self.cluster_obj.get_cluster_label_names(cl))
1443 prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1446 if density_custom_ranges:
1448 density_custom_ranges,
1451 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1453 os.mkdir(dircluster)
1459 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1460 clusstat = open(dircluster +
"stat.out",
"w")
1461 for k, structure_name
in enumerate(
1462 self.cluster_obj.get_cluster_label_names(cl)):
1465 tmp_dict.update(rmsd_dict)
1466 index = rmf_file_name_index_dict[structure_name]
1467 for key
in best_score_feature_keyword_list_dict:
1469 key] = best_score_feature_keyword_list_dict[
1475 rmf_name = structure_name.split(
"|")[0]
1476 rmf_frame_number = int(structure_name.split(
"|")[1])
1477 clusstat.write(str(tmp_dict) +
"\n")
1482 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1483 self.model, rmf_frame_number, rmf_name)
1485 linking_successful = \
1486 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1487 self.model, prots, rs, rmf_frame_number,
1489 if not linking_successful:
1495 states = IMP.atom.get_by_type(
1496 prots[0], IMP.atom.STATE_TYPE)
1497 prot = states[state_number]
1499 prot = prots[state_number]
1505 co = self.cluster_obj
1506 model_index = co.get_model_index_from_name(
1508 transformation = co.get_transformation_to_first_member(
1519 rb = rbm.get_rigid_body()
1528 if density_custom_ranges:
1529 DensModule.add_subunits_density(prot)
1534 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1535 o.write_pdb(dircluster + str(k) +
".pdb")
1541 h.set_name(
"System")
1543 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1545 o.init_rmf(dircluster + str(k) +
".rmf3", [prot], rs)
1546 o.write_rmf(dircluster + str(k) +
".rmf3")
1547 o.close_rmf(dircluster + str(k) +
".rmf3")
1552 if density_custom_ranges:
1553 DensModule.write_mrc(path=dircluster)
1556 if self.number_of_processes > 1:
1559 def get_cluster_rmsd(self, cluster_num):
1560 if self.cluster_obj
is None:
1562 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1564 def save_objects(self, objects, file_name):
1566 outf = open(file_name,
'wb')
1567 pickle.dump(objects, outf)
1570 def load_objects(self, file_name):
1572 inputf = open(file_name,
'rb')
1573 objects = pickle.load(inputf)
1581 This class contains analysis utilities to investigate ReplicaExchange
1589 def __init__(self, model, stat_files, best_models=None, score_key=None,
1592 Construction of the Class.
1593 @param model IMP.Model()
1594 @param stat_files list of string. Can be ascii stat files,
1596 @param best_models Integer. Number of best scoring models,
1597 if None: all models will be read
1598 @param alignment boolean (Default=True). Align before computing
1603 self.best_models = best_models
1605 model, stat_files, self.best_models, score_key, cache=
True)
1607 StatHierarchyHandler=self.stath0)
1620 self.clusters.append(c)
1621 for n0
in range(len(self.stath0)):
1623 self.pairwise_rmsd = {}
1624 self.pairwise_molecular_assignment = {}
1625 self.alignment = alignment
1626 self.symmetric_molecules = {}
1627 self.issymmetricsel = {}
1629 self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1631 self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1636 Setup the selection onto which the rmsd is computed
1637 @param kwargs use IMP.atom.Selection keywords
1645 Store names of symmetric molecules
1647 self.symmetric_molecules[molecule_name] = 0
1652 Setup the selection onto which the alignment is computed
1653 @param kwargs use IMP.atom.Selection keywords
1661 def clean_clusters(self):
1662 for c
in self.clusters:
1666 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1668 Cluster the models based on RMSD.
1669 @param rmsd_cutoff Float the distance cutoff in Angstrom
1670 @param metric (Default=IMP.atom.get_rmsd) the metric that will
1671 be used to compute rmsds
1673 self.clean_clusters()
1674 not_clustered = set(range(len(self.stath1)))
1675 while len(not_clustered) > 0:
1676 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1681 Refine the clusters by merging the ones whose centers are close
1682 @param rmsd_cutoff cutoff distance in Angstorms
1684 clusters_copy = self.clusters
1685 for c0, c1
in itertools.combinations(self.clusters, 2):
1686 if c0.center_index
is None:
1688 if c1.center_index
is None:
1690 _ = self.stath0[c0.center_index]
1691 _ = self.stath1[c1.center_index]
1692 rmsd, molecular_assignment = self.
rmsd()
1693 if rmsd <= rmsd_cutoff:
1694 if c1
in self.clusters:
1695 clusters_copy.remove(c1)
1697 self.clusters = clusters_copy
1704 def set_cluster_assignments(self, cluster_ids):
1705 if len(cluster_ids) != len(self.stath0):
1706 raise ValueError(
'cluster ids has to be same length as '
1710 for i
in sorted(list(set(cluster_ids))):
1712 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
1713 self.clusters[idx].add_member(i, d)
1717 Return the model data from a cluster
1718 @param cluster IMP.pmi.output.Cluster object
1727 Save the data for the whole models into a pickle file
1728 @param filename string
1730 self.stath0.save_data(filename)
1734 Set the data from an external IMP.pmi.output.Data
1735 @param data IMP.pmi.output.Data
1737 self.stath0.data = data
1738 self.stath1.data = data
1742 Load the data from an external pickled file
1743 @param filename string
1745 self.stath0.load_data(filename)
1746 self.stath1.load_data(filename)
1747 self.best_models = len(self.stath0)
1749 def add_cluster(self, rmf_name_list):
1751 print(
"creating cluster index "+str(len(self.clusters)))
1752 self.clusters.append(c)
1753 current_len = len(self.stath0)
1755 for rmf
in rmf_name_list:
1756 print(
"adding rmf "+rmf)
1757 self.stath0.add_stat_file(rmf)
1758 self.stath1.add_stat_file(rmf)
1760 for n0
in range(current_len, len(self.stath0)):
1761 d0 = self.stath0[n0]
1762 c.add_member(n0, d0)
1767 Save the clusters into a pickle file
1768 @param filename string
1771 import cPickle
as pickle
1774 fl = open(filename,
'wb')
1775 pickle.dump(self.clusters, fl)
1779 Load the clusters from a pickle file
1780 @param filename string
1781 @param append bool (Default=False), if True. append the clusters
1782 to the ones currently present
1785 import cPickle
as pickle
1788 fl = open(filename,
'rb')
1789 self.clean_clusters()
1791 self.clusters += pickle.load(fl)
1793 self.clusters = pickle.load(fl)
1802 Compute the cluster center for a given cluster
1804 member_distance = defaultdict(float)
1806 for n0, n1
in itertools.combinations(cluster.members, 2):
1809 rmsd, _ = self.
rmsd()
1810 member_distance[n0] += rmsd
1812 if len(member_distance) > 0:
1813 cluster.center_index = min(member_distance,
1814 key=member_distance.get)
1816 cluster.center_index = cluster.members[0]
1821 Save the coordinates of the current cluster a single rmf file
1823 print(
"saving coordinates", cluster)
1827 if rmf_name
is None:
1828 rmf_name = prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
1830 _ = self.stath1[cluster.members[0]]
1832 o.init_rmf(rmf_name, [self.stath1])
1833 for n1
in cluster.members:
1839 o.write_rmf(rmf_name)
1841 o.close_rmf(rmf_name)
1845 remove structures that are similar
1846 append it to a new cluster
1848 print(
"pruning models")
1850 filtered = [selected]
1851 remaining = range(1, len(self.stath1), 10)
1853 while len(remaining) > 0:
1854 d0 = self.stath0[selected]
1856 for n1
in remaining:
1861 if d <= rmsd_cutoff:
1863 print(
"pruning model %s, similar to model %s, rmsd %s"
1864 % (str(n1), str(selected), str(d)))
1865 remaining = [x
for x
in remaining
if x
not in rm]
1866 if len(remaining) == 0:
1868 selected = remaining[0]
1869 filtered.append(selected)
1872 self.clusters.append(c)
1874 d0 = self.stath0[n0]
1875 c.add_member(n0, d0)
1880 Compute the precision of a cluster
1886 if cluster.center_index
is not None:
1887 members1 = [cluster.center_index]
1889 members1 = cluster.members
1893 for n1
in cluster.members:
1898 tmp_rmsd, _ = self.
rmsd()
1903 precision = rmsd/npairs
1904 cluster.precision = precision
1909 Compute the bipartite precision (ie the cross-precision)
1910 between two clusters
1914 for cn0, n0
in enumerate(cluster1.members):
1916 for cn1, n1
in enumerate(cluster2.members):
1918 tmp_rmsd, _ = self.
rmsd()
1920 print(
"--- rmsd between structure %s and structure "
1921 "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1924 precision = rmsd/npairs
1927 def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1928 cluster_ref=
None, step=1):
1930 Compute the Root mean square fluctuations
1931 of a molecule in a cluster
1932 Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1933 residue indexes and the value is the rmsf
1935 rmsf = IMP.pmi.tools.OrderedDict()
1938 if cluster_ref
is not None:
1939 if cluster_ref.center_index
is not None:
1940 members0 = [cluster_ref.center_index]
1942 members0 = cluster_ref.members
1944 if cluster.center_index
is not None:
1945 members0 = [cluster.center_index]
1947 members0 = cluster.members
1950 copy_index=copy_index, state_index=state_index)
1951 ps0 = s0.get_selected_particles()
1953 residue_indexes = list(IMP.pmi.tools.OrderedSet(
1959 d0 = self.stath0[n0]
1960 for n1
in cluster.members[::step]:
1962 print(
"--- rmsf %s %s" % (str(n0), str(n1)))
1966 self.stath1, molecule=molecule,
1967 residue_indexes=residue_indexes, resolution=1,
1968 copy_index=copy_index, state_index=state_index)
1969 ps1 = s1.get_selected_particles()
1971 d1 = self.stath1[n1]
1974 for n, (p0, p1)
in enumerate(zip(ps0, ps1)):
1975 r = residue_indexes[n]
1987 for stath
in [self.stath0, self.stath1]:
1988 if molecule
not in self.symmetric_molecules:
1990 stath, molecule=molecule, residue_index=r,
1991 resolution=1, copy_index=copy_index,
1992 state_index=state_index)
1995 stath, molecule=molecule, residue_index=r,
1996 resolution=1, state_index=state_index)
1998 ps = s.get_selected_particles()
2007 def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2008 reference=
"Absolute", prefix=
"./", step=1):
2014 for n1
in cluster.members[::step]:
2015 print(
"density "+str(n1))
2020 dens.add_subunits_density(self.stath1)
2022 dens.write_mrc(path=prefix+
'/', suffix=str(cluster.cluster_id))
2025 def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2026 consolidate=
False, molecules=
None, prefix=
'./',
2027 reference=
"Absolute"):
2031 import matplotlib.pyplot
as plt
2032 import matplotlib.cm
as cm
2033 from scipy.spatial.distance
import cdist
2035 if molecules
is None:
2044 molecules=molecules).get_selected_particles())]
2045 unique_copies = [mol
for mol
in mols
if mol.get_copy_index() == 0]
2046 mol_names_unique = dict((mol.get_name(), mol)
for mol
in unique_copies)
2047 total_len_unique = sum(max(mol.get_residue_indexes())
2048 for mol
in unique_copies)
2055 seqlen = max(mol.get_residue_indexes())
2056 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2060 for mol
in unique_copies:
2061 seqlen = max(mol.get_residue_indexes())
2062 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2065 for ncl, n1
in enumerate(cluster.members):
2068 coord_dict = IMP.pmi.tools.OrderedDict()
2070 rindexes = mol.get_residue_indexes()
2071 coords = np.ones((max(rindexes), 3))
2072 for rnum
in rindexes:
2075 selpart = sel.get_selected_particles()
2076 if len(selpart) == 0:
2078 selpart = selpart[0]
2079 coords[rnum - 1, :] = \
2081 coord_dict[mol] = coords
2084 coords = np.concatenate(list(coord_dict.values()))
2085 dists = cdist(coords, coords)
2086 binary_dists = np.where((dists <= contact_threshold)
2087 & (dists >= 1.0), 1.0, 0.0)
2089 binary_dists_dict = {}
2091 len1 = max(mol1.get_residue_indexes())
2093 name1 = mol1.get_name()
2094 name2 = mol2.get_name()
2095 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2096 if (name1, name2)
not in binary_dists_dict:
2097 binary_dists_dict[(name1, name2)] = \
2098 np.zeros((len1, len1))
2099 binary_dists_dict[(name1, name2)] += \
2100 np.where((dists <= contact_threshold)
2101 & (dists >= 1.0), 1.0, 0.0)
2102 binary_dists = np.zeros((total_len_unique, total_len_unique))
2104 for name1, name2
in binary_dists_dict:
2105 r1 = index_dict[mol_names_unique[name1]]
2106 r2 = index_dict[mol_names_unique[name2]]
2107 binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2108 np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2114 contact_freqs = binary_dists
2116 dist_maps.append(dists)
2117 av_dist_map += dists
2118 contact_freqs += binary_dists
2121 contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2123 contact_freqs = 1.0/len(cluster)*contact_freqs
2124 av_dist_map = 1.0/len(cluster)*contact_freqs
2126 fig = plt.figure(figsize=(100, 100))
2127 ax = fig.add_subplot(111)
2130 gap_between_components = 50
2135 sorted_tuple = sorted(
2137 mol).get_extended_name(), mol)
for mol
in mols)
2138 prot_list = list(zip(*sorted_tuple))[1]
2140 sorted_tuple = sorted(
2142 for mol
in unique_copies)
2143 prot_list = list(zip(*sorted_tuple))[1]
2145 prot_listx = prot_list
2146 nresx = gap_between_components + \
2147 sum([max(mol.get_residue_indexes())
2148 + gap_between_components
for mol
in prot_listx])
2151 prot_listy = prot_list
2152 nresy = gap_between_components + \
2153 sum([max(mol.get_residue_indexes())
2154 + gap_between_components
for mol
in prot_listy])
2159 res = gap_between_components
2160 for mol
in prot_listx:
2161 resoffsetx[mol] = res
2162 res += max(mol.get_residue_indexes())
2164 res += gap_between_components
2168 res = gap_between_components
2169 for mol
in prot_listy:
2170 resoffsety[mol] = res
2171 res += max(mol.get_residue_indexes())
2173 res += gap_between_components
2175 resoffsetdiagonal = {}
2176 res = gap_between_components
2177 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2178 resoffsetdiagonal[mol] = res
2179 res += max(mol.get_residue_indexes())
2180 res += gap_between_components
2185 for n, prot
in enumerate(prot_listx):
2186 res = resoffsetx[prot]
2188 for proty
in prot_listy:
2189 resy = resoffsety[proty]
2190 endy = resendy[proty]
2191 ax.plot([res, res], [resy, endy], linestyle=
'-',
2192 color=
'gray', lw=0.4)
2193 ax.plot([end, end], [resy, endy], linestyle=
'-',
2194 color=
'gray', lw=0.4)
2195 xticks.append((float(res) + float(end)) / 2)
2197 prot).get_extended_name())
2201 for n, prot
in enumerate(prot_listy):
2202 res = resoffsety[prot]
2204 for protx
in prot_listx:
2205 resx = resoffsetx[protx]
2206 endx = resendx[protx]
2207 ax.plot([resx, endx], [res, res], linestyle=
'-',
2208 color=
'gray', lw=0.4)
2209 ax.plot([resx, endx], [end, end], linestyle=
'-',
2210 color=
'gray', lw=0.4)
2211 yticks.append((float(res) + float(end)) / 2)
2213 prot).get_extended_name())
2217 tmp_array = np.zeros((nresx, nresy))
2219 for px
in prot_listx:
2220 for py
in prot_listy:
2221 resx = resoffsetx[px]
2222 lengx = resendx[px] - 1
2223 resy = resoffsety[py]
2224 lengy = resendy[py] - 1
2225 indexes_x = index_dict[px]
2226 minx = min(indexes_x)
2227 maxx = max(indexes_x)
2228 indexes_y = index_dict[py]
2229 miny = min(indexes_y)
2230 maxy = max(indexes_y)
2231 tmp_array[resx:lengx, resy:lengy] = \
2232 contact_freqs[minx:maxx, miny:maxy]
2233 ret[(px, py)] = np.argwhere(
2234 contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2236 ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2237 origin=
'lower', alpha=0.6, interpolation=
'nearest')
2239 ax.set_xticks(xticks)
2240 ax.set_xticklabels(xlabels, rotation=90)
2241 ax.set_yticks(yticks)
2242 ax.set_yticklabels(ylabels)
2243 plt.setp(ax.get_xticklabels(), fontsize=6)
2244 plt.setp(ax.get_yticklabels(), fontsize=6)
2247 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2248 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2250 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf",
2251 dpi=300, transparent=
"False")
2254 def plot_rmsd_matrix(self, filename):
2255 self.compute_all_pairwise_rmsd()
2256 distance_matrix = np.zeros(
2257 (len(self.stath0), len(self.stath1)))
2258 for (n0, n1)
in self.pairwise_rmsd:
2259 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2261 import matplotlib
as mpl
2263 import matplotlib.pylab
as pl
2264 from scipy.cluster
import hierarchy
as hrc
2266 fig = pl.figure(figsize=(10, 8))
2267 ax = fig.add_subplot(212)
2268 dendrogram = hrc.dendrogram(
2269 hrc.linkage(distance_matrix),
2272 leaves_order = dendrogram[
'leaves']
2273 ax.set_xlabel(
'Model')
2274 ax.set_ylabel(
'RMSD [Angstroms]')
2276 ax2 = fig.add_subplot(221)
2278 distance_matrix[leaves_order, :][:, leaves_order],
2279 interpolation=
'nearest')
2280 cb = fig.colorbar(cax)
2281 cb.set_label(
'RMSD [Angstroms]')
2282 ax2.set_xlabel(
'Model')
2283 ax2.set_ylabel(
'Model')
2285 pl.savefig(filename, dpi=300)
2294 Update the cluster id numbers
2296 for n, c
in enumerate(self.clusters):
2299 def get_molecule(self, hier, name, copy):
2307 self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2308 self.sel0_rmsd.get_selected_particles())
2309 self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2310 self.sel1_rmsd.get_selected_particles())
2311 for mol
in self.seldict0:
2312 for sel
in self.seldict0[mol]:
2313 self.issymmetricsel[sel] =
False
2314 for mol
in self.symmetric_molecules:
2315 self.symmetric_molecules[mol] = len(self.seldict0[mol])
2316 for sel
in self.seldict0[mol]:
2317 self.issymmetricsel[sel] =
True
2321 self.sel1_alignment, self.sel0_alignment)
2323 for rb
in self.rbs1:
2326 for bead
in self.beads1:
2334 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2336 initial filling of the clusters.
2339 print(
"clustering model "+str(n0))
2340 d0 = self.stath0[n0]
2342 print(
"creating cluster index "+str(len(self.clusters)))
2343 self.clusters.append(c)
2344 c.add_member(n0, d0)
2345 clustered = set([n0])
2347 print(
"--- trying to add model " + str(n1) +
" to cluster "
2348 + str(len(self.clusters)))
2349 d1 = self.stath1[n1]
2352 rmsd, _ = self.
rmsd(metric=metric)
2353 if rmsd < rmsd_cutoff:
2354 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2355 c.add_member(n1, d1)
2358 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2363 merge the clusters that have close members
2364 @param rmsd_cutoff cutoff distance in Angstorms
2372 for c0, c1
in filter(
lambda x: len(x[0].members) > 1,
2373 itertools.combinations(self.clusters, 2)):
2374 n0, n1 = [c.members[0]
for c
in (c0, c1)]
2377 rmsd, _ = self.
rmsd()
2378 if (rmsd < 2*rmsd_cutoff
and
2380 to_merge.append((c0, c1))
2382 for c0, c
in reversed(to_merge):
2386 self.clusters = [c
for c
in
2387 filter(
lambda x: len(x.members) > 0, self.clusters)]
2391 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2393 print(
"check close members for clusters " + str(c0.cluster_id) +
2394 " and " + str(c1.cluster_id))
2395 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2398 rmsd, _ = self.
rmsd(metric=metric)
2399 if rmsd < rmsd_cutoff:
2414 a function that returns the permutation best_sel of sels0 that
2417 best_rmsd2 = float(
'inf')
2419 if self.issymmetricsel[sels0[0]]:
2422 for offset
in range(N):
2423 sels = [sels0[(offset+i) % N]
for i
in range(N)]
2426 r = metric(sel0, sel1)
2428 if rmsd2 < best_rmsd2:
2432 for sels
in itertools.permutations(sels0):
2434 for sel0, sel1
in itertools.takewhile(
2435 lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2436 r = metric(sel0, sel1)
2438 if rmsd2 < best_rmsd2:
2441 return best_sel, best_rmsd2
2443 def compute_all_pairwise_rmsd(self):
2444 for d0
in self.stath0:
2445 for d1
in self.stath1:
2446 rmsd, _ = self.
rmsd()
2448 def rmsd(self, metric=IMP.atom.get_rmsd):
2450 Computes the RMSD. Resolves ambiguous pairs assignments
2454 n0 = self.stath0.current_index
2455 n1 = self.stath1.current_index
2456 if ((n0, n1)
in self.pairwise_rmsd) \
2457 and ((n0, n1)
in self.pairwise_molecular_assignment):
2458 return (self.pairwise_rmsd[(n0, n1)],
2459 self.pairwise_molecular_assignment[(n0, n1)])
2469 molecular_assignment = {}
2470 for molname, sels0
in self.seldict0.items():
2471 sels_best_order, best_rmsd2 = \
2472 self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2474 Ncoords = len(sels_best_order[0].get_selected_particles())
2475 Ncopies = len(self.seldict1[molname])
2476 total_rmsd += Ncoords*best_rmsd2
2477 total_N += Ncoords*Ncopies
2479 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2480 p0 = sel0.get_selected_particles()[0]
2481 p1 = sel1.get_selected_particles()[0]
2486 molecular_assignment[(molname, c0)] = (molname, c1)
2488 total_rmsd = math.sqrt(total_rmsd/total_N)
2490 self.pairwise_rmsd[(n0, n1)] = total_rmsd
2491 self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2492 self.pairwise_rmsd[(n1, n0)] = total_rmsd
2493 self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2494 return total_rmsd, molecular_assignment
2498 Fix the reference structure for structural alignment, rmsd and
2501 @param reference can be either "Absolute" (cluster center of the
2502 first cluster) or Relative (cluster center of the current
2505 if reference ==
"Absolute":
2507 elif reference ==
"Relative":
2508 if cluster.center_index:
2509 n0 = cluster.center_index
2511 n0 = cluster.members[0]
2516 compute the molecular assignemnts between multiple copies
2517 of the same sequence. It changes the Copy index of Molecules
2520 _, molecular_assignment = self.
rmsd()
2521 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2522 mol0 = self.molcopydict0[m0][c0]
2523 mol1 = self.molcopydict1[m1][c1]
2526 p1.set_value(cik0, c0)
2530 Undo the Copy index assignment
2533 _, molecular_assignment = self.
rmsd()
2534 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2535 mol0 = self.molcopydict0[m0][c0]
2536 mol1 = self.molcopydict1[m1][c1]
2539 p1.set_value(cik0, c1)
2546 s =
"AnalysisReplicaExchange\n"
2547 s +=
"---- number of clusters %s \n" % str(len(self.clusters))
2548 s +=
"---- number of models %s \n" % str(len(self.stath0))
2551 def __getitem__(self, int_slice_adaptor):
2552 if isinstance(int_slice_adaptor, int):
2553 return self.clusters[int_slice_adaptor]
2554 elif isinstance(int_slice_adaptor, slice):
2555 return self.__iter__(int_slice_adaptor)
2557 raise TypeError(
"Unknown Type")
2560 return len(self.clusters)
2562 def __iter__(self, slice_key=None):
2563 if slice_key
is None:
2564 for i
in range(len(self)):
2567 for i
in range(len(self))[slice_key]:
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A class for reading stat files (either rmf or ascii v1 and v2)
A member of a rigid body, it has internal (local) coordinates.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
def merge_aggregates
merge the clusters that have close members
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
Mapping between FASTA one-letter codes and residue types.
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
A macro to help setup and run replica exchange.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
Sample using replica exchange.
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.