1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
18 from pathlib
import Path
20 from IMP._compat_pathlib
import Path
22 from operator
import itemgetter
23 from collections
import defaultdict
30 class _MockMPIValues(object):
31 """Replace samplers.MPI_values when in test mode"""
32 def get_percentile(self, name):
36 class _RMFRestraints(object):
37 """All restraints that are written out to the RMF file"""
38 def __init__(self, model, user_restraints):
40 self._user_restraints = user_restraints
if user_restraints
else []
43 return (len(self._user_restraints)
44 + self._rmf_rs.get_number_of_restraints())
48 __nonzero__ = __bool__
50 def __getitem__(self, i):
51 class FakePMIWrapper(object):
52 def __init__(self, r):
53 self.r = IMP.RestraintSet.get_from(r)
55 def get_restraint(self):
58 lenuser = len(self._user_restraints)
60 return self._user_restraints[i]
61 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
62 r = self._rmf_rs.get_restraint(i - lenuser)
63 return FakePMIWrapper(r)
65 raise IndexError(
"Out of range")
69 """A macro to help setup and run replica exchange.
70 Supports Monte Carlo and molecular dynamics.
71 Produces trajectory RMF files, best PDB structures,
72 and output stat files.
75 monte_carlo_sample_objects=
None,
76 molecular_dynamics_sample_objects=
None,
78 rmf_output_objects=
None,
79 monte_carlo_temperature=1.0,
80 simulated_annealing=
False,
81 simulated_annealing_minimum_temperature=1.0,
82 simulated_annealing_maximum_temperature=2.5,
83 simulated_annealing_minimum_temperature_nframes=100,
84 simulated_annealing_maximum_temperature_nframes=100,
85 replica_exchange_minimum_temperature=1.0,
86 replica_exchange_maximum_temperature=2.5,
87 replica_exchange_swap=
True,
89 number_of_best_scoring_models=500,
92 molecular_dynamics_steps=10,
93 molecular_dynamics_max_time_step=1.0,
94 number_of_frames=1000,
95 save_coordinates_mode=
"lowest_temperature",
96 nframes_write_coordinates=1,
97 write_initial_rmf=
True,
98 initial_rmf_name_suffix=
"initial",
99 stat_file_name_suffix=
"stat",
100 best_pdb_name_suffix=
"model",
103 do_create_directories=
True,
104 global_output_directory=
"./",
106 best_pdb_dir=
"pdbs/",
107 replica_stat_file_suffix=
"stat_replica",
108 em_object_for_rmf=
None,
110 replica_exchange_object=
None,
114 @param model The IMP model
115 @param root_hier Top-level (System)hierarchy
116 @param monte_carlo_sample_objects Objects for MC sampling, which
117 should generally be a simple list of Mover objects, e.g.
118 from DegreesOfFreedom.get_movers().
119 @param molecular_dynamics_sample_objects Objects for MD sampling,
120 which should generally be a simple list of particles.
121 @param output_objects A list of structural objects and restraints
122 that will be included in output (ie, statistics "stat"
123 files). Any object that provides a get_output() method
124 can be used here. If None is passed
125 the macro will not write stat files.
126 @param rmf_output_objects A list of structural objects and
127 restraints that will be included in rmf. Any object
128 that provides a get_output() method can be used here.
129 @param monte_carlo_temperature MC temp (may need to be optimized
130 based on post-sampling analysis)
131 @param simulated_annealing If True, perform simulated annealing
132 @param simulated_annealing_minimum_temperature Should generally be
133 the same as monte_carlo_temperature.
134 @param simulated_annealing_minimum_temperature_nframes Number of
135 frames to compute at minimum temperature.
136 @param simulated_annealing_maximum_temperature_nframes Number of
138 temps > simulated_annealing_maximum_temperature.
139 @param replica_exchange_minimum_temperature Low temp for REX; should
140 generally be the same as monte_carlo_temperature.
141 @param replica_exchange_maximum_temperature High temp for REX
142 @param replica_exchange_swap Boolean, enable disable temperature
144 @param num_sample_rounds Number of rounds of MC/MD per cycle
145 @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
146 models to keep around for analysis.
147 @param mmcif If True, write best scoring models in mmCIF format;
148 if False (the default), write in legacy PDB format.
149 @param best_pdb_dir The directory under `global_output_directory`
150 where best-scoring PDB/mmCIF files are written.
151 @param best_pdb_name_suffix Part of the file name for best-scoring
153 @param monte_carlo_steps Number of MC steps per round
154 @param self_adaptive self adaptive scheme for Monte Carlo movers
155 @param molecular_dynamics_steps Number of MD steps per round
156 @param molecular_dynamics_max_time_step Max time step for MD
157 @param number_of_frames Number of REX frames to run
158 @param save_coordinates_mode string: how to save coordinates.
159 "lowest_temperature" (default) only the lowest temperatures
161 "25th_score" all replicas whose score is below the 25th
163 "50th_score" all replicas whose score is below the 50th
165 "75th_score" all replicas whose score is below the 75th
167 @param nframes_write_coordinates How often to write the coordinates
169 @param write_initial_rmf Write the initial configuration
170 @param global_output_directory Folder that will be created to house
172 @param test_mode Set to True to avoid writing any files, just test
174 @param score_moved If True, attempt to speed up Monte Carlo
175 sampling by caching scoring function terms on particles
182 self.output_objects = output_objects
183 self.rmf_output_objects = rmf_output_objects
185 and not root_hier.get_parent()):
186 if self.output_objects
is not None:
187 self.output_objects.append(
189 if self.rmf_output_objects
is not None:
190 self.rmf_output_objects.append(
192 self.root_hier = root_hier
193 states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
194 self.vars[
"number_of_states"] = len(states)
196 self.root_hiers = states
197 self.is_multi_state =
True
199 self.root_hier = root_hier
200 self.is_multi_state =
False
202 raise TypeError(
"Must provide System hierarchy (root_hier)")
204 self._rmf_restraints = _RMFRestraints(model,
None)
205 self.em_object_for_rmf = em_object_for_rmf
206 self.monte_carlo_sample_objects = monte_carlo_sample_objects
207 self.vars[
"self_adaptive"] = self_adaptive
208 self.molecular_dynamics_sample_objects = \
209 molecular_dynamics_sample_objects
210 self.replica_exchange_object = replica_exchange_object
211 self.molecular_dynamics_max_time_step = \
212 molecular_dynamics_max_time_step
213 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
214 self.vars[
"replica_exchange_minimum_temperature"] = \
215 replica_exchange_minimum_temperature
216 self.vars[
"replica_exchange_maximum_temperature"] = \
217 replica_exchange_maximum_temperature
218 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
219 self.vars[
"simulated_annealing"] = simulated_annealing
220 self.vars[
"simulated_annealing_minimum_temperature"] = \
221 simulated_annealing_minimum_temperature
222 self.vars[
"simulated_annealing_maximum_temperature"] = \
223 simulated_annealing_maximum_temperature
224 self.vars[
"simulated_annealing_minimum_temperature_nframes"] = \
225 simulated_annealing_minimum_temperature_nframes
226 self.vars[
"simulated_annealing_maximum_temperature_nframes"] = \
227 simulated_annealing_maximum_temperature_nframes
229 self.vars[
"num_sample_rounds"] = num_sample_rounds
231 "number_of_best_scoring_models"] = number_of_best_scoring_models
232 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
233 self.vars[
"molecular_dynamics_steps"] = molecular_dynamics_steps
234 self.vars[
"number_of_frames"] = number_of_frames
235 if save_coordinates_mode
not in (
"lowest_temperature",
"25th_score",
236 "50th_score",
"75th_score"):
237 raise Exception(
"save_coordinates_mode has unrecognized value")
239 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
240 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
241 self.vars[
"write_initial_rmf"] = write_initial_rmf
242 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
243 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
244 self.vars[
"mmcif"] = mmcif
245 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
246 self.vars[
"do_clean_first"] = do_clean_first
247 self.vars[
"do_create_directories"] = do_create_directories
248 self.vars[
"global_output_directory"] = global_output_directory
249 self.vars[
"rmf_dir"] = rmf_dir
250 self.vars[
"best_pdb_dir"] = best_pdb_dir
251 self.vars[
"atomistic"] = atomistic
252 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
253 self.vars[
"geometries"] =
None
254 self.test_mode = test_mode
255 self.score_moved = score_moved
258 if self.vars[
"geometries"]
is None:
259 self.vars[
"geometries"] = list(geometries)
261 self.vars[
"geometries"].extend(geometries)
264 print(
"ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
265 "rmfs/*.rmf3 for each replica ")
266 print(
"--- it stores the best scoring pdb models in pdbs/")
267 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
268 "lowest temperature")
269 print(
"--- variables:")
270 keys = list(self.vars.keys())
273 print(
"------", v.ljust(30), self.vars[v])
275 def get_replica_exchange_object(self):
276 return self.replica_exchange_object
278 def _add_provenance(self, sampler_md, sampler_mc):
279 """Record details about the sampling in the IMP Hierarchies"""
282 method =
"Molecular Dynamics"
283 iterations += self.vars[
"molecular_dynamics_steps"]
285 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
286 iterations += self.vars[
"monte_carlo_steps"]
288 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
290 iterations *= self.vars[
"num_sample_rounds"]
292 pi = self.model.add_particle(
"sampling")
294 self.model, pi, method, self.vars[
"number_of_frames"],
296 p.set_number_of_replicas(
297 self.replica_exchange_object.get_number_of_replicas())
298 IMP.pmi.tools._add_pmi_provenance(self.root_hier)
301 def execute_macro(self):
302 temp_index_factor = 100000.0
306 if self.monte_carlo_sample_objects
is not None:
307 print(
"Setting up MonteCarlo")
309 self.model, self.monte_carlo_sample_objects,
310 self.vars[
"monte_carlo_temperature"],
311 score_moved=self.score_moved)
312 if self.vars[
"simulated_annealing"]:
313 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
314 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
316 "simulated_annealing_minimum_temperature_nframes"]
318 "simulated_annealing_maximum_temperature_nframes"]
319 sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
320 if self.vars[
"self_adaptive"]:
321 sampler_mc.set_self_adaptive(
322 isselfadaptive=self.vars[
"self_adaptive"])
323 if self.output_objects
is not None:
324 self.output_objects.append(sampler_mc)
325 if self.rmf_output_objects
is not None:
326 self.rmf_output_objects.append(sampler_mc)
327 samplers.append(sampler_mc)
329 if self.molecular_dynamics_sample_objects
is not None:
330 print(
"Setting up MolecularDynamics")
332 self.model, self.molecular_dynamics_sample_objects,
333 self.vars[
"monte_carlo_temperature"],
334 maximum_time_step=self.molecular_dynamics_max_time_step)
335 if self.vars[
"simulated_annealing"]:
336 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
337 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
339 "simulated_annealing_minimum_temperature_nframes"]
341 "simulated_annealing_maximum_temperature_nframes"]
342 sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
343 if self.output_objects
is not None:
344 self.output_objects.append(sampler_md)
345 if self.rmf_output_objects
is not None:
346 self.rmf_output_objects.append(sampler_md)
347 samplers.append(sampler_md)
350 print(
"Setting up ReplicaExchange")
352 self.model, self.vars[
"replica_exchange_minimum_temperature"],
353 self.vars[
"replica_exchange_maximum_temperature"], samplers,
354 replica_exchange_object=self.replica_exchange_object)
355 self.replica_exchange_object = rex.rem
357 myindex = rex.get_my_index()
358 if self.output_objects
is not None:
359 self.output_objects.append(rex)
360 if self.rmf_output_objects
is not None:
361 self.rmf_output_objects.append(rex)
365 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
369 globaldir = self.vars[
"global_output_directory"] +
"/"
370 rmf_dir = globaldir + self.vars[
"rmf_dir"]
371 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
373 if not self.test_mode:
374 if self.vars[
"do_clean_first"]:
377 if self.vars[
"do_create_directories"]:
380 os.makedirs(globaldir)
388 if not self.is_multi_state:
394 for n
in range(self.vars[
"number_of_states"]):
396 os.makedirs(pdb_dir +
"/" + str(n))
403 if self.output_objects
is not None:
404 self.output_objects.append(sw)
405 if self.rmf_output_objects
is not None:
406 self.rmf_output_objects.append(sw)
408 print(
"Setting up stat file")
410 low_temp_stat_file = globaldir + \
411 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
414 if not self.test_mode:
417 if not self.test_mode:
418 if self.output_objects
is not None:
419 output.init_stat2(low_temp_stat_file,
421 extralabels=[
"rmf_file",
"rmf_frame_index"])
423 print(
"Stat file writing is disabled")
425 if self.rmf_output_objects
is not None:
426 print(
"Stat info being written in the rmf file")
428 print(
"Setting up replica stat file")
429 replica_stat_file = globaldir + \
430 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
431 if not self.test_mode:
432 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
434 if not self.test_mode:
435 print(
"Setting up best pdb files")
436 if not self.is_multi_state:
437 if self.vars[
"number_of_best_scoring_models"] > 0:
438 output.init_pdb_best_scoring(
439 pdb_dir +
"/" + self.vars[
"best_pdb_name_suffix"],
441 self.vars[
"number_of_best_scoring_models"],
442 replica_exchange=
True,
443 mmcif=self.vars[
'mmcif'],
444 best_score_file=globaldir +
"best.scores.rex.py")
445 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
447 pdb_dir +
"/" +
"model.psf",
449 self.vars[
"best_pdb_name_suffix"] + pdbext)
451 if self.vars[
"number_of_best_scoring_models"] > 0:
452 for n
in range(self.vars[
"number_of_states"]):
453 output.init_pdb_best_scoring(
454 pdb_dir +
"/" + str(n) +
"/" +
455 self.vars[
"best_pdb_name_suffix"],
457 self.vars[
"number_of_best_scoring_models"],
458 replica_exchange=
True,
459 mmcif=self.vars[
'mmcif'],
460 best_score_file=globaldir +
"best.scores.rex.py")
461 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
463 pdb_dir +
"/" + str(n) +
"/" +
"model.psf",
464 pdb_dir +
"/" + str(n) +
"/" +
465 self.vars[
"best_pdb_name_suffix"] + pdbext)
468 if self.em_object_for_rmf
is not None:
469 output_hierarchies = [
471 self.em_object_for_rmf.get_density_as_hierarchy(
474 output_hierarchies = [self.root_hier]
476 if not self.test_mode:
477 print(
"Setting up and writing initial rmf coordinate file")
478 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
479 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
481 listofobjects=self.rmf_output_objects)
482 if self._rmf_restraints:
483 output.add_restraints_to_rmf(
484 init_suffix +
"." + str(myindex) +
".rmf3",
485 self._rmf_restraints)
486 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
487 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
489 if not self.test_mode:
490 mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
492 mpivs = _MockMPIValues()
494 self._add_provenance(sampler_md, sampler_mc)
496 if not self.test_mode:
497 print(
"Setting up production rmf files")
498 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
499 output.init_rmf(rmfname, output_hierarchies,
500 geometries=self.vars[
"geometries"],
501 listofobjects=self.rmf_output_objects)
503 if self._rmf_restraints:
504 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
506 ntimes_at_low_temp = 0
510 self.replica_exchange_object.set_was_used(
True)
511 nframes = self.vars[
"number_of_frames"]
514 for i
in range(nframes):
518 for nr
in range(self.vars[
"num_sample_rounds"]):
519 if sampler_md
is not None:
521 self.vars[
"molecular_dynamics_steps"])
522 if sampler_mc
is not None:
523 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
525 self.model).evaluate(
False)
526 mpivs.set_value(
"score", score)
527 output.set_output_entry(
"score", score)
529 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
531 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
532 save_frame = (min_temp_index == my_temp_index)
533 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
534 score_perc = mpivs.get_percentile(
"score")
535 save_frame = (score_perc*100.0 <= 25.0)
536 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
537 score_perc = mpivs.get_percentile(
"score")
538 save_frame = (score_perc*100.0 <= 50.0)
539 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
540 score_perc = mpivs.get_percentile(
"score")
541 save_frame = (score_perc*100.0 <= 75.0)
544 if save_frame
and not self.test_mode:
548 print(
"--- frame %s score %s " % (str(i), str(score)))
550 if not self.test_mode:
551 if i % self.vars[
"nframes_write_coordinates"] == 0:
552 print(
'--- writing coordinates')
553 if self.vars[
"number_of_best_scoring_models"] > 0:
554 output.write_pdb_best_scoring(score)
555 output.write_rmf(rmfname)
556 output.set_output_entry(
"rmf_file", rmfname)
557 output.set_output_entry(
"rmf_frame_index",
560 output.set_output_entry(
"rmf_file", rmfname)
561 output.set_output_entry(
"rmf_frame_index",
'-1')
562 if self.output_objects
is not None:
563 output.write_stat2(low_temp_stat_file)
564 ntimes_at_low_temp += 1
566 if not self.test_mode:
567 output.write_stat2(replica_stat_file)
568 if self.vars[
"replica_exchange_swap"]:
569 rex.swap_temp(i, score)
570 for p, state
in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
571 p.add_replica_exchange(state, self)
573 if not self.test_mode:
574 print(
"closing production rmf files")
575 output.close_rmf(rmfname)
579 """A macro to build a IMP::pmi::topology::System based on a
580 TopologyReader object.
582 Easily create multi-state systems by calling this macro
583 repeatedly with different TopologyReader objects!
584 A useful function is get_molecules() which returns the PMI Molecules
585 grouped by state as a dictionary with key = (molecule name),
586 value = IMP.pmi.topology.Molecule
587 Quick multi-state system:
590 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
591 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
592 bs = IMP.pmi.macros.BuildSystem(model)
593 bs.add_state(reader1)
594 bs.add_state(reader2)
595 bs.execute_macro() # build everything including degrees of freedom
596 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
597 ### now you have a two state system, you add restraints etc
599 @note The "domain name" entry of the topology reader is not used.
600 All molecules are set up by the component name, but split into rigid bodies
604 _alphabets = {
'DNA': IMP.pmi.alphabets.dna,
605 'RNA': IMP.pmi.alphabets.rna}
607 def __init__(self, model, sequence_connectivity_scale=4.0,
608 force_create_gmm_files=
False, resolutions=[1, 10],
611 @param model An IMP Model
612 @param sequence_connectivity_scale For scaling the connectivity
614 @param force_create_gmm_files If True, will sample and create GMMs
615 no matter what. If False, will only sample if the
616 files don't exist. If number of Gaussians is zero, won't
618 @param resolutions The resolutions to build for structured regions
619 @param name The name of the top-level hierarchy node.
626 self._domain_res = []
628 self.force_create_gmm_files = force_create_gmm_files
629 self.resolutions = resolutions
631 def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
633 """Add a state using the topology info in a
634 IMP::pmi::topology::TopologyReader object.
635 When you are done adding states, call execute_macro()
636 @param reader The TopologyReader object
637 @param fasta_name_map dictionary for converting protein names
638 found in the fasta file
639 @param chain_ids A list or string of chain IDs for assigning to
640 newly-created molecules, e.g.
641 `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
642 If not specified, chain IDs A through Z are assigned, then
643 AA through AZ, then BA through BZ, and so on, in the same
646 state = self.system.create_state()
647 self._readers.append(reader)
649 these_domain_res = {}
651 if chain_ids
is None:
652 chain_ids = IMP.pmi.output._ChainIDs()
657 for molname
in reader.get_molecules():
658 copies = reader.get_molecules()[molname].domains
659 for nc, copyname
in enumerate(copies):
660 print(
"BuildSystem.add_state: setting up molecule %s copy "
661 "number %s" % (molname, str(nc)))
662 copy = copies[copyname]
665 all_chains = [c
for c
in copy
if c.chain
is not None]
667 chain_id = all_chains[0].chain
669 chain_id = chain_ids[numchain]
671 "No PDBs specified for %s, so keep_chain_id has "
672 "no effect; using default chain ID '%s'"
675 chain_id = chain_ids[numchain]
677 alphabet = IMP.pmi.alphabets.amino_acid
678 fasta_flag = copy[0].fasta_flag
679 if fasta_flag
in self._alphabets:
680 alphabet = self._alphabets[fasta_flag]
682 copy[0].fasta_file, fasta_name_map)[copy[0].fasta_id]
683 print(
"BuildSystem.add_state: molecule %s sequence has "
684 "%s residues" % (molname, len(seq)))
685 orig_mol = state.create_molecule(molname, seq, chain_id,
690 print(
"BuildSystem.add_state: creating a copy for "
691 "molecule %s" % molname)
692 mol = orig_mol.create_copy(chain_id)
695 for domainnumber, domain
in enumerate(copy):
696 print(
"BuildSystem.add_state: ---- setting up domain %s "
697 "of molecule %s" % (domainnumber, molname))
700 these_domains[domain.get_unique_name()] = domain
701 if domain.residue_range == []
or \
702 domain.residue_range
is None:
703 domain_res = mol.get_residues()
705 start = domain.residue_range[0]+domain.pdb_offset
706 if domain.residue_range[1] ==
'END':
707 end = len(mol.sequence)
709 end = domain.residue_range[1]+domain.pdb_offset
710 domain_res = mol.residue_range(start-1, end-1)
711 print(
"BuildSystem.add_state: -------- domain %s of "
712 "molecule %s extends from residue %s to "
714 % (domainnumber, molname, start, end))
715 if domain.pdb_file ==
"BEADS":
716 print(
"BuildSystem.add_state: -------- domain %s of "
717 "molecule %s represented by BEADS "
718 % (domainnumber, molname))
719 mol.add_representation(
721 resolutions=[domain.bead_size],
722 setup_particles_as_densities=(
723 domain.em_residues_per_gaussian != 0),
725 these_domain_res[domain.get_unique_name()] = \
727 elif domain.pdb_file ==
"IDEAL_HELIX":
728 print(
"BuildSystem.add_state: -------- domain %s of "
729 "molecule %s represented by IDEAL_HELIX "
730 % (domainnumber, molname))
731 emper = domain.em_residues_per_gaussian
732 mol.add_representation(
734 resolutions=self.resolutions,
736 density_residues_per_component=emper,
737 density_prefix=domain.density_prefix,
738 density_force_compute=self.force_create_gmm_files,
740 these_domain_res[domain.get_unique_name()] = \
743 print(
"BuildSystem.add_state: -------- domain %s of "
744 "molecule %s represented by pdb file %s "
745 % (domainnumber, molname, domain.pdb_file))
746 domain_atomic = mol.add_structure(domain.pdb_file,
748 domain.residue_range,
751 domain_non_atomic = domain_res - domain_atomic
752 if not domain.em_residues_per_gaussian:
753 mol.add_representation(
754 domain_atomic, resolutions=self.resolutions,
756 if len(domain_non_atomic) > 0:
757 mol.add_representation(
759 resolutions=[domain.bead_size],
762 print(
"BuildSystem.add_state: -------- domain %s "
763 "of molecule %s represented by gaussians "
764 % (domainnumber, molname))
765 emper = domain.em_residues_per_gaussian
766 creategmm = self.force_create_gmm_files
767 mol.add_representation(
769 resolutions=self.resolutions,
770 density_residues_per_component=emper,
771 density_prefix=domain.density_prefix,
772 density_force_compute=creategmm,
774 if len(domain_non_atomic) > 0:
775 mol.add_representation(
777 resolutions=[domain.bead_size],
778 setup_particles_as_densities=
True,
780 these_domain_res[domain.get_unique_name()] = (
781 domain_atomic, domain_non_atomic)
782 self._domain_res.append(these_domain_res)
783 self._domains.append(these_domains)
784 print(
'BuildSystem.add_state: State', len(self.system.states),
'added')
788 """Return list of all molecules grouped by state.
789 For each state, it's a dictionary of Molecules where key is the
792 return [s.get_molecules()
for s
in self.system.get_states()]
794 def get_molecule(self, molname, copy_index=0, state_index=0):
795 return self.system.get_states()[state_index].
get_molecules()[
799 max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
800 """Builds representations and sets up degrees of freedom"""
801 print(
"BuildSystem.execute_macro: building representations")
802 self.root_hier = self.system.build()
804 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
806 for nstate, reader
in enumerate(self._readers):
807 rbs = reader.get_rigid_bodies()
808 srbs = reader.get_super_rigid_bodies()
809 csrbs = reader.get_chains_of_super_rigid_bodies()
812 domains_in_rbs = set()
814 print(
"BuildSystem.execute_macro: -------- building rigid "
815 "body %s" % (str(rblist)))
816 all_res = IMP.pmi.tools.OrderedSet()
817 bead_res = IMP.pmi.tools.OrderedSet()
819 domain = self._domains[nstate][dname]
820 print(
"BuildSystem.execute_macro: -------- adding %s"
822 all_res |= self._domain_res[nstate][dname][0]
823 bead_res |= self._domain_res[nstate][dname][1]
824 domains_in_rbs.add(dname)
826 print(
"BuildSystem.execute_macro: -------- creating rigid "
827 "body with max_trans %s max_rot %s "
828 "non_rigid_max_trans %s"
829 % (str(max_rb_trans), str(max_rb_rot),
830 str(max_bead_trans)))
831 self.dof.create_rigid_body(all_res,
832 nonrigid_parts=bead_res,
833 max_trans=max_rb_trans,
835 nonrigid_max_trans=max_bead_trans,
836 name=
"RigidBody %s" % dname)
839 for dname, domain
in self._domains[nstate].items():
840 if dname
not in domains_in_rbs:
841 if domain.pdb_file !=
"BEADS":
843 "No rigid bodies set for %s. Residues read from "
844 "the PDB file will not be sampled - only regions "
845 "missing from the PDB will be treated flexibly. "
846 "To sample the entire sequence, use BEADS instead "
847 "of a PDB file name" % dname,
849 self.dof.create_flexible_beads(
850 self._domain_res[nstate][dname][1],
851 max_trans=max_bead_trans)
855 print(
"BuildSystem.execute_macro: -------- building "
856 "super rigid body %s" % (str(srblist)))
857 all_res = IMP.pmi.tools.OrderedSet()
858 for dname
in srblist:
859 print(
"BuildSystem.execute_macro: -------- adding %s"
861 all_res |= self._domain_res[nstate][dname][0]
862 all_res |= self._domain_res[nstate][dname][1]
864 print(
"BuildSystem.execute_macro: -------- creating super "
865 "rigid body with max_trans %s max_rot %s "
866 % (str(max_srb_trans), str(max_srb_rot)))
867 self.dof.create_super_rigid_body(
868 all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
871 for csrblist
in csrbs:
872 all_res = IMP.pmi.tools.OrderedSet()
873 for dname
in csrblist:
874 all_res |= self._domain_res[nstate][dname][0]
875 all_res |= self._domain_res[nstate][dname][1]
876 all_res = list(all_res)
877 all_res.sort(key=
lambda r: r.get_index())
878 self.dof.create_main_chain_mover(all_res)
879 return self.root_hier, self.dof
884 """A macro for running all the basic operations of analysis.
885 Includes clustering, precision analysis, and making ensemble density maps.
886 A number of plots are also supported.
889 merge_directories=[
"./"],
890 stat_file_name_suffix=
"stat",
891 best_pdb_name_suffix=
"model",
893 do_create_directories=
True,
894 global_output_directory=
"output/",
895 replica_stat_file_suffix=
"stat_replica",
896 global_analysis_result_directory=
"./analysis/",
899 @param model The IMP model
900 @param stat_file_name_suffix
901 @param merge_directories The directories containing output files
902 @param best_pdb_name_suffix
903 @param do_clean_first
904 @param do_create_directories
905 @param global_output_directory Where everything is
906 @param replica_stat_file_suffix
907 @param global_analysis_result_directory
908 @param test_mode If True, nothing is changed on disk
912 from mpi4py
import MPI
913 self.comm = MPI.COMM_WORLD
914 self.rank = self.comm.Get_rank()
915 self.number_of_processes = self.comm.size
918 self.number_of_processes = 1
920 self.test_mode = test_mode
921 self._protocol_output = []
922 self.cluster_obj =
None
924 stat_dir = global_output_directory
927 for rd
in merge_directories:
928 stat_files = glob.glob(os.path.join(rd, stat_dir,
"stat.*.out"))
929 if len(stat_files) == 0:
930 warnings.warn(
"no stat files found in %s"
931 % os.path.join(rd, stat_dir),
933 self.stat_files += stat_files
936 """Capture details of the modeling protocol.
937 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
940 self._protocol_output.append((p, p._last_state))
943 score_key=
"Total_Score",
944 rmf_file_key=
"rmf_file",
945 rmf_file_frame_key=
"rmf_frame_index",
948 nframes_trajectory=10000):
949 """ Get a trajectory of the modeling run, for generating
952 @param score_key The score for ranking models
953 @param rmf_file_key Key pointing to RMF filename
954 @param rmf_file_frame_key Key pointing to RMF frame number
955 @param outputdir The local output directory used in the run
956 @param get_every Extract every nth frame
957 @param nframes_trajectory Total number of frames of the trajectory
962 self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
964 score_list = list(map(float, trajectory_models[2]))
966 max_score = max(score_list)
967 min_score = min(score_list)
969 bins = [(max_score-min_score)*math.exp(-float(i))+min_score
970 for i
in range(nframes_trajectory)]
971 binned_scores = [
None]*nframes_trajectory
972 binned_model_indexes = [-1]*nframes_trajectory
974 for model_index, s
in enumerate(score_list):
975 bins_score_diffs = [abs(s-b)
for b
in bins]
976 bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
977 if binned_scores[bin_index]
is None:
978 binned_scores[bin_index] = s
979 binned_model_indexes[bin_index] = model_index
981 old_diff = abs(binned_scores[bin_index]-bins[bin_index])
982 new_diff = abs(s-bins[bin_index])
983 if new_diff < old_diff:
984 binned_scores[bin_index] = s
985 binned_model_indexes[bin_index] = model_index
988 print(binned_model_indexes)
990 def _expand_ambiguity(self, prot, d):
991 """If using PMI2, expand the dictionary to include copies as
994 This also keeps the states separate.
999 if '..' in key
or (isinstance(val, tuple)
and len(val) >= 3):
1002 states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1003 if isinstance(val, tuple):
1011 for nst
in range(len(states)):
1013 copies = sel.get_selected_particles(with_representation=
False)
1015 for nc
in range(len(copies)):
1017 newdict[
'%s.%i..%i' % (name, nst, nc)] = \
1018 (start, stop, name, nc, nst)
1020 newdict[
'%s..%i' % (name, nc)] = \
1021 (start, stop, name, nc, nst)
1027 score_key=
"Total_Score",
1028 rmf_file_key=
"rmf_file",
1029 rmf_file_frame_key=
"rmf_frame_index",
1031 prefiltervalue=
None,
1034 alignment_components=
None,
1035 number_of_best_scoring_models=10,
1036 rmsd_calculation_components=
None,
1037 distance_matrix_file=
'distances.mat',
1038 load_distance_matrix_file=
False,
1039 skip_clustering=
False,
1040 number_of_clusters=1,
1042 exit_after_display=
True,
1044 first_and_last_frames=
None,
1045 density_custom_ranges=
None,
1046 write_pdb_with_centered_coordinates=
False,
1048 """Get the best scoring models, compute a distance matrix,
1049 cluster them, and create density maps.
1051 Tuple format: "molname" just the molecule,
1052 or (start,stop,molname,copy_num(optional),state_num(optional)
1053 Can pass None for copy or state to ignore that field.
1054 If you don't pass a specific copy number
1056 @param score_key The score for ranking models.
1057 @param rmf_file_key Key pointing to RMF filename
1058 @param rmf_file_frame_key Key pointing to RMF frame number
1059 @param state_number State number to analyze
1060 @param prefiltervalue Only include frames where the
1061 score key is below this value
1062 @param feature_keys Keywords for which you want to
1063 calculate average, medians, etc.
1064 If you pass "Keyname" it'll include everything that matches
1066 @param outputdir The local output directory used in
1068 @param alignment_components Dictionary with keys=groupname,
1069 values are tuples for aligning the structures
1070 e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1071 @param number_of_best_scoring_models Num models to keep per run
1072 @param rmsd_calculation_components For calculating RMSD
1073 (same format as alignment_components)
1074 @param distance_matrix_file Where to store/read the
1076 @param load_distance_matrix_file Try to load the distance
1078 @param skip_clustering Just extract the best scoring
1079 models and save the pdbs
1080 @param number_of_clusters Number of k-means clusters
1081 @param display_plot Display the distance matrix
1082 @param exit_after_display Exit after displaying distance
1084 @param get_every Extract every nth frame
1085 @param first_and_last_frames A tuple with the first and last
1086 frames to be analyzed. Values are percentages!
1087 Default: get all frames
1088 @param density_custom_ranges For density calculation
1089 (same format as alignment_components)
1090 @param write_pdb_with_centered_coordinates
1091 @param voxel_size Used for the density output
1095 self._outputdir = Path(outputdir).absolute()
1096 self._number_of_clusters = number_of_clusters
1097 for p, state
in self._protocol_output:
1098 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1109 if not load_distance_matrix_file:
1110 if len(self.stat_files) == 0:
1111 print(
"ERROR: no stat file found in the given path")
1113 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1114 self.stat_files, self.number_of_processes)[self.rank]
1117 for k
in (score_key, rmf_file_key, rmf_file_frame_key):
1118 if k
in feature_keys:
1120 "no need to pass " + k +
" to feature_keys.",
1122 feature_keys.remove(k)
1125 my_stat_files, score_key, feature_keys, rmf_file_key,
1126 rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1127 rmf_file_list = best_models[0]
1128 rmf_file_frame_list = best_models[1]
1129 score_list = best_models[2]
1130 feature_keyword_list_dict = best_models[3]
1136 if self.number_of_processes > 1:
1140 rmf_file_frame_list)
1141 for k
in feature_keyword_list_dict:
1142 feature_keyword_list_dict[k] = \
1144 feature_keyword_list_dict[k])
1147 score_rmf_tuples = list(zip(score_list,
1149 rmf_file_frame_list,
1150 list(range(len(score_list)))))
1152 if density_custom_ranges:
1153 for k
in density_custom_ranges:
1154 if not isinstance(density_custom_ranges[k], list):
1155 raise Exception(
"Density custom ranges: values must "
1156 "be lists of tuples")
1159 if first_and_last_frames
is not None:
1160 nframes = len(score_rmf_tuples)
1161 first_frame = int(first_and_last_frames[0] * nframes)
1162 last_frame = int(first_and_last_frames[1] * nframes)
1163 if last_frame > len(score_rmf_tuples):
1165 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1168 best_score_rmf_tuples = sorted(
1170 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1171 best_score_rmf_tuples = [t+(n,)
for n, t
in
1172 enumerate(best_score_rmf_tuples)]
1174 prov.append(IMP.pmi.io.FilterProvenance(
1175 "Best scoring", 0, number_of_best_scoring_models))
1177 best_score_feature_keyword_list_dict = defaultdict(list)
1178 for tpl
in best_score_rmf_tuples:
1180 for f
in feature_keyword_list_dict:
1181 best_score_feature_keyword_list_dict[f].append(
1182 feature_keyword_list_dict[f][index])
1183 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1184 best_score_rmf_tuples,
1185 self.number_of_processes)[self.rank]
1188 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1189 self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1191 if rmsd_calculation_components
is not None:
1192 tmp = self._expand_ambiguity(
1193 prot_ahead, rmsd_calculation_components)
1194 if tmp != rmsd_calculation_components:
1195 print(
'Detected ambiguity, expand rmsd components to',
1197 rmsd_calculation_components = tmp
1198 if alignment_components
is not None:
1199 tmp = self._expand_ambiguity(prot_ahead,
1200 alignment_components)
1201 if tmp != alignment_components:
1202 print(
'Detected ambiguity, expand alignment '
1203 'components to', tmp)
1204 alignment_components = tmp
1210 self.model, my_best_score_rmf_tuples[0],
1211 rmsd_calculation_components, state_number=state_number)
1213 self.model, my_best_score_rmf_tuples, alignment_components,
1214 rmsd_calculation_components, state_number=state_number)
1222 all_coordinates = got_coords[0]
1225 alignment_coordinates = got_coords[1]
1228 rmsd_coordinates = got_coords[2]
1231 rmf_file_name_index_dict = got_coords[3]
1234 all_rmf_file_names = got_coords[4]
1240 if density_custom_ranges:
1242 density_custom_ranges, voxel=voxel_size)
1244 dircluster = os.path.join(outputdir,
1245 "all_models."+str(self.rank))
1251 os.mkdir(dircluster)
1254 clusstat = open(os.path.join(
1255 dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1256 for cnt, tpl
in enumerate(my_best_score_rmf_tuples):
1258 rmf_frame_number = tpl[2]
1261 for key
in best_score_feature_keyword_list_dict:
1263 best_score_feature_keyword_list_dict[key][index]
1267 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1268 self.model, rmf_frame_number, rmf_name)
1270 linking_successful = \
1271 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1272 self.model, prots, rs, rmf_frame_number,
1274 if not linking_successful:
1281 states = IMP.atom.get_by_type(
1282 prots[0], IMP.atom.STATE_TYPE)
1283 prot = states[state_number]
1285 prot = prots[state_number]
1290 coords_f1 = alignment_coordinates[cnt]
1292 coords_f2 = alignment_coordinates[cnt]
1295 coords_f1, coords_f2)
1296 transformation = Ali.align()[1]
1310 rb = rbm.get_rigid_body()
1320 out_pdb_fn = os.path.join(
1321 dircluster, str(cnt)+
"."+str(self.rank)+
".pdb")
1322 out_rmf_fn = os.path.join(
1323 dircluster, str(cnt)+
"."+str(self.rank)+
".rmf3")
1324 o.init_pdb(out_pdb_fn, prot)
1325 tc = write_pdb_with_centered_coordinates
1326 o.write_pdb(out_pdb_fn,
1327 translate_to_geometric_center=tc)
1329 tmp_dict[
"local_pdb_file_name"] = \
1330 os.path.basename(out_pdb_fn)
1331 tmp_dict[
"rmf_file_full_path"] = rmf_name
1332 tmp_dict[
"local_rmf_file_name"] = \
1333 os.path.basename(out_rmf_fn)
1334 tmp_dict[
"local_rmf_frame_number"] = 0
1336 clusstat.write(str(tmp_dict)+
"\n")
1342 h.set_name(
"System")
1344 o.init_rmf(out_rmf_fn, [h], rs)
1346 o.init_rmf(out_rmf_fn, [prot], rs)
1348 o.write_rmf(out_rmf_fn)
1349 o.close_rmf(out_rmf_fn)
1351 if density_custom_ranges:
1352 DensModule.add_subunits_density(prot)
1354 if density_custom_ranges:
1355 DensModule.write_mrc(path=dircluster)
1360 if self.number_of_processes > 1:
1366 rmf_file_name_index_dict)
1368 alignment_coordinates)
1375 [best_score_feature_keyword_list_dict,
1376 rmf_file_name_index_dict],
1382 print(
"setup clustering class")
1385 for n, model_coordinate_dict
in enumerate(all_coordinates):
1387 if (alignment_components
is not None
1388 and len(self.cluster_obj.all_coords) == 0):
1390 self.cluster_obj.set_template(alignment_coordinates[n])
1391 self.cluster_obj.fill(all_rmf_file_names[n],
1392 rmsd_coordinates[n])
1393 print(
"Global calculating the distance matrix")
1396 self.cluster_obj.dist_matrix()
1400 self.cluster_obj.do_cluster(number_of_clusters)
1403 self.cluster_obj.plot_matrix(
1404 figurename=os.path.join(outputdir,
1406 if exit_after_display:
1408 self.cluster_obj.save_distance_matrix_file(
1409 file_name=distance_matrix_file)
1416 print(
"setup clustering class")
1418 self.cluster_obj.load_distance_matrix_file(
1419 file_name=distance_matrix_file)
1420 print(
"clustering with %s clusters" % str(number_of_clusters))
1421 self.cluster_obj.do_cluster(number_of_clusters)
1422 [best_score_feature_keyword_list_dict,
1423 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1426 self.cluster_obj.plot_matrix(figurename=os.path.join(
1427 outputdir,
'dist_matrix.pdf'))
1428 if exit_after_display:
1430 if self.number_of_processes > 1:
1438 print(self.cluster_obj.get_cluster_labels())
1439 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1440 print(
"rank %s " % str(self.rank))
1441 print(
"cluster %s " % str(n))
1442 print(
"cluster label %s " % str(cl))
1443 print(self.cluster_obj.get_cluster_label_names(cl))
1445 len(self.cluster_obj.get_cluster_label_names(cl))
1447 prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1450 if density_custom_ranges:
1452 density_custom_ranges,
1455 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1457 os.mkdir(dircluster)
1463 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1464 clusstat = open(dircluster +
"stat.out",
"w")
1465 for k, structure_name
in enumerate(
1466 self.cluster_obj.get_cluster_label_names(cl)):
1469 tmp_dict.update(rmsd_dict)
1470 index = rmf_file_name_index_dict[structure_name]
1471 for key
in best_score_feature_keyword_list_dict:
1473 key] = best_score_feature_keyword_list_dict[
1479 rmf_name = structure_name.split(
"|")[0]
1480 rmf_frame_number = int(structure_name.split(
"|")[1])
1481 clusstat.write(str(tmp_dict) +
"\n")
1486 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1487 self.model, rmf_frame_number, rmf_name)
1489 linking_successful = \
1490 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1491 self.model, prots, rs, rmf_frame_number,
1493 if not linking_successful:
1499 states = IMP.atom.get_by_type(
1500 prots[0], IMP.atom.STATE_TYPE)
1501 prot = states[state_number]
1503 prot = prots[state_number]
1509 co = self.cluster_obj
1510 model_index = co.get_model_index_from_name(
1512 transformation = co.get_transformation_to_first_member(
1523 rb = rbm.get_rigid_body()
1532 if density_custom_ranges:
1533 DensModule.add_subunits_density(prot)
1538 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1539 o.write_pdb(dircluster + str(k) +
".pdb")
1545 h.set_name(
"System")
1547 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1549 o.init_rmf(dircluster + str(k) +
".rmf3", [prot], rs)
1550 o.write_rmf(dircluster + str(k) +
".rmf3")
1551 o.close_rmf(dircluster + str(k) +
".rmf3")
1556 if density_custom_ranges:
1557 DensModule.write_mrc(path=dircluster)
1560 if self.number_of_processes > 1:
1563 def get_cluster_rmsd(self, cluster_num):
1564 if self.cluster_obj
is None:
1566 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1568 def save_objects(self, objects, file_name):
1570 outf = open(file_name,
'wb')
1571 pickle.dump(objects, outf)
1574 def load_objects(self, file_name):
1576 inputf = open(file_name,
'rb')
1577 objects = pickle.load(inputf)
1585 This class contains analysis utilities to investigate ReplicaExchange
1593 def __init__(self, model, stat_files, best_models=None, score_key=None,
1596 Construction of the Class.
1597 @param model IMP.Model()
1598 @param stat_files list of string. Can be ascii stat files,
1600 @param best_models Integer. Number of best scoring models,
1601 if None: all models will be read
1602 @param alignment boolean (Default=True). Align before computing
1607 self.best_models = best_models
1609 model, stat_files, self.best_models, score_key, cache=
True)
1611 StatHierarchyHandler=self.stath0)
1624 self.clusters.append(c)
1625 for n0
in range(len(self.stath0)):
1627 self.pairwise_rmsd = {}
1628 self.pairwise_molecular_assignment = {}
1629 self.alignment = alignment
1630 self.symmetric_molecules = {}
1631 self.issymmetricsel = {}
1633 self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1635 self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1640 Setup the selection onto which the rmsd is computed
1641 @param kwargs use IMP.atom.Selection keywords
1649 Store names of symmetric molecules
1651 self.symmetric_molecules[molecule_name] = 0
1656 Setup the selection onto which the alignment is computed
1657 @param kwargs use IMP.atom.Selection keywords
1665 def clean_clusters(self):
1666 for c
in self.clusters:
1670 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1672 Cluster the models based on RMSD.
1673 @param rmsd_cutoff Float the distance cutoff in Angstrom
1674 @param metric (Default=IMP.atom.get_rmsd) the metric that will
1675 be used to compute rmsds
1677 self.clean_clusters()
1678 not_clustered = set(range(len(self.stath1)))
1679 while len(not_clustered) > 0:
1680 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1685 Refine the clusters by merging the ones whose centers are close
1686 @param rmsd_cutoff cutoff distance in Angstorms
1688 clusters_copy = self.clusters
1689 for c0, c1
in itertools.combinations(self.clusters, 2):
1690 if c0.center_index
is None:
1692 if c1.center_index
is None:
1694 _ = self.stath0[c0.center_index]
1695 _ = self.stath1[c1.center_index]
1696 rmsd, molecular_assignment = self.
rmsd()
1697 if rmsd <= rmsd_cutoff:
1698 if c1
in self.clusters:
1699 clusters_copy.remove(c1)
1701 self.clusters = clusters_copy
1708 def set_cluster_assignments(self, cluster_ids):
1709 if len(cluster_ids) != len(self.stath0):
1710 raise ValueError(
'cluster ids has to be same length as '
1714 for i
in sorted(list(set(cluster_ids))):
1716 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
1717 self.clusters[idx].add_member(i, d)
1721 Return the model data from a cluster
1722 @param cluster IMP.pmi.output.Cluster object
1731 Save the data for the whole models into a pickle file
1732 @param filename string
1734 self.stath0.save_data(filename)
1738 Set the data from an external IMP.pmi.output.Data
1739 @param data IMP.pmi.output.Data
1741 self.stath0.data = data
1742 self.stath1.data = data
1746 Load the data from an external pickled file
1747 @param filename string
1749 self.stath0.load_data(filename)
1750 self.stath1.load_data(filename)
1751 self.best_models = len(self.stath0)
1753 def add_cluster(self, rmf_name_list):
1755 print(
"creating cluster index "+str(len(self.clusters)))
1756 self.clusters.append(c)
1757 current_len = len(self.stath0)
1759 for rmf
in rmf_name_list:
1760 print(
"adding rmf "+rmf)
1761 self.stath0.add_stat_file(rmf)
1762 self.stath1.add_stat_file(rmf)
1764 for n0
in range(current_len, len(self.stath0)):
1765 d0 = self.stath0[n0]
1766 c.add_member(n0, d0)
1771 Save the clusters into a pickle file
1772 @param filename string
1775 import cPickle
as pickle
1778 fl = open(filename,
'wb')
1779 pickle.dump(self.clusters, fl)
1783 Load the clusters from a pickle file
1784 @param filename string
1785 @param append bool (Default=False), if True. append the clusters
1786 to the ones currently present
1789 import cPickle
as pickle
1792 fl = open(filename,
'rb')
1793 self.clean_clusters()
1795 self.clusters += pickle.load(fl)
1797 self.clusters = pickle.load(fl)
1806 Compute the cluster center for a given cluster
1808 member_distance = defaultdict(float)
1810 for n0, n1
in itertools.combinations(cluster.members, 2):
1813 rmsd, _ = self.
rmsd()
1814 member_distance[n0] += rmsd
1816 if len(member_distance) > 0:
1817 cluster.center_index = min(member_distance,
1818 key=member_distance.get)
1820 cluster.center_index = cluster.members[0]
1825 Save the coordinates of the current cluster a single rmf file
1827 print(
"saving coordinates", cluster)
1831 if rmf_name
is None:
1832 rmf_name = prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
1834 _ = self.stath1[cluster.members[0]]
1836 o.init_rmf(rmf_name, [self.stath1])
1837 for n1
in cluster.members:
1843 o.write_rmf(rmf_name)
1845 o.close_rmf(rmf_name)
1849 remove structures that are similar
1850 append it to a new cluster
1852 print(
"pruning models")
1854 filtered = [selected]
1855 remaining = range(1, len(self.stath1), 10)
1857 while len(remaining) > 0:
1858 d0 = self.stath0[selected]
1860 for n1
in remaining:
1865 if d <= rmsd_cutoff:
1867 print(
"pruning model %s, similar to model %s, rmsd %s"
1868 % (str(n1), str(selected), str(d)))
1869 remaining = [x
for x
in remaining
if x
not in rm]
1870 if len(remaining) == 0:
1872 selected = remaining[0]
1873 filtered.append(selected)
1876 self.clusters.append(c)
1878 d0 = self.stath0[n0]
1879 c.add_member(n0, d0)
1884 Compute the precision of a cluster
1890 if cluster.center_index
is not None:
1891 members1 = [cluster.center_index]
1893 members1 = cluster.members
1897 for n1
in cluster.members:
1902 tmp_rmsd, _ = self.
rmsd()
1907 precision = rmsd/npairs
1908 cluster.precision = precision
1913 Compute the bipartite precision (ie the cross-precision)
1914 between two clusters
1918 for cn0, n0
in enumerate(cluster1.members):
1920 for cn1, n1
in enumerate(cluster2.members):
1922 tmp_rmsd, _ = self.
rmsd()
1924 print(
"--- rmsd between structure %s and structure "
1925 "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1928 precision = rmsd/npairs
1931 def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1932 cluster_ref=
None, step=1):
1934 Compute the Root mean square fluctuations
1935 of a molecule in a cluster
1936 Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1937 residue indexes and the value is the rmsf
1939 rmsf = IMP.pmi.tools.OrderedDict()
1942 if cluster_ref
is not None:
1943 if cluster_ref.center_index
is not None:
1944 members0 = [cluster_ref.center_index]
1946 members0 = cluster_ref.members
1948 if cluster.center_index
is not None:
1949 members0 = [cluster.center_index]
1951 members0 = cluster.members
1954 copy_index=copy_index, state_index=state_index)
1955 ps0 = s0.get_selected_particles()
1957 residue_indexes = list(IMP.pmi.tools.OrderedSet(
1963 d0 = self.stath0[n0]
1964 for n1
in cluster.members[::step]:
1966 print(
"--- rmsf %s %s" % (str(n0), str(n1)))
1970 self.stath1, molecule=molecule,
1971 residue_indexes=residue_indexes, resolution=1,
1972 copy_index=copy_index, state_index=state_index)
1973 ps1 = s1.get_selected_particles()
1975 d1 = self.stath1[n1]
1978 for n, (p0, p1)
in enumerate(zip(ps0, ps1)):
1979 r = residue_indexes[n]
1991 for stath
in [self.stath0, self.stath1]:
1992 if molecule
not in self.symmetric_molecules:
1994 stath, molecule=molecule, residue_index=r,
1995 resolution=1, copy_index=copy_index,
1996 state_index=state_index)
1999 stath, molecule=molecule, residue_index=r,
2000 resolution=1, state_index=state_index)
2002 ps = s.get_selected_particles()
2011 def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2012 reference=
"Absolute", prefix=
"./", step=1):
2018 for n1
in cluster.members[::step]:
2019 print(
"density "+str(n1))
2024 dens.add_subunits_density(self.stath1)
2026 dens.write_mrc(path=prefix+
'/', suffix=str(cluster.cluster_id))
2029 def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2030 consolidate=
False, molecules=
None, prefix=
'./',
2031 reference=
"Absolute"):
2035 import matplotlib.pyplot
as plt
2036 import matplotlib.cm
as cm
2037 from scipy.spatial.distance
import cdist
2039 if molecules
is None:
2048 molecules=molecules).get_selected_particles())]
2049 unique_copies = [mol
for mol
in mols
if mol.get_copy_index() == 0]
2050 mol_names_unique = dict((mol.get_name(), mol)
for mol
in unique_copies)
2051 total_len_unique = sum(max(mol.get_residue_indexes())
2052 for mol
in unique_copies)
2059 seqlen = max(mol.get_residue_indexes())
2060 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2064 for mol
in unique_copies:
2065 seqlen = max(mol.get_residue_indexes())
2066 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2069 for ncl, n1
in enumerate(cluster.members):
2072 coord_dict = IMP.pmi.tools.OrderedDict()
2074 rindexes = mol.get_residue_indexes()
2075 coords = np.ones((max(rindexes), 3))
2076 for rnum
in rindexes:
2079 selpart = sel.get_selected_particles()
2080 if len(selpart) == 0:
2082 selpart = selpart[0]
2083 coords[rnum - 1, :] = \
2085 coord_dict[mol] = coords
2088 coords = np.concatenate(list(coord_dict.values()))
2089 dists = cdist(coords, coords)
2090 binary_dists = np.where((dists <= contact_threshold)
2091 & (dists >= 1.0), 1.0, 0.0)
2093 binary_dists_dict = {}
2095 len1 = max(mol1.get_residue_indexes())
2097 name1 = mol1.get_name()
2098 name2 = mol2.get_name()
2099 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2100 if (name1, name2)
not in binary_dists_dict:
2101 binary_dists_dict[(name1, name2)] = \
2102 np.zeros((len1, len1))
2103 binary_dists_dict[(name1, name2)] += \
2104 np.where((dists <= contact_threshold)
2105 & (dists >= 1.0), 1.0, 0.0)
2106 binary_dists = np.zeros((total_len_unique, total_len_unique))
2108 for name1, name2
in binary_dists_dict:
2109 r1 = index_dict[mol_names_unique[name1]]
2110 r2 = index_dict[mol_names_unique[name2]]
2111 binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2112 np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2118 contact_freqs = binary_dists
2120 dist_maps.append(dists)
2121 av_dist_map += dists
2122 contact_freqs += binary_dists
2125 contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2127 contact_freqs = 1.0/len(cluster)*contact_freqs
2128 av_dist_map = 1.0/len(cluster)*contact_freqs
2130 fig = plt.figure(figsize=(100, 100))
2131 ax = fig.add_subplot(111)
2134 gap_between_components = 50
2139 sorted_tuple = sorted(
2141 mol).get_extended_name(), mol)
for mol
in mols)
2142 prot_list = list(zip(*sorted_tuple))[1]
2144 sorted_tuple = sorted(
2146 for mol
in unique_copies)
2147 prot_list = list(zip(*sorted_tuple))[1]
2149 prot_listx = prot_list
2150 nresx = gap_between_components + \
2151 sum([max(mol.get_residue_indexes())
2152 + gap_between_components
for mol
in prot_listx])
2155 prot_listy = prot_list
2156 nresy = gap_between_components + \
2157 sum([max(mol.get_residue_indexes())
2158 + gap_between_components
for mol
in prot_listy])
2163 res = gap_between_components
2164 for mol
in prot_listx:
2165 resoffsetx[mol] = res
2166 res += max(mol.get_residue_indexes())
2168 res += gap_between_components
2172 res = gap_between_components
2173 for mol
in prot_listy:
2174 resoffsety[mol] = res
2175 res += max(mol.get_residue_indexes())
2177 res += gap_between_components
2179 resoffsetdiagonal = {}
2180 res = gap_between_components
2181 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2182 resoffsetdiagonal[mol] = res
2183 res += max(mol.get_residue_indexes())
2184 res += gap_between_components
2189 for n, prot
in enumerate(prot_listx):
2190 res = resoffsetx[prot]
2192 for proty
in prot_listy:
2193 resy = resoffsety[proty]
2194 endy = resendy[proty]
2195 ax.plot([res, res], [resy, endy], linestyle=
'-',
2196 color=
'gray', lw=0.4)
2197 ax.plot([end, end], [resy, endy], linestyle=
'-',
2198 color=
'gray', lw=0.4)
2199 xticks.append((float(res) + float(end)) / 2)
2201 prot).get_extended_name())
2205 for n, prot
in enumerate(prot_listy):
2206 res = resoffsety[prot]
2208 for protx
in prot_listx:
2209 resx = resoffsetx[protx]
2210 endx = resendx[protx]
2211 ax.plot([resx, endx], [res, res], linestyle=
'-',
2212 color=
'gray', lw=0.4)
2213 ax.plot([resx, endx], [end, end], linestyle=
'-',
2214 color=
'gray', lw=0.4)
2215 yticks.append((float(res) + float(end)) / 2)
2217 prot).get_extended_name())
2221 tmp_array = np.zeros((nresx, nresy))
2223 for px
in prot_listx:
2224 for py
in prot_listy:
2225 resx = resoffsetx[px]
2226 lengx = resendx[px] - 1
2227 resy = resoffsety[py]
2228 lengy = resendy[py] - 1
2229 indexes_x = index_dict[px]
2230 minx = min(indexes_x)
2231 maxx = max(indexes_x)
2232 indexes_y = index_dict[py]
2233 miny = min(indexes_y)
2234 maxy = max(indexes_y)
2235 tmp_array[resx:lengx, resy:lengy] = \
2236 contact_freqs[minx:maxx, miny:maxy]
2237 ret[(px, py)] = np.argwhere(
2238 contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2240 ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2241 origin=
'lower', alpha=0.6, interpolation=
'nearest')
2243 ax.set_xticks(xticks)
2244 ax.set_xticklabels(xlabels, rotation=90)
2245 ax.set_yticks(yticks)
2246 ax.set_yticklabels(ylabels)
2247 plt.setp(ax.get_xticklabels(), fontsize=6)
2248 plt.setp(ax.get_yticklabels(), fontsize=6)
2251 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2252 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2254 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf",
2255 dpi=300, transparent=
"False")
2258 def plot_rmsd_matrix(self, filename):
2259 self.compute_all_pairwise_rmsd()
2260 distance_matrix = np.zeros(
2261 (len(self.stath0), len(self.stath1)))
2262 for (n0, n1)
in self.pairwise_rmsd:
2263 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2265 import matplotlib
as mpl
2267 import matplotlib.pylab
as pl
2268 from scipy.cluster
import hierarchy
as hrc
2270 fig = pl.figure(figsize=(10, 8))
2271 ax = fig.add_subplot(212)
2272 dendrogram = hrc.dendrogram(
2273 hrc.linkage(distance_matrix),
2276 leaves_order = dendrogram[
'leaves']
2277 ax.set_xlabel(
'Model')
2278 ax.set_ylabel(
'RMSD [Angstroms]')
2280 ax2 = fig.add_subplot(221)
2282 distance_matrix[leaves_order, :][:, leaves_order],
2283 interpolation=
'nearest')
2284 cb = fig.colorbar(cax)
2285 cb.set_label(
'RMSD [Angstroms]')
2286 ax2.set_xlabel(
'Model')
2287 ax2.set_ylabel(
'Model')
2289 pl.savefig(filename, dpi=300)
2298 Update the cluster id numbers
2300 for n, c
in enumerate(self.clusters):
2303 def get_molecule(self, hier, name, copy):
2311 self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2312 self.sel0_rmsd.get_selected_particles())
2313 self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2314 self.sel1_rmsd.get_selected_particles())
2315 for mol
in self.seldict0:
2316 for sel
in self.seldict0[mol]:
2317 self.issymmetricsel[sel] =
False
2318 for mol
in self.symmetric_molecules:
2319 self.symmetric_molecules[mol] = len(self.seldict0[mol])
2320 for sel
in self.seldict0[mol]:
2321 self.issymmetricsel[sel] =
True
2325 self.sel1_alignment, self.sel0_alignment)
2327 for rb
in self.rbs1:
2330 for bead
in self.beads1:
2338 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2340 initial filling of the clusters.
2343 print(
"clustering model "+str(n0))
2344 d0 = self.stath0[n0]
2346 print(
"creating cluster index "+str(len(self.clusters)))
2347 self.clusters.append(c)
2348 c.add_member(n0, d0)
2349 clustered = set([n0])
2351 print(
"--- trying to add model " + str(n1) +
" to cluster "
2352 + str(len(self.clusters)))
2353 d1 = self.stath1[n1]
2356 rmsd, _ = self.
rmsd(metric=metric)
2357 if rmsd < rmsd_cutoff:
2358 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2359 c.add_member(n1, d1)
2362 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2367 merge the clusters that have close members
2368 @param rmsd_cutoff cutoff distance in Angstorms
2376 for c0, c1
in filter(
lambda x: len(x[0].members) > 1,
2377 itertools.combinations(self.clusters, 2)):
2378 n0, n1 = [c.members[0]
for c
in (c0, c1)]
2381 rmsd, _ = self.
rmsd()
2382 if (rmsd < 2*rmsd_cutoff
and
2384 to_merge.append((c0, c1))
2386 for c0, c
in reversed(to_merge):
2390 self.clusters = [c
for c
in
2391 filter(
lambda x: len(x.members) > 0, self.clusters)]
2395 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2397 print(
"check close members for clusters " + str(c0.cluster_id) +
2398 " and " + str(c1.cluster_id))
2399 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2402 rmsd, _ = self.
rmsd(metric=metric)
2403 if rmsd < rmsd_cutoff:
2418 a function that returns the permutation best_sel of sels0 that
2421 best_rmsd2 = float(
'inf')
2423 if self.issymmetricsel[sels0[0]]:
2426 for offset
in range(N):
2427 sels = [sels0[(offset+i) % N]
for i
in range(N)]
2430 r = metric(sel0, sel1)
2432 if rmsd2 < best_rmsd2:
2436 for sels
in itertools.permutations(sels0):
2438 for sel0, sel1
in itertools.takewhile(
2439 lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2440 r = metric(sel0, sel1)
2442 if rmsd2 < best_rmsd2:
2445 return best_sel, best_rmsd2
2447 def compute_all_pairwise_rmsd(self):
2448 for d0
in self.stath0:
2449 for d1
in self.stath1:
2450 rmsd, _ = self.
rmsd()
2452 def rmsd(self, metric=IMP.atom.get_rmsd):
2454 Computes the RMSD. Resolves ambiguous pairs assignments
2458 n0 = self.stath0.current_index
2459 n1 = self.stath1.current_index
2460 if ((n0, n1)
in self.pairwise_rmsd) \
2461 and ((n0, n1)
in self.pairwise_molecular_assignment):
2462 return (self.pairwise_rmsd[(n0, n1)],
2463 self.pairwise_molecular_assignment[(n0, n1)])
2473 molecular_assignment = {}
2474 for molname, sels0
in self.seldict0.items():
2475 sels_best_order, best_rmsd2 = \
2476 self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2478 Ncoords = len(sels_best_order[0].get_selected_particles())
2479 Ncopies = len(self.seldict1[molname])
2480 total_rmsd += Ncoords*best_rmsd2
2481 total_N += Ncoords*Ncopies
2483 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2484 p0 = sel0.get_selected_particles()[0]
2485 p1 = sel1.get_selected_particles()[0]
2490 molecular_assignment[(molname, c0)] = (molname, c1)
2492 total_rmsd = math.sqrt(total_rmsd/total_N)
2494 self.pairwise_rmsd[(n0, n1)] = total_rmsd
2495 self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2496 self.pairwise_rmsd[(n1, n0)] = total_rmsd
2497 self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2498 return total_rmsd, molecular_assignment
2502 Fix the reference structure for structural alignment, rmsd and
2505 @param reference can be either "Absolute" (cluster center of the
2506 first cluster) or Relative (cluster center of the current
2509 if reference ==
"Absolute":
2511 elif reference ==
"Relative":
2512 if cluster.center_index:
2513 n0 = cluster.center_index
2515 n0 = cluster.members[0]
2520 compute the molecular assignemnts between multiple copies
2521 of the same sequence. It changes the Copy index of Molecules
2524 _, molecular_assignment = self.
rmsd()
2525 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2526 mol0 = self.molcopydict0[m0][c0]
2527 mol1 = self.molcopydict1[m1][c1]
2530 p1.set_value(cik0, c0)
2534 Undo the Copy index assignment
2537 _, molecular_assignment = self.
rmsd()
2538 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2539 mol0 = self.molcopydict0[m0][c0]
2540 mol1 = self.molcopydict1[m1][c1]
2543 p1.set_value(cik0, c1)
2550 s =
"AnalysisReplicaExchange\n"
2551 s +=
"---- number of clusters %s \n" % str(len(self.clusters))
2552 s +=
"---- number of models %s \n" % str(len(self.stath0))
2555 def __getitem__(self, int_slice_adaptor):
2556 if isinstance(int_slice_adaptor, int):
2557 return self.clusters[int_slice_adaptor]
2558 elif isinstance(int_slice_adaptor, slice):
2559 return self.__iter__(int_slice_adaptor)
2561 raise TypeError(
"Unknown Type")
2564 return len(self.clusters)
2566 def __iter__(self, slice_key=None):
2567 if slice_key
is None:
2568 for i
in range(len(self)):
2571 for i
in range(len(self))[slice_key]:
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A member of a rigid body, it has internal (local) coordinates.
A macro to help setup and run replica exchange.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
def merge_aggregates
merge the clusters that have close members
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
Mapping between FASTA one-letter codes and residue types.
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
Sample using replica exchange.
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.