1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
19 from operator
import itemgetter
20 from collections
import defaultdict
28 class _RMFRestraints(object):
29 """All restraints that are written out to the RMF file"""
30 def __init__(self, model, user_restraints):
32 self._user_restraints = user_restraints
if user_restraints
else []
35 return (len(self._user_restraints)
36 + self._rmf_rs.get_number_of_restraints())
40 __nonzero__ = __bool__
42 def __getitem__(self, i):
43 class FakePMIWrapper(object):
44 def __init__(self, r):
45 self.r = IMP.RestraintSet.get_from(r)
46 get_restraint =
lambda self: self.r
47 lenuser = len(self._user_restraints)
49 return self._user_restraints[i]
50 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
51 r = self._rmf_rs.get_restraint(i - lenuser)
52 return FakePMIWrapper(r)
54 raise IndexError(
"Out of range")
58 """A macro to help setup and run replica exchange.
59 Supports Monte Carlo and molecular dynamics.
60 Produces trajectory RMF files, best PDB structures,
61 and output stat files.
65 monte_carlo_sample_objects=
None,
66 molecular_dynamics_sample_objects=
None,
68 rmf_output_objects=
None,
69 crosslink_restraints=
None,
70 monte_carlo_temperature=1.0,
71 simulated_annealing=
False,
72 simulated_annealing_minimum_temperature=1.0,
73 simulated_annealing_maximum_temperature=2.5,
74 simulated_annealing_minimum_temperature_nframes=100,
75 simulated_annealing_maximum_temperature_nframes=100,
76 replica_exchange_minimum_temperature=1.0,
77 replica_exchange_maximum_temperature=2.5,
78 replica_exchange_swap=
True,
80 number_of_best_scoring_models=500,
83 molecular_dynamics_steps=10,
84 molecular_dynamics_max_time_step=1.0,
85 number_of_frames=1000,
86 save_coordinates_mode=
"lowest_temperature",
87 nframes_write_coordinates=1,
88 write_initial_rmf=
True,
89 initial_rmf_name_suffix=
"initial",
90 stat_file_name_suffix=
"stat",
91 best_pdb_name_suffix=
"model",
93 do_create_directories=
True,
94 global_output_directory=
"./",
97 replica_stat_file_suffix=
"stat_replica",
98 em_object_for_rmf=
None,
100 replica_exchange_object=
None,
103 @param model The IMP model
104 @param root_hier Top-level (System)hierarchy
105 @param monte_carlo_sample_objects Objects for MC sampling; a list of
106 structural components (generally the representation) that will
107 be moved and restraints with parameters that need to
109 For PMI2: just pass flat list of movers
110 @param molecular_dynamics_sample_objects Objects for MD sampling
111 For PMI2: just pass flat list of particles
112 @param output_objects A list of structural objects and restraints
113 that will be included in output (ie, statistics "stat" files). Any object
114 that provides a get_output() method can be used here. If None is passed
115 the macro will not write stat files.
116 @param rmf_output_objects A list of structural objects and restraints
117 that will be included in rmf. Any object
118 that provides a get_output() method can be used here.
119 @param monte_carlo_temperature MC temp (may need to be optimized
120 based on post-sampling analysis)
121 @param simulated_annealing If True, perform simulated annealing
122 @param simulated_annealing_minimum_temperature Should generally be
123 the same as monte_carlo_temperature.
124 @param simulated_annealing_minimum_temperature_nframes Number of
125 frames to compute at minimum temperature.
126 @param simulated_annealing_maximum_temperature_nframes Number of
128 temps > simulated_annealing_maximum_temperature.
129 @param replica_exchange_minimum_temperature Low temp for REX; should
130 generally be the same as monte_carlo_temperature.
131 @param replica_exchange_maximum_temperature High temp for REX
132 @param replica_exchange_swap Boolean, enable disable temperature
134 @param num_sample_rounds Number of rounds of MC/MD per cycle
135 @param number_of_best_scoring_models Number of top-scoring PDB models
136 to keep around for analysis
137 @param monte_carlo_steps Number of MC steps per round
138 @param self_adaptive self adaptive scheme for monte carlo movers
139 @param molecular_dynamics_steps Number of MD steps per round
140 @param molecular_dynamics_max_time_step Max time step for MD
141 @param number_of_frames Number of REX frames to run
142 @param save_coordinates_mode string: how to save coordinates.
143 "lowest_temperature" (default) only the lowest temperatures is saved
144 "25th_score" all replicas whose score is below the 25th percentile
145 "50th_score" all replicas whose score is below the 50th percentile
146 "75th_score" all replicas whose score is below the 75th percentile
147 @param nframes_write_coordinates How often to write the coordinates
149 @param write_initial_rmf Write the initial configuration
150 @param global_output_directory Folder that will be created to house
152 @param test_mode Set to True to avoid writing any files, just test one frame.
158 self.output_objects = output_objects
159 self.rmf_output_objects=rmf_output_objects
161 and root_hier.get_name() ==
'System'):
162 if self.output_objects
is not None:
164 if self.rmf_output_objects
is not None:
166 self.root_hier = root_hier
167 states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
168 self.vars[
"number_of_states"] = len(states)
170 self.root_hiers = states
171 self.is_multi_state =
True
173 self.root_hier = root_hier
174 self.is_multi_state =
False
176 raise TypeError(
"Must provide System hierarchy (root_hier)")
178 if crosslink_restraints:
180 "crosslink_restraints is deprecated and is ignored; "
181 "all cross-link restraints should be automatically "
182 "added to output RMF files")
183 self._rmf_restraints = _RMFRestraints(model,
None)
184 self.em_object_for_rmf = em_object_for_rmf
185 self.monte_carlo_sample_objects = monte_carlo_sample_objects
186 self.vars[
"self_adaptive"]=self_adaptive
187 if sample_objects
is not None:
189 "sample_objects is deprecated; use monte_carlo_sample_objects "
190 "(or molecular_dynamics_sample_objects) instead")
191 self.monte_carlo_sample_objects+=sample_objects
192 self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
193 self.replica_exchange_object = replica_exchange_object
194 self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
195 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
197 "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
199 "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
200 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
201 self.vars[
"simulated_annealing"]=\
203 self.vars[
"simulated_annealing_minimum_temperature"]=\
204 simulated_annealing_minimum_temperature
205 self.vars[
"simulated_annealing_maximum_temperature"]=\
206 simulated_annealing_maximum_temperature
207 self.vars[
"simulated_annealing_minimum_temperature_nframes"]=\
208 simulated_annealing_minimum_temperature_nframes
209 self.vars[
"simulated_annealing_maximum_temperature_nframes"]=\
210 simulated_annealing_maximum_temperature_nframes
212 self.vars[
"num_sample_rounds"] = num_sample_rounds
214 "number_of_best_scoring_models"] = number_of_best_scoring_models
215 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
216 self.vars[
"molecular_dynamics_steps"]=molecular_dynamics_steps
217 self.vars[
"number_of_frames"] = number_of_frames
218 if not save_coordinates_mode
in [
"lowest_temperature",
"25th_score",
"50th_score",
"75th_score"]:
219 raise Exception(
"save_coordinates_mode has unrecognized value")
221 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
222 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
223 self.vars[
"write_initial_rmf"] = write_initial_rmf
224 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
225 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
226 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
227 self.vars[
"do_clean_first"] = do_clean_first
228 self.vars[
"do_create_directories"] = do_create_directories
229 self.vars[
"global_output_directory"] = global_output_directory
230 self.vars[
"rmf_dir"] = rmf_dir
231 self.vars[
"best_pdb_dir"] = best_pdb_dir
232 self.vars[
"atomistic"] = atomistic
233 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
234 self.vars[
"geometries"] =
None
235 self.test_mode = test_mode
238 if self.vars[
"geometries"]
is None:
239 self.vars[
"geometries"] = list(geometries)
241 self.vars[
"geometries"].extend(geometries)
244 print(
"ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
245 print(
"--- it stores the best scoring pdb models in pdbs/")
246 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
247 print(
"--- variables:")
248 keys = list(self.vars.keys())
251 print(
"------", v.ljust(30), self.vars[v])
253 def get_replica_exchange_object(self):
254 return self.replica_exchange_object
256 def _add_provenance(self, sampler_md, sampler_mc):
257 """Record details about the sampling in the IMP Hierarchies"""
260 method =
"Molecular Dynamics"
261 iterations += self.vars[
"molecular_dynamics_steps"]
263 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
264 iterations += self.vars[
"monte_carlo_steps"]
266 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
268 iterations *= self.vars[
"num_sample_rounds"]
270 pi = self.model.add_particle(
"sampling")
272 self.model, pi, method, self.vars[
"number_of_frames"],
274 p.set_number_of_replicas(
275 self.replica_exchange_object.get_number_of_replicas())
276 IMP.pmi.tools._add_pmi_provenance(self.root_hier)
279 def execute_macro(self):
280 temp_index_factor = 100000.0
284 if self.monte_carlo_sample_objects
is not None:
285 print(
"Setting up MonteCarlo")
287 self.monte_carlo_sample_objects,
288 self.vars[
"monte_carlo_temperature"])
289 if self.vars[
"simulated_annealing"]:
290 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
291 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
292 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
293 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
294 sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
295 if self.vars[
"self_adaptive"]:
296 sampler_mc.set_self_adaptive(isselfadaptive=self.vars[
"self_adaptive"])
297 if self.output_objects
is not None:
298 self.output_objects.append(sampler_mc)
299 if self.rmf_output_objects
is not None:
300 self.rmf_output_objects.append(sampler_mc)
301 samplers.append(sampler_mc)
304 if self.molecular_dynamics_sample_objects
is not None:
305 print(
"Setting up MolecularDynamics")
307 self.molecular_dynamics_sample_objects,
308 self.vars[
"monte_carlo_temperature"],
309 maximum_time_step=self.molecular_dynamics_max_time_step)
310 if self.vars[
"simulated_annealing"]:
311 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
312 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
313 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
314 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
315 sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
316 if self.output_objects
is not None:
317 self.output_objects.append(sampler_md)
318 if self.rmf_output_objects
is not None:
319 self.rmf_output_objects.append(sampler_md)
320 samplers.append(sampler_md)
323 print(
"Setting up ReplicaExchange")
326 "replica_exchange_minimum_temperature"],
328 "replica_exchange_maximum_temperature"],
330 replica_exchange_object=self.replica_exchange_object)
331 self.replica_exchange_object = rex.rem
333 myindex = rex.get_my_index()
334 if self.output_objects
is not None:
335 self.output_objects.append(rex)
336 if self.rmf_output_objects
is not None:
337 self.rmf_output_objects.append(rex)
341 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
345 globaldir = self.vars[
"global_output_directory"] +
"/"
346 rmf_dir = globaldir + self.vars[
"rmf_dir"]
347 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
349 if not self.test_mode:
350 if self.vars[
"do_clean_first"]:
353 if self.vars[
"do_create_directories"]:
356 os.makedirs(globaldir)
364 if not self.is_multi_state:
370 for n
in range(self.vars[
"number_of_states"]):
372 os.makedirs(pdb_dir +
"/" + str(n))
379 if self.output_objects
is not None:
380 self.output_objects.append(sw)
381 if self.rmf_output_objects
is not None:
382 self.rmf_output_objects.append(sw)
384 print(
"Setting up stat file")
386 low_temp_stat_file = globaldir + \
387 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
390 if not self.test_mode:
393 if not self.test_mode:
394 if self.output_objects
is not None:
395 output.init_stat2(low_temp_stat_file,
397 extralabels=[
"rmf_file",
"rmf_frame_index"])
399 print(
"Stat file writing is disabled")
401 if self.rmf_output_objects
is not None:
402 print(
"Stat info being written in the rmf file")
404 print(
"Setting up replica stat file")
405 replica_stat_file = globaldir + \
406 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
407 if not self.test_mode:
408 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
410 if not self.test_mode:
411 print(
"Setting up best pdb files")
412 if not self.is_multi_state:
413 if self.vars[
"number_of_best_scoring_models"] > 0:
414 output.init_pdb_best_scoring(pdb_dir +
"/" +
415 self.vars[
"best_pdb_name_suffix"],
418 "number_of_best_scoring_models"],
419 replica_exchange=
True)
420 output.write_psf(pdb_dir +
"/" +
"model.psf",pdb_dir +
"/" +
421 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
423 if self.vars[
"number_of_best_scoring_models"] > 0:
424 for n
in range(self.vars[
"number_of_states"]):
425 output.init_pdb_best_scoring(pdb_dir +
"/" + str(n) +
"/" +
426 self.vars[
"best_pdb_name_suffix"],
429 "number_of_best_scoring_models"],
430 replica_exchange=
True)
431 output.write_psf(pdb_dir +
"/" + str(n) +
"/" +
"model.psf",pdb_dir +
"/" + str(n) +
"/" +
432 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
435 if self.em_object_for_rmf
is not None:
436 output_hierarchies = [
438 self.em_object_for_rmf.get_density_as_hierarchy(
441 output_hierarchies = [self.root_hier]
444 if not self.test_mode:
445 print(
"Setting up and writing initial rmf coordinate file")
446 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
447 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
448 output_hierarchies,listofobjects=self.rmf_output_objects)
449 if self._rmf_restraints:
450 output.add_restraints_to_rmf(
451 init_suffix +
"." + str(myindex) +
".rmf3",
452 self._rmf_restraints)
453 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
454 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
458 if not self.test_mode:
459 mpivs=IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
463 self._add_provenance(sampler_md, sampler_mc)
465 if not self.test_mode:
466 print(
"Setting up production rmf files")
467 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
468 output.init_rmf(rmfname, output_hierarchies, geometries=self.vars[
"geometries"],
469 listofobjects = self.rmf_output_objects)
471 if self._rmf_restraints:
472 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
474 ntimes_at_low_temp = 0
478 self.replica_exchange_object.set_was_used(
True)
479 nframes = self.vars[
"number_of_frames"]
482 for i
in range(nframes):
486 for nr
in range(self.vars[
"num_sample_rounds"]):
487 if sampler_md
is not None:
489 self.vars[
"molecular_dynamics_steps"])
490 if sampler_mc
is not None:
491 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
493 self.model).evaluate(
False)
494 mpivs.set_value(
"score",score)
495 output.set_output_entry(
"score", score)
499 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
501 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
502 save_frame=(min_temp_index == my_temp_index)
503 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
504 score_perc=mpivs.get_percentile(
"score")
505 save_frame=(score_perc*100.0<=25.0)
506 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
507 score_perc=mpivs.get_percentile(
"score")
508 save_frame=(score_perc*100.0<=50.0)
509 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
510 score_perc=mpivs.get_percentile(
"score")
511 save_frame=(score_perc*100.0<=75.0)
514 if save_frame
and not self.test_mode:
518 print(
"--- frame %s score %s " % (str(i), str(score)))
520 if not self.test_mode:
521 if i % self.vars[
"nframes_write_coordinates"]==0:
522 print(
'--- writing coordinates')
523 if self.vars[
"number_of_best_scoring_models"] > 0:
524 output.write_pdb_best_scoring(score)
525 output.write_rmf(rmfname)
526 output.set_output_entry(
"rmf_file", rmfname)
527 output.set_output_entry(
"rmf_frame_index", ntimes_at_low_temp)
529 output.set_output_entry(
"rmf_file", rmfname)
530 output.set_output_entry(
"rmf_frame_index",
'-1')
531 if self.output_objects
is not None:
532 output.write_stat2(low_temp_stat_file)
533 ntimes_at_low_temp += 1
535 if not self.test_mode:
536 output.write_stat2(replica_stat_file)
537 if self.vars[
"replica_exchange_swap"]:
538 rex.swap_temp(i, score)
539 for p, state
in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
540 p.add_replica_exchange(state, self)
542 if not self.test_mode:
543 print(
"closing production rmf files")
544 output.close_rmf(rmfname)
550 """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
551 Easily create multi-state systems by calling this macro
552 repeatedly with different TopologyReader objects!
553 A useful function is get_molecules() which returns the PMI Molecules grouped by state
554 as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
555 Quick multi-state system:
558 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
559 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
560 bs = IMP.pmi.macros.BuildSystem(model)
561 bs.add_state(reader1)
562 bs.add_state(reader2)
563 bs.execute_macro() # build everything including degrees of freedom
564 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
565 ### now you have a two state system, you add restraints etc
567 @note The "domain name" entry of the topology reader is not used.
568 All molecules are set up by the component name, but split into rigid bodies
572 _alphabets = {
'DNA': IMP.pmi.alphabets.dna,
573 'RNA': IMP.pmi.alphabets.rna}
577 sequence_connectivity_scale=4.0,
578 force_create_gmm_files=
False,
581 @param model An IMP Model
582 @param sequence_connectivity_scale For scaling the connectivity restraint
583 @param force_create_gmm_files If True, will sample and create GMMs
584 no matter what. If False, will only sample if the
585 files don't exist. If number of Gaussians is zero, won't
587 @param resolutions The resolutions to build for structured regions
592 self._domain_res = []
594 self.force_create_gmm_files = force_create_gmm_files
595 self.resolutions = resolutions
599 keep_chain_id=
False, fasta_name_map=
None):
600 """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
601 When you are done adding states, call execute_macro()
602 @param reader The TopologyReader object
603 @param fasta_name_map dictionary for converting protein names found in the fasta file
605 state = self.system.create_state()
606 self._readers.append(reader)
607 these_domain_res = {}
609 chain_ids = string.ascii_uppercase+string.ascii_lowercase+
'0123456789'
614 for molname
in reader.get_molecules():
615 copies = reader.get_molecules()[molname].domains
616 for nc,copyname
in enumerate(copies):
617 print(
"BuildSystem.add_state: setting up molecule %s copy number %s" % (molname,str(nc)))
618 copy = copies[copyname]
620 if keep_chain_id ==
True:
621 chain_id = copy[0].chain
623 chain_id = chain_ids[numchain]
625 alphabet = IMP.pmi.alphabets.amino_acid
626 fasta_flag=copy[0].fasta_flag
627 if fasta_flag
in self._alphabets:
628 alphabet = self._alphabets[fasta_flag]
630 print(
"BuildSystem.add_state: molecule %s sequence has %s residues" % (molname,len(seq)))
631 orig_mol = state.create_molecule(molname, seq, chain_id,
636 print(
"BuildSystem.add_state: creating a copy for molecule %s" % molname)
637 mol = orig_mol.create_copy(chain_id)
640 for domainnumber,domain
in enumerate(copy):
641 print(
"BuildSystem.add_state: ---- setting up domain %s of molecule %s" % (domainnumber,molname))
644 these_domains[domain.get_unique_name()] = domain
645 if domain.residue_range==[]
or domain.residue_range
is None:
646 domain_res = mol.get_residues()
648 start = domain.residue_range[0]+domain.pdb_offset
649 if domain.residue_range[1]==
'END':
650 end = len(mol.sequence)
652 end = domain.residue_range[1]+domain.pdb_offset
653 domain_res = mol.residue_range(start-1,end-1)
654 print(
"BuildSystem.add_state: -------- domain %s of molecule %s extends from residue %s to residue %s " % (domainnumber,molname,start,end))
655 if domain.pdb_file==
"BEADS":
656 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by BEADS " % (domainnumber,molname))
657 mol.add_representation(
659 resolutions=[domain.bead_size],
660 setup_particles_as_densities=(
661 domain.em_residues_per_gaussian!=0),
662 color = domain.color)
663 these_domain_res[domain.get_unique_name()] = (set(),domain_res)
664 elif domain.pdb_file==
"IDEAL_HELIX":
665 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by IDEAL_HELIX " % (domainnumber,molname))
666 mol.add_representation(
668 resolutions=self.resolutions,
670 density_residues_per_component=domain.em_residues_per_gaussian,
671 density_prefix=domain.density_prefix,
672 density_force_compute=self.force_create_gmm_files,
673 color = domain.color)
674 these_domain_res[domain.get_unique_name()] = (domain_res,set())
676 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by pdb file %s " % (domainnumber,molname,domain.pdb_file))
677 domain_atomic = mol.add_structure(domain.pdb_file,
679 domain.residue_range,
682 domain_non_atomic = domain_res - domain_atomic
683 if not domain.em_residues_per_gaussian:
684 mol.add_representation(domain_atomic,
685 resolutions=self.resolutions,
686 color = domain.color)
687 if len(domain_non_atomic)>0:
688 mol.add_representation(domain_non_atomic,
689 resolutions=[domain.bead_size],
690 color = domain.color)
692 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by gaussians " % (domainnumber,molname))
693 mol.add_representation(
695 resolutions=self.resolutions,
696 density_residues_per_component=domain.em_residues_per_gaussian,
697 density_prefix=domain.density_prefix,
698 density_force_compute=self.force_create_gmm_files,
699 color = domain.color)
700 if len(domain_non_atomic)>0:
701 mol.add_representation(domain_non_atomic,
702 resolutions=[domain.bead_size],
703 setup_particles_as_densities=
True,
704 color = domain.color)
705 these_domain_res[domain.get_unique_name()] = (
706 domain_atomic,domain_non_atomic)
707 self._domain_res.append(these_domain_res)
708 self._domains.append(these_domains)
709 print(
'BuildSystem.add_state: State',len(self.system.states),
'added')
712 """Return list of all molecules grouped by state.
713 For each state, it's a dictionary of Molecules where key is the molecule name
715 return [s.get_molecules()
for s
in self.system.get_states()]
717 def get_molecule(self,molname,copy_index=0,state_index=0):
718 return self.system.get_states()[state_index].
get_molecules()[molname][copy_index]
720 def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04, max_bead_trans=4.0, max_srb_trans=4.0,max_srb_rot=0.04):
721 """Builds representations and sets up degrees of freedom"""
722 print(
"BuildSystem.execute_macro: building representations")
723 self.root_hier = self.system.build()
725 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
727 for nstate,reader
in enumerate(self._readers):
728 rbs = reader.get_rigid_bodies()
729 srbs = reader.get_super_rigid_bodies()
730 csrbs = reader.get_chains_of_super_rigid_bodies()
733 domains_in_rbs = set()
735 print(
"BuildSystem.execute_macro: -------- building rigid body %s" % (str(rblist)))
736 all_res = IMP.pmi.tools.OrderedSet()
737 bead_res = IMP.pmi.tools.OrderedSet()
739 domain = self._domains[nstate][dname]
740 print(
"BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
741 all_res|=self._domain_res[nstate][dname][0]
742 bead_res|=self._domain_res[nstate][dname][1]
743 domains_in_rbs.add(dname)
745 print(
"BuildSystem.execute_macro: -------- \
746 creating rigid body with max_trans %s \
747 max_rot %s non_rigid_max_trans %s" \
748 % (str(max_rb_trans),str(max_rb_rot),str(max_bead_trans)))
749 self.dof.create_rigid_body(all_res,
750 nonrigid_parts=bead_res,
751 max_trans=max_rb_trans,
753 nonrigid_max_trans=max_bead_trans)
756 for dname, domain
in self._domains[nstate].items():
757 if dname
not in domains_in_rbs:
758 if domain.pdb_file !=
"BEADS":
760 "Making %s flexible. This may distort the "
761 "structure; consider making it rigid" % dname,
763 self.dof.create_flexible_beads(
764 self._domain_res[nstate][dname][1],
765 max_trans=max_bead_trans)
769 print(
"BuildSystem.execute_macro: -------- building super rigid body %s" % (str(srblist)))
770 all_res = IMP.pmi.tools.OrderedSet()
771 for dname
in srblist:
772 print(
"BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
773 all_res|=self._domain_res[nstate][dname][0]
774 all_res|=self._domain_res[nstate][dname][1]
776 print(
"BuildSystem.execute_macro: -------- \
777 creating super rigid body with max_trans %s max_rot %s " \
778 % (str(max_srb_trans),str(max_srb_rot)))
779 self.dof.create_super_rigid_body(all_res,max_trans=max_srb_trans,max_rot=max_srb_rot)
782 for csrblist
in csrbs:
783 all_res = IMP.pmi.tools.OrderedSet()
784 for dname
in csrblist:
785 all_res|=self._domain_res[nstate][dname][0]
786 all_res|=self._domain_res[nstate][dname][1]
787 all_res = list(all_res)
788 all_res.sort(key=
lambda r:r.get_index())
790 self.dof.create_main_chain_mover(all_res)
791 return self.root_hier,self.dof
796 """A macro for running all the basic operations of analysis.
797 Includes clustering, precision analysis, and making ensemble density maps.
798 A number of plots are also supported.
801 merge_directories=[
"./"],
802 stat_file_name_suffix=
"stat",
803 best_pdb_name_suffix=
"model",
805 do_create_directories=
True,
806 global_output_directory=
"output/",
807 replica_stat_file_suffix=
"stat_replica",
808 global_analysis_result_directory=
"./analysis/",
811 @param model The IMP model
812 @param stat_file_name_suffix
813 @param merge_directories The directories containing output files
814 @param best_pdb_name_suffix
815 @param do_clean_first
816 @param do_create_directories
817 @param global_output_directory Where everything is
818 @param replica_stat_file_suffix
819 @param global_analysis_result_directory
820 @param test_mode If True, nothing is changed on disk
824 from mpi4py
import MPI
825 self.comm = MPI.COMM_WORLD
826 self.rank = self.comm.Get_rank()
827 self.number_of_processes = self.comm.size
830 self.number_of_processes = 1
832 self.test_mode = test_mode
833 self._protocol_output = []
834 self.cluster_obj =
None
836 stat_dir = global_output_directory
839 for rd
in merge_directories:
840 stat_files = glob.glob(os.path.join(rd,stat_dir,
"stat.*.out"))
841 if len(stat_files)==0:
842 warnings.warn(
"no stat files found in %s"
843 % os.path.join(rd, stat_dir),
845 self.stat_files += stat_files
848 """Capture details of the modeling protocol.
849 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
852 self._protocol_output.append((p, p._last_state))
855 score_key=
"SimplifiedModel_Total_Score_None",
856 rmf_file_key=
"rmf_file",
857 rmf_file_frame_key=
"rmf_frame_index",
860 nframes_trajectory=10000):
861 """ Get a trajectory of the modeling run, for generating demonstrative movies
862 @param score_key The score for ranking models
863 @param rmf_file_key Key pointing to RMF filename
864 @param rmf_file_frame_key Key pointing to RMF frame number
865 @param outputdir The local output directory used in the run
866 @param get_every Extract every nth frame
867 @param nframes_trajectory Total number of frames of the trajectory
869 from operator
import itemgetter
877 rmf_file_list=trajectory_models[0]
878 rmf_file_frame_list=trajectory_models[1]
879 score_list=list(map(float, trajectory_models[2]))
881 max_score=max(score_list)
882 min_score=min(score_list)
885 bins=[(max_score-min_score)*math.exp(-float(i))+min_score
for i
in range(nframes_trajectory)]
886 binned_scores=[
None]*nframes_trajectory
887 binned_model_indexes=[-1]*nframes_trajectory
889 for model_index,s
in enumerate(score_list):
890 bins_score_diffs=[abs(s-b)
for b
in bins]
891 bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
892 if binned_scores[bin_index]==
None:
893 binned_scores[bin_index]=s
894 binned_model_indexes[bin_index]=model_index
896 old_diff=abs(binned_scores[bin_index]-bins[bin_index])
897 new_diff=abs(s-bins[bin_index])
898 if new_diff < old_diff:
899 binned_scores[bin_index]=s
900 binned_model_indexes[bin_index]=model_index
903 print(binned_model_indexes)
906 def _expand_ambiguity(self,prot,d):
907 """If using PMI2, expand the dictionary to include copies as ambiguous options
908 This also keeps the states separate.
913 if '..' in key
or (isinstance(val, tuple)
and len(val) >= 3):
916 states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
917 if isinstance(val, tuple):
925 for nst
in range(len(states)):
927 copies = sel.get_selected_particles(with_representation=
False)
929 for nc
in range(len(copies)):
931 newdict[
'%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
933 newdict[
'%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
940 score_key=
"SimplifiedModel_Total_Score_None",
941 rmf_file_key=
"rmf_file",
942 rmf_file_frame_key=
"rmf_frame_index",
947 alignment_components=
None,
948 number_of_best_scoring_models=10,
949 rmsd_calculation_components=
None,
950 distance_matrix_file=
'distances.mat',
951 load_distance_matrix_file=
False,
952 skip_clustering=
False,
953 number_of_clusters=1,
955 exit_after_display=
True,
957 first_and_last_frames=
None,
958 density_custom_ranges=
None,
959 write_pdb_with_centered_coordinates=
False,
961 """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
962 Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
963 Can pass None for copy or state to ignore that field.
964 If you don't pass a specific copy number
965 @param score_key The score for ranking models. Use "Total_Score"
966 instead of default for PMI2.
967 @param rmf_file_key Key pointing to RMF filename
968 @param rmf_file_frame_key Key pointing to RMF frame number
969 @param state_number State number to analyze
970 @param prefiltervalue Only include frames where the
971 score key is below this value
972 @param feature_keys Keywords for which you want to
973 calculate average, medians, etc.
974 If you pass "Keyname" it'll include everything that matches "*Keyname*"
975 @param outputdir The local output directory used in the run
976 @param alignment_components Dictionary with keys=groupname, values are tuples
977 for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
978 @param number_of_best_scoring_models Num models to keep per run
979 @param rmsd_calculation_components For calculating RMSD
980 (same format as alignment_components)
981 @param distance_matrix_file Where to store/read the distance matrix
982 @param load_distance_matrix_file Try to load the distance matrix file
983 @param skip_clustering Just extract the best scoring models
985 @param number_of_clusters Number of k-means clusters
986 @param display_plot Display the distance matrix
987 @param exit_after_display Exit after displaying distance matrix
988 @param get_every Extract every nth frame
989 @param first_and_last_frames A tuple with the first and last frames to be
990 analyzed. Values are percentages!
991 Default: get all frames
992 @param density_custom_ranges For density calculation
993 (same format as alignment_components)
994 @param write_pdb_with_centered_coordinates
995 @param voxel_size Used for the density output
999 self._outputdir = os.path.abspath(outputdir)
1000 self._number_of_clusters = number_of_clusters
1001 for p, state
in self._protocol_output:
1002 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1013 if not load_distance_matrix_file:
1014 if len(self.stat_files)==0: print(
"ERROR: no stat file found in the given path");
return
1015 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1016 self.stat_files,self.number_of_processes)[self.rank]
1020 orig_score_key = score_key
1021 if score_key
not in po.get_keys():
1022 if 'Total_Score' in po.get_keys():
1023 score_key =
'Total_Score'
1025 "Using 'Total_Score' instead of "
1026 "'SimplifiedModel_Total_Score_None' for the score key",
1028 for k
in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1029 if k
in feature_keys:
1031 "no need to pass " + k +
" to feature_keys.",
1033 feature_keys.remove(k)
1041 get_every, provenance=prov)
1042 rmf_file_list=best_models[0]
1043 rmf_file_frame_list=best_models[1]
1044 score_list=best_models[2]
1045 feature_keyword_list_dict=best_models[3]
1051 if self.number_of_processes > 1:
1055 rmf_file_frame_list)
1056 for k
in feature_keyword_list_dict:
1058 feature_keyword_list_dict[k])
1061 score_rmf_tuples = list(zip(score_list,
1063 rmf_file_frame_list,
1064 list(range(len(score_list)))))
1066 if density_custom_ranges:
1067 for k
in density_custom_ranges:
1068 if not isinstance(density_custom_ranges[k], list):
1069 raise Exception(
"Density custom ranges: values must be lists of tuples")
1072 if first_and_last_frames
is not None:
1073 nframes = len(score_rmf_tuples)
1074 first_frame = int(first_and_last_frames[0] * nframes)
1075 last_frame = int(first_and_last_frames[1] * nframes)
1076 if last_frame > len(score_rmf_tuples):
1078 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1081 best_score_rmf_tuples = sorted(score_rmf_tuples,
1082 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1083 best_score_rmf_tuples=[t+(n,)
for n,t
in enumerate(best_score_rmf_tuples)]
1085 prov.append(IMP.pmi.io.FilterProvenance(
"Best scoring",
1086 0, number_of_best_scoring_models))
1088 best_score_feature_keyword_list_dict = defaultdict(list)
1089 for tpl
in best_score_rmf_tuples:
1091 for f
in feature_keyword_list_dict:
1092 best_score_feature_keyword_list_dict[f].append(
1093 feature_keyword_list_dict[f][index])
1094 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1095 best_score_rmf_tuples,
1096 self.number_of_processes)[self.rank]
1099 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(self.model,
1101 my_best_score_rmf_tuples[0][1])[0]
1103 if rmsd_calculation_components
is not None:
1104 tmp = self._expand_ambiguity(prot_ahead,rmsd_calculation_components)
1105 if tmp!=rmsd_calculation_components:
1106 print(
'Detected ambiguity, expand rmsd components to',tmp)
1107 rmsd_calculation_components = tmp
1108 if alignment_components
is not None:
1109 tmp = self._expand_ambiguity(prot_ahead,alignment_components)
1110 if tmp!=alignment_components:
1111 print(
'Detected ambiguity, expand alignment components to',tmp)
1112 alignment_components = tmp
1118 my_best_score_rmf_tuples[0],
1119 rmsd_calculation_components,
1120 state_number=state_number)
1122 my_best_score_rmf_tuples,
1123 alignment_components,
1124 rmsd_calculation_components,
1125 state_number=state_number)
1130 all_coordinates=got_coords[0]
1131 alignment_coordinates=got_coords[1]
1132 rmsd_coordinates=got_coords[2]
1133 rmf_file_name_index_dict=got_coords[3]
1134 all_rmf_file_names=got_coords[4]
1140 if density_custom_ranges:
1142 density_custom_ranges,
1145 dircluster=os.path.join(outputdir,
"all_models."+str(n))
1151 os.mkdir(dircluster)
1154 clusstat=open(os.path.join(dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1155 for cnt,tpl
in enumerate(my_best_score_rmf_tuples):
1157 rmf_frame_number=tpl[2]
1160 for key
in best_score_feature_keyword_list_dict:
1161 tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1164 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1169 linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1175 if not linking_successful:
1182 states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1183 prot = states[state_number]
1185 prot = prots[state_number]
1190 coords_f1=alignment_coordinates[cnt]
1192 coords_f2=alignment_coordinates[cnt]
1195 transformation = Ali.align()[1]
1217 out_pdb_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".pdb")
1218 out_rmf_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".rmf3")
1219 o.init_pdb(out_pdb_fn,prot)
1220 o.write_pdb(out_pdb_fn,
1221 translate_to_geometric_center=write_pdb_with_centered_coordinates)
1223 tmp_dict[
"local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1224 tmp_dict[
"rmf_file_full_path"]=rmf_name
1225 tmp_dict[
"local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1226 tmp_dict[
"local_rmf_frame_number"]=0
1228 clusstat.write(str(tmp_dict)+
"\n")
1233 h.set_name(
"System")
1235 o.init_rmf(out_rmf_fn, [h], rs)
1237 o.init_rmf(out_rmf_fn, [prot],rs)
1239 o.write_rmf(out_rmf_fn)
1240 o.close_rmf(out_rmf_fn)
1242 if density_custom_ranges:
1243 DensModule.add_subunits_density(prot)
1245 if density_custom_ranges:
1246 DensModule.write_mrc(path=dircluster)
1253 if self.number_of_processes > 1:
1259 rmf_file_name_index_dict)
1261 alignment_coordinates)
1268 [best_score_feature_keyword_list_dict,
1269 rmf_file_name_index_dict],
1275 print(
"setup clustering class")
1278 for n, model_coordinate_dict
in enumerate(all_coordinates):
1279 template_coordinate_dict = {}
1281 if alignment_components
is not None and len(self.cluster_obj.all_coords) == 0:
1283 self.cluster_obj.set_template(alignment_coordinates[n])
1284 self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1285 print(
"Global calculating the distance matrix")
1288 self.cluster_obj.dist_matrix()
1292 self.cluster_obj.do_cluster(number_of_clusters)
1295 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1296 if exit_after_display:
1298 self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1305 print(
"setup clustering class")
1307 self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1308 print(
"clustering with %s clusters" % str(number_of_clusters))
1309 self.cluster_obj.do_cluster(number_of_clusters)
1310 [best_score_feature_keyword_list_dict,
1311 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1314 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1315 if exit_after_display:
1317 if self.number_of_processes > 1:
1325 print(self.cluster_obj.get_cluster_labels())
1326 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1327 print(
"rank %s " % str(self.rank))
1328 print(
"cluster %s " % str(n))
1329 print(
"cluster label %s " % str(cl))
1330 print(self.cluster_obj.get_cluster_label_names(cl))
1331 cluster_size = len(self.cluster_obj.get_cluster_label_names(cl))
1332 cluster_prov = prov + \
1333 [IMP.pmi.io.ClusterProvenance(cluster_size)]
1336 if density_custom_ranges:
1338 density_custom_ranges,
1341 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1343 os.mkdir(dircluster)
1347 rmsd_dict = {
"AVERAGE_RMSD":
1348 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1349 clusstat = open(dircluster +
"stat.out",
"w")
1350 for k, structure_name
in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1353 tmp_dict.update(rmsd_dict)
1354 index = rmf_file_name_index_dict[structure_name]
1355 for key
in best_score_feature_keyword_list_dict:
1357 key] = best_score_feature_keyword_list_dict[
1363 rmf_name = structure_name.split(
"|")[0]
1364 rmf_frame_number = int(structure_name.split(
"|")[1])
1365 clusstat.write(str(tmp_dict) +
"\n")
1369 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1374 linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1380 if not linking_successful:
1386 states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1387 prot = states[state_number]
1389 prot = prots[state_number]
1395 model_index = self.cluster_obj.get_model_index_from_name(
1397 transformation = self.cluster_obj.get_transformation_to_first_member(
1417 if density_custom_ranges:
1418 DensModule.add_subunits_density(prot)
1423 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1424 o.write_pdb(dircluster + str(k) +
".pdb")
1429 h.set_name(
"System")
1431 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1433 o.init_rmf(dircluster + str(k) +
".rmf3", [prot],rs)
1434 o.write_rmf(dircluster + str(k) +
".rmf3")
1435 o.close_rmf(dircluster + str(k) +
".rmf3")
1440 if density_custom_ranges:
1441 DensModule.write_mrc(path=dircluster)
1444 if self.number_of_processes>1:
1447 def get_cluster_rmsd(self,cluster_num):
1448 if self.cluster_obj
is None:
1450 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1452 def save_objects(self, objects, file_name):
1454 outf = open(file_name,
'wb')
1455 pickle.dump(objects, outf)
1458 def load_objects(self, file_name):
1460 inputf = open(file_name,
'rb')
1461 objects = pickle.load(inputf)
1468 This class contains analysis utilities to investigate ReplicaExchange results.
1481 Construction of the Class.
1482 @param model IMP.Model()
1483 @param stat_files list of string. Can be ascii stat files, rmf files names
1484 @param best_models Integer. Number of best scoring models, if None: all models will be read
1485 @param alignment boolean (Default=True). Align before computing the rmsd.
1489 self.best_models=best_models
1502 self.clusters.append(c)
1503 for n0
in range(len(self.stath0)):
1505 self.pairwise_rmsd={}
1506 self.pairwise_molecular_assignment={}
1507 self.alignment=alignment
1508 self.symmetric_molecules={}
1509 self.issymmetricsel={}
1511 self.molcopydict0=IMP.pmi.tools.get_molecules_dictionary_by_copy(
IMP.atom.get_leaves(self.stath0))
1512 self.molcopydict1=IMP.pmi.tools.get_molecules_dictionary_by_copy(
IMP.atom.get_leaves(self.stath1))
1516 Setup the selection onto which the rmsd is computed
1517 @param kwargs use IMP.atom.Selection keywords
1525 Store names of symmetric molecules
1527 self.symmetric_molecules[molecule_name]=0
1532 Setup the selection onto which the alignment is computed
1533 @param kwargs use IMP.atom.Selection keywords
1541 def clean_clusters(self):
1542 for c
in self.clusters: del c
1546 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1548 Cluster the models based on RMSD.
1549 @param rmsd_cutoff Float the distance cutoff in Angstrom
1550 @param metric (Default=IMP.atom.get_rmsd) the metric that will be used to compute rmsds
1552 self.clean_clusters()
1553 not_clustered=set(range(len(self.stath1)))
1554 while len(not_clustered)>0:
1555 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1561 Refine the clusters by merging the ones whose centers are close
1562 @param rmsd_cutoff cutoff distance in Angstorms
1565 clusters_copy=self.clusters
1566 for c0,c1
in itertools.combinations(self.clusters,2):
1567 if c0.center_index
is None:
1569 if c1.center_index
is None:
1571 d0=self.stath0[c0.center_index]
1572 d1=self.stath1[c1.center_index]
1573 rmsd, molecular_assignment = self.
rmsd()
1574 if rmsd <= rmsd_cutoff:
1575 if c1
in self.clusters:
1576 clusters_copy.remove(c1)
1578 self.clusters=clusters_copy
1585 def set_cluster_assignments(self, cluster_ids):
1586 if len(cluster_ids)!=len(self.stath0):
1587 raise ValueError(
'cluster ids has to be same length as number of frames')
1590 for i
in sorted(list(set(cluster_ids))):
1592 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
1593 self.clusters[idx].add_member(i,d)
1597 Return the model data from a cluster
1598 @param cluster IMP.pmi.output.Cluster object
1607 Save the data for the whole models into a pickle file
1608 @param filename string
1610 self.stath0.save_data(filename)
1614 Set the data from an external IMP.pmi.output.Data
1615 @param data IMP.pmi.output.Data
1617 self.stath0.data=data
1618 self.stath1.data=data
1622 Load the data from an external pickled file
1623 @param filename string
1625 self.stath0.load_data(filename)
1626 self.stath1.load_data(filename)
1627 self.best_models=len(self.stath0)
1629 def add_cluster(self,rmf_name_list):
1631 print(
"creating cluster index "+str(len(self.clusters)))
1632 self.clusters.append(c)
1633 current_len=len(self.stath0)
1635 for rmf
in rmf_name_list:
1636 print(
"adding rmf "+rmf)
1637 self.stath0.add_stat_file(rmf)
1638 self.stath1.add_stat_file(rmf)
1640 for n0
in range(current_len,len(self.stath0)):
1647 Save the clusters into a pickle file
1648 @param filename string
1651 import cPickle
as pickle
1654 fl=open(filename,
'wb')
1655 pickle.dump(self.clusters,fl)
1659 Load the clusters from a pickle file
1660 @param filename string
1661 @param append bool (Default=False), if True. append the clusters to the ones currently present
1664 import cPickle
as pickle
1667 fl=open(filename,
'rb')
1668 self.clean_clusters()
1670 self.clusters+=pickle.load(fl)
1672 self.clusters=pickle.load(fl)
1681 Compute the cluster center for a given cluster
1683 member_distance=defaultdict(float)
1685 for n0,n1
in itertools.combinations(cluster.members,2):
1688 rmsd, _ = self.
rmsd()
1689 member_distance[n0]+=rmsd
1691 if len(member_distance)>0:
1692 cluster.center_index=min(member_distance, key=member_distance.get)
1694 cluster.center_index=cluster.members[0]
1698 Save the coordinates of the current cluster a single rmf file
1700 print(
"saving coordinates",cluster)
1703 if rmf_name
is None:
1704 rmf_name=prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
1706 d1=self.stath1[cluster.members[0]]
1708 o.init_rmf(rmf_name, [self.stath1])
1709 for n1
in cluster.members:
1713 if self.alignment: self.align()
1714 o.write_rmf(rmf_name)
1716 o.close_rmf(rmf_name)
1720 remove structures that are similar
1721 append it to a new cluster
1723 print(
"pruning models")
1726 remaining=range(1,len(self.stath1),10)
1728 while len(remaining)>0:
1729 d0=self.stath0[selected]
1731 for n1
in remaining:
1733 if self.alignment: self.align()
1737 print(
"pruning model %s, similar to model %s, rmsd %s"%(str(n1),str(selected),str(d)))
1738 remaining=[x
for x
in remaining
if x
not in rm]
1739 if len(remaining)==0:
break
1740 selected=remaining[0]
1741 filtered.append(selected)
1744 self.clusters.append(c)
1754 Compute the precision of a cluster
1760 if not cluster.center_index
is None:
1761 members1=[cluster.center_index]
1763 members1=cluster.members
1767 for n1
in cluster.members:
1772 tmp_rmsd, _ = self.
rmsd()
1777 precision=rmsd/npairs
1778 cluster.precision=precision
1783 Compute the bipartite precision (ie the cross-precision)
1784 between two clusters
1788 for cn0,n0
in enumerate(cluster1.members):
1790 for cn1,n1
in enumerate(cluster2.members):
1792 tmp_rmsd, _ =self.
rmsd()
1793 if verbose: print(
"--- rmsd between structure %s and structure %s is %s"%(str(cn0),str(cn1),str(tmp_rmsd)))
1796 precision=rmsd/npairs
1799 def rmsf(self,cluster,molecule,copy_index=0,state_index=0,cluster_ref=None,step=1):
1801 Compute the Root mean square fluctuations
1802 of a molecule in a cluster
1803 Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf
1805 rmsf=IMP.pmi.tools.OrderedDict()
1808 if cluster_ref
is not None:
1809 if not cluster_ref.center_index
is None:
1810 members0 = [cluster_ref.center_index]
1812 members0 = cluster_ref.members
1814 if not cluster.center_index
is None:
1815 members0 = [cluster.center_index]
1817 members0 = cluster.members
1820 copy_index=copy_index,state_index=state_index)
1821 ps0=s0.get_selected_particles()
1835 for n1
in cluster.members[::step]:
1837 print(
"--- rmsf %s %s"%(str(n0),str(n1)))
1840 s1=
IMP.atom.Selection(self.stath1,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
1841 copy_index=copy_index,state_index=state_index)
1842 ps1 = s1.get_selected_particles()
1845 if self.alignment: self.align()
1846 for n,(p0,p1)
in enumerate(zip(ps0,ps1)):
1847 r=residue_indexes[n]
1859 for stath
in [self.stath0,self.stath1]:
1860 if molecule
not in self.symmetric_molecules:
1862 copy_index=copy_index,state_index=state_index)
1865 state_index=state_index)
1867 ps = s.get_selected_particles()
1876 def save_densities(self,cluster,density_custom_ranges,voxel_size=5,reference="Absolute", prefix="./",step=1):
1881 for n1
in cluster.members[::step]:
1882 print(
"density "+str(n1))
1885 if self.alignment: self.align()
1886 dens.add_subunits_density(self.stath1)
1888 dens.write_mrc(path=prefix+
'/',suffix=str(cluster.cluster_id))
1891 def contact_map(self,cluster,contact_threshold=15,log_scale=False,consolidate=False,molecules=None,prefix='./',reference="Absolute"):
1894 import matplotlib.pyplot
as plt
1895 import matplotlib.cm
as cm
1896 from scipy.spatial.distance
import cdist
1898 if molecules
is None:
1902 unique_copies=[mol
for mol
in mols
if mol.get_copy_index() == 0]
1903 mol_names_unique=dict((mol.get_name(),mol)
for mol
in unique_copies)
1904 total_len_unique=sum(max(mol.get_residue_indexes())
for mol
in unique_copies)
1913 seqlen=max(mol.get_residue_indexes())
1914 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
1918 for mol
in unique_copies:
1919 seqlen=max(mol.get_residue_indexes())
1920 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
1924 for ncl,n1
in enumerate(cluster.members):
1928 coord_dict=IMP.pmi.tools.OrderedDict()
1930 mol_name=mol.get_name()
1931 copy_index=mol.get_copy_index()
1932 rindexes = mol.get_residue_indexes()
1933 coords = np.ones((max(rindexes),3))
1934 for rnum
in rindexes:
1936 selpart = sel.get_selected_particles()
1937 if len(selpart) == 0:
continue
1938 selpart = selpart[0]
1939 coords[rnum - 1, :] =
IMP.core.XYZ(selpart).get_coordinates()
1940 coord_dict[mol]=coords
1943 coords=np.concatenate(list(coord_dict.values()))
1944 dists = cdist(coords, coords)
1945 binary_dists = np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
1947 binary_dists_dict={}
1949 len1 = max(mol1.get_residue_indexes())
1951 name1=mol1.get_name()
1952 name2=mol2.get_name()
1953 dists = cdist(coord_dict[mol1], coord_dict[mol2])
1954 if (name1, name2)
not in binary_dists_dict:
1955 binary_dists_dict[(name1, name2)] = np.zeros((len1,len1))
1956 binary_dists_dict[(name1, name2)] += np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
1957 binary_dists=np.zeros((total_len_unique,total_len_unique))
1959 for name1,name2
in binary_dists_dict:
1960 r1=index_dict[mol_names_unique[name1]]
1961 r2=index_dict[mol_names_unique[name2]]
1962 binary_dists[min(r1):max(r1)+1,min(r2):max(r2)+1] = np.where((binary_dists_dict[(name1, name2)]>=1.0),1.0,0.0)
1967 contact_freqs = binary_dists
1969 dist_maps.append(dists)
1970 av_dist_map += dists
1971 contact_freqs += binary_dists
1976 contact_freqs =-np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
1978 contact_freqs =1.0/len(cluster)*contact_freqs
1979 av_dist_map=1.0/len(cluster)*contact_freqs
1981 fig = plt.figure(figsize=(100, 100))
1982 ax = fig.add_subplot(111)
1985 gap_between_components=50
1997 prot_list=list(zip(*sorted_tuple))[1]
2000 prot_list=list(zip(*sorted_tuple))[1]
2002 prot_listx = prot_list
2003 nresx = gap_between_components + \
2004 sum([max(mol.get_residue_indexes())
2005 + gap_between_components
for mol
in prot_listx])
2008 prot_listy = prot_list
2009 nresy = gap_between_components + \
2010 sum([max(mol.get_residue_indexes())
2011 + gap_between_components
for mol
in prot_listy])
2016 res = gap_between_components
2017 for mol
in prot_listx:
2018 resoffsetx[mol] = res
2019 res += max(mol.get_residue_indexes())
2021 res += gap_between_components
2025 res = gap_between_components
2026 for mol
in prot_listy:
2027 resoffsety[mol] = res
2028 res += max(mol.get_residue_indexes())
2030 res += gap_between_components
2032 resoffsetdiagonal = {}
2033 res = gap_between_components
2034 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2035 resoffsetdiagonal[mol] = res
2036 res += max(mol.get_residue_indexes())
2037 res += gap_between_components
2042 for n, prot
in enumerate(prot_listx):
2043 res = resoffsetx[prot]
2045 for proty
in prot_listy:
2046 resy = resoffsety[proty]
2047 endy = resendy[proty]
2048 ax.plot([res, res], [resy, endy], linestyle=
'-',color=
'gray', lw=0.4)
2049 ax.plot([end, end], [resy, endy], linestyle=
'-',color=
'gray', lw=0.4)
2050 xticks.append((float(res) + float(end)) / 2)
2055 for n, prot
in enumerate(prot_listy):
2056 res = resoffsety[prot]
2058 for protx
in prot_listx:
2059 resx = resoffsetx[protx]
2060 endx = resendx[protx]
2061 ax.plot([resx, endx], [res, res], linestyle=
'-',color=
'gray', lw=0.4)
2062 ax.plot([resx, endx], [end, end], linestyle=
'-',color=
'gray', lw=0.4)
2063 yticks.append((float(res) + float(end)) / 2)
2068 tmp_array = np.zeros((nresx, nresy))
2070 for px
in prot_listx:
2071 for py
in prot_listy:
2072 resx = resoffsetx[px]
2073 lengx = resendx[px] - 1
2074 resy = resoffsety[py]
2075 lengy = resendy[py] - 1
2076 indexes_x = index_dict[px]
2077 minx = min(indexes_x)
2078 maxx = max(indexes_x)
2079 indexes_y = index_dict[py]
2080 miny = min(indexes_y)
2081 maxy = max(indexes_y)
2082 tmp_array[resx:lengx,resy:lengy] = contact_freqs[minx:maxx,miny:maxy]
2083 ret[(px,py)]=np.argwhere(contact_freqs[minx:maxx,miny:maxy] == 1.0)+1
2085 cax = ax.imshow(tmp_array,
2090 interpolation=
'nearest')
2092 ax.set_xticks(xticks)
2093 ax.set_xticklabels(xlabels, rotation=90)
2094 ax.set_yticks(yticks)
2095 ax.set_yticklabels(ylabels)
2096 plt.setp(ax.get_xticklabels(), fontsize=6)
2097 plt.setp(ax.get_yticklabels(), fontsize=6)
2100 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2101 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2106 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf", dpi=300,transparent=
"False")
2110 def plot_rmsd_matrix(self,filename):
2112 self.compute_all_pairwise_rmsd()
2113 distance_matrix = np.zeros(
2114 (len(self.stath0), len(self.stath1)))
2115 for (n0,n1)
in self.pairwise_rmsd:
2116 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0,n1)]
2118 import matplotlib
as mpl
2120 import matplotlib.pylab
as pl
2121 from scipy.cluster
import hierarchy
as hrc
2123 fig = pl.figure(figsize=(10,8))
2124 ax = fig.add_subplot(212)
2125 dendrogram = hrc.dendrogram(
2126 hrc.linkage(distance_matrix),
2129 leaves_order = dendrogram[
'leaves']
2130 ax.set_xlabel(
'Model')
2131 ax.set_ylabel(
'RMSD [Angstroms]')
2133 ax2 = fig.add_subplot(221)
2135 distance_matrix[leaves_order,
2138 interpolation=
'nearest')
2139 cb = fig.colorbar(cax)
2140 cb.set_label(
'RMSD [Angstroms]')
2141 ax2.set_xlabel(
'Model')
2142 ax2.set_ylabel(
'Model')
2144 pl.savefig(filename, dpi=300)
2153 Update the cluster id numbers
2155 for n,c
in enumerate(self.clusters):
2158 def get_molecule(self, hier, name, copy):
2166 self.seldict0=IMP.pmi.tools.get_selections_dictionary(self.sel0_rmsd.get_selected_particles())
2167 self.seldict1=IMP.pmi.tools.get_selections_dictionary(self.sel1_rmsd.get_selected_particles())
2168 for mol
in self.seldict0:
2169 for sel
in self.seldict0[mol]:
2170 self.issymmetricsel[sel]=
False
2171 for mol
in self.symmetric_molecules:
2172 self.symmetric_molecules[mol]=len(self.seldict0[mol])
2173 for sel
in self.seldict0[mol]:
2174 self.issymmetricsel[sel]=
True
2180 for rb
in self.rbs1:
2183 for bead
in self.beads1:
2192 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2194 initial filling of the clusters.
2197 print(
"clustering model "+str(n0))
2198 d0 = self.stath0[n0]
2200 print(
"creating cluster index "+str(len(self.clusters)))
2201 self.clusters.append(c)
2203 clustered = set([n0])
2205 print(
"--- trying to add model "+str(n1)+
" to cluster "+str(len(self.clusters)))
2206 d1 = self.stath1[n1]
2207 if self.alignment: self.align()
2208 rmsd, _ = self.
rmsd(metric=metric)
2209 if rmsd<rmsd_cutoff:
2210 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2214 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2219 merge the clusters that have close members
2220 @param rmsd_cutoff cutoff distance in Angstorms
2226 for c0, c1
in filter(
lambda x: len(x[0].members)>1, itertools.combinations(self.clusters, 2)):
2227 n0, n1 = [c.members[0]
for c
in (c0,c1)]
2228 d0 = self.stath0[n0]
2229 d1 = self.stath1[n1]
2230 rmsd, _ = self.
rmsd()
2232 to_merge.append((c0,c1))
2234 for c0, c
in reversed(to_merge):
2238 self.clusters = [c
for c
in filter(
lambda x: len(x.members)>0, self.clusters)]
2243 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2245 print(
"check close members for clusters "+str(c0.cluster_id)+
" and "+str(c1.cluster_id))
2246 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2247 d0 = self.stath0[n0]
2248 d1 = self.stath1[n1]
2249 rmsd, _ = self.
rmsd(metric=metric)
2250 if rmsd<rmsd_cutoff:
2266 a function that returns the permutation best_sel of sels0 that minimizes metric
2268 best_rmsd2 = float(
'inf')
2270 if self.issymmetricsel[sels0[0]]:
2273 for offset
in range(N):
2274 order=[(offset+i)%N
for i
in range(N)]
2275 sels = [sels0[(offset+i)%N]
for i
in range(N)]
2278 r=metric(sel0, sel1)
2281 if rmsd2 < best_rmsd2:
2286 for sels
in itertools.permutations(sels0):
2288 for sel0, sel1
in itertools.takewhile(
lambda x: rmsd2<best_rmsd2, zip(sels, sels1)):
2289 r=metric(sel0, sel1)
2291 if rmsd2 < best_rmsd2:
2305 return best_sel, best_rmsd2
2307 def compute_all_pairwise_rmsd(self):
2308 for d0
in self.stath0:
2309 for d1
in self.stath1:
2310 rmsd, _ = self.
rmsd()
2312 def rmsd(self,metric=IMP.atom.get_rmsd):
2314 Computes the RMSD. Resolves ambiguous pairs assignments
2317 n0=self.stath0.current_index
2318 n1=self.stath1.current_index
2319 if ((n0,n1)
in self.pairwise_rmsd)
and ((n0,n1)
in self.pairwise_molecular_assignment):
2320 return self.pairwise_rmsd[(n0,n1)], self.pairwise_molecular_assignment[(n0,n1)]
2328 seldict_best_order={}
2329 molecular_assignment={}
2330 for molname, sels0
in self.seldict0.items():
2331 sels_best_order, best_rmsd2 = self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2333 Ncoords = len(sels_best_order[0].get_selected_particles())
2334 Ncopies = len(self.seldict1[molname])
2335 total_rmsd += Ncoords*best_rmsd2
2336 total_N += Ncoords*Ncopies
2338 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2339 p0 = sel0.get_selected_particles()[0]
2340 p1 = sel1.get_selected_particles()[0]
2346 molecular_assignment[(molname,c0)]=(molname,c1)
2348 total_rmsd = math.sqrt(total_rmsd/total_N)
2350 self.pairwise_rmsd[(n0,n1)]=total_rmsd
2351 self.pairwise_molecular_assignment[(n0,n1)]=molecular_assignment
2352 self.pairwise_rmsd[(n1,n0)]=total_rmsd
2353 self.pairwise_molecular_assignment[(n1,n0)]=molecular_assignment
2355 return total_rmsd, molecular_assignment
2359 Fix the reference structure for structural alignment, rmsd and chain assignemnt
2360 @param reference can be either "Absolute" (cluster center of the first cluster)
2361 or Relative (cluster center of the current cluster)
2363 if reference==
"Absolute":
2365 elif reference==
"Relative":
2366 if cluster.center_index:
2367 n0=cluster.center_index
2369 n0=cluster.members[0]
2374 compute the molecular assignemnts between multiple copies
2375 of the same sequence. It changes the Copy index of Molecules
2378 _, molecular_assignment = self.
rmsd()
2379 for (m0, c0), (m1,c1)
in molecular_assignment.items():
2380 mol0 = self.molcopydict0[m0][c0]
2381 mol1 = self.molcopydict1[m1][c1]
2384 p1.set_value(cik0,c0)
2388 Undo the Copy index assignment
2391 _, molecular_assignment = self.
rmsd()
2393 for (m0, c0), (m1,c1)
in molecular_assignment.items():
2394 mol0 = self.molcopydict0[m0][c0]
2395 mol1 = self.molcopydict1[m1][c1]
2398 p1.set_value(cik0,c1)
2405 s=
"AnalysisReplicaExchange\n"
2406 s+=
"---- number of clusters %s \n"%str(len(self.clusters))
2407 s+=
"---- number of models %s \n"%str(len(self.stath0))
2410 def __getitem__(self,int_slice_adaptor):
2411 if isinstance(int_slice_adaptor, int):
2412 return self.clusters[int_slice_adaptor]
2413 elif isinstance(int_slice_adaptor, slice):
2414 return self.__iter__(int_slice_adaptor)
2416 raise TypeError(
"Unknown Type")
2419 return len(self.clusters)
2421 def __iter__(self,slice_key=None):
2422 if slice_key
is None:
2423 for i
in range(len(self)):
2426 for i
in range(len(self))[slice_key]:
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A class for reading stat files (either rmf or ascii v1 and v2)
A member of a rigid body, it has internal (local) coordinates.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
void handle_use_deprecated(std::string message)
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
def merge_aggregates
merge the clusters that have close members
This class initializes the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
Mapping between FASTA one-letter codes and residue types.
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
A macro to help setup and run replica exchange.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
Sample using replica exchange.
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.