1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
16 from pathlib
import Path
18 from operator
import itemgetter
19 from collections
import defaultdict
29 """Replace samplers.MPI_values when in test mode"""
30 def get_percentile(self, name):
35 """All restraints that are written out to the RMF file"""
36 def __init__(self, model, user_restraints):
38 self._user_restraints = user_restraints
if user_restraints
else []
41 return (len(self._user_restraints)
42 + self._rmf_rs.get_number_of_restraints())
47 def __getitem__(self, i):
49 def __init__(self, r):
50 self.r = IMP.RestraintSet.get_from(r)
52 def get_restraint(self):
55 lenuser = len(self._user_restraints)
57 return self._user_restraints[i]
58 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
59 r = self._rmf_rs.get_restraint(i - lenuser)
60 return FakePMIWrapper(r)
62 raise IndexError(
"Out of range")
66 """A macro to help setup and run replica exchange.
67 Supports Monte Carlo and molecular dynamics.
68 Produces trajectory RMF files, best PDB structures,
69 and output stat files.
72 monte_carlo_sample_objects=
None,
73 molecular_dynamics_sample_objects=
None,
75 rmf_output_objects=
None,
76 monte_carlo_temperature=1.0,
77 simulated_annealing=
False,
78 simulated_annealing_minimum_temperature=1.0,
79 simulated_annealing_maximum_temperature=2.5,
80 simulated_annealing_minimum_temperature_nframes=100,
81 simulated_annealing_maximum_temperature_nframes=100,
82 replica_exchange_minimum_temperature=1.0,
83 replica_exchange_maximum_temperature=2.5,
84 replica_exchange_swap=
True,
86 number_of_best_scoring_models=500,
89 molecular_dynamics_steps=10,
90 molecular_dynamics_max_time_step=1.0,
91 number_of_frames=1000,
92 save_coordinates_mode=
"lowest_temperature",
93 nframes_write_coordinates=1,
94 write_initial_rmf=
True,
95 initial_rmf_name_suffix=
"initial",
96 stat_file_name_suffix=
"stat",
97 best_pdb_name_suffix=
"model",
100 do_create_directories=
True,
101 global_output_directory=
"./",
103 best_pdb_dir=
"pdbs/",
104 replica_stat_file_suffix=
"stat_replica",
105 em_object_for_rmf=
None,
107 replica_exchange_object=
None,
111 nestor_restraints=
None,
112 nestor_rmf_fname_prefix=
"nested",):
114 @param model The IMP model
115 @param root_hier Top-level (System)hierarchy
116 @param monte_carlo_sample_objects Objects for MC sampling, which
117 should generally be a simple list of Mover objects, e.g.
118 from DegreesOfFreedom.get_movers().
119 @param molecular_dynamics_sample_objects Objects for MD sampling,
120 which should generally be a simple list of particles.
121 @param output_objects A list of structural objects and restraints
122 that will be included in output (ie, statistics "stat"
123 files). Any object that provides a get_output() method
124 can be used here. If None is passed
125 the macro will not write stat files.
126 @param rmf_output_objects A list of structural objects and
127 restraints that will be included in rmf. Any object
128 that provides a get_output() method can be used here.
129 @param monte_carlo_temperature MC temp (may need to be optimized
130 based on post-sampling analysis)
131 @param simulated_annealing If True, perform simulated annealing
132 @param simulated_annealing_minimum_temperature Should generally be
133 the same as monte_carlo_temperature.
134 @param simulated_annealing_minimum_temperature_nframes Number of
135 frames to compute at minimum temperature.
136 @param simulated_annealing_maximum_temperature_nframes Number of
138 temps > simulated_annealing_maximum_temperature.
139 @param replica_exchange_minimum_temperature Low temp for REX; should
140 generally be the same as monte_carlo_temperature.
141 @param replica_exchange_maximum_temperature High temp for REX
142 @param replica_exchange_swap Boolean, enable disable temperature
144 @param num_sample_rounds Number of rounds of MC/MD per cycle
145 @param number_of_best_scoring_models Number of top-scoring PDB/mmCIF
146 models to keep around for analysis.
147 @param mmcif If True, write best scoring models in mmCIF format;
148 if False (the default), write in legacy PDB format.
149 @param best_pdb_dir The directory under `global_output_directory`
150 where best-scoring PDB/mmCIF files are written.
151 @param best_pdb_name_suffix Part of the file name for best-scoring
153 @param monte_carlo_steps Number of MC steps per round
154 @param self_adaptive self adaptive scheme for Monte Carlo movers
155 @param molecular_dynamics_steps Number of MD steps per round
156 @param molecular_dynamics_max_time_step Max time step for MD
157 @param number_of_frames Number of REX frames to run
158 @param save_coordinates_mode string: how to save coordinates.
159 "lowest_temperature" (default) only the lowest temperatures
161 "25th_score" all replicas whose score is below the 25th
163 "50th_score" all replicas whose score is below the 50th
165 "75th_score" all replicas whose score is below the 75th
167 @param nframes_write_coordinates How often to write the coordinates
169 @param write_initial_rmf Write the initial configuration
170 @param global_output_directory Folder that will be created to house
172 @param test_mode Set to True to avoid writing any files, just test
174 @param score_moved If True, attempt to speed up Monte Carlo
175 sampling by caching scoring function terms on particles
177 @param use_nestor If True, follows the Nested Sampling workflow
178 of the NestOR module and skips writing stat files and
180 @param nestor_restraints A list of restraints for which
181 likelihoods are to be computed for use by NestOR module.
182 @param nestor_rmf_fname_prefix Prefix to be used for storing .rmf3
183 files generated by NestOR .
189 if output_objects == []:
192 self.output_objects = []
194 self.output_objects = output_objects
195 self.rmf_output_objects = rmf_output_objects
197 and not root_hier.get_parent()):
198 if self.output_objects
is not None:
199 self.output_objects.append(
201 if self.rmf_output_objects
is not None:
202 self.rmf_output_objects.append(
204 self.root_hier = root_hier
205 states = IMP.atom.get_by_type(root_hier, IMP.atom.STATE_TYPE)
206 self.vars[
"number_of_states"] = len(states)
208 self.root_hiers = states
209 self.is_multi_state =
True
211 self.root_hier = root_hier
212 self.is_multi_state =
False
214 raise TypeError(
"Must provide System hierarchy (root_hier)")
216 self._rmf_restraints = _RMFRestraints(model,
None)
217 self.em_object_for_rmf = em_object_for_rmf
218 self.monte_carlo_sample_objects = monte_carlo_sample_objects
219 self.vars[
"self_adaptive"] = self_adaptive
220 self.molecular_dynamics_sample_objects = \
221 molecular_dynamics_sample_objects
222 self.replica_exchange_object = replica_exchange_object
223 self.molecular_dynamics_max_time_step = \
224 molecular_dynamics_max_time_step
225 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
226 self.vars[
"replica_exchange_minimum_temperature"] = \
227 replica_exchange_minimum_temperature
228 self.vars[
"replica_exchange_maximum_temperature"] = \
229 replica_exchange_maximum_temperature
230 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
231 self.vars[
"simulated_annealing"] = simulated_annealing
232 self.vars[
"simulated_annealing_minimum_temperature"] = \
233 simulated_annealing_minimum_temperature
234 self.vars[
"simulated_annealing_maximum_temperature"] = \
235 simulated_annealing_maximum_temperature
236 self.vars[
"simulated_annealing_minimum_temperature_nframes"] = \
237 simulated_annealing_minimum_temperature_nframes
238 self.vars[
"simulated_annealing_maximum_temperature_nframes"] = \
239 simulated_annealing_maximum_temperature_nframes
241 self.vars[
"num_sample_rounds"] = num_sample_rounds
243 "number_of_best_scoring_models"] = number_of_best_scoring_models
244 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
245 self.vars[
"molecular_dynamics_steps"] = molecular_dynamics_steps
246 self.vars[
"number_of_frames"] = number_of_frames
247 if save_coordinates_mode
not in (
"lowest_temperature",
"25th_score",
248 "50th_score",
"75th_score"):
249 raise Exception(
"save_coordinates_mode has unrecognized value")
251 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
252 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
253 self.vars[
"write_initial_rmf"] = write_initial_rmf
254 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
255 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
256 self.vars[
"mmcif"] = mmcif
257 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
258 self.vars[
"do_clean_first"] = do_clean_first
259 self.vars[
"do_create_directories"] = do_create_directories
260 self.vars[
"global_output_directory"] = global_output_directory
261 self.vars[
"rmf_dir"] = rmf_dir
262 self.vars[
"best_pdb_dir"] = best_pdb_dir
263 self.vars[
"atomistic"] = atomistic
264 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
265 self.vars[
"geometries"] =
None
266 self.test_mode = test_mode
267 self.score_moved = score_moved
268 self.nest = use_nestor
269 self.nestor_restraints = nestor_restraints
270 self.nestor_rmf_fname = nestor_rmf_fname_prefix
273 if self.vars[
"geometries"]
is None:
274 self.vars[
"geometries"] = list(geometries)
276 self.vars[
"geometries"].extend(geometries)
279 print(
"ReplicaExchange: it generates initial.*.rmf3, stat.*.out, "
280 "rmfs/*.rmf3 for each replica ")
281 print(
"--- it stores the best scoring pdb models in pdbs/")
282 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the "
283 "lowest temperature")
284 print(
"--- variables:")
285 keys = list(self.vars.keys())
288 print(
"------", v.ljust(30), self.vars[v])
289 print(
"Use nestor: ", self.nest)
291 def get_replica_exchange_object(self):
292 return self.replica_exchange_object
294 def _add_provenance(self, sampler_md, sampler_mc):
295 """Record details about the sampling in the IMP Hierarchies"""
298 method =
"Molecular Dynamics"
299 iterations += self.vars[
"molecular_dynamics_steps"]
301 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
302 iterations += self.vars[
"monte_carlo_steps"]
304 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
306 iterations *= self.vars[
"num_sample_rounds"]
308 pi = self.model.add_particle(
"sampling")
310 self.model, pi, method, self.vars[
"number_of_frames"],
312 p.set_number_of_replicas(
313 self.replica_exchange_object.get_number_of_replicas())
314 IMP.pmi.tools._add_pmi_provenance(self.root_hier)
317 def execute_macro(self):
318 temp_index_factor = 100000.0
322 if self.monte_carlo_sample_objects
is not None:
323 print(
"Setting up MonteCarlo")
325 self.model, self.monte_carlo_sample_objects,
326 self.vars[
"monte_carlo_temperature"],
327 score_moved=self.score_moved)
328 if self.vars[
"simulated_annealing"]:
329 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
330 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
332 "simulated_annealing_minimum_temperature_nframes"]
334 "simulated_annealing_maximum_temperature_nframes"]
335 sampler_mc.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
336 if self.vars[
"self_adaptive"]:
337 sampler_mc.set_self_adaptive(
338 isselfadaptive=self.vars[
"self_adaptive"])
339 if self.output_objects
is not None:
340 self.output_objects.append(sampler_mc)
341 if self.rmf_output_objects
is not None:
342 self.rmf_output_objects.append(sampler_mc)
343 samplers.append(sampler_mc)
345 if self.molecular_dynamics_sample_objects
is not None:
346 print(
"Setting up MolecularDynamics")
348 self.model, self.molecular_dynamics_sample_objects,
349 self.vars[
"monte_carlo_temperature"],
350 maximum_time_step=self.molecular_dynamics_max_time_step)
351 if self.vars[
"simulated_annealing"]:
352 tmin = self.vars[
"simulated_annealing_minimum_temperature"]
353 tmax = self.vars[
"simulated_annealing_maximum_temperature"]
355 "simulated_annealing_minimum_temperature_nframes"]
357 "simulated_annealing_maximum_temperature_nframes"]
358 sampler_md.set_simulated_annealing(tmin, tmax, nfmin, nfmax)
359 if self.output_objects
is not None:
360 self.output_objects.append(sampler_md)
361 if self.rmf_output_objects
is not None:
362 self.rmf_output_objects.append(sampler_md)
363 samplers.append(sampler_md)
366 print(
"Setting up ReplicaExchange")
368 self.model, self.vars[
"replica_exchange_minimum_temperature"],
369 self.vars[
"replica_exchange_maximum_temperature"], samplers,
370 replica_exchange_object=self.replica_exchange_object)
371 self.replica_exchange_object = rex.rem
373 myindex = rex.get_my_index()
374 if self.output_objects
is not None:
375 self.output_objects.append(rex)
376 if self.rmf_output_objects
is not None:
377 self.rmf_output_objects.append(rex)
381 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
385 globaldir = self.vars[
"global_output_directory"] +
"/"
386 rmf_dir = globaldir + self.vars[
"rmf_dir"]
387 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
389 if not self.test_mode
and not self.nest:
390 if self.vars[
"do_clean_first"]:
393 if self.vars[
"do_create_directories"]:
396 os.makedirs(globaldir)
404 if not self.is_multi_state:
410 for n
in range(self.vars[
"number_of_states"]):
412 os.makedirs(pdb_dir +
"/" + str(n))
419 if self.output_objects
is not None:
420 self.output_objects.append(sw)
421 if self.rmf_output_objects
is not None:
422 self.rmf_output_objects.append(sw)
427 print(
"Setting up stat file")
428 low_temp_stat_file = globaldir + \
429 self.vars[
"stat_file_name_suffix"] +
"." + \
430 str(myindex) +
".out"
433 if not self.test_mode:
436 if not self.test_mode
and not self.nest:
437 if self.output_objects
is not None:
438 output.init_stat2(low_temp_stat_file,
440 extralabels=[
"rmf_file",
"rmf_frame_index"])
442 print(
"Stat file writing is disabled")
444 if self.rmf_output_objects
is not None and not self.nest:
445 print(
"Stat info being written in the rmf file")
447 if not self.test_mode
and not self.nest:
448 print(
"Setting up replica stat file")
449 replica_stat_file = globaldir + \
450 self.vars[
"replica_stat_file_suffix"] +
"." + \
451 str(myindex) +
".out"
452 if not self.test_mode:
453 output.init_stat2(replica_stat_file, [rex],
454 extralabels=[
"score"])
456 print(
"Setting up best pdb files")
457 if not self.is_multi_state:
458 if self.vars[
"number_of_best_scoring_models"] > 0:
459 output.init_pdb_best_scoring(
460 pdb_dir +
"/" + self.vars[
"best_pdb_name_suffix"],
462 self.vars[
"number_of_best_scoring_models"],
463 replica_exchange=
True,
464 mmcif=self.vars[
'mmcif'],
465 best_score_file=globaldir +
"best.scores.rex.py")
466 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
468 pdb_dir +
"/" +
"model.psf",
470 self.vars[
"best_pdb_name_suffix"] + pdbext)
472 if self.vars[
"number_of_best_scoring_models"] > 0:
473 for n
in range(self.vars[
"number_of_states"]):
474 output.init_pdb_best_scoring(
475 pdb_dir +
"/" + str(n) +
"/" +
476 self.vars[
"best_pdb_name_suffix"],
478 self.vars[
"number_of_best_scoring_models"],
479 replica_exchange=
True,
480 mmcif=self.vars[
'mmcif'],
481 best_score_file=globaldir +
"best.scores.rex.py")
482 pdbext =
".0.cif" if self.vars[
'mmcif']
else ".0.pdb"
484 pdb_dir +
"/" + str(n) +
"/" +
"model.psf",
485 pdb_dir +
"/" + str(n) +
"/" +
486 self.vars[
"best_pdb_name_suffix"] + pdbext)
489 if self.em_object_for_rmf
is not None:
490 output_hierarchies = [
492 self.em_object_for_rmf.get_density_as_hierarchy(
495 output_hierarchies = [self.root_hier]
497 if not self.test_mode
and not self.nest:
498 print(
"Setting up and writing initial rmf coordinate file")
499 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
500 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
502 listofobjects=self.rmf_output_objects)
503 if self._rmf_restraints:
504 output.add_restraints_to_rmf(
505 init_suffix +
"." + str(myindex) +
".rmf3",
506 self._rmf_restraints)
507 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
508 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
510 if not self.test_mode:
511 mpivs = IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
513 mpivs = _MockMPIValues()
515 self._add_provenance(sampler_md, sampler_mc)
517 if not self.test_mode
and not self.nest:
518 print(
"Setting up production rmf files")
519 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
520 output.init_rmf(rmfname, output_hierarchies,
521 geometries=self.vars[
"geometries"],
522 listofobjects=self.rmf_output_objects)
524 if self._rmf_restraints:
525 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
527 if not self.test_mode
and self.nest:
528 print(
"Setting up NestOR rmf files")
529 nestor_rmf_fname = str(self.nestor_rmf_fname) +
'_' + \
530 str(self.replica_exchange_object.get_my_index()) +
'.rmf3'
532 output.init_rmf(nestor_rmf_fname, output_hierarchies,
533 geometries=self.vars[
"geometries"],
534 listofobjects=self.rmf_output_objects)
536 ntimes_at_low_temp = 0
538 if myindex == 0
and not self.nest:
540 self.replica_exchange_object.set_was_used(
True)
541 nframes = self.vars[
"number_of_frames"]
545 sampled_likelihoods = []
546 for i
in range(nframes):
550 for nr
in range(self.vars[
"num_sample_rounds"]):
551 if sampler_md
is not None:
553 self.vars[
"molecular_dynamics_steps"])
554 if sampler_mc
is not None:
555 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
557 self.model).evaluate(
False)
558 mpivs.set_value(
"score", score)
560 output.set_output_entry(
"score", score)
562 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
564 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
565 save_frame = (min_temp_index == my_temp_index)
566 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
567 score_perc = mpivs.get_percentile(
"score")
568 save_frame = (score_perc*100.0 <= 25.0)
569 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
570 score_perc = mpivs.get_percentile(
"score")
571 save_frame = (score_perc*100.0 <= 50.0)
572 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
573 score_perc = mpivs.get_percentile(
"score")
574 save_frame = (score_perc*100.0 <= 75.0)
577 if save_frame
and not self.test_mode:
581 print(
"--- frame %s score %s " % (str(i), str(score)))
584 if math.isnan(score):
585 sampled_likelihoods.append(math.nan)
587 likelihood_for_sample = 1
588 for rstrnt
in self.nestor_restraints:
589 likelihood_for_sample *= rstrnt.get_likelihood()
590 sampled_likelihoods.append(likelihood_for_sample)
591 output.write_rmf(nestor_rmf_fname)
593 if not self.test_mode
and not self.nest:
594 if i % self.vars[
"nframes_write_coordinates"] == 0:
595 print(
'--- writing coordinates')
596 if self.vars[
"number_of_best_scoring_models"] > 0:
597 output.write_pdb_best_scoring(score)
598 output.write_rmf(rmfname)
599 output.set_output_entry(
"rmf_file", rmfname)
600 output.set_output_entry(
"rmf_frame_index",
603 output.set_output_entry(
"rmf_file", rmfname)
604 output.set_output_entry(
"rmf_frame_index",
'-1')
605 if self.output_objects
is not None:
606 output.write_stat2(low_temp_stat_file)
607 ntimes_at_low_temp += 1
609 if not self.test_mode
and not self.nest:
610 output.write_stat2(replica_stat_file)
611 if self.vars[
"replica_exchange_swap"]:
612 rex.swap_temp(i, score)
614 if self.nest
and len(sampled_likelihoods) > 0:
615 with open(
"likelihoods_"
616 + str(self.replica_exchange_object.get_my_index()),
618 pickle.dump(sampled_likelihoods, lif)
620 output.close_rmf(nestor_rmf_fname)
622 for p, state
in IMP.pmi.tools._all_protocol_outputs(self.root_hier):
623 p.add_replica_exchange(state, self)
625 if not self.test_mode
and not self.nest:
626 print(
"closing production rmf files")
627 output.close_rmf(rmfname)
631 """A macro to build a IMP::pmi::topology::System based on a
632 TopologyReader object.
634 Easily create multi-state systems by calling this macro
635 repeatedly with different TopologyReader objects!
636 A useful function is get_molecules() which returns the PMI Molecules
637 grouped by state as a dictionary with key = (molecule name),
638 value = IMP.pmi.topology.Molecule
639 Quick multi-state system:
642 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
643 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
644 bs = IMP.pmi.macros.BuildSystem(model)
645 bs.add_state(reader1)
646 bs.add_state(reader2)
647 bs.execute_macro() # build everything including degrees of freedom
648 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
649 ### now you have a two state system, you add restraints etc
651 @note The "domain name" entry of the topology reader is not used.
652 All molecules are set up by the component name, but split into rigid bodies
656 _alphabets = {
'DNA': IMP.pmi.alphabets.dna,
657 'RNA': IMP.pmi.alphabets.rna}
659 def __init__(self, model, sequence_connectivity_scale=4.0,
660 force_create_gmm_files=
False, resolutions=[1, 10],
663 @param model An IMP Model
664 @param sequence_connectivity_scale For scaling the connectivity
666 @param force_create_gmm_files If True, will sample and create GMMs
667 no matter what. If False, will only sample if the
668 files don't exist. If number of Gaussians is zero, won't
670 @param resolutions The resolutions to build for structured regions
671 @param name The name of the top-level hierarchy node.
678 self._domain_res = []
680 self.force_create_gmm_files = force_create_gmm_files
681 self.resolutions = resolutions
683 def add_state(self, reader, keep_chain_id=False, fasta_name_map=None,
685 """Add a state using the topology info in a
686 IMP::pmi::topology::TopologyReader object.
687 When you are done adding states, call execute_macro()
688 @param reader The TopologyReader object
689 @param keep_chain_id If True, keep the chain IDs from the
690 original PDB files, if available
691 @param fasta_name_map dictionary for converting protein names
692 found in the fasta file
693 @param chain_ids A list or string of chain IDs for assigning to
694 newly-created molecules, e.g.
695 `string.ascii_uppercase+string.ascii_lowercase+string.digits`.
696 If not specified, chain IDs A through Z are assigned, then
697 AA through AZ, then BA through BZ, and so on, in the same
700 state = self.system.create_state()
701 self._readers.append(reader)
703 these_domain_res = {}
705 if chain_ids
is None:
706 chain_ids = IMP.pmi.output._ChainIDs()
711 for molname
in reader.get_molecules():
712 copies = reader.get_molecules()[molname].domains
713 for nc, copyname
in enumerate(copies):
714 print(
"BuildSystem.add_state: setting up molecule %s copy "
715 "number %s" % (molname, str(nc)))
716 copy = copies[copyname]
719 all_chains = [c
for c
in copy
if c.chain
is not None]
721 chain_id = all_chains[0].chain
723 chain_id = chain_ids[numchain]
725 "No PDBs specified for %s, so keep_chain_id has "
726 "no effect; using default chain ID '%s'"
729 chain_id = chain_ids[numchain]
731 alphabet = IMP.pmi.alphabets.amino_acid
732 fasta_flag = copy[0].fasta_flag
733 if fasta_flag
in self._alphabets:
734 alphabet = self._alphabets[fasta_flag]
736 copy[0].fasta_file, fasta_name_map)
737 seq = seqs[copy[0].fasta_id]
738 print(
"BuildSystem.add_state: molecule %s sequence has "
739 "%s residues" % (molname, len(seq)))
740 orig_mol = state.create_molecule(
741 molname, seq, chain_id, alphabet=alphabet,
742 uniprot=seqs.uniprot.get(copy[0].fasta_id))
746 print(
"BuildSystem.add_state: creating a copy for "
747 "molecule %s" % molname)
748 mol = orig_mol.create_copy(chain_id)
751 for domainnumber, domain
in enumerate(copy):
752 print(
"BuildSystem.add_state: ---- setting up domain %s "
753 "of molecule %s" % (domainnumber, molname))
756 these_domains[domain.get_unique_name()] = domain
757 if domain.residue_range == []
or \
758 domain.residue_range
is None:
759 domain_res = mol.get_residues()
761 start = domain.residue_range[0]+domain.pdb_offset
762 if domain.residue_range[1] ==
'END':
763 end = len(mol.sequence)
765 end = domain.residue_range[1]+domain.pdb_offset
766 domain_res = mol.residue_range(start-1, end-1)
767 print(
"BuildSystem.add_state: -------- domain %s of "
768 "molecule %s extends from residue %s to "
770 % (domainnumber, molname, start, end))
771 if domain.pdb_file ==
"BEADS":
772 print(
"BuildSystem.add_state: -------- domain %s of "
773 "molecule %s represented by BEADS "
774 % (domainnumber, molname))
775 mol.add_representation(
777 resolutions=[domain.bead_size],
778 setup_particles_as_densities=(
779 domain.em_residues_per_gaussian != 0),
781 these_domain_res[domain.get_unique_name()] = \
783 elif domain.pdb_file ==
"IDEAL_HELIX":
784 print(
"BuildSystem.add_state: -------- domain %s of "
785 "molecule %s represented by IDEAL_HELIX "
786 % (domainnumber, molname))
787 emper = domain.em_residues_per_gaussian
788 mol.add_representation(
790 resolutions=self.resolutions,
792 density_residues_per_component=emper,
793 density_prefix=domain.density_prefix,
794 density_force_compute=self.force_create_gmm_files,
796 these_domain_res[domain.get_unique_name()] = \
799 print(
"BuildSystem.add_state: -------- domain %s of "
800 "molecule %s represented by pdb file %s "
801 % (domainnumber, molname, domain.pdb_file))
802 domain_atomic = mol.add_structure(domain.pdb_file,
804 domain.residue_range,
807 domain_non_atomic = domain_res - domain_atomic
808 if not domain.em_residues_per_gaussian:
809 mol.add_representation(
810 domain_atomic, resolutions=self.resolutions,
812 if len(domain_non_atomic) > 0:
813 mol.add_representation(
815 resolutions=[domain.bead_size],
818 print(
"BuildSystem.add_state: -------- domain %s "
819 "of molecule %s represented by gaussians "
820 % (domainnumber, molname))
821 emper = domain.em_residues_per_gaussian
822 creategmm = self.force_create_gmm_files
823 mol.add_representation(
825 resolutions=self.resolutions,
826 density_residues_per_component=emper,
827 density_prefix=domain.density_prefix,
828 density_force_compute=creategmm,
830 if len(domain_non_atomic) > 0:
831 mol.add_representation(
833 resolutions=[domain.bead_size],
834 setup_particles_as_densities=
True,
836 these_domain_res[domain.get_unique_name()] = (
837 domain_atomic, domain_non_atomic)
838 self._domain_res.append(these_domain_res)
839 self._domains.append(these_domains)
840 print(
'BuildSystem.add_state: State', len(self.system.states),
'added')
844 """Return list of all molecules grouped by state.
845 For each state, it's a dictionary of Molecules where key is the
848 return [s.get_molecules()
for s
in self.system.get_states()]
850 def get_molecule(self, molname, copy_index=0, state_index=0):
851 return self.system.get_states()[state_index].
get_molecules()[
855 max_bead_trans=4.0, max_srb_trans=4.0, max_srb_rot=0.04):
856 """Builds representations and sets up degrees of freedom"""
857 print(
"BuildSystem.execute_macro: building representations")
858 self.root_hier = self.system.build()
860 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
862 for nstate, reader
in enumerate(self._readers):
863 rbs = reader.get_rigid_bodies()
864 srbs = reader.get_super_rigid_bodies()
865 csrbs = reader.get_chains_of_super_rigid_bodies()
868 domains_in_rbs = set()
870 print(
"BuildSystem.execute_macro: -------- building rigid "
871 "body %s" % (str(rblist)))
872 all_res = IMP.pmi.tools.OrderedSet()
873 bead_res = IMP.pmi.tools.OrderedSet()
875 domain = self._domains[nstate][dname]
876 print(
"BuildSystem.execute_macro: -------- adding %s"
878 all_res |= self._domain_res[nstate][dname][0]
879 bead_res |= self._domain_res[nstate][dname][1]
880 domains_in_rbs.add(dname)
882 print(
"BuildSystem.execute_macro: -------- creating rigid "
883 "body with max_trans %s max_rot %s "
884 "non_rigid_max_trans %s"
885 % (str(max_rb_trans), str(max_rb_rot),
886 str(max_bead_trans)))
887 self.dof.create_rigid_body(all_res,
888 nonrigid_parts=bead_res,
889 max_trans=max_rb_trans,
891 nonrigid_max_trans=max_bead_trans,
892 name=
"RigidBody %s" % dname)
895 for dname, domain
in self._domains[nstate].items():
896 if dname
not in domains_in_rbs:
897 if domain.pdb_file !=
"BEADS":
899 "No rigid bodies set for %s. Residues read from "
900 "the PDB file will not be sampled - only regions "
901 "missing from the PDB will be treated flexibly. "
902 "To sample the entire sequence, use BEADS instead "
903 "of a PDB file name" % dname,
905 self.dof.create_flexible_beads(
906 self._domain_res[nstate][dname][1],
907 max_trans=max_bead_trans)
911 print(
"BuildSystem.execute_macro: -------- building "
912 "super rigid body %s" % (str(srblist)))
913 all_res = IMP.pmi.tools.OrderedSet()
914 for dname
in srblist:
915 print(
"BuildSystem.execute_macro: -------- adding %s"
917 all_res |= self._domain_res[nstate][dname][0]
918 all_res |= self._domain_res[nstate][dname][1]
920 print(
"BuildSystem.execute_macro: -------- creating super "
921 "rigid body with max_trans %s max_rot %s "
922 % (str(max_srb_trans), str(max_srb_rot)))
923 self.dof.create_super_rigid_body(
924 all_res, max_trans=max_srb_trans, max_rot=max_srb_rot)
927 for csrblist
in csrbs:
928 all_res = IMP.pmi.tools.OrderedSet()
929 for dname
in csrblist:
930 all_res |= self._domain_res[nstate][dname][0]
931 all_res |= self._domain_res[nstate][dname][1]
932 all_res = list(all_res)
933 all_res.sort(key=
lambda r: r.get_index())
934 self.dof.create_main_chain_mover(all_res)
935 return self.root_hier, self.dof
940 """A macro for running all the basic operations of analysis.
941 Includes clustering, precision analysis, and making ensemble density maps.
942 A number of plots are also supported.
945 merge_directories=[
"./"],
946 stat_file_name_suffix=
"stat",
947 best_pdb_name_suffix=
"model",
949 do_create_directories=
True,
950 global_output_directory=
"output/",
951 replica_stat_file_suffix=
"stat_replica",
952 global_analysis_result_directory=
"./analysis/",
955 @param model The IMP model
956 @param stat_file_name_suffix
957 @param merge_directories The directories containing output files
958 @param best_pdb_name_suffix
959 @param do_clean_first
960 @param do_create_directories
961 @param global_output_directory Where everything is
962 @param replica_stat_file_suffix
963 @param global_analysis_result_directory
964 @param test_mode If True, nothing is changed on disk
968 from mpi4py
import MPI
969 self.comm = MPI.COMM_WORLD
970 self.rank = self.comm.Get_rank()
971 self.number_of_processes = self.comm.size
974 self.number_of_processes = 1
976 self.test_mode = test_mode
977 self._protocol_output = []
978 self.cluster_obj =
None
980 stat_dir = global_output_directory
983 for rd
in merge_directories:
984 stat_files = glob.glob(os.path.join(rd, stat_dir,
"stat.*.out"))
985 if len(stat_files) == 0:
986 warnings.warn(
"no stat files found in %s"
987 % os.path.join(rd, stat_dir),
989 self.stat_files += stat_files
992 """Capture details of the modeling protocol.
993 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
996 self._protocol_output.append((p, p._last_state))
999 score_key=
"Total_Score",
1000 rmf_file_key=
"rmf_file",
1001 rmf_file_frame_key=
"rmf_frame_index",
1004 nframes_trajectory=10000):
1005 """ Get a trajectory of the modeling run, for generating
1006 demonstrative movies
1008 @param score_key The score for ranking models
1009 @param rmf_file_key Key pointing to RMF filename
1010 @param rmf_file_frame_key Key pointing to RMF frame number
1011 @param outputdir The local output directory used in the run
1012 @param get_every Extract every nth frame
1013 @param nframes_trajectory Total number of frames of the trajectory
1018 self.stat_files, score_key, rmf_file_key, rmf_file_frame_key,
1020 score_list = list(map(float, trajectory_models[2]))
1022 max_score = max(score_list)
1023 min_score = min(score_list)
1025 bins = [(max_score-min_score)*math.exp(-float(i))+min_score
1026 for i
in range(nframes_trajectory)]
1027 binned_scores = [
None]*nframes_trajectory
1028 binned_model_indexes = [-1]*nframes_trajectory
1030 for model_index, s
in enumerate(score_list):
1031 bins_score_diffs = [abs(s-b)
for b
in bins]
1032 bin_index = min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1033 if binned_scores[bin_index]
is None:
1034 binned_scores[bin_index] = s
1035 binned_model_indexes[bin_index] = model_index
1037 old_diff = abs(binned_scores[bin_index]-bins[bin_index])
1038 new_diff = abs(s-bins[bin_index])
1039 if new_diff < old_diff:
1040 binned_scores[bin_index] = s
1041 binned_model_indexes[bin_index] = model_index
1043 print(binned_scores)
1044 print(binned_model_indexes)
1046 def _expand_ambiguity(self, prot, d):
1047 """If using PMI2, expand the dictionary to include copies as
1050 This also keeps the states separate.
1055 if '..' in key
or (isinstance(val, tuple)
and len(val) >= 3):
1058 states = IMP.atom.get_by_type(prot, IMP.atom.STATE_TYPE)
1059 if isinstance(val, tuple):
1067 for nst
in range(len(states)):
1069 copies = sel.get_selected_particles(with_representation=
False)
1071 for nc
in range(len(copies)):
1073 newdict[
'%s.%i..%i' % (name, nst, nc)] = \
1074 (start, stop, name, nc, nst)
1076 newdict[
'%s..%i' % (name, nc)] = \
1077 (start, stop, name, nc, nst)
1083 score_key=
"Total_Score",
1084 rmf_file_key=
"rmf_file",
1085 rmf_file_frame_key=
"rmf_frame_index",
1087 prefiltervalue=
None,
1090 alignment_components=
None,
1091 number_of_best_scoring_models=10,
1092 rmsd_calculation_components=
None,
1093 distance_matrix_file=
'distances.mat',
1094 load_distance_matrix_file=
False,
1095 skip_clustering=
False,
1096 number_of_clusters=1,
1098 exit_after_display=
True,
1100 first_and_last_frames=
None,
1101 density_custom_ranges=
None,
1102 write_pdb_with_centered_coordinates=
False,
1104 """Get the best scoring models, compute a distance matrix,
1105 cluster them, and create density maps.
1107 Tuple format: "molname" just the molecule,
1108 or (start,stop,molname,copy_num(optional),state_num(optional)
1109 Can pass None for copy or state to ignore that field.
1110 If you don't pass a specific copy number
1112 @param score_key The score for ranking models.
1113 @param rmf_file_key Key pointing to RMF filename
1114 @param rmf_file_frame_key Key pointing to RMF frame number
1115 @param state_number State number to analyze
1116 @param prefiltervalue Only include frames where the
1117 score key is below this value
1118 @param feature_keys Keywords for which you want to
1119 calculate average, medians, etc.
1120 If you pass "Keyname" it'll include everything that matches
1122 @param outputdir The local output directory used in
1124 @param alignment_components Dictionary with keys=groupname,
1125 values are tuples for aligning the structures
1126 e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1127 @param number_of_best_scoring_models Num models to keep per run
1128 @param rmsd_calculation_components For calculating RMSD
1129 (same format as alignment_components)
1130 @param distance_matrix_file Where to store/read the
1132 @param load_distance_matrix_file Try to load the distance
1134 @param skip_clustering Just extract the best scoring
1135 models and save the pdbs
1136 @param number_of_clusters Number of k-means clusters
1137 @param display_plot Display the distance matrix
1138 @param exit_after_display Exit after displaying distance
1140 @param get_every Extract every nth frame
1141 @param first_and_last_frames A tuple with the first and last
1142 frames to be analyzed. Values are percentages!
1143 Default: get all frames
1144 @param density_custom_ranges For density calculation
1145 (same format as alignment_components)
1146 @param write_pdb_with_centered_coordinates
1147 @param voxel_size Used for the density output
1151 self._outputdir = Path(outputdir).absolute()
1152 self._number_of_clusters = number_of_clusters
1153 for p, state
in self._protocol_output:
1154 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1165 if not load_distance_matrix_file:
1166 if len(self.stat_files) == 0:
1167 print(
"ERROR: no stat file found in the given path")
1169 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1170 self.stat_files, self.number_of_processes)[self.rank]
1173 for k
in (score_key, rmf_file_key, rmf_file_frame_key):
1174 if k
in feature_keys:
1176 "no need to pass " + k +
" to feature_keys.",
1178 feature_keys.remove(k)
1181 my_stat_files, score_key, feature_keys, rmf_file_key,
1182 rmf_file_frame_key, prefiltervalue, get_every, provenance=prov)
1183 rmf_file_list = best_models[0]
1184 rmf_file_frame_list = best_models[1]
1185 score_list = best_models[2]
1186 feature_keyword_list_dict = best_models[3]
1192 if self.number_of_processes > 1:
1196 rmf_file_frame_list)
1197 for k
in feature_keyword_list_dict:
1198 feature_keyword_list_dict[k] = \
1200 feature_keyword_list_dict[k])
1203 score_rmf_tuples = list(zip(score_list,
1205 rmf_file_frame_list,
1206 list(range(len(score_list)))))
1208 if density_custom_ranges:
1209 for k
in density_custom_ranges:
1210 if not isinstance(density_custom_ranges[k], list):
1211 raise Exception(
"Density custom ranges: values must "
1212 "be lists of tuples")
1215 if first_and_last_frames
is not None:
1216 nframes = len(score_rmf_tuples)
1217 first_frame = int(first_and_last_frames[0] * nframes)
1218 last_frame = int(first_and_last_frames[1] * nframes)
1219 if last_frame > len(score_rmf_tuples):
1221 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1224 best_score_rmf_tuples = sorted(
1226 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1227 best_score_rmf_tuples = [t+(n,)
for n, t
in
1228 enumerate(best_score_rmf_tuples)]
1230 prov.append(IMP.pmi.io.FilterProvenance(
1231 "Best scoring", 0, number_of_best_scoring_models))
1233 best_score_feature_keyword_list_dict = defaultdict(list)
1234 for tpl
in best_score_rmf_tuples:
1236 for f
in feature_keyword_list_dict:
1237 best_score_feature_keyword_list_dict[f].append(
1238 feature_keyword_list_dict[f][index])
1239 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1240 best_score_rmf_tuples,
1241 self.number_of_processes)[self.rank]
1244 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(
1245 self.model, 0, my_best_score_rmf_tuples[0][1])[0]
1246 if rmsd_calculation_components
is not None:
1247 tmp = self._expand_ambiguity(
1248 prot_ahead, rmsd_calculation_components)
1249 if tmp != rmsd_calculation_components:
1250 print(
'Detected ambiguity, expand rmsd components to',
1252 rmsd_calculation_components = tmp
1253 if alignment_components
is not None:
1254 tmp = self._expand_ambiguity(prot_ahead,
1255 alignment_components)
1256 if tmp != alignment_components:
1257 print(
'Detected ambiguity, expand alignment '
1258 'components to', tmp)
1259 alignment_components = tmp
1265 self.model, my_best_score_rmf_tuples[0],
1266 rmsd_calculation_components, state_number=state_number)
1268 self.model, my_best_score_rmf_tuples, alignment_components,
1269 rmsd_calculation_components, state_number=state_number)
1277 all_coordinates = got_coords[0]
1280 alignment_coordinates = got_coords[1]
1283 rmsd_coordinates = got_coords[2]
1286 rmf_file_name_index_dict = got_coords[3]
1289 all_rmf_file_names = got_coords[4]
1295 if density_custom_ranges:
1297 density_custom_ranges, voxel=voxel_size)
1299 dircluster = os.path.join(outputdir,
1300 "all_models."+str(self.rank))
1306 os.mkdir(dircluster)
1309 clusstat = open(os.path.join(
1310 dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1311 for cnt, tpl
in enumerate(my_best_score_rmf_tuples):
1313 rmf_frame_number = tpl[2]
1316 for key
in best_score_feature_keyword_list_dict:
1318 best_score_feature_keyword_list_dict[key][index]
1322 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1323 self.model, rmf_frame_number, rmf_name)
1325 linking_successful = \
1326 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1327 self.model, prots, rs, rmf_frame_number,
1329 if not linking_successful:
1335 states = IMP.atom.get_by_type(
1336 prots[0], IMP.atom.STATE_TYPE)
1337 prot = states[state_number]
1342 coords_f1 = alignment_coordinates[cnt]
1344 coords_f2 = alignment_coordinates[cnt]
1347 coords_f1, coords_f2)
1348 transformation = Ali.align()[1]
1362 rb = rbm.get_rigid_body()
1372 out_pdb_fn = os.path.join(
1373 dircluster, str(cnt)+
"."+str(self.rank)+
".pdb")
1374 out_rmf_fn = os.path.join(
1375 dircluster, str(cnt)+
"."+str(self.rank)+
".rmf3")
1376 o.init_pdb(out_pdb_fn, prot)
1377 tc = write_pdb_with_centered_coordinates
1378 o.write_pdb(out_pdb_fn,
1379 translate_to_geometric_center=tc)
1381 tmp_dict[
"local_pdb_file_name"] = \
1382 os.path.basename(out_pdb_fn)
1383 tmp_dict[
"rmf_file_full_path"] = rmf_name
1384 tmp_dict[
"local_rmf_file_name"] = \
1385 os.path.basename(out_rmf_fn)
1386 tmp_dict[
"local_rmf_frame_number"] = 0
1388 clusstat.write(str(tmp_dict)+
"\n")
1393 h.set_name(
"System")
1395 o.init_rmf(out_rmf_fn, [h], rs)
1397 o.write_rmf(out_rmf_fn)
1398 o.close_rmf(out_rmf_fn)
1400 if density_custom_ranges:
1401 DensModule.add_subunits_density(prot)
1403 if density_custom_ranges:
1404 DensModule.write_mrc(path=dircluster)
1409 if self.number_of_processes > 1:
1415 rmf_file_name_index_dict)
1417 alignment_coordinates)
1424 [best_score_feature_keyword_list_dict,
1425 rmf_file_name_index_dict],
1431 print(
"setup clustering class")
1434 for n, model_coordinate_dict
in enumerate(all_coordinates):
1436 if (alignment_components
is not None
1437 and len(self.cluster_obj.all_coords) == 0):
1439 self.cluster_obj.set_template(alignment_coordinates[n])
1440 self.cluster_obj.fill(all_rmf_file_names[n],
1441 rmsd_coordinates[n])
1442 print(
"Global calculating the distance matrix")
1445 self.cluster_obj.dist_matrix()
1449 self.cluster_obj.do_cluster(number_of_clusters)
1452 self.cluster_obj.plot_matrix(
1453 figurename=os.path.join(outputdir,
1455 if exit_after_display:
1457 self.cluster_obj.save_distance_matrix_file(
1458 file_name=distance_matrix_file)
1465 print(
"setup clustering class")
1467 self.cluster_obj.load_distance_matrix_file(
1468 file_name=distance_matrix_file)
1469 print(
"clustering with %s clusters" % str(number_of_clusters))
1470 self.cluster_obj.do_cluster(number_of_clusters)
1471 [best_score_feature_keyword_list_dict,
1472 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1475 self.cluster_obj.plot_matrix(figurename=os.path.join(
1476 outputdir,
'dist_matrix.pdf'))
1477 if exit_after_display:
1479 if self.number_of_processes > 1:
1487 print(self.cluster_obj.get_cluster_labels())
1488 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1489 print(
"rank %s " % str(self.rank))
1490 print(
"cluster %s " % str(n))
1491 print(
"cluster label %s " % str(cl))
1492 print(self.cluster_obj.get_cluster_label_names(cl))
1494 len(self.cluster_obj.get_cluster_label_names(cl))
1496 prov + [IMP.pmi.io.ClusterProvenance(cluster_size)]
1499 if density_custom_ranges:
1501 density_custom_ranges,
1504 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1506 os.mkdir(dircluster)
1512 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1513 clusstat = open(dircluster +
"stat.out",
"w")
1514 for k, structure_name
in enumerate(
1515 self.cluster_obj.get_cluster_label_names(cl)):
1518 tmp_dict.update(rmsd_dict)
1519 index = rmf_file_name_index_dict[structure_name]
1520 for key
in best_score_feature_keyword_list_dict:
1522 key] = best_score_feature_keyword_list_dict[
1528 rmf_name = structure_name.split(
"|")[0]
1529 rmf_frame_number = int(structure_name.split(
"|")[1])
1530 clusstat.write(str(tmp_dict) +
"\n")
1535 IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1536 self.model, rmf_frame_number, rmf_name)
1538 linking_successful = \
1539 IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1540 self.model, prots, rs, rmf_frame_number,
1542 if not linking_successful:
1547 states = IMP.atom.get_by_type(
1548 prots[0], IMP.atom.STATE_TYPE)
1549 prot = states[state_number]
1555 co = self.cluster_obj
1556 model_index = co.get_model_index_from_name(
1558 transformation = co.get_transformation_to_first_member(
1569 rb = rbm.get_rigid_body()
1578 if density_custom_ranges:
1579 DensModule.add_subunits_density(prot)
1584 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1585 o.write_pdb(dircluster + str(k) +
".pdb")
1590 h.set_name(
"System")
1592 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
1593 o.write_rmf(dircluster + str(k) +
".rmf3")
1594 o.close_rmf(dircluster + str(k) +
".rmf3")
1599 if density_custom_ranges:
1600 DensModule.write_mrc(path=dircluster)
1603 if self.number_of_processes > 1:
1606 def get_cluster_rmsd(self, cluster_num):
1607 if self.cluster_obj
is None:
1609 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1611 def save_objects(self, objects, file_name):
1613 outf = open(file_name,
'wb')
1614 pickle.dump(objects, outf)
1617 def load_objects(self, file_name):
1619 inputf = open(file_name,
'rb')
1620 objects = pickle.load(inputf)
1628 This class contains analysis utilities to investigate ReplicaExchange
1636 def __init__(self, model, stat_files, best_models=None, score_key=None,
1639 Construction of the Class.
1640 @param model IMP.Model()
1641 @param stat_files list of string. Can be ascii stat files,
1643 @param best_models Integer. Number of best scoring models,
1644 if None: all models will be read
1645 @param score_key Use the provided stat key keyword as the score
1646 (by default, the total score is used)
1647 @param alignment boolean (Default=True). Align before computing
1652 self.best_models = best_models
1654 model, stat_files, self.best_models, score_key, cache=
True)
1656 StatHierarchyHandler=self.stath0)
1669 self.clusters.append(c)
1670 for n0
in range(len(self.stath0)):
1672 self.pairwise_rmsd = {}
1673 self.pairwise_molecular_assignment = {}
1674 self.alignment = alignment
1675 self.symmetric_molecules = {}
1676 self.issymmetricsel = {}
1678 self.molcopydict0 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1680 self.molcopydict1 = IMP.pmi.tools.get_molecules_dictionary_by_copy(
1685 Setup the selection onto which the rmsd is computed
1686 @param kwargs use IMP.atom.Selection keywords
1694 Store names of symmetric molecules
1696 self.symmetric_molecules[molecule_name] = 0
1701 Setup the selection onto which the alignment is computed
1702 @param kwargs use IMP.atom.Selection keywords
1710 def clean_clusters(self):
1711 for c
in self.clusters:
1715 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1717 Cluster the models based on RMSD.
1718 @param rmsd_cutoff Float the distance cutoff in Angstrom
1719 @param metric (Default=IMP.atom.get_rmsd) the metric that will
1720 be used to compute rmsds
1722 self.clean_clusters()
1723 not_clustered = set(range(len(self.stath1)))
1724 while len(not_clustered) > 0:
1725 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1730 Refine the clusters by merging the ones whose centers are close
1731 @param rmsd_cutoff cutoff distance in Angstorms
1733 clusters_copy = self.clusters
1734 for c0, c1
in itertools.combinations(self.clusters, 2):
1735 if c0.center_index
is None:
1737 if c1.center_index
is None:
1739 _ = self.stath0[c0.center_index]
1740 _ = self.stath1[c1.center_index]
1741 rmsd, molecular_assignment = self.
rmsd()
1742 if rmsd <= rmsd_cutoff:
1743 if c1
in self.clusters:
1744 clusters_copy.remove(c1)
1746 self.clusters = clusters_copy
1753 def set_cluster_assignments(self, cluster_ids):
1754 if len(cluster_ids) != len(self.stath0):
1755 raise ValueError(
'cluster ids has to be same length as '
1759 for i
in sorted(list(set(cluster_ids))):
1761 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
1762 self.clusters[idx].add_member(i, d)
1766 Return the model data from a cluster
1767 @param cluster IMP.pmi.output.Cluster object
1776 Save the data for the whole models into a pickle file
1777 @param filename string
1779 self.stath0.save_data(filename)
1783 Set the data from an external IMP.pmi.output.Data
1784 @param data IMP.pmi.output.Data
1786 self.stath0.data = data
1787 self.stath1.data = data
1791 Load the data from an external pickled file
1792 @param filename string
1794 self.stath0.load_data(filename)
1795 self.stath1.load_data(filename)
1796 self.best_models = len(self.stath0)
1798 def add_cluster(self, rmf_name_list):
1800 print(
"creating cluster index "+str(len(self.clusters)))
1801 self.clusters.append(c)
1802 current_len = len(self.stath0)
1804 for rmf
in rmf_name_list:
1805 print(
"adding rmf "+rmf)
1806 self.stath0.add_stat_file(rmf)
1807 self.stath1.add_stat_file(rmf)
1809 for n0
in range(current_len, len(self.stath0)):
1810 d0 = self.stath0[n0]
1811 c.add_member(n0, d0)
1816 Save the clusters into a pickle file
1817 @param filename string
1820 fl = open(filename,
'wb')
1821 pickle.dump(self.clusters, fl)
1825 Load the clusters from a pickle file
1826 @param filename string
1827 @param append bool (Default=False), if True. append the clusters
1828 to the ones currently present
1831 fl = open(filename,
'rb')
1832 self.clean_clusters()
1834 self.clusters += pickle.load(fl)
1836 self.clusters = pickle.load(fl)
1845 Compute the cluster center for a given cluster
1847 member_distance = defaultdict(float)
1849 for n0, n1
in itertools.combinations(cluster.members, 2):
1852 rmsd, _ = self.
rmsd()
1853 member_distance[n0] += rmsd
1855 if len(member_distance) > 0:
1856 cluster.center_index = min(member_distance,
1857 key=member_distance.get)
1859 cluster.center_index = cluster.members[0]
1864 Save the coordinates of the current cluster a single rmf file
1866 print(
"saving coordinates", cluster)
1870 if rmf_name
is None:
1871 rmf_name = prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
1873 _ = self.stath1[cluster.members[0]]
1875 o.init_rmf(rmf_name, [self.stath1])
1876 for n1
in cluster.members:
1882 o.write_rmf(rmf_name)
1884 o.close_rmf(rmf_name)
1888 remove structures that are similar
1889 append it to a new cluster
1891 print(
"pruning models")
1893 filtered = [selected]
1894 remaining = range(1, len(self.stath1), 10)
1896 while len(remaining) > 0:
1897 d0 = self.stath0[selected]
1899 for n1
in remaining:
1904 if d <= rmsd_cutoff:
1906 print(
"pruning model %s, similar to model %s, rmsd %s"
1907 % (str(n1), str(selected), str(d)))
1908 remaining = [x
for x
in remaining
if x
not in rm]
1909 if len(remaining) == 0:
1911 selected = remaining[0]
1912 filtered.append(selected)
1915 self.clusters.append(c)
1917 d0 = self.stath0[n0]
1918 c.add_member(n0, d0)
1923 Compute the precision of a cluster
1929 if cluster.center_index
is not None:
1930 members1 = [cluster.center_index]
1932 members1 = cluster.members
1936 for n1
in cluster.members:
1941 tmp_rmsd, _ = self.
rmsd()
1946 precision = rmsd/npairs
1947 cluster.precision = precision
1952 Compute the bipartite precision (ie the cross-precision)
1953 between two clusters
1957 for cn0, n0
in enumerate(cluster1.members):
1959 for cn1, n1
in enumerate(cluster2.members):
1961 tmp_rmsd, _ = self.
rmsd()
1963 print(
"--- rmsd between structure %s and structure "
1964 "%s is %s" % (str(cn0), str(cn1), str(tmp_rmsd)))
1967 precision = rmsd/npairs
1970 def rmsf(self, cluster, molecule, copy_index=0, state_index=0,
1971 cluster_ref=
None, step=1):
1973 Compute the Root mean square fluctuations
1974 of a molecule in a cluster
1975 Returns an IMP.pmi.tools.OrderedDict() where the keys are the
1976 residue indexes and the value is the rmsf
1978 rmsf = IMP.pmi.tools.OrderedDict()
1981 if cluster_ref
is not None:
1982 if cluster_ref.center_index
is not None:
1983 members0 = [cluster_ref.center_index]
1985 members0 = cluster_ref.members
1987 if cluster.center_index
is not None:
1988 members0 = [cluster.center_index]
1990 members0 = cluster.members
1993 copy_index=copy_index, state_index=state_index)
1994 ps0 = s0.get_selected_particles()
1996 residue_indexes = list(IMP.pmi.tools.OrderedSet(
2002 d0 = self.stath0[n0]
2003 for n1
in cluster.members[::step]:
2005 print(
"--- rmsf %s %s" % (str(n0), str(n1)))
2009 self.stath1, molecule=molecule,
2010 residue_indexes=residue_indexes, resolution=1,
2011 copy_index=copy_index, state_index=state_index)
2012 ps1 = s1.get_selected_particles()
2014 d1 = self.stath1[n1]
2017 for n, (p0, p1)
in enumerate(zip(ps0, ps1)):
2018 r = residue_indexes[n]
2030 for stath
in [self.stath0, self.stath1]:
2031 if molecule
not in self.symmetric_molecules:
2033 stath, molecule=molecule, residue_index=r,
2034 resolution=1, copy_index=copy_index,
2035 state_index=state_index)
2038 stath, molecule=molecule, residue_index=r,
2039 resolution=1, state_index=state_index)
2041 ps = s.get_selected_particles()
2050 def save_densities(self, cluster, density_custom_ranges, voxel_size=5,
2051 reference=
"Absolute", prefix=
"./", step=1):
2057 for n1
in cluster.members[::step]:
2058 print(
"density "+str(n1))
2063 dens.add_subunits_density(self.stath1)
2065 dens.write_mrc(path=prefix+
'/', suffix=str(cluster.cluster_id))
2068 def contact_map(self, cluster, contact_threshold=15, log_scale=False,
2069 consolidate=
False, molecules=
None, prefix=
'./',
2070 reference=
"Absolute"):
2074 import matplotlib.pyplot
as plt
2075 import matplotlib.cm
as cm
2076 from scipy.spatial.distance
import cdist
2078 if molecules
is None:
2087 molecules=molecules).get_selected_particles())]
2088 unique_copies = [mol
for mol
in mols
if mol.get_copy_index() == 0]
2089 mol_names_unique = dict((mol.get_name(), mol)
for mol
in unique_copies)
2090 total_len_unique = sum(max(mol.get_residue_indexes())
2091 for mol
in unique_copies)
2098 seqlen = max(mol.get_residue_indexes())
2099 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2103 for mol
in unique_copies:
2104 seqlen = max(mol.get_residue_indexes())
2105 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2108 for ncl, n1
in enumerate(cluster.members):
2111 coord_dict = IMP.pmi.tools.OrderedDict()
2113 rindexes = mol.get_residue_indexes()
2114 coords = np.ones((max(rindexes), 3))
2115 for rnum
in rindexes:
2118 selpart = sel.get_selected_particles()
2119 if len(selpart) == 0:
2121 selpart = selpart[0]
2122 coords[rnum - 1, :] = \
2124 coord_dict[mol] = coords
2127 coords = np.concatenate(list(coord_dict.values()))
2128 dists = cdist(coords, coords)
2129 binary_dists = np.where((dists <= contact_threshold)
2130 & (dists >= 1.0), 1.0, 0.0)
2132 binary_dists_dict = {}
2134 len1 = max(mol1.get_residue_indexes())
2136 name1 = mol1.get_name()
2137 name2 = mol2.get_name()
2138 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2139 if (name1, name2)
not in binary_dists_dict:
2140 binary_dists_dict[(name1, name2)] = \
2141 np.zeros((len1, len1))
2142 binary_dists_dict[(name1, name2)] += \
2143 np.where((dists <= contact_threshold)
2144 & (dists >= 1.0), 1.0, 0.0)
2145 binary_dists = np.zeros((total_len_unique, total_len_unique))
2147 for name1, name2
in binary_dists_dict:
2148 r1 = index_dict[mol_names_unique[name1]]
2149 r2 = index_dict[mol_names_unique[name2]]
2150 binary_dists[min(r1):max(r1)+1, min(r2):max(r2)+1] = \
2151 np.where((binary_dists_dict[(name1, name2)] >= 1.0),
2157 contact_freqs = binary_dists
2159 dist_maps.append(dists)
2160 av_dist_map += dists
2161 contact_freqs += binary_dists
2164 contact_freqs = -np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2166 contact_freqs = 1.0/len(cluster)*contact_freqs
2167 av_dist_map = 1.0/len(cluster)*contact_freqs
2169 fig = plt.figure(figsize=(100, 100))
2170 ax = fig.add_subplot(111)
2173 gap_between_components = 50
2178 sorted_tuple = sorted(
2180 mol).get_extended_name(), mol)
for mol
in mols)
2181 prot_list = list(zip(*sorted_tuple))[1]
2183 sorted_tuple = sorted(
2185 for mol
in unique_copies)
2186 prot_list = list(zip(*sorted_tuple))[1]
2188 prot_listx = prot_list
2189 nresx = gap_between_components + \
2190 sum([max(mol.get_residue_indexes())
2191 + gap_between_components
for mol
in prot_listx])
2194 prot_listy = prot_list
2195 nresy = gap_between_components + \
2196 sum([max(mol.get_residue_indexes())
2197 + gap_between_components
for mol
in prot_listy])
2202 res = gap_between_components
2203 for mol
in prot_listx:
2204 resoffsetx[mol] = res
2205 res += max(mol.get_residue_indexes())
2207 res += gap_between_components
2211 res = gap_between_components
2212 for mol
in prot_listy:
2213 resoffsety[mol] = res
2214 res += max(mol.get_residue_indexes())
2216 res += gap_between_components
2218 resoffsetdiagonal = {}
2219 res = gap_between_components
2220 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2221 resoffsetdiagonal[mol] = res
2222 res += max(mol.get_residue_indexes())
2223 res += gap_between_components
2228 for n, prot
in enumerate(prot_listx):
2229 res = resoffsetx[prot]
2231 for proty
in prot_listy:
2232 resy = resoffsety[proty]
2233 endy = resendy[proty]
2234 ax.plot([res, res], [resy, endy], linestyle=
'-',
2235 color=
'gray', lw=0.4)
2236 ax.plot([end, end], [resy, endy], linestyle=
'-',
2237 color=
'gray', lw=0.4)
2238 xticks.append((float(res) + float(end)) / 2)
2240 prot).get_extended_name())
2244 for n, prot
in enumerate(prot_listy):
2245 res = resoffsety[prot]
2247 for protx
in prot_listx:
2248 resx = resoffsetx[protx]
2249 endx = resendx[protx]
2250 ax.plot([resx, endx], [res, res], linestyle=
'-',
2251 color=
'gray', lw=0.4)
2252 ax.plot([resx, endx], [end, end], linestyle=
'-',
2253 color=
'gray', lw=0.4)
2254 yticks.append((float(res) + float(end)) / 2)
2256 prot).get_extended_name())
2260 tmp_array = np.zeros((nresx, nresy))
2262 for px
in prot_listx:
2263 for py
in prot_listy:
2264 resx = resoffsetx[px]
2265 lengx = resendx[px] - 1
2266 resy = resoffsety[py]
2267 lengy = resendy[py] - 1
2268 indexes_x = index_dict[px]
2269 minx = min(indexes_x)
2270 maxx = max(indexes_x)
2271 indexes_y = index_dict[py]
2272 miny = min(indexes_y)
2273 maxy = max(indexes_y)
2274 tmp_array[resx:lengx, resy:lengy] = \
2275 contact_freqs[minx:maxx, miny:maxy]
2276 ret[(px, py)] = np.argwhere(
2277 contact_freqs[minx:maxx, miny:maxy] == 1.0) + 1
2279 ax.imshow(tmp_array, cmap=colormap, norm=colornorm,
2280 origin=
'lower', alpha=0.6, interpolation=
'nearest')
2282 ax.set_xticks(xticks)
2283 ax.set_xticklabels(xlabels, rotation=90)
2284 ax.set_yticks(yticks)
2285 ax.set_yticklabels(ylabels)
2286 plt.setp(ax.get_xticklabels(), fontsize=6)
2287 plt.setp(ax.get_yticklabels(), fontsize=6)
2290 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2291 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2293 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf",
2294 dpi=300, transparent=
"False")
2297 def plot_rmsd_matrix(self, filename):
2298 self.compute_all_pairwise_rmsd()
2299 distance_matrix = np.zeros(
2300 (len(self.stath0), len(self.stath1)))
2301 for (n0, n1)
in self.pairwise_rmsd:
2302 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0, n1)]
2304 import matplotlib
as mpl
2306 import matplotlib.pylab
as pl
2307 from scipy.cluster
import hierarchy
as hrc
2309 fig = pl.figure(figsize=(10, 8))
2310 ax = fig.add_subplot(212)
2311 dendrogram = hrc.dendrogram(
2312 hrc.linkage(distance_matrix),
2315 leaves_order = dendrogram[
'leaves']
2316 ax.set_xlabel(
'Model')
2317 ax.set_ylabel(
'RMSD [Angstroms]')
2319 ax2 = fig.add_subplot(221)
2321 distance_matrix[leaves_order, :][:, leaves_order],
2322 interpolation=
'nearest')
2323 cb = fig.colorbar(cax)
2324 cb.set_label(
'RMSD [Angstroms]')
2325 ax2.set_xlabel(
'Model')
2326 ax2.set_ylabel(
'Model')
2328 pl.savefig(filename, dpi=300)
2337 Update the cluster id numbers
2339 for n, c
in enumerate(self.clusters):
2342 def get_molecule(self, hier, name, copy):
2350 self.seldict0 = IMP.pmi.tools.get_selections_dictionary(
2351 self.sel0_rmsd.get_selected_particles())
2352 self.seldict1 = IMP.pmi.tools.get_selections_dictionary(
2353 self.sel1_rmsd.get_selected_particles())
2354 for mol
in self.seldict0:
2355 for sel
in self.seldict0[mol]:
2356 self.issymmetricsel[sel] =
False
2357 for mol
in self.symmetric_molecules:
2358 self.symmetric_molecules[mol] = len(self.seldict0[mol])
2359 for sel
in self.seldict0[mol]:
2360 self.issymmetricsel[sel] =
True
2364 self.sel1_alignment, self.sel0_alignment)
2366 for rb
in self.rbs1:
2369 for bead
in self.beads1:
2377 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2379 initial filling of the clusters.
2382 print(
"clustering model "+str(n0))
2383 d0 = self.stath0[n0]
2385 print(
"creating cluster index "+str(len(self.clusters)))
2386 self.clusters.append(c)
2387 c.add_member(n0, d0)
2388 clustered = set([n0])
2390 print(
"--- trying to add model " + str(n1) +
" to cluster "
2391 + str(len(self.clusters)))
2392 d1 = self.stath1[n1]
2395 rmsd, _ = self.
rmsd(metric=metric)
2396 if rmsd < rmsd_cutoff:
2397 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2398 c.add_member(n1, d1)
2401 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2406 merge the clusters that have close members
2408 @param rmsd_cutoff cutoff distance in Angstorms
2409 @param metric Function to calculate distance between two Selections
2410 (by default, IMP.atom.get_rmsd is used)
2418 for c0, c1
in filter(
lambda x: len(x[0].members) > 1,
2419 itertools.combinations(self.clusters, 2)):
2420 n0, n1 = [c.members[0]
for c
in (c0, c1)]
2423 rmsd, _ = self.
rmsd()
2424 if (rmsd < 2*rmsd_cutoff
and
2426 to_merge.append((c0, c1))
2428 for c0, c
in reversed(to_merge):
2432 self.clusters = [c
for c
in
2433 filter(
lambda x: len(x.members) > 0, self.clusters)]
2437 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2439 print(
"check close members for clusters " + str(c0.cluster_id) +
2440 " and " + str(c1.cluster_id))
2441 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2444 rmsd, _ = self.
rmsd(metric=metric)
2445 if rmsd < rmsd_cutoff:
2460 a function that returns the permutation best_sel of sels0 that
2463 best_rmsd2 = float(
'inf')
2465 if self.issymmetricsel[sels0[0]]:
2468 for offset
in range(N):
2469 sels = [sels0[(offset+i) % N]
for i
in range(N)]
2472 r = metric(sel0, sel1)
2474 if rmsd2 < best_rmsd2:
2478 for sels
in itertools.permutations(sels0):
2480 for sel0, sel1
in itertools.takewhile(
2481 lambda x: rmsd2 < best_rmsd2, zip(sels, sels1)):
2482 r = metric(sel0, sel1)
2484 if rmsd2 < best_rmsd2:
2487 return best_sel, best_rmsd2
2489 def compute_all_pairwise_rmsd(self):
2490 for d0
in self.stath0:
2491 for d1
in self.stath1:
2492 rmsd, _ = self.
rmsd()
2494 def rmsd(self, metric=IMP.atom.get_rmsd):
2496 Computes the RMSD. Resolves ambiguous pairs assignments
2500 n0 = self.stath0.current_index
2501 n1 = self.stath1.current_index
2502 if ((n0, n1)
in self.pairwise_rmsd) \
2503 and ((n0, n1)
in self.pairwise_molecular_assignment):
2504 return (self.pairwise_rmsd[(n0, n1)],
2505 self.pairwise_molecular_assignment[(n0, n1)])
2515 molecular_assignment = {}
2516 for molname, sels0
in self.seldict0.items():
2517 sels_best_order, best_rmsd2 = \
2518 self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2520 Ncoords = len(sels_best_order[0].get_selected_particles())
2521 Ncopies = len(self.seldict1[molname])
2522 total_rmsd += Ncoords*best_rmsd2
2523 total_N += Ncoords*Ncopies
2525 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2526 p0 = sel0.get_selected_particles()[0]
2527 p1 = sel1.get_selected_particles()[0]
2532 molecular_assignment[(molname, c0)] = (molname, c1)
2534 total_rmsd = math.sqrt(total_rmsd/total_N)
2536 self.pairwise_rmsd[(n0, n1)] = total_rmsd
2537 self.pairwise_molecular_assignment[(n0, n1)] = molecular_assignment
2538 self.pairwise_rmsd[(n1, n0)] = total_rmsd
2539 self.pairwise_molecular_assignment[(n1, n0)] = molecular_assignment
2540 return total_rmsd, molecular_assignment
2544 Fix the reference structure for structural alignment, rmsd and
2547 @param reference can be either "Absolute" (cluster center of the
2548 first cluster) or Relative (cluster center of the current
2550 #param cluster the reference IMP.pmi.output.Cluster object
2552 if reference ==
"Absolute":
2554 elif reference ==
"Relative":
2555 if cluster.center_index:
2556 n0 = cluster.center_index
2558 n0 = cluster.members[0]
2563 compute the molecular assignments between multiple copies
2564 of the same sequence. It changes the Copy index of Molecules
2567 _, molecular_assignment = self.
rmsd()
2568 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2569 mol0 = self.molcopydict0[m0][c0]
2570 mol1 = self.molcopydict1[m1][c1]
2573 p1.set_value(cik0, c0)
2577 Undo the Copy index assignment
2580 _, molecular_assignment = self.
rmsd()
2581 for (m0, c0), (m1, c1)
in molecular_assignment.items():
2582 mol0 = self.molcopydict0[m0][c0]
2583 mol1 = self.molcopydict1[m1][c1]
2586 p1.set_value(cik0, c1)
2593 s =
"AnalysisReplicaExchange\n"
2594 s +=
"---- number of clusters %s \n" % str(len(self.clusters))
2595 s +=
"---- number of models %s \n" % str(len(self.stath0))
2598 def __getitem__(self, int_slice_adaptor):
2599 if isinstance(int_slice_adaptor, int):
2600 return self.clusters[int_slice_adaptor]
2601 elif isinstance(int_slice_adaptor, slice):
2602 return self.__iter__(int_slice_adaptor)
2604 raise TypeError(
"Unknown Type")
2607 return len(self.clusters)
2609 def __iter__(self, slice_key=None):
2610 if slice_key
is None:
2611 for i
in range(len(self)):
2614 for i
in range(len(self))[slice_key]:
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignment.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A member of a rigid body, it has internal (local) coordinates.
A macro to help setup and run replica exchange.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignments between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
def merge_aggregates
merge the clusters that have close members
Represent the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Warning related to handling of structures.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
Warning for an expected, but missing, file.
Tools for clustering and cluster analysis.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
Mapping between FASTA one-letter codes and residue types.
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
Sample using replica exchange.
Warning for probably incorrect input parameters.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.