1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
19 from operator
import itemgetter
20 from collections
import defaultdict
26 class _RMFRestraints(object):
27 """All restraints that are written out to the RMF file"""
28 def __init__(self, model, user_restraints):
30 self._user_restraints = user_restraints
if user_restraints
else []
33 return (len(self._user_restraints)
34 + self._rmf_rs.get_number_of_restraints())
38 __nonzero__ = __bool__
40 def __getitem__(self, i):
41 class FakePMIWrapper(object):
42 def __init__(self, r):
43 self.r = IMP.RestraintSet.get_from(r)
44 get_restraint =
lambda self: self.r
45 lenuser = len(self._user_restraints)
47 return self._user_restraints[i]
48 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
49 r = self._rmf_rs.get_restraint(i - lenuser)
50 return FakePMIWrapper(r)
52 raise IndexError(
"Out of range")
56 """A macro to help setup and run replica exchange.
57 Supports Monte Carlo and molecular dynamics.
58 Produces trajectory RMF files, best PDB structures,
59 and output stat files.
65 monte_carlo_sample_objects=
None,
66 molecular_dynamics_sample_objects=
None,
68 rmf_output_objects=
None,
69 crosslink_restraints=
None,
70 monte_carlo_temperature=1.0,
71 simulated_annealing=
False,
72 simulated_annealing_minimum_temperature=1.0,
73 simulated_annealing_maximum_temperature=2.5,
74 simulated_annealing_minimum_temperature_nframes=100,
75 simulated_annealing_maximum_temperature_nframes=100,
76 replica_exchange_minimum_temperature=1.0,
77 replica_exchange_maximum_temperature=2.5,
78 replica_exchange_swap=
True,
80 number_of_best_scoring_models=500,
83 molecular_dynamics_steps=10,
84 molecular_dynamics_max_time_step=1.0,
85 number_of_frames=1000,
86 save_coordinates_mode=
"lowest_temperature",
87 nframes_write_coordinates=1,
88 write_initial_rmf=
True,
89 initial_rmf_name_suffix=
"initial",
90 stat_file_name_suffix=
"stat",
91 best_pdb_name_suffix=
"model",
93 do_create_directories=
True,
94 global_output_directory=
"./",
97 replica_stat_file_suffix=
"stat_replica",
98 em_object_for_rmf=
None,
100 replica_exchange_object=
None,
103 @param model The IMP model
104 @param representation PMI.representation.Representation object
105 (or list of them, for multi-state modeling)
106 @param root_hier Instead of passing Representation, pass the System hierarchy
107 @param monte_carlo_sample_objects Objects for MC sampling; a list of
108 structural components (generally the representation) that will
109 be moved and restraints with parameters that need to
111 For PMI2: just pass flat list of movers
112 @param molecular_dynamics_sample_objects Objects for MD sampling
113 For PMI2: just pass flat list of particles
114 @param output_objects A list of structural objects and restraints
115 that will be included in output (ie, statistics "stat" files). Any object
116 that provides a get_output() method can be used here. If None is passed
117 the macro will not write stat files.
118 @param rmf_output_objects A list of structural objects and restraints
119 that will be included in rmf. Any object
120 that provides a get_output() method can be used here.
121 @param crosslink_restraints List of cross-link restraints that will
122 be included in output RMF files (for visualization).
123 @param monte_carlo_temperature MC temp (may need to be optimized
124 based on post-sampling analysis)
125 @param simulated_annealing If True, perform simulated annealing
126 @param simulated_annealing_minimum_temperature Should generally be
127 the same as monte_carlo_temperature.
128 @param simulated_annealing_minimum_temperature_nframes Number of
129 frames to compute at minimum temperature.
130 @param simulated_annealing_maximum_temperature_nframes Number of
132 temps > simulated_annealing_maximum_temperature.
133 @param replica_exchange_minimum_temperature Low temp for REX; should
134 generally be the same as monte_carlo_temperature.
135 @param replica_exchange_maximum_temperature High temp for REX
136 @param replica_exchange_swap Boolean, enable disable temperature
138 @param num_sample_rounds Number of rounds of MC/MD per cycle
139 @param number_of_best_scoring_models Number of top-scoring PDB models
140 to keep around for analysis
141 @param monte_carlo_steps Number of MC steps per round
142 @param self_adaptive self adaptive scheme for monte carlo movers
143 @param molecular_dynamics_steps Number of MD steps per round
144 @param molecular_dynamics_max_time_step Max time step for MD
145 @param number_of_frames Number of REX frames to run
146 @param save_coordinates_mode string: how to save coordinates.
147 "lowest_temperature" (default) only the lowest temperatures is saved
148 "25th_score" all replicas whose score is below the 25th percentile
149 "50th_score" all replicas whose score is below the 50th percentile
150 "75th_score" all replicas whose score is below the 75th percentile
151 @param nframes_write_coordinates How often to write the coordinates
153 @param write_initial_rmf Write the initial configuration
154 @param global_output_directory Folder that will be created to house
156 @param test_mode Set to True to avoid writing any files, just test one frame.
163 self.output_objects = output_objects
164 self.rmf_output_objects=rmf_output_objects
165 self.representation = representation
167 if type(representation) == list:
168 self.is_multi_state =
True
169 self.root_hiers = [r.prot
for r
in representation]
170 self.vars[
"number_of_states"] = len(representation)
172 self.is_multi_state =
False
173 self.root_hier = representation.prot
174 self.vars[
"number_of_states"] = 1
175 elif root_hier
and type(root_hier) ==
IMP.atom.Hierarchy and root_hier.get_name()==
'System':
177 if self.output_objects
is not None:
178 self.output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
179 if self.rmf_output_objects
is not None:
180 self.rmf_output_objects.append(IMP.pmi.io.TotalScoreOutput(self.model))
181 self.root_hier = root_hier
182 states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
183 self.vars[
"number_of_states"] = len(states)
185 self.root_hiers = states
186 self.is_multi_state =
True
188 self.root_hier = root_hier
189 self.is_multi_state =
False
191 raise Exception(
"Must provide representation or System hierarchy (root_hier)")
193 self._rmf_restraints = _RMFRestraints(model, crosslink_restraints)
194 self.em_object_for_rmf = em_object_for_rmf
195 self.monte_carlo_sample_objects = monte_carlo_sample_objects
196 self.vars[
"self_adaptive"]=self_adaptive
197 if sample_objects
is not None:
198 self.monte_carlo_sample_objects+=sample_objects
199 self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
200 self.replica_exchange_object = replica_exchange_object
201 self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
202 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
204 "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
206 "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
207 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
208 self.vars[
"simulated_annealing"]=\
210 self.vars[
"simulated_annealing_minimum_temperature"]=\
211 simulated_annealing_minimum_temperature
212 self.vars[
"simulated_annealing_maximum_temperature"]=\
213 simulated_annealing_maximum_temperature
214 self.vars[
"simulated_annealing_minimum_temperature_nframes"]=\
215 simulated_annealing_minimum_temperature_nframes
216 self.vars[
"simulated_annealing_maximum_temperature_nframes"]=\
217 simulated_annealing_maximum_temperature_nframes
219 self.vars[
"num_sample_rounds"] = num_sample_rounds
221 "number_of_best_scoring_models"] = number_of_best_scoring_models
222 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
223 self.vars[
"molecular_dynamics_steps"]=molecular_dynamics_steps
224 self.vars[
"number_of_frames"] = number_of_frames
225 if not save_coordinates_mode
in [
"lowest_temperature",
"25th_score",
"50th_score",
"75th_score"]:
226 raise Exception(
"save_coordinates_mode has unrecognized value")
228 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
229 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
230 self.vars[
"write_initial_rmf"] = write_initial_rmf
231 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
232 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
233 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
234 self.vars[
"do_clean_first"] = do_clean_first
235 self.vars[
"do_create_directories"] = do_create_directories
236 self.vars[
"global_output_directory"] = global_output_directory
237 self.vars[
"rmf_dir"] = rmf_dir
238 self.vars[
"best_pdb_dir"] = best_pdb_dir
239 self.vars[
"atomistic"] = atomistic
240 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
241 self.vars[
"geometries"] =
None
242 self.test_mode = test_mode
245 if self.vars[
"geometries"]
is None:
246 self.vars[
"geometries"] = list(geometries)
248 self.vars[
"geometries"].extend(geometries)
251 print(
"ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
252 print(
"--- it stores the best scoring pdb models in pdbs/")
253 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
254 print(
"--- variables:")
255 keys = list(self.vars.keys())
258 print(
"------", v.ljust(30), self.vars[v])
260 def get_replica_exchange_object(self):
261 return self.replica_exchange_object
263 def _add_provenance(self, sampler_md, sampler_mc):
264 """Record details about the sampling in the IMP Hierarchies"""
265 if not self.is_multi_state
or self.pmi2:
266 output_hierarchies = [self.root_hier]
268 output_hierarchies = self.root_hiers
272 method =
"Molecular Dynamics"
273 iterations += self.vars[
"molecular_dynamics_steps"]
275 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
276 iterations += self.vars[
"monte_carlo_steps"]
278 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
280 iterations *= self.vars[
"num_sample_rounds"]
282 for h
in output_hierarchies:
283 pi = self.model.add_particle(
"sampling")
285 self.model, pi, method, self.vars[
"number_of_frames"],
287 p.set_number_of_replicas(
288 self.replica_exchange_object.get_number_of_replicas())
289 IMP.pmi.tools._add_pmi_provenance(h)
292 def execute_macro(self):
293 temp_index_factor = 100000.0
297 if self.monte_carlo_sample_objects
is not None:
298 print(
"Setting up MonteCarlo")
300 self.monte_carlo_sample_objects,
301 self.vars[
"monte_carlo_temperature"])
302 if self.vars[
"simulated_annealing"]:
303 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
304 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
305 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
306 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
307 sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
308 if self.vars[
"self_adaptive"]:
309 sampler_mc.set_self_adaptive(isselfadaptive=self.vars[
"self_adaptive"])
310 if self.output_objects
is not None:
311 self.output_objects.append(sampler_mc)
312 if self.rmf_output_objects
is not None:
313 self.rmf_output_objects.append(sampler_mc)
314 samplers.append(sampler_mc)
317 if self.molecular_dynamics_sample_objects
is not None:
318 print(
"Setting up MolecularDynamics")
320 self.molecular_dynamics_sample_objects,
321 self.vars[
"monte_carlo_temperature"],
322 maximum_time_step=self.molecular_dynamics_max_time_step)
323 if self.vars[
"simulated_annealing"]:
324 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
325 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
326 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
327 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
328 sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
329 if self.output_objects
is not None:
330 self.output_objects.append(sampler_md)
331 if self.rmf_output_objects
is not None:
332 self.rmf_output_objects.append(sampler_md)
333 samplers.append(sampler_md)
336 print(
"Setting up ReplicaExchange")
339 "replica_exchange_minimum_temperature"],
341 "replica_exchange_maximum_temperature"],
343 replica_exchange_object=self.replica_exchange_object)
344 self.replica_exchange_object = rex.rem
346 myindex = rex.get_my_index()
347 if self.output_objects
is not None:
348 self.output_objects.append(rex)
349 if self.rmf_output_objects
is not None:
350 self.rmf_output_objects.append(rex)
354 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
358 globaldir = self.vars[
"global_output_directory"] +
"/"
359 rmf_dir = globaldir + self.vars[
"rmf_dir"]
360 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
362 if not self.test_mode:
363 if self.vars[
"do_clean_first"]:
366 if self.vars[
"do_create_directories"]:
369 os.makedirs(globaldir)
377 if not self.is_multi_state:
383 for n
in range(self.vars[
"number_of_states"]):
385 os.makedirs(pdb_dir +
"/" + str(n))
392 if self.output_objects
is not None:
393 self.output_objects.append(sw)
394 if self.rmf_output_objects
is not None:
395 self.rmf_output_objects.append(sw)
397 print(
"Setting up stat file")
399 low_temp_stat_file = globaldir + \
400 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
403 if not self.test_mode:
406 if not self.test_mode:
407 if self.output_objects
is not None:
408 output.init_stat2(low_temp_stat_file,
410 extralabels=[
"rmf_file",
"rmf_frame_index"])
412 print(
"Stat file writing is disabled")
414 if self.rmf_output_objects
is not None:
415 print(
"Stat info being written in the rmf file")
417 print(
"Setting up replica stat file")
418 replica_stat_file = globaldir + \
419 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
420 if not self.test_mode:
421 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
423 if not self.test_mode:
424 print(
"Setting up best pdb files")
425 if not self.is_multi_state:
426 if self.vars[
"number_of_best_scoring_models"] > 0:
427 output.init_pdb_best_scoring(pdb_dir +
"/" +
428 self.vars[
"best_pdb_name_suffix"],
431 "number_of_best_scoring_models"],
432 replica_exchange=
True)
433 output.write_psf(pdb_dir +
"/" +
"model.psf",pdb_dir +
"/" +
434 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
436 if self.vars[
"number_of_best_scoring_models"] > 0:
437 for n
in range(self.vars[
"number_of_states"]):
438 output.init_pdb_best_scoring(pdb_dir +
"/" + str(n) +
"/" +
439 self.vars[
"best_pdb_name_suffix"],
442 "number_of_best_scoring_models"],
443 replica_exchange=
True)
444 output.write_psf(pdb_dir +
"/" + str(n) +
"/" +
"model.psf",pdb_dir +
"/" + str(n) +
"/" +
445 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
448 if self.em_object_for_rmf
is not None:
449 if not self.is_multi_state
or self.pmi2:
450 output_hierarchies = [
452 self.em_object_for_rmf.get_density_as_hierarchy(
455 output_hierarchies = self.root_hiers
456 output_hierarchies.append(
457 self.em_object_for_rmf.get_density_as_hierarchy())
459 if not self.is_multi_state
or self.pmi2:
460 output_hierarchies = [self.root_hier]
462 output_hierarchies = self.root_hiers
465 if not self.test_mode:
466 print(
"Setting up and writing initial rmf coordinate file")
467 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
468 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
469 output_hierarchies,listofobjects=self.rmf_output_objects)
470 if self._rmf_restraints:
471 output.add_restraints_to_rmf(
472 init_suffix +
"." + str(myindex) +
".rmf3",
473 self._rmf_restraints)
474 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
475 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
479 if not self.test_mode:
480 mpivs=IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
484 self._add_provenance(sampler_md, sampler_mc)
486 if not self.test_mode:
487 print(
"Setting up production rmf files")
488 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
489 output.init_rmf(rmfname, output_hierarchies, geometries=self.vars[
"geometries"],
490 listofobjects = self.rmf_output_objects)
492 if self._rmf_restraints:
493 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
495 ntimes_at_low_temp = 0
499 self.replica_exchange_object.set_was_used(
True)
500 nframes = self.vars[
"number_of_frames"]
503 for i
in range(nframes):
507 for nr
in range(self.vars[
"num_sample_rounds"]):
508 if sampler_md
is not None:
510 self.vars[
"molecular_dynamics_steps"])
511 if sampler_mc
is not None:
512 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
514 self.model).evaluate(
False)
515 mpivs.set_value(
"score",score)
516 output.set_output_entry(
"score", score)
520 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
522 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
523 save_frame=(min_temp_index == my_temp_index)
524 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
525 score_perc=mpivs.get_percentile(
"score")
526 save_frame=(score_perc*100.0<=25.0)
527 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
528 score_perc=mpivs.get_percentile(
"score")
529 save_frame=(score_perc*100.0<=50.0)
530 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
531 score_perc=mpivs.get_percentile(
"score")
532 save_frame=(score_perc*100.0<=75.0)
535 if save_frame
or not self.test_mode:
539 print(
"--- frame %s score %s " % (str(i), str(score)))
541 if not self.test_mode:
542 if i % self.vars[
"nframes_write_coordinates"]==0:
543 print(
'--- writing coordinates')
544 if self.vars[
"number_of_best_scoring_models"] > 0:
545 output.write_pdb_best_scoring(score)
546 output.write_rmf(rmfname)
547 output.set_output_entry(
"rmf_file", rmfname)
548 output.set_output_entry(
"rmf_frame_index", ntimes_at_low_temp)
550 output.set_output_entry(
"rmf_file", rmfname)
551 output.set_output_entry(
"rmf_frame_index",
'-1')
552 if self.output_objects
is not None:
553 output.write_stat2(low_temp_stat_file)
554 ntimes_at_low_temp += 1
556 if not self.test_mode:
557 output.write_stat2(replica_stat_file)
558 if self.vars[
"replica_exchange_swap"]:
559 rex.swap_temp(i, score)
560 for p, state
in IMP.pmi.tools._all_protocol_outputs(
561 [self.representation],
562 self.root_hier
if self.pmi2
else None):
563 p.add_replica_exchange(state, self)
565 if not self.test_mode:
566 print(
"closing production rmf files")
567 output.close_rmf(rmfname)
573 """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
574 Easily create multi-state systems by calling this macro
575 repeatedly with different TopologyReader objects!
576 A useful function is get_molecules() which returns the PMI Molecules grouped by state
577 as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
578 Quick multi-state system:
581 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
582 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
583 bs = IMP.pmi.macros.BuildSystem(model)
584 bs.add_state(reader1)
585 bs.add_state(reader2)
586 bs.execute_macro() # build everything including degrees of freedom
587 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
588 ### now you have a two state system, you add restraints etc
590 @note The "domain name" entry of the topology reader is not used.
591 All molecules are set up by the component name, but split into rigid bodies
596 sequence_connectivity_scale=4.0,
597 force_create_gmm_files=
False,
600 @param model An IMP Model
601 @param sequence_connectivity_scale For scaling the connectivity restraint
602 @param force_create_gmm_files If True, will sample and create GMMs
603 no matter what. If False, will only sample if the
604 files don't exist. If number of Gaussians is zero, won't
606 @param resolutions The resolutions to build for structured regions
611 self._domain_res = []
613 self.force_create_gmm_files = force_create_gmm_files
614 self.resolutions = resolutions
623 keep_chain_id=
False, fasta_name_map=
None):
624 """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
625 When you are done adding states, call execute_macro()
626 @param reader The TopologyReader object
627 @param fasta_name_map dictionary for converting protein names found in the fasta file
629 state = self.system.create_state()
630 self._readers.append(reader)
631 these_domain_res = {}
633 chain_ids = string.ascii_uppercase+string.ascii_lowercase+
'0123456789'
638 for molname
in reader.get_molecules():
639 copies = reader.get_molecules()[molname].domains
640 for nc,copyname
in enumerate(copies):
641 print(
"BuildSystem.add_state: setting up molecule %s copy number %s" % (molname,str(nc)))
642 copy = copies[copyname]
644 if keep_chain_id ==
True:
645 chain_id = copy[0].chain
647 chain_id = chain_ids[numchain]
650 fasta_flag=copy[0].fasta_flag
651 if fasta_flag ==
"RNA" or fasta_flag ==
"DNA": is_nucleic=
True
653 print(
"BuildSystem.add_state: molecule %s sequence has %s residues" % (molname,len(seq)))
654 orig_mol = state.create_molecule(molname,
660 print(
"BuildSystem.add_state: creating a copy for molecule %s" % molname)
661 mol = orig_mol.create_copy(chain_id)
664 for domainnumber,domain
in enumerate(copy):
665 print(
"BuildSystem.add_state: ---- setting up domain %s of molecule %s" % (domainnumber,molname))
668 these_domains[domain.get_unique_name()] = domain
669 if domain.residue_range==[]
or domain.residue_range
is None:
670 domain_res = mol.get_residues()
672 start = domain.residue_range[0]+domain.pdb_offset
673 if domain.residue_range[1]==
'END':
674 end = len(mol.sequence)
676 end = domain.residue_range[1]+domain.pdb_offset
677 domain_res = mol.residue_range(start-1,end-1)
678 print(
"BuildSystem.add_state: -------- domain %s of molecule %s extends from residue %s to residue %s " % (domainnumber,molname,start,end))
679 if domain.pdb_file==
"BEADS":
680 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by BEADS " % (domainnumber,molname))
681 mol.add_representation(
683 resolutions=[domain.bead_size],
684 setup_particles_as_densities=(
685 domain.em_residues_per_gaussian!=0),
686 color = domain.color)
687 these_domain_res[domain.get_unique_name()] = (set(),domain_res)
688 elif domain.pdb_file==
"IDEAL_HELIX":
689 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by IDEAL_HELIX " % (domainnumber,molname))
690 mol.add_representation(
692 resolutions=self.resolutions,
694 density_residues_per_component=domain.em_residues_per_gaussian,
695 density_prefix=domain.density_prefix,
696 density_force_compute=self.force_create_gmm_files,
697 color = domain.color)
698 these_domain_res[domain.get_unique_name()] = (domain_res,set())
700 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by pdb file %s " % (domainnumber,molname,domain.pdb_file))
701 domain_atomic = mol.add_structure(domain.pdb_file,
703 domain.residue_range,
706 domain_non_atomic = domain_res - domain_atomic
707 if not domain.em_residues_per_gaussian:
708 mol.add_representation(domain_atomic,
709 resolutions=self.resolutions,
710 color = domain.color)
711 if len(domain_non_atomic)>0:
712 mol.add_representation(domain_non_atomic,
713 resolutions=[domain.bead_size],
714 color = domain.color)
716 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by gaussians " % (domainnumber,molname))
717 mol.add_representation(
719 resolutions=self.resolutions,
720 density_residues_per_component=domain.em_residues_per_gaussian,
721 density_prefix=domain.density_prefix,
722 density_force_compute=self.force_create_gmm_files,
723 color = domain.color)
724 if len(domain_non_atomic)>0:
725 mol.add_representation(domain_non_atomic,
726 resolutions=[domain.bead_size],
727 setup_particles_as_densities=
True,
728 color = domain.color)
729 these_domain_res[domain.get_unique_name()] = (
730 domain_atomic,domain_non_atomic)
731 self._domain_res.append(these_domain_res)
732 self._domains.append(these_domains)
733 print(
'BuildSystem.add_state: State',len(self.system.states),
'added')
736 """Return list of all molecules grouped by state.
737 For each state, it's a dictionary of Molecules where key is the molecule name
739 return [s.get_molecules()
for s
in self.system.get_states()]
741 def get_molecule(self,molname,copy_index=0,state_index=0):
742 return self.system.get_states()[state_index].
get_molecules()[molname][copy_index]
744 def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04, max_bead_trans=4.0, max_srb_trans=4.0,max_srb_rot=0.04):
745 """Builds representations and sets up degrees of freedom"""
746 print(
"BuildSystem.execute_macro: building representations")
747 self.root_hier = self.system.build()
749 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
751 for nstate,reader
in enumerate(self._readers):
752 rbs = reader.get_rigid_bodies()
753 srbs = reader.get_super_rigid_bodies()
754 csrbs = reader.get_chains_of_super_rigid_bodies()
757 domains_in_rbs = set()
759 print(
"BuildSystem.execute_macro: -------- building rigid body %s" % (str(rblist)))
760 all_res = IMP.pmi.tools.OrderedSet()
761 bead_res = IMP.pmi.tools.OrderedSet()
763 domain = self._domains[nstate][dname]
764 print(
"BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
765 all_res|=self._domain_res[nstate][dname][0]
766 bead_res|=self._domain_res[nstate][dname][1]
767 domains_in_rbs.add(dname)
769 print(
"BuildSystem.execute_macro: -------- \
770 creating rigid body with max_trans %s \
771 max_rot %s non_rigid_max_trans %s" \
772 % (str(max_rb_trans),str(max_rb_rot),str(max_bead_trans)))
773 self.dof.create_rigid_body(all_res,
774 nonrigid_parts=bead_res,
775 max_trans=max_rb_trans,
777 nonrigid_max_trans=max_bead_trans)
780 for dname
in self._domains[nstate]:
781 domain = self._domains[nstate][dname]
782 if domain.pdb_file==
"BEADS" and dname
not in domains_in_rbs:
783 self.dof.create_flexible_beads(
784 self._domain_res[nstate][dname][1],max_trans=max_bead_trans)
788 print(
"BuildSystem.execute_macro: -------- building super rigid body %s" % (str(srblist)))
789 all_res = IMP.pmi.tools.OrderedSet()
790 for dname
in srblist:
791 print(
"BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
792 all_res|=self._domain_res[nstate][dname][0]
793 all_res|=self._domain_res[nstate][dname][1]
795 print(
"BuildSystem.execute_macro: -------- \
796 creating super rigid body with max_trans %s max_rot %s " \
797 % (str(max_srb_trans),str(max_srb_rot)))
798 self.dof.create_super_rigid_body(all_res,max_trans=max_srb_trans,max_rot=max_srb_rot)
801 for csrblist
in csrbs:
802 all_res = IMP.pmi.tools.OrderedSet()
803 for dname
in csrblist:
804 all_res|=self._domain_res[nstate][dname][0]
805 all_res|=self._domain_res[nstate][dname][1]
806 all_res = list(all_res)
807 all_res.sort(key=
lambda r:r.get_index())
809 self.dof.create_main_chain_mover(all_res)
810 return self.root_hier,self.dof
813 "use IMP.pmi.macros.BuildSystem instead")
815 """A macro to build a Representation based on a Topology and lists of movers
816 DEPRECATED - Use BuildSystem instead.
820 component_topologies,
821 list_of_rigid_bodies=[],
822 list_of_super_rigid_bodies=[],
823 chain_of_super_rigid_bodies=[],
824 sequence_connectivity_scale=4.0,
825 add_each_domain_as_rigid_body=
False,
826 force_create_gmm_files=
False):
828 @param model The IMP model
829 @param component_topologies List of
830 IMP.pmi.topology.ComponentTopology items
831 @param list_of_rigid_bodies List of lists of domain names that will
832 be moved as rigid bodies.
833 @param list_of_super_rigid_bodies List of lists of domain names
834 that will move together in an additional Monte Carlo move.
835 @param chain_of_super_rigid_bodies List of lists of domain names
836 (choices can only be from the same molecule). Each of these
837 groups will be moved rigidly. This helps to sample more
838 efficiently complex topologies, made of several rigid bodies,
839 connected by flexible linkers.
840 @param sequence_connectivity_scale For scaling the connectivity
842 @param add_each_domain_as_rigid_body That way you don't have to
843 put all of them in the list
844 @param force_create_gmm_files If True, will sample and create GMMs
845 no matter what. If False, will only sample if the
846 files don't exist. If number of Gaussians is zero, won't
852 disorderedlength=
False)
854 data=component_topologies
855 if list_of_rigid_bodies==[]:
856 print(
"WARNING: No list of rigid bodies inputted to build_model()")
857 if list_of_super_rigid_bodies==[]:
858 print(
"WARNING: No list of super rigid bodies inputted to build_model()")
859 if chain_of_super_rigid_bodies==[]:
860 print(
"WARNING: No chain of super rigid bodies inputted to build_model()")
861 all_dnames = set([d
for sublist
in list_of_rigid_bodies+list_of_super_rigid_bodies\
862 +chain_of_super_rigid_bodies
for d
in sublist])
863 all_available = set([c._domain_name
for c
in component_topologies])
864 if not all_dnames <= all_available:
865 raise ValueError(
"All requested movers must reference domain "
866 "names in the component topologies")
870 super_rigid_bodies={}
871 chain_super_rigid_bodies={}
875 comp_name = c.molname
876 hier_name = c._domain_name
878 fasta_file = c.fasta_file
879 fasta_id = c.fasta_id
880 pdb_name = c.pdb_file
882 res_range = c.residue_range
883 offset = c.pdb_offset
884 bead_size = c.bead_size
885 em_num_components = c.em_residues_per_gaussian
886 em_txt_file_name = c.gmm_file
887 em_mrc_file_name = c.mrc_file
889 if comp_name
not in self.simo.get_component_names():
890 self.simo.create_component(comp_name,color=0.0)
891 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
894 if em_num_components==0:
898 if (
not os.path.isfile(em_txt_file_name))
or force_create_gmm_files:
905 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
906 res_range,include_res0,beadsize=bead_size,
907 color=color,offset=offset)
908 if em_num_components!=0:
910 print(
"will read GMM files")
912 print(
"will calculate GMMs")
914 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
915 em_mrc_file_name,em_num_components,read_em_files)
916 self.simo.add_all_atom_densities(comp_name, particles=beads)
921 self.resdensities[hier_name]=dens_hier
922 self.domain_dict[hier_name]=outhier+dens_hier
925 for c
in self.simo.get_component_names():
926 self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
927 self.simo.setup_component_geometry(c)
930 for rblist
in list_of_rigid_bodies:
932 for rbmember
in rblist:
933 rb+=[h
for h
in self.domain_dict[rbmember]]
934 self.simo.set_rigid_body_from_hierarchies(rb)
935 for srblist
in list_of_super_rigid_bodies:
937 for srbmember
in rblist:
938 srb+=[h
for h
in self.domain_dict[srbmember]]
939 self.simo.set_super_rigid_body_from_hierarchies(srb)
940 for clist
in chain_of_super_rigid_bodies:
942 for crbmember
in rblist:
943 crb+=[h
for h
in self.domain_dict[crbmember]]
944 self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
946 self.simo.set_floppy_bodies()
947 self.simo.setup_bonds()
955 '''Return the Representation object'''
958 def get_density_hierarchies(self,hier_name_list):
962 for hn
in hier_name_list:
964 dens_hier_list+=self.resdensities[hn]
965 return dens_hier_list
967 def set_gmm_models_directory(self,directory_name):
968 self.gmm_models_directory=directory_name
970 def get_pdb_bead_bits(self,hierarchy):
975 if "_pdb" in h.get_name():pdbbits.append(h)
976 if "_bead" in h.get_name():beadbits.append(h)
977 if "_helix" in h.get_name():helixbits.append(h)
978 return (pdbbits,beadbits,helixbits)
980 def scale_bead_radii(self,nresidues,scale):
982 for h
in self.domain_dict:
983 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
984 slope=(1.0-scale)/(1.0-float(nresidues))
989 if b
not in scaled_beads:
995 scale_factor=slope*float(num_residues)+1.0
997 new_radius=scale_factor*radius
1000 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
1003 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
1005 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
1008 for pb
in pdbbits+helixbits:
1012 number_of_residues=len(set(res_ind))
1016 outhier+=simo.add_component_density(compname,
1018 num_components=num_components,
1020 inputfile=txtfilename)
1021 if len(helixbits)!=0:
1022 outhier+=simo.add_component_density(compname,
1024 num_components=num_components,
1026 inputfile=txtfilename)
1031 num_components=number_of_residues//abs(num_components)+1
1032 outhier+=simo.add_component_density(compname,
1034 num_components=num_components,
1036 outputfile=txtfilename,
1037 outputmap=mrcfilename,
1038 multiply_by_total_mass=
True)
1040 if len(helixbits)!=0:
1041 num_components=number_of_residues//abs(num_components)+1
1042 outhier+=simo.add_component_density(compname,
1044 num_components=num_components,
1046 outputfile=txtfilename,
1047 outputmap=mrcfilename,
1048 multiply_by_total_mass=
True)
1050 return outhier,beadbits
1052 def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
1053 beadsize=5,color=0.0,offset=0):
1054 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" :
1056 outhier=simo.autobuild_model(comname,
1060 resolutions=[0,1,10],
1063 missingbeadsize=beadsize)
1065 outhier=simo.autobuild_model(comname,
1072 missingbeadsize=beadsize)
1075 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" :
1076 outhier=simo.add_component_ideal_helix(comname,
1082 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" :
1083 outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
1087 seq_len=len(simo.sequence_dict[comname])
1088 outhier=simo.add_component_necklace(comname,
1098 """Deprecated building macro - use BuildSystem()"""
1102 @param representation The PMI representation
1104 self.simo=representation
1105 self.gmm_models_directory=
"."
1107 self.rmf_frame_number={}
1108 self.rmf_names_map={}
1109 self.rmf_component_name={}
1111 def set_gmm_models_directory(self,directory_name):
1112 self.gmm_models_directory=directory_name
1115 sequence_connectivity_resolution=10,
1116 rmf_file=
None,rmf_frame_number=0,rmf_file_map=
None,
1117 skip_connectivity_these_domains=
None,
1118 skip_gaussian_in_rmf=
False, skip_gaussian_in_representation=
False):
1120 @param data_structure List of lists containing these entries:
1121 comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
1122 res_range, read_em_files, bead_size, rb, super_rb,
1123 em_num_components, em_txt_file_name, em_mrc_file_name, chain_of_super_rb,
1124 keep_gaussian_flexible_beads (optional)
1125 @param sequence_connectivity_scale
1127 @param rmf_frame_number
1128 @param rmf_file_map : a dictionary that map key=component_name:value=(rmf_file_name,
1133 self.resdensities={}
1134 super_rigid_bodies={}
1135 chain_super_rigid_bodies={}
1138 for d
in data_structure:
1146 res_range = d[7][0:2]
1151 read_em_files = d[8]
1155 em_num_components = d[12]
1156 em_txt_file_name = d[13]
1157 em_mrc_file_name = d[14]
1158 chain_of_super_rb = d[15]
1160 keep_gaussian_flexible_beads = d[16]
1162 keep_gaussian_flexible_beads =
True
1164 if comp_name
not in self.simo.get_component_names():
1165 self.simo.create_component(comp_name,color=0.0)
1166 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
1167 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
1170 if not read_em_files
is None:
1171 if em_txt_file_name
is " ": em_txt_file_name=self.gmm_models_directory+
"/"+hier_name+
".txt"
1172 if em_mrc_file_name
is " ": em_mrc_file_name=self.gmm_models_directory+
"/"+hier_name+
".mrc"
1175 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
1177 if (keep_gaussian_flexible_beads):
1178 self.simo.add_all_atom_densities(comp_name, particles=beads)
1183 self.resdensities[hier_name]=dens_hier
1184 self.domain_dict[hier_name]=outhier+dens_hier
1187 if rb
not in rigid_bodies:
1188 rigid_bodies[rb]=[h
for h
in self.domain_dict[hier_name]]
1190 rigid_bodies[rb]+=[h
for h
in self.domain_dict[hier_name]]
1193 if super_rb
is not None:
1195 if k
not in super_rigid_bodies:
1196 super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
1198 super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
1200 if chain_of_super_rb
is not None:
1201 for k
in chain_of_super_rb:
1202 if k
not in chain_super_rigid_bodies:
1203 chain_super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
1205 chain_super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
1209 self.rigid_bodies=rigid_bodies
1211 for c
in self.simo.get_component_names():
1212 if rmf_file
is not None:
1214 rfn=rmf_frame_number
1215 self.simo.set_coordinates_from_rmf(c, rf,rfn,
1216 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1218 for k
in rmf_file_map:
1220 rf=rmf_file_map[k][0]
1221 rfn=rmf_file_map[k][1]
1222 rcname=rmf_file_map[k][2]
1223 self.simo.set_coordinates_from_rmf(cname, rf,rfn,rcname,
1224 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1226 if c
in self.rmf_file:
1228 rfn=self.rmf_frame_number[c]
1229 rfm=self.rmf_names_map[c]
1230 rcname=self.rmf_component_name[c]
1231 self.simo.set_coordinates_from_rmf(c, rf,rfn,representation_name_to_rmf_name_map=rfm,
1232 rmf_component_name=rcname,
1233 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1234 if (
not skip_connectivity_these_domains)
or (c
not in skip_connectivity_these_domains):
1235 self.simo.setup_component_sequence_connectivity(c,
1236 resolution=sequence_connectivity_resolution,
1237 scale=sequence_connectivity_scale)
1238 self.simo.setup_component_geometry(c)
1240 for rb
in rigid_bodies:
1241 self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
1243 for k
in super_rigid_bodies:
1244 self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
1246 for k
in chain_super_rigid_bodies:
1247 self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
1249 self.simo.set_floppy_bodies()
1250 self.simo.setup_bonds()
1252 def set_main_chain_mover(self,hier_name,lengths=[5,10,20,30]):
1253 hiers=self.domain_dict[hier_name]
1254 for length
in lengths:
1255 for n
in range(len(hiers)-length):
1256 hs=hiers[n+1:n+length-1]
1257 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n+length].get_particle()),min_size=3)
1258 for n
in range(1,len(hiers)-1,5):
1260 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1262 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1265 def set_rmf_file(self,component_name,rmf_file,rmf_frame_number,rmf_names_map=None,rmf_component_name=None):
1266 self.rmf_file[component_name]=rmf_file
1267 self.rmf_frame_number[component_name]=rmf_frame_number
1268 self.rmf_names_map[component_name]=rmf_names_map
1269 self.rmf_component_name[component_name]=rmf_component_name
1271 def get_density_hierarchies(self,hier_name_list):
1275 for hn
in hier_name_list:
1277 dens_hier_list+=self.resdensities[hn]
1278 return dens_hier_list
1280 def get_pdb_bead_bits(self,hierarchy):
1285 if "_pdb" in h.get_name():pdbbits.append(h)
1286 if "_bead" in h.get_name():beadbits.append(h)
1287 if "_helix" in h.get_name():helixbits.append(h)
1288 return (pdbbits,beadbits,helixbits)
1290 def scale_bead_radii(self,nresidues,scale):
1292 for h
in self.domain_dict:
1293 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
1294 slope=(1.0-scale)/(1.0-float(nresidues))
1299 if b
not in scaled_beads:
1305 scale_factor=slope*float(num_residues)+1.0
1307 new_radius=scale_factor*radius
1310 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
1313 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
1315 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
1319 for pb
in pdbbits+helixbits:
1323 number_of_residues=len(set(res_ind))
1327 outhier+=simo.add_component_density(compname,
1329 num_components=num_components,
1331 inputfile=txtfilename)
1332 if len(helixbits)!=0:
1333 outhier+=simo.add_component_density(compname,
1335 num_components=num_components,
1337 inputfile=txtfilename)
1342 if num_components<0:
1345 num_components=number_of_residues/abs(num_components)
1346 outhier+=simo.add_component_density(compname,
1348 num_components=num_components,
1350 outputfile=txtfilename,
1351 outputmap=mrcfilename,
1352 multiply_by_total_mass=
True)
1354 if len(helixbits)!=0:
1355 if num_components<0:
1358 num_components=number_of_residues/abs(num_components)
1359 outhier+=simo.add_component_density(compname,
1361 num_components=num_components,
1363 outputfile=txtfilename,
1364 outputmap=mrcfilename,
1365 multiply_by_total_mass=
True)
1367 return outhier,beadbits
1369 def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
1371 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is not "DENSITY" :
1372 if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
1374 outhier=simo.autobuild_model(comname,
1378 resolutions=[0,1,10],
1381 missingbeadsize=beadsize)
1383 outhier=simo.autobuild_model(comname,
1390 missingbeadsize=beadsize)
1392 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is not "DENSITY" :
1393 outhier=simo.add_component_ideal_helix(comname,
1399 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" and pdbname
is not "DENSITY" :
1402 seq_len=len(simo.sequence_dict[comname])
1403 outhier=simo.add_component_necklace(comname,resrange[0],seq_len,beadsize,color=color)
1405 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is "DENSITY" :
1410 seq_len=len(simo.sequence_dict[comname])
1411 outhier=simo.add_component_necklace(comname,
1418 def set_coordinates(self,hier_name,xyz_tuple):
1419 hier=self.domain_dict[hier_name]
1427 def save_rmf(self,rmfname):
1430 self.simo.model.update()
1431 o.init_rmf(rmfname,[self.simo.prot])
1432 o.write_rmf(rmfname)
1433 o.close_rmf(rmfname)
1439 """A macro for running all the basic operations of analysis.
1440 Includes clustering, precision analysis, and making ensemble density maps.
1441 A number of plots are also supported.
1444 merge_directories=[
"./"],
1445 stat_file_name_suffix=
"stat",
1446 best_pdb_name_suffix=
"model",
1447 do_clean_first=
True,
1448 do_create_directories=
True,
1449 global_output_directory=
"output/",
1450 replica_stat_file_suffix=
"stat_replica",
1451 global_analysis_result_directory=
"./analysis/",
1454 @param model The IMP model
1455 @param stat_file_name_suffix
1456 @param merge_directories The directories containing output files
1457 @param best_pdb_name_suffix
1458 @param do_clean_first
1459 @param do_create_directories
1460 @param global_output_directory Where everything is
1461 @param replica_stat_file_suffix
1462 @param global_analysis_result_directory
1463 @param test_mode If True, nothing is changed on disk
1467 from mpi4py
import MPI
1468 self.comm = MPI.COMM_WORLD
1469 self.rank = self.comm.Get_rank()
1470 self.number_of_processes = self.comm.size
1473 self.number_of_processes = 1
1475 self.test_mode = test_mode
1476 self._protocol_output = []
1477 self.cluster_obj =
None
1479 stat_dir = global_output_directory
1480 self.stat_files = []
1482 for rd
in merge_directories:
1483 stat_files = glob.glob(os.path.join(rd,stat_dir,
"stat.*.out"))
1484 if len(stat_files)==0:
1485 print(
"WARNING: no stat files found in",os.path.join(rd,stat_dir))
1486 self.stat_files += stat_files
1489 """Capture details of the modeling protocol.
1490 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
1493 self._protocol_output.append((p, p._last_state))
1496 score_key=
"SimplifiedModel_Total_Score_None",
1497 rmf_file_key=
"rmf_file",
1498 rmf_file_frame_key=
"rmf_frame_index",
1501 nframes_trajectory=10000):
1502 """ Get a trajectory of the modeling run, for generating demonstrative movies
1503 @param score_key The score for ranking models
1504 @param rmf_file_key Key pointing to RMF filename
1505 @param rmf_file_frame_key Key pointing to RMF frame number
1506 @param outputdir The local output directory used in the run
1507 @param get_every Extract every nth frame
1508 @param nframes_trajectory Total number of frames of the trajectory
1510 from operator
import itemgetter
1518 rmf_file_list=trajectory_models[0]
1519 rmf_file_frame_list=trajectory_models[1]
1520 score_list=list(map(float, trajectory_models[2]))
1522 max_score=max(score_list)
1523 min_score=min(score_list)
1526 bins=[(max_score-min_score)*math.exp(-float(i))+min_score
for i
in range(nframes_trajectory)]
1527 binned_scores=[
None]*nframes_trajectory
1528 binned_model_indexes=[-1]*nframes_trajectory
1530 for model_index,s
in enumerate(score_list):
1531 bins_score_diffs=[abs(s-b)
for b
in bins]
1532 bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1533 if binned_scores[bin_index]==
None:
1534 binned_scores[bin_index]=s
1535 binned_model_indexes[bin_index]=model_index
1537 old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1538 new_diff=abs(s-bins[bin_index])
1539 if new_diff < old_diff:
1540 binned_scores[bin_index]=s
1541 binned_model_indexes[bin_index]=model_index
1543 print(binned_scores)
1544 print(binned_model_indexes)
1547 def _expand_ambiguity(self,prot,d):
1548 """If using PMI2, expand the dictionary to include copies as ambiguous options
1549 This also keeps the states separate.
1554 if '..' in key
or (type(val)
is tuple
and len(val)>=3):
1557 states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
1558 if type(val)
is tuple:
1566 for nst
in range(len(states)):
1568 copies = sel.get_selected_particles(with_representation=
False)
1570 for nc
in range(len(copies)):
1572 newdict[
'%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
1574 newdict[
'%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
1581 score_key=
"SimplifiedModel_Total_Score_None",
1582 rmf_file_key=
"rmf_file",
1583 rmf_file_frame_key=
"rmf_frame_index",
1585 prefiltervalue=
None,
1588 alignment_components=
None,
1589 number_of_best_scoring_models=10,
1590 rmsd_calculation_components=
None,
1591 distance_matrix_file=
'distances.mat',
1592 load_distance_matrix_file=
False,
1593 skip_clustering=
False,
1594 number_of_clusters=1,
1596 exit_after_display=
True,
1598 first_and_last_frames=
None,
1599 density_custom_ranges=
None,
1600 write_pdb_with_centered_coordinates=
False,
1602 """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
1603 Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
1604 Can pass None for copy or state to ignore that field.
1605 If you don't pass a specific copy number
1606 @param score_key The score for ranking models. Use "Total_Score"
1607 instead of default for PMI2.
1608 @param rmf_file_key Key pointing to RMF filename
1609 @param rmf_file_frame_key Key pointing to RMF frame number
1610 @param state_number State number to analyze
1611 @param prefiltervalue Only include frames where the
1612 score key is below this value
1613 @param feature_keys Keywords for which you want to
1614 calculate average, medians, etc.
1615 If you pass "Keyname" it'll include everything that matches "*Keyname*"
1616 @param outputdir The local output directory used in the run
1617 @param alignment_components Dictionary with keys=groupname, values are tuples
1618 for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1619 @param number_of_best_scoring_models Num models to keep per run
1620 @param rmsd_calculation_components For calculating RMSD
1621 (same format as alignment_components)
1622 @param distance_matrix_file Where to store/read the distance matrix
1623 @param load_distance_matrix_file Try to load the distance matrix file
1624 @param skip_clustering Just extract the best scoring models
1626 @param number_of_clusters Number of k-means clusters
1627 @param display_plot Display the distance matrix
1628 @param exit_after_display Exit after displaying distance matrix
1629 @param get_every Extract every nth frame
1630 @param first_and_last_frames A tuple with the first and last frames to be
1631 analyzed. Values are percentages!
1632 Default: get all frames
1633 @param density_custom_ranges For density calculation
1634 (same format as alignment_components)
1635 @param write_pdb_with_centered_coordinates
1636 @param voxel_size Used for the density output
1640 self._outputdir = os.path.abspath(outputdir)
1641 self._number_of_clusters = number_of_clusters
1642 for p, state
in self._protocol_output:
1643 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1654 if not load_distance_matrix_file:
1655 if len(self.stat_files)==0: print(
"ERROR: no stat file found in the given path");
return
1656 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1657 self.stat_files,self.number_of_processes)[self.rank]
1661 orig_score_key = score_key
1662 if score_key
not in po.get_keys():
1663 if 'Total_Score' in po.get_keys():
1664 score_key =
'Total_Score'
1665 print(
"WARNING: Using 'Total_Score' instead of "
1666 "'SimplifiedModel_Total_Score_None' for the score key")
1667 for k
in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1668 if k
in feature_keys:
1669 print(
"WARNING: no need to pass " +k+
" to feature_keys.")
1670 feature_keys.remove(k)
1678 get_every, provenance=prov)
1679 rmf_file_list=best_models[0]
1680 rmf_file_frame_list=best_models[1]
1681 score_list=best_models[2]
1682 feature_keyword_list_dict=best_models[3]
1688 if self.number_of_processes > 1:
1692 rmf_file_frame_list)
1693 for k
in feature_keyword_list_dict:
1695 feature_keyword_list_dict[k])
1698 score_rmf_tuples = list(zip(score_list,
1700 rmf_file_frame_list,
1701 list(range(len(score_list)))))
1703 if density_custom_ranges:
1704 for k
in density_custom_ranges:
1705 if type(density_custom_ranges[k])
is not list:
1706 raise Exception(
"Density custom ranges: values must be lists of tuples")
1709 if first_and_last_frames
is not None:
1710 nframes = len(score_rmf_tuples)
1711 first_frame = int(first_and_last_frames[0] * nframes)
1712 last_frame = int(first_and_last_frames[1] * nframes)
1713 if last_frame > len(score_rmf_tuples):
1715 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1718 best_score_rmf_tuples = sorted(score_rmf_tuples,
1719 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1720 best_score_rmf_tuples=[t+(n,)
for n,t
in enumerate(best_score_rmf_tuples)]
1722 prov.append(IMP.pmi.io.FilterProvenance(
"Best scoring",
1723 0, number_of_best_scoring_models))
1725 best_score_feature_keyword_list_dict = defaultdict(list)
1726 for tpl
in best_score_rmf_tuples:
1728 for f
in feature_keyword_list_dict:
1729 best_score_feature_keyword_list_dict[f].append(
1730 feature_keyword_list_dict[f][index])
1731 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1732 best_score_rmf_tuples,
1733 self.number_of_processes)[self.rank]
1736 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(self.model,
1738 my_best_score_rmf_tuples[0][1])[0]
1740 if rmsd_calculation_components
is not None:
1741 tmp = self._expand_ambiguity(prot_ahead,rmsd_calculation_components)
1742 if tmp!=rmsd_calculation_components:
1743 print(
'Detected ambiguity, expand rmsd components to',tmp)
1744 rmsd_calculation_components = tmp
1745 if alignment_components
is not None:
1746 tmp = self._expand_ambiguity(prot_ahead,alignment_components)
1747 if tmp!=alignment_components:
1748 print(
'Detected ambiguity, expand alignment components to',tmp)
1749 alignment_components = tmp
1755 my_best_score_rmf_tuples[0],
1756 rmsd_calculation_components,
1757 state_number=state_number)
1759 my_best_score_rmf_tuples,
1760 alignment_components,
1761 rmsd_calculation_components,
1762 state_number=state_number)
1767 all_coordinates=got_coords[0]
1768 alignment_coordinates=got_coords[1]
1769 rmsd_coordinates=got_coords[2]
1770 rmf_file_name_index_dict=got_coords[3]
1771 all_rmf_file_names=got_coords[4]
1777 if density_custom_ranges:
1779 density_custom_ranges,
1782 dircluster=os.path.join(outputdir,
"all_models."+str(n))
1788 os.mkdir(dircluster)
1791 clusstat=open(os.path.join(dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1792 for cnt,tpl
in enumerate(my_best_score_rmf_tuples):
1794 rmf_frame_number=tpl[2]
1797 for key
in best_score_feature_keyword_list_dict:
1798 tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1801 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1806 linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1812 if not linking_successful:
1819 states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1820 prot = states[state_number]
1822 prot = prots[state_number]
1827 coords_f1=alignment_coordinates[cnt]
1829 coords_f2=alignment_coordinates[cnt]
1832 transformation = Ali.align()[1]
1854 out_pdb_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".pdb")
1855 out_rmf_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".rmf3")
1856 o.init_pdb(out_pdb_fn,prot)
1857 o.write_pdb(out_pdb_fn,
1858 translate_to_geometric_center=write_pdb_with_centered_coordinates)
1860 tmp_dict[
"local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1861 tmp_dict[
"rmf_file_full_path"]=rmf_name
1862 tmp_dict[
"local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1863 tmp_dict[
"local_rmf_frame_number"]=0
1865 clusstat.write(str(tmp_dict)+
"\n")
1870 h.set_name(
"System")
1872 o.init_rmf(out_rmf_fn, [h], rs)
1874 o.init_rmf(out_rmf_fn, [prot],rs)
1876 o.write_rmf(out_rmf_fn)
1877 o.close_rmf(out_rmf_fn)
1879 if density_custom_ranges:
1880 DensModule.add_subunits_density(prot)
1882 if density_custom_ranges:
1883 DensModule.write_mrc(path=dircluster)
1890 if self.number_of_processes > 1:
1896 rmf_file_name_index_dict)
1898 alignment_coordinates)
1905 [best_score_feature_keyword_list_dict,
1906 rmf_file_name_index_dict],
1912 print(
"setup clustering class")
1915 for n, model_coordinate_dict
in enumerate(all_coordinates):
1916 template_coordinate_dict = {}
1918 if alignment_components
is not None and len(self.cluster_obj.all_coords) == 0:
1920 self.cluster_obj.set_template(alignment_coordinates[n])
1921 self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1922 print(
"Global calculating the distance matrix")
1925 self.cluster_obj.dist_matrix()
1929 self.cluster_obj.do_cluster(number_of_clusters)
1932 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1933 if exit_after_display:
1935 self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1942 print(
"setup clustering class")
1944 self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1945 print(
"clustering with %s clusters" % str(number_of_clusters))
1946 self.cluster_obj.do_cluster(number_of_clusters)
1947 [best_score_feature_keyword_list_dict,
1948 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1951 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1952 if exit_after_display:
1954 if self.number_of_processes > 1:
1962 print(self.cluster_obj.get_cluster_labels())
1963 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1964 print(
"rank %s " % str(self.rank))
1965 print(
"cluster %s " % str(n))
1966 print(
"cluster label %s " % str(cl))
1967 print(self.cluster_obj.get_cluster_label_names(cl))
1968 cluster_size = len(self.cluster_obj.get_cluster_label_names(cl))
1969 cluster_prov = prov + \
1970 [IMP.pmi.io.ClusterProvenance(cluster_size)]
1973 if density_custom_ranges:
1975 density_custom_ranges,
1978 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1980 os.mkdir(dircluster)
1984 rmsd_dict = {
"AVERAGE_RMSD":
1985 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1986 clusstat = open(dircluster +
"stat.out",
"w")
1987 for k, structure_name
in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1990 tmp_dict.update(rmsd_dict)
1991 index = rmf_file_name_index_dict[structure_name]
1992 for key
in best_score_feature_keyword_list_dict:
1994 key] = best_score_feature_keyword_list_dict[
2000 rmf_name = structure_name.split(
"|")[0]
2001 rmf_frame_number = int(structure_name.split(
"|")[1])
2002 clusstat.write(str(tmp_dict) +
"\n")
2006 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
2011 linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
2017 if not linking_successful:
2023 states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
2024 prot = states[state_number]
2026 prot = prots[state_number]
2032 model_index = self.cluster_obj.get_model_index_from_name(
2034 transformation = self.cluster_obj.get_transformation_to_first_member(
2054 if density_custom_ranges:
2055 DensModule.add_subunits_density(prot)
2060 o.init_pdb(dircluster + str(k) +
".pdb", prot)
2061 o.write_pdb(dircluster + str(k) +
".pdb")
2066 h.set_name(
"System")
2068 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
2070 o.init_rmf(dircluster + str(k) +
".rmf3", [prot],rs)
2071 o.write_rmf(dircluster + str(k) +
".rmf3")
2072 o.close_rmf(dircluster + str(k) +
".rmf3")
2077 if density_custom_ranges:
2078 DensModule.write_mrc(path=dircluster)
2081 if self.number_of_processes>1:
2084 def get_cluster_rmsd(self,cluster_num):
2085 if self.cluster_obj
is None:
2087 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
2089 def save_objects(self, objects, file_name):
2091 outf = open(file_name,
'wb')
2092 pickle.dump(objects, outf)
2095 def load_objects(self, file_name):
2097 inputf = open(file_name,
'rb')
2098 objects = pickle.load(inputf)
2105 This class contains analysis utilities to investigate ReplicaExchange results.
2118 Construction of the Class.
2119 @param model IMP.Model()
2120 @param stat_files list of string. Can be ascii stat files, rmf files names
2121 @param best_models Integer. Number of best scoring models, if None: all models will be read
2122 @param alignment boolean (Default=True). Align before computing the rmsd.
2126 self.best_models=best_models
2130 self.rbs1, self.beads1 = IMP.pmi.tools.get_rbs_and_beads(IMP.pmi.tools.select_at_all_resolutions(self.stath1))
2131 self.rbs0, self.beads0 = IMP.pmi.tools.get_rbs_and_beads(IMP.pmi.tools.select_at_all_resolutions(self.stath0))
2139 self.clusters.append(c)
2140 for n0
in range(len(self.stath0)):
2142 self.pairwise_rmsd={}
2143 self.pairwise_molecular_assignment={}
2144 self.alignment=alignment
2145 self.symmetric_molecules={}
2146 self.issymmetricsel={}
2148 self.molcopydict0=IMP.pmi.tools.get_molecules_dictionary_by_copy(
IMP.atom.get_leaves(self.stath0))
2149 self.molcopydict1=IMP.pmi.tools.get_molecules_dictionary_by_copy(
IMP.atom.get_leaves(self.stath1))
2153 Setup the selection onto which the rmsd is computed
2154 @param kwargs use IMP.atom.Selection keywords
2162 Store names of symmetric molecules
2164 self.symmetric_molecules[molecule_name]=0
2169 Setup the selection onto which the alignment is computed
2170 @param kwargs use IMP.atom.Selection keywords
2178 def clean_clusters(self):
2179 for c
in self.clusters: del c
2183 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2185 Cluster the models based on RMSD.
2186 @param rmsd_cutoff Float the distance cutoff in Angstrom
2187 @param metric (Default=IMP.atom.get_rmsd) the metric that will be used to compute rmsds
2189 self.clean_clusters()
2190 not_clustered=set(range(len(self.stath1)))
2191 while len(not_clustered)>0:
2192 self.
aggregate(not_clustered, rmsd_cutoff, metric)
2198 Refine the clusters by merging the ones whose centers are close
2199 @param rmsd_cutoff cutoff distance in Angstorms
2202 clusters_copy=self.clusters
2203 for c0,c1
in itertools.combinations(self.clusters,2):
2204 if c0.center_index
is None:
2206 if c1.center_index
is None:
2208 d0=self.stath0[c0.center_index]
2209 d1=self.stath1[c1.center_index]
2210 rmsd, molecular_assignment = self.
rmsd()
2211 if rmsd <= rmsd_cutoff:
2212 if c1
in self.clusters:
2213 clusters_copy.remove(c1)
2215 self.clusters=clusters_copy
2222 def set_cluster_assignments(self, cluster_ids):
2223 if len(cluster_ids)!=len(self.stath0):
2224 raise ValueError(
'cluster ids has to be same length as number of frames')
2227 for i
in sorted(list(set(cluster_ids))):
2229 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
2230 self.clusters[idx].add_member(i,d)
2234 Return the model data from a cluster
2235 @param cluster IMP.pmi.output.Cluster object
2244 Save the data for the whole models into a pickle file
2245 @param filename string
2247 self.stath0.save_data(filename)
2251 Set the data from an external IMP.pmi.output.Data
2252 @param data IMP.pmi.output.Data
2254 self.stath0.data=data
2255 self.stath1.data=data
2259 Load the data from an external pickled file
2260 @param filename string
2262 self.stath0.load_data(filename)
2263 self.stath1.load_data(filename)
2264 self.best_models=len(self.stath0)
2266 def add_cluster(self,rmf_name_list):
2268 print(
"creating cluster index "+str(len(self.clusters)))
2269 self.clusters.append(c)
2270 current_len=len(self.stath0)
2272 for rmf
in rmf_name_list:
2273 print(
"adding rmf "+rmf)
2274 self.stath0.add_stat_file(rmf)
2275 self.stath1.add_stat_file(rmf)
2277 for n0
in range(current_len,len(self.stath0)):
2284 Save the clusters into a pickle file
2285 @param filename string
2288 import cPickle
as pickle
2291 fl=open(filename,
'wb')
2292 pickle.dump(self.clusters,fl)
2296 Load the clusters from a pickle file
2297 @param filename string
2298 @param append bool (Default=False), if True. append the clusters to the ones currently present
2301 import cPickle
as pickle
2304 fl=open(filename,
'rb')
2305 self.clean_clusters()
2307 self.clusters+=pickle.load(fl)
2309 self.clusters=pickle.load(fl)
2318 Compute the cluster center for a given cluster
2320 member_distance=defaultdict(float)
2322 for n0,n1
in itertools.combinations(cluster.members,2):
2325 rmsd, _ = self.
rmsd()
2326 member_distance[n0]+=rmsd
2328 if len(member_distance)>0:
2329 cluster.center_index=min(member_distance, key=member_distance.get)
2331 cluster.center_index=cluster.members[0]
2335 Save the coordinates of the current cluster a single rmf file
2337 print(
"saving coordinates",cluster)
2340 if rmf_name
is None:
2341 rmf_name=prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
2343 d1=self.stath1[cluster.members[0]]
2345 o.init_rmf(rmf_name, [self.stath1])
2346 for n1
in cluster.members:
2350 if self.alignment: self.align()
2351 o.write_rmf(rmf_name)
2353 o.close_rmf(rmf_name)
2357 remove structures that are similar
2358 append it to a new cluster
2360 print(
"pruning models")
2363 remaining=range(1,len(self.stath1),10)
2365 while len(remaining)>0:
2366 d0=self.stath0[selected]
2368 for n1
in remaining:
2370 if self.alignment: self.align()
2374 print(
"pruning model %s, similar to model %s, rmsd %s"%(str(n1),str(selected),str(d)))
2375 remaining=[x
for x
in remaining
if x
not in rm]
2376 if len(remaining)==0:
break
2377 selected=remaining[0]
2378 filtered.append(selected)
2381 self.clusters.append(c)
2391 Compute the precision of a cluster
2397 if not cluster.center_index
is None:
2398 members1=[cluster.center_index]
2400 members1=cluster.members
2404 for n1
in cluster.members:
2409 tmp_rmsd, _ = self.
rmsd()
2414 precision=rmsd/npairs
2415 cluster.precision=precision
2420 Compute the bipartite precision (ie the cross-precision)
2421 between two clusters
2425 for cn0,n0
in enumerate(cluster1.members):
2427 for cn1,n1
in enumerate(cluster2.members):
2429 tmp_rmsd, _ =self.
rmsd()
2430 if verbose: print(
"--- rmsd between structure %s and structure %s is %s"%(str(cn0),str(cn1),str(tmp_rmsd)))
2433 precision=rmsd/npairs
2436 def rmsf(self,cluster,molecule,copy_index=0,state_index=0,cluster_ref=None,step=1):
2438 Compute the Root mean square fluctuations
2439 of a molecule in a cluster
2440 Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf
2442 rmsf=IMP.pmi.tools.OrderedDict()
2445 if cluster_ref
is not None:
2446 if not cluster_ref.center_index
is None:
2447 members0 = [cluster_ref.center_index]
2449 members0 = cluster_ref.members
2451 if not cluster.center_index
is None:
2452 members0 = [cluster.center_index]
2454 members0 = cluster.members
2457 copy_index=copy_index,state_index=state_index)
2458 ps0=s0.get_selected_particles()
2472 for n1
in cluster.members[::step]:
2474 print(
"--- rmsf %s %s"%(str(n0),str(n1)))
2477 s1=
IMP.atom.Selection(self.stath1,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
2478 copy_index=copy_index,state_index=state_index)
2479 ps1 = s1.get_selected_particles()
2482 if self.alignment: self.align()
2483 for n,(p0,p1)
in enumerate(zip(ps0,ps1)):
2484 r=residue_indexes[n]
2496 for stath
in [self.stath0,self.stath1]:
2497 if molecule
not in self.symmetric_molecules:
2499 copy_index=copy_index,state_index=state_index)
2502 state_index=state_index)
2504 ps = s.get_selected_particles()
2513 def save_densities(self,cluster,density_custom_ranges,voxel_size=5,reference="Absolute", prefix="./",step=1):
2518 for n1
in cluster.members[::step]:
2519 print(
"density "+str(n1))
2522 if self.alignment: self.align()
2523 dens.add_subunits_density(self.stath1)
2525 dens.write_mrc(path=prefix+
'/',suffix=str(cluster.cluster_id))
2528 def contact_map(self,cluster,contact_threshold=15,log_scale=False,consolidate=False,molecules=None,prefix='./',reference="Absolute"):
2531 import matplotlib.pyplot
as plt
2532 import matplotlib.cm
as cm
2533 from scipy.spatial.distance
import cdist
2535 if molecules
is None:
2539 unique_copies=[mol
for mol
in mols
if mol.get_copy_index() == 0]
2540 mol_names_unique=dict((mol.get_name(),mol)
for mol
in unique_copies)
2541 total_len_unique=sum(max(mol.get_residue_indexes())
for mol
in unique_copies)
2550 seqlen=max(mol.get_residue_indexes())
2551 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2555 for mol
in unique_copies:
2556 seqlen=max(mol.get_residue_indexes())
2557 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2561 for ncl,n1
in enumerate(cluster.members):
2565 coord_dict=IMP.pmi.tools.OrderedDict()
2567 mol_name=mol.get_name()
2568 copy_index=mol.get_copy_index()
2569 rindexes = mol.get_residue_indexes()
2570 coords = np.ones((max(rindexes),3))
2571 for rnum
in rindexes:
2573 selpart = sel.get_selected_particles()
2574 if len(selpart) == 0:
continue
2575 selpart = selpart[0]
2576 coords[rnum - 1, :] =
IMP.core.XYZ(selpart).get_coordinates()
2577 coord_dict[mol]=coords
2580 coords=np.concatenate(list(coord_dict.values()))
2581 dists = cdist(coords, coords)
2582 binary_dists = np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
2584 binary_dists_dict={}
2586 len1 = max(mol1.get_residue_indexes())
2588 name1=mol1.get_name()
2589 name2=mol2.get_name()
2590 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2591 if (name1, name2)
not in binary_dists_dict:
2592 binary_dists_dict[(name1, name2)] = np.zeros((len1,len1))
2593 binary_dists_dict[(name1, name2)] += np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
2594 binary_dists=np.zeros((total_len_unique,total_len_unique))
2596 for name1,name2
in binary_dists_dict:
2597 r1=index_dict[mol_names_unique[name1]]
2598 r2=index_dict[mol_names_unique[name2]]
2599 binary_dists[min(r1):max(r1)+1,min(r2):max(r2)+1] = np.where((binary_dists_dict[(name1, name2)]>=1.0),1.0,0.0)
2604 contact_freqs = binary_dists
2606 dist_maps.append(dists)
2607 av_dist_map += dists
2608 contact_freqs += binary_dists
2613 contact_freqs =-np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2615 contact_freqs =1.0/len(cluster)*contact_freqs
2616 av_dist_map=1.0/len(cluster)*contact_freqs
2618 fig = plt.figure(figsize=(100, 100))
2619 ax = fig.add_subplot(111)
2622 gap_between_components=50
2634 prot_list=list(zip(*sorted_tuple))[1]
2637 prot_list=list(zip(*sorted_tuple))[1]
2639 prot_listx = prot_list
2640 nresx = gap_between_components + \
2641 sum([max(mol.get_residue_indexes())
2642 + gap_between_components
for mol
in prot_listx])
2645 prot_listy = prot_list
2646 nresy = gap_between_components + \
2647 sum([max(mol.get_residue_indexes())
2648 + gap_between_components
for mol
in prot_listy])
2653 res = gap_between_components
2654 for mol
in prot_listx:
2655 resoffsetx[mol] = res
2656 res += max(mol.get_residue_indexes())
2658 res += gap_between_components
2662 res = gap_between_components
2663 for mol
in prot_listy:
2664 resoffsety[mol] = res
2665 res += max(mol.get_residue_indexes())
2667 res += gap_between_components
2669 resoffsetdiagonal = {}
2670 res = gap_between_components
2671 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2672 resoffsetdiagonal[mol] = res
2673 res += max(mol.get_residue_indexes())
2674 res += gap_between_components
2679 for n, prot
in enumerate(prot_listx):
2680 res = resoffsetx[prot]
2682 for proty
in prot_listy:
2683 resy = resoffsety[proty]
2684 endy = resendy[proty]
2685 ax.plot([res, res], [resy, endy], linestyle=
'-',color=
'gray', lw=0.4)
2686 ax.plot([end, end], [resy, endy], linestyle=
'-',color=
'gray', lw=0.4)
2687 xticks.append((float(res) + float(end)) / 2)
2692 for n, prot
in enumerate(prot_listy):
2693 res = resoffsety[prot]
2695 for protx
in prot_listx:
2696 resx = resoffsetx[protx]
2697 endx = resendx[protx]
2698 ax.plot([resx, endx], [res, res], linestyle=
'-',color=
'gray', lw=0.4)
2699 ax.plot([resx, endx], [end, end], linestyle=
'-',color=
'gray', lw=0.4)
2700 yticks.append((float(res) + float(end)) / 2)
2705 tmp_array = np.zeros((nresx, nresy))
2707 for px
in prot_listx:
2708 for py
in prot_listy:
2709 resx = resoffsetx[px]
2710 lengx = resendx[px] - 1
2711 resy = resoffsety[py]
2712 lengy = resendy[py] - 1
2713 indexes_x = index_dict[px]
2714 minx = min(indexes_x)
2715 maxx = max(indexes_x)
2716 indexes_y = index_dict[py]
2717 miny = min(indexes_y)
2718 maxy = max(indexes_y)
2719 tmp_array[resx:lengx,resy:lengy] = contact_freqs[minx:maxx,miny:maxy]
2720 ret[(px,py)]=np.argwhere(contact_freqs[minx:maxx,miny:maxy] == 1.0)+1
2722 cax = ax.imshow(tmp_array,
2727 interpolation=
'nearest')
2729 ax.set_xticks(xticks)
2730 ax.set_xticklabels(xlabels, rotation=90)
2731 ax.set_yticks(yticks)
2732 ax.set_yticklabels(ylabels)
2733 plt.setp(ax.get_xticklabels(), fontsize=6)
2734 plt.setp(ax.get_yticklabels(), fontsize=6)
2737 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2738 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2743 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf", dpi=300,transparent=
"False")
2747 def plot_rmsd_matrix(self,filename):
2749 self.compute_all_pairwise_rmsd()
2750 distance_matrix = np.zeros(
2751 (len(self.stath0), len(self.stath1)))
2752 for (n0,n1)
in self.pairwise_rmsd:
2753 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0,n1)]
2755 import matplotlib
as mpl
2757 import matplotlib.pylab
as pl
2758 from scipy.cluster
import hierarchy
as hrc
2760 fig = pl.figure(figsize=(10,8))
2761 ax = fig.add_subplot(212)
2762 dendrogram = hrc.dendrogram(
2763 hrc.linkage(distance_matrix),
2766 leaves_order = dendrogram[
'leaves']
2767 ax.set_xlabel(
'Model')
2768 ax.set_ylabel(
'RMSD [Angstroms]')
2770 ax2 = fig.add_subplot(221)
2772 distance_matrix[leaves_order,
2775 interpolation=
'nearest')
2776 cb = fig.colorbar(cax)
2777 cb.set_label(
'RMSD [Angstroms]')
2778 ax2.set_xlabel(
'Model')
2779 ax2.set_ylabel(
'Model')
2781 pl.savefig(filename, dpi=300)
2790 Update the cluster id numbers
2792 for n,c
in enumerate(self.clusters):
2795 def get_molecule(self, hier, name, copy):
2797 return IMP.pmi.tools.get_molecules(s.get_selected_particles()[0])[0]
2803 self.seldict0=IMP.pmi.tools.get_selections_dictionary(self.sel0_rmsd.get_selected_particles())
2804 self.seldict1=IMP.pmi.tools.get_selections_dictionary(self.sel1_rmsd.get_selected_particles())
2805 for mol
in self.seldict0:
2806 for sel
in self.seldict0[mol]:
2807 self.issymmetricsel[sel]=
False
2808 for mol
in self.symmetric_molecules:
2809 self.symmetric_molecules[mol]=len(self.seldict0[mol])
2810 for sel
in self.seldict0[mol]:
2811 self.issymmetricsel[sel]=
True
2818 for rb
in self.rbs1:
2821 for bead
in self.beads1:
2826 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2828 initial filling of the clusters.
2831 print(
"clustering model "+str(n0))
2832 d0 = self.stath0[n0]
2834 print(
"creating cluster index "+str(len(self.clusters)))
2835 self.clusters.append(c)
2837 clustered = set([n0])
2839 print(
"--- trying to add model "+str(n1)+
" to cluster "+str(len(self.clusters)))
2840 d1 = self.stath1[n1]
2841 if self.alignment: self.align()
2842 rmsd, _ = self.
rmsd(metric=metric)
2843 if rmsd<rmsd_cutoff:
2844 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2848 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2853 merge the clusters that have close members
2854 @param rmsd_cutoff cutoff distance in Angstorms
2860 for c0, c1
in filter(
lambda x: len(x[0].members)>1, itertools.combinations(self.clusters, 2)):
2861 n0, n1 = [c.members[0]
for c
in (c0,c1)]
2862 d0 = self.stath0[n0]
2863 d1 = self.stath1[n1]
2864 rmsd, _ = self.
rmsd()
2866 to_merge.append((c0,c1))
2868 for c0, c
in reversed(to_merge):
2872 self.clusters = [c
for c
in filter(
lambda x: len(x.members)>0, self.clusters)]
2877 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2879 print(
"check close members for clusters "+str(c0.cluster_id)+
" and "+str(c1.cluster_id))
2880 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2881 d0 = self.stath0[n0]
2882 d1 = self.stath1[n1]
2883 rmsd, _ = self.
rmsd(metric=metric)
2884 if rmsd<rmsd_cutoff:
2900 a function that returns the permutation best_sel of sels0 that minimizes metric
2902 best_rmsd2 = float(
'inf')
2904 if self.issymmetricsel[sels0[0]]:
2907 for offset
in range(N):
2908 order=[(offset+i)%N
for i
in range(N)]
2909 sels = [sels0[(offset+i)%N]
for i
in range(N)]
2912 r=metric(sel0, sel1)
2915 if rmsd2 < best_rmsd2:
2920 for sels
in itertools.permutations(sels0):
2922 for sel0, sel1
in itertools.takewhile(
lambda x: rmsd2<best_rmsd2, zip(sels, sels1)):
2923 r=metric(sel0, sel1)
2925 if rmsd2 < best_rmsd2:
2939 return best_sel, best_rmsd2
2941 def compute_all_pairwise_rmsd(self):
2942 for d0
in self.stath0:
2943 for d1
in self.stath1:
2944 rmsd, _ = self.
rmsd()
2946 def rmsd(self,metric=IMP.atom.get_rmsd):
2948 Computes the RMSD. Resolves ambiguous pairs assignments
2951 n0=self.stath0.current_index
2952 n1=self.stath1.current_index
2953 if ((n0,n1)
in self.pairwise_rmsd)
and ((n0,n1)
in self.pairwise_molecular_assignment):
2954 return self.pairwise_rmsd[(n0,n1)], self.pairwise_molecular_assignment[(n0,n1)]
2962 seldict_best_order={}
2963 molecular_assignment={}
2964 for molname, sels0
in self.seldict0.items():
2965 sels_best_order, best_rmsd2 = self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2967 Ncoords = len(sels_best_order[0].get_selected_particles())
2968 Ncopies = len(self.seldict1[molname])
2969 total_rmsd += Ncoords*best_rmsd2
2970 total_N += Ncoords*Ncopies
2972 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2973 p0 = sel0.get_selected_particles()[0]
2974 p1 = sel1.get_selected_particles()[0]
2975 m0 = IMP.pmi.tools.get_molecules([p0])[0]
2976 m1 = IMP.pmi.tools.get_molecules([p1])[0]
2980 molecular_assignment[(molname,c0)]=(molname,c1)
2982 total_rmsd = math.sqrt(total_rmsd/total_N)
2984 self.pairwise_rmsd[(n0,n1)]=total_rmsd
2985 self.pairwise_molecular_assignment[(n0,n1)]=molecular_assignment
2986 self.pairwise_rmsd[(n1,n0)]=total_rmsd
2987 self.pairwise_molecular_assignment[(n1,n0)]=molecular_assignment
2989 return total_rmsd, molecular_assignment
2993 Fix the reference structure for structural alignment, rmsd and chain assignemnt
2994 @param reference can be either "Absolute" (cluster center of the first cluster)
2995 or Relative (cluster center of the current cluster)
2997 if reference==
"Absolute":
2999 elif reference==
"Relative":
3000 if cluster.center_index:
3001 n0=cluster.center_index
3003 n0=cluster.members[0]
3008 compute the molecular assignemnts between multiple copies
3009 of the same sequence. It changes the Copy index of Molecules
3012 _, molecular_assignment = self.
rmsd()
3013 for (m0, c0), (m1,c1)
in molecular_assignment.items():
3014 mol0 = self.molcopydict0[m0][c0]
3015 mol1 = self.molcopydict1[m1][c1]
3018 p1.set_value(cik0,c0)
3022 Undo the Copy index assignment
3025 _, molecular_assignment = self.
rmsd()
3027 for (m0, c0), (m1,c1)
in molecular_assignment.items():
3028 mol0 = self.molcopydict0[m0][c0]
3029 mol1 = self.molcopydict1[m1][c1]
3032 p1.set_value(cik0,c1)
3039 s=
"AnalysisReplicaExchange\n"
3040 s+=
"---- number of clusters %s \n"%str(len(self.clusters))
3041 s+=
"---- number of models %s \n"%str(len(self.stath0))
3044 def __getitem__(self,int_slice_adaptor):
3045 if type(int_slice_adaptor)
is int:
3046 return self.clusters[int_slice_adaptor]
3047 elif type(int_slice_adaptor)
is slice:
3048 return self.__iter__(int_slice_adaptor)
3050 raise TypeError(
"Unknown Type")
3053 return len(self.clusters)
3055 def __iter__(self,slice_key=None):
3056 if slice_key
is None:
3057 for i
in range(len(self)):
3060 for i
in range(len(self))[slice_key]:
Simplify creation of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A class for reading stat files (either rmf or ascii v1 and v2)
A member of a rigid body, it has internal (local) coordinates.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
def merge_aggregates
merge the clusters that have close members
This class initializes the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Representation of the system.
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def deprecated_pmi1_object
Mark a PMI1 class as deprecated.
def build_model
Create model.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
def deprecated_method
Python decorator to mark a method as deprecated.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
A macro to build a Representation based on a Topology and lists of movers DEPRECATED - Use BuildSyste...
Tools for clustering and cluster analysis.
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and set up constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Deprecated building macro - use BuildSystem()
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
Set up the representation of all proteins and nucleic acid macromolecules.
A macro to help setup and run replica exchange.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Sample using replica exchange.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
def get_representation
Return the Representation object.
A decorator for a particle with x,y,z coordinates and a radius.