1 """@namespace IMP.pmi.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
19 from operator
import itemgetter
20 from collections
import defaultdict
26 class _RMFRestraints(object):
27 """All restraints that are written out to the RMF file"""
28 def __init__(self, model, user_restraints):
30 self._user_restraints = user_restraints
if user_restraints
else []
33 return (len(self._user_restraints)
34 + self._rmf_rs.get_number_of_restraints())
38 __nonzero__ = __bool__
40 def __getitem__(self, i):
41 class FakePMIWrapper(object):
42 def __init__(self, r):
43 self.r = IMP.RestraintSet.get_from(r)
44 get_restraint =
lambda self: self.r
45 lenuser = len(self._user_restraints)
47 return self._user_restraints[i]
48 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
49 r = self._rmf_rs.get_restraint(i - lenuser)
50 return FakePMIWrapper(r)
52 raise IndexError(
"Out of range")
56 """A macro to help setup and run replica exchange.
57 Supports Monte Carlo and molecular dynamics.
58 Produces trajectory RMF files, best PDB structures,
59 and output stat files.
65 monte_carlo_sample_objects=
None,
66 molecular_dynamics_sample_objects=
None,
68 rmf_output_objects=
None,
69 crosslink_restraints=
None,
70 monte_carlo_temperature=1.0,
71 simulated_annealing=
False,
72 simulated_annealing_minimum_temperature=1.0,
73 simulated_annealing_maximum_temperature=2.5,
74 simulated_annealing_minimum_temperature_nframes=100,
75 simulated_annealing_maximum_temperature_nframes=100,
76 replica_exchange_minimum_temperature=1.0,
77 replica_exchange_maximum_temperature=2.5,
78 replica_exchange_swap=
True,
80 number_of_best_scoring_models=500,
83 molecular_dynamics_steps=10,
84 molecular_dynamics_max_time_step=1.0,
85 number_of_frames=1000,
86 save_coordinates_mode=
"lowest_temperature",
87 nframes_write_coordinates=1,
88 write_initial_rmf=
True,
89 initial_rmf_name_suffix=
"initial",
90 stat_file_name_suffix=
"stat",
91 best_pdb_name_suffix=
"model",
93 do_create_directories=
True,
94 global_output_directory=
"./",
97 replica_stat_file_suffix=
"stat_replica",
98 em_object_for_rmf=
None,
100 replica_exchange_object=
None,
103 @param model The IMP model
104 @param representation PMI.representation.Representation object
105 (or list of them, for multi-state modeling)
106 @param root_hier Instead of passing Representation, pass the System hierarchy
107 @param monte_carlo_sample_objects Objects for MC sampling; a list of
108 structural components (generally the representation) that will
109 be moved and restraints with parameters that need to
111 For PMI2: just pass flat list of movers
112 @param molecular_dynamics_sample_objects Objects for MD sampling
113 For PMI2: just pass flat list of particles
114 @param output_objects A list of structural objects and restraints
115 that will be included in output (ie, statistics "stat" files). Any object
116 that provides a get_output() method can be used here. If None is passed
117 the macro will not write stat files.
118 @param rmf_output_objects A list of structural objects and restraints
119 that will be included in rmf. Any object
120 that provides a get_output() method can be used here.
121 @param crosslink_restraints List of cross-link restraints that will
122 be included in output RMF files (for visualization).
123 @param monte_carlo_temperature MC temp (may need to be optimized
124 based on post-sampling analysis)
125 @param simulated_annealing If True, perform simulated annealing
126 @param simulated_annealing_minimum_temperature Should generally be
127 the same as monte_carlo_temperature.
128 @param simulated_annealing_minimum_temperature_nframes Number of
129 frames to compute at minimum temperature.
130 @param simulated_annealing_maximum_temperature_nframes Number of
132 temps > simulated_annealing_maximum_temperature.
133 @param replica_exchange_minimum_temperature Low temp for REX; should
134 generally be the same as monte_carlo_temperature.
135 @param replica_exchange_maximum_temperature High temp for REX
136 @param replica_exchange_swap Boolean, enable disable temperature
138 @param num_sample_rounds Number of rounds of MC/MD per cycle
139 @param number_of_best_scoring_models Number of top-scoring PDB models
140 to keep around for analysis
141 @param monte_carlo_steps Number of MC steps per round
142 @param self_adaptive self adaptive scheme for monte carlo movers
143 @param molecular_dynamics_steps Number of MD steps per round
144 @param molecular_dynamics_max_time_step Max time step for MD
145 @param number_of_frames Number of REX frames to run
146 @param save_coordinates_mode string: how to save coordinates.
147 "lowest_temperature" (default) only the lowest temperatures is saved
148 "25th_score" all replicas whose score is below the 25th percentile
149 "50th_score" all replicas whose score is below the 50th percentile
150 "75th_score" all replicas whose score is below the 75th percentile
151 @param nframes_write_coordinates How often to write the coordinates
153 @param write_initial_rmf Write the initial configuration
154 @param global_output_directory Folder that will be created to house
156 @param test_mode Set to True to avoid writing any files, just test one frame.
163 self.output_objects = output_objects
164 self.rmf_output_objects=rmf_output_objects
165 self.representation = representation
167 if type(representation) == list:
168 self.is_multi_state =
True
169 self.root_hiers = [r.prot
for r
in representation]
170 self.vars[
"number_of_states"] = len(representation)
172 self.is_multi_state =
False
173 self.root_hier = representation.prot
174 self.vars[
"number_of_states"] = 1
175 elif root_hier
and type(root_hier) ==
IMP.atom.Hierarchy and root_hier.get_name()==
'System':
177 if self.output_objects
is not None:
179 if self.rmf_output_objects
is not None:
181 self.root_hier = root_hier
182 states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
183 self.vars[
"number_of_states"] = len(states)
185 self.root_hiers = states
186 self.is_multi_state =
True
188 self.root_hier = root_hier
189 self.is_multi_state =
False
191 raise Exception(
"Must provide representation or System hierarchy (root_hier)")
193 self._rmf_restraints = _RMFRestraints(model, crosslink_restraints)
194 self.em_object_for_rmf = em_object_for_rmf
195 self.monte_carlo_sample_objects = monte_carlo_sample_objects
196 self.vars[
"self_adaptive"]=self_adaptive
197 if sample_objects
is not None:
198 self.monte_carlo_sample_objects+=sample_objects
199 self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
200 self.replica_exchange_object = replica_exchange_object
201 self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
202 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
204 "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
206 "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
207 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
208 self.vars[
"simulated_annealing"]=\
210 self.vars[
"simulated_annealing_minimum_temperature"]=\
211 simulated_annealing_minimum_temperature
212 self.vars[
"simulated_annealing_maximum_temperature"]=\
213 simulated_annealing_maximum_temperature
214 self.vars[
"simulated_annealing_minimum_temperature_nframes"]=\
215 simulated_annealing_minimum_temperature_nframes
216 self.vars[
"simulated_annealing_maximum_temperature_nframes"]=\
217 simulated_annealing_maximum_temperature_nframes
219 self.vars[
"num_sample_rounds"] = num_sample_rounds
221 "number_of_best_scoring_models"] = number_of_best_scoring_models
222 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
223 self.vars[
"molecular_dynamics_steps"]=molecular_dynamics_steps
224 self.vars[
"number_of_frames"] = number_of_frames
225 if not save_coordinates_mode
in [
"lowest_temperature",
"25th_score",
"50th_score",
"75th_score"]:
226 raise Exception(
"save_coordinates_mode has unrecognized value")
228 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
229 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
230 self.vars[
"write_initial_rmf"] = write_initial_rmf
231 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
232 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
233 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
234 self.vars[
"do_clean_first"] = do_clean_first
235 self.vars[
"do_create_directories"] = do_create_directories
236 self.vars[
"global_output_directory"] = global_output_directory
237 self.vars[
"rmf_dir"] = rmf_dir
238 self.vars[
"best_pdb_dir"] = best_pdb_dir
239 self.vars[
"atomistic"] = atomistic
240 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
241 self.vars[
"geometries"] =
None
242 self.test_mode = test_mode
245 if self.vars[
"geometries"]
is None:
246 self.vars[
"geometries"] = list(geometries)
248 self.vars[
"geometries"].extend(geometries)
251 print(
"ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
252 print(
"--- it stores the best scoring pdb models in pdbs/")
253 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
254 print(
"--- variables:")
255 keys = list(self.vars.keys())
258 print(
"------", v.ljust(30), self.vars[v])
260 def get_replica_exchange_object(self):
261 return self.replica_exchange_object
263 def _add_provenance(self, sampler_md, sampler_mc):
264 """Record details about the sampling in the IMP Hierarchies"""
265 if not self.is_multi_state
or self.pmi2:
266 output_hierarchies = [self.root_hier]
268 output_hierarchies = self.root_hiers
272 method =
"Molecular Dynamics"
273 iterations += self.vars[
"molecular_dynamics_steps"]
275 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
276 iterations += self.vars[
"monte_carlo_steps"]
278 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
280 iterations *= self.vars[
"num_sample_rounds"]
282 for h
in output_hierarchies:
283 pi = self.model.add_particle(
"sampling")
285 self.model, pi, method, self.vars[
"number_of_frames"],
287 p.set_number_of_replicas(
288 self.replica_exchange_object.get_number_of_replicas())
289 IMP.pmi.tools._add_pmi_provenance(h)
292 def execute_macro(self):
293 temp_index_factor = 100000.0
297 if self.monte_carlo_sample_objects
is not None:
298 print(
"Setting up MonteCarlo")
300 self.monte_carlo_sample_objects,
301 self.vars[
"monte_carlo_temperature"])
302 if self.vars[
"simulated_annealing"]:
303 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
304 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
305 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
306 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
307 sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
308 if self.vars[
"self_adaptive"]:
309 sampler_mc.set_self_adaptive(isselfadaptive=self.vars[
"self_adaptive"])
310 if self.output_objects
is not None:
311 self.output_objects.append(sampler_mc)
312 if self.rmf_output_objects
is not None:
313 self.rmf_output_objects.append(sampler_mc)
314 samplers.append(sampler_mc)
317 if self.molecular_dynamics_sample_objects
is not None:
318 print(
"Setting up MolecularDynamics")
320 self.molecular_dynamics_sample_objects,
321 self.vars[
"monte_carlo_temperature"],
322 maximum_time_step=self.molecular_dynamics_max_time_step)
323 if self.vars[
"simulated_annealing"]:
324 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
325 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
326 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
327 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
328 sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
329 if self.output_objects
is not None:
330 self.output_objects.append(sampler_md)
331 if self.rmf_output_objects
is not None:
332 self.rmf_output_objects.append(sampler_md)
333 samplers.append(sampler_md)
336 print(
"Setting up ReplicaExchange")
339 "replica_exchange_minimum_temperature"],
341 "replica_exchange_maximum_temperature"],
343 replica_exchange_object=self.replica_exchange_object)
344 self.replica_exchange_object = rex.rem
346 myindex = rex.get_my_index()
347 if self.output_objects
is not None:
348 self.output_objects.append(rex)
349 if self.rmf_output_objects
is not None:
350 self.rmf_output_objects.append(rex)
354 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
358 globaldir = self.vars[
"global_output_directory"] +
"/"
359 rmf_dir = globaldir + self.vars[
"rmf_dir"]
360 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
362 if not self.test_mode:
363 if self.vars[
"do_clean_first"]:
366 if self.vars[
"do_create_directories"]:
369 os.makedirs(globaldir)
377 if not self.is_multi_state:
383 for n
in range(self.vars[
"number_of_states"]):
385 os.makedirs(pdb_dir +
"/" + str(n))
392 if self.output_objects
is not None:
393 self.output_objects.append(sw)
394 if self.rmf_output_objects
is not None:
395 self.rmf_output_objects.append(sw)
397 print(
"Setting up stat file")
399 low_temp_stat_file = globaldir + \
400 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
401 if not self.test_mode:
402 if self.output_objects
is not None:
403 output.init_stat2(low_temp_stat_file,
405 extralabels=[
"rmf_file",
"rmf_frame_index"])
407 print(
"Stat file writing is disabled")
409 if self.rmf_output_objects
is not None:
410 print(
"Stat info being written in the rmf file")
412 print(
"Setting up replica stat file")
413 replica_stat_file = globaldir + \
414 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
415 if not self.test_mode:
416 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
418 if not self.test_mode:
419 print(
"Setting up best pdb files")
420 if not self.is_multi_state:
421 if self.vars[
"number_of_best_scoring_models"] > 0:
422 output.init_pdb_best_scoring(pdb_dir +
"/" +
423 self.vars[
"best_pdb_name_suffix"],
426 "number_of_best_scoring_models"],
427 replica_exchange=
True)
428 output.write_psf(pdb_dir +
"/" +
"model.psf",pdb_dir +
"/" +
429 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
431 if self.vars[
"number_of_best_scoring_models"] > 0:
432 for n
in range(self.vars[
"number_of_states"]):
433 output.init_pdb_best_scoring(pdb_dir +
"/" + str(n) +
"/" +
434 self.vars[
"best_pdb_name_suffix"],
437 "number_of_best_scoring_models"],
438 replica_exchange=
True)
439 output.write_psf(pdb_dir +
"/" + str(n) +
"/" +
"model.psf",pdb_dir +
"/" + str(n) +
"/" +
440 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
443 if self.em_object_for_rmf
is not None:
444 if not self.is_multi_state
or self.pmi2:
445 output_hierarchies = [
447 self.em_object_for_rmf.get_density_as_hierarchy(
450 output_hierarchies = self.root_hiers
451 output_hierarchies.append(
452 self.em_object_for_rmf.get_density_as_hierarchy())
454 if not self.is_multi_state
or self.pmi2:
455 output_hierarchies = [self.root_hier]
457 output_hierarchies = self.root_hiers
460 if not self.test_mode:
461 print(
"Setting up and writing initial rmf coordinate file")
462 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
463 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
464 output_hierarchies,listofobjects=self.rmf_output_objects)
465 if self._rmf_restraints:
466 output.add_restraints_to_rmf(
467 init_suffix +
"." + str(myindex) +
".rmf3",
468 self._rmf_restraints)
469 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
470 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
474 if not self.test_mode:
475 mpivs=IMP.pmi.samplers.MPI_values(self.replica_exchange_object)
479 self._add_provenance(sampler_md, sampler_mc)
481 if not self.test_mode:
482 print(
"Setting up production rmf files")
483 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
484 output.init_rmf(rmfname, output_hierarchies, geometries=self.vars[
"geometries"],
485 listofobjects = self.rmf_output_objects)
487 if self._rmf_restraints:
488 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
490 ntimes_at_low_temp = 0
494 self.replica_exchange_object.set_was_used(
True)
495 nframes = self.vars[
"number_of_frames"]
498 for i
in range(nframes):
502 for nr
in range(self.vars[
"num_sample_rounds"]):
503 if sampler_md
is not None:
505 self.vars[
"molecular_dynamics_steps"])
506 if sampler_mc
is not None:
507 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
509 self.model).evaluate(
False)
510 mpivs.set_value(
"score",score)
511 output.set_output_entry(
"score", score)
515 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
517 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
518 save_frame=(min_temp_index == my_temp_index)
519 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
520 score_perc=mpivs.get_percentile(
"score")
521 save_frame=(score_perc*100.0<=25.0)
522 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
523 score_perc=mpivs.get_percentile(
"score")
524 save_frame=(score_perc*100.0<=50.0)
525 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
526 score_perc=mpivs.get_percentile(
"score")
527 save_frame=(score_perc*100.0<=75.0)
530 print(
"--- frame %s score %s " % (str(i), str(score)))
532 if not self.test_mode:
533 if i % self.vars[
"nframes_write_coordinates"]==0:
534 print(
'--- writing coordinates')
535 if self.vars[
"number_of_best_scoring_models"] > 0:
536 output.write_pdb_best_scoring(score)
537 output.write_rmf(rmfname)
538 output.set_output_entry(
"rmf_file", rmfname)
539 output.set_output_entry(
"rmf_frame_index", ntimes_at_low_temp)
541 output.set_output_entry(
"rmf_file", rmfname)
542 output.set_output_entry(
"rmf_frame_index",
'-1')
543 if self.output_objects
is not None:
544 output.write_stat2(low_temp_stat_file)
545 ntimes_at_low_temp += 1
547 if not self.test_mode:
548 output.write_stat2(replica_stat_file)
549 if self.vars[
"replica_exchange_swap"]:
550 rex.swap_temp(i, score)
551 if self.representation:
552 for p, state
in self.representation._protocol_output:
553 p.add_replica_exchange(state, self)
555 if not self.test_mode:
556 print(
"closing production rmf files")
557 output.close_rmf(rmfname)
563 """A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
564 Easily create multi-state systems by calling this macro
565 repeatedly with different TopologyReader objects!
566 A useful function is get_molecules() which returns the PMI Molecules grouped by state
567 as a dictionary with key = (molecule name), value = IMP.pmi.topology.Molecule
568 Quick multi-state system:
571 reader1 = IMP.pmi.topology.TopologyReader(tfile1)
572 reader2 = IMP.pmi.topology.TopologyReader(tfile2)
573 bs = IMP.pmi.macros.BuildSystem(mdl)
574 bs.add_state(reader1)
575 bs.add_state(reader2)
576 bs.execute_macro() # build everything including degrees of freedom
577 IMP.atom.show_molecular_hierarchy(bs.get_hierarchy())
578 ### now you have a two state system, you add restraints etc
580 \note The "domain name" entry of the topology reader is not used.
581 All molecules are set up by the component name, but split into rigid bodies
586 sequence_connectivity_scale=4.0,
587 force_create_gmm_files=
False,
590 @param mdl An IMP Model
591 @param sequence_connectivity_scale For scaling the connectivity restraint
592 @param force_create_gmm_files If True, will sample and create GMMs
593 no matter what. If False, will only only sample if the
594 files don't exist. If number of Gaussians is zero, won't
596 @param resolutions The resolutions to build for structured regions
601 self._domain_res = []
603 self.force_create_gmm_files = force_create_gmm_files
604 self.resolutions = resolutions
608 keep_chain_id=
False, fasta_name_map=
None):
609 """Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
610 When you are done adding states, call execute_macro()
611 @param reader The TopologyReader object
612 @param fasta_name_map dictionary for converting protein names found in the fasta file
614 state = self.system.create_state()
615 self._readers.append(reader)
616 these_domain_res = {}
618 chain_ids = string.ascii_uppercase+string.ascii_lowercase+
'0123456789'
623 for molname
in reader.get_molecules():
624 copies = reader.get_molecules()[molname].domains
625 for nc,copyname
in enumerate(copies):
626 print(
"BuildSystem.add_state: setting up molecule %s copy number %s" % (molname,str(nc)))
627 copy = copies[copyname]
629 if keep_chain_id ==
True:
630 chain_id = copy[0].chain
632 chain_id = chain_ids[numchain]
635 fasta_flag=copy[0].fasta_flag
636 if fasta_flag ==
"RNA" or fasta_flag ==
"DNA": is_nucleic=
True
638 print(
"BuildSystem.add_state: molecule %s sequence has %s residues" % (molname,len(seq)))
639 orig_mol = state.create_molecule(molname,
645 print(
"BuildSystem.add_state: creating a copy for molecule %s" % molname)
646 mol = orig_mol.create_copy(chain_id)
649 for domainnumber,domain
in enumerate(copy):
650 print(
"BuildSystem.add_state: ---- setting up domain %s of molecule %s" % (domainnumber,molname))
653 these_domains[domain.get_unique_name()] = domain
654 if domain.residue_range==[]
or domain.residue_range
is None:
655 domain_res = mol.get_residues()
657 start = domain.residue_range[0]+domain.pdb_offset
658 if domain.residue_range[1]==
'END':
659 end = len(mol.sequence)
661 end = domain.residue_range[1]+domain.pdb_offset
662 domain_res = mol.residue_range(start-1,end-1)
663 print(
"BuildSystem.add_state: -------- domain %s of molecule %s extends from residue %s to residue %s " % (domainnumber,molname,start,end))
664 if domain.pdb_file==
"BEADS":
665 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by BEADS " % (domainnumber,molname))
666 mol.add_representation(
668 resolutions=[domain.bead_size],
669 setup_particles_as_densities=(
670 domain.em_residues_per_gaussian!=0),
671 color = domain.color)
672 these_domain_res[domain.get_unique_name()] = (set(),domain_res)
673 elif domain.pdb_file==
"IDEAL_HELIX":
674 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by IDEAL_HELIX " % (domainnumber,molname))
675 mol.add_representation(
677 resolutions=self.resolutions,
679 density_residues_per_component=domain.em_residues_per_gaussian,
680 density_prefix=domain.density_prefix,
681 density_force_compute=self.force_create_gmm_files,
682 color = domain.color)
683 these_domain_res[domain.get_unique_name()] = (domain_res,set())
685 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by pdb file %s " % (domainnumber,molname,domain.pdb_file))
686 domain_atomic = mol.add_structure(domain.pdb_file,
688 domain.residue_range,
691 domain_non_atomic = domain_res - domain_atomic
692 if not domain.em_residues_per_gaussian:
693 mol.add_representation(domain_atomic,
694 resolutions=self.resolutions,
695 color = domain.color)
696 if len(domain_non_atomic)>0:
697 mol.add_representation(domain_non_atomic,
698 resolutions=[domain.bead_size],
699 color = domain.color)
701 print(
"BuildSystem.add_state: -------- domain %s of molecule %s represented by gaussians " % (domainnumber,molname))
702 mol.add_representation(
704 resolutions=self.resolutions,
705 density_residues_per_component=domain.em_residues_per_gaussian,
706 density_prefix=domain.density_prefix,
707 density_force_compute=self.force_create_gmm_files,
708 color = domain.color)
709 if len(domain_non_atomic)>0:
710 mol.add_representation(domain_non_atomic,
711 resolutions=[domain.bead_size],
712 setup_particles_as_densities=
True,
713 color = domain.color)
714 these_domain_res[domain.get_unique_name()] = (
715 domain_atomic,domain_non_atomic)
716 self._domain_res.append(these_domain_res)
717 self._domains.append(these_domains)
718 print(
'BuildSystem.add_state: State',len(self.system.states),
'added')
721 """Return list of all molecules grouped by state.
722 For each state, it's a dictionary of Molecules where key is the molecule name
724 return [s.get_molecules()
for s
in self.system.get_states()]
726 def get_molecule(self,molname,copy_index=0,state_index=0):
727 return self.system.get_states()[state_index].
get_molecules()[molname][copy_index]
729 def execute_macro(self, max_rb_trans=4.0, max_rb_rot=0.04, max_bead_trans=4.0, max_srb_trans=4.0,max_srb_rot=0.04):
730 """Builds representations and sets up degrees of freedom"""
731 print(
"BuildSystem.execute_macro: building representations")
732 self.root_hier = self.system.build()
734 print(
"BuildSystem.execute_macro: setting up degrees of freedom")
736 for nstate,reader
in enumerate(self._readers):
737 rbs = reader.get_rigid_bodies()
738 srbs = reader.get_super_rigid_bodies()
739 csrbs = reader.get_chains_of_super_rigid_bodies()
742 domains_in_rbs = set()
744 print(
"BuildSystem.execute_macro: -------- building rigid body %s" % (str(rblist)))
745 all_res = IMP.pmi.tools.OrderedSet()
746 bead_res = IMP.pmi.tools.OrderedSet()
748 domain = self._domains[nstate][dname]
749 print(
"BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
750 all_res|=self._domain_res[nstate][dname][0]
751 bead_res|=self._domain_res[nstate][dname][1]
752 domains_in_rbs.add(dname)
754 print(
"BuildSystem.execute_macro: -------- \
755 creating rigid body with max_trans %s \
756 max_rot %s non_rigid_max_trans %s" \
757 % (str(max_rb_trans),str(max_rb_rot),str(max_bead_trans)))
758 self.dof.create_rigid_body(all_res,
759 nonrigid_parts=bead_res,
760 max_trans=max_rb_trans,
762 nonrigid_max_trans=max_bead_trans)
765 for dname
in self._domains[nstate]:
766 domain = self._domains[nstate][dname]
767 if domain.pdb_file==
"BEADS" and dname
not in domains_in_rbs:
768 self.dof.create_flexible_beads(
769 self._domain_res[nstate][dname][1],max_trans=max_bead_trans)
773 print(
"BuildSystem.execute_macro: -------- building super rigid body %s" % (str(srblist)))
774 all_res = IMP.pmi.tools.OrderedSet()
775 for dname
in srblist:
776 print(
"BuildSystem.execute_macro: -------- adding %s" % (str(dname )))
777 all_res|=self._domain_res[nstate][dname][0]
778 all_res|=self._domain_res[nstate][dname][1]
780 print(
"BuildSystem.execute_macro: -------- \
781 creating super rigid body with max_trans %s max_rot %s " \
782 % (str(max_srb_trans),str(max_srb_rot)))
783 self.dof.create_super_rigid_body(all_res,max_trans=max_srb_trans,max_rot=max_srb_rot)
786 for csrblist
in csrbs:
787 all_res = IMP.pmi.tools.OrderedSet()
788 for dname
in csrblist:
789 all_res|=self._domain_res[nstate][dname][0]
790 all_res|=self._domain_res[nstate][dname][1]
791 all_res = list(all_res)
792 all_res.sort(key=
lambda r:r.get_index())
794 self.dof.create_main_chain_mover(all_res)
795 return self.root_hier,self.dof
799 """A macro to build a Representation based on a Topology and lists of movers
800 DEPRECATED - Use BuildSystem instead.
804 component_topologies,
805 list_of_rigid_bodies=[],
806 list_of_super_rigid_bodies=[],
807 chain_of_super_rigid_bodies=[],
808 sequence_connectivity_scale=4.0,
809 add_each_domain_as_rigid_body=
False,
810 force_create_gmm_files=
False):
812 @param model The IMP model
813 @param component_topologies List of
814 IMP.pmi.topology.ComponentTopology items
815 @param list_of_rigid_bodies List of lists of domain names that will
816 be moved as rigid bodies.
817 @param list_of_super_rigid_bodies List of lists of domain names
818 that will move together in an additional Monte Carlo move.
819 @param chain_of_super_rigid_bodies List of lists of domain names
820 (choices can only be from the same molecule). Each of these
821 groups will be moved rigidly. This helps to sample more
822 efficiently complex topologies, made of several rigid bodies,
823 connected by flexible linkers.
824 @param sequence_connectivity_scale For scaling the connectivity
826 @param add_each_domain_as_rigid_body That way you don't have to
827 put all of them in the list
828 @param force_create_gmm_files If True, will sample and create GMMs
829 no matter what. If False, will only only sample if the
830 files don't exist. If number of Gaussians is zero, won't
836 disorderedlength=
False)
838 data=component_topologies
839 if list_of_rigid_bodies==[]:
840 print(
"WARNING: No list of rigid bodies inputted to build_model()")
841 if list_of_super_rigid_bodies==[]:
842 print(
"WARNING: No list of super rigid bodies inputted to build_model()")
843 if chain_of_super_rigid_bodies==[]:
844 print(
"WARNING: No chain of super rigid bodies inputted to build_model()")
845 all_dnames = set([d
for sublist
in list_of_rigid_bodies+list_of_super_rigid_bodies\
846 +chain_of_super_rigid_bodies
for d
in sublist])
847 all_available = set([c._domain_name
for c
in component_topologies])
848 if not all_dnames <= all_available:
849 raise ValueError(
"All requested movers must reference domain "
850 "names in the component topologies")
854 super_rigid_bodies={}
855 chain_super_rigid_bodies={}
859 comp_name = c.molname
860 hier_name = c._domain_name
862 fasta_file = c.fasta_file
863 fasta_id = c.fasta_id
864 pdb_name = c.pdb_file
866 res_range = c.residue_range
867 offset = c.pdb_offset
868 bead_size = c.bead_size
869 em_num_components = c.em_residues_per_gaussian
870 em_txt_file_name = c.gmm_file
871 em_mrc_file_name = c.mrc_file
873 if comp_name
not in self.simo.get_component_names():
874 self.simo.create_component(comp_name,color=0.0)
875 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
878 if em_num_components==0:
882 if (
not os.path.isfile(em_txt_file_name))
or force_create_gmm_files:
889 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
890 res_range,include_res0,beadsize=bead_size,
891 color=color,offset=offset)
892 if em_num_components!=0:
894 print(
"will read GMM files")
896 print(
"will calculate GMMs")
898 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
899 em_mrc_file_name,em_num_components,read_em_files)
900 self.simo.add_all_atom_densities(comp_name, particles=beads)
905 self.resdensities[hier_name]=dens_hier
906 self.domain_dict[hier_name]=outhier+dens_hier
909 for c
in self.simo.get_component_names():
910 self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
911 self.simo.setup_component_geometry(c)
914 for rblist
in list_of_rigid_bodies:
916 for rbmember
in rblist:
917 rb+=[h
for h
in self.domain_dict[rbmember]]
918 self.simo.set_rigid_body_from_hierarchies(rb)
919 for srblist
in list_of_super_rigid_bodies:
921 for srbmember
in rblist:
922 srb+=[h
for h
in self.domain_dict[srbmember]]
923 self.simo.set_super_rigid_body_from_hierarchies(srb)
924 for clist
in chain_of_super_rigid_bodies:
926 for crbmember
in rblist:
927 crb+=[h
for h
in self.domain_dict[crbmember]]
928 self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
930 self.simo.set_floppy_bodies()
931 self.simo.setup_bonds()
934 '''Return the Representation object'''
937 def get_density_hierarchies(self,hier_name_list):
941 for hn
in hier_name_list:
943 dens_hier_list+=self.resdensities[hn]
944 return dens_hier_list
946 def set_gmm_models_directory(self,directory_name):
947 self.gmm_models_directory=directory_name
949 def get_pdb_bead_bits(self,hierarchy):
954 if "_pdb" in h.get_name():pdbbits.append(h)
955 if "_bead" in h.get_name():beadbits.append(h)
956 if "_helix" in h.get_name():helixbits.append(h)
957 return (pdbbits,beadbits,helixbits)
959 def scale_bead_radii(self,nresidues,scale):
961 for h
in self.domain_dict:
962 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
963 slope=(1.0-scale)/(1.0-float(nresidues))
968 if b
not in scaled_beads:
974 scale_factor=slope*float(num_residues)+1.0
976 new_radius=scale_factor*radius
979 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
982 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
984 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
987 for pb
in pdbbits+helixbits:
991 number_of_residues=len(set(res_ind))
995 outhier+=simo.add_component_density(compname,
997 num_components=num_components,
999 inputfile=txtfilename)
1000 if len(helixbits)!=0:
1001 outhier+=simo.add_component_density(compname,
1003 num_components=num_components,
1005 inputfile=txtfilename)
1010 num_components=number_of_residues//abs(num_components)+1
1011 outhier+=simo.add_component_density(compname,
1013 num_components=num_components,
1015 outputfile=txtfilename,
1016 outputmap=mrcfilename,
1017 multiply_by_total_mass=
True)
1019 if len(helixbits)!=0:
1020 num_components=number_of_residues//abs(num_components)+1
1021 outhier+=simo.add_component_density(compname,
1023 num_components=num_components,
1025 outputfile=txtfilename,
1026 outputmap=mrcfilename,
1027 multiply_by_total_mass=
True)
1029 return outhier,beadbits
1031 def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
1032 beadsize=5,color=0.0,offset=0):
1033 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" :
1035 outhier=simo.autobuild_model(comname,
1039 resolutions=[0,1,10],
1042 missingbeadsize=beadsize)
1044 outhier=simo.autobuild_model(comname,
1051 missingbeadsize=beadsize)
1054 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" :
1055 outhier=simo.add_component_ideal_helix(comname,
1061 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" :
1062 outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
1066 seq_len=len(simo.sequence_dict[comname])
1067 outhier=simo.add_component_necklace(comname,
1077 """Deprecated building macro - use BuildSystem()"""
1081 @param representation The PMI representation
1083 self.simo=representation
1084 self.gmm_models_directory=
"."
1086 self.rmf_frame_number={}
1087 self.rmf_names_map={}
1088 self.rmf_component_name={}
1090 def set_gmm_models_directory(self,directory_name):
1091 self.gmm_models_directory=directory_name
1094 sequence_connectivity_resolution=10,
1095 rmf_file=
None,rmf_frame_number=0,rmf_file_map=
None,
1096 skip_connectivity_these_domains=
None,
1097 skip_gaussian_in_rmf=
False, skip_gaussian_in_representation=
False):
1099 @param data_structure List of lists containing these entries:
1100 comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
1101 res_range, read_em_files, bead_size, rb, super_rb,
1102 em_num_components, em_txt_file_name, em_mrc_file_name, chain_of_super_rb,
1103 keep_gaussian_flexible_beads (optional)
1104 @param sequence_connectivity_scale
1106 @param rmf_frame_number
1107 @param rmf_file_map : a dictionary that map key=component_name:value=(rmf_file_name,
1112 self.resdensities={}
1113 super_rigid_bodies={}
1114 chain_super_rigid_bodies={}
1117 for d
in data_structure:
1125 res_range = d[7][0:2]
1130 read_em_files = d[8]
1134 em_num_components = d[12]
1135 em_txt_file_name = d[13]
1136 em_mrc_file_name = d[14]
1137 chain_of_super_rb = d[15]
1139 keep_gaussian_flexible_beads = d[16]
1141 keep_gaussian_flexible_beads =
True
1143 if comp_name
not in self.simo.get_component_names():
1144 self.simo.create_component(comp_name,color=0.0)
1145 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
1146 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
1149 if not read_em_files
is None:
1150 if em_txt_file_name
is " ": em_txt_file_name=self.gmm_models_directory+
"/"+hier_name+
".txt"
1151 if em_mrc_file_name
is " ": em_mrc_file_name=self.gmm_models_directory+
"/"+hier_name+
".mrc"
1154 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
1156 if (keep_gaussian_flexible_beads):
1157 self.simo.add_all_atom_densities(comp_name, particles=beads)
1162 self.resdensities[hier_name]=dens_hier
1163 self.domain_dict[hier_name]=outhier+dens_hier
1166 if rb
not in rigid_bodies:
1167 rigid_bodies[rb]=[h
for h
in self.domain_dict[hier_name]]
1169 rigid_bodies[rb]+=[h
for h
in self.domain_dict[hier_name]]
1172 if super_rb
is not None:
1174 if k
not in super_rigid_bodies:
1175 super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
1177 super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
1179 if chain_of_super_rb
is not None:
1180 for k
in chain_of_super_rb:
1181 if k
not in chain_super_rigid_bodies:
1182 chain_super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
1184 chain_super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
1188 self.rigid_bodies=rigid_bodies
1190 for c
in self.simo.get_component_names():
1191 if rmf_file
is not None:
1193 rfn=rmf_frame_number
1194 self.simo.set_coordinates_from_rmf(c, rf,rfn,
1195 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1197 for k
in rmf_file_map:
1199 rf=rmf_file_map[k][0]
1200 rfn=rmf_file_map[k][1]
1201 rcname=rmf_file_map[k][2]
1202 self.simo.set_coordinates_from_rmf(cname, rf,rfn,rcname,
1203 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1205 if c
in self.rmf_file:
1207 rfn=self.rmf_frame_number[c]
1208 rfm=self.rmf_names_map[c]
1209 rcname=self.rmf_component_name[c]
1210 self.simo.set_coordinates_from_rmf(c, rf,rfn,representation_name_to_rmf_name_map=rfm,
1211 rmf_component_name=rcname,
1212 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
1213 if (
not skip_connectivity_these_domains)
or (c
not in skip_connectivity_these_domains):
1214 self.simo.setup_component_sequence_connectivity(c,
1215 resolution=sequence_connectivity_resolution,
1216 scale=sequence_connectivity_scale)
1217 self.simo.setup_component_geometry(c)
1219 for rb
in rigid_bodies:
1220 self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
1222 for k
in super_rigid_bodies:
1223 self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
1225 for k
in chain_super_rigid_bodies:
1226 self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
1228 self.simo.set_floppy_bodies()
1229 self.simo.setup_bonds()
1231 def set_main_chain_mover(self,hier_name,lengths=[5,10,20,30]):
1232 hiers=self.domain_dict[hier_name]
1233 for length
in lengths:
1234 for n
in range(len(hiers)-length):
1235 hs=hiers[n+1:n+length-1]
1236 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n+length].get_particle()),min_size=3)
1237 for n
in range(1,len(hiers)-1,5):
1239 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1241 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1244 def set_rmf_file(self,component_name,rmf_file,rmf_frame_number,rmf_names_map=None,rmf_component_name=None):
1245 self.rmf_file[component_name]=rmf_file
1246 self.rmf_frame_number[component_name]=rmf_frame_number
1247 self.rmf_names_map[component_name]=rmf_names_map
1248 self.rmf_component_name[component_name]=rmf_component_name
1250 def get_density_hierarchies(self,hier_name_list):
1254 for hn
in hier_name_list:
1256 dens_hier_list+=self.resdensities[hn]
1257 return dens_hier_list
1259 def get_pdb_bead_bits(self,hierarchy):
1264 if "_pdb" in h.get_name():pdbbits.append(h)
1265 if "_bead" in h.get_name():beadbits.append(h)
1266 if "_helix" in h.get_name():helixbits.append(h)
1267 return (pdbbits,beadbits,helixbits)
1269 def scale_bead_radii(self,nresidues,scale):
1271 for h
in self.domain_dict:
1272 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
1273 slope=(1.0-scale)/(1.0-float(nresidues))
1278 if b
not in scaled_beads:
1284 scale_factor=slope*float(num_residues)+1.0
1286 new_radius=scale_factor*radius
1289 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
1292 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
1294 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
1298 for pb
in pdbbits+helixbits:
1302 number_of_residues=len(set(res_ind))
1306 outhier+=simo.add_component_density(compname,
1308 num_components=num_components,
1310 inputfile=txtfilename)
1311 if len(helixbits)!=0:
1312 outhier+=simo.add_component_density(compname,
1314 num_components=num_components,
1316 inputfile=txtfilename)
1321 if num_components<0:
1324 num_components=number_of_residues/abs(num_components)
1325 outhier+=simo.add_component_density(compname,
1327 num_components=num_components,
1329 outputfile=txtfilename,
1330 outputmap=mrcfilename,
1331 multiply_by_total_mass=
True)
1333 if len(helixbits)!=0:
1334 if num_components<0:
1337 num_components=number_of_residues/abs(num_components)
1338 outhier+=simo.add_component_density(compname,
1340 num_components=num_components,
1342 outputfile=txtfilename,
1343 outputmap=mrcfilename,
1344 multiply_by_total_mass=
True)
1346 return outhier,beadbits
1348 def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
1350 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is not "DENSITY" :
1351 if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
1353 outhier=simo.autobuild_model(comname,
1357 resolutions=[0,1,10],
1360 missingbeadsize=beadsize)
1362 outhier=simo.autobuild_model(comname,
1369 missingbeadsize=beadsize)
1371 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is not "DENSITY" :
1372 outhier=simo.add_component_ideal_helix(comname,
1378 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" and pdbname
is not "DENSITY" :
1381 seq_len=len(simo.sequence_dict[comname])
1382 outhier=simo.add_component_necklace(comname,resrange[0],seq_len,beadsize,color=color)
1384 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is "DENSITY" :
1389 seq_len=len(simo.sequence_dict[comname])
1390 outhier=simo.add_component_necklace(comname,
1397 def set_coordinates(self,hier_name,xyz_tuple):
1398 hier=self.domain_dict[hier_name]
1406 def save_rmf(self,rmfname):
1409 o.init_rmf(rmfname,[self.simo.prot])
1410 o.write_rmf(rmfname)
1411 o.close_rmf(rmfname)
1420 missing_bead_size=20,
1421 residue_per_gaussian=
None):
1423 Construct a component for each subunit (no splitting, nothing fancy).
1424 You can pass the resolutions and the bead size for the missing residue regions.
1425 To use this macro, you must provide the following data structure:
1426 Component pdbfile chainid rgb color fastafile sequence id
1428 data = [("Rpb1", pdbfile, "A", 0.00000000, (fastafile, 0)),
1429 ("Rpb2", pdbfile, "B", 0.09090909, (fastafile, 1)),
1430 ("Rpb3", pdbfile, "C", 0.18181818, (fastafile, 2)),
1431 ("Rpb4", pdbfile, "D", 0.27272727, (fastafile, 3)),
1432 ("Rpb5", pdbfile, "E", 0.36363636, (fastafile, 4)),
1433 ("Rpb6", pdbfile, "F", 0.45454545, (fastafile, 5)),
1434 ("Rpb7", pdbfile, "G", 0.54545455, (fastafile, 6)),
1435 ("Rpb8", pdbfile, "H", 0.63636364, (fastafile, 7)),
1436 ("Rpb9", pdbfile, "I", 0.72727273, (fastafile, 8)),
1437 ("Rpb10", pdbfile, "L", 0.81818182, (fastafile, 9)),
1438 ("Rpb11", pdbfile, "J", 0.90909091, (fastafile, 10)),
1439 ("Rpb12", pdbfile, "K", 1.00000000, (fastafile, 11))]
1449 component_name = d[0]
1453 fasta_file = d[4][0]
1455 fastids = IMP.pmi.tools.get_ids_from_fasta_file(fasta_file)
1456 fasta_file_id = d[4][1]
1458 r.create_component(component_name,
1461 r.add_component_sequence(component_name,
1463 id=fastids[fasta_file_id])
1465 hierarchies = r.autobuild_model(component_name,
1468 resolutions=resolutions,
1469 missingbeadsize=missing_bead_size)
1471 r.show_component_table(component_name)
1473 r.set_rigid_bodies([component_name])
1475 r.set_chain_of_super_rigid_bodies(
1480 r.setup_component_sequence_connectivity(component_name, resolution=1)
1481 r.setup_component_geometry(component_name)
1485 r.set_floppy_bodies()
1488 r.set_current_coordinates_as_reference_for_rmsd(
"Reference")
1496 """A macro for running all the basic operations of analysis.
1497 Includes clustering, precision analysis, and making ensemble density maps.
1498 A number of plots are also supported.
1501 merge_directories=[
"./"],
1502 stat_file_name_suffix=
"stat",
1503 best_pdb_name_suffix=
"model",
1504 do_clean_first=
True,
1505 do_create_directories=
True,
1506 global_output_directory=
"output/",
1507 replica_stat_file_suffix=
"stat_replica",
1508 global_analysis_result_directory=
"./analysis/",
1511 @param model The IMP model
1512 @param stat_file_name_suffix
1513 @param merge_directories The directories containing output files
1514 @param best_pdb_name_suffix
1515 @param do_clean_first
1516 @param do_create_directories
1517 @param global_output_directory Where everything is
1518 @param replica_stat_file_suffix
1519 @param global_analysis_result_directory
1520 @param test_mode If True, nothing is changed on disk
1524 from mpi4py
import MPI
1525 self.comm = MPI.COMM_WORLD
1526 self.rank = self.comm.Get_rank()
1527 self.number_of_processes = self.comm.size
1530 self.number_of_processes = 1
1532 self.test_mode = test_mode
1533 self._protocol_output = []
1534 self.cluster_obj =
None
1536 stat_dir = global_output_directory
1537 self.stat_files = []
1539 for rd
in merge_directories:
1540 stat_files = glob.glob(os.path.join(rd,stat_dir,
"stat.*.out"))
1541 if len(stat_files)==0:
1542 print(
"WARNING: no stat files found in",os.path.join(rd,stat_dir))
1543 self.stat_files += stat_files
1546 """Capture details of the modeling protocol.
1547 @param p an instance of IMP.pmi.output.ProtocolOutput or a subclass.
1550 self._protocol_output.append((p, p._last_state))
1553 score_key=
"SimplifiedModel_Total_Score_None",
1554 rmf_file_key=
"rmf_file",
1555 rmf_file_frame_key=
"rmf_frame_index",
1558 nframes_trajectory=10000):
1559 """ Get a trajectory of the modeling run, for generating demonstrative movies
1560 @param score_key The score for ranking models
1561 @param rmf_file_key Key pointing to RMF filename
1562 @param rmf_file_frame_key Key pointing to RMF frame number
1563 @param outputdir The local output directory used in the run
1564 @param get_every Extract every nth frame
1565 @param nframes_trajectory Total number of frames of the trajectory
1567 from operator
import itemgetter
1575 rmf_file_list=trajectory_models[0]
1576 rmf_file_frame_list=trajectory_models[1]
1577 score_list=list(map(float, trajectory_models[2]))
1579 max_score=max(score_list)
1580 min_score=min(score_list)
1583 bins=[(max_score-min_score)*math.exp(-float(i))+min_score
for i
in range(nframes_trajectory)]
1584 binned_scores=[
None]*nframes_trajectory
1585 binned_model_indexes=[-1]*nframes_trajectory
1587 for model_index,s
in enumerate(score_list):
1588 bins_score_diffs=[abs(s-b)
for b
in bins]
1589 bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1590 if binned_scores[bin_index]==
None:
1591 binned_scores[bin_index]=s
1592 binned_model_indexes[bin_index]=model_index
1594 old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1595 new_diff=abs(s-bins[bin_index])
1596 if new_diff < old_diff:
1597 binned_scores[bin_index]=s
1598 binned_model_indexes[bin_index]=model_index
1600 print(binned_scores)
1601 print(binned_model_indexes)
1604 def _expand_ambiguity(self,prot,d):
1605 """If using PMI2, expand the dictionary to include copies as ambiguous options
1606 This also keeps the states separate.
1611 if '..' in key
or (type(val)
is tuple
and len(val)>=3):
1614 states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
1615 if type(val)
is tuple:
1623 for nst
in range(len(states)):
1625 copies = sel.get_selected_particles(with_representation=
False)
1627 for nc
in range(len(copies)):
1629 newdict[
'%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
1631 newdict[
'%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
1638 score_key=
"SimplifiedModel_Total_Score_None",
1639 rmf_file_key=
"rmf_file",
1640 rmf_file_frame_key=
"rmf_frame_index",
1642 prefiltervalue=
None,
1645 alignment_components=
None,
1646 number_of_best_scoring_models=10,
1647 rmsd_calculation_components=
None,
1648 distance_matrix_file=
'distances.mat',
1649 load_distance_matrix_file=
False,
1650 skip_clustering=
False,
1651 number_of_clusters=1,
1653 exit_after_display=
True,
1655 first_and_last_frames=
None,
1656 density_custom_ranges=
None,
1657 write_pdb_with_centered_coordinates=
False,
1659 """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
1660 Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
1661 Can pass None for copy or state to ignore that field.
1662 If you don't pass a specific copy number
1663 @param score_key The score for ranking models. Use "Total_Score"
1664 instead of default for PMI2.
1665 @param rmf_file_key Key pointing to RMF filename
1666 @param rmf_file_frame_key Key pointing to RMF frame number
1667 @param state_number State number to analyze
1668 @param prefiltervalue Only include frames where the
1669 score key is below this value
1670 @param feature_keys Keywords for which you want to
1671 calculate average, medians, etc.
1672 If you pass "Keyname" it'll include everything that matches "*Keyname*"
1673 @param outputdir The local output directory used in the run
1674 @param alignment_components Dictionary with keys=groupname, values are tuples
1675 for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1676 @param number_of_best_scoring_models Num models to keep per run
1677 @param rmsd_calculation_components For calculating RMSD
1678 (same format as alignment_components)
1679 @param distance_matrix_file Where to store/read the distance matrix
1680 @param load_distance_matrix_file Try to load the distance matrix file
1681 @param skip_clustering Just extract the best scoring models
1683 @param number_of_clusters Number of k-means clusters
1684 @param display_plot Display the distance matrix
1685 @param exit_after_display Exit after displaying distance matrix
1686 @param get_every Extract every nth frame
1687 @param first_and_last_frames A tuple with the first and last frames to be
1688 analyzed. Values are percentages!
1689 Default: get all frames
1690 @param density_custom_ranges For density calculation
1691 (same format as alignment_components)
1692 @param write_pdb_with_centered_coordinates
1693 @param voxel_size Used for the density output
1697 self._outputdir = os.path.abspath(outputdir)
1698 self._number_of_clusters = number_of_clusters
1699 for p, state
in self._protocol_output:
1700 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1711 if not load_distance_matrix_file:
1712 if len(self.stat_files)==0: print(
"ERROR: no stat file found in the given path");
return
1713 my_stat_files = IMP.pmi.tools.chunk_list_into_segments(
1714 self.stat_files,self.number_of_processes)[self.rank]
1718 orig_score_key = score_key
1719 if score_key
not in po.get_keys():
1720 if 'Total_Score' in po.get_keys():
1721 score_key =
'Total_Score'
1722 print(
"WARNING: Using 'Total_Score' instead of "
1723 "'SimplifiedModel_Total_Score_None' for the score key")
1724 for k
in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1725 if k
in feature_keys:
1726 print(
"WARNING: no need to pass " +k+
" to feature_keys.")
1727 feature_keys.remove(k)
1735 get_every, provenance=prov)
1736 rmf_file_list=best_models[0]
1737 rmf_file_frame_list=best_models[1]
1738 score_list=best_models[2]
1739 feature_keyword_list_dict=best_models[3]
1745 if self.number_of_processes > 1:
1749 rmf_file_frame_list)
1750 for k
in feature_keyword_list_dict:
1752 feature_keyword_list_dict[k])
1755 score_rmf_tuples = list(zip(score_list,
1757 rmf_file_frame_list,
1758 list(range(len(score_list)))))
1760 if density_custom_ranges:
1761 for k
in density_custom_ranges:
1762 if type(density_custom_ranges[k])
is not list:
1763 raise Exception(
"Density custom ranges: values must be lists of tuples")
1766 if first_and_last_frames
is not None:
1767 nframes = len(score_rmf_tuples)
1768 first_frame = int(first_and_last_frames[0] * nframes)
1769 last_frame = int(first_and_last_frames[1] * nframes)
1770 if last_frame > len(score_rmf_tuples):
1772 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1775 best_score_rmf_tuples = sorted(score_rmf_tuples,
1776 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1777 best_score_rmf_tuples=[t+(n,)
for n,t
in enumerate(best_score_rmf_tuples)]
1779 prov.append(IMP.pmi.io.FilterProvenance(
"Best scoring",
1780 0, number_of_best_scoring_models))
1782 best_score_feature_keyword_list_dict = defaultdict(list)
1783 for tpl
in best_score_rmf_tuples:
1785 for f
in feature_keyword_list_dict:
1786 best_score_feature_keyword_list_dict[f].append(
1787 feature_keyword_list_dict[f][index])
1788 my_best_score_rmf_tuples = IMP.pmi.tools.chunk_list_into_segments(
1789 best_score_rmf_tuples,
1790 self.number_of_processes)[self.rank]
1793 prot_ahead = IMP.pmi.analysis.get_hiers_from_rmf(self.model,
1795 my_best_score_rmf_tuples[0][1])[0]
1797 if rmsd_calculation_components
is not None:
1798 tmp = self._expand_ambiguity(prot_ahead,rmsd_calculation_components)
1799 if tmp!=rmsd_calculation_components:
1800 print(
'Detected ambiguity, expand rmsd components to',tmp)
1801 rmsd_calculation_components = tmp
1802 if alignment_components
is not None:
1803 tmp = self._expand_ambiguity(prot_ahead,alignment_components)
1804 if tmp!=alignment_components:
1805 print(
'Detected ambiguity, expand alignment components to',tmp)
1806 alignment_components = tmp
1812 my_best_score_rmf_tuples[0],
1813 rmsd_calculation_components,
1814 state_number=state_number)
1816 my_best_score_rmf_tuples,
1817 alignment_components,
1818 rmsd_calculation_components,
1819 state_number=state_number)
1824 all_coordinates=got_coords[0]
1825 alignment_coordinates=got_coords[1]
1826 rmsd_coordinates=got_coords[2]
1827 rmf_file_name_index_dict=got_coords[3]
1828 all_rmf_file_names=got_coords[4]
1834 if density_custom_ranges:
1836 density_custom_ranges,
1839 dircluster=os.path.join(outputdir,
"all_models."+str(n))
1845 os.mkdir(dircluster)
1848 clusstat=open(os.path.join(dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1849 for cnt,tpl
in enumerate(my_best_score_rmf_tuples):
1851 rmf_frame_number=tpl[2]
1854 for key
in best_score_feature_keyword_list_dict:
1855 tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1858 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
1863 linking_successful=IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
1869 if not linking_successful:
1876 states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
1877 prot = states[state_number]
1879 prot = prots[state_number]
1884 coords_f1=alignment_coordinates[cnt]
1886 coords_f2=alignment_coordinates[cnt]
1889 transformation = Ali.align()[1]
1910 out_pdb_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".pdb")
1911 out_rmf_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".rmf3")
1912 o.init_pdb(out_pdb_fn,prot)
1913 o.write_pdb(out_pdb_fn,
1914 translate_to_geometric_center=write_pdb_with_centered_coordinates)
1916 tmp_dict[
"local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1917 tmp_dict[
"rmf_file_full_path"]=rmf_name
1918 tmp_dict[
"local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1919 tmp_dict[
"local_rmf_frame_number"]=0
1921 clusstat.write(str(tmp_dict)+
"\n")
1926 h.set_name(
"System")
1928 o.init_rmf(out_rmf_fn, [h], rs)
1930 o.init_rmf(out_rmf_fn, [prot],rs)
1932 o.write_rmf(out_rmf_fn)
1933 o.close_rmf(out_rmf_fn)
1935 if density_custom_ranges:
1936 DensModule.add_subunits_density(prot)
1938 if density_custom_ranges:
1939 DensModule.write_mrc(path=dircluster)
1946 if self.number_of_processes > 1:
1952 rmf_file_name_index_dict)
1954 alignment_coordinates)
1961 [best_score_feature_keyword_list_dict,
1962 rmf_file_name_index_dict],
1968 print(
"setup clustering class")
1971 for n, model_coordinate_dict
in enumerate(all_coordinates):
1972 template_coordinate_dict = {}
1974 if alignment_components
is not None and len(self.cluster_obj.all_coords) == 0:
1976 self.cluster_obj.set_template(alignment_coordinates[n])
1977 self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1978 print(
"Global calculating the distance matrix")
1981 self.cluster_obj.dist_matrix()
1985 self.cluster_obj.do_cluster(number_of_clusters)
1988 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1989 if exit_after_display:
1991 self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1998 print(
"setup clustering class")
2000 self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
2001 print(
"clustering with %s clusters" % str(number_of_clusters))
2002 self.cluster_obj.do_cluster(number_of_clusters)
2003 [best_score_feature_keyword_list_dict,
2004 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
2007 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
2008 if exit_after_display:
2010 if self.number_of_processes > 1:
2018 print(self.cluster_obj.get_cluster_labels())
2019 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
2020 print(
"rank %s " % str(self.rank))
2021 print(
"cluster %s " % str(n))
2022 print(
"cluster label %s " % str(cl))
2023 print(self.cluster_obj.get_cluster_label_names(cl))
2024 cluster_size = len(self.cluster_obj.get_cluster_label_names(cl))
2025 cluster_prov = prov + \
2026 [IMP.pmi.io.ClusterProvenance(cluster_size)]
2029 if density_custom_ranges:
2031 density_custom_ranges,
2034 dircluster = outputdir +
"/cluster." + str(n) +
"/"
2036 os.mkdir(dircluster)
2040 rmsd_dict = {
"AVERAGE_RMSD":
2041 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
2042 clusstat = open(dircluster +
"stat.out",
"w")
2043 for k, structure_name
in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
2046 tmp_dict.update(rmsd_dict)
2047 index = rmf_file_name_index_dict[structure_name]
2048 for key
in best_score_feature_keyword_list_dict:
2050 key] = best_score_feature_keyword_list_dict[
2056 rmf_name = structure_name.split(
"|")[0]
2057 rmf_frame_number = int(structure_name.split(
"|")[1])
2058 clusstat.write(str(tmp_dict) +
"\n")
2062 prots,rs = IMP.pmi.analysis.get_hiers_and_restraints_from_rmf(
2067 linking_successful = IMP.pmi.analysis.link_hiers_and_restraints_to_rmf(
2073 if not linking_successful:
2079 states = IMP.atom.get_by_type(prots[0],IMP.atom.STATE_TYPE)
2080 prot = states[state_number]
2082 prot = prots[state_number]
2088 model_index = self.cluster_obj.get_model_index_from_name(
2090 transformation = self.cluster_obj.get_transformation_to_first_member(
2110 if density_custom_ranges:
2111 DensModule.add_subunits_density(prot)
2115 o.init_pdb(dircluster + str(k) +
".pdb", prot)
2116 o.write_pdb(dircluster + str(k) +
".pdb")
2121 h.set_name(
"System")
2123 o.init_rmf(dircluster + str(k) +
".rmf3", [h], rs)
2125 o.init_rmf(dircluster + str(k) +
".rmf3", [prot],rs)
2126 o.write_rmf(dircluster + str(k) +
".rmf3")
2127 o.close_rmf(dircluster + str(k) +
".rmf3")
2132 if density_custom_ranges:
2133 DensModule.write_mrc(path=dircluster)
2136 if self.number_of_processes>1:
2139 def get_cluster_rmsd(self,cluster_num):
2140 if self.cluster_obj
is None:
2142 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
2144 def save_objects(self, objects, file_name):
2146 outf = open(file_name,
'wb')
2147 pickle.dump(objects, outf)
2150 def load_objects(self, file_name):
2152 inputf = open(file_name,
'rb')
2153 objects = pickle.load(inputf)
2160 This class contains analysis utilities to investigate ReplicaExchange results.
2173 Construction of the Class.
2174 @param model IMP.Model()
2175 @param stat_files list of string. Can be ascii stat files, rmf files names
2176 @param best_models Integer. Number of best scoring models, if None: all models will be read
2177 @param alignment boolean (Default=True). Align before computing the rmsd.
2181 self.best_models=best_models
2194 self.clusters.append(c)
2195 for n0
in range(len(self.stath0)):
2197 self.pairwise_rmsd={}
2198 self.pairwise_molecular_assignment={}
2199 self.alignment=alignment
2200 self.symmetric_molecules={}
2201 self.issymmetricsel={}
2203 self.molcopydict0=IMP.pmi.tools.get_molecules_dictionary_by_copy(
IMP.atom.get_leaves(self.stath0))
2204 self.molcopydict1=IMP.pmi.tools.get_molecules_dictionary_by_copy(
IMP.atom.get_leaves(self.stath1))
2208 Setup the selection onto which the rmsd is computed
2209 @param kwargs use IMP.atom.Selection keywords
2217 Store names of symmetric molecules
2219 self.symmetric_molecules[molecule_name]=0
2224 Setup the selection onto which the alignment is computed
2225 @param kwargs use IMP.atom.Selection keywords
2233 def clean_clusters(self):
2234 for c
in self.clusters: del c
2238 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2240 Cluster the models based on RMSD.
2241 @param rmsd_cutoff Float the distance cutoff in Angstrom
2242 @param metric (Default=IMP.atom.get_rmsd) the metric that will be used to compute rmsds
2244 self.clean_clusters()
2245 not_clustered=set(range(len(self.stath1)))
2246 while len(not_clustered)>0:
2247 self.
aggregate(not_clustered, rmsd_cutoff, metric)
2253 Refine the clusters by merging the ones whose centers are close
2254 @param rmsd_cutoff cutoff distance in Angstorms
2257 clusters_copy=self.clusters
2258 for c0,c1
in itertools.combinations(self.clusters,2):
2259 if c0.center_index
is None:
2261 if c1.center_index
is None:
2263 d0=self.stath0[c0.center_index]
2264 d1=self.stath1[c1.center_index]
2265 rmsd, molecular_assignment = self.
rmsd()
2266 if rmsd <= rmsd_cutoff:
2267 if c1
in self.clusters:
2268 clusters_copy.remove(c1)
2270 self.clusters=clusters_copy
2277 def set_cluster_assignments(self, cluster_ids):
2278 if len(cluster_ids)!=len(self.stath0):
2279 raise ValueError(
'cluster ids has to be same length as number of frames')
2282 for i
in sorted(list(set(cluster_ids))):
2284 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
2285 self.clusters[idx].add_member(i,d)
2289 Return the model data from a cluster
2290 @param cluster IMP.pmi.output.Cluster object
2299 Save the data for the whole models into a pickle file
2300 @param filename string
2302 self.stath0.save_data(filename)
2306 Set the data from an external IMP.pmi.output.Data
2307 @param data IMP.pmi.output.Data
2309 self.stath0.data=data
2310 self.stath1.data=data
2314 Load the data from an external pickled file
2315 @param filename string
2317 self.stath0.load_data(filename)
2318 self.stath1.load_data(filename)
2319 self.best_models=len(self.stath0)
2321 def add_cluster(self,rmf_name_list):
2323 print(
"creating cluster index "+str(len(self.clusters)))
2324 self.clusters.append(c)
2325 current_len=len(self.stath0)
2327 for rmf
in rmf_name_list:
2328 print(
"adding rmf "+rmf)
2329 self.stath0.add_stat_file(rmf)
2330 self.stath1.add_stat_file(rmf)
2332 for n0
in range(current_len,len(self.stath0)):
2339 Save the clusters into a pickle file
2340 @param filename string
2343 import cPickle
as pickle
2346 fl=open(filename,
'wb')
2347 pickle.dump(self.clusters,fl)
2351 Load the clusters from a pickle file
2352 @param filename string
2353 @param append bool (Default=False), if True. append the clusters to the ones currently present
2356 import cPickle
as pickle
2359 fl=open(filename,
'rb')
2360 self.clean_clusters()
2362 self.clusters+=pickle.load(fl)
2364 self.clusters=pickle.load(fl)
2373 Compute the cluster center for a given cluster
2375 member_distance=defaultdict(float)
2377 for n0,n1
in itertools.combinations(cluster.members,2):
2380 rmsd, _ = self.
rmsd()
2381 member_distance[n0]+=rmsd
2383 if len(member_distance)>0:
2384 cluster.center_index=min(member_distance, key=member_distance.get)
2386 cluster.center_index=cluster.members[0]
2390 Save the coordinates of the current cluster a single rmf file
2392 print(
"saving coordinates",cluster)
2395 if rmf_name
is None:
2396 rmf_name=prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
2398 d1=self.stath1[cluster.members[0]]
2399 o.init_rmf(rmf_name, [self.stath1])
2400 for n1
in cluster.members:
2404 if self.alignment: self.align()
2405 o.write_rmf(rmf_name)
2407 o.close_rmf(rmf_name)
2411 remove structures that are similar
2412 append it to a new cluster
2414 print(
"pruning models")
2417 remaining=range(1,len(self.stath1),10)
2419 while len(remaining)>0:
2420 d0=self.stath0[selected]
2422 for n1
in remaining:
2424 if self.alignment: self.align()
2428 print(
"pruning model %s, similar to model %s, rmsd %s"%(str(n1),str(selected),str(d)))
2429 remaining=[x
for x
in remaining
if x
not in rm]
2430 if len(remaining)==0:
break
2431 selected=remaining[0]
2432 filtered.append(selected)
2435 self.clusters.append(c)
2445 Compute the precision of a cluster
2451 if not cluster.center_index
is None:
2452 members1=[cluster.center_index]
2454 members1=cluster.members
2458 for n1
in cluster.members:
2463 tmp_rmsd, _ = self.
rmsd()
2468 precision=rmsd/npairs
2469 cluster.precision=precision
2474 Compute the bipartite precision (ie the cross-precision)
2475 between two clusters
2479 for cn0,n0
in enumerate(cluster1.members):
2481 for cn1,n1
in enumerate(cluster2.members):
2483 tmp_rmsd, _ =self.
rmsd()
2484 if verbose: print(
"--- rmsd between structure %s and structure %s is %s"%(str(cn0),str(cn1),str(tmp_rmsd)))
2487 precision=rmsd/npairs
2490 def rmsf(self,cluster,molecule,copy_index=0,state_index=0,cluster_ref=None,step=1):
2492 Compute the Root mean square fluctuations
2493 of a molecule in a cluster
2494 Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf
2496 rmsf=IMP.pmi.tools.OrderedDict()
2499 if cluster_ref
is not None:
2500 if not cluster_ref.center_index
is None:
2501 members0 = [cluster_ref.center_index]
2503 members0 = cluster_ref.members
2505 if not cluster.center_index
is None:
2506 members0 = [cluster.center_index]
2508 members0 = cluster.members
2511 copy_index=copy_index,state_index=state_index)
2512 ps0=s0.get_selected_particles()
2526 for n1
in cluster.members[::step]:
2528 print(
"--- rmsf %s %s"%(str(n0),str(n1)))
2531 s1=
IMP.atom.Selection(self.stath1,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
2532 copy_index=copy_index,state_index=state_index)
2533 ps1 = s1.get_selected_particles()
2536 if self.alignment: self.align()
2537 for n,(p0,p1)
in enumerate(zip(ps0,ps1)):
2538 r=residue_indexes[n]
2550 for stath
in [self.stath0,self.stath1]:
2551 if molecule
not in self.symmetric_molecules:
2553 copy_index=copy_index,state_index=state_index)
2556 state_index=state_index)
2558 ps = s.get_selected_particles()
2567 def save_densities(self,cluster,density_custom_ranges,voxel_size=5,reference="Absolute", prefix="./",step=1):
2572 for n1
in cluster.members[::step]:
2573 print(
"density "+str(n1))
2576 if self.alignment: self.align()
2577 dens.add_subunits_density(self.stath1)
2579 dens.write_mrc(path=prefix+
'/',suffix=str(cluster.cluster_id))
2582 def contact_map(self,cluster,contact_threshold=15,log_scale=False,consolidate=False,molecules=None,prefix='./',reference="Absolute"):
2585 import matplotlib.pyplot
as plt
2586 import matplotlib.cm
as cm
2587 from scipy.spatial.distance
import cdist
2589 if molecules
is None:
2593 unique_copies=[mol
for mol
in mols
if mol.get_copy_index() == 0]
2594 mol_names_unique=dict((mol.get_name(),mol)
for mol
in unique_copies)
2595 total_len_unique=sum(max(mol.get_residue_indexes())
for mol
in unique_copies)
2604 seqlen=max(mol.get_residue_indexes())
2605 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2609 for mol
in unique_copies:
2610 seqlen=max(mol.get_residue_indexes())
2611 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2615 for ncl,n1
in enumerate(cluster.members):
2619 coord_dict=IMP.pmi.tools.OrderedDict()
2621 mol_name=mol.get_name()
2622 copy_index=mol.get_copy_index()
2623 rindexes = mol.get_residue_indexes()
2624 coords = np.ones((max(rindexes),3))
2625 for rnum
in rindexes:
2627 selpart = sel.get_selected_particles()
2628 if len(selpart) == 0:
continue
2629 selpart = selpart[0]
2630 coords[rnum - 1, :] =
IMP.core.XYZ(selpart).get_coordinates()
2631 coord_dict[mol]=coords
2634 coords=np.concatenate(list(coord_dict.values()))
2635 dists = cdist(coords, coords)
2636 binary_dists = np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
2638 binary_dists_dict={}
2640 len1 = max(mol1.get_residue_indexes())
2642 name1=mol1.get_name()
2643 name2=mol2.get_name()
2644 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2645 if (name1, name2)
not in binary_dists_dict:
2646 binary_dists_dict[(name1, name2)] = np.zeros((len1,len1))
2647 binary_dists_dict[(name1, name2)] += np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
2648 binary_dists=np.zeros((total_len_unique,total_len_unique))
2650 for name1,name2
in binary_dists_dict:
2651 r1=index_dict[mol_names_unique[name1]]
2652 r2=index_dict[mol_names_unique[name2]]
2653 binary_dists[min(r1):max(r1)+1,min(r2):max(r2)+1] = np.where((binary_dists_dict[(name1, name2)]>=1.0),1.0,0.0)
2658 contact_freqs = binary_dists
2660 dist_maps.append(dists)
2661 av_dist_map += dists
2662 contact_freqs += binary_dists
2667 contact_freqs =-np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2669 contact_freqs =1.0/len(cluster)*contact_freqs
2670 av_dist_map=1.0/len(cluster)*contact_freqs
2672 fig = plt.figure(figsize=(100, 100))
2673 ax = fig.add_subplot(111)
2676 gap_between_components=50
2688 prot_list=list(zip(*sorted_tuple))[1]
2691 prot_list=list(zip(*sorted_tuple))[1]
2693 prot_listx = prot_list
2694 nresx = gap_between_components + \
2695 sum([max(mol.get_residue_indexes())
2696 + gap_between_components
for mol
in prot_listx])
2699 prot_listy = prot_list
2700 nresy = gap_between_components + \
2701 sum([max(mol.get_residue_indexes())
2702 + gap_between_components
for mol
in prot_listy])
2707 res = gap_between_components
2708 for mol
in prot_listx:
2709 resoffsetx[mol] = res
2710 res += max(mol.get_residue_indexes())
2712 res += gap_between_components
2716 res = gap_between_components
2717 for mol
in prot_listy:
2718 resoffsety[mol] = res
2719 res += max(mol.get_residue_indexes())
2721 res += gap_between_components
2723 resoffsetdiagonal = {}
2724 res = gap_between_components
2725 for mol
in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2726 resoffsetdiagonal[mol] = res
2727 res += max(mol.get_residue_indexes())
2728 res += gap_between_components
2733 for n, prot
in enumerate(prot_listx):
2734 res = resoffsetx[prot]
2736 for proty
in prot_listy:
2737 resy = resoffsety[proty]
2738 endy = resendy[proty]
2739 ax.plot([res, res], [resy, endy], linestyle=
'-',color=
'gray', lw=0.4)
2740 ax.plot([end, end], [resy, endy], linestyle=
'-',color=
'gray', lw=0.4)
2741 xticks.append((float(res) + float(end)) / 2)
2746 for n, prot
in enumerate(prot_listy):
2747 res = resoffsety[prot]
2749 for protx
in prot_listx:
2750 resx = resoffsetx[protx]
2751 endx = resendx[protx]
2752 ax.plot([resx, endx], [res, res], linestyle=
'-',color=
'gray', lw=0.4)
2753 ax.plot([resx, endx], [end, end], linestyle=
'-',color=
'gray', lw=0.4)
2754 yticks.append((float(res) + float(end)) / 2)
2759 tmp_array = np.zeros((nresx, nresy))
2761 for px
in prot_listx:
2762 for py
in prot_listy:
2763 resx = resoffsetx[px]
2764 lengx = resendx[px] - 1
2765 resy = resoffsety[py]
2766 lengy = resendy[py] - 1
2767 indexes_x = index_dict[px]
2768 minx = min(indexes_x)
2769 maxx = max(indexes_x)
2770 indexes_y = index_dict[py]
2771 miny = min(indexes_y)
2772 maxy = max(indexes_y)
2773 tmp_array[resx:lengx,resy:lengy] = contact_freqs[minx:maxx,miny:maxy]
2774 ret[(px,py)]=np.argwhere(contact_freqs[minx:maxx,miny:maxy] == 1.0)+1
2776 cax = ax.imshow(tmp_array,
2781 interpolation=
'nearest')
2783 ax.set_xticks(xticks)
2784 ax.set_xticklabels(xlabels, rotation=90)
2785 ax.set_yticks(yticks)
2786 ax.set_yticklabels(ylabels)
2787 plt.setp(ax.get_xticklabels(), fontsize=6)
2788 plt.setp(ax.get_yticklabels(), fontsize=6)
2791 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2792 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2797 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf", dpi=300,transparent=
"False")
2801 def plot_rmsd_matrix(self,filename):
2803 self.compute_all_pairwise_rmsd()
2804 distance_matrix = np.zeros(
2805 (len(self.stath0), len(self.stath1)))
2806 for (n0,n1)
in self.pairwise_rmsd:
2807 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0,n1)]
2809 import matplotlib
as mpl
2811 import matplotlib.pylab
as pl
2812 from scipy.cluster
import hierarchy
as hrc
2814 fig = pl.figure(figsize=(10,8))
2815 ax = fig.add_subplot(212)
2816 dendrogram = hrc.dendrogram(
2817 hrc.linkage(distance_matrix),
2820 leaves_order = dendrogram[
'leaves']
2821 ax.set_xlabel(
'Model')
2822 ax.set_ylabel(
'RMSD [Angstroms]')
2824 ax2 = fig.add_subplot(221)
2826 distance_matrix[leaves_order,
2829 interpolation=
'nearest')
2830 cb = fig.colorbar(cax)
2831 cb.set_label(
'RMSD [Angstroms]')
2832 ax2.set_xlabel(
'Model')
2833 ax2.set_ylabel(
'Model')
2835 pl.savefig(filename, dpi=300)
2844 Update the cluster id numbers
2846 for n,c
in enumerate(self.clusters):
2849 def get_molecule(self, hier, name, copy):
2857 self.seldict0=IMP.pmi.tools.get_selections_dictionary(self.sel0_rmsd.get_selected_particles())
2858 self.seldict1=IMP.pmi.tools.get_selections_dictionary(self.sel1_rmsd.get_selected_particles())
2859 for mol
in self.seldict0:
2860 for sel
in self.seldict0[mol]:
2861 self.issymmetricsel[sel]=
False
2862 for mol
in self.symmetric_molecules:
2863 self.symmetric_molecules[mol]=len(self.seldict0[mol])
2864 for sel
in self.seldict0[mol]:
2865 self.issymmetricsel[sel]=
True
2872 for rb
in self.rbs1:
2875 for bead
in self.beads1:
2880 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2882 initial filling of the clusters.
2885 print(
"clustering model "+str(n0))
2886 d0 = self.stath0[n0]
2888 print(
"creating cluster index "+str(len(self.clusters)))
2889 self.clusters.append(c)
2891 clustered = set([n0])
2893 print(
"--- trying to add model "+str(n1)+
" to cluster "+str(len(self.clusters)))
2894 d1 = self.stath1[n1]
2895 if self.alignment: self.align()
2896 rmsd, _ = self.
rmsd(metric=metric)
2897 if rmsd<rmsd_cutoff:
2898 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2902 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2907 merge the clusters that have close members
2908 @param rmsd_cutoff cutoff distance in Angstorms
2914 for c0, c1
in filter(
lambda x: len(x[0].members)>1, itertools.combinations(self.clusters, 2)):
2915 n0, n1 = [c.members[0]
for c
in (c0,c1)]
2916 d0 = self.stath0[n0]
2917 d1 = self.stath1[n1]
2918 rmsd, _ = self.
rmsd()
2920 to_merge.append((c0,c1))
2922 for c0, c
in reversed(to_merge):
2926 self.clusters = [c
for c
in filter(
lambda x: len(x.members)>0, self.clusters)]
2931 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2933 print(
"check close members for clusters "+str(c0.cluster_id)+
" and "+str(c1.cluster_id))
2934 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2935 d0 = self.stath0[n0]
2936 d1 = self.stath1[n1]
2937 rmsd, _ = self.
rmsd(metric=metric)
2938 if rmsd<rmsd_cutoff:
2954 a function that returns the permutation best_sel of sels0 that minimizes metric
2956 best_rmsd2 = float(
'inf')
2958 if self.issymmetricsel[sels0[0]]:
2961 for offset
in range(N):
2962 order=[(offset+i)%N
for i
in range(N)]
2963 sels = [sels0[(offset+i)%N]
for i
in range(N)]
2966 r=metric(sel0, sel1)
2969 if rmsd2 < best_rmsd2:
2974 for sels
in itertools.permutations(sels0):
2976 for sel0, sel1
in itertools.takewhile(
lambda x: rmsd2<best_rmsd2, zip(sels, sels1)):
2977 r=metric(sel0, sel1)
2979 if rmsd2 < best_rmsd2:
2993 return best_sel, best_rmsd2
2995 def compute_all_pairwise_rmsd(self):
2996 for d0
in self.stath0:
2997 for d1
in self.stath1:
2998 rmsd, _ = self.
rmsd()
3000 def rmsd(self,metric=IMP.atom.get_rmsd):
3002 Computes the RMSD. Resolves ambiguous pairs assignments
3005 n0=self.stath0.current_index
3006 n1=self.stath1.current_index
3007 if ((n0,n1)
in self.pairwise_rmsd)
and ((n0,n1)
in self.pairwise_molecular_assignment):
3008 return self.pairwise_rmsd[(n0,n1)], self.pairwise_molecular_assignment[(n0,n1)]
3016 seldict_best_order={}
3017 molecular_assignment={}
3018 for molname, sels0
in self.seldict0.items():
3019 sels_best_order, best_rmsd2 = self.
rmsd_helper(sels0, self.seldict1[molname], metric)
3021 Ncoords = len(sels_best_order[0].get_selected_particles())
3022 Ncopies = len(self.seldict1[molname])
3023 total_rmsd += Ncoords*best_rmsd2
3024 total_N += Ncoords*Ncopies
3026 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
3027 p0 = sel0.get_selected_particles()[0]
3028 p1 = sel1.get_selected_particles()[0]
3034 molecular_assignment[(molname,c0)]=(molname,c1)
3036 total_rmsd = math.sqrt(total_rmsd/total_N)
3038 self.pairwise_rmsd[(n0,n1)]=total_rmsd
3039 self.pairwise_molecular_assignment[(n0,n1)]=molecular_assignment
3040 self.pairwise_rmsd[(n1,n0)]=total_rmsd
3041 self.pairwise_molecular_assignment[(n1,n0)]=molecular_assignment
3043 return total_rmsd, molecular_assignment
3047 Fix the reference structure for structural alignment, rmsd and chain assignemnt
3048 @param reference can be either "Absolute" (cluster center of the first cluster)
3049 or Relative (cluster center of the current cluster)
3051 if reference==
"Absolute":
3053 elif reference==
"Relative":
3054 if cluster.center_index:
3055 n0=cluster.center_index
3057 n0=cluster.members[0]
3062 compute the molecular assignemnts between multiple copies
3063 of the same sequence. It changes the Copy index of Molecules
3066 _, molecular_assignment = self.
rmsd()
3067 for (m0, c0), (m1,c1)
in molecular_assignment.items():
3068 mol0 = self.molcopydict0[m0][c0]
3069 mol1 = self.molcopydict1[m1][c1]
3072 p1.set_value(cik0,c0)
3076 Undo the Copy index assignment
3079 _, molecular_assignment = self.
rmsd()
3081 for (m0, c0), (m1,c1)
in molecular_assignment.items():
3082 mol0 = self.molcopydict0[m0][c0]
3083 mol1 = self.molcopydict1[m1][c1]
3086 p1.set_value(cik0,c1)
3093 s=
"AnalysisReplicaExchange\n"
3094 s+=
"---- number of clusters %s \n"%str(len(self.clusters))
3095 s+=
"---- number of models %s \n"%str(len(self.stath0))
3098 def __getitem__(self,int_slice_adaptor):
3099 if type(int_slice_adaptor)
is int:
3100 return self.clusters[int_slice_adaptor]
3101 elif type(int_slice_adaptor)
is slice:
3102 return self.__iter__(int_slice_adaptor)
3104 raise TypeError(
"Unknown Type")
3107 return len(self.clusters)
3109 def __iter__(self,slice_key=None):
3110 if slice_key
is None:
3111 for i
in range(len(self)):
3114 for i
in range(len(self))[slice_key]:
A class to simplify create of constraints and movers for an IMP Hierarchy.
def rmsd
Computes the RMSD.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
def load_clusters
Load the clusters from a pickle file.
def precision
Compute the precision of a cluster.
Extends the functionality of IMP.atom.Molecule.
A macro for running all the basic operations of analysis.
A container for models organized into clusters.
Sample using molecular dynamics.
def aggregate
initial filling of the clusters.
A class for reading stat files (either rmf or ascii v1 and v2)
A member of a rigid body, it has internal (local) coordinates.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Set of python classes to create a multi-state, multi-resolution IMP hierarchy.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
static XYZR setup_particle(Model *m, ParticleIndex pi)
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A helper output for model evaluation.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
def get_cluster_data
Return the model data from a cluster.
def __init__
Construction of the Class.
def get_molecules
Return list of all molecules grouped by state.
def set_data
Set the data from an external IMP.pmi.output.Data.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
This class contains analysis utilities to investigate ReplicaExchange results.
Add uncertainty to a particle.
A macro to build a IMP::pmi::topology::System based on a TopologyReader object.
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
def merge_aggregates
merge the clusters that have close members
This class initializes the root node of the global IMP.atom.Hierarchy.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
A class to cluster structures.
def add_protocol_output
Capture details of the modeling protocol.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
Representation of the system.
def compute_cluster_center
Compute the cluster center for a given cluster.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
A decorator for keeping track of copies of a molecule.
static Hierarchy setup_particle(Model *m, ParticleIndex pi, ParticleIndexesAdaptor children=ParticleIndexesAdaptor())
Create a Hierarchy of level t by adding the needed attributes.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def build_model
Create model.
The standard decorator for manipulating molecular structures.
Performs alignment and RMSD calculation for two sets of coordinates.
def update_seldicts
Update the seldicts.
def update_clusters
Update the cluster id numbers.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def refine
Refine the clusters by merging the ones whose centers are close.
A decorator for a particle with x,y,z coordinates.
def BuildModel0
Construct a component for each subunit (no splitting, nothing fancy).
Class for easy writing of PDBs, RMFs, and stat files.
def set_symmetric
Store names of symmetric molecules.
A macro to build a Representation based on a Topology and lists of movers DEPRECATED - Use BuildSyste...
Tools for clustering and cluster analysis.
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Sample using Monte Carlo.
Create movers and setup constraints for PMI objects.
def merge
merge two clusters
def add_state
Add a state using the topology info in a IMP::pmi::topology::TopologyReader object.
Deprecated building macro - use BuildSystem()
The general base class for IMP exceptions.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
class to link stat files to several rmf files
def save_data
Save the data for the whole models into a pickle file.
Class to handle individual particles of a Model object.
def execute_macro
Builds representations and sets up degrees of freedom.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
def save_clusters
Save the clusters into a pickle file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
Set up the representation of all proteins and nucleic acid macromolecules.
A macro to help setup and run replica exchange.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
A dictionary-like wrapper for reading and storing sequence data.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Compute mean density maps from structures.
def load_data
Load the data from an external pickled file.
Support for the RMF file format for storing hierarchical molecular data and markup.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Sample using replica exchange.
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
def get_representation
Return the Representation object.
A decorator for a particle with x,y,z coordinates and a radius.