1 """@namespace IMP.pmi1.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
18 from operator
import itemgetter
19 from collections
import defaultdict
25 class _RMFRestraints(object):
26 """All restraints that are written out to the RMF file"""
27 def __init__(self, model, user_restraints):
29 self._user_restraints = user_restraints
if user_restraints
else []
32 return (len(self._user_restraints)
33 + self._rmf_rs.get_number_of_restraints())
37 __nonzero__ = __bool__
39 def __getitem__(self, i):
40 class FakePMIWrapper(object):
41 def __init__(self, r):
42 self.r = IMP.RestraintSet.get_from(r)
43 get_restraint =
lambda self: self.r
44 lenuser = len(self._user_restraints)
46 return self._user_restraints[i]
47 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
48 r = self._rmf_rs.get_restraint(i - lenuser)
49 return FakePMIWrapper(r)
51 raise IndexError(
"Out of range")
55 """A macro to help setup and run replica exchange.
56 Supports Monte Carlo and molecular dynamics.
57 Produces trajectory RMF files, best PDB structures,
58 and output stat files.
64 monte_carlo_sample_objects=
None,
65 molecular_dynamics_sample_objects=
None,
67 rmf_output_objects=
None,
68 crosslink_restraints=
None,
69 monte_carlo_temperature=1.0,
70 simulated_annealing=
False,
71 simulated_annealing_minimum_temperature=1.0,
72 simulated_annealing_maximum_temperature=2.5,
73 simulated_annealing_minimum_temperature_nframes=100,
74 simulated_annealing_maximum_temperature_nframes=100,
75 replica_exchange_minimum_temperature=1.0,
76 replica_exchange_maximum_temperature=2.5,
77 replica_exchange_swap=
True,
79 number_of_best_scoring_models=500,
82 molecular_dynamics_steps=10,
83 molecular_dynamics_max_time_step=1.0,
84 number_of_frames=1000,
85 save_coordinates_mode=
"lowest_temperature",
86 nframes_write_coordinates=1,
87 write_initial_rmf=
True,
88 initial_rmf_name_suffix=
"initial",
89 stat_file_name_suffix=
"stat",
90 best_pdb_name_suffix=
"model",
92 do_create_directories=
True,
93 global_output_directory=
"./",
96 replica_stat_file_suffix=
"stat_replica",
97 em_object_for_rmf=
None,
99 replica_exchange_object=
None,
102 @param model The IMP model
103 @param representation PMI.representation.Representation object
104 (or list of them, for multi-state modeling)
105 @param root_hier Instead of passing Representation, pass the System hierarchy
106 @param monte_carlo_sample_objects Objects for MC sampling; a list of
107 structural components (generally the representation) that will
108 be moved and restraints with parameters that need to
110 For PMI2: just pass flat list of movers
111 @param molecular_dynamics_sample_objects Objects for MD sampling
112 For PMI2: just pass flat list of particles
113 @param output_objects A list of structural objects and restraints
114 that will be included in output (ie, statistics "stat" files). Any object
115 that provides a get_output() method can be used here. If None is passed
116 the macro will not write stat files.
117 @param rmf_output_objects A list of structural objects and restraints
118 that will be included in rmf. Any object
119 that provides a get_output() method can be used here.
120 @param crosslink_restraints List of cross-link restraints that will
121 be included in output RMF files (for visualization).
122 @param monte_carlo_temperature MC temp (may need to be optimized
123 based on post-sampling analysis)
124 @param simulated_annealing If True, perform simulated annealing
125 @param simulated_annealing_minimum_temperature Should generally be
126 the same as monte_carlo_temperature.
127 @param simulated_annealing_minimum_temperature_nframes Number of
128 frames to compute at minimum temperature.
129 @param simulated_annealing_maximum_temperature_nframes Number of
131 temps > simulated_annealing_maximum_temperature.
132 @param replica_exchange_minimum_temperature Low temp for REX; should
133 generally be the same as monte_carlo_temperature.
134 @param replica_exchange_maximum_temperature High temp for REX
135 @param replica_exchange_swap Boolean, enable disable temperature
137 @param num_sample_rounds Number of rounds of MC/MD per cycle
138 @param number_of_best_scoring_models Number of top-scoring PDB models
139 to keep around for analysis
140 @param monte_carlo_steps Number of MC steps per round
141 @param self_adaptive self adaptive scheme for monte carlo movers
142 @param molecular_dynamics_steps Number of MD steps per round
143 @param molecular_dynamics_max_time_step Max time step for MD
144 @param number_of_frames Number of REX frames to run
145 @param save_coordinates_mode string: how to save coordinates.
146 "lowest_temperature" (default) only the lowest temperatures is saved
147 "25th_score" all replicas whose score is below the 25th percentile
148 "50th_score" all replicas whose score is below the 50th percentile
149 "75th_score" all replicas whose score is below the 75th percentile
150 @param nframes_write_coordinates How often to write the coordinates
152 @param write_initial_rmf Write the initial configuration
153 @param global_output_directory Folder that will be created to house
155 @param test_mode Set to True to avoid writing any files, just test one frame.
162 self.output_objects = output_objects
163 self.rmf_output_objects=rmf_output_objects
164 self.representation = representation
166 if type(representation) == list:
167 self.is_multi_state =
True
168 self.root_hiers = [r.prot
for r
in representation]
169 self.vars[
"number_of_states"] = len(representation)
171 self.is_multi_state =
False
172 self.root_hier = representation.prot
173 self.vars[
"number_of_states"] = 1
174 elif root_hier
and type(root_hier) ==
IMP.atom.Hierarchy and root_hier.get_name()==
'System':
176 if self.output_objects
is not None:
178 if self.rmf_output_objects
is not None:
180 self.root_hier = root_hier
181 states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
182 self.vars[
"number_of_states"] = len(states)
184 self.root_hiers = states
185 self.is_multi_state =
True
187 self.root_hier = root_hier
188 self.is_multi_state =
False
190 raise Exception(
"Must provide representation or System hierarchy (root_hier)")
192 self._rmf_restraints = _RMFRestraints(model, crosslink_restraints)
193 self.em_object_for_rmf = em_object_for_rmf
194 self.monte_carlo_sample_objects = monte_carlo_sample_objects
195 self.vars[
"self_adaptive"]=self_adaptive
196 if sample_objects
is not None:
197 self.monte_carlo_sample_objects+=sample_objects
198 self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
199 self.replica_exchange_object = replica_exchange_object
200 self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
201 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
203 "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
205 "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
206 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
207 self.vars[
"simulated_annealing"]=\
209 self.vars[
"simulated_annealing_minimum_temperature"]=\
210 simulated_annealing_minimum_temperature
211 self.vars[
"simulated_annealing_maximum_temperature"]=\
212 simulated_annealing_maximum_temperature
213 self.vars[
"simulated_annealing_minimum_temperature_nframes"]=\
214 simulated_annealing_minimum_temperature_nframes
215 self.vars[
"simulated_annealing_maximum_temperature_nframes"]=\
216 simulated_annealing_maximum_temperature_nframes
218 self.vars[
"num_sample_rounds"] = num_sample_rounds
220 "number_of_best_scoring_models"] = number_of_best_scoring_models
221 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
222 self.vars[
"molecular_dynamics_steps"]=molecular_dynamics_steps
223 self.vars[
"number_of_frames"] = number_of_frames
224 if not save_coordinates_mode
in [
"lowest_temperature",
"25th_score",
"50th_score",
"75th_score"]:
225 raise Exception(
"save_coordinates_mode has unrecognized value")
227 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
228 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
229 self.vars[
"write_initial_rmf"] = write_initial_rmf
230 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
231 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
232 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
233 self.vars[
"do_clean_first"] = do_clean_first
234 self.vars[
"do_create_directories"] = do_create_directories
235 self.vars[
"global_output_directory"] = global_output_directory
236 self.vars[
"rmf_dir"] = rmf_dir
237 self.vars[
"best_pdb_dir"] = best_pdb_dir
238 self.vars[
"atomistic"] = atomistic
239 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
240 self.vars[
"geometries"] =
None
241 self.test_mode = test_mode
244 if self.vars[
"geometries"]
is None:
245 self.vars[
"geometries"] = list(geometries)
247 self.vars[
"geometries"].extend(geometries)
250 print(
"ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
251 print(
"--- it stores the best scoring pdb models in pdbs/")
252 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
253 print(
"--- variables:")
254 keys = list(self.vars.keys())
257 print(
"------", v.ljust(30), self.vars[v])
259 def get_replica_exchange_object(self):
260 return self.replica_exchange_object
262 def _add_provenance(self, sampler_md, sampler_mc):
263 """Record details about the sampling in the IMP Hierarchies"""
264 if not self.is_multi_state
or self.pmi2:
265 output_hierarchies = [self.root_hier]
267 output_hierarchies = self.root_hiers
271 method =
"Molecular Dynamics"
272 iterations += self.vars[
"molecular_dynamics_steps"]
274 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
275 iterations += self.vars[
"monte_carlo_steps"]
277 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
279 iterations *= self.vars[
"num_sample_rounds"]
281 for h
in output_hierarchies:
282 pi = self.model.add_particle(
"sampling")
284 self.model, pi, method, self.vars[
"number_of_frames"],
286 p.set_number_of_replicas(
287 self.replica_exchange_object.get_number_of_replicas())
288 IMP.pmi1.tools._add_pmi_provenance(h)
291 def execute_macro(self):
292 temp_index_factor = 100000.0
296 if self.monte_carlo_sample_objects
is not None:
297 print(
"Setting up MonteCarlo")
299 self.monte_carlo_sample_objects,
300 self.vars[
"monte_carlo_temperature"])
301 if self.vars[
"simulated_annealing"]:
302 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
303 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
304 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
305 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
306 sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
307 if self.vars[
"self_adaptive"]:
308 sampler_mc.set_self_adaptive(isselfadaptive=self.vars[
"self_adaptive"])
309 if self.output_objects
is not None:
310 self.output_objects.append(sampler_mc)
311 if self.rmf_output_objects
is not None:
312 self.rmf_output_objects.append(sampler_mc)
313 samplers.append(sampler_mc)
316 if self.molecular_dynamics_sample_objects
is not None:
317 print(
"Setting up MolecularDynamics")
319 self.molecular_dynamics_sample_objects,
320 self.vars[
"monte_carlo_temperature"],
321 maximum_time_step=self.molecular_dynamics_max_time_step)
322 if self.vars[
"simulated_annealing"]:
323 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
324 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
325 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
326 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
327 sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
328 if self.output_objects
is not None:
329 self.output_objects.append(sampler_md)
330 if self.rmf_output_objects
is not None:
331 self.rmf_output_objects.append(sampler_md)
332 samplers.append(sampler_md)
335 print(
"Setting up ReplicaExchange")
338 "replica_exchange_minimum_temperature"],
340 "replica_exchange_maximum_temperature"],
342 replica_exchange_object=self.replica_exchange_object)
343 self.replica_exchange_object = rex.rem
345 myindex = rex.get_my_index()
346 if self.output_objects
is not None:
347 self.output_objects.append(rex)
348 if self.rmf_output_objects
is not None:
349 self.rmf_output_objects.append(rex)
353 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
357 globaldir = self.vars[
"global_output_directory"] +
"/"
358 rmf_dir = globaldir + self.vars[
"rmf_dir"]
359 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
361 if not self.test_mode:
362 if self.vars[
"do_clean_first"]:
365 if self.vars[
"do_create_directories"]:
368 os.makedirs(globaldir)
376 if not self.is_multi_state:
382 for n
in range(self.vars[
"number_of_states"]):
384 os.makedirs(pdb_dir +
"/" + str(n))
391 if self.output_objects
is not None:
392 self.output_objects.append(sw)
393 if self.rmf_output_objects
is not None:
394 self.rmf_output_objects.append(sw)
396 print(
"Setting up stat file")
398 low_temp_stat_file = globaldir + \
399 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
402 if not self.test_mode:
405 if not self.test_mode:
406 if self.output_objects
is not None:
407 output.init_stat2(low_temp_stat_file,
409 extralabels=[
"rmf_file",
"rmf_frame_index"])
411 print(
"Stat file writing is disabled")
413 if self.rmf_output_objects
is not None:
414 print(
"Stat info being written in the rmf file")
416 print(
"Setting up replica stat file")
417 replica_stat_file = globaldir + \
418 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
419 if not self.test_mode:
420 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
422 if not self.test_mode:
423 print(
"Setting up best pdb files")
424 if not self.is_multi_state:
425 if self.vars[
"number_of_best_scoring_models"] > 0:
426 output.init_pdb_best_scoring(pdb_dir +
"/" +
427 self.vars[
"best_pdb_name_suffix"],
430 "number_of_best_scoring_models"],
431 replica_exchange=
True)
432 output.write_psf(pdb_dir +
"/" +
"model.psf",pdb_dir +
"/" +
433 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
435 if self.vars[
"number_of_best_scoring_models"] > 0:
436 for n
in range(self.vars[
"number_of_states"]):
437 output.init_pdb_best_scoring(pdb_dir +
"/" + str(n) +
"/" +
438 self.vars[
"best_pdb_name_suffix"],
441 "number_of_best_scoring_models"],
442 replica_exchange=
True)
443 output.write_psf(pdb_dir +
"/" + str(n) +
"/" +
"model.psf",pdb_dir +
"/" + str(n) +
"/" +
444 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
447 if self.em_object_for_rmf
is not None:
448 if not self.is_multi_state
or self.pmi2:
449 output_hierarchies = [
451 self.em_object_for_rmf.get_density_as_hierarchy(
454 output_hierarchies = self.root_hiers
455 output_hierarchies.append(
456 self.em_object_for_rmf.get_density_as_hierarchy())
458 if not self.is_multi_state
or self.pmi2:
459 output_hierarchies = [self.root_hier]
461 output_hierarchies = self.root_hiers
464 if not self.test_mode:
465 print(
"Setting up and writing initial rmf coordinate file")
466 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
467 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
468 output_hierarchies,listofobjects=self.rmf_output_objects)
469 if self._rmf_restraints:
470 output.add_restraints_to_rmf(
471 init_suffix +
"." + str(myindex) +
".rmf3",
472 self._rmf_restraints)
473 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
474 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
478 if not self.test_mode:
479 mpivs=IMP.pmi1.samplers.MPI_values(self.replica_exchange_object)
483 self._add_provenance(sampler_md, sampler_mc)
485 if not self.test_mode:
486 print(
"Setting up production rmf files")
487 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
488 output.init_rmf(rmfname, output_hierarchies, geometries=self.vars[
"geometries"],
489 listofobjects = self.rmf_output_objects)
491 if self._rmf_restraints:
492 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
494 ntimes_at_low_temp = 0
498 self.replica_exchange_object.set_was_used(
True)
499 nframes = self.vars[
"number_of_frames"]
502 for i
in range(nframes):
506 for nr
in range(self.vars[
"num_sample_rounds"]):
507 if sampler_md
is not None:
509 self.vars[
"molecular_dynamics_steps"])
510 if sampler_mc
is not None:
511 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
513 self.model).evaluate(
False)
514 mpivs.set_value(
"score",score)
515 output.set_output_entry(
"score", score)
519 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
521 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
522 save_frame=(min_temp_index == my_temp_index)
523 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
524 score_perc=mpivs.get_percentile(
"score")
525 save_frame=(score_perc*100.0<=25.0)
526 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
527 score_perc=mpivs.get_percentile(
"score")
528 save_frame=(score_perc*100.0<=50.0)
529 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
530 score_perc=mpivs.get_percentile(
"score")
531 save_frame=(score_perc*100.0<=75.0)
534 if save_frame
or not self.test_mode:
538 print(
"--- frame %s score %s " % (str(i), str(score)))
540 if not self.test_mode:
541 if i % self.vars[
"nframes_write_coordinates"]==0:
542 print(
'--- writing coordinates')
543 if self.vars[
"number_of_best_scoring_models"] > 0:
544 output.write_pdb_best_scoring(score)
545 output.write_rmf(rmfname)
546 output.set_output_entry(
"rmf_file", rmfname)
547 output.set_output_entry(
"rmf_frame_index", ntimes_at_low_temp)
549 output.set_output_entry(
"rmf_file", rmfname)
550 output.set_output_entry(
"rmf_frame_index",
'-1')
551 if self.output_objects
is not None:
552 output.write_stat2(low_temp_stat_file)
553 ntimes_at_low_temp += 1
555 if not self.test_mode:
556 output.write_stat2(replica_stat_file)
557 if self.vars[
"replica_exchange_swap"]:
558 rex.swap_temp(i, score)
559 if self.representation:
560 for p, state
in self.representation._protocol_output:
561 p.add_replica_exchange(state, self)
563 if not self.test_mode:
564 print(
"closing production rmf files")
565 output.close_rmf(rmfname)
569 """A macro to build a Representation based on a Topology and lists of movers
573 component_topologies,
574 list_of_rigid_bodies=[],
575 list_of_super_rigid_bodies=[],
576 chain_of_super_rigid_bodies=[],
577 sequence_connectivity_scale=4.0,
578 add_each_domain_as_rigid_body=
False,
579 force_create_gmm_files=
False):
581 @param model The IMP model
582 @param component_topologies List of
583 IMP.pmi1.topology.ComponentTopology items
584 @param list_of_rigid_bodies List of lists of domain names that will
585 be moved as rigid bodies.
586 @param list_of_super_rigid_bodies List of lists of domain names
587 that will move together in an additional Monte Carlo move.
588 @param chain_of_super_rigid_bodies List of lists of domain names
589 (choices can only be from the same molecule). Each of these
590 groups will be moved rigidly. This helps to sample more
591 efficiently complex topologies, made of several rigid bodies,
592 connected by flexible linkers.
593 @param sequence_connectivity_scale For scaling the connectivity
595 @param add_each_domain_as_rigid_body That way you don't have to
596 put all of them in the list
597 @param force_create_gmm_files If True, will sample and create GMMs
598 no matter what. If False, will only sample if the
599 files don't exist. If number of Gaussians is zero, won't
605 disorderedlength=
False)
607 data=component_topologies
608 if list_of_rigid_bodies==[]:
609 print(
"WARNING: No list of rigid bodies inputted to build_model()")
610 if list_of_super_rigid_bodies==[]:
611 print(
"WARNING: No list of super rigid bodies inputted to build_model()")
612 if chain_of_super_rigid_bodies==[]:
613 print(
"WARNING: No chain of super rigid bodies inputted to build_model()")
614 all_dnames = set([d
for sublist
in list_of_rigid_bodies+list_of_super_rigid_bodies\
615 +chain_of_super_rigid_bodies
for d
in sublist])
616 all_available = set([c._domain_name
for c
in component_topologies])
617 if not all_dnames <= all_available:
618 raise ValueError(
"All requested movers must reference domain "
619 "names in the component topologies")
623 super_rigid_bodies={}
624 chain_super_rigid_bodies={}
628 comp_name = c.molname
629 hier_name = c._domain_name
631 fasta_file = c.fasta_file
632 fasta_id = c.fasta_id
633 pdb_name = c.pdb_file
635 res_range = c.residue_range
636 offset = c.pdb_offset
637 bead_size = c.bead_size
638 em_num_components = c.em_residues_per_gaussian
639 em_txt_file_name = c.gmm_file
640 em_mrc_file_name = c.mrc_file
642 if comp_name
not in self.simo.get_component_names():
643 self.simo.create_component(comp_name,color=0.0)
644 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
647 if em_num_components==0:
651 if (
not os.path.isfile(em_txt_file_name))
or force_create_gmm_files:
658 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
659 res_range,include_res0,beadsize=bead_size,
660 color=color,offset=offset)
661 if em_num_components!=0:
663 print(
"will read GMM files")
665 print(
"will calculate GMMs")
667 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
668 em_mrc_file_name,em_num_components,read_em_files)
669 self.simo.add_all_atom_densities(comp_name, particles=beads)
674 self.resdensities[hier_name]=dens_hier
675 self.domain_dict[hier_name]=outhier+dens_hier
678 for c
in self.simo.get_component_names():
679 self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
680 self.simo.setup_component_geometry(c)
683 for rblist
in list_of_rigid_bodies:
685 for rbmember
in rblist:
686 rb+=[h
for h
in self.domain_dict[rbmember]]
687 self.simo.set_rigid_body_from_hierarchies(rb)
688 for srblist
in list_of_super_rigid_bodies:
690 for srbmember
in rblist:
691 srb+=[h
for h
in self.domain_dict[srbmember]]
692 self.simo.set_super_rigid_body_from_hierarchies(srb)
693 for clist
in chain_of_super_rigid_bodies:
695 for crbmember
in rblist:
696 crb+=[h
for h
in self.domain_dict[crbmember]]
697 self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
699 self.simo.set_floppy_bodies()
700 self.simo.setup_bonds()
708 '''Return the Representation object'''
711 def get_density_hierarchies(self,hier_name_list):
715 for hn
in hier_name_list:
717 dens_hier_list+=self.resdensities[hn]
718 return dens_hier_list
720 def set_gmm_models_directory(self,directory_name):
721 self.gmm_models_directory=directory_name
723 def get_pdb_bead_bits(self,hierarchy):
728 if "_pdb" in h.get_name():pdbbits.append(h)
729 if "_bead" in h.get_name():beadbits.append(h)
730 if "_helix" in h.get_name():helixbits.append(h)
731 return (pdbbits,beadbits,helixbits)
733 def scale_bead_radii(self,nresidues,scale):
735 for h
in self.domain_dict:
736 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
737 slope=(1.0-scale)/(1.0-float(nresidues))
742 if b
not in scaled_beads:
748 scale_factor=slope*float(num_residues)+1.0
750 new_radius=scale_factor*radius
753 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
756 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
758 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
761 for pb
in pdbbits+helixbits:
765 number_of_residues=len(set(res_ind))
769 outhier+=simo.add_component_density(compname,
771 num_components=num_components,
773 inputfile=txtfilename)
774 if len(helixbits)!=0:
775 outhier+=simo.add_component_density(compname,
777 num_components=num_components,
779 inputfile=txtfilename)
784 num_components=number_of_residues//abs(num_components)+1
785 outhier+=simo.add_component_density(compname,
787 num_components=num_components,
789 outputfile=txtfilename,
790 outputmap=mrcfilename,
791 multiply_by_total_mass=
True)
793 if len(helixbits)!=0:
794 num_components=number_of_residues//abs(num_components)+1
795 outhier+=simo.add_component_density(compname,
797 num_components=num_components,
799 outputfile=txtfilename,
800 outputmap=mrcfilename,
801 multiply_by_total_mass=
True)
803 return outhier,beadbits
805 def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
806 beadsize=5,color=0.0,offset=0):
807 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" :
809 outhier=simo.autobuild_model(comname,
813 resolutions=[0,1,10],
816 missingbeadsize=beadsize)
818 outhier=simo.autobuild_model(comname,
825 missingbeadsize=beadsize)
828 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" :
829 outhier=simo.add_component_ideal_helix(comname,
835 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" :
836 outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
840 seq_len=len(simo.sequence_dict[comname])
841 outhier=simo.add_component_necklace(comname,
854 @param representation The PMI representation
856 self.simo=representation
857 self.gmm_models_directory=
"."
859 self.rmf_frame_number={}
860 self.rmf_names_map={}
861 self.rmf_component_name={}
863 def set_gmm_models_directory(self,directory_name):
864 self.gmm_models_directory=directory_name
866 def build_model(self,data_structure,sequence_connectivity_scale=4.0,
867 sequence_connectivity_resolution=10,
868 rmf_file=
None,rmf_frame_number=0,rmf_file_map=
None,
869 skip_connectivity_these_domains=
None,
870 skip_gaussian_in_rmf=
False, skip_gaussian_in_representation=
False):
872 @param data_structure List of lists containing these entries:
873 comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
874 res_range, read_em_files, bead_size, rb, super_rb,
875 em_num_components, em_txt_file_name, em_mrc_file_name, chain_of_super_rb,
876 keep_gaussian_flexible_beads (optional)
877 @param sequence_connectivity_scale
879 @param rmf_frame_number
880 @param rmf_file_map : a dictionary that map key=component_name:value=(rmf_file_name,
886 super_rigid_bodies={}
887 chain_super_rigid_bodies={}
890 for d
in data_structure:
898 res_range = d[7][0:2]
907 em_num_components = d[12]
908 em_txt_file_name = d[13]
909 em_mrc_file_name = d[14]
910 chain_of_super_rb = d[15]
912 keep_gaussian_flexible_beads = d[16]
914 keep_gaussian_flexible_beads =
True
916 if comp_name
not in self.simo.get_component_names():
917 self.simo.create_component(comp_name,color=0.0)
918 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
919 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
922 if not read_em_files
is None:
923 if em_txt_file_name
is " ": em_txt_file_name=self.gmm_models_directory+
"/"+hier_name+
".txt"
924 if em_mrc_file_name
is " ": em_mrc_file_name=self.gmm_models_directory+
"/"+hier_name+
".mrc"
927 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
929 if (keep_gaussian_flexible_beads):
930 self.simo.add_all_atom_densities(comp_name, particles=beads)
935 self.resdensities[hier_name]=dens_hier
936 self.domain_dict[hier_name]=outhier+dens_hier
939 if rb
not in rigid_bodies:
940 rigid_bodies[rb]=[h
for h
in self.domain_dict[hier_name]]
942 rigid_bodies[rb]+=[h
for h
in self.domain_dict[hier_name]]
945 if super_rb
is not None:
947 if k
not in super_rigid_bodies:
948 super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
950 super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
952 if chain_of_super_rb
is not None:
953 for k
in chain_of_super_rb:
954 if k
not in chain_super_rigid_bodies:
955 chain_super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
957 chain_super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
961 self.rigid_bodies=rigid_bodies
963 for c
in self.simo.get_component_names():
964 if rmf_file
is not None:
967 self.simo.set_coordinates_from_rmf(c, rf,rfn,
968 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
970 for k
in rmf_file_map:
972 rf=rmf_file_map[k][0]
973 rfn=rmf_file_map[k][1]
974 rcname=rmf_file_map[k][2]
975 self.simo.set_coordinates_from_rmf(cname, rf,rfn,rcname,
976 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
978 if c
in self.rmf_file:
980 rfn=self.rmf_frame_number[c]
981 rfm=self.rmf_names_map[c]
982 rcname=self.rmf_component_name[c]
983 self.simo.set_coordinates_from_rmf(c, rf,rfn,representation_name_to_rmf_name_map=rfm,
984 rmf_component_name=rcname,
985 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
986 if (
not skip_connectivity_these_domains)
or (c
not in skip_connectivity_these_domains):
987 self.simo.setup_component_sequence_connectivity(c,
988 resolution=sequence_connectivity_resolution,
989 scale=sequence_connectivity_scale)
990 self.simo.setup_component_geometry(c)
992 for rb
in rigid_bodies:
993 self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
995 for k
in super_rigid_bodies:
996 self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
998 for k
in chain_super_rigid_bodies:
999 self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
1001 self.simo.set_floppy_bodies()
1002 self.simo.setup_bonds()
1004 def set_main_chain_mover(self,hier_name,lengths=[5,10,20,30]):
1005 hiers=self.domain_dict[hier_name]
1006 for length
in lengths:
1007 for n
in range(len(hiers)-length):
1008 hs=hiers[n+1:n+length-1]
1009 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n+length].get_particle()),min_size=3)
1010 for n
in range(1,len(hiers)-1,5):
1012 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1014 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1017 def set_rmf_file(self,component_name,rmf_file,rmf_frame_number,rmf_names_map=None,rmf_component_name=None):
1018 self.rmf_file[component_name]=rmf_file
1019 self.rmf_frame_number[component_name]=rmf_frame_number
1020 self.rmf_names_map[component_name]=rmf_names_map
1021 self.rmf_component_name[component_name]=rmf_component_name
1023 def get_density_hierarchies(self,hier_name_list):
1027 for hn
in hier_name_list:
1029 dens_hier_list+=self.resdensities[hn]
1030 return dens_hier_list
1032 def get_pdb_bead_bits(self,hierarchy):
1037 if "_pdb" in h.get_name():pdbbits.append(h)
1038 if "_bead" in h.get_name():beadbits.append(h)
1039 if "_helix" in h.get_name():helixbits.append(h)
1040 return (pdbbits,beadbits,helixbits)
1042 def scale_bead_radii(self,nresidues,scale):
1044 for h
in self.domain_dict:
1045 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
1046 slope=(1.0-scale)/(1.0-float(nresidues))
1051 if b
not in scaled_beads:
1057 scale_factor=slope*float(num_residues)+1.0
1059 new_radius=scale_factor*radius
1062 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
1065 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
1067 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
1071 for pb
in pdbbits+helixbits:
1075 number_of_residues=len(set(res_ind))
1079 outhier+=simo.add_component_density(compname,
1081 num_components=num_components,
1083 inputfile=txtfilename)
1084 if len(helixbits)!=0:
1085 outhier+=simo.add_component_density(compname,
1087 num_components=num_components,
1089 inputfile=txtfilename)
1094 if num_components<0:
1097 num_components=number_of_residues/abs(num_components)
1098 outhier+=simo.add_component_density(compname,
1100 num_components=num_components,
1102 outputfile=txtfilename,
1103 outputmap=mrcfilename,
1104 multiply_by_total_mass=
True)
1106 if len(helixbits)!=0:
1107 if num_components<0:
1110 num_components=number_of_residues/abs(num_components)
1111 outhier+=simo.add_component_density(compname,
1113 num_components=num_components,
1115 outputfile=txtfilename,
1116 outputmap=mrcfilename,
1117 multiply_by_total_mass=
True)
1119 return outhier,beadbits
1121 def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
1123 if pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is not "DENSITY" :
1124 if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
1126 outhier=simo.autobuild_model(comname,
1130 resolutions=[0,1,10],
1133 missingbeadsize=beadsize)
1135 outhier=simo.autobuild_model(comname,
1142 missingbeadsize=beadsize)
1144 elif pdbname
is not None and pdbname
is "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is not "DENSITY" :
1145 outhier=simo.add_component_ideal_helix(comname,
1151 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is "BEADS" and pdbname
is not "DENSITY" :
1154 seq_len=len(simo.sequence_dict[comname])
1155 outhier=simo.add_component_necklace(comname,resrange[0],seq_len,beadsize,color=color)
1157 elif pdbname
is not None and pdbname
is not "IDEAL_HELIX" and pdbname
is not "BEADS" and pdbname
is "DENSITY" :
1162 seq_len=len(simo.sequence_dict[comname])
1163 outhier=simo.add_component_necklace(comname,
1170 def set_coordinates(self,hier_name,xyz_tuple):
1171 hier=self.domain_dict[hier_name]
1179 def save_rmf(self,rmfname):
1182 self.simo.model.update()
1183 o.init_rmf(rmfname,[self.simo.prot])
1184 o.write_rmf(rmfname)
1185 o.close_rmf(rmfname)
1193 missing_bead_size=20,
1194 residue_per_gaussian=
None):
1196 Construct a component for each subunit (no splitting, nothing fancy).
1197 You can pass the resolutions and the bead size for the missing residue regions.
1198 To use this macro, you must provide the following data structure:
1199 Component pdbfile chainid rgb color fastafile sequence id
1201 data = [("Rpb1", pdbfile, "A", 0.00000000, (fastafile, 0)),
1202 ("Rpb2", pdbfile, "B", 0.09090909, (fastafile, 1)),
1203 ("Rpb3", pdbfile, "C", 0.18181818, (fastafile, 2)),
1204 ("Rpb4", pdbfile, "D", 0.27272727, (fastafile, 3)),
1205 ("Rpb5", pdbfile, "E", 0.36363636, (fastafile, 4)),
1206 ("Rpb6", pdbfile, "F", 0.45454545, (fastafile, 5)),
1207 ("Rpb7", pdbfile, "G", 0.54545455, (fastafile, 6)),
1208 ("Rpb8", pdbfile, "H", 0.63636364, (fastafile, 7)),
1209 ("Rpb9", pdbfile, "I", 0.72727273, (fastafile, 8)),
1210 ("Rpb10", pdbfile, "L", 0.81818182, (fastafile, 9)),
1211 ("Rpb11", pdbfile, "J", 0.90909091, (fastafile, 10)),
1212 ("Rpb12", pdbfile, "K", 1.00000000, (fastafile, 11))]
1222 component_name = d[0]
1226 fasta_file = d[4][0]
1228 fastids = IMP.pmi1.tools.get_ids_from_fasta_file(fasta_file)
1229 fasta_file_id = d[4][1]
1231 r.create_component(component_name,
1234 r.add_component_sequence(component_name,
1236 id=fastids[fasta_file_id])
1238 hierarchies = r.autobuild_model(component_name,
1241 resolutions=resolutions,
1242 missingbeadsize=missing_bead_size)
1244 r.show_component_table(component_name)
1246 r.set_rigid_bodies([component_name])
1248 r.set_chain_of_super_rigid_bodies(
1253 r.setup_component_sequence_connectivity(component_name, resolution=1)
1254 r.setup_component_geometry(component_name)
1258 r.set_floppy_bodies()
1261 r.set_current_coordinates_as_reference_for_rmsd(
"Reference")
1269 """A macro for running all the basic operations of analysis.
1270 Includes clustering, precision analysis, and making ensemble density maps.
1271 A number of plots are also supported.
1274 merge_directories=[
"./"],
1275 stat_file_name_suffix=
"stat",
1276 best_pdb_name_suffix=
"model",
1277 do_clean_first=
True,
1278 do_create_directories=
True,
1279 global_output_directory=
"output/",
1280 replica_stat_file_suffix=
"stat_replica",
1281 global_analysis_result_directory=
"./analysis/",
1284 @param model The IMP model
1285 @param stat_file_name_suffix
1286 @param merge_directories The directories containing output files
1287 @param best_pdb_name_suffix
1288 @param do_clean_first
1289 @param do_create_directories
1290 @param global_output_directory Where everything is
1291 @param replica_stat_file_suffix
1292 @param global_analysis_result_directory
1293 @param test_mode If True, nothing is changed on disk
1297 from mpi4py
import MPI
1298 self.comm = MPI.COMM_WORLD
1299 self.rank = self.comm.Get_rank()
1300 self.number_of_processes = self.comm.size
1303 self.number_of_processes = 1
1305 self.test_mode = test_mode
1306 self._protocol_output = []
1307 self.cluster_obj =
None
1309 stat_dir = global_output_directory
1310 self.stat_files = []
1312 for rd
in merge_directories:
1313 stat_files = glob.glob(os.path.join(rd,stat_dir,
"stat.*.out"))
1314 if len(stat_files)==0:
1315 print(
"WARNING: no stat files found in",os.path.join(rd,stat_dir))
1316 self.stat_files += stat_files
1319 """Capture details of the modeling protocol.
1320 @param p an instance of IMP.pmi1.output.ProtocolOutput or a subclass.
1323 self._protocol_output.append((p, p._last_state))
1326 score_key=
"SimplifiedModel_Total_Score_None",
1327 rmf_file_key=
"rmf_file",
1328 rmf_file_frame_key=
"rmf_frame_index",
1331 nframes_trajectory=10000):
1332 """ Get a trajectory of the modeling run, for generating demonstrative movies
1333 @param score_key The score for ranking models
1334 @param rmf_file_key Key pointing to RMF filename
1335 @param rmf_file_frame_key Key pointing to RMF frame number
1336 @param outputdir The local output directory used in the run
1337 @param get_every Extract every nth frame
1338 @param nframes_trajectory Total number of frames of the trajectory
1340 from operator
import itemgetter
1348 rmf_file_list=trajectory_models[0]
1349 rmf_file_frame_list=trajectory_models[1]
1350 score_list=list(map(float, trajectory_models[2]))
1352 max_score=max(score_list)
1353 min_score=min(score_list)
1356 bins=[(max_score-min_score)*math.exp(-float(i))+min_score
for i
in range(nframes_trajectory)]
1357 binned_scores=[
None]*nframes_trajectory
1358 binned_model_indexes=[-1]*nframes_trajectory
1360 for model_index,s
in enumerate(score_list):
1361 bins_score_diffs=[abs(s-b)
for b
in bins]
1362 bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1363 if binned_scores[bin_index]==
None:
1364 binned_scores[bin_index]=s
1365 binned_model_indexes[bin_index]=model_index
1367 old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1368 new_diff=abs(s-bins[bin_index])
1369 if new_diff < old_diff:
1370 binned_scores[bin_index]=s
1371 binned_model_indexes[bin_index]=model_index
1373 print(binned_scores)
1374 print(binned_model_indexes)
1377 def _expand_ambiguity(self,prot,d):
1378 """If using PMI2, expand the dictionary to include copies as ambiguous options
1379 This also keeps the states separate.
1384 if '..' in key
or (type(val)
is tuple
and len(val)>=3):
1387 states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
1388 if type(val)
is tuple:
1396 for nst
in range(len(states)):
1398 copies = sel.get_selected_particles(with_representation=
False)
1400 for nc
in range(len(copies)):
1402 newdict[
'%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
1404 newdict[
'%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
1411 score_key=
"SimplifiedModel_Total_Score_None",
1412 rmf_file_key=
"rmf_file",
1413 rmf_file_frame_key=
"rmf_frame_index",
1415 prefiltervalue=
None,
1418 alignment_components=
None,
1419 number_of_best_scoring_models=10,
1420 rmsd_calculation_components=
None,
1421 distance_matrix_file=
'distances.mat',
1422 load_distance_matrix_file=
False,
1423 skip_clustering=
False,
1424 number_of_clusters=1,
1426 exit_after_display=
True,
1428 first_and_last_frames=
None,
1429 density_custom_ranges=
None,
1430 write_pdb_with_centered_coordinates=
False,
1432 """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
1433 Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
1434 Can pass None for copy or state to ignore that field.
1435 If you don't pass a specific copy number
1436 @param score_key The score for ranking models. Use "Total_Score"
1437 instead of default for PMI2.
1438 @param rmf_file_key Key pointing to RMF filename
1439 @param rmf_file_frame_key Key pointing to RMF frame number
1440 @param state_number State number to analyze
1441 @param prefiltervalue Only include frames where the
1442 score key is below this value
1443 @param feature_keys Keywords for which you want to
1444 calculate average, medians, etc.
1445 If you pass "Keyname" it'll include everything that matches "*Keyname*"
1446 @param outputdir The local output directory used in the run
1447 @param alignment_components Dictionary with keys=groupname, values are tuples
1448 for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1449 @param number_of_best_scoring_models Num models to keep per run
1450 @param rmsd_calculation_components For calculating RMSD
1451 (same format as alignment_components)
1452 @param distance_matrix_file Where to store/read the distance matrix
1453 @param load_distance_matrix_file Try to load the distance matrix file
1454 @param skip_clustering Just extract the best scoring models
1456 @param number_of_clusters Number of k-means clusters
1457 @param display_plot Display the distance matrix
1458 @param exit_after_display Exit after displaying distance matrix
1459 @param get_every Extract every nth frame
1460 @param first_and_last_frames A tuple with the first and last frames to be
1461 analyzed. Values are percentages!
1462 Default: get all frames
1463 @param density_custom_ranges For density calculation
1464 (same format as alignment_components)
1465 @param write_pdb_with_centered_coordinates
1466 @param voxel_size Used for the density output
1470 self._outputdir = os.path.abspath(outputdir)
1471 self._number_of_clusters = number_of_clusters
1472 for p, state
in self._protocol_output:
1473 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1484 if not load_distance_matrix_file:
1485 if len(self.stat_files)==0: print(
"ERROR: no stat file found in the given path");
return
1486 my_stat_files = IMP.pmi1.tools.chunk_list_into_segments(
1487 self.stat_files,self.number_of_processes)[self.rank]
1491 orig_score_key = score_key
1492 if score_key
not in po.get_keys():
1493 if 'Total_Score' in po.get_keys():
1494 score_key =
'Total_Score'
1495 print(
"WARNING: Using 'Total_Score' instead of "
1496 "'SimplifiedModel_Total_Score_None' for the score key")
1497 for k
in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1498 if k
in feature_keys:
1499 print(
"WARNING: no need to pass " +k+
" to feature_keys.")
1500 feature_keys.remove(k)
1508 get_every, provenance=prov)
1509 rmf_file_list=best_models[0]
1510 rmf_file_frame_list=best_models[1]
1511 score_list=best_models[2]
1512 feature_keyword_list_dict=best_models[3]
1518 if self.number_of_processes > 1:
1522 rmf_file_frame_list)
1523 for k
in feature_keyword_list_dict:
1525 feature_keyword_list_dict[k])
1528 score_rmf_tuples = list(zip(score_list,
1530 rmf_file_frame_list,
1531 list(range(len(score_list)))))
1533 if density_custom_ranges:
1534 for k
in density_custom_ranges:
1535 if type(density_custom_ranges[k])
is not list:
1536 raise Exception(
"Density custom ranges: values must be lists of tuples")
1539 if first_and_last_frames
is not None:
1540 nframes = len(score_rmf_tuples)
1541 first_frame = int(first_and_last_frames[0] * nframes)
1542 last_frame = int(first_and_last_frames[1] * nframes)
1543 if last_frame > len(score_rmf_tuples):
1545 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1548 best_score_rmf_tuples = sorted(score_rmf_tuples,
1549 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1550 best_score_rmf_tuples=[t+(n,)
for n,t
in enumerate(best_score_rmf_tuples)]
1552 prov.append(IMP.pmi1.io.FilterProvenance(
"Best scoring",
1553 0, number_of_best_scoring_models))
1555 best_score_feature_keyword_list_dict = defaultdict(list)
1556 for tpl
in best_score_rmf_tuples:
1558 for f
in feature_keyword_list_dict:
1559 best_score_feature_keyword_list_dict[f].append(
1560 feature_keyword_list_dict[f][index])
1561 my_best_score_rmf_tuples = IMP.pmi1.tools.chunk_list_into_segments(
1562 best_score_rmf_tuples,
1563 self.number_of_processes)[self.rank]
1566 prot_ahead = IMP.pmi1.analysis.get_hiers_from_rmf(self.model,
1568 my_best_score_rmf_tuples[0][1])[0]
1574 my_best_score_rmf_tuples[0],
1575 rmsd_calculation_components,
1576 state_number=state_number)
1578 my_best_score_rmf_tuples,
1579 alignment_components,
1580 rmsd_calculation_components,
1581 state_number=state_number)
1586 all_coordinates=got_coords[0]
1587 alignment_coordinates=got_coords[1]
1588 rmsd_coordinates=got_coords[2]
1589 rmf_file_name_index_dict=got_coords[3]
1590 all_rmf_file_names=got_coords[4]
1596 if density_custom_ranges:
1598 density_custom_ranges,
1601 dircluster=os.path.join(outputdir,
"all_models."+str(n))
1607 os.mkdir(dircluster)
1610 clusstat=open(os.path.join(dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1611 for cnt,tpl
in enumerate(my_best_score_rmf_tuples):
1613 rmf_frame_number=tpl[2]
1616 for key
in best_score_feature_keyword_list_dict:
1617 tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1620 prots,rs = IMP.pmi1.analysis.get_hiers_and_restraints_from_rmf(
1625 linking_successful=IMP.pmi1.analysis.link_hiers_and_restraints_to_rmf(
1631 if not linking_successful:
1637 prot = prots[state_number]
1642 coords_f1=alignment_coordinates[cnt]
1644 coords_f2=alignment_coordinates[cnt]
1647 transformation = Ali.align()[1]
1669 out_pdb_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".pdb")
1670 out_rmf_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".rmf3")
1671 o.init_pdb(out_pdb_fn,prot)
1672 o.write_pdb(out_pdb_fn,
1673 translate_to_geometric_center=write_pdb_with_centered_coordinates)
1675 tmp_dict[
"local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1676 tmp_dict[
"rmf_file_full_path"]=rmf_name
1677 tmp_dict[
"local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1678 tmp_dict[
"local_rmf_frame_number"]=0
1680 clusstat.write(str(tmp_dict)+
"\n")
1682 o.init_rmf(out_rmf_fn, [prot],rs)
1684 o.write_rmf(out_rmf_fn)
1685 o.close_rmf(out_rmf_fn)
1687 if density_custom_ranges:
1688 DensModule.add_subunits_density(prot)
1690 if density_custom_ranges:
1691 DensModule.write_mrc(path=dircluster)
1698 if self.number_of_processes > 1:
1704 rmf_file_name_index_dict)
1706 alignment_coordinates)
1713 [best_score_feature_keyword_list_dict,
1714 rmf_file_name_index_dict],
1720 print(
"setup clustering class")
1723 for n, model_coordinate_dict
in enumerate(all_coordinates):
1724 template_coordinate_dict = {}
1726 if alignment_components
is not None and len(self.cluster_obj.all_coords) == 0:
1728 self.cluster_obj.set_template(alignment_coordinates[n])
1729 self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1730 print(
"Global calculating the distance matrix")
1733 self.cluster_obj.dist_matrix()
1737 self.cluster_obj.do_cluster(number_of_clusters)
1740 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1741 if exit_after_display:
1743 self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1750 print(
"setup clustering class")
1752 self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1753 print(
"clustering with %s clusters" % str(number_of_clusters))
1754 self.cluster_obj.do_cluster(number_of_clusters)
1755 [best_score_feature_keyword_list_dict,
1756 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1759 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1760 if exit_after_display:
1762 if self.number_of_processes > 1:
1770 print(self.cluster_obj.get_cluster_labels())
1771 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1772 print(
"rank %s " % str(self.rank))
1773 print(
"cluster %s " % str(n))
1774 print(
"cluster label %s " % str(cl))
1775 print(self.cluster_obj.get_cluster_label_names(cl))
1776 cluster_size = len(self.cluster_obj.get_cluster_label_names(cl))
1777 cluster_prov = prov + \
1778 [IMP.pmi1.io.ClusterProvenance(cluster_size)]
1781 if density_custom_ranges:
1783 density_custom_ranges,
1786 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1788 os.mkdir(dircluster)
1792 rmsd_dict = {
"AVERAGE_RMSD":
1793 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1794 clusstat = open(dircluster +
"stat.out",
"w")
1795 for k, structure_name
in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1798 tmp_dict.update(rmsd_dict)
1799 index = rmf_file_name_index_dict[structure_name]
1800 for key
in best_score_feature_keyword_list_dict:
1802 key] = best_score_feature_keyword_list_dict[
1808 rmf_name = structure_name.split(
"|")[0]
1809 rmf_frame_number = int(structure_name.split(
"|")[1])
1810 clusstat.write(str(tmp_dict) +
"\n")
1814 prots,rs = IMP.pmi1.analysis.get_hiers_and_restraints_from_rmf(
1819 linking_successful = IMP.pmi1.analysis.link_hiers_and_restraints_to_rmf(
1825 if not linking_successful:
1830 prot = prots[state_number]
1836 model_index = self.cluster_obj.get_model_index_from_name(
1838 transformation = self.cluster_obj.get_transformation_to_first_member(
1858 if density_custom_ranges:
1859 DensModule.add_subunits_density(prot)
1864 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1865 o.write_pdb(dircluster + str(k) +
".pdb")
1867 o.init_rmf(dircluster + str(k) +
".rmf3", [prot],rs)
1868 o.write_rmf(dircluster + str(k) +
".rmf3")
1869 o.close_rmf(dircluster + str(k) +
".rmf3")
1874 if density_custom_ranges:
1875 DensModule.write_mrc(path=dircluster)
1878 if self.number_of_processes>1:
1881 def get_cluster_rmsd(self,cluster_num):
1882 if self.cluster_obj
is None:
1884 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1886 def save_objects(self, objects, file_name):
1888 outf = open(file_name,
'wb')
1889 pickle.dump(objects, outf)
1892 def load_objects(self, file_name):
1894 inputf = open(file_name,
'rb')
1895 objects = pickle.load(inputf)
1902 This class contains analysis utilities to investigate ReplicaExchange results.
1915 Construction of the Class.
1916 @param model IMP.Model()
1917 @param stat_files list of string. Can be ascii stat files, rmf files names
1918 @param best_models Integer. Number of best scoring models, if None: all models will be read
1919 @param alignment boolean (Default=True). Align before computing the rmsd.
1923 self.best_models=best_models
1936 self.clusters.append(c)
1937 for n0
in range(len(self.stath0)):
1939 self.pairwise_rmsd={}
1940 self.pairwise_molecular_assignment={}
1941 self.alignment=alignment
1942 self.symmetric_molecules={}
1943 self.issymmetricsel={}
1945 self.molcopydict0=IMP.pmi1.tools.get_molecules_dictionary_by_copy(
IMP.atom.get_leaves(self.stath0))
1946 self.molcopydict1=IMP.pmi1.tools.get_molecules_dictionary_by_copy(
IMP.atom.get_leaves(self.stath1))
1950 Setup the selection onto which the rmsd is computed
1951 @param kwargs use IMP.atom.Selection keywords
1959 Store names of symmetric molecules
1961 self.symmetric_molecules[molecule_name]=0
1966 Setup the selection onto which the alignment is computed
1967 @param kwargs use IMP.atom.Selection keywords
1975 def clean_clusters(self):
1976 for c
in self.clusters: del c
1980 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1982 Cluster the models based on RMSD.
1983 @param rmsd_cutoff Float the distance cutoff in Angstrom
1984 @param metric (Default=IMP.atom.get_rmsd) the metric that will be used to compute rmsds
1986 self.clean_clusters()
1987 not_clustered=set(range(len(self.stath1)))
1988 while len(not_clustered)>0:
1989 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1995 Refine the clusters by merging the ones whose centers are close
1996 @param rmsd_cutoff cutoff distance in Angstorms
1999 clusters_copy=self.clusters
2000 for c0,c1
in itertools.combinations(self.clusters,2):
2001 if c0.center_index
is None:
2003 if c1.center_index
is None:
2005 d0=self.stath0[c0.center_index]
2006 d1=self.stath1[c1.center_index]
2007 rmsd, molecular_assignment = self.
rmsd()
2008 if rmsd <= rmsd_cutoff:
2009 if c1
in self.clusters:
2010 clusters_copy.remove(c1)
2012 self.clusters=clusters_copy
2019 def set_cluster_assignments(self, cluster_ids):
2020 if len(cluster_ids)!=len(self.stath0):
2021 raise ValueError(
'cluster ids has to be same length as number of frames')
2024 for i
in sorted(list(set(cluster_ids))):
2026 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
2027 self.clusters[idx].add_member(i,d)
2031 Return the model data from a cluster
2032 @param cluster IMP.pmi1.output.Cluster object
2041 Save the data for the whole models into a pickle file
2042 @param filename string
2044 self.stath0.save_data(filename)
2048 Set the data from an external IMP.pmi1.output.Data
2049 @param data IMP.pmi1.output.Data
2051 self.stath0.data=data
2052 self.stath1.data=data
2056 Load the data from an external pickled file
2057 @param filename string
2059 self.stath0.load_data(filename)
2060 self.stath1.load_data(filename)
2061 self.best_models=len(self.stath0)
2063 def add_cluster(self,rmf_name_list):
2065 print(
"creating cluster index "+str(len(self.clusters)))
2066 self.clusters.append(c)
2067 current_len=len(self.stath0)
2069 for rmf
in rmf_name_list:
2070 print(
"adding rmf "+rmf)
2071 self.stath0.add_stat_file(rmf)
2072 self.stath1.add_stat_file(rmf)
2074 for n0
in range(current_len,len(self.stath0)):
2081 Save the clusters into a pickle file
2082 @param filename string
2085 import cPickle
as pickle
2088 fl=open(filename,
'wb')
2089 pickle.dump(self.clusters,fl)
2093 Load the clusters from a pickle file
2094 @param filename string
2095 @param append bool (Default=False), if True. append the clusters to the ones currently present
2098 import cPickle
as pickle
2101 fl=open(filename,
'rb')
2102 self.clean_clusters()
2104 self.clusters+=pickle.load(fl)
2106 self.clusters=pickle.load(fl)
2115 Compute the cluster center for a given cluster
2117 member_distance=defaultdict(float)
2119 for n0,n1
in itertools.combinations(cluster.members,2):
2122 rmsd, _ = self.
rmsd()
2123 member_distance[n0]+=rmsd
2125 if len(member_distance)>0:
2126 cluster.center_index=min(member_distance, key=member_distance.get)
2128 cluster.center_index=cluster.members[0]
2132 Save the coordinates of the current cluster a single rmf file
2134 print(
"saving coordinates",cluster)
2137 if rmf_name
is None:
2138 rmf_name=prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
2140 d1=self.stath1[cluster.members[0]]
2142 o.init_rmf(rmf_name, [self.stath1])
2143 for n1
in cluster.members:
2147 if self.alignment: self.align()
2148 o.write_rmf(rmf_name)
2150 o.close_rmf(rmf_name)
2154 remove structures that are similar
2155 append it to a new cluster
2157 print(
"pruning models")
2160 remaining=range(1,len(self.stath1),10)
2162 while len(remaining)>0:
2163 d0=self.stath0[selected]
2165 for n1
in remaining:
2167 if self.alignment: self.align()
2171 print(
"pruning model %s, similar to model %s, rmsd %s"%(str(n1),str(selected),str(d)))
2172 remaining=[x
for x
in remaining
if x
not in rm]
2173 if len(remaining)==0:
break
2174 selected=remaining[0]
2175 filtered.append(selected)
2178 self.clusters.append(c)
2188 Compute the precision of a cluster
2194 if not cluster.center_index
is None:
2195 members1=[cluster.center_index]
2197 members1=cluster.members
2201 for n1
in cluster.members:
2206 tmp_rmsd, _ = self.
rmsd()
2211 precision=rmsd/npairs
2212 cluster.precision=precision
2217 Compute the bipartite precision (ie the cross-precision)
2218 between two clusters
2222 for cn0,n0
in enumerate(cluster1.members):
2224 for cn1,n1
in enumerate(cluster2.members):
2226 tmp_rmsd, _ =self.
rmsd()
2227 if verbose: print(
"--- rmsd between structure %s and structure %s is %s"%(str(cn0),str(cn1),str(tmp_rmsd)))
2230 precision=rmsd/npairs
2233 def rmsf(self,cluster,molecule,copy_index=0,state_index=0,cluster_ref=None,step=1):
2235 Compute the Root mean square fluctuations
2236 of a molecule in a cluster
2237 Returns an IMP.pmi1.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf
2239 rmsf=IMP.pmi1.tools.OrderedDict()
2242 if cluster_ref
is not None:
2243 if not cluster_ref.center_index
is None:
2244 members0 = [cluster_ref.center_index]
2246 members0 = cluster_ref.members
2248 if not cluster.center_index
is None:
2249 members0 = [cluster.center_index]
2251 members0 = cluster.members
2254 copy_index=copy_index,state_index=state_index)
2255 ps0=s0.get_selected_particles()
2269 for n1
in cluster.members[::step]:
2271 print(
"--- rmsf %s %s"%(str(n0),str(n1)))
2274 s1=
IMP.atom.Selection(self.stath1,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
2275 copy_index=copy_index,state_index=state_index)
2276 ps1 = s1.get_selected_particles()
2279 if self.alignment: self.align()
2280 for n,(p0,p1)
in enumerate(zip(ps0,ps1)):
2281 r=residue_indexes[n]
2293 for stath
in [self.stath0,self.stath1]:
2294 if molecule
not in self.symmetric_molecules:
2296 copy_index=copy_index,state_index=state_index)
2299 state_index=state_index)
2301 ps = s.get_selected_particles()
2310 def save_densities(self,cluster,density_custom_ranges,voxel_size=5,reference="Absolute", prefix="./",step=1):
2315 for n1
in cluster.members[::step]:
2316 print(
"density "+str(n1))
2319 if self.alignment: self.align()
2320 dens.add_subunits_density(self.stath1)
2322 dens.write_mrc(path=prefix+
'/',suffix=str(cluster.cluster_id))
2325 def contact_map(self,cluster,contact_threshold=15,log_scale=False,consolidate=False,molecules=None,prefix='./',reference="Absolute"):
2328 import matplotlib.pyplot
as plt
2329 import matplotlib.cm
as cm
2330 from scipy.spatial.distance
import cdist
2332 if molecules
is None:
2336 unique_copies=[mol
for mol
in mols
if mol.get_copy_index() == 0]
2337 mol_names_unique=dict((mol.get_name(),mol)
for mol
in unique_copies)
2338 total_len_unique=sum(max(mol.get_residue_indexes())
for mol
in unique_copies)
2347 seqlen=max(mol.get_residue_indexes())
2348 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2352 for mol
in unique_copies:
2353 seqlen=max(mol.get_residue_indexes())
2354 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2358 for ncl,n1
in enumerate(cluster.members):
2362 coord_dict=IMP.pmi1.tools.OrderedDict()
2364 mol_name=mol.get_name()
2365 copy_index=mol.get_copy_index()
2366 rindexes = mol.get_residue_indexes()
2367 coords = np.ones((max(rindexes),3))
2368 for rnum
in rindexes:
2370 selpart = sel.get_selected_particles()
2371 if len(selpart) == 0:
continue
2372 selpart = selpart[0]
2373 coords[rnum - 1, :] =
IMP.core.XYZ(selpart).get_coordinates()
2374 coord_dict[mol]=coords
2377 coords=np.concatenate(list(coord_dict.values()))
2378 dists = cdist(coords, coords)
2379 binary_dists = np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
2381 binary_dists_dict={}
2383 len1 = max(mol1.get_residue_indexes())
2385 name1=mol1.get_name()
2386 name2=mol2.get_name()
2387 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2388 if (name1, name2)
not in binary_dists_dict:
2389 binary_dists_dict[(name1, name2)] = np.zeros((len1,len1))
2390 binary_dists_dict[(name1, name2)] += np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
2391 binary_dists=np.zeros((total_len_unique,total_len_unique))
2393 for name1,name2
in binary_dists_dict:
2394 r1=index_dict[mol_names_unique[name1]]
2395 r2=index_dict[mol_names_unique[name2]]
2396 binary_dists[min(r1):max(r1)+1,min(r2):max(r2)+1] = np.where((binary_dists_dict[(name1, name2)]>=1.0),1.0,0.0)
2401 contact_freqs = binary_dists
2403 dist_maps.append(dists)
2404 av_dist_map += dists
2405 contact_freqs += binary_dists
2410 contact_freqs =-np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2412 contact_freqs =1.0/len(cluster)*contact_freqs
2413 av_dist_map=1.0/len(cluster)*contact_freqs
2415 fig = plt.figure(figsize=(100, 100))
2416 ax = fig.add_subplot(111)
2419 gap_between_components=50
2431 prot_list=list(zip(*sorted_tuple))[1]
2434 prot_list=list(zip(*sorted_tuple))[1]
2436 prot_listx = prot_list
2437 nresx = gap_between_components + \
2438 sum([max(mol.get_residue_indexes())
2439 + gap_between_components
for mol
in prot_listx])
2442 prot_listy = prot_list
2443 nresy = gap_between_components + \
2444 sum([max(mol.get_residue_indexes())
2445 + gap_between_components
for mol
in prot_listy])
2450 res = gap_between_components
2451 for mol
in prot_listx:
2452 resoffsetx[mol] = res
2453 res += max(mol.get_residue_indexes())
2455 res += gap_between_components
2459 res = gap_between_components
2460 for mol
in prot_listy:
2461 resoffsety[mol] = res
2462 res += max(mol.get_residue_indexes())
2464 res += gap_between_components
2466 resoffsetdiagonal = {}
2467 res = gap_between_components
2468 for mol
in IMP.pmi1.tools.OrderedSet(prot_listx + prot_listy):
2469 resoffsetdiagonal[mol] = res
2470 res += max(mol.get_residue_indexes())
2471 res += gap_between_components
2476 for n, prot
in enumerate(prot_listx):
2477 res = resoffsetx[prot]
2479 for proty
in prot_listy:
2480 resy = resoffsety[proty]
2481 endy = resendy[proty]
2482 ax.plot([res, res], [resy, endy], linestyle=
'-',color=
'gray', lw=0.4)
2483 ax.plot([end, end], [resy, endy], linestyle=
'-',color=
'gray', lw=0.4)
2484 xticks.append((float(res) + float(end)) / 2)
2489 for n, prot
in enumerate(prot_listy):
2490 res = resoffsety[prot]
2492 for protx
in prot_listx:
2493 resx = resoffsetx[protx]
2494 endx = resendx[protx]
2495 ax.plot([resx, endx], [res, res], linestyle=
'-',color=
'gray', lw=0.4)
2496 ax.plot([resx, endx], [end, end], linestyle=
'-',color=
'gray', lw=0.4)
2497 yticks.append((float(res) + float(end)) / 2)
2502 tmp_array = np.zeros((nresx, nresy))
2504 for px
in prot_listx:
2505 for py
in prot_listy:
2506 resx = resoffsetx[px]
2507 lengx = resendx[px] - 1
2508 resy = resoffsety[py]
2509 lengy = resendy[py] - 1
2510 indexes_x = index_dict[px]
2511 minx = min(indexes_x)
2512 maxx = max(indexes_x)
2513 indexes_y = index_dict[py]
2514 miny = min(indexes_y)
2515 maxy = max(indexes_y)
2516 tmp_array[resx:lengx,resy:lengy] = contact_freqs[minx:maxx,miny:maxy]
2517 ret[(px,py)]=np.argwhere(contact_freqs[minx:maxx,miny:maxy] == 1.0)+1
2519 cax = ax.imshow(tmp_array,
2524 interpolation=
'nearest')
2526 ax.set_xticks(xticks)
2527 ax.set_xticklabels(xlabels, rotation=90)
2528 ax.set_yticks(yticks)
2529 ax.set_yticklabels(ylabels)
2530 plt.setp(ax.get_xticklabels(), fontsize=6)
2531 plt.setp(ax.get_yticklabels(), fontsize=6)
2534 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2535 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2540 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf", dpi=300,transparent=
"False")
2544 def plot_rmsd_matrix(self,filename):
2546 self.compute_all_pairwise_rmsd()
2547 distance_matrix = np.zeros(
2548 (len(self.stath0), len(self.stath1)))
2549 for (n0,n1)
in self.pairwise_rmsd:
2550 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0,n1)]
2552 import matplotlib
as mpl
2554 import matplotlib.pylab
as pl
2555 from scipy.cluster
import hierarchy
as hrc
2557 fig = pl.figure(figsize=(10,8))
2558 ax = fig.add_subplot(212)
2559 dendrogram = hrc.dendrogram(
2560 hrc.linkage(distance_matrix),
2563 leaves_order = dendrogram[
'leaves']
2564 ax.set_xlabel(
'Model')
2565 ax.set_ylabel(
'RMSD [Angstroms]')
2567 ax2 = fig.add_subplot(221)
2569 distance_matrix[leaves_order,
2572 interpolation=
'nearest')
2573 cb = fig.colorbar(cax)
2574 cb.set_label(
'RMSD [Angstroms]')
2575 ax2.set_xlabel(
'Model')
2576 ax2.set_ylabel(
'Model')
2578 pl.savefig(filename, dpi=300)
2587 Update the cluster id numbers
2589 for n,c
in enumerate(self.clusters):
2592 def get_molecule(self, hier, name, copy):
2600 self.seldict0=IMP.pmi1.tools.get_selections_dictionary(self.sel0_rmsd.get_selected_particles())
2601 self.seldict1=IMP.pmi1.tools.get_selections_dictionary(self.sel1_rmsd.get_selected_particles())
2602 for mol
in self.seldict0:
2603 for sel
in self.seldict0[mol]:
2604 self.issymmetricsel[sel]=
False
2605 for mol
in self.symmetric_molecules:
2606 self.symmetric_molecules[mol]=len(self.seldict0[mol])
2607 for sel
in self.seldict0[mol]:
2608 self.issymmetricsel[sel]=
True
2615 for rb
in self.rbs1:
2618 for bead
in self.beads1:
2623 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2625 initial filling of the clusters.
2628 print(
"clustering model "+str(n0))
2629 d0 = self.stath0[n0]
2631 print(
"creating cluster index "+str(len(self.clusters)))
2632 self.clusters.append(c)
2634 clustered = set([n0])
2636 print(
"--- trying to add model "+str(n1)+
" to cluster "+str(len(self.clusters)))
2637 d1 = self.stath1[n1]
2638 if self.alignment: self.align()
2639 rmsd, _ = self.
rmsd(metric=metric)
2640 if rmsd<rmsd_cutoff:
2641 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2645 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2650 merge the clusters that have close members
2651 @param rmsd_cutoff cutoff distance in Angstorms
2657 for c0, c1
in filter(
lambda x: len(x[0].members)>1, itertools.combinations(self.clusters, 2)):
2658 n0, n1 = [c.members[0]
for c
in (c0,c1)]
2659 d0 = self.stath0[n0]
2660 d1 = self.stath1[n1]
2661 rmsd, _ = self.
rmsd()
2663 to_merge.append((c0,c1))
2665 for c0, c
in reversed(to_merge):
2669 self.clusters = [c
for c
in filter(
lambda x: len(x.members)>0, self.clusters)]
2674 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2676 print(
"check close members for clusters "+str(c0.cluster_id)+
" and "+str(c1.cluster_id))
2677 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2678 d0 = self.stath0[n0]
2679 d1 = self.stath1[n1]
2680 rmsd, _ = self.
rmsd(metric=metric)
2681 if rmsd<rmsd_cutoff:
2697 a function that returns the permutation best_sel of sels0 that minimizes metric
2699 best_rmsd2 = float(
'inf')
2701 if self.issymmetricsel[sels0[0]]:
2704 for offset
in range(N):
2705 order=[(offset+i)%N
for i
in range(N)]
2706 sels = [sels0[(offset+i)%N]
for i
in range(N)]
2709 r=metric(sel0, sel1)
2712 if rmsd2 < best_rmsd2:
2717 for sels
in itertools.permutations(sels0):
2719 for sel0, sel1
in itertools.takewhile(
lambda x: rmsd2<best_rmsd2, zip(sels, sels1)):
2720 r=metric(sel0, sel1)
2722 if rmsd2 < best_rmsd2:
2736 return best_sel, best_rmsd2
2738 def compute_all_pairwise_rmsd(self):
2739 for d0
in self.stath0:
2740 for d1
in self.stath1:
2741 rmsd, _ = self.
rmsd()
2743 def rmsd(self,metric=IMP.atom.get_rmsd):
2745 Computes the RMSD. Resolves ambiguous pairs assignments
2748 n0=self.stath0.current_index
2749 n1=self.stath1.current_index
2750 if ((n0,n1)
in self.pairwise_rmsd)
and ((n0,n1)
in self.pairwise_molecular_assignment):
2751 return self.pairwise_rmsd[(n0,n1)], self.pairwise_molecular_assignment[(n0,n1)]
2759 seldict_best_order={}
2760 molecular_assignment={}
2761 for molname, sels0
in self.seldict0.items():
2762 sels_best_order, best_rmsd2 = self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2764 Ncoords = len(sels_best_order[0].get_selected_particles())
2765 Ncopies = len(self.seldict1[molname])
2766 total_rmsd += Ncoords*best_rmsd2
2767 total_N += Ncoords*Ncopies
2769 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2770 p0 = sel0.get_selected_particles()[0]
2771 p1 = sel1.get_selected_particles()[0]
2777 molecular_assignment[(molname,c0)]=(molname,c1)
2779 total_rmsd = math.sqrt(total_rmsd/total_N)
2781 self.pairwise_rmsd[(n0,n1)]=total_rmsd
2782 self.pairwise_molecular_assignment[(n0,n1)]=molecular_assignment
2783 self.pairwise_rmsd[(n1,n0)]=total_rmsd
2784 self.pairwise_molecular_assignment[(n1,n0)]=molecular_assignment
2786 return total_rmsd, molecular_assignment
2790 Fix the reference structure for structural alignment, rmsd and chain assignemnt
2791 @param reference can be either "Absolute" (cluster center of the first cluster)
2792 or Relative (cluster center of the current cluster)
2794 if reference==
"Absolute":
2796 elif reference==
"Relative":
2797 if cluster.center_index:
2798 n0=cluster.center_index
2800 n0=cluster.members[0]
2805 compute the molecular assignemnts between multiple copies
2806 of the same sequence. It changes the Copy index of Molecules
2809 _, molecular_assignment = self.
rmsd()
2810 for (m0, c0), (m1,c1)
in molecular_assignment.items():
2811 mol0 = self.molcopydict0[m0][c0]
2812 mol1 = self.molcopydict1[m1][c1]
2815 p1.set_value(cik0,c0)
2819 Undo the Copy index assignment
2822 _, molecular_assignment = self.
rmsd()
2824 for (m0, c0), (m1,c1)
in molecular_assignment.items():
2825 mol0 = self.molcopydict0[m0][c0]
2826 mol1 = self.molcopydict1[m1][c1]
2829 p1.set_value(cik0,c1)
2836 s=
"AnalysisReplicaExchange\n"
2837 s+=
"---- number of clusters %s \n"%str(len(self.clusters))
2838 s+=
"---- number of models %s \n"%str(len(self.stath0))
2841 def __getitem__(self,int_slice_adaptor):
2842 if type(int_slice_adaptor)
is int:
2843 return self.clusters[int_slice_adaptor]
2844 elif type(int_slice_adaptor)
is slice:
2845 return self.__iter__(int_slice_adaptor)
2847 raise TypeError(
"Unknown Type")
2850 return len(self.clusters)
2852 def __iter__(self,slice_key=None):
2853 if slice_key
is None:
2854 for i
in range(len(self)):
2857 for i
in range(len(self))[slice_key]:
class to link stat files to several rmf files
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignemnts between multiple copies of the same sequence.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def load_data
Load the data from an external pickled file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
A helper output for model evaluation.
A member of a rigid body, it has internal (local) coordinates.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Sample using replica exchange.
Sample using molecular dynamics.
def load_clusters
Load the clusters from a pickle file.
A container for models organized into clusters.
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Class for easy writing of PDBs, RMFs, and stat files.
Performs alignment and RMSD calculation for two sets of coordinates.
static XYZR setup_particle(Model *m, ParticleIndex pi)
def aggregate
initial filling of the clusters.
def build_model
Create model.
A class to cluster structures.
def merge
merge two clusters
A macro for running all the basic operations of analysis.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignemnt.
def __init__
Construction of the Class.
def update_clusters
Update the cluster id numbers.
Extends the functionality of IMP.atom.Molecule.
Set of python classes to create a multi-state, multi-resolution IMP hierarchy.
This class contains analysis utilities to investigate ReplicaExchange results.
def save_data
Save the data for the whole models into a pickle file.
Utility classes and functions for reading and storing PMI files.
A macro to help setup and run replica exchange.
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
def compute_cluster_center
Compute the cluster center for a given cluster.
def merge_aggregates
merge the clusters that have close members
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
def set_data
Set the data from an external IMP.pmi1.output.Data.
Sample using Monte Carlo.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Tools for clustering and cluster analysis.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Add uncertainty to a particle.
A decorator for keeping track of copies of a molecule.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
Set up the representation of all proteins and nucleic acid macromolecules.
The standard decorator for manipulating molecular structures.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def deprecated_method
Python decorator to mark a method as deprecated.
def update_seldicts
Update the seldicts.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def precision
Compute the precision of a cluster.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def get_cluster_data
Return the model data from a cluster.
A decorator for a particle with x,y,z coordinates.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def BuildModel0
Construct a component for each subunit (no splitting, nothing fancy).
def save_clusters
Save the clusters into a pickle file.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def get_representation
Return the Representation object.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Representation of the system.
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi1.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
def deprecated_object
Python decorator to mark a class as deprecated.
A macro to build a Representation based on a Topology and lists of movers.
The general base class for IMP exceptions.
def get_best_models
Given a list of stat files, read them all and find the best models.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
def add_protocol_output
Capture details of the modeling protocol.
A class for reading stat files (either rmf or ascii v1 and v2)
Compute mean density maps from structures.
def rmsd
Computes the RMSD.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Classes for writing output files and processing them.
def refine
Refine the clusters by merging the ones whose centers are close.
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Support for the RMF file format for storing hierarchical molecular data and markup.
def set_symmetric
Store names of symmetric molecules.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.