1 """@namespace IMP.pmi1.macros
2 Protocols for sampling structures and analyzing them.
5 from __future__
import print_function, division
18 from operator
import itemgetter
19 from collections
import defaultdict
25 class _RMFRestraints(object):
26 """All restraints that are written out to the RMF file"""
27 def __init__(self, model, user_restraints):
29 self._user_restraints = user_restraints
if user_restraints
else []
32 return (len(self._user_restraints)
33 + self._rmf_rs.get_number_of_restraints())
37 __nonzero__ = __bool__
39 def __getitem__(self, i):
40 class FakePMIWrapper(object):
41 def __init__(self, r):
42 self.r = IMP.RestraintSet.get_from(r)
43 get_restraint =
lambda self: self.r
44 lenuser = len(self._user_restraints)
46 return self._user_restraints[i]
47 elif 0 <= i - lenuser < self._rmf_rs.get_number_of_restraints():
48 r = self._rmf_rs.get_restraint(i - lenuser)
49 return FakePMIWrapper(r)
51 raise IndexError(
"Out of range")
55 """A macro to help setup and run replica exchange.
56 Supports Monte Carlo and molecular dynamics.
57 Produces trajectory RMF files, best PDB structures,
58 and output stat files.
64 monte_carlo_sample_objects=
None,
65 molecular_dynamics_sample_objects=
None,
67 rmf_output_objects=
None,
68 crosslink_restraints=
None,
69 monte_carlo_temperature=1.0,
70 simulated_annealing=
False,
71 simulated_annealing_minimum_temperature=1.0,
72 simulated_annealing_maximum_temperature=2.5,
73 simulated_annealing_minimum_temperature_nframes=100,
74 simulated_annealing_maximum_temperature_nframes=100,
75 replica_exchange_minimum_temperature=1.0,
76 replica_exchange_maximum_temperature=2.5,
77 replica_exchange_swap=
True,
79 number_of_best_scoring_models=500,
82 molecular_dynamics_steps=10,
83 molecular_dynamics_max_time_step=1.0,
84 number_of_frames=1000,
85 save_coordinates_mode=
"lowest_temperature",
86 nframes_write_coordinates=1,
87 write_initial_rmf=
True,
88 initial_rmf_name_suffix=
"initial",
89 stat_file_name_suffix=
"stat",
90 best_pdb_name_suffix=
"model",
92 do_create_directories=
True,
93 global_output_directory=
"./",
96 replica_stat_file_suffix=
"stat_replica",
97 em_object_for_rmf=
None,
99 replica_exchange_object=
None,
102 @param model The IMP model
103 @param representation PMI.representation.Representation object
104 (or list of them, for multi-state modeling)
105 @param root_hier Instead of passing Representation, pass the System hierarchy
106 @param monte_carlo_sample_objects Objects for MC sampling; a list of
107 structural components (generally the representation) that will
108 be moved and restraints with parameters that need to
110 For PMI2: just pass flat list of movers
111 @param molecular_dynamics_sample_objects Objects for MD sampling
112 For PMI2: just pass flat list of particles
113 @param output_objects A list of structural objects and restraints
114 that will be included in output (ie, statistics "stat" files). Any object
115 that provides a get_output() method can be used here. If None is passed
116 the macro will not write stat files.
117 @param rmf_output_objects A list of structural objects and restraints
118 that will be included in rmf. Any object
119 that provides a get_output() method can be used here.
120 @param crosslink_restraints List of cross-link restraints that will
121 be included in output RMF files (for visualization).
122 @param monte_carlo_temperature MC temp (may need to be optimized
123 based on post-sampling analysis)
124 @param simulated_annealing If True, perform simulated annealing
125 @param simulated_annealing_minimum_temperature Should generally be
126 the same as monte_carlo_temperature.
127 @param simulated_annealing_minimum_temperature_nframes Number of
128 frames to compute at minimum temperature.
129 @param simulated_annealing_maximum_temperature_nframes Number of
131 temps > simulated_annealing_maximum_temperature.
132 @param replica_exchange_minimum_temperature Low temp for REX; should
133 generally be the same as monte_carlo_temperature.
134 @param replica_exchange_maximum_temperature High temp for REX
135 @param replica_exchange_swap Boolean, enable disable temperature
137 @param num_sample_rounds Number of rounds of MC/MD per cycle
138 @param number_of_best_scoring_models Number of top-scoring PDB models
139 to keep around for analysis
140 @param monte_carlo_steps Number of MC steps per round
141 @param self_adaptive self adaptive scheme for monte carlo movers
142 @param molecular_dynamics_steps Number of MD steps per round
143 @param molecular_dynamics_max_time_step Max time step for MD
144 @param number_of_frames Number of REX frames to run
145 @param save_coordinates_mode string: how to save coordinates.
146 "lowest_temperature" (default) only the lowest temperatures is saved
147 "25th_score" all replicas whose score is below the 25th percentile
148 "50th_score" all replicas whose score is below the 50th percentile
149 "75th_score" all replicas whose score is below the 75th percentile
150 @param nframes_write_coordinates How often to write the coordinates
152 @param write_initial_rmf Write the initial configuration
153 @param global_output_directory Folder that will be created to house
155 @param test_mode Set to True to avoid writing any files, just test one frame.
162 self.output_objects = output_objects
163 self.rmf_output_objects=rmf_output_objects
164 self.representation = representation
166 if type(representation) == list:
167 self.is_multi_state =
True
168 self.root_hiers = [r.prot
for r
in representation]
169 self.vars[
"number_of_states"] = len(representation)
171 self.is_multi_state =
False
172 self.root_hier = representation.prot
173 self.vars[
"number_of_states"] = 1
174 elif root_hier
and type(root_hier) ==
IMP.atom.Hierarchy and root_hier.get_name()==
'System':
176 if self.output_objects
is not None:
178 if self.rmf_output_objects
is not None:
180 self.root_hier = root_hier
181 states = IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)
182 self.vars[
"number_of_states"] = len(states)
184 self.root_hiers = states
185 self.is_multi_state =
True
187 self.root_hier = root_hier
188 self.is_multi_state =
False
190 raise Exception(
"Must provide representation or System hierarchy (root_hier)")
192 self._rmf_restraints = _RMFRestraints(model, crosslink_restraints)
193 self.em_object_for_rmf = em_object_for_rmf
194 self.monte_carlo_sample_objects = monte_carlo_sample_objects
195 self.vars[
"self_adaptive"]=self_adaptive
196 if sample_objects
is not None:
197 self.monte_carlo_sample_objects+=sample_objects
198 self.molecular_dynamics_sample_objects=molecular_dynamics_sample_objects
199 self.replica_exchange_object = replica_exchange_object
200 self.molecular_dynamics_max_time_step = molecular_dynamics_max_time_step
201 self.vars[
"monte_carlo_temperature"] = monte_carlo_temperature
203 "replica_exchange_minimum_temperature"] = replica_exchange_minimum_temperature
205 "replica_exchange_maximum_temperature"] = replica_exchange_maximum_temperature
206 self.vars[
"replica_exchange_swap"] = replica_exchange_swap
207 self.vars[
"simulated_annealing"]=\
209 self.vars[
"simulated_annealing_minimum_temperature"]=\
210 simulated_annealing_minimum_temperature
211 self.vars[
"simulated_annealing_maximum_temperature"]=\
212 simulated_annealing_maximum_temperature
213 self.vars[
"simulated_annealing_minimum_temperature_nframes"]=\
214 simulated_annealing_minimum_temperature_nframes
215 self.vars[
"simulated_annealing_maximum_temperature_nframes"]=\
216 simulated_annealing_maximum_temperature_nframes
218 self.vars[
"num_sample_rounds"] = num_sample_rounds
220 "number_of_best_scoring_models"] = number_of_best_scoring_models
221 self.vars[
"monte_carlo_steps"] = monte_carlo_steps
222 self.vars[
"molecular_dynamics_steps"]=molecular_dynamics_steps
223 self.vars[
"number_of_frames"] = number_of_frames
224 if not save_coordinates_mode
in [
"lowest_temperature",
"25th_score",
"50th_score",
"75th_score"]:
225 raise Exception(
"save_coordinates_mode has unrecognized value")
227 self.vars[
"save_coordinates_mode"] = save_coordinates_mode
228 self.vars[
"nframes_write_coordinates"] = nframes_write_coordinates
229 self.vars[
"write_initial_rmf"] = write_initial_rmf
230 self.vars[
"initial_rmf_name_suffix"] = initial_rmf_name_suffix
231 self.vars[
"best_pdb_name_suffix"] = best_pdb_name_suffix
232 self.vars[
"stat_file_name_suffix"] = stat_file_name_suffix
233 self.vars[
"do_clean_first"] = do_clean_first
234 self.vars[
"do_create_directories"] = do_create_directories
235 self.vars[
"global_output_directory"] = global_output_directory
236 self.vars[
"rmf_dir"] = rmf_dir
237 self.vars[
"best_pdb_dir"] = best_pdb_dir
238 self.vars[
"atomistic"] = atomistic
239 self.vars[
"replica_stat_file_suffix"] = replica_stat_file_suffix
240 self.vars[
"geometries"] =
None
241 self.test_mode = test_mode
244 if self.vars[
"geometries"]
is None:
245 self.vars[
"geometries"] = list(geometries)
247 self.vars[
"geometries"].extend(geometries)
250 print(
"ReplicaExchange0: it generates initial.*.rmf3, stat.*.out, rmfs/*.rmf3 for each replica ")
251 print(
"--- it stores the best scoring pdb models in pdbs/")
252 print(
"--- the stat.*.out and rmfs/*.rmf3 are saved only at the lowest temperature")
253 print(
"--- variables:")
254 keys = list(self.vars.keys())
257 print(
"------", v.ljust(30), self.vars[v])
259 def get_replica_exchange_object(self):
260 return self.replica_exchange_object
262 def _add_provenance(self, sampler_md, sampler_mc):
263 """Record details about the sampling in the IMP Hierarchies"""
264 if not self.is_multi_state
or self.pmi2:
265 output_hierarchies = [self.root_hier]
267 output_hierarchies = self.root_hiers
271 method =
"Molecular Dynamics"
272 iterations += self.vars[
"molecular_dynamics_steps"]
274 method =
"Hybrid MD/MC" if sampler_md
else "Monte Carlo"
275 iterations += self.vars[
"monte_carlo_steps"]
277 if iterations == 0
or self.vars[
"number_of_frames"] == 0:
279 iterations *= self.vars[
"num_sample_rounds"]
281 for h
in output_hierarchies:
282 pi = self.model.add_particle(
"sampling")
284 self.model, pi, method, self.vars[
"number_of_frames"],
286 p.set_number_of_replicas(
287 self.replica_exchange_object.get_number_of_replicas())
288 IMP.pmi1.tools._add_pmi_provenance(h)
291 def execute_macro(self):
292 temp_index_factor = 100000.0
296 if self.monte_carlo_sample_objects
is not None:
297 print(
"Setting up MonteCarlo")
299 self.monte_carlo_sample_objects,
300 self.vars[
"monte_carlo_temperature"])
301 if self.vars[
"simulated_annealing"]:
302 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
303 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
304 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
305 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
306 sampler_mc.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
307 if self.vars[
"self_adaptive"]:
308 sampler_mc.set_self_adaptive(isselfadaptive=self.vars[
"self_adaptive"])
309 if self.output_objects
is not None:
310 self.output_objects.append(sampler_mc)
311 if self.rmf_output_objects
is not None:
312 self.rmf_output_objects.append(sampler_mc)
313 samplers.append(sampler_mc)
316 if self.molecular_dynamics_sample_objects
is not None:
317 print(
"Setting up MolecularDynamics")
319 self.molecular_dynamics_sample_objects,
320 self.vars[
"monte_carlo_temperature"],
321 maximum_time_step=self.molecular_dynamics_max_time_step)
322 if self.vars[
"simulated_annealing"]:
323 tmin=self.vars[
"simulated_annealing_minimum_temperature"]
324 tmax=self.vars[
"simulated_annealing_maximum_temperature"]
325 nfmin=self.vars[
"simulated_annealing_minimum_temperature_nframes"]
326 nfmax=self.vars[
"simulated_annealing_maximum_temperature_nframes"]
327 sampler_md.set_simulated_annealing(tmin,tmax,nfmin,nfmax)
328 if self.output_objects
is not None:
329 self.output_objects.append(sampler_md)
330 if self.rmf_output_objects
is not None:
331 self.rmf_output_objects.append(sampler_md)
332 samplers.append(sampler_md)
335 print(
"Setting up ReplicaExchange")
338 "replica_exchange_minimum_temperature"],
340 "replica_exchange_maximum_temperature"],
342 replica_exchange_object=self.replica_exchange_object)
343 self.replica_exchange_object = rex.rem
345 myindex = rex.get_my_index()
346 if self.output_objects
is not None:
347 self.output_objects.append(rex)
348 if self.rmf_output_objects
is not None:
349 self.rmf_output_objects.append(rex)
353 min_temp_index = int(min(rex.get_temperatures()) * temp_index_factor)
357 globaldir = self.vars[
"global_output_directory"] +
"/"
358 rmf_dir = globaldir + self.vars[
"rmf_dir"]
359 pdb_dir = globaldir + self.vars[
"best_pdb_dir"]
361 if not self.test_mode:
362 if self.vars[
"do_clean_first"]:
365 if self.vars[
"do_create_directories"]:
368 os.makedirs(globaldir)
376 if not self.is_multi_state:
382 for n
in range(self.vars[
"number_of_states"]):
384 os.makedirs(pdb_dir +
"/" + str(n))
391 if self.output_objects
is not None:
392 self.output_objects.append(sw)
393 if self.rmf_output_objects
is not None:
394 self.rmf_output_objects.append(sw)
396 print(
"Setting up stat file")
398 low_temp_stat_file = globaldir + \
399 self.vars[
"stat_file_name_suffix"] +
"." + str(myindex) +
".out"
402 if not self.test_mode:
405 if not self.test_mode:
406 if self.output_objects
is not None:
407 output.init_stat2(low_temp_stat_file,
409 extralabels=[
"rmf_file",
"rmf_frame_index"])
411 print(
"Stat file writing is disabled")
413 if self.rmf_output_objects
is not None:
414 print(
"Stat info being written in the rmf file")
416 print(
"Setting up replica stat file")
417 replica_stat_file = globaldir + \
418 self.vars[
"replica_stat_file_suffix"] +
"." + str(myindex) +
".out"
419 if not self.test_mode:
420 output.init_stat2(replica_stat_file, [rex], extralabels=[
"score"])
422 if not self.test_mode:
423 print(
"Setting up best pdb files")
424 if not self.is_multi_state:
425 if self.vars[
"number_of_best_scoring_models"] > 0:
426 output.init_pdb_best_scoring(pdb_dir +
"/" +
427 self.vars[
"best_pdb_name_suffix"],
430 "number_of_best_scoring_models"],
431 replica_exchange=
True)
432 output.write_psf(pdb_dir +
"/" +
"model.psf",pdb_dir +
"/" +
433 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
435 if self.vars[
"number_of_best_scoring_models"] > 0:
436 for n
in range(self.vars[
"number_of_states"]):
437 output.init_pdb_best_scoring(pdb_dir +
"/" + str(n) +
"/" +
438 self.vars[
"best_pdb_name_suffix"],
441 "number_of_best_scoring_models"],
442 replica_exchange=
True)
443 output.write_psf(pdb_dir +
"/" + str(n) +
"/" +
"model.psf",pdb_dir +
"/" + str(n) +
"/" +
444 self.vars[
"best_pdb_name_suffix"]+
".0.pdb")
447 if self.em_object_for_rmf
is not None:
448 if not self.is_multi_state
or self.pmi2:
449 output_hierarchies = [
451 self.em_object_for_rmf.get_density_as_hierarchy(
454 output_hierarchies = self.root_hiers
455 output_hierarchies.append(
456 self.em_object_for_rmf.get_density_as_hierarchy())
458 if not self.is_multi_state
or self.pmi2:
459 output_hierarchies = [self.root_hier]
461 output_hierarchies = self.root_hiers
464 if not self.test_mode:
465 print(
"Setting up and writing initial rmf coordinate file")
466 init_suffix = globaldir + self.vars[
"initial_rmf_name_suffix"]
467 output.init_rmf(init_suffix +
"." + str(myindex) +
".rmf3",
468 output_hierarchies,listofobjects=self.rmf_output_objects)
469 if self._rmf_restraints:
470 output.add_restraints_to_rmf(
471 init_suffix +
"." + str(myindex) +
".rmf3",
472 self._rmf_restraints)
473 output.write_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
474 output.close_rmf(init_suffix +
"." + str(myindex) +
".rmf3")
478 if not self.test_mode:
479 mpivs=IMP.pmi1.samplers.MPI_values(self.replica_exchange_object)
483 self._add_provenance(sampler_md, sampler_mc)
485 if not self.test_mode:
486 print(
"Setting up production rmf files")
487 rmfname = rmf_dir +
"/" + str(myindex) +
".rmf3"
488 output.init_rmf(rmfname, output_hierarchies, geometries=self.vars[
"geometries"],
489 listofobjects = self.rmf_output_objects)
491 if self._rmf_restraints:
492 output.add_restraints_to_rmf(rmfname, self._rmf_restraints)
494 ntimes_at_low_temp = 0
498 self.replica_exchange_object.set_was_used(
True)
499 nframes = self.vars[
"number_of_frames"]
502 for i
in range(nframes):
506 for nr
in range(self.vars[
"num_sample_rounds"]):
507 if sampler_md
is not None:
509 self.vars[
"molecular_dynamics_steps"])
510 if sampler_mc
is not None:
511 sampler_mc.optimize(self.vars[
"monte_carlo_steps"])
513 self.model).evaluate(
False)
514 mpivs.set_value(
"score",score)
515 output.set_output_entry(
"score", score)
519 my_temp_index = int(rex.get_my_temp() * temp_index_factor)
521 if self.vars[
"save_coordinates_mode"] ==
"lowest_temperature":
522 save_frame=(min_temp_index == my_temp_index)
523 elif self.vars[
"save_coordinates_mode"] ==
"25th_score":
524 score_perc=mpivs.get_percentile(
"score")
525 save_frame=(score_perc*100.0<=25.0)
526 elif self.vars[
"save_coordinates_mode"] ==
"50th_score":
527 score_perc=mpivs.get_percentile(
"score")
528 save_frame=(score_perc*100.0<=50.0)
529 elif self.vars[
"save_coordinates_mode"] ==
"75th_score":
530 score_perc=mpivs.get_percentile(
"score")
531 save_frame=(score_perc*100.0<=75.0)
534 if save_frame
or not self.test_mode:
538 print(
"--- frame %s score %s " % (str(i), str(score)))
540 if not self.test_mode:
541 if i % self.vars[
"nframes_write_coordinates"]==0:
542 print(
'--- writing coordinates')
543 if self.vars[
"number_of_best_scoring_models"] > 0:
544 output.write_pdb_best_scoring(score)
545 output.write_rmf(rmfname)
546 output.set_output_entry(
"rmf_file", rmfname)
547 output.set_output_entry(
"rmf_frame_index", ntimes_at_low_temp)
549 output.set_output_entry(
"rmf_file", rmfname)
550 output.set_output_entry(
"rmf_frame_index",
'-1')
551 if self.output_objects
is not None:
552 output.write_stat2(low_temp_stat_file)
553 ntimes_at_low_temp += 1
555 if not self.test_mode:
556 output.write_stat2(replica_stat_file)
557 if self.vars[
"replica_exchange_swap"]:
558 rex.swap_temp(i, score)
559 if self.representation:
560 for p, state
in self.representation._protocol_output:
561 p.add_replica_exchange(state, self)
563 if not self.test_mode:
564 print(
"closing production rmf files")
565 output.close_rmf(rmfname)
569 """A macro to build a Representation based on a Topology and lists of movers
573 component_topologies,
574 list_of_rigid_bodies=[],
575 list_of_super_rigid_bodies=[],
576 chain_of_super_rigid_bodies=[],
577 sequence_connectivity_scale=4.0,
578 add_each_domain_as_rigid_body=
False,
579 force_create_gmm_files=
False):
581 @param model The IMP model
582 @param component_topologies List of
583 IMP.pmi1.topology.ComponentTopology items
584 @param list_of_rigid_bodies List of lists of domain names that will
585 be moved as rigid bodies.
586 @param list_of_super_rigid_bodies List of lists of domain names
587 that will move together in an additional Monte Carlo move.
588 @param chain_of_super_rigid_bodies List of lists of domain names
589 (choices can only be from the same molecule). Each of these
590 groups will be moved rigidly. This helps to sample more
591 efficiently complex topologies, made of several rigid bodies,
592 connected by flexible linkers.
593 @param sequence_connectivity_scale For scaling the connectivity
595 @param add_each_domain_as_rigid_body That way you don't have to
596 put all of them in the list
597 @param force_create_gmm_files If True, will sample and create GMMs
598 no matter what. If False, will only sample if the
599 files don't exist. If number of Gaussians is zero, won't
605 disorderedlength=
False)
607 data=component_topologies
608 if list_of_rigid_bodies==[]:
609 print(
"WARNING: No list of rigid bodies inputted to build_model()")
610 if list_of_super_rigid_bodies==[]:
611 print(
"WARNING: No list of super rigid bodies inputted to build_model()")
612 if chain_of_super_rigid_bodies==[]:
613 print(
"WARNING: No chain of super rigid bodies inputted to build_model()")
614 all_dnames = set([d
for sublist
in list_of_rigid_bodies+list_of_super_rigid_bodies\
615 +chain_of_super_rigid_bodies
for d
in sublist])
616 all_available = set([c._domain_name
for c
in component_topologies])
617 if not all_dnames <= all_available:
618 raise ValueError(
"All requested movers must reference domain "
619 "names in the component topologies")
623 super_rigid_bodies={}
624 chain_super_rigid_bodies={}
628 comp_name = c.molname
629 hier_name = c._domain_name
631 fasta_file = c.fasta_file
632 fasta_id = c.fasta_id
633 pdb_name = c.pdb_file
635 res_range = c.residue_range
636 offset = c.pdb_offset
637 bead_size = c.bead_size
638 em_num_components = c.em_residues_per_gaussian
639 em_txt_file_name = c.gmm_file
640 em_mrc_file_name = c.mrc_file
642 if comp_name
not in self.simo.get_component_names():
643 self.simo.create_component(comp_name,color=0.0)
644 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
647 if em_num_components==0:
651 if (
not os.path.isfile(em_txt_file_name))
or force_create_gmm_files:
658 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,
659 res_range,include_res0,beadsize=bead_size,
660 color=color,offset=offset)
661 if em_num_components!=0:
663 print(
"will read GMM files")
665 print(
"will calculate GMMs")
667 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,
668 em_mrc_file_name,em_num_components,read_em_files)
669 self.simo.add_all_atom_densities(comp_name, particles=beads)
674 self.resdensities[hier_name]=dens_hier
675 self.domain_dict[hier_name]=outhier+dens_hier
678 for c
in self.simo.get_component_names():
679 self.simo.setup_component_sequence_connectivity(c,scale=sequence_connectivity_scale)
680 self.simo.setup_component_geometry(c)
683 for rblist
in list_of_rigid_bodies:
685 for rbmember
in rblist:
686 rb+=[h
for h
in self.domain_dict[rbmember]]
687 self.simo.set_rigid_body_from_hierarchies(rb)
688 for srblist
in list_of_super_rigid_bodies:
690 for srbmember
in rblist:
691 srb+=[h
for h
in self.domain_dict[srbmember]]
692 self.simo.set_super_rigid_body_from_hierarchies(srb)
693 for clist
in chain_of_super_rigid_bodies:
695 for crbmember
in rblist:
696 crb+=[h
for h
in self.domain_dict[crbmember]]
697 self.simo.set_chain_of_super_rigid_bodies(crb,2,3)
699 self.simo.set_floppy_bodies()
700 self.simo.setup_bonds()
708 '''Return the Representation object'''
711 def get_density_hierarchies(self,hier_name_list):
715 for hn
in hier_name_list:
717 dens_hier_list+=self.resdensities[hn]
718 return dens_hier_list
720 def set_gmm_models_directory(self,directory_name):
721 self.gmm_models_directory=directory_name
723 def get_pdb_bead_bits(self,hierarchy):
728 if "_pdb" in h.get_name():pdbbits.append(h)
729 if "_bead" in h.get_name():beadbits.append(h)
730 if "_helix" in h.get_name():helixbits.append(h)
731 return (pdbbits,beadbits,helixbits)
733 def scale_bead_radii(self,nresidues,scale):
735 for h
in self.domain_dict:
736 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
737 slope=(1.0-scale)/(1.0-float(nresidues))
742 if b
not in scaled_beads:
748 scale_factor=slope*float(num_residues)+1.0
750 new_radius=scale_factor*radius
753 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
756 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
758 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
761 for pb
in pdbbits+helixbits:
765 number_of_residues=len(set(res_ind))
769 outhier+=simo.add_component_density(compname,
771 num_components=num_components,
773 inputfile=txtfilename)
774 if len(helixbits)!=0:
775 outhier+=simo.add_component_density(compname,
777 num_components=num_components,
779 inputfile=txtfilename)
784 num_components=number_of_residues//abs(num_components)+1
785 outhier+=simo.add_component_density(compname,
787 num_components=num_components,
789 outputfile=txtfilename,
790 outputmap=mrcfilename,
791 multiply_by_total_mass=
True)
793 if len(helixbits)!=0:
794 num_components=number_of_residues//abs(num_components)+1
795 outhier+=simo.add_component_density(compname,
797 num_components=num_components,
799 outputfile=txtfilename,
800 outputmap=mrcfilename,
801 multiply_by_total_mass=
True)
803 return outhier,beadbits
805 def autobuild(self,simo,comname,pdbname,chain,resrange,include_res0=False,
806 beadsize=5,color=0.0,offset=0):
807 if (pdbname
is not None and pdbname !=
"IDEAL_HELIX"
808 and pdbname !=
"BEADS"):
810 outhier=simo.autobuild_model(comname,
814 resolutions=[0,1,10],
817 missingbeadsize=beadsize)
819 outhier=simo.autobuild_model(comname,
826 missingbeadsize=beadsize)
829 elif pdbname ==
"IDEAL_HELIX":
830 outhier=simo.add_component_ideal_helix(comname,
836 elif pdbname ==
"BEADS":
837 outhier=simo.add_component_necklace(comname,resrange[0],resrange[1],beadsize,color=color)
841 seq_len=len(simo.sequence_dict[comname])
842 outhier=simo.add_component_necklace(comname,
855 @param representation The PMI representation
857 self.simo=representation
858 self.gmm_models_directory=
"."
860 self.rmf_frame_number={}
861 self.rmf_names_map={}
862 self.rmf_component_name={}
864 def set_gmm_models_directory(self,directory_name):
865 self.gmm_models_directory=directory_name
867 def build_model(self,data_structure,sequence_connectivity_scale=4.0,
868 sequence_connectivity_resolution=10,
869 rmf_file=
None,rmf_frame_number=0,rmf_file_map=
None,
870 skip_connectivity_these_domains=
None,
871 skip_gaussian_in_rmf=
False, skip_gaussian_in_representation=
False):
873 @param data_structure List of lists containing these entries:
874 comp_name, hier_name, color, fasta_file, fasta_id, pdb_name, chain_id,
875 res_range, read_em_files, bead_size, rb, super_rb,
876 em_num_components, em_txt_file_name, em_mrc_file_name, chain_of_super_rb,
877 keep_gaussian_flexible_beads (optional)
878 @param sequence_connectivity_scale
880 @param rmf_frame_number
881 @param rmf_file_map : a dictionary that map key=component_name:value=(rmf_file_name,
887 super_rigid_bodies={}
888 chain_super_rigid_bodies={}
891 for d
in data_structure:
899 res_range = d[7][0:2]
908 em_num_components = d[12]
909 em_txt_file_name = d[13]
910 em_mrc_file_name = d[14]
911 chain_of_super_rb = d[15]
913 keep_gaussian_flexible_beads = d[16]
915 keep_gaussian_flexible_beads =
True
917 if comp_name
not in self.simo.get_component_names():
918 self.simo.create_component(comp_name,color=0.0)
919 self.simo.add_component_sequence(comp_name,fasta_file,fasta_id)
920 outhier=self.autobuild(self.simo,comp_name,pdb_name,chain_id,res_range,read=read_em_files,beadsize=bead_size,color=color,offset=offset)
923 if not read_em_files
is None:
924 if em_txt_file_name ==
" ": em_txt_file_name=self.gmm_models_directory+
"/"+hier_name+
".txt"
925 if em_mrc_file_name ==
" ": em_mrc_file_name=self.gmm_models_directory+
"/"+hier_name+
".mrc"
928 dens_hier,beads=self.create_density(self.simo,comp_name,outhier,em_txt_file_name,em_mrc_file_name,em_num_components,read_em_files)
930 if (keep_gaussian_flexible_beads):
931 self.simo.add_all_atom_densities(comp_name, particles=beads)
936 self.resdensities[hier_name]=dens_hier
937 self.domain_dict[hier_name]=outhier+dens_hier
940 if rb
not in rigid_bodies:
941 rigid_bodies[rb]=[h
for h
in self.domain_dict[hier_name]]
943 rigid_bodies[rb]+=[h
for h
in self.domain_dict[hier_name]]
946 if super_rb
is not None:
948 if k
not in super_rigid_bodies:
949 super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
951 super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
953 if chain_of_super_rb
is not None:
954 for k
in chain_of_super_rb:
955 if k
not in chain_super_rigid_bodies:
956 chain_super_rigid_bodies[k]=[h
for h
in self.domain_dict[hier_name]]
958 chain_super_rigid_bodies[k]+=[h
for h
in self.domain_dict[hier_name]]
962 self.rigid_bodies=rigid_bodies
964 for c
in self.simo.get_component_names():
965 if rmf_file
is not None:
968 self.simo.set_coordinates_from_rmf(c, rf,rfn,
969 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
971 for k
in rmf_file_map:
973 rf=rmf_file_map[k][0]
974 rfn=rmf_file_map[k][1]
975 rcname=rmf_file_map[k][2]
976 self.simo.set_coordinates_from_rmf(cname, rf,rfn,rcname,
977 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
979 if c
in self.rmf_file:
981 rfn=self.rmf_frame_number[c]
982 rfm=self.rmf_names_map[c]
983 rcname=self.rmf_component_name[c]
984 self.simo.set_coordinates_from_rmf(c, rf,rfn,representation_name_to_rmf_name_map=rfm,
985 rmf_component_name=rcname,
986 skip_gaussian_in_rmf=skip_gaussian_in_rmf, skip_gaussian_in_representation=skip_gaussian_in_representation)
987 if (
not skip_connectivity_these_domains)
or (c
not in skip_connectivity_these_domains):
988 self.simo.setup_component_sequence_connectivity(c,
989 resolution=sequence_connectivity_resolution,
990 scale=sequence_connectivity_scale)
991 self.simo.setup_component_geometry(c)
993 for rb
in rigid_bodies:
994 self.simo.set_rigid_body_from_hierarchies(rigid_bodies[rb])
996 for k
in super_rigid_bodies:
997 self.simo.set_super_rigid_body_from_hierarchies(super_rigid_bodies[k])
999 for k
in chain_super_rigid_bodies:
1000 self.simo.set_chain_of_super_rigid_bodies(chain_super_rigid_bodies[k],2,3)
1002 self.simo.set_floppy_bodies()
1003 self.simo.setup_bonds()
1005 def set_main_chain_mover(self,hier_name,lengths=[5,10,20,30]):
1006 hiers=self.domain_dict[hier_name]
1007 for length
in lengths:
1008 for n
in range(len(hiers)-length):
1009 hs=hiers[n+1:n+length-1]
1010 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n+length].get_particle()),min_size=3)
1011 for n
in range(1,len(hiers)-1,5):
1013 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1015 self.simo.set_super_rigid_body_from_hierarchies(hs, axis=(hiers[n].get_particle(),hiers[n-1].get_particle()),min_size=3)
1018 def set_rmf_file(self,component_name,rmf_file,rmf_frame_number,rmf_names_map=None,rmf_component_name=None):
1019 self.rmf_file[component_name]=rmf_file
1020 self.rmf_frame_number[component_name]=rmf_frame_number
1021 self.rmf_names_map[component_name]=rmf_names_map
1022 self.rmf_component_name[component_name]=rmf_component_name
1024 def get_density_hierarchies(self,hier_name_list):
1028 for hn
in hier_name_list:
1030 dens_hier_list+=self.resdensities[hn]
1031 return dens_hier_list
1033 def get_pdb_bead_bits(self,hierarchy):
1038 if "_pdb" in h.get_name():pdbbits.append(h)
1039 if "_bead" in h.get_name():beadbits.append(h)
1040 if "_helix" in h.get_name():helixbits.append(h)
1041 return (pdbbits,beadbits,helixbits)
1043 def scale_bead_radii(self,nresidues,scale):
1045 for h
in self.domain_dict:
1046 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(self.domain_dict[h])
1047 slope=(1.0-scale)/(1.0-float(nresidues))
1052 if b
not in scaled_beads:
1058 scale_factor=slope*float(num_residues)+1.0
1060 new_radius=scale_factor*radius
1063 print(
"particle with radius "+str(radius)+
" and "+str(num_residues)+
" residues scaled to a new radius "+str(new_radius))
1066 def create_density(self,simo,compname,comphier,txtfilename,mrcfilename,num_components,read=True):
1068 (pdbbits,beadbits,helixbits)=self.get_pdb_bead_bits(comphier)
1072 for pb
in pdbbits+helixbits:
1076 number_of_residues=len(set(res_ind))
1080 outhier+=simo.add_component_density(compname,
1082 num_components=num_components,
1084 inputfile=txtfilename)
1085 if len(helixbits)!=0:
1086 outhier+=simo.add_component_density(compname,
1088 num_components=num_components,
1090 inputfile=txtfilename)
1095 if num_components<0:
1098 num_components=number_of_residues/abs(num_components)
1099 outhier+=simo.add_component_density(compname,
1101 num_components=num_components,
1103 outputfile=txtfilename,
1104 outputmap=mrcfilename,
1105 multiply_by_total_mass=
True)
1107 if len(helixbits)!=0:
1108 if num_components<0:
1111 num_components=number_of_residues/abs(num_components)
1112 outhier+=simo.add_component_density(compname,
1114 num_components=num_components,
1116 outputfile=txtfilename,
1117 outputmap=mrcfilename,
1118 multiply_by_total_mass=
True)
1120 return outhier,beadbits
1122 def autobuild(self,simo,comname,pdbname,chain,resrange,read=True,beadsize=5,color=0.0,offset=0):
1124 if (pdbname
is not None and pdbname !=
"IDEAL_HELIX"
1125 and pdbname !=
"BEADS" and pdbname !=
"DENSITY"):
1126 if resrange[-1]==-1: resrange=(resrange[0],len(simo.sequence_dict[comname]))
1128 outhier=simo.autobuild_model(comname,
1132 resolutions=[0,1,10],
1135 missingbeadsize=beadsize)
1137 outhier=simo.autobuild_model(comname,
1144 missingbeadsize=beadsize)
1146 elif pdbname ==
"IDEAL_HELIX":
1147 outhier=simo.add_component_ideal_helix(comname,
1153 elif pdbname ==
"BEADS":
1156 seq_len=len(simo.sequence_dict[comname])
1157 outhier=simo.add_component_necklace(comname,resrange[0],seq_len,beadsize,color=color)
1159 elif pdbname ==
"DENSITY":
1164 seq_len=len(simo.sequence_dict[comname])
1165 outhier=simo.add_component_necklace(comname,
1172 def set_coordinates(self,hier_name,xyz_tuple):
1173 hier=self.domain_dict[hier_name]
1181 def save_rmf(self,rmfname):
1184 self.simo.model.update()
1185 o.init_rmf(rmfname,[self.simo.prot])
1186 o.write_rmf(rmfname)
1187 o.close_rmf(rmfname)
1195 missing_bead_size=20,
1196 residue_per_gaussian=
None):
1198 Construct a component for each subunit (no splitting, nothing fancy).
1199 You can pass the resolutions and the bead size for the missing residue regions.
1200 To use this macro, you must provide the following data structure:
1201 Component pdbfile chainid rgb color fastafile sequence id
1203 data = [("Rpb1", pdbfile, "A", 0.00000000, (fastafile, 0)),
1204 ("Rpb2", pdbfile, "B", 0.09090909, (fastafile, 1)),
1205 ("Rpb3", pdbfile, "C", 0.18181818, (fastafile, 2)),
1206 ("Rpb4", pdbfile, "D", 0.27272727, (fastafile, 3)),
1207 ("Rpb5", pdbfile, "E", 0.36363636, (fastafile, 4)),
1208 ("Rpb6", pdbfile, "F", 0.45454545, (fastafile, 5)),
1209 ("Rpb7", pdbfile, "G", 0.54545455, (fastafile, 6)),
1210 ("Rpb8", pdbfile, "H", 0.63636364, (fastafile, 7)),
1211 ("Rpb9", pdbfile, "I", 0.72727273, (fastafile, 8)),
1212 ("Rpb10", pdbfile, "L", 0.81818182, (fastafile, 9)),
1213 ("Rpb11", pdbfile, "J", 0.90909091, (fastafile, 10)),
1214 ("Rpb12", pdbfile, "K", 1.00000000, (fastafile, 11))]
1224 component_name = d[0]
1228 fasta_file = d[4][0]
1230 fastids = IMP.pmi1.tools.get_ids_from_fasta_file(fasta_file)
1231 fasta_file_id = d[4][1]
1233 r.create_component(component_name,
1236 r.add_component_sequence(component_name,
1238 id=fastids[fasta_file_id])
1240 hierarchies = r.autobuild_model(component_name,
1243 resolutions=resolutions,
1244 missingbeadsize=missing_bead_size)
1246 r.show_component_table(component_name)
1248 r.set_rigid_bodies([component_name])
1250 r.set_chain_of_super_rigid_bodies(
1255 r.setup_component_sequence_connectivity(component_name, resolution=1)
1256 r.setup_component_geometry(component_name)
1260 r.set_floppy_bodies()
1263 r.set_current_coordinates_as_reference_for_rmsd(
"Reference")
1271 """A macro for running all the basic operations of analysis.
1272 Includes clustering, precision analysis, and making ensemble density maps.
1273 A number of plots are also supported.
1276 merge_directories=[
"./"],
1277 stat_file_name_suffix=
"stat",
1278 best_pdb_name_suffix=
"model",
1279 do_clean_first=
True,
1280 do_create_directories=
True,
1281 global_output_directory=
"output/",
1282 replica_stat_file_suffix=
"stat_replica",
1283 global_analysis_result_directory=
"./analysis/",
1286 @param model The IMP model
1287 @param stat_file_name_suffix
1288 @param merge_directories The directories containing output files
1289 @param best_pdb_name_suffix
1290 @param do_clean_first
1291 @param do_create_directories
1292 @param global_output_directory Where everything is
1293 @param replica_stat_file_suffix
1294 @param global_analysis_result_directory
1295 @param test_mode If True, nothing is changed on disk
1299 from mpi4py
import MPI
1300 self.comm = MPI.COMM_WORLD
1301 self.rank = self.comm.Get_rank()
1302 self.number_of_processes = self.comm.size
1305 self.number_of_processes = 1
1307 self.test_mode = test_mode
1308 self._protocol_output = []
1309 self.cluster_obj =
None
1311 stat_dir = global_output_directory
1312 self.stat_files = []
1314 for rd
in merge_directories:
1315 stat_files = glob.glob(os.path.join(rd,stat_dir,
"stat.*.out"))
1316 if len(stat_files)==0:
1317 print(
"WARNING: no stat files found in",os.path.join(rd,stat_dir))
1318 self.stat_files += stat_files
1321 """Capture details of the modeling protocol.
1322 @param p an instance of IMP.pmi1.output.ProtocolOutput or a subclass.
1325 self._protocol_output.append((p, p._last_state))
1328 score_key=
"SimplifiedModel_Total_Score_None",
1329 rmf_file_key=
"rmf_file",
1330 rmf_file_frame_key=
"rmf_frame_index",
1333 nframes_trajectory=10000):
1334 """ Get a trajectory of the modeling run, for generating demonstrative movies
1335 @param score_key The score for ranking models
1336 @param rmf_file_key Key pointing to RMF filename
1337 @param rmf_file_frame_key Key pointing to RMF frame number
1338 @param outputdir The local output directory used in the run
1339 @param get_every Extract every nth frame
1340 @param nframes_trajectory Total number of frames of the trajectory
1342 from operator
import itemgetter
1350 rmf_file_list=trajectory_models[0]
1351 rmf_file_frame_list=trajectory_models[1]
1352 score_list=list(map(float, trajectory_models[2]))
1354 max_score=max(score_list)
1355 min_score=min(score_list)
1358 bins=[(max_score-min_score)*math.exp(-float(i))+min_score
for i
in range(nframes_trajectory)]
1359 binned_scores=[
None]*nframes_trajectory
1360 binned_model_indexes=[-1]*nframes_trajectory
1362 for model_index,s
in enumerate(score_list):
1363 bins_score_diffs=[abs(s-b)
for b
in bins]
1364 bin_index=min(enumerate(bins_score_diffs), key=itemgetter(1))[0]
1365 if binned_scores[bin_index]==
None:
1366 binned_scores[bin_index]=s
1367 binned_model_indexes[bin_index]=model_index
1369 old_diff=abs(binned_scores[bin_index]-bins[bin_index])
1370 new_diff=abs(s-bins[bin_index])
1371 if new_diff < old_diff:
1372 binned_scores[bin_index]=s
1373 binned_model_indexes[bin_index]=model_index
1375 print(binned_scores)
1376 print(binned_model_indexes)
1379 def _expand_ambiguity(self,prot,d):
1380 """If using PMI2, expand the dictionary to include copies as ambiguous options
1381 This also keeps the states separate.
1386 if '..' in key
or (type(val)
is tuple
and len(val)>=3):
1389 states = IMP.atom.get_by_type(prot,IMP.atom.STATE_TYPE)
1390 if type(val)
is tuple:
1398 for nst
in range(len(states)):
1400 copies = sel.get_selected_particles(with_representation=
False)
1402 for nc
in range(len(copies)):
1404 newdict[
'%s.%i..%i'%(name,nst,nc)] = (start,stop,name,nc,nst)
1406 newdict[
'%s..%i'%(name,nc)] = (start,stop,name,nc,nst)
1413 score_key=
"SimplifiedModel_Total_Score_None",
1414 rmf_file_key=
"rmf_file",
1415 rmf_file_frame_key=
"rmf_frame_index",
1417 prefiltervalue=
None,
1420 alignment_components=
None,
1421 number_of_best_scoring_models=10,
1422 rmsd_calculation_components=
None,
1423 distance_matrix_file=
'distances.mat',
1424 load_distance_matrix_file=
False,
1425 skip_clustering=
False,
1426 number_of_clusters=1,
1428 exit_after_display=
True,
1430 first_and_last_frames=
None,
1431 density_custom_ranges=
None,
1432 write_pdb_with_centered_coordinates=
False,
1434 """ Get the best scoring models, compute a distance matrix, cluster them, and create density maps.
1435 Tuple format: "molname" just the molecule, or (start,stop,molname,copy_num(optional),state_num(optional)
1436 Can pass None for copy or state to ignore that field.
1437 If you don't pass a specific copy number
1438 @param score_key The score for ranking models. Use "Total_Score"
1439 instead of default for PMI2.
1440 @param rmf_file_key Key pointing to RMF filename
1441 @param rmf_file_frame_key Key pointing to RMF frame number
1442 @param state_number State number to analyze
1443 @param prefiltervalue Only include frames where the
1444 score key is below this value
1445 @param feature_keys Keywords for which you want to
1446 calculate average, medians, etc.
1447 If you pass "Keyname" it'll include everything that matches "*Keyname*"
1448 @param outputdir The local output directory used in the run
1449 @param alignment_components Dictionary with keys=groupname, values are tuples
1450 for aligning the structures e.g. {"Rpb1": (20,100,"Rpb1"),"Rpb2":"Rpb2"}
1451 @param number_of_best_scoring_models Num models to keep per run
1452 @param rmsd_calculation_components For calculating RMSD
1453 (same format as alignment_components)
1454 @param distance_matrix_file Where to store/read the distance matrix
1455 @param load_distance_matrix_file Try to load the distance matrix file
1456 @param skip_clustering Just extract the best scoring models
1458 @param number_of_clusters Number of k-means clusters
1459 @param display_plot Display the distance matrix
1460 @param exit_after_display Exit after displaying distance matrix
1461 @param get_every Extract every nth frame
1462 @param first_and_last_frames A tuple with the first and last frames to be
1463 analyzed. Values are percentages!
1464 Default: get all frames
1465 @param density_custom_ranges For density calculation
1466 (same format as alignment_components)
1467 @param write_pdb_with_centered_coordinates
1468 @param voxel_size Used for the density output
1472 self._outputdir = os.path.abspath(outputdir)
1473 self._number_of_clusters = number_of_clusters
1474 for p, state
in self._protocol_output:
1475 p.add_replica_exchange_analysis(state, self, density_custom_ranges)
1486 if not load_distance_matrix_file:
1487 if len(self.stat_files)==0: print(
"ERROR: no stat file found in the given path");
return
1488 my_stat_files = IMP.pmi1.tools.chunk_list_into_segments(
1489 self.stat_files,self.number_of_processes)[self.rank]
1493 orig_score_key = score_key
1494 if score_key
not in po.get_keys():
1495 if 'Total_Score' in po.get_keys():
1496 score_key =
'Total_Score'
1497 print(
"WARNING: Using 'Total_Score' instead of "
1498 "'SimplifiedModel_Total_Score_None' for the score key")
1499 for k
in [orig_score_key,score_key,rmf_file_key,rmf_file_frame_key]:
1500 if k
in feature_keys:
1501 print(
"WARNING: no need to pass " +k+
" to feature_keys.")
1502 feature_keys.remove(k)
1510 get_every, provenance=prov)
1511 rmf_file_list=best_models[0]
1512 rmf_file_frame_list=best_models[1]
1513 score_list=best_models[2]
1514 feature_keyword_list_dict=best_models[3]
1520 if self.number_of_processes > 1:
1524 rmf_file_frame_list)
1525 for k
in feature_keyword_list_dict:
1527 feature_keyword_list_dict[k])
1530 score_rmf_tuples = list(zip(score_list,
1532 rmf_file_frame_list,
1533 list(range(len(score_list)))))
1535 if density_custom_ranges:
1536 for k
in density_custom_ranges:
1537 if type(density_custom_ranges[k])
is not list:
1538 raise Exception(
"Density custom ranges: values must be lists of tuples")
1541 if first_and_last_frames
is not None:
1542 nframes = len(score_rmf_tuples)
1543 first_frame = int(first_and_last_frames[0] * nframes)
1544 last_frame = int(first_and_last_frames[1] * nframes)
1545 if last_frame > len(score_rmf_tuples):
1547 score_rmf_tuples = score_rmf_tuples[first_frame:last_frame]
1550 best_score_rmf_tuples = sorted(score_rmf_tuples,
1551 key=
lambda x: float(x[0]))[:number_of_best_scoring_models]
1552 best_score_rmf_tuples=[t+(n,)
for n,t
in enumerate(best_score_rmf_tuples)]
1554 prov.append(IMP.pmi1.io.FilterProvenance(
"Best scoring",
1555 0, number_of_best_scoring_models))
1557 best_score_feature_keyword_list_dict = defaultdict(list)
1558 for tpl
in best_score_rmf_tuples:
1560 for f
in feature_keyword_list_dict:
1561 best_score_feature_keyword_list_dict[f].append(
1562 feature_keyword_list_dict[f][index])
1563 my_best_score_rmf_tuples = IMP.pmi1.tools.chunk_list_into_segments(
1564 best_score_rmf_tuples,
1565 self.number_of_processes)[self.rank]
1568 prot_ahead = IMP.pmi1.analysis.get_hiers_from_rmf(self.model,
1570 my_best_score_rmf_tuples[0][1])[0]
1576 my_best_score_rmf_tuples[0],
1577 rmsd_calculation_components,
1578 state_number=state_number)
1580 my_best_score_rmf_tuples,
1581 alignment_components,
1582 rmsd_calculation_components,
1583 state_number=state_number)
1588 all_coordinates=got_coords[0]
1589 alignment_coordinates=got_coords[1]
1590 rmsd_coordinates=got_coords[2]
1591 rmf_file_name_index_dict=got_coords[3]
1592 all_rmf_file_names=got_coords[4]
1598 if density_custom_ranges:
1600 density_custom_ranges,
1603 dircluster=os.path.join(outputdir,
"all_models."+str(n))
1609 os.mkdir(dircluster)
1612 clusstat=open(os.path.join(dircluster,
"stat."+str(self.rank)+
".out"),
"w")
1613 for cnt,tpl
in enumerate(my_best_score_rmf_tuples):
1615 rmf_frame_number=tpl[2]
1618 for key
in best_score_feature_keyword_list_dict:
1619 tmp_dict[key]=best_score_feature_keyword_list_dict[key][index]
1622 prots,rs = IMP.pmi1.analysis.get_hiers_and_restraints_from_rmf(
1627 linking_successful=IMP.pmi1.analysis.link_hiers_and_restraints_to_rmf(
1633 if not linking_successful:
1639 prot = prots[state_number]
1644 coords_f1=alignment_coordinates[cnt]
1646 coords_f2=alignment_coordinates[cnt]
1649 transformation = Ali.align()[1]
1671 out_pdb_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".pdb")
1672 out_rmf_fn=os.path.join(dircluster,str(cnt)+
"."+str(self.rank)+
".rmf3")
1673 o.init_pdb(out_pdb_fn,prot)
1674 o.write_pdb(out_pdb_fn,
1675 translate_to_geometric_center=write_pdb_with_centered_coordinates)
1677 tmp_dict[
"local_pdb_file_name"]=os.path.basename(out_pdb_fn)
1678 tmp_dict[
"rmf_file_full_path"]=rmf_name
1679 tmp_dict[
"local_rmf_file_name"]=os.path.basename(out_rmf_fn)
1680 tmp_dict[
"local_rmf_frame_number"]=0
1682 clusstat.write(str(tmp_dict)+
"\n")
1684 o.init_rmf(out_rmf_fn, [prot],rs)
1686 o.write_rmf(out_rmf_fn)
1687 o.close_rmf(out_rmf_fn)
1689 if density_custom_ranges:
1690 DensModule.add_subunits_density(prot)
1692 if density_custom_ranges:
1693 DensModule.write_mrc(path=dircluster)
1700 if self.number_of_processes > 1:
1706 rmf_file_name_index_dict)
1708 alignment_coordinates)
1715 [best_score_feature_keyword_list_dict,
1716 rmf_file_name_index_dict],
1722 print(
"setup clustering class")
1725 for n, model_coordinate_dict
in enumerate(all_coordinates):
1726 template_coordinate_dict = {}
1728 if alignment_components
is not None and len(self.cluster_obj.all_coords) == 0:
1730 self.cluster_obj.set_template(alignment_coordinates[n])
1731 self.cluster_obj.fill(all_rmf_file_names[n], rmsd_coordinates[n])
1732 print(
"Global calculating the distance matrix")
1735 self.cluster_obj.dist_matrix()
1739 self.cluster_obj.do_cluster(number_of_clusters)
1742 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1743 if exit_after_display:
1745 self.cluster_obj.save_distance_matrix_file(file_name=distance_matrix_file)
1752 print(
"setup clustering class")
1754 self.cluster_obj.load_distance_matrix_file(file_name=distance_matrix_file)
1755 print(
"clustering with %s clusters" % str(number_of_clusters))
1756 self.cluster_obj.do_cluster(number_of_clusters)
1757 [best_score_feature_keyword_list_dict,
1758 rmf_file_name_index_dict] = self.load_objects(
".macro.pkl")
1761 self.cluster_obj.plot_matrix(figurename=os.path.join(outputdir,
'dist_matrix.pdf'))
1762 if exit_after_display:
1764 if self.number_of_processes > 1:
1772 print(self.cluster_obj.get_cluster_labels())
1773 for n, cl
in enumerate(self.cluster_obj.get_cluster_labels()):
1774 print(
"rank %s " % str(self.rank))
1775 print(
"cluster %s " % str(n))
1776 print(
"cluster label %s " % str(cl))
1777 print(self.cluster_obj.get_cluster_label_names(cl))
1778 cluster_size = len(self.cluster_obj.get_cluster_label_names(cl))
1779 cluster_prov = prov + \
1780 [IMP.pmi1.io.ClusterProvenance(cluster_size)]
1783 if density_custom_ranges:
1785 density_custom_ranges,
1788 dircluster = outputdir +
"/cluster." + str(n) +
"/"
1790 os.mkdir(dircluster)
1794 rmsd_dict = {
"AVERAGE_RMSD":
1795 str(self.cluster_obj.get_cluster_label_average_rmsd(cl))}
1796 clusstat = open(dircluster +
"stat.out",
"w")
1797 for k, structure_name
in enumerate(self.cluster_obj.get_cluster_label_names(cl)):
1800 tmp_dict.update(rmsd_dict)
1801 index = rmf_file_name_index_dict[structure_name]
1802 for key
in best_score_feature_keyword_list_dict:
1804 key] = best_score_feature_keyword_list_dict[
1810 rmf_name = structure_name.split(
"|")[0]
1811 rmf_frame_number = int(structure_name.split(
"|")[1])
1812 clusstat.write(str(tmp_dict) +
"\n")
1816 prots,rs = IMP.pmi1.analysis.get_hiers_and_restraints_from_rmf(
1821 linking_successful = IMP.pmi1.analysis.link_hiers_and_restraints_to_rmf(
1827 if not linking_successful:
1832 prot = prots[state_number]
1838 model_index = self.cluster_obj.get_model_index_from_name(
1840 transformation = self.cluster_obj.get_transformation_to_first_member(
1860 if density_custom_ranges:
1861 DensModule.add_subunits_density(prot)
1866 o.init_pdb(dircluster + str(k) +
".pdb", prot)
1867 o.write_pdb(dircluster + str(k) +
".pdb")
1869 o.init_rmf(dircluster + str(k) +
".rmf3", [prot],rs)
1870 o.write_rmf(dircluster + str(k) +
".rmf3")
1871 o.close_rmf(dircluster + str(k) +
".rmf3")
1876 if density_custom_ranges:
1877 DensModule.write_mrc(path=dircluster)
1880 if self.number_of_processes>1:
1883 def get_cluster_rmsd(self,cluster_num):
1884 if self.cluster_obj
is None:
1886 return self.cluster_obj.get_cluster_label_average_rmsd(cluster_num)
1888 def save_objects(self, objects, file_name):
1890 outf = open(file_name,
'wb')
1891 pickle.dump(objects, outf)
1894 def load_objects(self, file_name):
1896 inputf = open(file_name,
'rb')
1897 objects = pickle.load(inputf)
1904 This class contains analysis utilities to investigate ReplicaExchange results.
1917 Construction of the Class.
1918 @param model IMP.Model()
1919 @param stat_files list of string. Can be ascii stat files, rmf files names
1920 @param best_models Integer. Number of best scoring models, if None: all models will be read
1921 @param alignment boolean (Default=True). Align before computing the rmsd.
1925 self.best_models=best_models
1938 self.clusters.append(c)
1939 for n0
in range(len(self.stath0)):
1941 self.pairwise_rmsd={}
1942 self.pairwise_molecular_assignment={}
1943 self.alignment=alignment
1944 self.symmetric_molecules={}
1945 self.issymmetricsel={}
1947 self.molcopydict0=IMP.pmi1.tools.get_molecules_dictionary_by_copy(
IMP.atom.get_leaves(self.stath0))
1948 self.molcopydict1=IMP.pmi1.tools.get_molecules_dictionary_by_copy(
IMP.atom.get_leaves(self.stath1))
1952 Setup the selection onto which the rmsd is computed
1953 @param kwargs use IMP.atom.Selection keywords
1961 Store names of symmetric molecules
1963 self.symmetric_molecules[molecule_name]=0
1968 Setup the selection onto which the alignment is computed
1969 @param kwargs use IMP.atom.Selection keywords
1977 def clean_clusters(self):
1978 for c
in self.clusters: del c
1982 def cluster(self, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
1984 Cluster the models based on RMSD.
1985 @param rmsd_cutoff Float the distance cutoff in Angstrom
1986 @param metric (Default=IMP.atom.get_rmsd) the metric that will be used to compute rmsds
1988 self.clean_clusters()
1989 not_clustered=set(range(len(self.stath1)))
1990 while len(not_clustered)>0:
1991 self.
aggregate(not_clustered, rmsd_cutoff, metric)
1997 Refine the clusters by merging the ones whose centers are close
1998 @param rmsd_cutoff cutoff distance in Angstorms
2001 clusters_copy=self.clusters
2002 for c0,c1
in itertools.combinations(self.clusters,2):
2003 if c0.center_index
is None:
2005 if c1.center_index
is None:
2007 d0=self.stath0[c0.center_index]
2008 d1=self.stath1[c1.center_index]
2009 rmsd, molecular_assignment = self.
rmsd()
2010 if rmsd <= rmsd_cutoff:
2011 if c1
in self.clusters:
2012 clusters_copy.remove(c1)
2014 self.clusters=clusters_copy
2021 def set_cluster_assignments(self, cluster_ids):
2022 if len(cluster_ids)!=len(self.stath0):
2023 raise ValueError(
'cluster ids has to be same length as number of frames')
2026 for i
in sorted(list(set(cluster_ids))):
2028 for i, (idx, d)
in enumerate(zip(cluster_ids, self.stath0)):
2029 self.clusters[idx].add_member(i,d)
2033 Return the model data from a cluster
2034 @param cluster IMP.pmi1.output.Cluster object
2043 Save the data for the whole models into a pickle file
2044 @param filename string
2046 self.stath0.save_data(filename)
2050 Set the data from an external IMP.pmi1.output.Data
2051 @param data IMP.pmi1.output.Data
2053 self.stath0.data=data
2054 self.stath1.data=data
2058 Load the data from an external pickled file
2059 @param filename string
2061 self.stath0.load_data(filename)
2062 self.stath1.load_data(filename)
2063 self.best_models=len(self.stath0)
2065 def add_cluster(self,rmf_name_list):
2067 print(
"creating cluster index "+str(len(self.clusters)))
2068 self.clusters.append(c)
2069 current_len=len(self.stath0)
2071 for rmf
in rmf_name_list:
2072 print(
"adding rmf "+rmf)
2073 self.stath0.add_stat_file(rmf)
2074 self.stath1.add_stat_file(rmf)
2076 for n0
in range(current_len,len(self.stath0)):
2083 Save the clusters into a pickle file
2084 @param filename string
2087 import cPickle
as pickle
2090 fl=open(filename,
'wb')
2091 pickle.dump(self.clusters,fl)
2095 Load the clusters from a pickle file
2096 @param filename string
2097 @param append bool (Default=False), if True. append the clusters to the ones currently present
2100 import cPickle
as pickle
2103 fl=open(filename,
'rb')
2104 self.clean_clusters()
2106 self.clusters+=pickle.load(fl)
2108 self.clusters=pickle.load(fl)
2117 Compute the cluster center for a given cluster
2119 member_distance=defaultdict(float)
2121 for n0,n1
in itertools.combinations(cluster.members,2):
2124 rmsd, _ = self.
rmsd()
2125 member_distance[n0]+=rmsd
2127 if len(member_distance)>0:
2128 cluster.center_index=min(member_distance, key=member_distance.get)
2130 cluster.center_index=cluster.members[0]
2134 Save the coordinates of the current cluster a single rmf file
2136 print(
"saving coordinates",cluster)
2139 if rmf_name
is None:
2140 rmf_name=prefix+
'/'+str(cluster.cluster_id)+
".rmf3"
2142 d1=self.stath1[cluster.members[0]]
2144 o.init_rmf(rmf_name, [self.stath1])
2145 for n1
in cluster.members:
2149 if self.alignment: self.align()
2150 o.write_rmf(rmf_name)
2152 o.close_rmf(rmf_name)
2156 remove structures that are similar
2157 append it to a new cluster
2159 print(
"pruning models")
2162 remaining=range(1,len(self.stath1),10)
2164 while len(remaining)>0:
2165 d0=self.stath0[selected]
2167 for n1
in remaining:
2169 if self.alignment: self.align()
2173 print(
"pruning model %s, similar to model %s, rmsd %s"%(str(n1),str(selected),str(d)))
2174 remaining=[x
for x
in remaining
if x
not in rm]
2175 if len(remaining)==0:
break
2176 selected=remaining[0]
2177 filtered.append(selected)
2180 self.clusters.append(c)
2190 Compute the precision of a cluster
2196 if not cluster.center_index
is None:
2197 members1=[cluster.center_index]
2199 members1=cluster.members
2203 for n1
in cluster.members:
2208 tmp_rmsd, _ = self.
rmsd()
2213 precision=rmsd/npairs
2214 cluster.precision=precision
2219 Compute the bipartite precision (ie the cross-precision)
2220 between two clusters
2224 for cn0,n0
in enumerate(cluster1.members):
2226 for cn1,n1
in enumerate(cluster2.members):
2228 tmp_rmsd, _ =self.
rmsd()
2229 if verbose: print(
"--- rmsd between structure %s and structure %s is %s"%(str(cn0),str(cn1),str(tmp_rmsd)))
2232 precision=rmsd/npairs
2235 def rmsf(self,cluster,molecule,copy_index=0,state_index=0,cluster_ref=None,step=1):
2237 Compute the Root mean square fluctuations
2238 of a molecule in a cluster
2239 Returns an IMP.pmi1.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf
2241 rmsf=IMP.pmi1.tools.OrderedDict()
2244 if cluster_ref
is not None:
2245 if not cluster_ref.center_index
is None:
2246 members0 = [cluster_ref.center_index]
2248 members0 = cluster_ref.members
2250 if not cluster.center_index
is None:
2251 members0 = [cluster.center_index]
2253 members0 = cluster.members
2256 copy_index=copy_index,state_index=state_index)
2257 ps0=s0.get_selected_particles()
2271 for n1
in cluster.members[::step]:
2273 print(
"--- rmsf %s %s"%(str(n0),str(n1)))
2276 s1=
IMP.atom.Selection(self.stath1,molecule=molecule,residue_indexes=residue_indexes,resolution=1,
2277 copy_index=copy_index,state_index=state_index)
2278 ps1 = s1.get_selected_particles()
2281 if self.alignment: self.align()
2282 for n,(p0,p1)
in enumerate(zip(ps0,ps1)):
2283 r=residue_indexes[n]
2295 for stath
in [self.stath0,self.stath1]:
2296 if molecule
not in self.symmetric_molecules:
2298 copy_index=copy_index,state_index=state_index)
2301 state_index=state_index)
2303 ps = s.get_selected_particles()
2312 def save_densities(self,cluster,density_custom_ranges,voxel_size=5,reference="Absolute", prefix="./",step=1):
2317 for n1
in cluster.members[::step]:
2318 print(
"density "+str(n1))
2321 if self.alignment: self.align()
2322 dens.add_subunits_density(self.stath1)
2324 dens.write_mrc(path=prefix+
'/',suffix=str(cluster.cluster_id))
2327 def contact_map(self,cluster,contact_threshold=15,log_scale=False,consolidate=False,molecules=None,prefix='./',reference="Absolute"):
2330 import matplotlib.pyplot
as plt
2331 import matplotlib.cm
as cm
2332 from scipy.spatial.distance
import cdist
2334 if molecules
is None:
2338 unique_copies=[mol
for mol
in mols
if mol.get_copy_index() == 0]
2339 mol_names_unique=dict((mol.get_name(),mol)
for mol
in unique_copies)
2340 total_len_unique=sum(max(mol.get_residue_indexes())
for mol
in unique_copies)
2349 seqlen=max(mol.get_residue_indexes())
2350 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2354 for mol
in unique_copies:
2355 seqlen=max(mol.get_residue_indexes())
2356 index_dict[mol] = range(prev_stop, prev_stop + seqlen)
2360 for ncl,n1
in enumerate(cluster.members):
2364 coord_dict=IMP.pmi1.tools.OrderedDict()
2366 mol_name=mol.get_name()
2367 copy_index=mol.get_copy_index()
2368 rindexes = mol.get_residue_indexes()
2369 coords = np.ones((max(rindexes),3))
2370 for rnum
in rindexes:
2372 selpart = sel.get_selected_particles()
2373 if len(selpart) == 0:
continue
2374 selpart = selpart[0]
2375 coords[rnum - 1, :] =
IMP.core.XYZ(selpart).get_coordinates()
2376 coord_dict[mol]=coords
2379 coords=np.concatenate(list(coord_dict.values()))
2380 dists = cdist(coords, coords)
2381 binary_dists = np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
2383 binary_dists_dict={}
2385 len1 = max(mol1.get_residue_indexes())
2387 name1=mol1.get_name()
2388 name2=mol2.get_name()
2389 dists = cdist(coord_dict[mol1], coord_dict[mol2])
2390 if (name1, name2)
not in binary_dists_dict:
2391 binary_dists_dict[(name1, name2)] = np.zeros((len1,len1))
2392 binary_dists_dict[(name1, name2)] += np.where((dists <= contact_threshold) & (dists >= 1.0), 1.0, 0.0)
2393 binary_dists=np.zeros((total_len_unique,total_len_unique))
2395 for name1,name2
in binary_dists_dict:
2396 r1=index_dict[mol_names_unique[name1]]
2397 r2=index_dict[mol_names_unique[name2]]
2398 binary_dists[min(r1):max(r1)+1,min(r2):max(r2)+1] = np.where((binary_dists_dict[(name1, name2)]>=1.0),1.0,0.0)
2403 contact_freqs = binary_dists
2405 dist_maps.append(dists)
2406 av_dist_map += dists
2407 contact_freqs += binary_dists
2412 contact_freqs =-np.log(1.0-1.0/(len(cluster)+1)*contact_freqs)
2414 contact_freqs =1.0/len(cluster)*contact_freqs
2415 av_dist_map=1.0/len(cluster)*contact_freqs
2417 fig = plt.figure(figsize=(100, 100))
2418 ax = fig.add_subplot(111)
2421 gap_between_components=50
2433 prot_list=list(zip(*sorted_tuple))[1]
2436 prot_list=list(zip(*sorted_tuple))[1]
2438 prot_listx = prot_list
2439 nresx = gap_between_components + \
2440 sum([max(mol.get_residue_indexes())
2441 + gap_between_components
for mol
in prot_listx])
2444 prot_listy = prot_list
2445 nresy = gap_between_components + \
2446 sum([max(mol.get_residue_indexes())
2447 + gap_between_components
for mol
in prot_listy])
2452 res = gap_between_components
2453 for mol
in prot_listx:
2454 resoffsetx[mol] = res
2455 res += max(mol.get_residue_indexes())
2457 res += gap_between_components
2461 res = gap_between_components
2462 for mol
in prot_listy:
2463 resoffsety[mol] = res
2464 res += max(mol.get_residue_indexes())
2466 res += gap_between_components
2468 resoffsetdiagonal = {}
2469 res = gap_between_components
2470 for mol
in IMP.pmi1.tools.OrderedSet(prot_listx + prot_listy):
2471 resoffsetdiagonal[mol] = res
2472 res += max(mol.get_residue_indexes())
2473 res += gap_between_components
2478 for n, prot
in enumerate(prot_listx):
2479 res = resoffsetx[prot]
2481 for proty
in prot_listy:
2482 resy = resoffsety[proty]
2483 endy = resendy[proty]
2484 ax.plot([res, res], [resy, endy], linestyle=
'-',color=
'gray', lw=0.4)
2485 ax.plot([end, end], [resy, endy], linestyle=
'-',color=
'gray', lw=0.4)
2486 xticks.append((float(res) + float(end)) / 2)
2491 for n, prot
in enumerate(prot_listy):
2492 res = resoffsety[prot]
2494 for protx
in prot_listx:
2495 resx = resoffsetx[protx]
2496 endx = resendx[protx]
2497 ax.plot([resx, endx], [res, res], linestyle=
'-',color=
'gray', lw=0.4)
2498 ax.plot([resx, endx], [end, end], linestyle=
'-',color=
'gray', lw=0.4)
2499 yticks.append((float(res) + float(end)) / 2)
2504 tmp_array = np.zeros((nresx, nresy))
2506 for px
in prot_listx:
2507 for py
in prot_listy:
2508 resx = resoffsetx[px]
2509 lengx = resendx[px] - 1
2510 resy = resoffsety[py]
2511 lengy = resendy[py] - 1
2512 indexes_x = index_dict[px]
2513 minx = min(indexes_x)
2514 maxx = max(indexes_x)
2515 indexes_y = index_dict[py]
2516 miny = min(indexes_y)
2517 maxy = max(indexes_y)
2518 tmp_array[resx:lengx,resy:lengy] = contact_freqs[minx:maxx,miny:maxy]
2519 ret[(px,py)]=np.argwhere(contact_freqs[minx:maxx,miny:maxy] == 1.0)+1
2521 cax = ax.imshow(tmp_array,
2526 interpolation=
'nearest')
2528 ax.set_xticks(xticks)
2529 ax.set_xticklabels(xlabels, rotation=90)
2530 ax.set_yticks(yticks)
2531 ax.set_yticklabels(ylabels)
2532 plt.setp(ax.get_xticklabels(), fontsize=6)
2533 plt.setp(ax.get_yticklabels(), fontsize=6)
2536 fig.set_size_inches(0.005 * nresx, 0.005 * nresy)
2537 [i.set_linewidth(2.0)
for i
in ax.spines.values()]
2542 plt.savefig(prefix+
"/contact_map."+str(cluster.cluster_id)+
".pdf", dpi=300,transparent=
"False")
2546 def plot_rmsd_matrix(self,filename):
2548 self.compute_all_pairwise_rmsd()
2549 distance_matrix = np.zeros(
2550 (len(self.stath0), len(self.stath1)))
2551 for (n0,n1)
in self.pairwise_rmsd:
2552 distance_matrix[n0, n1] = self.pairwise_rmsd[(n0,n1)]
2554 import matplotlib
as mpl
2556 import matplotlib.pylab
as pl
2557 from scipy.cluster
import hierarchy
as hrc
2559 fig = pl.figure(figsize=(10,8))
2560 ax = fig.add_subplot(212)
2561 dendrogram = hrc.dendrogram(
2562 hrc.linkage(distance_matrix),
2565 leaves_order = dendrogram[
'leaves']
2566 ax.set_xlabel(
'Model')
2567 ax.set_ylabel(
'RMSD [Angstroms]')
2569 ax2 = fig.add_subplot(221)
2571 distance_matrix[leaves_order,
2574 interpolation=
'nearest')
2575 cb = fig.colorbar(cax)
2576 cb.set_label(
'RMSD [Angstroms]')
2577 ax2.set_xlabel(
'Model')
2578 ax2.set_ylabel(
'Model')
2580 pl.savefig(filename, dpi=300)
2589 Update the cluster id numbers
2591 for n,c
in enumerate(self.clusters):
2594 def get_molecule(self, hier, name, copy):
2602 self.seldict0=IMP.pmi1.tools.get_selections_dictionary(self.sel0_rmsd.get_selected_particles())
2603 self.seldict1=IMP.pmi1.tools.get_selections_dictionary(self.sel1_rmsd.get_selected_particles())
2604 for mol
in self.seldict0:
2605 for sel
in self.seldict0[mol]:
2606 self.issymmetricsel[sel]=
False
2607 for mol
in self.symmetric_molecules:
2608 self.symmetric_molecules[mol]=len(self.seldict0[mol])
2609 for sel
in self.seldict0[mol]:
2610 self.issymmetricsel[sel]=
True
2617 for rb
in self.rbs1:
2620 for bead
in self.beads1:
2625 def aggregate(self, idxs, rmsd_cutoff=10, metric=IMP.atom.get_rmsd):
2627 initial filling of the clusters.
2630 print(
"clustering model "+str(n0))
2631 d0 = self.stath0[n0]
2633 print(
"creating cluster index "+str(len(self.clusters)))
2634 self.clusters.append(c)
2636 clustered = set([n0])
2638 print(
"--- trying to add model "+str(n1)+
" to cluster "+str(len(self.clusters)))
2639 d1 = self.stath1[n1]
2640 if self.alignment: self.align()
2641 rmsd, _ = self.
rmsd(metric=metric)
2642 if rmsd<rmsd_cutoff:
2643 print(
"--- model "+str(n1)+
" added, rmsd="+str(rmsd))
2647 print(
"--- model "+str(n1)+
" NOT added, rmsd="+str(rmsd))
2652 merge the clusters that have close members
2653 @param rmsd_cutoff cutoff distance in Angstorms
2659 for c0, c1
in filter(
lambda x: len(x[0].members)>1, itertools.combinations(self.clusters, 2)):
2660 n0, n1 = [c.members[0]
for c
in (c0,c1)]
2661 d0 = self.stath0[n0]
2662 d1 = self.stath1[n1]
2663 rmsd, _ = self.
rmsd()
2665 to_merge.append((c0,c1))
2667 for c0, c
in reversed(to_merge):
2671 self.clusters = [c
for c
in filter(
lambda x: len(x.members)>0, self.clusters)]
2676 returns true if c0 and c1 have members that are closer than rmsd_cutoff
2678 print(
"check close members for clusters "+str(c0.cluster_id)+
" and "+str(c1.cluster_id))
2679 for n0, n1
in itertools.product(c0.members[1:], c1.members):
2680 d0 = self.stath0[n0]
2681 d1 = self.stath1[n1]
2682 rmsd, _ = self.
rmsd(metric=metric)
2683 if rmsd<rmsd_cutoff:
2699 a function that returns the permutation best_sel of sels0 that minimizes metric
2701 best_rmsd2 = float(
'inf')
2703 if self.issymmetricsel[sels0[0]]:
2706 for offset
in range(N):
2707 order=[(offset+i)%N
for i
in range(N)]
2708 sels = [sels0[(offset+i)%N]
for i
in range(N)]
2711 r=metric(sel0, sel1)
2714 if rmsd2 < best_rmsd2:
2719 for sels
in itertools.permutations(sels0):
2721 for sel0, sel1
in itertools.takewhile(
lambda x: rmsd2<best_rmsd2, zip(sels, sels1)):
2722 r=metric(sel0, sel1)
2724 if rmsd2 < best_rmsd2:
2738 return best_sel, best_rmsd2
2740 def compute_all_pairwise_rmsd(self):
2741 for d0
in self.stath0:
2742 for d1
in self.stath1:
2743 rmsd, _ = self.
rmsd()
2745 def rmsd(self,metric=IMP.atom.get_rmsd):
2747 Computes the RMSD. Resolves ambiguous pairs assignments
2750 n0=self.stath0.current_index
2751 n1=self.stath1.current_index
2752 if ((n0,n1)
in self.pairwise_rmsd)
and ((n0,n1)
in self.pairwise_molecular_assignment):
2753 return self.pairwise_rmsd[(n0,n1)], self.pairwise_molecular_assignment[(n0,n1)]
2761 seldict_best_order={}
2762 molecular_assignment={}
2763 for molname, sels0
in self.seldict0.items():
2764 sels_best_order, best_rmsd2 = self.
rmsd_helper(sels0, self.seldict1[molname], metric)
2766 Ncoords = len(sels_best_order[0].get_selected_particles())
2767 Ncopies = len(self.seldict1[molname])
2768 total_rmsd += Ncoords*best_rmsd2
2769 total_N += Ncoords*Ncopies
2771 for sel0, sel1
in zip(sels_best_order, self.seldict1[molname]):
2772 p0 = sel0.get_selected_particles()[0]
2773 p1 = sel1.get_selected_particles()[0]
2779 molecular_assignment[(molname,c0)]=(molname,c1)
2781 total_rmsd = math.sqrt(total_rmsd/total_N)
2783 self.pairwise_rmsd[(n0,n1)]=total_rmsd
2784 self.pairwise_molecular_assignment[(n0,n1)]=molecular_assignment
2785 self.pairwise_rmsd[(n1,n0)]=total_rmsd
2786 self.pairwise_molecular_assignment[(n1,n0)]=molecular_assignment
2788 return total_rmsd, molecular_assignment
2792 Fix the reference structure for structural alignment, rmsd and chain assignment
2793 @param reference can be either "Absolute" (cluster center of the first cluster)
2794 or Relative (cluster center of the current cluster)
2796 if reference==
"Absolute":
2798 elif reference==
"Relative":
2799 if cluster.center_index:
2800 n0=cluster.center_index
2802 n0=cluster.members[0]
2807 compute the molecular assignments between multiple copies
2808 of the same sequence. It changes the Copy index of Molecules
2811 _, molecular_assignment = self.
rmsd()
2812 for (m0, c0), (m1,c1)
in molecular_assignment.items():
2813 mol0 = self.molcopydict0[m0][c0]
2814 mol1 = self.molcopydict1[m1][c1]
2817 p1.set_value(cik0,c0)
2821 Undo the Copy index assignment
2824 _, molecular_assignment = self.
rmsd()
2826 for (m0, c0), (m1,c1)
in molecular_assignment.items():
2827 mol0 = self.molcopydict0[m0][c0]
2828 mol1 = self.molcopydict1[m1][c1]
2831 p1.set_value(cik0,c1)
2838 s=
"AnalysisReplicaExchange\n"
2839 s+=
"---- number of clusters %s \n"%str(len(self.clusters))
2840 s+=
"---- number of models %s \n"%str(len(self.stath0))
2843 def __getitem__(self,int_slice_adaptor):
2844 if type(int_slice_adaptor)
is int:
2845 return self.clusters[int_slice_adaptor]
2846 elif type(int_slice_adaptor)
is slice:
2847 return self.__iter__(int_slice_adaptor)
2849 raise TypeError(
"Unknown Type")
2852 return len(self.clusters)
2854 def __iter__(self,slice_key=None):
2855 if slice_key
is None:
2856 for i
in range(len(self)):
2859 for i
in range(len(self))[slice_key]:
class to link stat files to several rmf files
def add_provenance
Add provenance information in prov (a list of _TempProvenance objects) to each of the IMP hierarchies...
def clustering
Get the best scoring models, compute a distance matrix, cluster them, and create density maps...
def apply_molecular_assignments
compute the molecular assignments between multiple copies of the same sequence.
def set_alignment_selection
Setup the selection onto which the alignment is computed.
def load_data
Load the data from an external pickled file.
def have_close_members
returns true if c0 and c1 have members that are closer than rmsd_cutoff
A helper output for model evaluation.
A member of a rigid body, it has internal (local) coordinates.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Sample using replica exchange.
Sample using molecular dynamics.
def load_clusters
Load the clusters from a pickle file.
A container for models organized into clusters.
def get_modeling_trajectory
Get a trajectory of the modeling run, for generating demonstrative movies.
Class for easy writing of PDBs, RMFs, and stat files.
Performs alignment and RMSD calculation for two sets of coordinates.
static XYZR setup_particle(Model *m, ParticleIndex pi)
def aggregate
initial filling of the clusters.
def build_model
Create model.
A class to cluster structures.
def merge
merge two clusters
A macro for running all the basic operations of analysis.
def set_reference
Fix the reference structure for structural alignment, rmsd and chain assignment.
def __init__
Construction of the Class.
def update_clusters
Update the cluster id numbers.
Extends the functionality of IMP.atom.Molecule.
Set of python classes to create a multi-state, multi-resolution IMP hierarchy.
This class contains analysis utilities to investigate ReplicaExchange results.
def save_data
Save the data for the whole models into a pickle file.
Utility classes and functions for reading and storing PMI files.
A macro to help setup and run replica exchange.
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
def compute_cluster_center
Compute the cluster center for a given cluster.
def merge_aggregates
merge the clusters that have close members
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
def set_data
Set the data from an external IMP.pmi1.output.Data.
Sample using Monte Carlo.
def undo_apply_molecular_assignments
Undo the Copy index assignment.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Tools for clustering and cluster analysis.
def prune_redundant_structures
remove structures that are similar append it to a new cluster
Add uncertainty to a particle.
A decorator for keeping track of copies of a molecule.
def cluster
Cluster the models based on RMSD.
static bool get_is_setup(Model *m, ParticleIndex pi)
Set up the representation of all proteins and nucleic acid macromolecules.
The standard decorator for manipulating molecular structures.
def get_trajectory_models
Given a list of stat files, read them all and find a trajectory of models.
def deprecated_method
Python decorator to mark a method as deprecated.
def update_seldicts
Update the seldicts.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
def precision
Compute the precision of a cluster.
def read_coordinates_of_rmfs
Read in coordinates of a set of RMF tuples.
def get_cluster_data
Return the model data from a cluster.
A decorator for a particle with x,y,z coordinates.
def bipartite_precision
Compute the bipartite precision (ie the cross-precision) between two clusters.
def BuildModel0
Construct a component for each subunit (no splitting, nothing fancy).
def save_clusters
Save the clusters into a pickle file.
static Uncertainty setup_particle(Model *m, ParticleIndex pi, Float uncertainty)
def get_representation
Return the Representation object.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Representation of the system.
def rmsf
Compute the Root mean square fluctuations of a molecule in a cluster Returns an IMP.pmi1.tools.OrderedDict() where the keys are the residue indexes and the value is the rmsf.
def deprecated_object
Python decorator to mark a class as deprecated.
A macro to build a Representation based on a Topology and lists of movers.
The general base class for IMP exceptions.
def get_best_models
Given a list of stat files, read them all and find the best models.
static SampleProvenance setup_particle(Model *m, ParticleIndex pi, std::string method, int frames, int iterations, int replicas)
def add_protocol_output
Capture details of the modeling protocol.
A class for reading stat files (either rmf or ascii v1 and v2)
Compute mean density maps from structures.
def rmsd
Computes the RMSD.
def rmsd_helper
a function that returns the permutation best_sel of sels0 that minimizes metric
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Classes for writing output files and processing them.
def refine
Refine the clusters by merging the ones whose centers are close.
void add_geometries(RMF::FileHandle file, const display::GeometriesTemp &r)
Add geometries to the file.
algebra::Transformation3D get_transformation_aligning_first_to_second(const Selection &s1, const Selection &s2)
Get the transformation to align two selections.
def set_rmsd_selection
Setup the selection onto which the rmsd is computed.
void add_provenance(Model *m, ParticleIndex pi, Provenance p)
Add provenance to part of the model.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Support for the RMF file format for storing hierarchical molecular data and markup.
def set_symmetric
Store names of symmetric molecules.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def save_coordinates
Save the coordinates of the current cluster a single rmf file.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A decorator for a particle with x,y,z coordinates and a radius.