3 """@namespace IMP.pmi.tools
4 Miscellaneous utilities.
7 from __future__
import print_function
def _get_restraint_set_key():
    if not hasattr(_get_restraint_set_key, 'pmi_rs_key'):
        _get_restraint_set_key.pmi_rs_key = IMP.ModelKey("PMI restraints")
    return _get_restraint_set_key.pmi_rs_key

def _add_restraint_set(model, mk):
    rs = IMP.RestraintSet(model, "All PMI restraints")
    model.add_data(mk, rs)
    return rs

23 """Add a PMI restraint to the model.
24 Since Model.add_restraint() no longer exists (in modern IMP restraints
25 should be added to a ScoringFunction instead) store them instead in
26 a RestraintSet, and keep a reference to it in the Model."""
27 mk = _get_restraint_set_key()
28 if model.get_has_data(mk):
29 rs = IMP.RestraintSet.get_from(model.get_data(mk))
31 rs = _add_restraint_set(model, mk)
32 rs.add_restraint(restraint)
35 """Get a RestraintSet containing all PMI restraints added to the model"""
36 mk = _get_restraint_set_key()
37 if not model.get_has_data(mk):
38 print(
"WARNING: no restraints added to model yet")
39 _add_restraint_set(model, mk)
40 return IMP.RestraintSet.get_from(model.get_data(mk))
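# Example usage (a minimal sketch, not from the original file; any
# IMP.Restraint subclass instance can be stored this way):
#
#   m = IMP.Model()
#   r = IMP.RestraintSet(m, "example restraint")
#   add_restraint_to_model(m, r)
#   print(get_restraint_set(m).get_number_of_restraints())   # -> 1
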
class Stopwatch(object):

    def __init__(self, isdelta=True):
        global time
        import time

        # time.clock() was removed in Python 3.8; process_time() is the
        # closest portable replacement
        self.starttime = time.process_time()
        self.label = "None"
        self.isdelta = isdelta

    def set_label(self, labelstr):
        self.label = labelstr

    def get_output(self):
        output = {}
        if self.isdelta:
            newtime = time.process_time()
            output["Stopwatch_" + self.label + "_delta_seconds"] = str(
                newtime - self.starttime)
            self.starttime = newtime
        else:
            output["Stopwatch_" + self.label + "_elapsed_seconds"] = str(
                time.process_time() - self.starttime)
        return output

class SetupNuisance(object):

    def __init__(self, m, initialvalue, minvalue, maxvalue, isoptimized=True):
        import IMP.isd

        nuisance = IMP.isd.Scale.setup_particle(IMP.Particle(m), initialvalue)
        if minvalue:
            nuisance.set_lower(minvalue)
        if maxvalue:
            nuisance.set_upper(maxvalue)

        nuisance.set_is_optimized(nuisance.get_nuisance_key(), isoptimized)
        self.nuisance = nuisance

    def get_particle(self):
        return self.nuisance

class SetupWeight(object):

    def __init__(self, m, isoptimized=True):
        import IMP.isd

        pw = IMP.Particle(m)
        self.weight = IMP.isd.Weight.setup_particle(pw)
        # pass the flag through; the original hard-coded True here
        self.weight.set_weights_are_optimized(isoptimized)

    def get_particle(self):
        return self.weight

class ParticleToSampleFilter(object):

    def __init__(self, sampled_objects):
        self.sampled_objects = sampled_objects
        self.filters = []

    def add_filter(self, filter_string):
        self.filters.append(filter_string)

    def get_particles_to_sample(self):
        particles_to_sample = {}
        for so in self.sampled_objects:
            ps_dict = so.get_particles_to_sample()
            for key in ps_dict:
                for f in self.filters:
                    if f in key:
                        if key not in particles_to_sample:
                            particles_to_sample[key] = ps_dict[key]
                        else:
                            particles_to_sample[key] += ps_dict[key]
        return particles_to_sample

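# Example usage (sketch; `sampler_obj` stands for any object providing
# get_particles_to_sample(), e.g. a PMI restraint with sampled nuisances):
#
#   f = ParticleToSampleFilter([sampler_obj])
#   f.add_filter("Nuisances")   # keep only keys containing this substring
#   ps = f.get_particles_to_sample()
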
class ParticleToSampleList(object):

    def __init__(self, label="None"):
        self.dictionary_particle_type = {}
        self.dictionary_particle_transformation = {}
        self.dictionary_particle_name = {}
        self.label = label

    def add_particle(self, particle, particle_type,
                     particle_transformation, name):
        if particle_type not in ["Rigid_Bodies", "Floppy_Bodies",
                                 "Nuisances", "X_coord", "Weights"]:
            raise TypeError("not the right particle type")
        else:
            self.dictionary_particle_type[particle] = particle_type
            if particle_type == "Rigid_Bodies":
                if (type(particle_transformation) == tuple
                        and len(particle_transformation) == 2
                        and type(particle_transformation[0]) == float
                        and type(particle_transformation[1]) == float):
                    self.dictionary_particle_transformation[
                        particle] = particle_transformation
                    self.dictionary_particle_name[particle] = name
                else:
                    raise TypeError(
                        "ParticleToSampleList: not the right transformation "
                        "format for Rigid_Bodies, should be a tuple of floats")
            else:
                if type(particle_transformation) == float:
                    self.dictionary_particle_transformation[
                        particle] = particle_transformation
                    self.dictionary_particle_name[particle] = name
                else:
                    raise TypeError(
                        "ParticleToSampleList: not the right transformation "
                        "format, should be a float")

    def get_particles_to_sample(self):
        ps = {}
        for particle in self.dictionary_particle_type:
            key = (self.dictionary_particle_type[particle] +
                   "ParticleToSampleList_" +
                   self.dictionary_particle_name[particle] +
                   "_" + self.label)
            value = ([particle],
                     self.dictionary_particle_transformation[particle])
            ps[key] = value
        return ps

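# Example usage (sketch; `p` is an IMP.Particle set up as a nuisance and 0.5
# is the proposed mover step size):
#
#   ptsl = ParticleToSampleList(label="mylabel")
#   ptsl.add_particle(p, "Nuisances", 0.5, "sigma0")
#   ptsl.get_particles_to_sample()
#   # {'NuisancesParticleToSampleList_sigma0_mylabel': ([p], 0.5)}
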
class Variance(object):

    def __init__(self, model, tau, niter, prot, th_profile, write_data=False):
        global sqrt, os, random
        global array, matrix, linalg, diag, eye, save
        from math import sqrt
        import os
        import IMP.saxs
        # array/matrix helpers used by the methods below
        from numpy import array, matrix, linalg, diag, eye, save
        import numpy.random as random

        self.model = model
        self.write_data = write_data
        self.tau = tau
        self.niter = niter
        # select the particles from the model
        particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)
        self.particles = particles
        # store reference coordinates and the theoretical profile
        self.refpos = [IMP.core.XYZ(p).get_coordinates() for p in particles]
        self.model_profile = th_profile

    def perturb_particles(self, perturb=True):
        for i, p in enumerate(self.particles):
            newpos = array(self.refpos[i])
            if perturb:
                newpos += random.normal(0, self.tau, 3)
            newpos = IMP.algebra.Vector3D(newpos)
            IMP.core.XYZ(p).set_coordinates(newpos)

    def get_profile(self):
        model_profile = self.model_profile
        p = model_profile.calculate_profile(self.particles, IMP.saxs.CA_ATOMS)
        return array([model_profile.get_intensity(i) for i in
                      range(model_profile.size())])

    def init_variances(self):
        # create placeholders
        N = self.model_profile.size()
        a = self.profiles[0][:]
        self.m = matrix(a).T  # Nx1
        self.V = self.m * self.m.T
        self.normm = linalg.norm(self.m)
        self.normV = linalg.norm(self.V)

    def update_variances(self):
        a = matrix(self.profiles[-1])
        n = float(len(self.profiles))
        self.m = a.T / n + (n - 1) / n * self.m
        self.V = a.T * a + self.V
        self.oldnormm = self.normm
        self.oldnormV = self.normV
        self.normm = linalg.norm(self.m)
        self.normV = linalg.norm(self.V)
        self.diffm = (self.oldnormm - self.normm) / self.oldnormm
        self.diffV = (self.oldnormV - self.normV) / self.oldnormV

    def get_direct_stats(self, a):
        nq = len(a[0])
        nprof = len(a)
        m = [0] * nq
        for prof in a:
            for q, I in enumerate(prof):
                m[q] += I
        m = matrix(m) / nprof
        # covariance matrices
        V = matrix(a)
        V = V.T * V
        Sigma = (matrix(a - m))
        Sigma = Sigma.T * Sigma / (nprof - 1)
        mi = matrix(diag(1. / m))
        Sigmarel = mi.T * Sigma * mi
        return m, V, Sigma, Sigmarel

    def store_data(self):
        if not os.path.isdir('data'):
            os.mkdir('data')
        profiles = matrix(self.profiles)
        self.directm, self.directV, self.Sigma, self.Sigmarel = \
            self.get_direct_stats(array(profiles))
        directV = self.directV
        save('data/profiles', profiles)
        # absolute profile differences
        fl = open('data/profiles.dat', 'w')
        for i, l in enumerate(array(profiles).T):
            self.model_profile.get_q(i)
            fl.write('%s ' % i)
            for k in l:
                fl.write('%s ' % (k - self.directm[i]))
            fl.write('\n')
        # relative profile differences
        fl = open('data/profiles_rel.dat', 'w')
        for i, l in enumerate(array(profiles).T):
            self.model_profile.get_q(i)
            fl.write('%s ' % i)
            for k in l:
                fl.write('%s ' % ((k - self.directm[i]) / self.directm[i]))
            fl.write('\n')
        save('data/m', self.directm)
        save('data/V', self.directV)
        Sigma = self.Sigma
        save('data/Sigma', Sigma)
        # Sigma matrix
        fl = open('data/Sigma.dat', 'w')
        model_profile = self.model_profile
        for i in range(model_profile.size()):
            qi = model_profile.get_q(i)
            for j in range(model_profile.size()):
                qj = model_profile.get_q(j)
                vij = self.Sigma[i, j]
                fl.write('%s %s %s\n' % (qi, qj, vij))
            fl.write('\n')
        # Sigma eigenvalues
        fl = open('data/eigenvals', 'w')
        for i in linalg.eigvalsh(Sigma):
            fl.write('%s\n' % i)
        Sigmarel = self.Sigmarel
        save('data/Sigmarel', Sigmarel)
        # Sigmarel matrix
        fl = open('data/Sigmarel.dat', 'w')
        model_profile = self.model_profile
        for i in range(model_profile.size()):
            qi = model_profile.get_q(i)
            for j in range(model_profile.size()):
                qj = model_profile.get_q(j)
                vij = self.Sigmarel[i, j]
                fl.write('%s %s %s\n' % (qi, qj, vij))
            fl.write('\n')
        # Sigmarel eigenvalues
        fl = open('data/eigenvals_rel', 'w')
        for i in linalg.eigvalsh(Sigmarel):
            fl.write('%s\n' % i)
        # mean profile
        fl = open('data/mean.dat', 'w')
        for i in range(len(self.directm)):
            qi = self.model_profile.get_q(i)
            fl.write('%s ' % qi)
            fl.write('%s ' % self.directm[i])
            fl.write('%s\n' % sqrt(self.Sigma[i, i]))

    def try_chol(self, jitter):
        Sigma = self.Sigma
        try:
            linalg.cholesky(Sigma + matrix(eye(len(Sigma))) * jitter)
        except linalg.LinAlgError:
            print("Decomposition failed with jitter =", jitter)
            return
        print("Successful decomposition with jitter =", jitter)

    def run(self):
        self.profiles = [self.get_profile()]
        for n in range(self.niter):
            self.perturb_particles()
            self.profiles.append(self.get_profile())
        if self.write_data:
            self.store_data()

    def get_cov(self, relative=True):
        if not relative:
            return self.Sigma
        else:
            return self.Sigmarel

def get_random_cross_link_dataset(representation,
                                  resolution=1.0,
                                  number_of_cross_links=10,
                                  ambiguity_probability=0.1,
                                  confidence_score_range=[0, 100],
                                  avoid_same_particles=False):
    '''Return a random cross-link dataset as a string.
    Every line is a residue pair, together with UniqueIdentifier
    and XL score.'''

    residue_pairs = get_random_residue_pairs(
        representation, resolution, number_of_cross_links,
        avoid_same_particles)

    from random import random
    unique_identifier = 0
    cmin = float(min(confidence_score_range))
    cmax = float(max(confidence_score_range))

    dataset = "#\n"

    for (name1, r1, name2, r2) in residue_pairs:
        if random() > ambiguity_probability:
            unique_identifier += 1
        score = random() * (cmax - cmin) + cmin
        dataset += (str(name1) + " " + str(name2) + " " + str(r1) + " " +
                    str(r2) + " " + str(score) + " " +
                    str(unique_identifier) + "\n")

    return dataset

def get_cross_link_data(directory, filename, dist, omega, sigma,
                        don=None, doff=None, prior=0, type_of_profile="gofr"):
    import IMP.isd

    (distmin, distmax, ndist) = dist
    (omegamin, omegamax, nomega) = omega
    (sigmamin, sigmamax, nsigma) = sigma

    filen = IMP.isd.get_data_path("CrossLinkPMFs.dict")
    with open(filen) as xlpot:
        for line in xlpot:
            dictionary = eval(line)
            break

    xpot = dictionary[directory][filename]["distance"]
    pot = dictionary[directory][filename][type_of_profile]

    dist_grid = get_grid(distmin, distmax, ndist, False)
    omega_grid = get_log_grid(omegamin, omegamax, nomega)
    sigma_grid = get_log_grid(sigmamin, sigmamax, nsigma)

    if don is not None and doff is not None:
        xlmsdata = IMP.isd.CrossLinkData(dist_grid, omega_grid, sigma_grid,
                                         xpot, pot, don, doff, prior)
    else:
        xlmsdata = IMP.isd.CrossLinkData(dist_grid, omega_grid, sigma_grid,
                                         xpot, pot)
    return xlmsdata

def get_cross_link_data_from_length(length, dist, omega, sigma):
    # note: the tuple parameters were previously named xxx_todo_changeme3-5,
    # a leftover of the 2to3 conversion of tuple-unpacking arguments
    import IMP.isd

    (distmin, distmax, ndist) = dist
    (omegamin, omegamax, nomega) = omega
    (sigmamin, sigmamax, nsigma) = sigma

    dist_grid = get_grid(distmin, distmax, ndist, False)
    omega_grid = get_log_grid(omegamin, omegamax, nomega)
    sigma_grid = get_log_grid(sigmamin, sigmamax, nsigma)

    xlmsdata = IMP.isd.CrossLinkData(dist_grid, omega_grid, sigma_grid, length)
    return xlmsdata

def get_grid(gmin, gmax, ngrid, boundaries):
    grid = []
    dx = (gmax - gmin) / float(ngrid)
    for i in range(0, ngrid + 1):
        if not boundaries and i == 0:
            continue
        if not boundaries and i == ngrid:
            continue
        grid.append(gmin + float(i) * dx)
    return grid

def get_log_grid(gmin, gmax, ngrid):
    from math import exp, log
    grid = []
    for i in range(0, ngrid + 1):
        grid.append(gmin * exp(float(i) / ngrid * log(gmax / gmin)))
    return grid

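# Example (runnable as-is): a 4-interval linear grid including its boundaries,
# and a 4-interval multiplicative grid:
#
#   get_grid(0.0, 1.0, 4, True)   # [0.0, 0.25, 0.5, 0.75, 1.0]
#   get_log_grid(1.0, 16.0, 4)    # [1.0, 2.0, 4.0, 8.0, 16.0]
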
456 example '"{ID_Score}" > 28 AND "{Sample}" ==
457 "%10_1%" OR ":Sample}" == "%10_2%" OR ":Sample}"
458 == "%10_3%" OR ":Sample}" == "%8_1%" OR ":Sample}" == "%8_2%"'
461 import pyparsing
as pp
463 operator = pp.Regex(
">=|<=|!=|>|<|==|in").setName(
"operator")
464 value = pp.QuotedString(
466 r"[+-]?\d+(:?\.\d*)?(:?[eE][+-]?\d+)?")
467 identifier = pp.Word(pp.alphas, pp.alphanums +
"_")
468 comparison_term = identifier | value
469 condition = pp.Group(comparison_term + operator + comparison_term)
471 expr = pp.operatorPrecedence(condition, [
472 (
"OR", 2, pp.opAssoc.LEFT, ),
473 (
"AND", 2, pp.opAssoc.LEFT, ),
476 parsedstring = str(expr.parseString(inputstring)) \
482 .replace(
"{",
"float(entry['") \
483 .replace(
"}",
"'])") \
484 .replace(
":",
"str(entry['") \
485 .replace(
"}",
"'])") \
486 .replace(
"AND",
"and") \
def open_file_or_inline_text(filename):
    try:
        fl = open(filename, "r")
    except IOError:
        fl = filename.split("\n")
    return fl

def get_drmsd(prot0, prot1):
    drmsd = 0.
    npairs = 0.
    for i in range(0, len(prot0) - 1):
        for j in range(i + 1, len(prot0)):
            dist0 = IMP.core.get_distance(prot0[i], prot0[j])
            dist1 = IMP.core.get_distance(prot1[i], prot1[j])
            drmsd += (dist0 - dist1) ** 2
            npairs += 1.
    return math.sqrt(drmsd / npairs)

def get_ids_from_fasta_file(fastafile):
    ids = []
    with open(fastafile) as ff:
        for line in ff:
            if line[0] == ">":
                ids.append(line[1:-1])
    return ids

def get_closest_residue_position(hier, resindex, resolution=1.0):
    '''
    this function works with plain hierarchies, as read from the pdb,
    no multi-scale hierarchies
    '''
    p = []
    niter = 0
    while len(p) == 0:
        niter += 1
        sel = IMP.atom.Selection(hier, residue_index=resindex,
                                 atom_type=IMP.atom.AT_CA)
        if niter >= 10000:
            print("get_closest_residue_position: exiting while loop without result")
            break
        p = sel.get_selected_particles()
        resindex += 1

    if len(p) == 1:
        return IMP.core.XYZ(p[0]).get_coordinates()
    elif len(p) == 0:
        raise Exception(
            "get_closest_residue_position: got NO residues for hierarchy %s and residue %i" % (
                hier, resindex))
    else:
        raise ValueError(
            "got multiple residues for hierarchy %s and residue %i; the list of particles is %s" % (
                hier, resindex, str([pp.get_name() for pp in p])))

def get_position_terminal_residue(hier, terminus="C", resolution=1):
    '''
    Get the xyz position of the terminal residue at the given resolution.
    @param hier hierarchy containing the terminal residue
    @param terminus either 'N' or 'C'
    @param resolution resolution to use.
    '''
    termresidue = None
    termparticle = None
    for p in IMP.atom.get_leaves(hier):
        if IMP.pmi.Resolution(p).get_resolution() == resolution:
            residues = IMP.pmi.tools.get_residue_indexes(p)
            if terminus == "C":
                # test for None first: comparing an int against None raises
                # TypeError in Python 3
                if termresidue is None or max(residues) >= termresidue:
                    termresidue = max(residues)
                    termparticle = p
            elif terminus == "N":
                if termresidue is None or min(residues) <= termresidue:
                    termresidue = min(residues)
                    termparticle = p
            else:
                raise ValueError("terminus argument should be either N or C")

    return IMP.core.XYZ(termparticle).get_coordinates()

def get_residue_gaps_in_hierarchy(hierarchy, start, end):
    '''
    Return the residue index gaps and contiguous segments in the hierarchy.

    @param hierarchy hierarchy to examine
    @param start first residue index
    @param end last residue index

    @return A list of lists of the form
            [[1,100,"cont"],[101,120,"gap"],[121,200,"cont"]]
    '''
    gaps = []
    for n, rindex in enumerate(range(start, end + 1)):
        sel = IMP.atom.Selection(hierarchy, residue_index=rindex,
                                 atom_type=IMP.atom.AT_CA)
        if len(sel.get_selected_particles()) == 0:
            if n == 0:
                # set the initial condition
                rindexgap = start
                rindexcont = start - 1
            if rindexgap == rindex - 1:
                # residue is contiguous with the previously discovered gap
                gaps[-1][1] += 1
            else:
                # residue is not contiguous with the previously discovered
                # gap, hence create a new gap tuple
                gaps.append([rindex, rindex, "gap"])
            # update the index of the last residue gap
            rindexgap = rindex
        else:
            if n == 0:
                # set the initial condition
                rindexgap = start - 1
                rindexcont = start
            if rindexcont == rindex - 1:
                # residue is contiguous with the previously discovered
                # continuous part
                gaps[-1][1] += 1
            else:
                # residue is not contiguous with the previously discovered
                # continuous part, hence create a new cont tuple
                gaps.append([rindex, rindex, "cont"])
            # update the index of the last contiguous residue
            rindexcont = rindex
    return gaps

class map(object):
    """A dictionary-like map: float queries return the element whose key is
    numerically closest, string queries are exact lookups."""

    def __init__(self):
        self.map = {}

    def set_map_element(self, xvalue, yvalue):
        self.map[xvalue] = yvalue

    def get_map_element(self, invalue):
        if type(invalue) == float:
            n = 0
            mindist = 1
            for x in self.map:
                dist = (invalue - x) * (invalue - x)
                if n == 0:
                    mindist = dist
                    minx = x
                if dist < mindist:
                    mindist = dist
                    minx = x
                n += 1
            return self.map[minx]
        elif type(invalue) == str:
            return self.map[invalue]
        else:
            raise TypeError("wrong type for map")

def select(representation,
           resolution=None,
           hierarchies=None,
           selection_arguments=None,
           name=None,
           name_is_ambiguous=False,
           first_residue=None,
           last_residue=None,
           residue=None,
           representation_type=None):
    '''
    this function uses representation=SimplifiedModel
    it returns the corresponding selected particles
    representation_type="Beads", "Res:X", "Densities", "Representation", "Molecule"
    '''

    if resolution is None:
        allparticles = IMP.atom.get_leaves(representation.prot)
    resolution_particles = None
    hierarchies_particles = None
    names_particles = None
    residue_range_particles = None
    residue_particles = None
    representation_type_particles = None

    if resolution is not None:
        resolution_particles = []
        hs = representation.get_hierarchies_at_given_resolution(resolution)
        for h in hs:
            resolution_particles += IMP.atom.get_leaves(h)

    if hierarchies is not None:
        hierarchies_particles = []
        for h in hierarchies:
            hierarchies_particles += IMP.atom.get_leaves(h)

    if name is not None:
        names_particles = []
        if name_is_ambiguous:
            for namekey in representation.hier_dict:
                if name in namekey:
                    names_particles += IMP.atom.get_leaves(
                        representation.hier_dict[namekey])
        elif name in representation.hier_dict:
            names_particles += IMP.atom.get_leaves(
                representation.hier_dict[name])
        else:
            print("select: component %s is not there" % name)

    if first_residue is not None and last_residue is not None:
        sel = IMP.atom.Selection(
            representation.prot,
            residue_indexes=range(first_residue, last_residue + 1))
        residue_range_particles = [IMP.atom.Hierarchy(p)
                                   for p in sel.get_selected_particles()]

    if residue is not None:
        sel = IMP.atom.Selection(representation.prot, residue_index=residue)
        residue_particles = [IMP.atom.Hierarchy(p)
                             for p in sel.get_selected_particles()]

    if representation_type is not None:
        representation_type_particles = []
        if representation_type == "Molecule":
            for name in representation.hier_representation:
                for repr_type in representation.hier_representation[name]:
                    if repr_type == "Beads" or "Res:" in repr_type:
                        h = representation.hier_representation[name][repr_type]
                        representation_type_particles += IMP.atom.get_leaves(h)
        elif representation_type == "PDB":
            for name in representation.hier_representation:
                for repr_type in representation.hier_representation[name]:
                    # the original tested `repr_type == "Res:" in repr_type`,
                    # which only matched the literal "Res:"; the intent is to
                    # match any "Res:X" representation
                    if "Res:" in repr_type:
                        h = representation.hier_representation[name][repr_type]
                        representation_type_particles += IMP.atom.get_leaves(h)
        else:
            for name in representation.hier_representation:
                h = representation.hier_representation[
                    name][representation_type]
                representation_type_particles += IMP.atom.get_leaves(h)

    selections = [hierarchies_particles, names_particles,
                  residue_range_particles, residue_particles,
                  representation_type_particles]

    if resolution is None:
        selected_particles = set(allparticles)
    else:
        selected_particles = set(resolution_particles)

    for s in selections:
        if s is not None:
            selected_particles = (set(s) & selected_particles)

    return list(selected_particles)

def select_by_tuple(
        representation,
        tupleselection,
        resolution=None,
        name_is_ambiguous=False):
    if isinstance(tupleselection, tuple) and len(tupleselection) == 3:
        particles = IMP.pmi.tools.select(representation, resolution=resolution,
                                         name=tupleselection[2],
                                         first_residue=tupleselection[0],
                                         last_residue=tupleselection[1],
                                         name_is_ambiguous=name_is_ambiguous)
    elif isinstance(tupleselection, str):
        particles = IMP.pmi.tools.select(representation, resolution=resolution,
                                         name=tupleselection,
                                         name_is_ambiguous=name_is_ambiguous)
    else:
        raise ValueError('you passed something bad to select_by_tuple()')
    # now order the particles by residue number, least to greatest
    particles = IMP.pmi.tools.sort_by_residues(particles)
    return particles

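# Example usage (sketch; "Rpb1" stands for any component name present in the
# representation): both calls select resolution-1 particles, the second one
# restricted to residues 1-100 via the (first, last, name) tuple form:
#
#   ps = select(representation, resolution=1, name="Rpb1")
#   ps = select_by_tuple(representation, (1, 100, "Rpb1"), resolution=1)
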
def get_db_from_csv(csvfilename):
    import csv
    outputlist = []
    # the "rU" mode used previously was removed in Python 3
    with open(csvfilename) as fh:
        csvr = csv.DictReader(fh)
        for ls in csvr:
            outputlist.append(ls)
    return outputlist

792 """Store the representations for a system."""
797 self.root_hierarchy_dict = {}
798 self.preroot_fragment_hierarchy_dict = {}
799 self.particle_to_name = {}
802 def add_name(self, name):
803 if name
not in self.db:
806 def add_residue_number(self, name, resn):
809 if resn
not in self.db[name]:
810 self.db[name][resn] = {}
812 def add_resolution(self, name, resn, resolution):
814 resolution = float(resolution)
816 self.add_residue_number(name, resn)
817 if resolution
not in self.db[name][resn]:
818 self.db[name][resn][resolution] = []
822 resolution = float(resolution)
824 self.add_residue_number(name, resn)
825 self.add_resolution(name, resn, resolution)
826 self.db[name][resn][resolution] += particles
828 (rh, prf) = self.get_root_hierarchy(p)
829 self.root_hierarchy_dict[p] = rh
830 self.preroot_fragment_hierarchy_dict[p] = prf
831 self.particle_to_name[p] = name
832 if self.model
is None:
833 self.model = particles[0].get_model()
839 names = list(self.db.keys())
845 resolution = float(resolution)
846 return self.db[name][resn][resolution]
848 def get_particles_at_closest_resolution(self, name, resn, resolution):
850 resolution = float(resolution)
851 closestres = min(self.get_residue_resolutions(name, resn),
852 key=
lambda x: abs(float(x) - float(resolution)))
853 return self.get_particles(name, resn, closestres)
855 def get_residue_resolutions(self, name, resn):
857 resolutions = list(self.db[name][resn].keys())
861 def get_molecule_resolutions(self, name):
863 for resn
in self.db[name]:
864 resolutions.update(list(self.db[name][resn].keys()))
868 def get_residue_numbers(self, name):
869 residue_numbers = list(self.db[name].keys())
870 residue_numbers.sort()
871 return residue_numbers
873 def get_particles_by_resolution(self, name, resolution):
874 resolution = float(resolution)
876 for resn
in self.get_residue_numbers(name):
877 result = self.get_particles_at_closest_resolution(
881 pstemp = [p
for p
in result
if p
not in particles]
885 def get_all_particles_by_resolution(self, resolution):
886 resolution = float(resolution)
888 for name
in self.get_names():
889 particles += self.get_particles_by_resolution(name, resolution)
892 def get_root_hierarchy(self, particle):
893 prerootfragment = particle
903 prerootfragment = particle
909 def get_all_root_hierarchies_by_resolution(self, resolution):
911 resolution = float(resolution)
912 particles = self.get_all_particles_by_resolution(resolution)
914 rh = self.root_hierarchy_dict[p]
915 if rh
not in hierarchies:
919 def get_preroot_fragments_by_resolution(self, name, resolution):
921 resolution = float(resolution)
922 particles = self.get_particles_by_resolution(name, resolution)
924 fr = self.preroot_fragment_hierarchy_dict[p]
925 if fr
not in fragments:
929 def show(self, name):
931 for resn
in self.get_residue_numbers(name):
933 for resolution
in self.get_residue_resolutions(name, resn):
934 print(
"----", resolution)
935 for p
in self.get_particles(name, resn, resolution):
936 print(
"--------", p.get_name())
def get_prot_name_from_particle(p, list_of_names):
    '''Return the component name provided a particle and a list of names'''
    root = p
    protname = root.get_name()
    is_a_bead = False
    while protname not in list_of_names:
        root0 = root.get_parent()
        if root0 == IMP.atom.Hierarchy():
            return (None, None)
        protname = root0.get_name()

        # check if that is a bead
        # this piece of code might be dangerous
        if "Beads" in protname:
            is_a_bead = True
        root = root0
    return (protname, is_a_bead)

def get_residue_indexes(hier):
    '''
    Retrieve the residue indexes for the given particle.

    The particle must be an instance of Fragment, Residue or Atom
    '''
    resind = []
    if IMP.atom.Fragment.get_is_setup(hier):
        resind = IMP.atom.Fragment(hier).get_residue_indexes()
    elif IMP.atom.Residue.get_is_setup(hier):
        resind = [IMP.atom.Residue(hier).get_index()]
    elif IMP.atom.Atom.get_is_setup(hier):
        a = IMP.atom.Atom(hier)
        resind = [IMP.atom.Residue(a.get_parent()).get_index()]
    else:
        print("get_residue_indexes> input is not Fragment, Residue or Atom")
        resind = []
    return resind

def sort_by_residues(particles):
    particles_residues = [(p, list(IMP.pmi.tools.get_residue_indexes(p)))
                          for p in particles]
    sorted_particles_residues = sorted(
        particles_residues,
        key=lambda tup: tup[1])
    particles = [p[0] for p in sorted_particles_residues]
    return particles

def get_residue_to_particle_map(particles):
    # return a dictionary mapping residue-index tuples to particles;
    # the keys are tuples (lists are unhashable) of the residue indexes
    particles = sort_by_residues(particles)
    particles_residues = [tuple(IMP.pmi.tools.get_residue_indexes(p))
                          for p in particles]
    return dict(zip(particles_residues, particles))

1002 """Synchronize data over a parallel run"""
1003 from mpi4py
import MPI
1004 comm = MPI.COMM_WORLD
1005 rank = comm.Get_rank()
1006 number_of_processes = comm.size
1009 comm.send(data, dest=0, tag=11)
1012 for i
in range(1, number_of_processes):
1013 data_tmp = comm.recv(source=i, tag=11)
1014 if type(data) == list:
1016 elif type(data) == dict:
1017 data.update(data_tmp)
1019 raise TypeError(
"data not supported, use list or dictionaries")
1021 for i
in range(1, number_of_processes):
1022 comm.send(data, dest=i, tag=11)
1025 data = comm.recv(source=0, tag=11)
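# Example usage (sketch; run under MPI, e.g. `mpirun -np 4 python script.py`):
# every rank contributes its local list and all ranks end up with the
# concatenation:
#
#   local_results = [my_rank_specific_result]
#   all_results = scatter_and_gather(local_results)
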
1029 """Synchronize data over a parallel run"""
1030 from mpi4py
import MPI
1031 comm = MPI.COMM_WORLD
1032 rank = comm.Get_rank()
1033 number_of_processes = comm.size
1036 comm.send(data, dest=0, tag=11)
1038 for i
in range(1, number_of_processes):
1039 data_tmp = comm.recv(source=i, tag=11)
1041 data[k]+=data_tmp[k]
1043 for i
in range(1, number_of_processes):
1044 comm.send(data, dest=i, tag=11)
1047 data = comm.recv(source=0, tag=11)
def sublist_iterator(l, lmin=None, lmax=None):
    '''
    Yield all sublists of length >= lmin and <= lmax
    '''
    if lmin is None:
        lmin = 0
    if lmax is None:
        lmax = len(l)
    n = len(l) + 1
    for i in range(n):
        for j in range(i + 1, n):
            if len(l[i:j]) <= lmax and len(l[i:j]) >= lmin:
                yield l[i:j]


def flatten_list(l):
    return [item for sublist in l for item in sublist]

1075 """ Yield successive length-sized chunks from a list.
1077 for i
in range(0, len(list), length):
1078 yield list[i:i + length]
def chunk_list_into_segments(seq, num):
    seq = list(seq)
    avg = len(seq) / float(num)
    out = []
    last = 0.0
    while last < len(seq):
        out.append(seq[int(last):int(last + avg)])
        last += avg
    return out

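# Examples (runnable as-is):
#
#   list(list_chunks_iterator([1, 2, 3, 4, 5], 2))
#   # [[1, 2], [3, 4], [5]]
#   chunk_list_into_segments(range(10), 3)
#   # [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]
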
def translate_hierarchy(hierarchy, translation_vector):
    '''
    Apply a translation to a hierarchy along the input vector.
    '''
    rbs = set()
    xyzs = set()
    if type(translation_vector) == list:
        transformation = IMP.algebra.Transformation3D(
            IMP.algebra.Vector3D(translation_vector))
    else:
        transformation = IMP.algebra.Transformation3D(translation_vector)
    for p in IMP.atom.get_leaves(hierarchy):
        if IMP.core.RigidBody.get_is_setup(p):
            rbs.add(IMP.core.RigidBody(p))
        elif IMP.core.RigidMember.get_is_setup(p):
            rb = IMP.core.RigidMember(p).get_rigid_body()
            rbs.add(rb)
        else:
            xyzs.add(p)
    for xyz in xyzs:
        IMP.core.transform(IMP.core.XYZ(xyz), transformation)
    for rb in rbs:
        IMP.core.transform(rb, transformation)


def translate_hierarchies(hierarchies, translation_vector):
    for h in hierarchies:
        IMP.pmi.tools.translate_hierarchy(h, translation_vector)


def translate_hierarchies_to_reference_frame(hierarchies):
    xc = 0
    yc = 0
    zc = 0
    nc = 0
    for h in hierarchies:
        for p in IMP.atom.get_leaves(h):
            coor = IMP.core.XYZ(p).get_coordinates()
            nc += 1
            xc += coor[0]
            yc += coor[1]
            zc += coor[2]
    xc = xc / nc
    yc = yc / nc
    zc = zc / nc
    IMP.pmi.tools.translate_hierarchies(hierarchies, (-xc, -yc, -zc))

def normal_density_function(expected_value, sigma, x):
    return (
        1 / math.sqrt(2 * math.pi) / sigma *
        math.exp(-(x - expected_value) ** 2 / 2 / sigma / sigma)
    )


def log_normal_density_function(expected_value, sigma, x):
    return (
        1 / math.sqrt(2 * math.pi) / sigma / x *
        math.exp(-(math.log(x / expected_value) ** 2 / 2 / sigma / sigma))
    )

def get_random_residue_pairs(representation, resolution,
                             number,
                             avoid_same_particles=False,
                             names=None):

    from random import choice
    particles = []
    if names is None:
        names = list(representation.hier_dict.keys())

    for name in names:
        prot = representation.hier_dict[name]
        particles += select(representation, name=name, resolution=resolution)

    random_residue_pairs = []
    while len(random_residue_pairs) <= number:
        p1 = choice(particles)
        p2 = choice(particles)
        if p1 == p2 and avoid_same_particles:
            continue
        r1 = choice(IMP.pmi.tools.get_residue_indexes(p1))
        r2 = choice(IMP.pmi.tools.get_residue_indexes(p2))
        name1 = representation.get_prot_name_from_particle(p1)
        name2 = representation.get_prot_name_from_particle(p2)
        random_residue_pairs.append((name1, r1, name2, r2))

    return random_residue_pairs

def get_random_data_point(
        expected_value,
        ntrials,
        sensitivity,
        sigma,
        outlierprob,
        begin_end_nbins_tuple,
        log=False,
        loggrid=False):
    import random

    begin = begin_end_nbins_tuple[0]
    end = begin_end_nbins_tuple[1]
    nbins = begin_end_nbins_tuple[2]

    if not loggrid:
        fmod_grid = get_grid(begin, end, nbins, True)
    else:
        fmod_grid = get_log_grid(begin, end, nbins)

    norm = 0
    cumul = []
    cumul.append(0)

    a = []
    for i in range(0, ntrials):
        a.append([random.random(), True])

    if sigma != 0.0:
        # build the cumulative distribution by trapezoidal integration
        for j in range(1, len(fmod_grid)):
            fj = fmod_grid[j]
            fjm1 = fmod_grid[j - 1]
            df = fj - fjm1

            if not log:
                pj = normal_density_function(expected_value, sigma, fj)
                pjm1 = normal_density_function(expected_value, sigma, fjm1)
            else:
                pj = log_normal_density_function(expected_value, sigma, fj)
                pjm1 = log_normal_density_function(expected_value, sigma, fjm1)

            norm += (pj + pjm1) / 2.0 * df
            cumul.append(norm)

        random_points = []
        for i in range(len(cumul)):
            for aa in a:
                if aa[0] <= cumul[i] / norm and aa[1]:
                    random_points.append(
                        int(fmod_grid[i] / sensitivity) * sensitivity)
                    aa[1] = False
    else:
        random_points = [expected_value] * ntrials

    for i in range(len(random_points)):
        if random.random() < outlierprob:
            a = random.uniform(begin, end)
            random_points[i] = int(a / sensitivity) * sensitivity
    # print(random_points)

    # legacy truncated-normal sampler kept for reference (OUTLIERPROB_ and
    # BETA_ were module constants in an older revision):
    # for i in range(ntrials):
    #     if random.random() > OUTLIERPROB_:
    #         r = truncnorm.rvs(0.0, 1.0, expected_value, BETA_)
    #         if r > 1.0:
    #             print(r, expected_value, BETA_)
    #     else:
    #         r = random.random()
    #     random_points.append(int(r / sensitivity) * sensitivity)

    rmean = 0.
    rmean2 = 0.
    for r in random_points:
        rmean += r
        rmean2 += r * r

    rmean /= float(ntrials)
    rmean2 /= float(ntrials)
    stddev = math.sqrt(max(rmean2 - rmean * rmean, 0.))
    return rmean, stddev

is_already_printed = {}


def print_deprecation_warning(old_name, new_name):
    if old_name not in is_already_printed:
        print("WARNING: " + old_name + " is deprecated, use " +
              new_name + " instead")
        is_already_printed[old_name] = True

def print_multicolumn(list_of_strings, ncolumns=2, truncate=40):
    l = list_of_strings
    cols = ncolumns
    # pad so the length is divisible by the column count
    for i in range(len(l) % cols):
        l.append(" ")

    # integer division is required here under Python 3
    split = [l[i:i + len(l) // cols]
             for i in range(0, len(l), len(l) // cols)]
    for row in zip(*split):
        print("".join(str.ljust(i, truncate) for i in row))

def parse_dssp(dssp_fn, limit_to_chains=''):
    '''Read a dssp file and get secondary structure information.
    Values are all PDB residue numbering.
    @return dict of sel tuples
    helix : [ [ ['A',5,7] ] , [['B',15,17]] , ...] two helices A:5-7,B:15-17
    beta : [ [ ['A',1,3] , ['A',100,102] ] , ...] one sheet: A:1-3 & A:100-102
    loop : same format as helix, it's the contiguous loops
    '''

    from collections import defaultdict

    # setup the secondary structure class map
    sses = {'helix': [],
            'beta': [],
            'loop': []}
    helix_classes = 'GHI'
    strand_classes = 'EB'
    loop_classes = [' ', '', 'T', 'S']
    sse_dict = {}
    for h in helix_classes:
        sse_dict[h] = 'helix'
    for s in strand_classes:
        sse_dict[s] = 'beta'
    for l in loop_classes:
        sse_dict[l] = 'loop'

    # read the dssp file
    start = False
    beta_dict = defaultdict(list)
    prev_sstype = None
    cur_sse = None
    prev_beta_id = None
    for line in open(dssp_fn, 'r'):
        fields = line.split()
        chain_break = False
        if len(fields) < 2:
            continue
        if fields[1] == "RESIDUE":
            # start parsing from here
            start = True
            continue
        if not start:
            continue
        if line[9] == " ":
            chain_break = True
        elif limit_to_chains != '' and line[11] not in limit_to_chains:
            continue

        # gather line info
        if not chain_break:
            pdb_res_num = int(line[5:10])
            chain = 'chain' + line[11]
            sstype = line[16]
            beta_id = line[33]

        # decide whether to extend the current SSE or store it
        if prev_sstype is None:
            cur_sse = [pdb_res_num, pdb_res_num, chain]
        elif sstype != prev_sstype or chain_break:
            # add cur_sse to the right place
            if sse_dict[prev_sstype] in ['helix', 'loop']:
                sses[sse_dict[prev_sstype]].append([cur_sse])
            if sse_dict[prev_sstype] == 'beta':
                beta_dict[prev_beta_id].append(cur_sse)
            cur_sse = [pdb_res_num, pdb_res_num, chain]
        else:
            cur_sse[1] = pdb_res_num
        if chain_break:
            prev_sstype = None
            prev_beta_id = None
        else:
            prev_sstype = sstype
            prev_beta_id = beta_id

    # final SSE processing
    if prev_sstype is not None:
        if sse_dict[prev_sstype] in ['helix', 'loop']:
            sses[sse_dict[prev_sstype]].append([cur_sse])
        if sse_dict[prev_sstype] == 'beta':
            beta_dict[prev_beta_id].append(cur_sse)

    # gather the beta sheets
    for beta_sheet in beta_dict:
        sses['beta'].append(beta_dict[beta_sheet])

    return sses

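# Example usage (sketch; 'myfile.dssp' is DSSP output for the structure; note
# that the code stores chains as 'chainA', 'chainB', ...):
#
#   sses = parse_dssp('myfile.dssp', limit_to_chains='AB')
#   for helix_group in sses['helix']:
#       print(helix_group)   # e.g. [[5, 7, 'chainA']]
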
def sse_selections_to_chimera_colors(dssp_dict, chimera_model_num=0):
    ''' get the chimera command to check if you've correctly made the dssp
    dictionary; colors each helix and beta sheet'''
    cmds = {
        'helix': 'color green ',
        'beta': 'color blue ',
        'loop': 'color red '}
    for skey in dssp_dict.keys():
        for sgroup in dssp_dict[skey]:
            for sse in sgroup:
                start, stop, chain = sse
                chain = chain.strip('chain')
                cmds[skey] += '#%i:%s-%s.%s ' % (
                    chimera_model_num, start, stop, chain)
    print('; '.join([cmds[k] for k in cmds]))

class Colors(object):
    '''Convert color codes between hexadecimal and rgb'''

    def __init__(self):
        self._NUMERALS = '0123456789abcdefABCDEF'
        self._HEXDEC = dict((v, int(v, 16)) for v in
                            (x + y for x in self._NUMERALS
                             for y in self._NUMERALS))
        self.LOWERCASE, self.UPPERCASE = 'x', 'X'

    def rgb(self, triplet):
        return (float(self._HEXDEC[triplet[0:2]]),
                float(self._HEXDEC[triplet[2:4]]),
                float(self._HEXDEC[triplet[4:6]]))

    def triplet(self, rgb, lettercase=None):
        if lettercase is None:
            lettercase = self.LOWERCASE
        return format(rgb[0] << 16 | rgb[1] << 8 | rgb[2],
                      '06' + lettercase)

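# Example usage (runnable as-is): round-trip between a hex triplet and RGB:
#
#   c = Colors()
#   c.rgb('ff0080')            # (255.0, 0.0, 128.0)
#   c.triplet((255, 0, 128))   # 'ff0080'
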
try:
    # collections.MutableSet moved to collections.abc in Python 3.3 and was
    # removed from the collections top level in Python 3.10
    from collections.abc import MutableSet
except ImportError:
    from collections import MutableSet


class OrderedSet(MutableSet):

    def __init__(self, iterable=None):
        self.end = end = []
        end += [None, end, end]     # sentinel node for the doubly linked list
        self.map = {}               # key --> [key, prev, next]
        if iterable is not None:
            self |= iterable

    def __len__(self):
        return len(self.map)

    def __contains__(self, key):
        return key in self.map

    def add(self, key):
        if key not in self.map:
            end = self.end
            curr = end[1]
            curr[2] = end[1] = self.map[key] = [key, curr, end]

    def discard(self, key):
        if key in self.map:
            key, prev, next = self.map.pop(key)
            prev[2] = next
            next[1] = prev

    def __iter__(self):
        end = self.end
        curr = end[2]
        while curr is not end:
            yield curr[0]
            curr = curr[2]

    def __reversed__(self):
        end = self.end
        curr = end[1]
        while curr is not end:
            yield curr[0]
            curr = curr[1]

    def pop(self, last=True):
        if not self:
            raise KeyError('set is empty')
        if last:
            key = self.end[1][0]
        else:
            key = self.end[2][0]
        self.discard(key)
        return key

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        if isinstance(other, OrderedSet):
            return len(self) == len(other) and list(self) == list(other)
        return set(self) == set(other)

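# Example usage (runnable as-is): insertion order is preserved, duplicates are
# dropped, and pop() takes from either end:
#
#   s = OrderedSet('abracadabra')
#   list(s)            # ['a', 'b', 'r', 'c', 'd']
#   s.pop()            # 'd'
#   s.pop(last=False)  # 'a'
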