3 """@namespace IMP.pmi.tools
4 Miscellaneous utilities.
12 class Stopwatch(object):
14 def __init__(self, isdelta=True):
18 self.starttime = time.clock()
20 self.isdelta = isdelta
22 def set_label(self, labelstr):
28 newtime = time.clock()
32 "_delta_seconds"] = str(
35 self.starttime = newtime
37 output[
"Stopwatch_" + self.label +
38 "_elapsed_seconds"] = str(time.clock() - self.starttime)
42 class SetupNuisance(object):
44 def __init__(self, m, initialvalue, minvalue, maxvalue, isoptimized=True):
49 nuisance.set_lower(minvalue)
51 nuisance.set_upper(maxvalue)
54 nuisance.set_is_optimized(nuisance.get_nuisance_key(), isoptimized)
55 self.nuisance = nuisance
57 def get_particle(self):
61 class SetupWeight(object):
63 def __init__(self, m, isoptimized=True):
67 self.weight.set_weights_are_optimized(
True)
69 def get_particle(self):
73 class ParticleToSampleFilter(object):
74 def __init__(self, sampled_objects):
75 self.sampled_objects=sampled_objects
78 def add_filter(self,filter_string):
79 self.filters.append(filter_string)
81 def get_particles_to_sample(self):
82 particles_to_sample={}
83 for so
in self.sampled_objects:
84 ps_dict=so.get_particles_to_sample()
86 for f
in self.filters:
88 if key
not in particles_to_sample:
89 particles_to_sample[key]=ps_dict[key]
91 particles_to_sample[key]+=ps_dict[key]
92 return particles_to_sample
94 class ParticleToSampleList(object):
96 def __init__(self, label="None"):
98 self.dictionary_particle_type = {}
99 self.dictionary_particle_transformation = {}
100 self.dictionary_particle_name = {}
107 particle_transformation,
109 if not particle_type
in [
"Rigid_Bodies",
"Floppy_Bodies",
"Nuisances",
"X_coord",
"Weights"]:
110 raise TypeError(
"not the right particle type")
112 self.dictionary_particle_type[particle] = particle_type
113 if particle_type ==
"Rigid_Bodies":
114 if type(particle_transformation) == tuple
and len(particle_transformation) == 2
and type(particle_transformation[0]) == float
and type(particle_transformation[1]) == float:
115 self.dictionary_particle_transformation[
116 particle] = particle_transformation
117 self.dictionary_particle_name[particle] = name
119 raise TypeError(
"ParticleToSampleList: not the right transformation format for Rigid_Bodies, should be a tuple a floats")
121 if type(particle_transformation) == float:
122 self.dictionary_particle_transformation[
123 particle] = particle_transformation
124 self.dictionary_particle_name[particle] = name
126 raise TypeError(
"ParticleToSampleList: not the right transformation format sould be a float")
128 def get_particles_to_sample(self):
130 for particle
in self.dictionary_particle_type:
131 key = self.dictionary_particle_type[
132 particle] +
"ParticleToSampleList_" + self.dictionary_particle_name[particle] +
"_" + self.label
135 self.dictionary_particle_transformation[particle])
140 class Variance(object):
142 def __init__(self, model, tau, niter, prot, th_profile, write_data=False):
143 global sqrt, os, random
144 from math
import sqrt
149 self.write_data = write_data
154 self.particles = particles
156 self.refpos = [
IMP.core.XYZ(p).get_coordinates()
for p
in particles]
157 self.model_profile = th_profile
159 def perturb_particles(self, perturb=True):
160 for i, p
in enumerate(self.particles):
161 newpos = array(self.refpos[i])
163 newpos += random.normal(0, self.tau, 3)
167 def get_profile(self):
168 model_profile = self.model_profile
169 p = model_profile.calculate_profile(self.particles, IMP.saxs.CA_ATOMS)
170 return array([model_profile.get_intensity(i)
for i
in
171 xrange(model_profile.size())])
173 def init_variances(self):
175 N = self.model_profile.size()
176 a = self.profiles[0][:]
178 self.V = self.m * self.m.T
179 self.normm = linalg.norm(self.m)
180 self.normV = linalg.norm(self.V)
182 def update_variances(self):
183 a = matrix(self.profiles[-1])
184 n = float(len(self.profiles))
185 self.m = a.T / n + (n - 1) / n * self.m
186 self.V = a.T * a + self.V
187 self.oldnormm = self.normm
188 self.oldnormV = self.normV
189 self.normm = linalg.norm(self.m)
190 self.normV = linalg.norm(self.V)
191 self.diffm = (self.oldnormm - self.normm) / self.oldnormm
192 self.diffV = (self.oldnormV - self.normV) / self.oldnormV
194 def get_direct_stats(self, a):
199 for q, I
in enumerate(prof):
204 Sigma = (matrix(a - m))
205 Sigma = Sigma.T * Sigma / (nprof - 1)
206 mi = matrix(diag(1. / m))
207 Sigmarel = mi.T * Sigma * mi
208 return m, V, Sigma, Sigmarel
210 def store_data(self):
211 if not os.path.isdir(
'data'):
213 profiles = matrix(self.profiles)
214 self.directm, self.directV, self.Sigma, self.Sigmarel = \
215 self.get_direct_stats(array(profiles))
216 directV = self.directV
218 save(
'data/profiles', profiles)
220 fl = open(
'data/profiles.dat',
'w')
221 for i, l
in enumerate(array(profiles).T):
222 self.model_profile.get_q(i)
225 fl.write(
'%s ' % (k - self.directm[i]))
228 fl = open(
'data/profiles_rel.dat',
'w')
229 for i, l
in enumerate(array(profiles).T):
230 self.model_profile.get_q(i)
233 fl.write(
'%s ' % ((k - self.directm[i]) / self.directm[i]))
235 save(
'data/m', self.directm)
236 save(
'data/V', self.directV)
238 save(
'data/Sigma', Sigma)
240 fl = open(
'data/Sigma.dat',
'w')
241 model_profile = self.model_profile
242 for i
in xrange(model_profile.size()):
243 qi = model_profile.get_q(i)
244 for j
in xrange(model_profile.size()):
245 qj = model_profile.get_q(j)
246 vij = self.Sigma[i, j]
247 fl.write(
'%s %s %s\n' % (qi, qj, vij))
250 fl = open(
'data/eigenvals',
'w')
251 for i
in linalg.eigvalsh(Sigma):
253 Sigmarel = self.Sigmarel
254 save(
'data/Sigmarel', Sigmarel)
256 fl = open(
'data/Sigmarel.dat',
'w')
257 model_profile = self.model_profile
258 for i
in xrange(model_profile.size()):
259 qi = model_profile.get_q(i)
260 for j
in xrange(model_profile.size()):
261 qj = model_profile.get_q(j)
262 vij = self.Sigmarel[i, j]
263 fl.write(
'%s %s %s\n' % (qi, qj, vij))
266 fl = open(
'data/eigenvals_rel',
'w')
267 for i
in linalg.eigvalsh(Sigmarel):
270 fl = open(
'data/mean.dat',
'w')
271 for i
in xrange(len(self.directm)):
272 qi = self.model_profile.get_q(i)
274 fl.write(
'%s ' % self.directm[i])
275 fl.write(
'%s ' % sqrt(self.Sigma[i, i]))
278 def try_chol(self, jitter):
281 linalg.cholesky(Sigma + matrix(eye(len(Sigma))) * jitter)
282 except linalg.LinAlgError:
283 print "Decomposition failed with jitter =", jitter
285 print "Successful decomposition with jitter =", jitter
288 self.profiles = [self.get_profile()]
290 for n
in xrange(self.niter):
291 self.perturb_particles()
292 self.profiles.append(self.get_profile())
304 def get_cov(self, relative=True):
314 number_of_cross_links=10,
315 ambiguity_probability=0.1,
316 confidence_score_range=[0,100],
317 avoid_same_particles=
False):
319 '''returns a random cross-link dataset into a string were every
320 line is a residue pair, together with UniqueIdentifier and XL score'''
322 residue_pairs=get_random_residue_pairs(representation, resolution, number_of_cross_links, avoid_same_particles)
324 from random
import random
326 cmin=float(min(confidence_score_range))
327 cmax=float(max(confidence_score_range))
331 for (name1, r1, name2, r2)
in residue_pairs:
332 if random() > ambiguity_probability:
334 score=random()*(cmax-cmin)+cmin
335 dataset+=str(name1)+
" "+str(name2)+
" "+str(r1)+
" "+str(r2)+
" "+str(score)+
" "+str(unique_identifier)+
"\n"
342 def get_cross_link_data(directory, filename, (distmin, distmax, ndist),
343 (omegamin, omegamax, nomega),
344 (sigmamin, sigmamax, nsigma),
345 don=
None, doff=
None, prior=0, type_of_profile=
"gofr"):
353 dictionary = eval(line)
356 xpot = dictionary[directory][filename][
"distance"]
357 pot = dictionary[directory][filename][type_of_profile]
359 dist_grid =
get_grid(distmin, distmax, ndist,
False)
360 omega_grid = get_log_grid(omegamin, omegamax, nomega)
361 sigma_grid = get_log_grid(sigmamin, sigmamax, nsigma)
363 if not don
is None and not doff
is None:
385 def get_cross_link_data_from_length(length, (distmin, distmax, ndist),
386 (omegamin, omegamax, nomega),
387 (sigmamin, sigmamax, nsigma)):
390 dist_grid =
get_grid(distmin, distmax, ndist,
False)
391 omega_grid = get_log_grid(omegamin, omegamax, nomega)
392 sigma_grid = get_log_grid(sigmamin, sigmamax, nsigma)
398 def get_grid(gmin, gmax, ngrid, boundaries):
400 dx = (gmax - gmin) / float(ngrid)
401 for i
in range(0, ngrid + 1):
402 if(
not boundaries
and i == 0):
404 if(
not boundaries
and i == ngrid):
406 grid.append(gmin + float(i) * dx)
412 def get_log_grid(gmin, gmax, ngrid):
413 from math
import exp, log
415 for i
in range(0, ngrid + 1):
416 grid.append(gmin * exp(float(i) / ngrid * log(gmax / gmin)))
424 example '"{ID_Score}" > 28 AND "{Sample}" ==
425 "%10_1%" OR ":Sample}" == "%10_2%" OR ":Sample}"
426 == "%10_3%" OR ":Sample}" == "%8_1%" OR ":Sample}" == "%8_2%"'
429 import pyparsing
as pp
431 operator = pp.Regex(
">=|<=|!=|>|<|==|in").setName(
"operator")
432 value = pp.QuotedString(
434 r"[+-]?\d+(:?\.\d*)?(:?[eE][+-]?\d+)?")
435 identifier = pp.Word(pp.alphas, pp.alphanums +
"_")
436 comparison_term = identifier | value
437 condition = pp.Group(comparison_term + operator + comparison_term)
439 expr = pp.operatorPrecedence(condition, [
440 (
"OR", 2, pp.opAssoc.LEFT, ),
441 (
"AND", 2, pp.opAssoc.LEFT, ),
444 parsedstring = str(expr.parseString(inputstring)) \
450 .replace(
"{",
"float(entry['") \
451 .replace(
"}",
"'])") \
452 .replace(
":",
"str(entry['") \
453 .replace(
"}",
"'])") \
454 .replace(
"AND",
"and") \
459 def open_file_or_inline_text(filename):
461 fl = open(filename,
"r")
463 fl = filename.split(
"\n")
470 for i
in range(0, len(prot0) - 1):
471 for j
in range(i + 1, len(prot0)):
474 drmsd += (dist0 - dist1) ** 2
476 return math.sqrt(drmsd / npairs)
481 def get_ids_from_fasta_file(fastafile):
483 ff = open(fastafile,
"r")
492 this function works with plain hierarchies, as read from the pdb,
493 no multi-scale hierarchies
500 atom_type=IMP.atom.AT_CA)
508 print "get_closest_residue_position: exiting while loop without result"
510 p = sel.get_selected_particles()
515 print "get_closest_residue_position: got NO residues for hierarchy %s and residue %i" % (hier, resindex)
516 raise Exception,
"get_closest_residue_position: got NO residues for hierarchy %s and residue %i" % (
519 raise ValueError(
"got multiple residues for hierarchy %s and residue %i; the list of particles is %s" % (hier, resindex, str([pp.get_name()
for pp
in p])))
523 this function get the xyz position of the
524 C or N terminal residue of a hierarchy, given the resolution.
525 the argument of terminus can be either N or C
533 if max(residues) >= termresidue
and not termresidue
is None:
534 termresidue = max(residues)
536 elif termresidue
is None:
537 termresidue = max(residues)
539 elif terminus ==
"N":
540 if min(residues) <= termresidue
and not termresidue
is None:
541 termresidue = min(residues)
543 elif termresidue
is None:
544 termresidue = min(residues)
547 raise ValueError(
"terminus argument should be either N or C")
554 returns the residue index gaps and contiguous segments as tuples given the hierarchy, the first
555 residue and the last residue indexes. The list is organized as
556 [[1,100,"cont"],[101,120,"gap"],[121,200,"cont"]]
559 for n, rindex
in enumerate(range(start, end + 1)):
561 atom_type=IMP.atom.AT_CA)
563 if len(sel.get_selected_particles()) == 0:
567 rindexcont = start - 1
568 if rindexgap == rindex - 1:
574 gaps.append([rindex, rindex,
"gap"])
580 rindexgap = start - 1
582 if rindexcont == rindex - 1:
589 gaps.append([rindex, rindex,
"cont"])
600 def set_map_element(self, xvalue, yvalue):
601 self.map[xvalue] = yvalue
603 def get_map_element(self, invalue):
604 if type(invalue) == float:
608 dist = (invalue - x) * (invalue - x)
617 return self.map[minx]
618 elif type(invalue) == str:
619 return self.map[invalue]
621 raise TypeError(
"wrong type for map")
627 selection_arguments=
None,
629 name_is_ambiguous=
False,
633 representation_type=
None):
635 this function uses representation=SimplifiedModel
636 it returns the corresponding selected particles
637 representation_type="Beads", "Res:X", "Densities", "Representation", "Molecule"
640 if resolution
is None:
642 resolution_particles =
None
643 hierarchies_particles =
None
644 names_particles =
None
645 residue_range_particles =
None
646 residue_particles =
None
647 representation_type_particles =
None
649 if not resolution
is None:
650 resolution_particles = []
651 hs = representation.get_hierarchies_at_given_resolution(resolution)
655 if not hierarchies
is None:
656 hierarchies_particles = []
657 for h
in hierarchies:
662 if name_is_ambiguous:
663 for namekey
in representation.hier_dict:
666 representation.hier_dict[namekey])
667 elif name
in representation.hier_dict:
670 print "select: component %s is not there" % name
672 if not first_residue
is None and not last_residue
is None:
674 residue_indexes=range(first_residue, last_residue + 1))
676 for p
in sel.get_selected_particles()]
678 if not residue
is None:
681 for p
in sel.get_selected_particles()]
683 if not representation_type
is None:
684 representation_type_particles = []
685 if representation_type ==
"Molecule":
686 for name
in representation.hier_representation:
687 for repr_type
in representation.hier_representation[name]:
688 if repr_type ==
"Beads" or "Res:" in repr_type:
689 h = representation.hier_representation[name][repr_type]
692 elif representation_type ==
"PDB":
693 for name
in representation.hier_representation:
694 for repr_type
in representation.hier_representation[name]:
695 if repr_type ==
"Res:" in repr_type:
696 h = representation.hier_representation[name][repr_type]
700 for name
in representation.hier_representation:
701 h = representation.hier_representation[
706 selections = [hierarchies_particles, names_particles,
707 residue_range_particles, residue_particles, representation_type_particles]
709 if resolution
is None:
710 selected_particles = set(allparticles)
712 selected_particles = set(resolution_particles)
716 selected_particles = (set(s) & selected_particles)
718 return list(selected_particles)
725 name_is_ambiguous=
False):
726 if isinstance(tupleselection, tuple)
and len(tupleselection) == 3:
728 name=tupleselection[2],
729 first_residue=tupleselection[0],
730 last_residue=tupleselection[1],
731 name_is_ambiguous=name_is_ambiguous)
732 elif isinstance(tupleselection, str):
735 name_is_ambiguous=name_is_ambiguous)
737 raise ValueError(
'you passed something bad to select_by_tuple()')
739 particles = IMP.pmi.tools.sort_by_residues(particles)
744 def get_db_from_csv(csvfilename):
747 csvr = csv.DictReader(open(csvfilename,
"rU"))
753 class HierarchyDatabase(object):
758 self.root_hierarchy_dict = {}
759 self.preroot_fragment_hierarchy_dict = {}
760 self.particle_to_name = {}
763 def add_name(self, name):
764 if name
not in self.db:
767 def add_residue_number(self, name, resn):
770 if resn
not in self.db[name]:
771 self.db[name][resn] = {}
773 def add_resolution(self, name, resn, resolution):
775 resolution = float(resolution)
777 self.add_residue_number(name, resn)
778 if resolution
not in self.db[name][resn]:
779 self.db[name][resn][resolution] = []
783 resolution = float(resolution)
785 self.add_residue_number(name, resn)
786 self.add_resolution(name, resn, resolution)
787 self.db[name][resn][resolution] += particles
789 (rh, prf) = self.get_root_hierarchy(p)
790 self.root_hierarchy_dict[p] = rh
791 self.preroot_fragment_hierarchy_dict[p] = prf
792 self.particle_to_name[p] = name
793 if self.model
is None:
794 self.model = particles[0].get_model()
800 names = self.db.keys()
806 resolution = float(resolution)
807 return self.db[name][resn][resolution]
809 def get_particles_at_closest_resolution(self, name, resn, resolution):
811 resolution = float(resolution)
812 closestres = min(self.get_residue_resolutions(name, resn),
813 key=
lambda x: abs(float(x) - float(resolution)))
814 return self.get_particles(name, resn, closestres)
816 def get_residue_resolutions(self, name, resn):
818 resolutions = self.db[name][resn].keys()
822 def get_molecule_resolutions(self, name):
824 for resn
in self.db[name]:
825 resolutions.update(self.db[name][resn].keys())
829 def get_residue_numbers(self, name):
830 residue_numbers = self.db[name].keys()
831 residue_numbers.sort()
832 return residue_numbers
834 def get_particles_by_resolution(self, name, resolution):
835 resolution = float(resolution)
837 for resn
in self.get_residue_numbers(name):
838 result = self.get_particles_at_closest_resolution(
842 pstemp = [p
for p
in result
if p
not in particles]
846 def get_all_particles_by_resolution(self, resolution):
847 resolution = float(resolution)
849 for name
in self.get_names():
850 particles += self.get_particles_by_resolution(name, resolution)
853 def get_root_hierarchy(self, particle):
854 prerootfragment = particle
864 prerootfragment = particle
870 def get_all_root_hierarchies_by_resolution(self, resolution):
872 resolution = float(resolution)
873 particles = self.get_all_particles_by_resolution(resolution)
875 rh = self.root_hierarchy_dict[p]
876 if rh
not in hierarchies:
880 def get_preroot_fragments_by_resolution(self, name, resolution):
882 resolution = float(resolution)
883 particles = self.get_particles_by_resolution(name, resolution)
885 fr = self.preroot_fragment_hierarchy_dict[p]
886 if fr
not in fragments:
890 def show(self, name):
892 for resn
in self.get_residue_numbers(name):
894 for resolution
in self.get_residue_resolutions(name, resn):
895 print "----", resolution
896 for p
in self.get_particles(name, resn, resolution):
897 print "--------", p.get_name()
901 ''' this function returns the component name provided a particle and a list of names'''
903 protname = root.get_name()
905 while not protname
in list_of_names:
906 root0 = root.get_parent()
909 protname = root0.get_name()
914 if "Beads" in protname:
917 return (protname, is_a_bead)
922 This "overloaded" function retrieves the residue indexes
923 for each particle which is an instance of Fragment,Residue or Atom
934 print "get_residue_indexes> input is not Fragment, Residue or Atom"
939 def sort_by_residues(particles):
942 sorted_particles_residues = sorted(
944 key=
lambda tup: tup[1])
945 particles = [p[0]
for p
in sorted_particles_residues]
949 def get_residue_to_particle_map(particles):
951 particles = sort_by_residues(particles)
954 return dict(zip(particles_residues, particles))
962 """Synchronize data over a parallel run"""
963 from mpi4py
import MPI
964 comm = MPI.COMM_WORLD
965 rank = comm.Get_rank()
966 number_of_processes = comm.size
969 comm.send(data, dest=0, tag=11)
972 for i
in range(1, number_of_processes):
973 data_tmp = comm.recv(source=i, tag=11)
974 if type(data) == list:
976 elif type(data) == dict:
977 data.update(data_tmp)
979 raise TypeError(
"data not supported, use list or dictionaries")
981 for i
in range(1, number_of_processes):
982 comm.send(data, dest=i, tag=11)
985 data = comm.recv(source=0, tag=11)
989 """Synchronize data over a parallel run"""
990 from mpi4py
import MPI
991 comm = MPI.COMM_WORLD
992 rank = comm.Get_rank()
993 number_of_processes = comm.size
996 comm.send(data, dest=0, tag=11)
998 for i
in range(1, number_of_processes):
999 data_tmp = comm.recv(source=i, tag=11)
1001 data[k]+=data_tmp[k]
1003 for i
in range(1, number_of_processes):
1004 comm.send(data, dest=i, tag=11)
1007 data = comm.recv(source=0, tag=11)
1017 this iterator yields all sublists
1018 of length >= lmin and <= lmax
1026 for j
in xrange(i + 1, n):
1027 if len(l[i:j]) <= lmax
and len(l[i:j]) >= lmin:
1031 def flatten_list(l):
1032 return [item
for sublist
in l
for item
in sublist]
1036 """ Yield successive length-sized chunks from a list.
1038 for i
in xrange(0, len(list), length):
1039 yield list[i:i + length]
1042 def chunk_list_into_segments(seq, num):
1043 avg = len(seq) / float(num)
1047 while last < len(seq):
1048 out.append(seq[int(last):int(last + avg)])
1060 this will apply a translation to a hierarchy along the input vector
1064 if type(translation_vector) == list:
1083 def translate_hierarchies(hierarchies, translation_vector):
1084 for h
in hierarchies:
1088 def translate_hierarchies_to_reference_frame(hierarchies):
1093 for h
in hierarchies:
1103 IMP.pmi.tools.translate_hierarchies(hierarchies, (-xc, -yc, -zc))
1110 def normal_density_function(expected_value, sigma, x):
1112 1 / math.sqrt(2 * math.pi) / sigma *
1113 math.exp(-(x - expected_value) ** 2 / 2 / sigma / sigma)
1117 def log_normal_density_function(expected_value, sigma, x):
1119 1 / math.sqrt(2 * math.pi) / sigma / x *
1120 math.exp(-(math.log(x / expected_value) ** 2 / 2 / sigma / sigma))
1124 def get_random_residue_pairs(representation, resolution,
1126 avoid_same_particles=
False,
1129 from random
import choice
1132 names=representation.hier_dict.keys()
1135 prot = representation.hier_dict[name]
1136 particles +=
select(representation,name=name,resolution=resolution)
1137 random_residue_pairs = []
1138 while len(random_residue_pairs)<=number:
1139 p1 = choice(particles)
1140 p2 = choice(particles)
1141 if p1==p2
and avoid_same_particles:
continue
1144 name1 = representation.get_prot_name_from_particle(p1)
1145 name2 = representation.get_prot_name_from_particle(p2)
1146 random_residue_pairs.append((name1, r1, name2, r2))
1148 return random_residue_pairs
1151 def get_random_data_point(
1157 begin_end_nbins_tuple,
1162 begin = begin_end_nbins_tuple[0]
1163 end = begin_end_nbins_tuple[1]
1164 nbins = begin_end_nbins_tuple[2]
1167 fmod_grid =
get_grid(begin, end, nbins,
True)
1169 fmod_grid = get_log_grid(begin, end, nbins)
1176 for i
in range(0, ntrials):
1177 a.append([random.random(),
True])
1180 for j
in range(1, len(fmod_grid)):
1182 fjm1 = fmod_grid[j - 1]
1186 pj = normal_density_function(expected_value, sigma, fj)
1187 pjm1 = normal_density_function(expected_value, sigma, fjm1)
1189 pj = log_normal_density_function(expected_value, sigma, fj)
1190 pjm1 = log_normal_density_function(expected_value, sigma, fjm1)
1192 norm += (pj + pjm1) / 2.0 * df
1198 for i
in range(len(cumul)):
1201 if (aa[0] <= cumul[i] / norm
and aa[1]):
1202 random_points.append(
1203 int(fmod_grid[i] / sensitivity) * sensitivity)
1207 random_points = [expected_value] * ntrials
1209 for i
in range(len(random_points)):
1210 if random.random() < outlierprob:
1211 a = random.uniform(begin, end)
1212 random_points[i] = int(a / sensitivity) * sensitivity
1215 for i in range(ntrials):
1216 if random.random() > OUTLIERPROB_:
1217 r=truncnorm.rvs(0.0,1.0,expected_value,BETA_)
1218 if r>1.0: print r,expected_value,BETA_
1221 random_points.append(int(r/sensitivity)*sensitivity)
1226 for r
in random_points:
1230 rmean /= float(ntrials)
1231 rmean2 /= float(ntrials)
1232 stddev = math.sqrt(max(rmean2 - rmean * rmean, 0.))
1233 return rmean, stddev
1235 is_already_printed = {}
1238 def print_deprecation_warning(old_name, new_name):
1239 if old_name
not in is_already_printed:
1240 print "WARNING: " + old_name +
" is deprecated, use " + new_name +
" instead"
1241 is_already_printed[old_name] =
True
1244 def print_multicolumn(list_of_strings, ncolumns=2, truncate=40):
1250 for i
in range(len(l) % cols):
1253 split = [l[i:i + len(l) / cols]
for i
in range(0, len(l), len(l) / cols)]
1254 for row
in zip(*split):
1255 print "".join(str.ljust(i, truncate)
for i
in row)
1259 ''' read dssp file, get SSEs. values are all PDB residue numbering. returns dict of sel tuples
1260 helix : [ [ ['A',5,7] ] , [['B',15,17]] , ...] two helices A:5-7,B:15-17
1261 beta : [ [ ['A',1,3] , ['A',100,102] ] , ...] one sheet: A:1-3 & A:100-102
1262 loop : same format as helix, it's the contiguous loops
1265 from collections
import defaultdict
1268 sses = {
'helix': [],
1271 helix_classes =
'GHI'
1272 strand_classes =
'EB'
1273 loop_classes = [
' ',
'',
'T',
'S']
1275 for h
in helix_classes:
1276 sse_dict[h] =
'helix'
1277 for s
in strand_classes:
1278 sse_dict[s] =
'beta'
1279 for l
in loop_classes:
1280 sse_dict[l] =
'loop'
1285 beta_dict = defaultdict(list)
1289 for line
in open(dssp_fn,
'r'):
1290 fields = line.split()
1294 if fields[1] ==
"RESIDUE":
1302 elif limit_to_chains !=
'' and line[11]
not in limit_to_chains:
1307 pdb_res_num = int(line[5:10])
1308 chain =
'chain' + line[11]
1313 if prev_sstype
is None:
1314 cur_sse = [pdb_res_num, pdb_res_num, chain]
1315 elif sstype != prev_sstype
or chain_break:
1317 if sse_dict[prev_sstype]
in [
'helix',
'loop']:
1318 sses[sse_dict[prev_sstype]].append([cur_sse])
1319 if sse_dict[prev_sstype] ==
'beta':
1320 beta_dict[prev_beta_id].append(cur_sse)
1321 cur_sse = [pdb_res_num, pdb_res_num, chain]
1323 cur_sse[1] = pdb_res_num
1328 prev_sstype = sstype
1329 prev_beta_id = beta_id
1332 if not prev_sstype
is None:
1333 if sse_dict[prev_sstype]
in [
'helix',
'loop']:
1334 sses[sse_dict[prev_sstype]].append([cur_sse])
1335 if sse_dict[prev_sstype] ==
'beta':
1336 beta_dict[prev_beta_id].append(cur_sse)
1339 for beta_sheet
in beta_dict:
1340 sses[
'beta'].append(beta_dict[beta_sheet])
1346 ''' get chimera command to check if you've correctly made the dssp dictionary
1347 colors each helix and beta sheet'''
1349 'helix':
'color green ',
1350 'beta':
'color blue ',
1351 'loop':
'color red '}
1352 for skey
in dssp_dict.keys():
1353 for sgroup
in dssp_dict[skey]:
1355 start, stop, chain = sse
1356 chain = chain.strip(
'chain')
1358 skey] +=
'#%i:%s-%s.%s ' % (chimera_model_num, start, stop, chain)
1359 print '; '.join([cmds[k]
for k
in cmds])
1365 '''a class to change color code to hexadecimal to rgb'''
1367 self._NUMERALS =
'0123456789abcdefABCDEF'
1368 self._HEXDEC = dict((v, int(v, 16))
for v
in (x+y
for x
in self._NUMERALS
for y
in self._NUMERALS))
1369 self.LOWERCASE, self.UPPERCASE =
'x',
'X'
1371 def rgb(self,triplet):
1372 return float(self._HEXDEC[triplet[0:2]]), float(self._HEXDEC[triplet[2:4]]), float(self._HEXDEC[triplet[4:6]])
1374 def triplet(self,rgb, lettercase=None):
1375 if lettercase
is None: lettercase=self.LOWERCASE
1376 return format(rgb[0]<<16 | rgb[1]<<8 | rgb[2],
'06'+lettercase)
1379 class OrderedSet(collections.MutableSet):
1381 def __init__(self, iterable=None):
1383 end += [
None, end, end]
1385 if iterable
is not None:
1389 return len(self.map)
1391 def __contains__(self, key):
1392 return key
in self.map
1395 if key
not in self.map:
1398 curr[2] = end[1] = self.map[key] = [key, curr, end]
1400 def discard(self, key):
1402 key, prev, next = self.map.pop(key)
1409 while curr
is not end:
1413 def __reversed__(self):
1416 while curr
is not end:
1420 def pop(self, last=True):
1422 raise KeyError(
'set is empty')
1424 key = self.end[1][0]
1426 key = self.end[2][0]
1432 return '%s()' % (self.__class__.__name__,)
1433 return '%s(%r)' % (self.__class__.__name__, list(self))
1435 def __eq__(self, other):
1436 if isinstance(other, OrderedSet):
1437 return len(self) == len(other)
and list(self) == list(other)
1438 return set(self) == set(other)
A decorator to associate a particle with a part of a protein/DNA/RNA.
static Weight setup_particle(kernel::Model *m, ParticleIndex pi)
Ints get_index(const kernel::ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
double get_drmsd(const Vector3DsOrXYZs0 &m0, const Vector3DsOrXYZs1 &m1)
Calculate distance the root mean square deviation between two sets of 3D points.
ParticlesTemp get_particles(kernel::Model *m, const ParticleIndexes &ps)
double get_resolution(kernel::Model *m, kernel::ParticleIndex pi)
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
static Scale setup_particle(kernel::Model *m, ParticleIndex pi)
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Add resolution to a particle.
algebra::GridD< 3, algebra::DenseGridStorageD< 3, float >, float > get_grid(DensityMap *in_map)
The standard decorator for manipulating molecular structures.
A decorator for a particle representing an atom.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
void add_particles(RMF::FileHandle fh, const kernel::ParticlesTemp &hs)
A decorator for a particle with x,y,z coordinates.
Class to handle individual model particles.
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
A decorator for a residue.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
static bool get_is_setup(kernel::Model *m, kernel::ParticleIndex pi)
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
void show(Hierarchy h, std::ostream &out=std::cout)
Print out a molecular hierarchy.
std::string get_data_path(std::string file_name)
Return the full path to installed data.
A decorator for a rigid body.
void add_particle(RMF::FileHandle fh, kernel::Particle *hs)
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...