3 """@namespace IMP.pmi.tools
4 Miscellaneous utilities.
12 class Stopwatch(object):
14 def __init__(self, isdelta=True):
18 self.starttime = time.clock()
20 self.isdelta = isdelta
22 def set_label(self, labelstr):
28 newtime = time.clock()
32 "_delta_seconds"] = str(
35 self.starttime = newtime
37 output[
"Stopwatch_" + self.label +
38 "_elapsed_seconds"] = str(time.clock() - self.starttime)
42 class SetupNuisance(object):
44 def __init__(self, m, initialvalue, minvalue, maxvalue, isoptimized=True):
49 nuisance.set_lower(minvalue)
51 nuisance.set_upper(maxvalue)
54 nuisance.set_is_optimized(nuisance.get_nuisance_key(), isoptimized)
55 self.nuisance = nuisance
57 def get_particle(self):
61 class SetupWeight(object):
63 def __init__(self, m, isoptimized=True):
67 self.weight.set_weights_are_optimized(
True)
69 def get_particle(self):
73 class ParticleToSampleFilter(object):
74 def __init__(self, sampled_objects):
75 self.sampled_objects=sampled_objects
78 def add_filter(self,filter_string):
79 self.filters.append(filter_string)
81 def get_particles_to_sample(self):
82 particles_to_sample={}
83 for so
in self.sampled_objects:
84 ps_dict=so.get_particles_to_sample()
86 for f
in self.filters:
88 if key
not in particles_to_sample:
89 particles_to_sample[key]=ps_dict[key]
91 particles_to_sample[key]+=ps_dict[key]
92 return particles_to_sample
94 class ParticleToSampleList(object):
96 def __init__(self, label="None"):
98 self.dictionary_particle_type = {}
99 self.dictionary_particle_transformation = {}
100 self.dictionary_particle_name = {}
107 particle_transformation,
109 if not particle_type
in [
"Rigid_Bodies",
"Floppy_Bodies",
"Nuisances",
"X_coord",
"Weights"]:
110 print "ParticleToSampleList: not the right particle type"
113 self.dictionary_particle_type[particle] = particle_type
114 if particle_type ==
"Rigid_Bodies":
115 if type(particle_transformation) == tuple
and len(particle_transformation) == 2
and type(particle_transformation[0]) == float
and type(particle_transformation[1]) == float:
116 self.dictionary_particle_transformation[
117 particle] = particle_transformation
118 self.dictionary_particle_name[particle] = name
120 print "ParticleToSampleList: not the right transformation format for Rigid_Bodies, should be a tuple a floats"
123 if type(particle_transformation) == float:
124 self.dictionary_particle_transformation[
125 particle] = particle_transformation
126 self.dictionary_particle_name[particle] = name
128 print "ParticleToSampleList: not the right transformation format sould be a float"
131 def get_particles_to_sample(self):
133 for particle
in self.dictionary_particle_type:
134 key = self.dictionary_particle_type[
135 particle] +
"ParticleToSampleList_" + self.dictionary_particle_name[particle] +
"_" + self.label
138 self.dictionary_particle_transformation[particle])
143 class Variance(object):
145 def __init__(self, model, tau, niter, prot, th_profile, write_data=False):
146 global sqrt, os, random
147 from math
import sqrt
152 self.write_data = write_data
157 self.particles = particles
159 self.refpos = [
IMP.core.XYZ(p).get_coordinates()
for p
in particles]
160 self.model_profile = th_profile
162 def perturb_particles(self, perturb=True):
163 for i, p
in enumerate(self.particles):
164 newpos = array(self.refpos[i])
166 newpos += random.normal(0, self.tau, 3)
170 def get_profile(self):
171 model_profile = self.model_profile
172 p = model_profile.calculate_profile(self.particles, IMP.saxs.CA_ATOMS)
173 return array([model_profile.get_intensity(i)
for i
in
174 xrange(model_profile.size())])
176 def init_variances(self):
178 N = self.model_profile.size()
179 a = self.profiles[0][:]
181 self.V = self.m * self.m.T
182 self.normm = linalg.norm(self.m)
183 self.normV = linalg.norm(self.V)
185 def update_variances(self):
186 a = matrix(self.profiles[-1])
187 n = float(len(self.profiles))
188 self.m = a.T / n + (n - 1) / n * self.m
189 self.V = a.T * a + self.V
190 self.oldnormm = self.normm
191 self.oldnormV = self.normV
192 self.normm = linalg.norm(self.m)
193 self.normV = linalg.norm(self.V)
194 self.diffm = (self.oldnormm - self.normm) / self.oldnormm
195 self.diffV = (self.oldnormV - self.normV) / self.oldnormV
197 def get_direct_stats(self, a):
202 for q, I
in enumerate(prof):
207 Sigma = (matrix(a - m))
208 Sigma = Sigma.T * Sigma / (nprof - 1)
209 mi = matrix(diag(1. / m))
210 Sigmarel = mi.T * Sigma * mi
211 return m, V, Sigma, Sigmarel
213 def store_data(self):
214 if not os.path.isdir(
'data'):
216 profiles = matrix(self.profiles)
217 self.directm, self.directV, self.Sigma, self.Sigmarel = \
218 self.get_direct_stats(array(profiles))
219 directV = self.directV
221 save(
'data/profiles', profiles)
223 fl = open(
'data/profiles.dat',
'w')
224 for i, l
in enumerate(array(profiles).T):
225 self.model_profile.get_q(i)
228 fl.write(
'%s ' % (k - self.directm[i]))
231 fl = open(
'data/profiles_rel.dat',
'w')
232 for i, l
in enumerate(array(profiles).T):
233 self.model_profile.get_q(i)
236 fl.write(
'%s ' % ((k - self.directm[i]) / self.directm[i]))
238 save(
'data/m', self.directm)
239 save(
'data/V', self.directV)
241 save(
'data/Sigma', Sigma)
243 fl = open(
'data/Sigma.dat',
'w')
244 model_profile = self.model_profile
245 for i
in xrange(model_profile.size()):
246 qi = model_profile.get_q(i)
247 for j
in xrange(model_profile.size()):
248 qj = model_profile.get_q(j)
249 vij = self.Sigma[i, j]
250 fl.write(
'%s %s %s\n' % (qi, qj, vij))
253 fl = open(
'data/eigenvals',
'w')
254 for i
in linalg.eigvalsh(Sigma):
256 Sigmarel = self.Sigmarel
257 save(
'data/Sigmarel', Sigmarel)
259 fl = open(
'data/Sigmarel.dat',
'w')
260 model_profile = self.model_profile
261 for i
in xrange(model_profile.size()):
262 qi = model_profile.get_q(i)
263 for j
in xrange(model_profile.size()):
264 qj = model_profile.get_q(j)
265 vij = self.Sigmarel[i, j]
266 fl.write(
'%s %s %s\n' % (qi, qj, vij))
269 fl = open(
'data/eigenvals_rel',
'w')
270 for i
in linalg.eigvalsh(Sigmarel):
273 fl = open(
'data/mean.dat',
'w')
274 for i
in xrange(len(self.directm)):
275 qi = self.model_profile.get_q(i)
277 fl.write(
'%s ' % self.directm[i])
278 fl.write(
'%s ' % sqrt(self.Sigma[i, i]))
281 def try_chol(self, jitter):
284 linalg.cholesky(Sigma + matrix(eye(len(Sigma))) * jitter)
285 except linalg.LinAlgError:
286 print "Decomposition failed with jitter =", jitter
288 print "Successful decomposition with jitter =", jitter
291 self.profiles = [self.get_profile()]
293 for n
in xrange(self.niter):
294 self.perturb_particles()
295 self.profiles.append(self.get_profile())
307 def get_cov(self, relative=True):
317 number_of_cross_links=10,
318 ambiguity_probability=0.1,
319 confidence_score_range=[0,100],
320 avoid_same_particles=
False):
322 '''returns a random cross-link dataset into a string were every
323 line is a residue pair, together with UniqueIdentifier and XL score'''
325 residue_pairs=get_random_residue_pairs(representation, resolution, number_of_cross_links, avoid_same_particles)
327 from random
import random
329 cmin=float(min(confidence_score_range))
330 cmax=float(max(confidence_score_range))
334 for (name1, r1, name2, r2)
in residue_pairs:
335 if random() > ambiguity_probability:
337 score=random()*(cmax-cmin)+cmin
338 dataset+=str(name1)+
" "+str(name2)+
" "+str(r1)+
" "+str(r2)+
" "+str(score)+
" "+str(unique_identifier)+
"\n"
345 def get_cross_link_data(directory, filename, (distmin, distmax, ndist),
346 (omegamin, omegamax, nomega),
347 (sigmamin, sigmamax, nsigma),
348 don=
None, doff=
None, prior=0, type_of_profile=
"gofr"):
356 dictionary = eval(line)
359 xpot = dictionary[directory][filename][
"distance"]
360 pot = dictionary[directory][filename][type_of_profile]
362 dist_grid =
get_grid(distmin, distmax, ndist,
False)
363 omega_grid = get_log_grid(omegamin, omegamax, nomega)
364 sigma_grid = get_log_grid(sigmamin, sigmamax, nsigma)
366 if not don
is None and not doff
is None:
388 def get_cross_link_data_from_length(length, (distmin, distmax, ndist),
389 (omegamin, omegamax, nomega),
390 (sigmamin, sigmamax, nsigma)):
393 dist_grid =
get_grid(distmin, distmax, ndist,
False)
394 omega_grid = get_log_grid(omegamin, omegamax, nomega)
395 sigma_grid = get_log_grid(sigmamin, sigmamax, nsigma)
401 def get_grid(gmin, gmax, ngrid, boundaries):
403 dx = (gmax - gmin) / float(ngrid)
404 for i
in range(0, ngrid + 1):
405 if(
not boundaries
and i == 0):
407 if(
not boundaries
and i == ngrid):
409 grid.append(gmin + float(i) * dx)
415 def get_log_grid(gmin, gmax, ngrid):
416 from math
import exp, log
418 for i
in range(0, ngrid + 1):
419 grid.append(gmin * exp(float(i) / ngrid * log(gmax / gmin)))
427 example '"{ID_Score}" > 28 AND "{Sample}" ==
428 "%10_1%" OR ":Sample}" == "%10_2%" OR ":Sample}"
429 == "%10_3%" OR ":Sample}" == "%8_1%" OR ":Sample}" == "%8_2%"'
432 import pyparsing
as pp
434 operator = pp.Regex(
">=|<=|!=|>|<|==|in").setName(
"operator")
435 value = pp.QuotedString(
437 r"[+-]?\d+(:?\.\d*)?(:?[eE][+-]?\d+)?")
438 identifier = pp.Word(pp.alphas, pp.alphanums +
"_")
439 comparison_term = identifier | value
440 condition = pp.Group(comparison_term + operator + comparison_term)
442 expr = pp.operatorPrecedence(condition, [
443 (
"OR", 2, pp.opAssoc.LEFT, ),
444 (
"AND", 2, pp.opAssoc.LEFT, ),
447 parsedstring = str(expr.parseString(inputstring)) \
453 .replace(
"{",
"float(entry['") \
454 .replace(
"}",
"'])") \
455 .replace(
":",
"str(entry['") \
456 .replace(
"}",
"'])") \
457 .replace(
"AND",
"and") \
462 def open_file_or_inline_text(filename):
464 fl = open(filename,
"r")
466 fl = filename.split(
"\n")
473 for i
in range(0, len(prot0) - 1):
474 for j
in range(i + 1, len(prot0)):
477 drmsd += (dist0 - dist1) ** 2
479 return math.sqrt(drmsd / npairs)
484 def get_ids_from_fasta_file(fastafile):
486 ff = open(fastafile,
"r")
495 this function works with plain hierarchies, as read from the pdb,
496 no multi-scale hierarchies
503 atom_type=IMP.atom.AT_CA)
511 print "get_closest_residue_position: exiting while loop without result"
513 p = sel.get_selected_particles()
518 print "get_closest_residue_position: got NO residues for hierarchy %s and residue %i" % (hier, resindex)
519 raise Exception,
"get_closest_residue_position: got NO residues for hierarchy %s and residue %i" % (
522 print "get_closest_residue_position: got multiple residues for hierarchy %s and residue %i" % (hier, resindex)
523 print "the list of particles is", [pp.get_name()
for pp
in p]
529 this function get the xyz position of the
530 C or N terminal residue of a hierarchy, given the resolution.
531 the argument of terminus can be either N or C
539 if max(residues) >= termresidue
and not termresidue
is None:
540 termresidue = max(residues)
542 elif termresidue
is None:
543 termresidue = max(residues)
545 elif terminus ==
"N":
546 if min(residues) <= termresidue
and not termresidue
is None:
547 termresidue = min(residues)
549 elif termresidue
is None:
550 termresidue = min(residues)
553 print "get_position_terminal_residue> terminus argument should be either N or C"
561 returns the residue index gaps and contiguous segments as tuples given the hierarchy, the first
562 residue and the last residue indexes. The list is organized as
563 [[1,100,"cont"],[101,120,"gap"],[121,200,"cont"]]
566 for n, rindex
in enumerate(range(start, end + 1)):
568 atom_type=IMP.atom.AT_CA)
570 if len(sel.get_selected_particles()) == 0:
574 rindexcont = start - 1
575 if rindexgap == rindex - 1:
581 gaps.append([rindex, rindex,
"gap"])
587 rindexgap = start - 1
589 if rindexcont == rindex - 1:
596 gaps.append([rindex, rindex,
"cont"])
607 def set_map_element(self, xvalue, yvalue):
608 self.map[xvalue] = yvalue
610 def get_map_element(self, invalue):
611 if type(invalue) == float:
615 dist = (invalue - x) * (invalue - x)
624 return self.map[minx]
625 elif type(invalue) == str:
626 return self.map[invalue]
628 print "wrong type for map"
635 selection_arguments=
None,
637 name_is_ambiguous=
False,
641 representation_type=
None):
643 this function uses representation=SimplifiedModel
644 it returns the corresponding selected particles
645 representation_type="Beads", "Res:X", "Densities", "Representation", "Molecule"
648 if resolution
is None:
650 resolution_particles =
None
651 hierarchies_particles =
None
652 names_particles =
None
653 residue_range_particles =
None
654 residue_particles =
None
655 representation_type_particles =
None
657 if not resolution
is None:
658 resolution_particles = []
659 hs = representation.get_hierarchies_at_given_resolution(resolution)
663 if not hierarchies
is None:
664 hierarchies_particles = []
665 for h
in hierarchies:
670 if name_is_ambiguous:
671 for namekey
in representation.hier_dict:
674 representation.hier_dict[namekey])
675 elif name
in representation.hier_dict:
678 print "select: component %s is not there" % name
680 if not first_residue
is None and not last_residue
is None:
682 residue_indexes=range(first_residue, last_residue + 1))
684 for p
in sel.get_selected_particles()]
686 if not residue
is None:
689 for p
in sel.get_selected_particles()]
691 if not representation_type
is None:
692 representation_type_particles = []
693 if representation_type ==
"Molecule":
694 for name
in representation.hier_representation:
695 for repr_type
in representation.hier_representation[name]:
696 if repr_type ==
"Beads" or "Res:" in repr_type:
697 h = representation.hier_representation[name][repr_type]
700 elif representation_type ==
"PDB":
701 for name
in representation.hier_representation:
702 for repr_type
in representation.hier_representation[name]:
703 if repr_type ==
"Res:" in repr_type:
704 h = representation.hier_representation[name][repr_type]
708 for name
in representation.hier_representation:
709 h = representation.hier_representation[
714 selections = [hierarchies_particles, names_particles,
715 residue_range_particles, residue_particles, representation_type_particles]
717 if resolution
is None:
718 selected_particles = set(allparticles)
720 selected_particles = set(resolution_particles)
724 selected_particles = (set(s) & selected_particles)
726 return list(selected_particles)
733 name_is_ambiguous=
False):
734 if isinstance(tupleselection, tuple)
and len(tupleselection) == 3:
736 name=tupleselection[2],
737 first_residue=tupleselection[0],
738 last_residue=tupleselection[1],
739 name_is_ambiguous=name_is_ambiguous)
740 elif isinstance(tupleselection, str):
743 name_is_ambiguous=name_is_ambiguous)
745 print 'you passed something bad to select_by_tuple()'
748 particles = IMP.pmi.tools.sort_by_residues(particles)
753 def get_db_from_csv(csvfilename):
756 csvr = csv.DictReader(open(csvfilename,
"rU"))
762 class HierarchyDatabase(object):
767 self.root_hierarchy_dict = {}
768 self.preroot_fragment_hierarchy_dict = {}
769 self.particle_to_name = {}
772 def add_name(self, name):
773 if name
not in self.db:
776 def add_residue_number(self, name, resn):
779 if resn
not in self.db[name]:
780 self.db[name][resn] = {}
782 def add_resolution(self, name, resn, resolution):
784 resolution = float(resolution)
786 self.add_residue_number(name, resn)
787 if resolution
not in self.db[name][resn]:
788 self.db[name][resn][resolution] = []
792 resolution = float(resolution)
794 self.add_residue_number(name, resn)
795 self.add_resolution(name, resn, resolution)
796 self.db[name][resn][resolution] += particles
798 (rh, prf) = self.get_root_hierarchy(p)
799 self.root_hierarchy_dict[p] = rh
800 self.preroot_fragment_hierarchy_dict[p] = prf
801 self.particle_to_name[p] = name
802 if self.model
is None:
803 self.model = particles[0].get_model()
809 names = self.db.keys()
815 resolution = float(resolution)
816 return self.db[name][resn][resolution]
818 def get_particles_at_closest_resolution(self, name, resn, resolution):
820 resolution = float(resolution)
821 closestres = min(self.get_residue_resolutions(name, resn),
822 key=
lambda x: abs(float(x) - float(resolution)))
823 return self.get_particles(name, resn, closestres)
825 def get_residue_resolutions(self, name, resn):
827 resolutions = self.db[name][resn].keys()
831 def get_molecule_resolutions(self, name):
833 for resn
in self.db[name]:
834 resolutions.update(self.db[name][resn].keys())
838 def get_residue_numbers(self, name):
839 residue_numbers = self.db[name].keys()
840 residue_numbers.sort()
841 return residue_numbers
843 def get_particles_by_resolution(self, name, resolution):
844 resolution = float(resolution)
846 for resn
in self.get_residue_numbers(name):
847 result = self.get_particles_at_closest_resolution(
851 pstemp = [p
for p
in result
if p
not in particles]
855 def get_all_particles_by_resolution(self, resolution):
856 resolution = float(resolution)
858 for name
in self.get_names():
859 particles += self.get_particles_by_resolution(name, resolution)
862 def get_root_hierarchy(self, particle):
863 prerootfragment = particle
873 prerootfragment = particle
879 def get_all_root_hierarchies_by_resolution(self, resolution):
881 resolution = float(resolution)
882 particles = self.get_all_particles_by_resolution(resolution)
884 rh = self.root_hierarchy_dict[p]
885 if rh
not in hierarchies:
889 def get_preroot_fragments_by_resolution(self, name, resolution):
891 resolution = float(resolution)
892 particles = self.get_particles_by_resolution(name, resolution)
894 fr = self.preroot_fragment_hierarchy_dict[p]
895 if fr
not in fragments:
899 def show(self, name):
901 for resn
in self.get_residue_numbers(name):
903 for resolution
in self.get_residue_resolutions(name, resn):
904 print "----", resolution
905 for p
in self.get_particles(name, resn, resolution):
906 print "--------", p.get_name()
910 ''' this function returns the component name provided a particle and a list of names'''
912 protname = root.get_name()
914 while not protname
in list_of_names:
915 root0 = root.get_parent()
918 protname = root0.get_name()
923 if "Beads" in protname:
926 return (protname, is_a_bead)
931 This "overloaded" function retrieves the residue indexes
932 for each particle which is an instance of Fragmen,Residue or Atom
943 print "get_residue_indexes> input is not Fragment, Residue or Atom"
948 def sort_by_residues(particles):
951 sorted_particles_residues = sorted(
953 key=
lambda tup: tup[1])
954 particles = [p[0]
for p
in sorted_particles_residues]
958 def get_residue_to_particle_map(particles):
960 particles = sort_by_residues(particles)
963 return dict(zip(particles_residues, particles))
971 """Synchronize data over a parallel run"""
972 from mpi4py
import MPI
973 comm = MPI.COMM_WORLD
974 rank = comm.Get_rank()
975 number_of_processes = comm.size
978 comm.send(data, dest=0, tag=11)
981 for i
in range(1, number_of_processes):
982 data_tmp = comm.recv(source=i, tag=11)
983 if type(data) == list:
985 elif type(data) == dict:
986 data.update(data_tmp)
988 print "tools.scatter_and_gather: data not supported, use list or dictionaries"
991 for i
in range(1, number_of_processes):
992 comm.send(data, dest=i, tag=11)
995 data = comm.recv(source=0, tag=11)
1005 this iterator yields all sublists
1006 of length >= lmin and <= lmax
1014 for j
in xrange(i + 1, n):
1015 if len(l[i:j]) <= lmax
and len(l[i:j]) >= lmin:
1019 def flatten_list(l):
1020 return [item
for sublist
in l
for item
in sublist]
1024 """ Yield successive length-sized chunks from a list.
1026 for i
in xrange(0, len(list), length):
1027 yield list[i:i + length]
1030 def chunk_list_into_segments(seq, num):
1031 avg = len(seq) / float(num)
1035 while last < len(seq):
1036 out.append(seq[int(last):int(last + avg)])
1048 this will apply a translation to a hierarchy along the input vector
1052 if type(translation_vector) == list:
1071 def translate_hierarchies(hierarchies, translation_vector):
1072 for h
in hierarchies:
1076 def translate_hierarchies_to_reference_frame(hierarchies):
1081 for h
in hierarchies:
1091 IMP.pmi.tools.translate_hierarchies(hierarchies, (-xc, -yc, -zc))
1098 def normal_density_function(expected_value, sigma, x):
1100 1 / math.sqrt(2 * math.pi) / sigma *
1101 math.exp(-(x - expected_value) ** 2 / 2 / sigma / sigma)
1105 def log_normal_density_function(expected_value, sigma, x):
1107 1 / math.sqrt(2 * math.pi) / sigma / x *
1108 math.exp(-(math.log(x / expected_value) ** 2 / 2 / sigma / sigma))
1112 def get_random_residue_pairs(representation, resolution,
1114 avoid_same_particles=
False,
1117 from random
import choice
1120 names=representation.hier_dict.keys()
1123 prot = representation.hier_dict[name]
1124 particles +=
select(representation,name=name,resolution=resolution)
1125 random_residue_pairs = []
1126 while len(random_residue_pairs)<=number:
1127 p1 = choice(particles)
1128 p2 = choice(particles)
1129 if p1==p2
and avoid_same_particles:
continue
1132 name1 = representation.get_prot_name_from_particle(p1)
1133 name2 = representation.get_prot_name_from_particle(p2)
1134 random_residue_pairs.append((name1, r1, name2, r2))
1136 return random_residue_pairs
1139 def get_random_data_point(
1145 begin_end_nbins_tuple,
1150 begin = begin_end_nbins_tuple[0]
1151 end = begin_end_nbins_tuple[1]
1152 nbins = begin_end_nbins_tuple[2]
1155 fmod_grid =
get_grid(begin, end, nbins,
True)
1157 fmod_grid = get_log_grid(begin, end, nbins)
1164 for i
in range(0, ntrials):
1165 a.append([random.random(),
True])
1168 for j
in range(1, len(fmod_grid)):
1170 fjm1 = fmod_grid[j - 1]
1174 pj = normal_density_function(expected_value, sigma, fj)
1175 pjm1 = normal_density_function(expected_value, sigma, fjm1)
1177 pj = log_normal_density_function(expected_value, sigma, fj)
1178 pjm1 = log_normal_density_function(expected_value, sigma, fjm1)
1180 norm += (pj + pjm1) / 2.0 * df
1186 for i
in range(len(cumul)):
1189 if (aa[0] <= cumul[i] / norm
and aa[1]):
1190 random_points.append(
1191 int(fmod_grid[i] / sensitivity) * sensitivity)
1195 random_points = [expected_value] * ntrials
1197 for i
in range(len(random_points)):
1198 if random.random() < outlierprob:
1199 a = random.uniform(begin, end)
1200 random_points[i] = int(a / sensitivity) * sensitivity
1203 for i in range(ntrials):
1204 if random.random() > OUTLIERPROB_:
1205 r=truncnorm.rvs(0.0,1.0,expected_value,BETA_)
1206 if r>1.0: print r,expected_value,BETA_
1209 random_points.append(int(r/sensitivity)*sensitivity)
1214 for r
in random_points:
1218 rmean /= float(ntrials)
1219 rmean2 /= float(ntrials)
1220 stddev = math.sqrt(max(rmean2 - rmean * rmean, 0.))
1221 return rmean, stddev
1223 is_already_printed = {}
1226 def print_deprecation_warning(old_name, new_name):
1227 if old_name
not in is_already_printed:
1228 print "WARNING: " + old_name +
" is deprecated, use " + new_name +
" instead"
1229 is_already_printed[old_name] =
True
1232 def print_multicolumn(list_of_strings, ncolumns=2, truncate=40):
1238 for i
in range(len(l) % cols):
1241 split = [l[i:i + len(l) / cols]
for i
in range(0, len(l), len(l) / cols)]
1242 for row
in zip(*split):
1243 print "".join(str.ljust(i, truncate)
for i
in row)
1247 ''' read dssp file, get SSEs. values are all PDB residue numbering. returns dict of sel tuples
1248 helix : [ [ ['A',5,7] ] , [['B',15,17]] , ...] two helices A:5-7,B:15-17
1249 beta : [ [ ['A',1,3] , ['A',100,102] ] , ...] one sheet: A:1-3 & A:100-102
1250 loop : same format as helix, it's the contiguous loops
1253 from collections
import defaultdict
1256 sses = {
'helix': [],
1259 helix_classes =
'GHI'
1260 strand_classes =
'EB'
1261 loop_classes = [
' ',
'',
'T',
'S']
1263 for h
in helix_classes:
1264 sse_dict[h] =
'helix'
1265 for s
in strand_classes:
1266 sse_dict[s] =
'beta'
1267 for l
in loop_classes:
1268 sse_dict[l] =
'loop'
1273 beta_dict = defaultdict(list)
1277 for line
in open(dssp_fn,
'r'):
1278 fields = line.split()
1282 if fields[1] ==
"RESIDUE":
1290 elif limit_to_chains !=
'' and line[11]
not in limit_to_chains:
1295 pdb_res_num = int(line[5:10])
1296 chain =
'chain' + line[11]
1301 if prev_sstype
is None:
1302 cur_sse = [pdb_res_num, pdb_res_num, chain]
1303 elif sstype != prev_sstype
or chain_break:
1305 if sse_dict[prev_sstype]
in [
'helix',
'loop']:
1306 sses[sse_dict[prev_sstype]].append([cur_sse])
1307 if sse_dict[prev_sstype] ==
'beta':
1308 beta_dict[prev_beta_id].append(cur_sse)
1309 cur_sse = [pdb_res_num, pdb_res_num, chain]
1311 cur_sse[1] = pdb_res_num
1316 prev_sstype = sstype
1317 prev_beta_id = beta_id
1320 if not prev_sstype
is None:
1321 if sse_dict[prev_sstype]
in [
'helix',
'loop']:
1322 sses[sse_dict[prev_sstype]].append([cur_sse])
1323 if sse_dict[prev_sstype] ==
'beta':
1324 beta_dict[prev_beta_id].append(cur_sse)
1327 for beta_sheet
in beta_dict:
1328 sses[
'beta'].append(beta_dict[beta_sheet])
1334 ''' get chimera command to check if you've correctly made the dssp dictionary
1335 colors each helix and beta sheet'''
1337 'helix':
'color green ',
1338 'beta':
'color blue ',
1339 'loop':
'color red '}
1340 for skey
in dssp_dict.keys():
1341 for sgroup
in dssp_dict[skey]:
1343 start, stop, chain = sse
1344 chain = chain.strip(
'chain')
1346 skey] +=
'#%i:%s-%s.%s ' % (chimera_model_num, start, stop, chain)
1347 print '; '.join([cmds[k]
for k
in cmds])
1353 '''a class to change color code to hexadecimal to rgb'''
1355 self._NUMERALS =
'0123456789abcdefABCDEF'
1356 self._HEXDEC = dict((v, int(v, 16))
for v
in (x+y
for x
in self._NUMERALS
for y
in self._NUMERALS))
1357 self.LOWERCASE, self.UPPERCASE =
'x',
'X'
1359 def rgb(self,triplet):
1360 return float(self._HEXDEC[triplet[0:2]]), float(self._HEXDEC[triplet[2:4]]), float(self._HEXDEC[triplet[4:6]])
1362 def triplet(self,rgb, lettercase=None):
1363 if lettercase
is None: lettercase=self.LOWERCASE
1364 return format(rgb[0]<<16 | rgb[1]<<8 | rgb[2],
'06'+lettercase)
1367 class OrderedSet(collections.MutableSet):
1369 def __init__(self, iterable=None):
1371 end += [
None, end, end]
1373 if iterable
is not None:
1377 return len(self.map)
1379 def __contains__(self, key):
1380 return key
in self.map
1383 if key
not in self.map:
1386 curr[2] = end[1] = self.map[key] = [key, curr, end]
1388 def discard(self, key):
1390 key, prev, next = self.map.pop(key)
1397 while curr
is not end:
1401 def __reversed__(self):
1404 while curr
is not end:
1408 def pop(self, last=True):
1410 raise KeyError(
'set is empty')
1412 key = self.end[1][0]
1414 key = self.end[2][0]
1420 return '%s()' % (self.__class__.__name__,)
1421 return '%s(%r)' % (self.__class__.__name__, list(self))
1423 def __eq__(self, other):
1424 if isinstance(other, OrderedSet):
1425 return len(self) == len(other)
and list(self) == list(other)
1426 return set(self) == set(other)
A decorator to associate a particle with a part of a protein/DNA/RNA.
static Weight setup_particle(kernel::Model *m, ParticleIndex pi)
Ints get_index(const kernel::ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
double get_drmsd(const Vector3DsOrXYZs0 &m0, const Vector3DsOrXYZs1 &m1)
Calculate distance the root mean square deviation between two sets of 3D points.
ParticlesTemp get_particles(kernel::Model *m, const ParticleIndexes &ps)
double get_resolution(kernel::Model *m, kernel::ParticleIndex pi)
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
static Scale setup_particle(kernel::Model *m, ParticleIndex pi)
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Add resolution to a particle.
algebra::GridD< 3, algebra::DenseGridStorageD< 3, float >, float > get_grid(DensityMap *in_map)
The standard decorator for manipulating molecular structures.
A decorator for a particle representing an atom.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
void add_particles(RMF::FileHandle fh, const kernel::ParticlesTemp &hs)
A decorator for a particle with x,y,z coordinates.
Class to handle individual model particles.
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
A decorator for a residue.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
static bool get_is_setup(kernel::Model *m, kernel::ParticleIndex pi)
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
void show(Hierarchy h, std::ostream &out=std::cout)
Print out a molecular hierarchy.
std::string get_data_path(std::string file_name)
Return the full path to installed data.
A decorator for a rigid body.
void add_particle(RMF::FileHandle fh, kernel::Particle *hs)
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...