3 """@namespace IMP.pmi.tools
4 Miscellaneous utilities.
7 from __future__
import print_function
13 class Stopwatch(object):
15 def __init__(self, isdelta=True):
19 self.starttime = time.clock()
21 self.isdelta = isdelta
23 def set_label(self, labelstr):
29 newtime = time.clock()
33 "_delta_seconds"] = str(
36 self.starttime = newtime
38 output[
"Stopwatch_" + self.label +
39 "_elapsed_seconds"] = str(time.clock() - self.starttime)
43 class SetupNuisance(object):
45 def __init__(self, m, initialvalue, minvalue, maxvalue, isoptimized=True):
50 nuisance.set_lower(minvalue)
52 nuisance.set_upper(maxvalue)
55 nuisance.set_is_optimized(nuisance.get_nuisance_key(), isoptimized)
56 self.nuisance = nuisance
58 def get_particle(self):
62 class SetupWeight(object):
64 def __init__(self, m, isoptimized=True):
68 self.weight.set_weights_are_optimized(
True)
70 def get_particle(self):
74 class ParticleToSampleFilter(object):
75 def __init__(self, sampled_objects):
76 self.sampled_objects=sampled_objects
79 def add_filter(self,filter_string):
80 self.filters.append(filter_string)
82 def get_particles_to_sample(self):
83 particles_to_sample={}
84 for so
in self.sampled_objects:
85 ps_dict=so.get_particles_to_sample()
87 for f
in self.filters:
89 if key
not in particles_to_sample:
90 particles_to_sample[key]=ps_dict[key]
92 particles_to_sample[key]+=ps_dict[key]
93 return particles_to_sample
95 class ParticleToSampleList(object):
97 def __init__(self, label="None"):
99 self.dictionary_particle_type = {}
100 self.dictionary_particle_transformation = {}
101 self.dictionary_particle_name = {}
108 particle_transformation,
110 if not particle_type
in [
"Rigid_Bodies",
"Floppy_Bodies",
"Nuisances",
"X_coord",
"Weights"]:
111 raise TypeError(
"not the right particle type")
113 self.dictionary_particle_type[particle] = particle_type
114 if particle_type ==
"Rigid_Bodies":
115 if type(particle_transformation) == tuple
and len(particle_transformation) == 2
and type(particle_transformation[0]) == float
and type(particle_transformation[1]) == float:
116 self.dictionary_particle_transformation[
117 particle] = particle_transformation
118 self.dictionary_particle_name[particle] = name
120 raise TypeError(
"ParticleToSampleList: not the right transformation format for Rigid_Bodies, should be a tuple a floats")
122 if type(particle_transformation) == float:
123 self.dictionary_particle_transformation[
124 particle] = particle_transformation
125 self.dictionary_particle_name[particle] = name
127 raise TypeError(
"ParticleToSampleList: not the right transformation format sould be a float")
129 def get_particles_to_sample(self):
131 for particle
in self.dictionary_particle_type:
132 key = self.dictionary_particle_type[
133 particle] +
"ParticleToSampleList_" + self.dictionary_particle_name[particle] +
"_" + self.label
136 self.dictionary_particle_transformation[particle])
141 class Variance(object):
143 def __init__(self, model, tau, niter, prot, th_profile, write_data=False):
144 global sqrt, os, random
145 from math
import sqrt
150 self.write_data = write_data
155 self.particles = particles
157 self.refpos = [
IMP.core.XYZ(p).get_coordinates()
for p
in particles]
158 self.model_profile = th_profile
160 def perturb_particles(self, perturb=True):
161 for i, p
in enumerate(self.particles):
162 newpos = array(self.refpos[i])
164 newpos += random.normal(0, self.tau, 3)
168 def get_profile(self):
169 model_profile = self.model_profile
170 p = model_profile.calculate_profile(self.particles, IMP.saxs.CA_ATOMS)
171 return array([model_profile.get_intensity(i)
for i
in
172 range(model_profile.size())])
174 def init_variances(self):
176 N = self.model_profile.size()
177 a = self.profiles[0][:]
179 self.V = self.m * self.m.T
180 self.normm = linalg.norm(self.m)
181 self.normV = linalg.norm(self.V)
183 def update_variances(self):
184 a = matrix(self.profiles[-1])
185 n = float(len(self.profiles))
186 self.m = a.T / n + (n - 1) / n * self.m
187 self.V = a.T * a + self.V
188 self.oldnormm = self.normm
189 self.oldnormV = self.normV
190 self.normm = linalg.norm(self.m)
191 self.normV = linalg.norm(self.V)
192 self.diffm = (self.oldnormm - self.normm) / self.oldnormm
193 self.diffV = (self.oldnormV - self.normV) / self.oldnormV
195 def get_direct_stats(self, a):
200 for q, I
in enumerate(prof):
205 Sigma = (matrix(a - m))
206 Sigma = Sigma.T * Sigma / (nprof - 1)
207 mi = matrix(diag(1. / m))
208 Sigmarel = mi.T * Sigma * mi
209 return m, V, Sigma, Sigmarel
211 def store_data(self):
212 if not os.path.isdir(
'data'):
214 profiles = matrix(self.profiles)
215 self.directm, self.directV, self.Sigma, self.Sigmarel = \
216 self.get_direct_stats(array(profiles))
217 directV = self.directV
219 save(
'data/profiles', profiles)
221 fl = open(
'data/profiles.dat',
'w')
222 for i, l
in enumerate(array(profiles).T):
223 self.model_profile.get_q(i)
226 fl.write(
'%s ' % (k - self.directm[i]))
229 fl = open(
'data/profiles_rel.dat',
'w')
230 for i, l
in enumerate(array(profiles).T):
231 self.model_profile.get_q(i)
234 fl.write(
'%s ' % ((k - self.directm[i]) / self.directm[i]))
236 save(
'data/m', self.directm)
237 save(
'data/V', self.directV)
239 save(
'data/Sigma', Sigma)
241 fl = open(
'data/Sigma.dat',
'w')
242 model_profile = self.model_profile
243 for i
in range(model_profile.size()):
244 qi = model_profile.get_q(i)
245 for j
in range(model_profile.size()):
246 qj = model_profile.get_q(j)
247 vij = self.Sigma[i, j]
248 fl.write(
'%s %s %s\n' % (qi, qj, vij))
251 fl = open(
'data/eigenvals',
'w')
252 for i
in linalg.eigvalsh(Sigma):
254 Sigmarel = self.Sigmarel
255 save(
'data/Sigmarel', Sigmarel)
257 fl = open(
'data/Sigmarel.dat',
'w')
258 model_profile = self.model_profile
259 for i
in range(model_profile.size()):
260 qi = model_profile.get_q(i)
261 for j
in range(model_profile.size()):
262 qj = model_profile.get_q(j)
263 vij = self.Sigmarel[i, j]
264 fl.write(
'%s %s %s\n' % (qi, qj, vij))
267 fl = open(
'data/eigenvals_rel',
'w')
268 for i
in linalg.eigvalsh(Sigmarel):
271 fl = open(
'data/mean.dat',
'w')
272 for i
in range(len(self.directm)):
273 qi = self.model_profile.get_q(i)
275 fl.write(
'%s ' % self.directm[i])
276 fl.write(
'%s ' % sqrt(self.Sigma[i, i]))
279 def try_chol(self, jitter):
282 linalg.cholesky(Sigma + matrix(eye(len(Sigma))) * jitter)
283 except linalg.LinAlgError:
284 print(
"Decomposition failed with jitter =", jitter)
286 print(
"Successful decomposition with jitter =", jitter)
289 self.profiles = [self.get_profile()]
291 for n
in range(self.niter):
292 self.perturb_particles()
293 self.profiles.append(self.get_profile())
305 def get_cov(self, relative=True):
315 number_of_cross_links=10,
316 ambiguity_probability=0.1,
317 confidence_score_range=[0,100],
318 avoid_same_particles=
False):
319 '''Return a random cross-link dataset as a string.
320 Every line is a residue pair, together with UniqueIdentifier
323 residue_pairs=get_random_residue_pairs(representation, resolution, number_of_cross_links, avoid_same_particles)
325 from random
import random
327 cmin=float(min(confidence_score_range))
328 cmax=float(max(confidence_score_range))
332 for (name1, r1, name2, r2)
in residue_pairs:
333 if random() > ambiguity_probability:
335 score=random()*(cmax-cmin)+cmin
336 dataset+=str(name1)+
" "+str(name2)+
" "+str(r1)+
" "+str(r2)+
" "+str(score)+
" "+str(unique_identifier)+
"\n"
343 def get_cross_link_data(directory, filename, dist, omega, sigma,
344 don=
None, doff=
None, prior=0, type_of_profile=
"gofr"):
346 (distmin, distmax, ndist) = dist
347 (omegamin, omegamax, nomega) = omega
348 (sigmamin, sigmamax, nsigma) = sigma
355 dictionary = eval(line)
358 xpot = dictionary[directory][filename][
"distance"]
359 pot = dictionary[directory][filename][type_of_profile]
361 dist_grid =
get_grid(distmin, distmax, ndist,
False)
362 omega_grid = get_log_grid(omegamin, omegamax, nomega)
363 sigma_grid = get_log_grid(sigmamin, sigmamax, nsigma)
365 if not don
is None and not doff
is None:
387 def get_cross_link_data_from_length(length, xxx_todo_changeme3, xxx_todo_changeme4, xxx_todo_changeme5):
388 (distmin, distmax, ndist) = xxx_todo_changeme3
389 (omegamin, omegamax, nomega) = xxx_todo_changeme4
390 (sigmamin, sigmamax, nsigma) = xxx_todo_changeme5
393 dist_grid =
get_grid(distmin, distmax, ndist,
False)
394 omega_grid = get_log_grid(omegamin, omegamax, nomega)
395 sigma_grid = get_log_grid(sigmamin, sigmamax, nsigma)
401 def get_grid(gmin, gmax, ngrid, boundaries):
403 dx = (gmax - gmin) / float(ngrid)
404 for i
in range(0, ngrid + 1):
405 if(
not boundaries
and i == 0):
407 if(
not boundaries
and i == ngrid):
409 grid.append(gmin + float(i) * dx)
415 def get_log_grid(gmin, gmax, ngrid):
416 from math
import exp, log
418 for i
in range(0, ngrid + 1):
419 grid.append(gmin * exp(float(i) / ngrid * log(gmax / gmin)))
427 example '"{ID_Score}" > 28 AND "{Sample}" ==
428 "%10_1%" OR ":Sample}" == "%10_2%" OR ":Sample}"
429 == "%10_3%" OR ":Sample}" == "%8_1%" OR ":Sample}" == "%8_2%"'
432 import pyparsing
as pp
434 operator = pp.Regex(
">=|<=|!=|>|<|==|in").setName(
"operator")
435 value = pp.QuotedString(
437 r"[+-]?\d+(:?\.\d*)?(:?[eE][+-]?\d+)?")
438 identifier = pp.Word(pp.alphas, pp.alphanums +
"_")
439 comparison_term = identifier | value
440 condition = pp.Group(comparison_term + operator + comparison_term)
442 expr = pp.operatorPrecedence(condition, [
443 (
"OR", 2, pp.opAssoc.LEFT, ),
444 (
"AND", 2, pp.opAssoc.LEFT, ),
447 parsedstring = str(expr.parseString(inputstring)) \
453 .replace(
"{",
"float(entry['") \
454 .replace(
"}",
"'])") \
455 .replace(
":",
"str(entry['") \
456 .replace(
"}",
"'])") \
457 .replace(
"AND",
"and") \
462 def open_file_or_inline_text(filename):
464 fl = open(filename,
"r")
466 fl = filename.split(
"\n")
473 for i
in range(0, len(prot0) - 1):
474 for j
in range(i + 1, len(prot0)):
477 drmsd += (dist0 - dist1) ** 2
479 return math.sqrt(drmsd / npairs)
484 def get_ids_from_fasta_file(fastafile):
486 ff = open(fastafile,
"r")
495 this function works with plain hierarchies, as read from the pdb,
496 no multi-scale hierarchies
503 atom_type=IMP.atom.AT_CA)
511 print(
"get_closest_residue_position: exiting while loop without result")
513 p = sel.get_selected_particles()
518 print(
"get_closest_residue_position: got NO residues for hierarchy %s and residue %i" % (hier, resindex))
519 raise Exception(
"get_closest_residue_position: got NO residues for hierarchy %s and residue %i" % (
522 raise ValueError(
"got multiple residues for hierarchy %s and residue %i; the list of particles is %s" % (hier, resindex, str([pp.get_name()
for pp
in p])))
526 Get the xyz position of the terminal residue at the given resolution.
527 @param hier hierarchy containing the terminal residue
528 @param terminus either 'N' or 'C'
529 @param resolution resolution to use.
537 if max(residues) >= termresidue
and not termresidue
is None:
538 termresidue = max(residues)
540 elif termresidue
is None:
541 termresidue = max(residues)
543 elif terminus ==
"N":
544 if min(residues) <= termresidue
and not termresidue
is None:
545 termresidue = min(residues)
547 elif termresidue
is None:
548 termresidue = min(residues)
551 raise ValueError(
"terminus argument should be either N or C")
558 Return the residue index gaps and contiguous segments in the hierarchy.
560 @param hierarchy hierarchy to examine
561 @param start first residue index
562 @param end last residue index
564 @return A list of lists of the form
565 [[1,100,"cont"],[101,120,"gap"],[121,200,"cont"]]
568 for n, rindex
in enumerate(range(start, end + 1)):
570 atom_type=IMP.atom.AT_CA)
572 if len(sel.get_selected_particles()) == 0:
576 rindexcont = start - 1
577 if rindexgap == rindex - 1:
583 gaps.append([rindex, rindex,
"gap"])
589 rindexgap = start - 1
591 if rindexcont == rindex - 1:
598 gaps.append([rindex, rindex,
"cont"])
609 def set_map_element(self, xvalue, yvalue):
610 self.map[xvalue] = yvalue
612 def get_map_element(self, invalue):
613 if type(invalue) == float:
617 dist = (invalue - x) * (invalue - x)
626 return self.map[minx]
627 elif type(invalue) == str:
628 return self.map[invalue]
630 raise TypeError(
"wrong type for map")
636 selection_arguments=
None,
638 name_is_ambiguous=
False,
642 representation_type=
None):
644 this function uses representation=SimplifiedModel
645 it returns the corresponding selected particles
646 representation_type="Beads", "Res:X", "Densities", "Representation", "Molecule"
649 if resolution
is None:
651 resolution_particles =
None
652 hierarchies_particles =
None
653 names_particles =
None
654 residue_range_particles =
None
655 residue_particles =
None
656 representation_type_particles =
None
658 if not resolution
is None:
659 resolution_particles = []
660 hs = representation.get_hierarchies_at_given_resolution(resolution)
664 if not hierarchies
is None:
665 hierarchies_particles = []
666 for h
in hierarchies:
671 if name_is_ambiguous:
672 for namekey
in representation.hier_dict:
675 representation.hier_dict[namekey])
676 elif name
in representation.hier_dict:
679 print(
"select: component %s is not there" % name)
681 if not first_residue
is None and not last_residue
is None:
683 residue_indexes=range(first_residue, last_residue + 1))
685 for p
in sel.get_selected_particles()]
687 if not residue
is None:
690 for p
in sel.get_selected_particles()]
692 if not representation_type
is None:
693 representation_type_particles = []
694 if representation_type ==
"Molecule":
695 for name
in representation.hier_representation:
696 for repr_type
in representation.hier_representation[name]:
697 if repr_type ==
"Beads" or "Res:" in repr_type:
698 h = representation.hier_representation[name][repr_type]
701 elif representation_type ==
"PDB":
702 for name
in representation.hier_representation:
703 for repr_type
in representation.hier_representation[name]:
704 if repr_type ==
"Res:" in repr_type:
705 h = representation.hier_representation[name][repr_type]
709 for name
in representation.hier_representation:
710 h = representation.hier_representation[
715 selections = [hierarchies_particles, names_particles,
716 residue_range_particles, residue_particles, representation_type_particles]
718 if resolution
is None:
719 selected_particles = set(allparticles)
721 selected_particles = set(resolution_particles)
725 selected_particles = (set(s) & selected_particles)
727 return list(selected_particles)
734 name_is_ambiguous=
False):
735 if isinstance(tupleselection, tuple)
and len(tupleselection) == 3:
737 name=tupleselection[2],
738 first_residue=tupleselection[0],
739 last_residue=tupleselection[1],
740 name_is_ambiguous=name_is_ambiguous)
741 elif isinstance(tupleselection, str):
744 name_is_ambiguous=name_is_ambiguous)
746 raise ValueError(
'you passed something bad to select_by_tuple()')
748 particles = IMP.pmi.tools.sort_by_residues(particles)
753 def get_db_from_csv(csvfilename):
756 csvr = csv.DictReader(open(csvfilename,
"rU"))
763 """Store the representations for a system."""
768 self.root_hierarchy_dict = {}
769 self.preroot_fragment_hierarchy_dict = {}
770 self.particle_to_name = {}
773 def add_name(self, name):
774 if name
not in self.db:
777 def add_residue_number(self, name, resn):
780 if resn
not in self.db[name]:
781 self.db[name][resn] = {}
783 def add_resolution(self, name, resn, resolution):
785 resolution = float(resolution)
787 self.add_residue_number(name, resn)
788 if resolution
not in self.db[name][resn]:
789 self.db[name][resn][resolution] = []
793 resolution = float(resolution)
795 self.add_residue_number(name, resn)
796 self.add_resolution(name, resn, resolution)
797 self.db[name][resn][resolution] += particles
799 (rh, prf) = self.get_root_hierarchy(p)
800 self.root_hierarchy_dict[p] = rh
801 self.preroot_fragment_hierarchy_dict[p] = prf
802 self.particle_to_name[p] = name
803 if self.model
is None:
804 self.model = particles[0].get_model()
810 names = list(self.db.keys())
816 resolution = float(resolution)
817 return self.db[name][resn][resolution]
819 def get_particles_at_closest_resolution(self, name, resn, resolution):
821 resolution = float(resolution)
822 closestres = min(self.get_residue_resolutions(name, resn),
823 key=
lambda x: abs(float(x) - float(resolution)))
824 return self.get_particles(name, resn, closestres)
826 def get_residue_resolutions(self, name, resn):
828 resolutions = list(self.db[name][resn].keys())
832 def get_molecule_resolutions(self, name):
834 for resn
in self.db[name]:
835 resolutions.update(list(self.db[name][resn].keys()))
839 def get_residue_numbers(self, name):
840 residue_numbers = list(self.db[name].keys())
841 residue_numbers.sort()
842 return residue_numbers
844 def get_particles_by_resolution(self, name, resolution):
845 resolution = float(resolution)
847 for resn
in self.get_residue_numbers(name):
848 result = self.get_particles_at_closest_resolution(
852 pstemp = [p
for p
in result
if p
not in particles]
856 def get_all_particles_by_resolution(self, resolution):
857 resolution = float(resolution)
859 for name
in self.get_names():
860 particles += self.get_particles_by_resolution(name, resolution)
863 def get_root_hierarchy(self, particle):
864 prerootfragment = particle
874 prerootfragment = particle
880 def get_all_root_hierarchies_by_resolution(self, resolution):
882 resolution = float(resolution)
883 particles = self.get_all_particles_by_resolution(resolution)
885 rh = self.root_hierarchy_dict[p]
886 if rh
not in hierarchies:
890 def get_preroot_fragments_by_resolution(self, name, resolution):
892 resolution = float(resolution)
893 particles = self.get_particles_by_resolution(name, resolution)
895 fr = self.preroot_fragment_hierarchy_dict[p]
896 if fr
not in fragments:
900 def show(self, name):
902 for resn
in self.get_residue_numbers(name):
904 for resolution
in self.get_residue_resolutions(name, resn):
905 print(
"----", resolution)
906 for p
in self.get_particles(name, resn, resolution):
907 print(
"--------", p.get_name())
911 '''Return the component name provided a particle and a list of names'''
913 protname = root.get_name()
915 while not protname
in list_of_names:
916 root0 = root.get_parent()
919 protname = root0.get_name()
924 if "Beads" in protname:
927 return (protname, is_a_bead)
932 Retrieve the residue indexes for the given particle.
934 The particle must be an instance of Fragment,Residue or Atom
945 print(
"get_residue_indexes> input is not Fragment, Residue or Atom")
950 def sort_by_residues(particles):
953 sorted_particles_residues = sorted(
955 key=
lambda tup: tup[1])
956 particles = [p[0]
for p
in sorted_particles_residues]
960 def get_residue_to_particle_map(particles):
962 particles = sort_by_residues(particles)
965 return dict(zip(particles_residues, particles))
973 """Synchronize data over a parallel run"""
974 from mpi4py
import MPI
975 comm = MPI.COMM_WORLD
976 rank = comm.Get_rank()
977 number_of_processes = comm.size
980 comm.send(data, dest=0, tag=11)
983 for i
in range(1, number_of_processes):
984 data_tmp = comm.recv(source=i, tag=11)
985 if type(data) == list:
987 elif type(data) == dict:
988 data.update(data_tmp)
990 raise TypeError(
"data not supported, use list or dictionaries")
992 for i
in range(1, number_of_processes):
993 comm.send(data, dest=i, tag=11)
996 data = comm.recv(source=0, tag=11)
1000 """Synchronize data over a parallel run"""
1001 from mpi4py
import MPI
1002 comm = MPI.COMM_WORLD
1003 rank = comm.Get_rank()
1004 number_of_processes = comm.size
1007 comm.send(data, dest=0, tag=11)
1009 for i
in range(1, number_of_processes):
1010 data_tmp = comm.recv(source=i, tag=11)
1012 data[k]+=data_tmp[k]
1014 for i
in range(1, number_of_processes):
1015 comm.send(data, dest=i, tag=11)
1018 data = comm.recv(source=0, tag=11)
1028 Yield all sublists of length >= lmin and <= lmax
1036 for j
in range(i + 1, n):
1037 if len(l[i:j]) <= lmax
and len(l[i:j]) >= lmin:
1041 def flatten_list(l):
1042 return [item
for sublist
in l
for item
in sublist]
1046 """ Yield successive length-sized chunks from a list.
1048 for i
in range(0, len(list), length):
1049 yield list[i:i + length]
1052 def chunk_list_into_segments(seq, num):
1054 avg = len(seq) / float(num)
1058 while last < len(seq):
1059 out.append(seq[int(last):int(last + avg)])
1071 Apply a translation to a hierarchy along the input vector.
1075 if type(translation_vector) == list:
1094 def translate_hierarchies(hierarchies, translation_vector):
1095 for h
in hierarchies:
1099 def translate_hierarchies_to_reference_frame(hierarchies):
1104 for h
in hierarchies:
1114 IMP.pmi.tools.translate_hierarchies(hierarchies, (-xc, -yc, -zc))
1121 def normal_density_function(expected_value, sigma, x):
1123 1 / math.sqrt(2 * math.pi) / sigma *
1124 math.exp(-(x - expected_value) ** 2 / 2 / sigma / sigma)
1128 def log_normal_density_function(expected_value, sigma, x):
1130 1 / math.sqrt(2 * math.pi) / sigma / x *
1131 math.exp(-(math.log(x / expected_value) ** 2 / 2 / sigma / sigma))
1135 def get_random_residue_pairs(representation, resolution,
1137 avoid_same_particles=
False,
1140 from random
import choice
1143 names=list(representation.hier_dict.keys())
1146 prot = representation.hier_dict[name]
1147 particles +=
select(representation,name=name,resolution=resolution)
1148 random_residue_pairs = []
1149 while len(random_residue_pairs)<=number:
1150 p1 = choice(particles)
1151 p2 = choice(particles)
1152 if p1==p2
and avoid_same_particles:
continue
1155 name1 = representation.get_prot_name_from_particle(p1)
1156 name2 = representation.get_prot_name_from_particle(p2)
1157 random_residue_pairs.append((name1, r1, name2, r2))
1159 return random_residue_pairs
1162 def get_random_data_point(
1168 begin_end_nbins_tuple,
1173 begin = begin_end_nbins_tuple[0]
1174 end = begin_end_nbins_tuple[1]
1175 nbins = begin_end_nbins_tuple[2]
1178 fmod_grid =
get_grid(begin, end, nbins,
True)
1180 fmod_grid = get_log_grid(begin, end, nbins)
1187 for i
in range(0, ntrials):
1188 a.append([random.random(),
True])
1191 for j
in range(1, len(fmod_grid)):
1193 fjm1 = fmod_grid[j - 1]
1197 pj = normal_density_function(expected_value, sigma, fj)
1198 pjm1 = normal_density_function(expected_value, sigma, fjm1)
1200 pj = log_normal_density_function(expected_value, sigma, fj)
1201 pjm1 = log_normal_density_function(expected_value, sigma, fjm1)
1203 norm += (pj + pjm1) / 2.0 * df
1209 for i
in range(len(cumul)):
1212 if (aa[0] <= cumul[i] / norm
and aa[1]):
1213 random_points.append(
1214 int(fmod_grid[i] / sensitivity) * sensitivity)
1218 random_points = [expected_value] * ntrials
1220 for i
in range(len(random_points)):
1221 if random.random() < outlierprob:
1222 a = random.uniform(begin, end)
1223 random_points[i] = int(a / sensitivity) * sensitivity
1224 print(random_points)
1226 for i in range(ntrials):
1227 if random.random() > OUTLIERPROB_:
1228 r=truncnorm.rvs(0.0,1.0,expected_value,BETA_)
1229 if r>1.0: print r,expected_value,BETA_
1232 random_points.append(int(r/sensitivity)*sensitivity)
1237 for r
in random_points:
1241 rmean /= float(ntrials)
1242 rmean2 /= float(ntrials)
1243 stddev = math.sqrt(max(rmean2 - rmean * rmean, 0.))
1244 return rmean, stddev
1246 is_already_printed = {}
1249 def print_deprecation_warning(old_name, new_name):
1250 if old_name
not in is_already_printed:
1251 print(
"WARNING: " + old_name +
" is deprecated, use " + new_name +
" instead")
1252 is_already_printed[old_name] =
True
1255 def print_multicolumn(list_of_strings, ncolumns=2, truncate=40):
1261 for i
in range(len(l) % cols):
1264 split = [l[i:i + len(l) / cols]
for i
in range(0, len(l), len(l) / cols)]
1265 for row
in zip(*split):
1266 print(
"".join(str.ljust(i, truncate)
for i
in row))
1270 '''Read dssp file and get secondary structure information.
1271 Values are all PDB residue numbering.
1272 @return dict of sel tuples
1273 helix : [ [ ['A',5,7] ] , [['B',15,17]] , ...] two helices A:5-7,B:15-17
1274 beta : [ [ ['A',1,3] , ['A',100,102] ] , ...] one sheet: A:1-3 & A:100-102
1275 loop : same format as helix, it's the contiguous loops
1278 from collections
import defaultdict
1281 sses = {
'helix': [],
1284 helix_classes =
'GHI'
1285 strand_classes =
'EB'
1286 loop_classes = [
' ',
'',
'T',
'S']
1288 for h
in helix_classes:
1289 sse_dict[h] =
'helix'
1290 for s
in strand_classes:
1291 sse_dict[s] =
'beta'
1292 for l
in loop_classes:
1293 sse_dict[l] =
'loop'
1298 beta_dict = defaultdict(list)
1302 for line
in open(dssp_fn,
'r'):
1303 fields = line.split()
1307 if fields[1] ==
"RESIDUE":
1315 elif limit_to_chains !=
'' and line[11]
not in limit_to_chains:
1320 pdb_res_num = int(line[5:10])
1321 chain =
'chain' + line[11]
1326 if prev_sstype
is None:
1327 cur_sse = [pdb_res_num, pdb_res_num, chain]
1328 elif sstype != prev_sstype
or chain_break:
1330 if sse_dict[prev_sstype]
in [
'helix',
'loop']:
1331 sses[sse_dict[prev_sstype]].append([cur_sse])
1332 if sse_dict[prev_sstype] ==
'beta':
1333 beta_dict[prev_beta_id].append(cur_sse)
1334 cur_sse = [pdb_res_num, pdb_res_num, chain]
1336 cur_sse[1] = pdb_res_num
1341 prev_sstype = sstype
1342 prev_beta_id = beta_id
1345 if not prev_sstype
is None:
1346 if sse_dict[prev_sstype]
in [
'helix',
'loop']:
1347 sses[sse_dict[prev_sstype]].append([cur_sse])
1348 if sse_dict[prev_sstype] ==
'beta':
1349 beta_dict[prev_beta_id].append(cur_sse)
1352 for beta_sheet
in beta_dict:
1353 sses[
'beta'].append(beta_dict[beta_sheet])
1359 ''' get chimera command to check if you've correctly made the dssp dictionary
1360 colors each helix and beta sheet'''
1362 'helix':
'color green ',
1363 'beta':
'color blue ',
1364 'loop':
'color red '}
1365 for skey
in dssp_dict.keys():
1366 for sgroup
in dssp_dict[skey]:
1368 start, stop, chain = sse
1369 chain = chain.strip(
'chain')
1371 skey] +=
'#%i:%s-%s.%s ' % (chimera_model_num, start, stop, chain)
1372 print(
'; '.join([cmds[k]
for k
in cmds]))
1378 '''Change color code to hexadecimal to rgb'''
1380 self._NUMERALS =
'0123456789abcdefABCDEF'
1381 self._HEXDEC = dict((v, int(v, 16))
for v
in (x+y
for x
in self._NUMERALS
for y
in self._NUMERALS))
1382 self.LOWERCASE, self.UPPERCASE =
'x',
'X'
1384 def rgb(self,triplet):
1385 return float(self._HEXDEC[triplet[0:2]]), float(self._HEXDEC[triplet[2:4]]), float(self._HEXDEC[triplet[4:6]])
1387 def triplet(self,rgb, lettercase=None):
1388 if lettercase
is None: lettercase=self.LOWERCASE
1389 return format(rgb[0]<<16 | rgb[1]<<8 | rgb[2],
'06'+lettercase)
1392 class OrderedSet(collections.MutableSet):
1394 def __init__(self, iterable=None):
1396 end += [
None, end, end]
1398 if iterable
is not None:
1402 return len(self.map)
1404 def __contains__(self, key):
1405 return key
in self.map
1408 if key
not in self.map:
1411 curr[2] = end[1] = self.map[key] = [key, curr, end]
1413 def discard(self, key):
1415 key, prev, next = self.map.pop(key)
1422 while curr
is not end:
1426 def __reversed__(self):
1429 while curr
is not end:
1433 def pop(self, last=True):
1435 raise KeyError(
'set is empty')
1437 key = self.end[1][0]
1439 key = self.end[2][0]
1445 return '%s()' % (self.__class__.__name__,)
1446 return '%s(%r)' % (self.__class__.__name__, list(self))
1448 def __eq__(self, other):
1449 if isinstance(other, OrderedSet):
1450 return len(self) == len(other)
and list(self) == list(other)
1451 return set(self) == set(other)
A decorator to associate a particle with a part of a protein/DNA/RNA.
static Weight setup_particle(kernel::Model *m, ParticleIndex pi)
Ints get_index(const kernel::ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
double get_drmsd(const Vector3DsOrXYZs0 &m0, const Vector3DsOrXYZs1 &m1)
Calculate distance the root mean square deviation between two sets of 3D points.
ParticlesTemp get_particles(kernel::Model *m, const ParticleIndexes &ps)
double get_resolution(kernel::Model *m, kernel::ParticleIndex pi)
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
static Scale setup_particle(kernel::Model *m, ParticleIndex pi)
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Add resolution to a particle.
algebra::GridD< 3, algebra::DenseGridStorageD< 3, float >, float > get_grid(DensityMap *in_map)
The standard decorator for manipulating molecular structures.
A decorator for a particle representing an atom.
void transform(XYZ a, const algebra::Transformation3D &tr)
Apply a transformation to the particle.
Hierarchies get_by_type(Hierarchy mhd, GetByType t)
void add_particles(RMF::FileHandle fh, const kernel::ParticlesTemp &hs)
A decorator for a particle with x,y,z coordinates.
Class to handle individual model particles.
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
A decorator for a residue.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
static bool get_is_setup(kernel::Model *m, kernel::ParticleIndex pi)
static bool get_is_setup(const IMP::kernel::ParticleAdaptor &p)
void show(Hierarchy h, std::ostream &out=std::cout)
Print out a molecular hierarchy.
std::string get_data_path(std::string file_name)
Return the full path to installed data.
A decorator for a rigid body.
void add_particle(RMF::FileHandle fh, kernel::Particle *hs)
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...