3 """@namespace IMP.isd.TBLReader
4 Classes to handle TBL files.
7 from __future__
import print_function
# Naming-convention identifier; it is the default value of the
# ``naming_system`` argument of read_distances, read_dihedrals and read_rdcs.
IUPAC_CONVENTION = 'iupac'
# NOTE(review): no use of this constant is visible in this part of the
# file -- presumably a residue-type tag consumed elsewhere; confirm.
TYPE_AMINO_ACID = 'AMINO_ACID'
29 return 'resid' in x
and 'name' in x
34 atom_dict = {
'segid':
'',
# Characters that mark an atom name as a pseudoatom: build_contributions
# checks whether the last character of the atom name is one of these before
# calling resolve_pseudoatom.
pseudoatom_char = ('*', '%', '#')
def __init__(self, sequence, ignore_warnings=False, sequence_match=(1, 1)):
    """Store the sequence and numbering offset and load the pseudoatom table.

    sequence        -- residue lookup; other methods of this class index it
                       by residue number (``self.sequence[resid]``).
    ignore_warnings -- stored as ``self.ignore``; how it is consulted is
                       not visible from this method.
    sequence_match  -- pair of residue numbers. The difference
                       ``sequence_match[1] - sequence_match[0]`` is kept as
                       ``self.offset`` and added to every residue number
                       parsed by ``extract_atom``.
                       NOTE(review): presumably (number used in the
                       restraint file, number used in the sequence) for the
                       same residue -- confirm.
    """
    self.sequence = sequence
    self.offset = sequence_match[1] - sequence_match[0]
    self.ignore = ignore_warnings
    # Load and pseudoatoms_dict are defined outside this method; the result
    # is indexed as pseudo_dict[residue_type][atom_name] by
    # resolve_pseudoatom.
    self.pseudo_dict = Load(pseudoatoms_dict)
48 def extract_contributions(self, contribs):
58 c = c[:c.rfind(
')') + 1]
60 new_contribs.append(c)
64 def split_contribution(self, contrib):
66 words = contrib.split(
'(')
67 atoms = [word.split(
')')[0]
for word
in words
if word]
71 def resolve_pseudoatom(self, residue_type, atom_name):
75 elif '#' in atom_name:
78 atom_name = atom_name.replace(char,
'%')
85 group = self.pseudo_dict[residue_type][atom_name]
89 key = atom_name, residue_type
91 if not key
in self.missing_atoms:
93 msg =
'Could not resolve pseudoatom %s.%s.' % (
94 residue_type, atom_name)
101 self.missing_atoms.append(key)
107 def to_iupac(self, residue_type, atom_name):
109 raise NotImplementedError
111 iupac_name = self.thesaurus.convert_atom(residue_type,
117 iupac_name = self.thesaurus.convert_atom(residue_type,
125 key = atom_name, residue_type
127 if not key
in self.missing_atoms:
129 if '*' in atom_name
or '#' in atom_name:
131 msg =
'Pseudoatoms not upported: %s' % atom_name
140 msg =
'Warning: atom %s not found in residue %s.' % key
143 raise KeyError(msg % key)
145 self.missing_atoms.append(key)
151 def resolve_dihedral_name(self, atoms):
153 raise NotImplementedError
155 names = [a[
'name']
for a
in atoms]
158 res_type = self.sequence[atoms[1][
'resid']]
161 print(
'Residue number overflow in atoms', atoms)
164 for dihedral
in self.connectivity[res_type].dihedrals.values():
166 keys = sorted([k
for k
in dihedral.keys()
if 'atom' in k])
172 if name[-1]
in (
'-',
'+'):
175 atom_names.append(name)
177 if atom_names == names:
178 return dihedral[
'name']
180 msg =
'Could not determine name of dihedral angles defined by atoms %s.' % str(
189 def extract_atom(self, a):
191 atom = dict(self.atom_dict)
199 words = [x
for x
in words
if x !=
'"']
201 for i
in range(len(words)):
212 for key
in atom.keys():
217 atom[key] = words[i + 1][:-1]
219 atom[key] = words[i + 1]
226 'Value or keyword "%s" unknown. Source: "%s", decomposed into "%s"' %
227 (word, str(a), str(words)))
229 atom[
'resid'] = int(atom[
'resid']) + self.offset
230 atom[
'name'] = atom[
'name'].upper()
234 def build_contributions(self, atoms):
241 res_type = self.sequence[a[
'resid']]
244 print(
'Residue number overflow in atoms', atoms)
247 atom_name = a[
'name']
249 if atom_name[-1]
in self.pseudoatom_char:
250 group = self.resolve_pseudoatom(res_type, atom_name)
258 group1, group2 = groups
262 res_1 = atoms[0][
'resid']
263 res_2 = atoms[1][
'resid']
265 for i
in range(len(group1)):
269 for j
in range(len(group2)):
271 if (res_1, name_1) != (res_2, group2[j]):
272 contribs.append(((res_1, name_1), (res_2, group2[j])))
276 def extract_target_values(self, line):
278 end = line.rfind(
')')
280 values = line[end + 1:].split()
283 distances = [float(x)
for x
in values[:3]]
289 val = line.split(
'volume=')
292 volume = float(val[1].split()[0].split(
',')[0])
297 return distances, volume
299 def read_contents(self, filename):
303 filename = os.path.expanduser(filename)
306 lines = f.readlines()
315 if not x
or x[0] ==
'!':
318 not_valid = [kw
for kw
in keywords
if kw
in x]
323 all += x.lower() +
' '
325 return [x.strip()
for x
in all.split(
'assi')]
327 def find_contributions(self, line):
329 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
333 if 1
in [x.count(
'resid')
for x
in contribs]:
339 start = line.find(
'(')
342 atoms[-1][-1] += line
345 stop = line.find(
')')
347 selection = [x.strip()
348 for x
in line[start:stop + 1].split(
'or')]
350 for i
in range(len(selection)):
362 atoms.append(selection)
364 line = line[stop + 1:]
373 for i
in range(len(atoms)):
377 for atom
in atoms[i]:
381 if n >= 0
and len(atom[n + 1:].strip()) > 3:
382 distances = atom[n + 1:].strip()
398 contribs.append(
'%s %s' % (i, j))
400 contribs[0] +=
' ' + distances
405 if distances
is None and volume
is None:
406 raise ValueError(
"could not find either volume or "
407 "distance: %s %s %s" % (distances, volume, contributions))
408 if distances
is None:
409 distances = [volume ** (-1. / 6), 0, 0]
412 volume = dist ** (-6)
413 lower = dist - distances[1]
414 upper = dist + distances[2]
415 return (tuple(contributions), dist, lower, upper, volume)
417 def read_distances(self, filename, key, naming_system=IUPAC_CONVENTION,
419 """reads a tbl file and parses distance restraints.
422 self.naming_system = naming_system
424 assigns = self.read_contents(filename)
427 self.missing_atoms = []
432 contribs = self.find_contributions(line)
434 if False in [check_assigns(x)
for x
in contribs]:
437 distances, volume = self.extract_target_values(contribs[0])
439 if (distances
is None and volume
is None):
440 distances, volume = self.extract_target_values(contribs[-1])
442 new_contribs = self.extract_contributions(contribs)
446 for contrib
in new_contribs:
448 atoms = self.split_contribution(contrib)
449 atoms = [self.extract_atom(x)
for x
in atoms]
451 contributions += self.build_contributions(atoms)
454 r = self.create_distance_restraint(distances, volume,
464 d = decompose_restraints(restraints)
466 for _type
in d.keys():
471 for _type, val
in d.items():
474 new_key = key +
'_%s' % _type
480 d = {key: list(d.values())[0]}
482 d = {key: restraints}
486 def read_dihedrals(self, filename, key, naming_system=IUPAC_CONVENTION):
488 self.naming_system = naming_system
490 assigns = self.read_contents(filename)
493 self.missing_atoms = []
498 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
500 values, volume = self.extract_target_values(contribs[0])
501 new_contribs = self.extract_contributions(contribs)
506 if len(new_contribs) > 1:
508 'Inconsistency in data file, multiple contributions detected.')
510 atoms = self.split_contribution(new_contribs[0])
511 atoms = [self.extract_atom(x)
for x
in atoms]
513 name = self.resolve_dihedral_name(atoms)
515 r = create_dihedral_restraint(seq_number, name, values, atoms)
523 def read_rdcs(self, filename, key, naming_system=IUPAC_CONVENTION):
525 self.naming_system = naming_system
527 assigns = self.read_contents(filename)
530 self.missing_atoms = []
533 fake_atom_names = (
'OO',
'X',
'Y',
'Z')
537 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
538 distances, volume = self.extract_target_values(contribs[0])
539 new_contribs = self.extract_contributions(contribs)
543 for contrib
in new_contribs:
545 atoms = self.split_contribution(contrib)
546 atoms = [self.extract_atom(x)
for x
in atoms]
548 atoms = [a
for a
in atoms
if not a[
'name']
in fake_atom_names]
550 contributions += self.build_contributions(atoms)
553 r = create_rdc_restraint(
564 if __name__ ==
'__main__':
567 sequence = read_sequence_file(
'seq.dat', first_residue_number=1)
568 reader = TBLReader(sequence, ignore_warnings=
True)
569 reader.read_distances(noe, key=
'test')
kernel::Restraint * create_distance_restraint(const Selection &n0, const Selection &n1, double x0, double k, std::string name="Distance%1%")
std::string get_data_path(std::string file_name)
Return the full path to installed data.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...