3 """@namespace IMP.isd.TBLReader
4 Classes to handle TBL files.
7 from __future__
import print_function
11 IUPAC_CONVENTION =
'iupac'
12 TYPE_AMINO_ACID =
'AMINO_ACID'
27 return 'resid' in x
and 'name' in x
32 atom_dict = {
'segid':
'',
36 pseudoatom_char =
'*',
'%',
'#'
38 def __init__(self, sequence, ignore_warnings=False, sequence_match=(1, 1)):
40 self.sequence = sequence
42 self.offset = sequence_match[1] - sequence_match[0]
43 self.ignore = ignore_warnings
44 self.pseudo_dict = Load(pseudoatoms_dict)
46 def extract_contributions(self, contribs):
56 c = c[:c.rfind(
')') + 1]
58 new_contribs.append(c)
62 def split_contribution(self, contrib):
64 words = contrib.split(
'(')
65 atoms = [word.split(
')')[0]
for word
in words
if word]
69 def resolve_pseudoatom(self, residue_type, atom_name):
73 elif '#' in atom_name:
76 atom_name = atom_name.replace(char,
'%')
83 group = self.pseudo_dict[residue_type][atom_name]
87 key = atom_name, residue_type
89 if key
not in self.missing_atoms:
91 msg =
'Could not resolve pseudoatom %s.%s.' % (
92 residue_type, atom_name)
99 self.missing_atoms.append(key)
105 def to_iupac(self, residue_type, atom_name):
107 raise NotImplementedError
109 iupac_name = self.thesaurus.convert_atom(residue_type,
115 iupac_name = self.thesaurus.convert_atom(residue_type,
123 key = atom_name, residue_type
125 if key
not in self.missing_atoms:
127 if '*' in atom_name
or '#' in atom_name:
129 msg =
'Pseudoatoms not supported: %s' % atom_name
138 msg =
'Warning: atom %s not found in residue %s.' % key
141 raise KeyError(msg % key)
143 self.missing_atoms.append(key)
149 def resolve_dihedral_name(self, atoms):
151 raise NotImplementedError
153 names = [a[
'name']
for a
in atoms]
156 res_type = self.sequence[atoms[1][
'resid']]
159 print(
'Residue number overflow in atoms', atoms)
162 for dihedral
in self.connectivity[res_type].dihedrals.values():
164 keys = sorted([k
for k
in dihedral.keys()
if 'atom' in k])
170 if name[-1]
in (
'-',
'+'):
173 atom_names.append(name)
175 if atom_names == names:
176 return dihedral[
'name']
178 msg = (
'Could not determine name of dihedral angles defined '
179 'by atoms %s.' % str(names))
187 def extract_atom(self, a):
189 atom = dict(self.atom_dict)
197 words = [x
for x
in words
if x !=
'"']
199 for i
in range(len(words)):
210 for key
in atom.keys():
215 atom[key] = words[i + 1][:-1]
217 atom[key] = words[i + 1]
224 'Value or keyword "%s" unknown. Source: "%s", '
225 'decomposed into "%s"' % (word, str(a), str(words)))
227 atom[
'resid'] = int(atom[
'resid']) + self.offset
228 atom[
'name'] = atom[
'name'].upper()
232 def build_contributions(self, atoms):
239 res_type = self.sequence[a[
'resid']]
242 print(
'Residue number overflow in atoms', atoms)
245 atom_name = a[
'name']
247 if atom_name[-1]
in self.pseudoatom_char:
248 group = self.resolve_pseudoatom(res_type, atom_name)
256 group1, group2 = groups
260 res_1 = atoms[0][
'resid']
261 res_2 = atoms[1][
'resid']
263 for i
in range(len(group1)):
267 for j
in range(len(group2)):
269 if (res_1, name_1) != (res_2, group2[j]):
270 contribs.append(((res_1, name_1), (res_2, group2[j])))
274 def extract_target_values(self, line):
276 end = line.rfind(
')')
278 values = line[end + 1:].split()
281 distances = [float(x)
for x
in values[:3]]
287 val = line.split(
'volume=')
290 volume = float(val[1].split()[0].split(
',')[0])
295 return distances, volume
297 def read_contents(self, filename):
301 filename = os.path.expanduser(filename)
304 lines = f.readlines()
313 if not x
or x[0] ==
'!':
316 not_valid = [kw
for kw
in keywords
if kw
in x]
321 all += x.lower() +
' '
323 return [x.strip()
for x
in all.split(
'assi')]
325 def find_contributions(self, line):
327 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
331 if 1
in [x.count(
'resid')
for x
in contribs]:
337 start = line.find(
'(')
340 atoms[-1][-1] += line
343 stop = line.find(
')')
345 selection = [x.strip()
346 for x
in line[start:stop + 1].split(
'or')]
348 for i
in range(len(selection)):
360 atoms.append(selection)
362 line = line[stop + 1:]
371 for i
in range(len(atoms)):
375 for atom
in atoms[i]:
379 if n >= 0
and len(atom[n + 1:].strip()) > 3:
380 distances = atom[n + 1:].strip()
396 contribs.append(
'%s %s' % (i, j))
398 contribs[0] +=
' ' + distances
403 if distances
is None and volume
is None:
404 raise ValueError(
"could not find either volume or "
406 % (distances, volume, contributions))
407 if distances
is None:
408 distances = [volume ** (-1. / 6), 0, 0]
411 volume = dist ** (-6)
412 lower = dist - distances[1]
413 upper = dist + distances[2]
414 return (tuple(contributions), dist, lower, upper, volume)
416 def read_distances(self, filename, key, naming_system=IUPAC_CONVENTION,
418 """reads a tbl file and parses distance restraints.
421 self.naming_system = naming_system
423 assigns = self.read_contents(filename)
426 self.missing_atoms = []
431 contribs = self.find_contributions(line)
433 if False in [check_assigns(x)
for x
in contribs]:
436 distances, volume = self.extract_target_values(contribs[0])
438 if (distances
is None and volume
is None):
439 distances, volume = self.extract_target_values(contribs[-1])
441 new_contribs = self.extract_contributions(contribs)
445 for contrib
in new_contribs:
447 atoms = self.split_contribution(contrib)
448 atoms = [self.extract_atom(x)
for x
in atoms]
450 contributions += self.build_contributions(atoms)
453 r = self.create_distance_restraint(distances, volume,
463 d = decompose_restraints(restraints)
465 for _type
in d.keys():
470 for _type, val
in d.items():
473 new_key = key +
'_%s' % _type
479 d = {key: list(d.values())[0]}
481 d = {key: restraints}
485 def read_dihedrals(self, filename, key, naming_system=IUPAC_CONVENTION):
487 self.naming_system = naming_system
489 assigns = self.read_contents(filename)
492 self.missing_atoms = []
497 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
499 values, volume = self.extract_target_values(contribs[0])
500 new_contribs = self.extract_contributions(contribs)
505 if len(new_contribs) > 1:
507 'Inconsistency in data file, multiple contributions '
510 atoms = self.split_contribution(new_contribs[0])
511 atoms = [self.extract_atom(x)
for x
in atoms]
513 name = self.resolve_dihedral_name(atoms)
515 r = create_dihedral_restraint(seq_number, name,
524 def read_rdcs(self, filename, key, naming_system=IUPAC_CONVENTION):
526 self.naming_system = naming_system
528 assigns = self.read_contents(filename)
531 self.missing_atoms = []
534 fake_atom_names = (
'OO',
'X',
'Y',
'Z')
538 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
539 distances, volume = self.extract_target_values(contribs[0])
540 new_contribs = self.extract_contributions(contribs)
544 for contrib
in new_contribs:
546 atoms = self.split_contribution(contrib)
547 atoms = [self.extract_atom(x)
for x
in atoms]
549 atoms = [a
for a
in atoms
if not a[
'name']
in fake_atom_names]
551 contributions += self.build_contributions(atoms)
554 r = create_rdc_restraint(
566 if __name__ ==
'__main__':
569 sequence = read_sequence_file(
'seq.dat', first_residue_number=1)
570 reader = TBLReader(sequence, ignore_warnings=
True)
571 reader.read_distances(noe, key=
'test')
Restraint * create_distance_restraint(const Selection &n0, const Selection &n1, double x0, double k, std::string name="Distance%1%")
std::string get_data_path(std::string file_name)
Return the full path to one of this module's data files.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...