3 """@namespace IMP.isd.TBLReader
4 Classes to handle TBL files.
10 IUPAC_CONVENTION =
'iupac'
11 TYPE_AMINO_ACID =
'AMINO_ACID'
26 return 'resid' in x
and 'name' in x
31 atom_dict = {
'segid':
'',
35 pseudoatom_char =
'*',
'%',
'#'
37 def __init__(self, sequence, ignore_warnings=False, sequence_match=(1, 1)):
39 self.sequence = sequence
41 self.offset = sequence_match[1] - sequence_match[0]
42 self.ignore = ignore_warnings
43 self.pseudo_dict = Load(pseudoatoms_dict)
45 def extract_contributions(self, contribs):
55 c = c[:c.rfind(
')') + 1]
57 new_contribs.append(c)
61 def split_contribution(self, contrib):
63 words = contrib.split(
'(')
64 atoms = [word.split(
')')[0]
for word
in words
if word]
68 def resolve_pseudoatom(self, residue_type, atom_name):
72 elif '#' in atom_name:
75 atom_name = atom_name.replace(char,
'%')
82 group = self.pseudo_dict[residue_type][atom_name]
86 key = atom_name, residue_type
88 if key
not in self.missing_atoms:
90 msg =
'Could not resolve pseudoatom %s.%s.' % (
91 residue_type, atom_name)
98 self.missing_atoms.append(key)
104 def to_iupac(self, residue_type, atom_name):
106 raise NotImplementedError
108 iupac_name = self.thesaurus.convert_atom(residue_type,
114 iupac_name = self.thesaurus.convert_atom(residue_type,
122 key = atom_name, residue_type
124 if key
not in self.missing_atoms:
126 if '*' in atom_name
or '#' in atom_name:
128 msg =
'Pseudoatoms not supported: %s' % atom_name
137 msg =
'Warning: atom %s not found in residue %s.' % key
140 raise KeyError(msg % key)
142 self.missing_atoms.append(key)
148 def resolve_dihedral_name(self, atoms):
150 raise NotImplementedError
152 names = [a[
'name']
for a
in atoms]
155 res_type = self.sequence[atoms[1][
'resid']]
158 print(
'Residue number overflow in atoms', atoms)
161 for dihedral
in self.connectivity[res_type].dihedrals.values():
163 keys = sorted([k
for k
in dihedral.keys()
if 'atom' in k])
169 if name[-1]
in (
'-',
'+'):
172 atom_names.append(name)
174 if atom_names == names:
175 return dihedral[
'name']
177 msg = (
'Could not determine name of dihedral angles defined '
178 'by atoms %s.' % str(names))
186 def extract_atom(self, a):
188 atom = dict(self.atom_dict)
196 words = [x
for x
in words
if x !=
'"']
198 for i
in range(len(words)):
209 for key
in atom.keys():
214 atom[key] = words[i + 1][:-1]
216 atom[key] = words[i + 1]
223 'Value or keyword "%s" unknown. Source: "%s", '
224 'decomposed into "%s"' % (word, str(a), str(words)))
226 atom[
'resid'] = int(atom[
'resid']) + self.offset
227 atom[
'name'] = atom[
'name'].upper()
231 def build_contributions(self, atoms):
238 res_type = self.sequence[a[
'resid']]
241 print(
'Residue number overflow in atoms', atoms)
244 atom_name = a[
'name']
246 if atom_name[-1]
in self.pseudoatom_char:
247 group = self.resolve_pseudoatom(res_type, atom_name)
255 group1, group2 = groups
259 res_1 = atoms[0][
'resid']
260 res_2 = atoms[1][
'resid']
262 for i
in range(len(group1)):
266 for j
in range(len(group2)):
268 if (res_1, name_1) != (res_2, group2[j]):
269 contribs.append(((res_1, name_1), (res_2, group2[j])))
273 def extract_target_values(self, line):
275 end = line.rfind(
')')
277 values = line[end + 1:].split()
280 distances = [float(x)
for x
in values[:3]]
286 val = line.split(
'volume=')
289 volume = float(val[1].split()[0].split(
',')[0])
294 return distances, volume
296 def read_contents(self, filename):
300 filename = os.path.expanduser(filename)
303 lines = f.readlines()
312 if not x
or x[0] ==
'!':
315 not_valid = [kw
for kw
in keywords
if kw
in x]
320 all += x.lower() +
' '
322 return [x.strip()
for x
in all.split(
'assi')]
324 def find_contributions(self, line):
326 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
330 if 1
in [x.count(
'resid')
for x
in contribs]:
336 start = line.find(
'(')
339 atoms[-1][-1] += line
342 stop = line.find(
')')
344 selection = [x.strip()
345 for x
in line[start:stop + 1].split(
'or')]
347 for i
in range(len(selection)):
359 atoms.append(selection)
361 line = line[stop + 1:]
370 for i
in range(len(atoms)):
374 for atom
in atoms[i]:
378 if n >= 0
and len(atom[n + 1:].strip()) > 3:
379 distances = atom[n + 1:].strip()
395 contribs.append(
'%s %s' % (i, j))
397 contribs[0] +=
' ' + distances
402 if distances
is None and volume
is None:
403 raise ValueError(
"could not find either volume or "
405 % (distances, volume, contributions))
406 if distances
is None:
407 distances = [volume ** (-1. / 6), 0, 0]
410 volume = dist ** (-6)
411 lower = dist - distances[1]
412 upper = dist + distances[2]
413 return (tuple(contributions), dist, lower, upper, volume)
415 def read_distances(self, filename, key, naming_system=IUPAC_CONVENTION,
417 """reads a tbl file and parses distance restraints.
420 self.naming_system = naming_system
422 assigns = self.read_contents(filename)
425 self.missing_atoms = []
430 contribs = self.find_contributions(line)
432 if False in [check_assigns(x)
for x
in contribs]:
435 distances, volume = self.extract_target_values(contribs[0])
437 if (distances
is None and volume
is None):
438 distances, volume = self.extract_target_values(contribs[-1])
440 new_contribs = self.extract_contributions(contribs)
444 for contrib
in new_contribs:
446 atoms = self.split_contribution(contrib)
447 atoms = [self.extract_atom(x)
for x
in atoms]
449 contributions += self.build_contributions(atoms)
452 r = self.create_distance_restraint(distances, volume,
462 d = decompose_restraints(restraints)
464 for _type
in d.keys():
469 for _type, val
in d.items():
472 new_key = key +
'_%s' % _type
478 d = {key: list(d.values())[0]}
480 d = {key: restraints}
484 def read_dihedrals(self, filename, key, naming_system=IUPAC_CONVENTION):
486 self.naming_system = naming_system
488 assigns = self.read_contents(filename)
491 self.missing_atoms = []
496 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
498 values, volume = self.extract_target_values(contribs[0])
499 new_contribs = self.extract_contributions(contribs)
504 if len(new_contribs) > 1:
506 'Inconsistency in data file, multiple contributions '
509 atoms = self.split_contribution(new_contribs[0])
510 atoms = [self.extract_atom(x)
for x
in atoms]
512 name = self.resolve_dihedral_name(atoms)
514 r = create_dihedral_restraint(seq_number, name,
523 def read_rdcs(self, filename, key, naming_system=IUPAC_CONVENTION):
525 self.naming_system = naming_system
527 assigns = self.read_contents(filename)
530 self.missing_atoms = []
533 fake_atom_names = (
'OO',
'X',
'Y',
'Z')
537 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
538 distances, volume = self.extract_target_values(contribs[0])
539 new_contribs = self.extract_contributions(contribs)
543 for contrib
in new_contribs:
545 atoms = self.split_contribution(contrib)
546 atoms = [self.extract_atom(x)
for x
in atoms]
548 atoms = [a
for a
in atoms
if not a[
'name']
in fake_atom_names]
550 contributions += self.build_contributions(atoms)
553 r = create_rdc_restraint(
565 if __name__ ==
'__main__':
568 sequence = read_sequence_file(
'seq.dat', first_residue_number=1)
569 reader = TBLReader(sequence, ignore_warnings=
True)
570 reader.read_distances(noe, key=
'test')
Restraint * create_distance_restraint(const Selection &n0, const Selection &n1, double x0, double k, std::string name="Distance%1%")
std::string get_data_path(std::string file_name)
Return the full path to one of this module's data files.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...