IMP logo
IMP Reference Guide  develop.d97d4ead1f,2024/11/21
The Integrative Modeling Platform
TBLReader.py
1 #!/usr/bin/env python
2 
3 """@namespace IMP.isd.TBLReader
4  Classes to handle TBL files.
5 """
6 
7 import os
8 import IMP.isd
9 from IMP.isd.utils import Load, read_sequence_file
10 IUPAC_CONVENTION = 'iupac'
11 TYPE_AMINO_ACID = 'AMINO_ACID'
12 pseudoatoms_dict = IMP.isd.get_data_path('CHARMM_pseudoatoms.dict')
13 
14 
15 def del_comment(x):
16 
17  n = x.find('!')
18 
19  if n >= 0:
20  x = x[:n]
21 
22  return x
23 
24 
25 def check_assigns(x):
26  return 'resid' in x and 'name' in x
27 
28 
29 class TBLReader:
30 
31  atom_dict = {'segid': '',
32  'resid': -1,
33  'name': ''}
34 
35  pseudoatom_char = '*', '%', '#'
36 
37  def __init__(self, sequence, ignore_warnings=False, sequence_match=(1, 1)):
38 
39  self.sequence = sequence
40  # sequence_match = (a,b) a: NOE numbering, b: sequence numbering
41  self.offset = sequence_match[1] - sequence_match[0]
42  self.ignore = ignore_warnings
43  self.pseudo_dict = Load(pseudoatoms_dict)
44 
45  def extract_contributions(self, contribs):
46 
47  new_contribs = []
48 
49  for c in contribs:
50 
51  if not c:
52  continue
53 
54  c = c[c.find('('):]
55  c = c[:c.rfind(')') + 1]
56 
57  new_contribs.append(c)
58 
59  return new_contribs
60 
61  def split_contribution(self, contrib):
62 
63  words = contrib.split('(')
64  atoms = [word.split(')')[0] for word in words if word]
65 
66  return atoms
67 
68  def resolve_pseudoatom(self, residue_type, atom_name):
69 
70  if '*' in atom_name:
71  char = '*'
72  elif '#' in atom_name:
73  char = '#'
74 
75  atom_name = atom_name.replace(char, '%')
76 
77  # TODO: Assumes that pseudo-atom names are compatible with
78  # IUPAC name, since pseudo_dict can handle IUPAC
79  # names only.
80 
81  try:
82  group = self.pseudo_dict[residue_type][atom_name]
83 
84  except KeyError:
85 
86  key = atom_name, residue_type
87 
88  if key not in self.missing_atoms:
89 
90  msg = 'Could not resolve pseudoatom %s.%s.' % (
91  residue_type, atom_name)
92 
93  if self.ignore:
94  print(msg)
95  else:
96  raise KeyError(msg)
97 
98  self.missing_atoms.append(key)
99 
100  return atom_name
101 
102  return group
103 
104  def to_iupac(self, residue_type, atom_name):
105 
106  raise NotImplementedError
107 
108  iupac_name = self.thesaurus.convert_atom(residue_type,
109  atom_name,
110  self.naming_system,
111  IUPAC_CONVENTION,
112  TYPE_AMINO_ACID)
113  try:
114  iupac_name = self.thesaurus.convert_atom(residue_type,
115  atom_name,
116  self.naming_system,
117  IUPAC_CONVENTION,
118  TYPE_AMINO_ACID)
119 
120  except: # noqa: E722
121 
122  key = atom_name, residue_type
123 
124  if key not in self.missing_atoms:
125 
126  if '*' in atom_name or '#' in atom_name:
127 
128  msg = 'Pseudoatoms not supported: %s' % atom_name
129 
130  if self.ignore:
131  print(msg)
132 
133  else:
134  raise KeyError(msg)
135 
136  elif self.ignore:
137  msg = 'Warning: atom %s not found in residue %s.' % key
138  print(msg)
139  else:
140  raise KeyError(msg % key)
141 
142  self.missing_atoms.append(key)
143 
144  return atom_name
145 
146  return iupac_name
147 
148  def resolve_dihedral_name(self, atoms):
149 
150  raise NotImplementedError
151 
152  names = [a['name'] for a in atoms]
153 
154  try:
155  res_type = self.sequence[atoms[1]['resid']]
156 
157  except IndexError:
158  print('Residue number overflow in atoms', atoms)
159  return ''
160 
161  for dihedral in self.connectivity[res_type].dihedrals.values():
162 
163  keys = sorted([k for k in dihedral.keys() if 'atom' in k])
164 
165  atom_names = []
166 
167  for k in keys:
168  name = dihedral[k]
169  if name[-1] in ('-', '+'):
170  name = name[:-1]
171 
172  atom_names.append(name)
173 
174  if atom_names == names:
175  return dihedral['name']
176 
177  msg = ('Could not determine name of dihedral angles defined '
178  'by atoms %s.' % str(names))
179 
180  if self.ignore:
181  print(msg)
182  return ''
183 
184  raise KeyError(msg)
185 
186  def extract_atom(self, a):
187 
188  atom = dict(self.atom_dict)
189 
190  words = a.split()
191 
192  skip_next = False
193 
194  # correct for segid statements
195 
196  words = [x for x in words if x != '"']
197 
198  for i in range(len(words)):
199 
200  if skip_next:
201  skip_next = False
202  continue
203 
204  word = words[i]
205 
206  if word == 'and':
207  continue
208 
209  for key in atom.keys():
210 
211  if key in word:
212 
213  if key == 'segid':
214  atom[key] = words[i + 1][:-1]
215  else:
216  atom[key] = words[i + 1]
217 
218  skip_next = True
219  break
220 
221  else:
222  raise KeyError(
223  'Value or keyword "%s" unknown. Source: "%s", '
224  'decomposed into "%s"' % (word, str(a), str(words)))
225 
226  atom['resid'] = int(atom['resid']) + self.offset
227  atom['name'] = atom['name'].upper()
228 
229  return atom
230 
231  def build_contributions(self, atoms):
232 
233  groups = []
234 
235  for a in atoms:
236 
237  try:
238  res_type = self.sequence[a['resid']]
239 
240  except IndexError:
241  print('Residue number overflow in atoms', atoms)
242  return []
243 
244  atom_name = a['name']
245 
246  if atom_name[-1] in self.pseudoatom_char:
247  group = self.resolve_pseudoatom(res_type, atom_name)
248 
249  else:
250  # group = [self.to_iupac(res_type, atom_name)]
251  group = [atom_name]
252 
253  groups.append(group)
254 
255  group1, group2 = groups
256 
257  contribs = []
258 
259  res_1 = atoms[0]['resid']
260  res_2 = atoms[1]['resid']
261 
262  for i in range(len(group1)):
263 
264  name_1 = group1[i]
265 
266  for j in range(len(group2)):
267 
268  if (res_1, name_1) != (res_2, group2[j]):
269  contribs.append(((res_1, name_1), (res_2, group2[j])))
270 
271  return contribs
272 
273  def extract_target_values(self, line):
274 
275  end = line.rfind(')')
276 
277  values = line[end + 1:].split()
278 
279  try:
280  distances = [float(x) for x in values[:3]]
281  except: # noqa: E722
282  distances = None
283 
284  # read volume from ARIA 1.x restraint files
285 
286  val = line.split('volume=')
287 
288  if len(val) > 1:
289  volume = float(val[1].split()[0].split(',')[0])
290  else:
291 
292  volume = None
293 
294  return distances, volume
295 
296  def read_contents(self, filename):
297 
298  keywords = 'class',
299 
300  filename = os.path.expanduser(filename)
301 
302  f = open(filename)
303  lines = f.readlines()
304  f.close()
305 
306  all = ''
307 
308  for x in lines:
309 
310  x = x.strip()
311 
312  if not x or x[0] == '!':
313  continue
314 
315  not_valid = [kw for kw in keywords if kw in x]
316 
317  if not_valid:
318  continue
319 
320  all += x.lower() + ' '
321 
322  return [x.strip() for x in all.split('assi')]
323 
324  def find_contributions(self, line):
325 
326  contribs = [del_comment(x).strip() for x in line.split('or')]
327 
328  # use alternative parser for implicitly listed atom pairs
329 
330  if 1 in [x.count('resid') for x in contribs]:
331 
332  atoms = []
333 
334  while line:
335 
336  start = line.find('(')
337 
338  if start < 0:
339  atoms[-1][-1] += line
340  break
341 
342  stop = line.find(')')
343 
344  selection = [x.strip()
345  for x in line[start:stop + 1].split('or')]
346 
347  for i in range(len(selection)):
348 
349  val = selection[i]
350 
351  if '(' not in val:
352  val = '(' + val
353 
354  if ')' not in val:
355  val += ')'
356 
357  selection[i] = val
358 
359  atoms.append(selection)
360 
361  line = line[stop + 1:]
362 
363  if len(atoms) != 2:
364  raise
365 
366  # find and isolate target distances
367 
368  ll = []
369 
370  for i in range(len(atoms)):
371 
372  g = []
373 
374  for atom in atoms[i]:
375 
376  n = atom.rfind(')')
377 
378  if n >= 0 and len(atom[n + 1:].strip()) > 3:
379  distances = atom[n + 1:].strip()
380  atom = atom[:n + 1]
381 
382  g.append(atom)
383 
384  ll.append(g)
385 
386  a, b = ll
387 
388  if len(a) > len(b):
389  a, b = b, a
390 
391  contribs = []
392 
393  for i in a:
394  for j in b:
395  contribs.append('%s %s' % (i, j))
396 
397  contribs[0] += ' ' + distances
398 
399  return contribs
400 
401  def create_distance_restraint(self, distances, volume, contributions):
402  if distances is None and volume is None:
403  raise ValueError("could not find either volume or "
404  "distance: %s %s %s"
405  % (distances, volume, contributions))
406  if distances is None:
407  distances = [volume ** (-1. / 6), 0, 0]
408  dist = distances[0]
409  if volume is None:
410  volume = dist ** (-6)
411  lower = dist - distances[1]
412  upper = dist + distances[2]
413  return (tuple(contributions), dist, lower, upper, volume)
414 
415  def read_distances(self, filename, key, naming_system=IUPAC_CONVENTION,
416  decompose=False):
417  """reads a tbl file and parses distance restraints.
418  """
419 
420  self.naming_system = naming_system
421 
422  assigns = self.read_contents(filename)
423 
424  restraints = []
425  self.missing_atoms = []
426  seq_number = 0
427 
428  for line in assigns:
429 
430  contribs = self.find_contributions(line)
431 
432  if False in [check_assigns(x) for x in contribs]:
433  continue
434 
435  distances, volume = self.extract_target_values(contribs[0])
436 
437  if (distances is None and volume is None):
438  distances, volume = self.extract_target_values(contribs[-1])
439 
440  new_contribs = self.extract_contributions(contribs)
441 
442  contributions = []
443 
444  for contrib in new_contribs:
445 
446  atoms = self.split_contribution(contrib)
447  atoms = [self.extract_atom(x) for x in atoms]
448 
449  contributions += self.build_contributions(atoms)
450 
451  if contributions:
452  r = self.create_distance_restraint(distances, volume,
453  contributions)
454 
455  restraints.append(r)
456  seq_number += 1
457 
458  if restraints:
459 
460  if decompose:
461 
462  d = decompose_restraints(restraints) # noqa: F821
463 
464  for _type in d.keys():
465  if not d[_type]:
466  del d[_type]
467 
468  if len(d) > 1:
469  for _type, val in d.items():
470 
471  if val:
472  new_key = key + '_%s' % _type
473  d[new_key] = val
474 
475  del d[_type]
476 
477  else:
478  d = {key: list(d.values())[0]}
479  else:
480  d = {key: restraints}
481 
482  return d
483 
484  def read_dihedrals(self, filename, key, naming_system=IUPAC_CONVENTION):
485 
486  self.naming_system = naming_system
487 
488  assigns = self.read_contents(filename)
489 
490  restraints = []
491  self.missing_atoms = []
492  seq_number = 0
493 
494  for line in assigns:
495 
496  contribs = [del_comment(x).strip() for x in line.split('or')]
497 
498  values, volume = self.extract_target_values(contribs[0])
499  new_contribs = self.extract_contributions(contribs)
500 
501  if not new_contribs:
502  continue
503 
504  if len(new_contribs) > 1:
505  raise ValueError(
506  'Inconsistency in data file, multiple contributions '
507  'detected.')
508 
509  atoms = self.split_contribution(new_contribs[0])
510  atoms = [self.extract_atom(x) for x in atoms]
511 
512  name = self.resolve_dihedral_name(atoms)
513 
514  r = create_dihedral_restraint(seq_number, name, # noqa: F821
515  values, atoms)
516 
517  restraints.append(r)
518  seq_number += 1
519 
520  if restraints:
521  return restraints
522 
523  def read_rdcs(self, filename, key, naming_system=IUPAC_CONVENTION):
524 
525  self.naming_system = naming_system
526 
527  assigns = self.read_contents(filename)
528 
529  restraints = []
530  self.missing_atoms = []
531  seq_number = 0
532 
533  fake_atom_names = ('OO', 'X', 'Y', 'Z')
534 
535  for line in assigns:
536 
537  contribs = [del_comment(x).strip() for x in line.split('or')]
538  distances, volume = self.extract_target_values(contribs[0])
539  new_contribs = self.extract_contributions(contribs)
540 
541  contributions = []
542 
543  for contrib in new_contribs:
544 
545  atoms = self.split_contribution(contrib)
546  atoms = [self.extract_atom(x) for x in atoms]
547 
548  atoms = [a for a in atoms if not a['name'] in fake_atom_names]
549 
550  contributions += self.build_contributions(atoms)
551 
552  if contributions:
553  r = create_rdc_restraint( # noqa: F821
554  seq_number,
555  distances[0],
556  contributions)
557 
558  restraints.append(r)
559  seq_number += 1
560 
561  if restraints:
562  return restraints
563 
564 
565 if __name__ == '__main__':
566 
567  noe = 'noe.tbl'
568  sequence = read_sequence_file('seq.dat', first_residue_number=1)
569  reader = TBLReader(sequence, ignore_warnings=True)
570  reader.read_distances(noe, key='test')
Restraint * create_distance_restraint(const Selection &n0, const Selection &n1, double x0, double k, std::string name="Distance%1%")
std::string get_data_path(std::string file_name)
Return the full path to one of this module's data files.
Miscellaneous utilities.
Definition: utils.py:1
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...