IMP logo
IMP Reference Guide  develop.b3a5ae88fa,2024/05/04
The Integrative Modeling Platform
TBLReader.py
1 #!/usr/bin/env python
2 
3 """@namespace IMP.isd.TBLReader
4  Classes to handle TBL files.
5 """
6 
7 from __future__ import print_function
8 import os
9 import IMP.isd
10 from IMP.isd.utils import Load, read_sequence_file
11 IUPAC_CONVENTION = 'iupac'
12 TYPE_AMINO_ACID = 'AMINO_ACID'
13 pseudoatoms_dict = IMP.isd.get_data_path('CHARMM_pseudoatoms.dict')
14 
15 
16 def del_comment(x):
17 
18  n = x.find('!')
19 
20  if n >= 0:
21  x = x[:n]
22 
23  return x
24 
25 
26 def check_assigns(x):
27  return 'resid' in x and 'name' in x
28 
29 
30 class TBLReader:
31 
32  atom_dict = {'segid': '',
33  'resid': -1,
34  'name': ''}
35 
36  pseudoatom_char = '*', '%', '#'
37 
38  def __init__(self, sequence, ignore_warnings=False, sequence_match=(1, 1)):
39 
40  self.sequence = sequence
41  # sequence_match = (a,b) a: NOE numbering, b: sequence numbering
42  self.offset = sequence_match[1] - sequence_match[0]
43  self.ignore = ignore_warnings
44  self.pseudo_dict = Load(pseudoatoms_dict)
45 
46  def extract_contributions(self, contribs):
47 
48  new_contribs = []
49 
50  for c in contribs:
51 
52  if not c:
53  continue
54 
55  c = c[c.find('('):]
56  c = c[:c.rfind(')') + 1]
57 
58  new_contribs.append(c)
59 
60  return new_contribs
61 
62  def split_contribution(self, contrib):
63 
64  words = contrib.split('(')
65  atoms = [word.split(')')[0] for word in words if word]
66 
67  return atoms
68 
69  def resolve_pseudoatom(self, residue_type, atom_name):
70 
71  if '*' in atom_name:
72  char = '*'
73  elif '#' in atom_name:
74  char = '#'
75 
76  atom_name = atom_name.replace(char, '%')
77 
78  # TODO: Assumes that pseudo-atom names are compatible with
79  # IUPAC name, since pseudo_dict can handle IUPAC
80  # names only.
81 
82  try:
83  group = self.pseudo_dict[residue_type][atom_name]
84 
85  except KeyError:
86 
87  key = atom_name, residue_type
88 
89  if key not in self.missing_atoms:
90 
91  msg = 'Could not resolve pseudoatom %s.%s.' % (
92  residue_type, atom_name)
93 
94  if self.ignore:
95  print(msg)
96  else:
97  raise KeyError(msg)
98 
99  self.missing_atoms.append(key)
100 
101  return atom_name
102 
103  return group
104 
105  def to_iupac(self, residue_type, atom_name):
106 
107  raise NotImplementedError
108 
109  iupac_name = self.thesaurus.convert_atom(residue_type,
110  atom_name,
111  self.naming_system,
112  IUPAC_CONVENTION,
113  TYPE_AMINO_ACID)
114  try:
115  iupac_name = self.thesaurus.convert_atom(residue_type,
116  atom_name,
117  self.naming_system,
118  IUPAC_CONVENTION,
119  TYPE_AMINO_ACID)
120 
121  except: # noqa: E722
122 
123  key = atom_name, residue_type
124 
125  if key not in self.missing_atoms:
126 
127  if '*' in atom_name or '#' in atom_name:
128 
129  msg = 'Pseudoatoms not supported: %s' % atom_name
130 
131  if self.ignore:
132  print(msg)
133 
134  else:
135  raise KeyError(msg)
136 
137  elif self.ignore:
138  msg = 'Warning: atom %s not found in residue %s.' % key
139  print(msg)
140  else:
141  raise KeyError(msg % key)
142 
143  self.missing_atoms.append(key)
144 
145  return atom_name
146 
147  return iupac_name
148 
149  def resolve_dihedral_name(self, atoms):
150 
151  raise NotImplementedError
152 
153  names = [a['name'] for a in atoms]
154 
155  try:
156  res_type = self.sequence[atoms[1]['resid']]
157 
158  except IndexError:
159  print('Residue number overflow in atoms', atoms)
160  return ''
161 
162  for dihedral in self.connectivity[res_type].dihedrals.values():
163 
164  keys = sorted([k for k in dihedral.keys() if 'atom' in k])
165 
166  atom_names = []
167 
168  for k in keys:
169  name = dihedral[k]
170  if name[-1] in ('-', '+'):
171  name = name[:-1]
172 
173  atom_names.append(name)
174 
175  if atom_names == names:
176  return dihedral['name']
177 
178  msg = ('Could not determine name of dihedral angles defined '
179  'by atoms %s.' % str(names))
180 
181  if self.ignore:
182  print(msg)
183  return ''
184 
185  raise KeyError(msg)
186 
187  def extract_atom(self, a):
188 
189  atom = dict(self.atom_dict)
190 
191  words = a.split()
192 
193  skip_next = False
194 
195  # correct for segid statements
196 
197  words = [x for x in words if x != '"']
198 
199  for i in range(len(words)):
200 
201  if skip_next:
202  skip_next = False
203  continue
204 
205  word = words[i]
206 
207  if word == 'and':
208  continue
209 
210  for key in atom.keys():
211 
212  if key in word:
213 
214  if key == 'segid':
215  atom[key] = words[i + 1][:-1]
216  else:
217  atom[key] = words[i + 1]
218 
219  skip_next = True
220  break
221 
222  else:
223  raise KeyError(
224  'Value or keyword "%s" unknown. Source: "%s", '
225  'decomposed into "%s"' % (word, str(a), str(words)))
226 
227  atom['resid'] = int(atom['resid']) + self.offset
228  atom['name'] = atom['name'].upper()
229 
230  return atom
231 
232  def build_contributions(self, atoms):
233 
234  groups = []
235 
236  for a in atoms:
237 
238  try:
239  res_type = self.sequence[a['resid']]
240 
241  except IndexError:
242  print('Residue number overflow in atoms', atoms)
243  return []
244 
245  atom_name = a['name']
246 
247  if atom_name[-1] in self.pseudoatom_char:
248  group = self.resolve_pseudoatom(res_type, atom_name)
249 
250  else:
251  # group = [self.to_iupac(res_type, atom_name)]
252  group = [atom_name]
253 
254  groups.append(group)
255 
256  group1, group2 = groups
257 
258  contribs = []
259 
260  res_1 = atoms[0]['resid']
261  res_2 = atoms[1]['resid']
262 
263  for i in range(len(group1)):
264 
265  name_1 = group1[i]
266 
267  for j in range(len(group2)):
268 
269  if (res_1, name_1) != (res_2, group2[j]):
270  contribs.append(((res_1, name_1), (res_2, group2[j])))
271 
272  return contribs
273 
274  def extract_target_values(self, line):
275 
276  end = line.rfind(')')
277 
278  values = line[end + 1:].split()
279 
280  try:
281  distances = [float(x) for x in values[:3]]
282  except: # noqa: E722
283  distances = None
284 
285  # read volume from ARIA 1.x restraint files
286 
287  val = line.split('volume=')
288 
289  if len(val) > 1:
290  volume = float(val[1].split()[0].split(',')[0])
291  else:
292 
293  volume = None
294 
295  return distances, volume
296 
297  def read_contents(self, filename):
298 
299  keywords = 'class',
300 
301  filename = os.path.expanduser(filename)
302 
303  f = open(filename)
304  lines = f.readlines()
305  f.close()
306 
307  all = ''
308 
309  for x in lines:
310 
311  x = x.strip()
312 
313  if not x or x[0] == '!':
314  continue
315 
316  not_valid = [kw for kw in keywords if kw in x]
317 
318  if not_valid:
319  continue
320 
321  all += x.lower() + ' '
322 
323  return [x.strip() for x in all.split('assi')]
324 
325  def find_contributions(self, line):
326 
327  contribs = [del_comment(x).strip() for x in line.split('or')]
328 
329  # use alternative parser for implicitly listed atom pairs
330 
331  if 1 in [x.count('resid') for x in contribs]:
332 
333  atoms = []
334 
335  while line:
336 
337  start = line.find('(')
338 
339  if start < 0:
340  atoms[-1][-1] += line
341  break
342 
343  stop = line.find(')')
344 
345  selection = [x.strip()
346  for x in line[start:stop + 1].split('or')]
347 
348  for i in range(len(selection)):
349 
350  val = selection[i]
351 
352  if '(' not in val:
353  val = '(' + val
354 
355  if ')' not in val:
356  val += ')'
357 
358  selection[i] = val
359 
360  atoms.append(selection)
361 
362  line = line[stop + 1:]
363 
364  if len(atoms) != 2:
365  raise
366 
367  # find and isolate target distances
368 
369  ll = []
370 
371  for i in range(len(atoms)):
372 
373  g = []
374 
375  for atom in atoms[i]:
376 
377  n = atom.rfind(')')
378 
379  if n >= 0 and len(atom[n + 1:].strip()) > 3:
380  distances = atom[n + 1:].strip()
381  atom = atom[:n + 1]
382 
383  g.append(atom)
384 
385  ll.append(g)
386 
387  a, b = ll
388 
389  if len(a) > len(b):
390  a, b = b, a
391 
392  contribs = []
393 
394  for i in a:
395  for j in b:
396  contribs.append('%s %s' % (i, j))
397 
398  contribs[0] += ' ' + distances
399 
400  return contribs
401 
402  def create_distance_restraint(self, distances, volume, contributions):
403  if distances is None and volume is None:
404  raise ValueError("could not find either volume or "
405  "distance: %s %s %s"
406  % (distances, volume, contributions))
407  if distances is None:
408  distances = [volume ** (-1. / 6), 0, 0]
409  dist = distances[0]
410  if volume is None:
411  volume = dist ** (-6)
412  lower = dist - distances[1]
413  upper = dist + distances[2]
414  return (tuple(contributions), dist, lower, upper, volume)
415 
416  def read_distances(self, filename, key, naming_system=IUPAC_CONVENTION,
417  decompose=False):
418  """reads a tbl file and parses distance restraints.
419  """
420 
421  self.naming_system = naming_system
422 
423  assigns = self.read_contents(filename)
424 
425  restraints = []
426  self.missing_atoms = []
427  seq_number = 0
428 
429  for line in assigns:
430 
431  contribs = self.find_contributions(line)
432 
433  if False in [check_assigns(x) for x in contribs]:
434  continue
435 
436  distances, volume = self.extract_target_values(contribs[0])
437 
438  if (distances is None and volume is None):
439  distances, volume = self.extract_target_values(contribs[-1])
440 
441  new_contribs = self.extract_contributions(contribs)
442 
443  contributions = []
444 
445  for contrib in new_contribs:
446 
447  atoms = self.split_contribution(contrib)
448  atoms = [self.extract_atom(x) for x in atoms]
449 
450  contributions += self.build_contributions(atoms)
451 
452  if contributions:
453  r = self.create_distance_restraint(distances, volume,
454  contributions)
455 
456  restraints.append(r)
457  seq_number += 1
458 
459  if restraints:
460 
461  if decompose:
462 
463  d = decompose_restraints(restraints) # noqa: F821
464 
465  for _type in d.keys():
466  if not d[_type]:
467  del d[_type]
468 
469  if len(d) > 1:
470  for _type, val in d.items():
471 
472  if val:
473  new_key = key + '_%s' % _type
474  d[new_key] = val
475 
476  del d[_type]
477 
478  else:
479  d = {key: list(d.values())[0]}
480  else:
481  d = {key: restraints}
482 
483  return d
484 
485  def read_dihedrals(self, filename, key, naming_system=IUPAC_CONVENTION):
486 
487  self.naming_system = naming_system
488 
489  assigns = self.read_contents(filename)
490 
491  restraints = []
492  self.missing_atoms = []
493  seq_number = 0
494 
495  for line in assigns:
496 
497  contribs = [del_comment(x).strip() for x in line.split('or')]
498 
499  values, volume = self.extract_target_values(contribs[0])
500  new_contribs = self.extract_contributions(contribs)
501 
502  if not new_contribs:
503  continue
504 
505  if len(new_contribs) > 1:
506  raise ValueError(
507  'Inconsistency in data file, multiple contributions '
508  'detected.')
509 
510  atoms = self.split_contribution(new_contribs[0])
511  atoms = [self.extract_atom(x) for x in atoms]
512 
513  name = self.resolve_dihedral_name(atoms)
514 
515  r = create_dihedral_restraint(seq_number, name, # noqa: F821
516  values, atoms)
517 
518  restraints.append(r)
519  seq_number += 1
520 
521  if restraints:
522  return restraints
523 
524  def read_rdcs(self, filename, key, naming_system=IUPAC_CONVENTION):
525 
526  self.naming_system = naming_system
527 
528  assigns = self.read_contents(filename)
529 
530  restraints = []
531  self.missing_atoms = []
532  seq_number = 0
533 
534  fake_atom_names = ('OO', 'X', 'Y', 'Z')
535 
536  for line in assigns:
537 
538  contribs = [del_comment(x).strip() for x in line.split('or')]
539  distances, volume = self.extract_target_values(contribs[0])
540  new_contribs = self.extract_contributions(contribs)
541 
542  contributions = []
543 
544  for contrib in new_contribs:
545 
546  atoms = self.split_contribution(contrib)
547  atoms = [self.extract_atom(x) for x in atoms]
548 
549  atoms = [a for a in atoms if not a['name'] in fake_atom_names]
550 
551  contributions += self.build_contributions(atoms)
552 
553  if contributions:
554  r = create_rdc_restraint( # noqa: F821
555  seq_number,
556  distances[0],
557  contributions)
558 
559  restraints.append(r)
560  seq_number += 1
561 
562  if restraints:
563  return restraints
564 
565 
566 if __name__ == '__main__':
567 
568  noe = 'noe.tbl'
569  sequence = read_sequence_file('seq.dat', first_residue_number=1)
570  reader = TBLReader(sequence, ignore_warnings=True)
571  reader.read_distances(noe, key='test')
Restraint * create_distance_restraint(const Selection &n0, const Selection &n1, double x0, double k, std::string name="Distance%1%")
std::string get_data_path(std::string file_name)
Return the full path to one of this module's data files.
Miscellaneous utilities.
Definition: utils.py:1
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...