IMP logo
IMP Reference Guide  2.7.0
The Integrative Modeling Platform
TBLReader.py
1 #!/usr/bin/env python
2 
3 """@namespace IMP.isd.TBLReader
4  Classes to handle TBL files.
5 """
6 
7 from __future__ import print_function
8 import sys
9 import os
10 import IMP.isd
11 from IMP.isd.utils import Load, read_sequence_file
12 #from Isd.io.nomenclature import IUPAC_CONVENTION
13 IUPAC_CONVENTION = 'iupac'
14 TYPE_AMINO_ACID = 'AMINO_ACID'
15 pseudoatoms_dict = IMP.isd.get_data_path('CHARMM_pseudoatoms.dict')
16 
17 
18 def del_comment(x):
19 
20  n = x.find('!')
21 
22  if n >= 0:
23  x = x[:n]
24 
25  return x
26 
27 
28 def check_assigns(x):
29  return 'resid' in x and 'name' in x
30 
31 
32 class TBLReader:
33 
34  atom_dict = {'segid': '',
35  'resid': -1,
36  'name': ''}
37 
38  pseudoatom_char = '*', '%', '#'
39 
40  def __init__(self, sequence, ignore_warnings=False, sequence_match=(1, 1)):
41 
42  self.sequence = sequence
43  # sequence_match = (a,b) a: NOE numbering, b: sequence numbering
44  self.offset = sequence_match[1] - sequence_match[0]
45  self.ignore = ignore_warnings
46  self.pseudo_dict = Load(pseudoatoms_dict)
47 
48  def extract_contributions(self, contribs):
49 
50  new_contribs = []
51 
52  for c in contribs:
53 
54  if not c:
55  continue
56 
57  c = c[c.find('('):]
58  c = c[:c.rfind(')') + 1]
59 
60  new_contribs.append(c)
61 
62  return new_contribs
63 
64  def split_contribution(self, contrib):
65 
66  words = contrib.split('(')
67  atoms = [word.split(')')[0] for word in words if word]
68 
69  return atoms
70 
71  def resolve_pseudoatom(self, residue_type, atom_name):
72 
73  if '*' in atom_name:
74  char = '*'
75  elif '#' in atom_name:
76  char = '#'
77 
78  atom_name = atom_name.replace(char, '%')
79 
80  # TODO: Assumes that pseudo-atom names are compatible with
81  # IUPAC name, since pseudo_dict can handle IUPAC
82  # names only.
83 
84  try:
85  group = self.pseudo_dict[residue_type][atom_name]
86 
87  except:
88 
89  key = atom_name, residue_type
90 
91  if not key in self.missing_atoms:
92 
93  msg = 'Could not resolve pseudoatom %s.%s.' % (
94  residue_type, atom_name)
95 
96  if self.ignore:
97  print(msg)
98  else:
99  raise KeyError(msg)
100 
101  self.missing_atoms.append(key)
102 
103  return atom_name
104 
105  return group
106 
107  def to_iupac(self, residue_type, atom_name):
108 
109  raise NotImplementedError
110 
111  iupac_name = self.thesaurus.convert_atom(residue_type,
112  atom_name,
113  self.naming_system,
114  IUPAC_CONVENTION,
115  TYPE_AMINO_ACID)
116  try:
117  iupac_name = self.thesaurus.convert_atom(residue_type,
118  atom_name,
119  self.naming_system,
120  IUPAC_CONVENTION,
121  TYPE_AMINO_ACID)
122 
123  except:
124 
125  key = atom_name, residue_type
126 
127  if not key in self.missing_atoms:
128 
129  if '*' in atom_name or '#' in atom_name:
130 
131  msg = 'Pseudoatoms not upported: %s' % atom_name
132 
133  if self.ignore:
134  print(msg)
135 
136  else:
137  raise KeyError(msg)
138 
139  elif self.ignore:
140  msg = 'Warning: atom %s not found in residue %s.' % key
141  print(msg)
142  else:
143  raise KeyError(msg % key)
144 
145  self.missing_atoms.append(key)
146 
147  return atom_name
148 
149  return iupac_name
150 
151  def resolve_dihedral_name(self, atoms):
152 
153  raise NotImplementedError
154 
155  names = [a['name'] for a in atoms]
156 
157  try:
158  res_type = self.sequence[atoms[1]['resid']]
159 
160  except IndexError:
161  print('Residue number overflow in atoms', atoms)
162  return ''
163 
164  for dihedral in self.connectivity[res_type].dihedrals.values():
165 
166  keys = sorted([k for k in dihedral.keys() if 'atom' in k])
167 
168  atom_names = []
169 
170  for k in keys:
171  name = dihedral[k]
172  if name[-1] in ('-', '+'):
173  name = name[:-1]
174 
175  atom_names.append(name)
176 
177  if atom_names == names:
178  return dihedral['name']
179 
180  msg = 'Could not determine name of dihedral angles defined by atoms %s.' % str(
181  names)
182 
183  if self.ignore:
184  print(msg)
185  return ''
186 
187  raise KeyError(msg)
188 
189  def extract_atom(self, a):
190 
191  atom = dict(self.atom_dict)
192 
193  words = a.split()
194 
195  skip_next = False
196 
197  # correct for segid statements
198 
199  words = [x for x in words if x != '"']
200 
201  for i in range(len(words)):
202 
203  if skip_next:
204  skip_next = False
205  continue
206 
207  word = words[i]
208 
209  if word == 'and':
210  continue
211 
212  for key in atom.keys():
213 
214  if key in word:
215 
216  if key == 'segid':
217  atom[key] = words[i + 1][:-1]
218  else:
219  atom[key] = words[i + 1]
220 
221  skip_next = True
222  break
223 
224  else:
225  raise KeyError(
226  'Value or keyword "%s" unknown. Source: "%s", decomposed into "%s"' %
227  (word, str(a), str(words)))
228 
229  atom['resid'] = int(atom['resid']) + self.offset
230  atom['name'] = atom['name'].upper()
231 
232  return atom
233 
234  def build_contributions(self, atoms):
235 
236  groups = []
237 
238  for a in atoms:
239 
240  try:
241  res_type = self.sequence[a['resid']]
242 
243  except IndexError:
244  print('Residue number overflow in atoms', atoms)
245  return []
246 
247  atom_name = a['name']
248 
249  if atom_name[-1] in self.pseudoatom_char:
250  group = self.resolve_pseudoatom(res_type, atom_name)
251 
252  else:
253  #group = [self.to_iupac(res_type, atom_name)]
254  group = [atom_name]
255 
256  groups.append(group)
257 
258  group1, group2 = groups
259 
260  contribs = []
261 
262  res_1 = atoms[0]['resid']
263  res_2 = atoms[1]['resid']
264 
265  for i in range(len(group1)):
266 
267  name_1 = group1[i]
268 
269  for j in range(len(group2)):
270 
271  if (res_1, name_1) != (res_2, group2[j]):
272  contribs.append(((res_1, name_1), (res_2, group2[j])))
273 
274  return contribs
275 
276  def extract_target_values(self, line):
277 
278  end = line.rfind(')')
279 
280  values = line[end + 1:].split()
281 
282  try:
283  distances = [float(x) for x in values[:3]]
284  except:
285  distances = None
286 
287  # read volume from ARIA 1.x restraint files
288 
289  val = line.split('volume=')
290 
291  if len(val) > 1:
292  volume = float(val[1].split()[0].split(',')[0])
293  else:
294 
295  volume = None
296 
297  return distances, volume
298 
299  def read_contents(self, filename):
300 
301  keywords = 'class',
302 
303  filename = os.path.expanduser(filename)
304 
305  f = open(filename)
306  lines = f.readlines()
307  f.close()
308 
309  all = ''
310 
311  for x in lines:
312 
313  x = x.strip()
314 
315  if not x or x[0] == '!':
316  continue
317 
318  not_valid = [kw for kw in keywords if kw in x]
319 
320  if not_valid:
321  continue
322 
323  all += x.lower() + ' '
324 
325  return [x.strip() for x in all.split('assi')]
326 
327  def find_contributions(self, line):
328 
329  contribs = [del_comment(x).strip() for x in line.split('or')]
330 
331  # use alternative parser for implicitly listed atom pairs
332 
333  if 1 in [x.count('resid') for x in contribs]:
334 
335  atoms = []
336 
337  while line:
338 
339  start = line.find('(')
340 
341  if start < 0:
342  atoms[-1][-1] += line
343  break
344 
345  stop = line.find(')')
346 
347  selection = [x.strip()
348  for x in line[start:stop + 1].split('or')]
349 
350  for i in range(len(selection)):
351 
352  val = selection[i]
353 
354  if not '(' in val:
355  val = '(' + val
356 
357  if not ')' in val:
358  val += ')'
359 
360  selection[i] = val
361 
362  atoms.append(selection)
363 
364  line = line[stop + 1:]
365 
366  if len(atoms) != 2:
367  raise
368 
369  # find and isolate target distances
370 
371  l = []
372 
373  for i in range(len(atoms)):
374 
375  g = []
376 
377  for atom in atoms[i]:
378 
379  n = atom.rfind(')')
380 
381  if n >= 0 and len(atom[n + 1:].strip()) > 3:
382  distances = atom[n + 1:].strip()
383  atom = atom[:n + 1]
384 
385  g.append(atom)
386 
387  l.append(g)
388 
389  a, b = l
390 
391  if len(a) > len(b):
392  a, b = b, a
393 
394  contribs = []
395 
396  for i in a:
397  for j in b:
398  contribs.append('%s %s' % (i, j))
399 
400  contribs[0] += ' ' + distances
401 
402  return contribs
403 
404  def create_distance_restraint(self, distances, volume, contributions):
405  if distances is None and volume is None:
406  raise ValueError("could not find either volume or "
407  "distance: %s %s %s" % (distances, volume, contributions))
408  if distances is None:
409  distances = [volume ** (-1. / 6), 0, 0]
410  dist = distances[0]
411  if volume is None:
412  volume = dist ** (-6)
413  lower = dist - distances[1]
414  upper = dist + distances[2]
415  return (tuple(contributions), dist, lower, upper, volume)
416 
417  def read_distances(self, filename, key, naming_system=IUPAC_CONVENTION,
418  decompose=False):
419  """reads a tbl file and parses distance restraints.
420  """
421 
422  self.naming_system = naming_system
423 
424  assigns = self.read_contents(filename)
425 
426  restraints = []
427  self.missing_atoms = []
428  seq_number = 0
429 
430  for line in assigns:
431 
432  contribs = self.find_contributions(line)
433 
434  if False in [check_assigns(x) for x in contribs]:
435  continue
436 
437  distances, volume = self.extract_target_values(contribs[0])
438 
439  if (distances is None and volume is None):
440  distances, volume = self.extract_target_values(contribs[-1])
441 
442  new_contribs = self.extract_contributions(contribs)
443 
444  contributions = []
445 
446  for contrib in new_contribs:
447 
448  atoms = self.split_contribution(contrib)
449  atoms = [self.extract_atom(x) for x in atoms]
450 
451  contributions += self.build_contributions(atoms)
452 
453  if contributions:
454  r = self.create_distance_restraint(distances, volume,
455  contributions)
456 
457  restraints.append(r)
458  seq_number += 1
459 
460  if restraints:
461 
462  if decompose:
463 
464  d = decompose_restraints(restraints)
465 
466  for _type in d.keys():
467  if not d[_type]:
468  del d[_type]
469 
470  if len(d) > 1:
471  for _type, val in d.items():
472 
473  if val:
474  new_key = key + '_%s' % _type
475  d[new_key] = val
476 
477  del d[_type]
478 
479  else:
480  d = {key: list(d.values())[0]}
481  else:
482  d = {key: restraints}
483 
484  return d
485 
486  def read_dihedrals(self, filename, key, naming_system=IUPAC_CONVENTION):
487 
488  self.naming_system = naming_system
489 
490  assigns = self.read_contents(filename)
491 
492  restraints = []
493  self.missing_atoms = []
494  seq_number = 0
495 
496  for line in assigns:
497 
498  contribs = [del_comment(x).strip() for x in line.split('or')]
499 
500  values, volume = self.extract_target_values(contribs[0])
501  new_contribs = self.extract_contributions(contribs)
502 
503  if not new_contribs:
504  continue
505 
506  if len(new_contribs) > 1:
507  raise ValueError(
508  'Inconsistency in data file, multiple contributions detected.')
509 
510  atoms = self.split_contribution(new_contribs[0])
511  atoms = [self.extract_atom(x) for x in atoms]
512 
513  name = self.resolve_dihedral_name(atoms)
514 
515  r = create_dihedral_restraint(seq_number, name, values, atoms)
516 
517  restraints.append(r)
518  seq_number += 1
519 
520  if restraints:
521  return restraints
522 
523  def read_rdcs(self, filename, key, naming_system=IUPAC_CONVENTION):
524 
525  self.naming_system = naming_system
526 
527  assigns = self.read_contents(filename)
528 
529  restraints = []
530  self.missing_atoms = []
531  seq_number = 0
532 
533  fake_atom_names = ('OO', 'X', 'Y', 'Z')
534 
535  for line in assigns:
536 
537  contribs = [del_comment(x).strip() for x in line.split('or')]
538  distances, volume = self.extract_target_values(contribs[0])
539  new_contribs = self.extract_contributions(contribs)
540 
541  contributions = []
542 
543  for contrib in new_contribs:
544 
545  atoms = self.split_contribution(contrib)
546  atoms = [self.extract_atom(x) for x in atoms]
547 
548  atoms = [a for a in atoms if not a['name'] in fake_atom_names]
549 
550  contributions += self.build_contributions(atoms)
551 
552  if contributions:
553  r = create_rdc_restraint(
554  seq_number,
555  distances[0],
556  contributions)
557 
558  restraints.append(r)
559  seq_number += 1
560 
561  if restraints:
562  return restraints
563 
564 if __name__ == '__main__':
565 
566  noe = 'noe.tbl'
567  sequence = read_sequence_file('seq.dat', first_residue_number=1)
568  reader = TBLReader(sequence, ignore_warnings=True)
569  reader.read_distances(noe, key='test')
Restraint * create_distance_restraint(const Selection &n0, const Selection &n1, double x0, double k, std::string name="Distance%1%")
std::string get_data_path(std::string file_name)
Return the full path to one of this module's data files.
Miscellaneous utilities.
Definition: utils.py:1
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...