IMP  2.3.0
The Integrative Modeling Platform
TBLReader.py
1 #!/usr/bin/env python
2 
3 """@namespace IMP.isd.TBLReader
4  Classes to handle TBL files.
5 """
6 
7 import sys
8 import os
9 import IMP.isd
10 from IMP.isd.utils import Load, read_sequence_file
11 #from Isd.io.nomenclature import IUPAC_CONVENTION
12 IUPAC_CONVENTION = 'iupac'
13 TYPE_AMINO_ACID = 'AMINO_ACID'
14 pseudoatoms_dict = IMP.isd.get_data_path('CHARMM_pseudoatoms.dict')
15 
16 
17 def del_comment(x):
18 
19  n = x.find('!')
20 
21  if n >= 0:
22  x = x[:n]
23 
24  return x
25 
26 
27 def check_assigns(x):
28  return 'resid' in x and 'name' in x
29 
30 
31 class TBLReader:
32 
33  atom_dict = {'segid': '',
34  'resid': -1,
35  'name': ''}
36 
37  pseudoatom_char = '*', '%', '#'
38 
39  def __init__(self, sequence, ignore_warnings=False, sequence_match=(1, 1)):
40 
41  self.sequence = sequence
42  # sequence_match = (a,b) a: NOE numbering, b: sequence numbering
43  self.offset = sequence_match[1] - sequence_match[0]
44  self.ignore = ignore_warnings
45  self.pseudo_dict = Load(pseudoatoms_dict)
46 
47  def extract_contributions(self, contribs):
48 
49  new_contribs = []
50 
51  for c in contribs:
52 
53  if not c:
54  continue
55 
56  c = c[c.find('('):]
57  c = c[:c.rfind(')') + 1]
58 
59  new_contribs.append(c)
60 
61  return new_contribs
62 
63  def split_contribution(self, contrib):
64 
65  words = contrib.split('(')
66  atoms = [word.split(')')[0] for word in words if word]
67 
68  return atoms
69 
70  def resolve_pseudoatom(self, residue_type, atom_name):
71 
72  if '*' in atom_name:
73  char = '*'
74  elif '#' in atom_name:
75  char = '#'
76 
77  atom_name = atom_name.replace(char, '%')
78 
79  # TODO: Assumes that pseudo-atom names are compatible with
80  # IUPAC name, since pseudo_dict can handle IUPAC
81  # names only.
82 
83  try:
84  group = self.pseudo_dict[residue_type][atom_name]
85 
86  except:
87 
88  key = atom_name, residue_type
89 
90  if not key in self.missing_atoms:
91 
92  msg = 'Could not resolve pseudoatom %s.%s.' % (
93  residue_type, atom_name)
94 
95  if self.ignore:
96  print msg
97  else:
98  raise KeyError(msg)
99 
100  self.missing_atoms.append(key)
101 
102  return atom_name
103 
104  return group
105 
106  def to_iupac(self, residue_type, atom_name):
107 
108  raise NotImplementedError
109 
110  iupac_name = self.thesaurus.convert_atom(residue_type,
111  atom_name,
112  self.naming_system,
113  IUPAC_CONVENTION,
114  TYPE_AMINO_ACID)
115  try:
116  iupac_name = self.thesaurus.convert_atom(residue_type,
117  atom_name,
118  self.naming_system,
119  IUPAC_CONVENTION,
120  TYPE_AMINO_ACID)
121 
122  except:
123 
124  key = atom_name, residue_type
125 
126  if not key in self.missing_atoms:
127 
128  if '*' in atom_name or '#' in atom_name:
129 
130  msg = 'Pseudoatoms not upported: %s' % atom_name
131 
132  if self.ignore:
133  print msg
134 
135  else:
136  raise KeyError(msg)
137 
138  elif self.ignore:
139  msg = 'Warning: atom %s not found in residue %s.' % key
140  print msg
141  else:
142  raise KeyError(msg % key)
143 
144  self.missing_atoms.append(key)
145 
146  return atom_name
147 
148  return iupac_name
149 
150  def resolve_dihedral_name(self, atoms):
151 
152  raise NotImplementedError
153 
154  names = [a['name'] for a in atoms]
155 
156  try:
157  res_type = self.sequence[atoms[1]['resid']]
158 
159  except IndexError:
160  print 'Residue number overflow in atoms', atoms
161  return ''
162 
163  for dihedral in self.connectivity[res_type].dihedrals.values():
164 
165  keys = sorted([k for k in dihedral.keys() if 'atom' in k])
166 
167  atom_names = []
168 
169  for k in keys:
170  name = dihedral[k]
171  if name[-1] in ('-', '+'):
172  name = name[:-1]
173 
174  atom_names.append(name)
175 
176  if atom_names == names:
177  return dihedral['name']
178 
179  msg = 'Could not determine name of dihedral angles defined by atoms %s.' % str(
180  names)
181 
182  if self.ignore:
183  print msg
184  return ''
185 
186  raise KeyError(msg)
187 
188  def extract_atom(self, a):
189 
190  atom = dict(self.atom_dict)
191 
192  words = a.split()
193 
194  skip_next = False
195 
196  # correct for segid statements
197 
198  words = [x for x in words if x != '"']
199 
200  for i in range(len(words)):
201 
202  if skip_next:
203  skip_next = False
204  continue
205 
206  word = words[i]
207 
208  if word == 'and':
209  continue
210 
211  for key in atom.keys():
212 
213  if key in word:
214 
215  if key == 'segid':
216  atom[key] = words[i + 1][:-1]
217  else:
218  atom[key] = words[i + 1]
219 
220  skip_next = True
221  break
222 
223  else:
224  raise KeyError(
225  'Value or keyword "%s" unknown. Source: "%s", decomposed into "%s"' %
226  (word, str(a), str(words)))
227 
228  atom['resid'] = int(atom['resid']) + self.offset
229  atom['name'] = atom['name'].upper()
230 
231  return atom
232 
233  def build_contributions(self, atoms):
234 
235  groups = []
236 
237  for a in atoms:
238 
239  try:
240  res_type = self.sequence[a['resid']]
241 
242  except IndexError:
243  print 'Residue number overflow in atoms', atoms
244  return []
245 
246  atom_name = a['name']
247 
248  if atom_name[-1] in self.pseudoatom_char:
249  group = self.resolve_pseudoatom(res_type, atom_name)
250 
251  else:
252  #group = [self.to_iupac(res_type, atom_name)]
253  group = [atom_name]
254 
255  groups.append(group)
256 
257  group1, group2 = groups
258 
259  contribs = []
260 
261  res_1 = atoms[0]['resid']
262  res_2 = atoms[1]['resid']
263 
264  for i in range(len(group1)):
265 
266  name_1 = group1[i]
267 
268  for j in range(len(group2)):
269 
270  if (res_1, name_1) != (res_2, group2[j]):
271  contribs.append(((res_1, name_1), (res_2, group2[j])))
272 
273  return contribs
274 
275  def extract_target_values(self, line):
276 
277  end = line.rfind(')')
278 
279  values = line[end + 1:].split()
280 
281  try:
282  distances = [float(x) for x in values[:3]]
283  except:
284  distances = None
285 
286  # read volume from ARIA 1.x restraint files
287 
288  val = line.split('volume=')
289 
290  if len(val) > 1:
291  volume = float(val[1].split()[0].split(',')[0])
292  else:
293 
294  volume = None
295 
296  return distances, volume
297 
298  def read_contents(self, filename):
299 
300  keywords = 'class',
301 
302  filename = os.path.expanduser(filename)
303 
304  f = open(filename)
305  lines = f.readlines()
306  f.close()
307 
308  all = ''
309 
310  for x in lines:
311 
312  x = x.strip()
313 
314  if not x or x[0] == '!':
315  continue
316 
317  not_valid = [kw for kw in keywords if kw in x]
318 
319  if not_valid:
320  continue
321 
322  all += x.lower() + ' '
323 
324  return [x.strip() for x in all.split('assi')]
325 
326  def find_contributions(self, line):
327 
328  contribs = [del_comment(x).strip() for x in line.split('or')]
329 
330  # use alternative parser for implicitly listed atom pairs
331 
332  if 1 in [x.count('resid') for x in contribs]:
333 
334  atoms = []
335 
336  while line:
337 
338  start = line.find('(')
339 
340  if start < 0:
341  atoms[-1][-1] += line
342  break
343 
344  stop = line.find(')')
345 
346  selection = [x.strip()
347  for x in line[start:stop + 1].split('or')]
348 
349  for i in range(len(selection)):
350 
351  val = selection[i]
352 
353  if not '(' in val:
354  val = '(' + val
355 
356  if not ')' in val:
357  val += ')'
358 
359  selection[i] = val
360 
361  atoms.append(selection)
362 
363  line = line[stop + 1:]
364 
365  if len(atoms) != 2:
366  raise
367 
368  # find and isolate target distances
369 
370  l = []
371 
372  for i in range(len(atoms)):
373 
374  g = []
375 
376  for atom in atoms[i]:
377 
378  n = atom.rfind(')')
379 
380  if n >= 0 and len(atom[n + 1:].strip()) > 3:
381  distances = atom[n + 1:].strip()
382  atom = atom[:n + 1]
383 
384  g.append(atom)
385 
386  l.append(g)
387 
388  a, b = l
389 
390  if len(a) > len(b):
391  a, b = b, a
392 
393  contribs = []
394 
395  for i in a:
396  for j in b:
397  contribs.append('%s %s' % (i, j))
398 
399  contribs[0] += ' ' + distances
400 
401  return contribs
402 
403  def create_distance_restraint(self, distances, volume, contributions):
404  if distances is None and volume is None:
405  raise ValueError("could not find either volume or "
406  "distance: %s %s %s" % (distances, volume, contributions))
407  if distances is None:
408  distances = [volume ** (-1. / 6), 0, 0]
409  dist = distances[0]
410  if volume is None:
411  volume = dist ** (-6)
412  lower = dist - distances[1]
413  upper = dist + distances[2]
414  return (tuple(contributions), dist, lower, upper, volume)
415 
416  def read_distances(self, filename, key, naming_system=IUPAC_CONVENTION,
417  decompose=False):
418  """reads a tbl file and parses distance restraints.
419  """
420 
421  self.naming_system = naming_system
422 
423  assigns = self.read_contents(filename)
424 
425  restraints = []
426  self.missing_atoms = []
427  seq_number = 0
428 
429  for line in assigns:
430 
431  contribs = self.find_contributions(line)
432 
433  if False in [check_assigns(x) for x in contribs]:
434  continue
435 
436  distances, volume = self.extract_target_values(contribs[0])
437 
438  if (distances is None and volume is None):
439  distances, volume = self.extract_target_values(contribs[-1])
440 
441  new_contribs = self.extract_contributions(contribs)
442 
443  contributions = []
444 
445  for contrib in new_contribs:
446 
447  atoms = self.split_contribution(contrib)
448  atoms = [self.extract_atom(x) for x in atoms]
449 
450  contributions += self.build_contributions(atoms)
451 
452  if contributions:
453  r = self.create_distance_restraint(distances, volume,
454  contributions)
455 
456  restraints.append(r)
457  seq_number += 1
458 
459  if restraints:
460 
461  if decompose:
462 
463  d = decompose_restraints(restraints)
464 
465  for _type in d.keys():
466  if not d[_type]:
467  del d[_type]
468 
469  if len(d) > 1:
470  for _type, val in d.items():
471 
472  if val:
473  new_key = key + '_%s' % _type
474  d[new_key] = val
475 
476  del d[_type]
477 
478  else:
479  d = {key: d.values()[0]}
480  else:
481  d = {key: restraints}
482 
483  return d
484 
485  def read_dihedrals(self, filename, key, naming_system=IUPAC_CONVENTION):
486 
487  self.naming_system = naming_system
488 
489  assigns = self.read_contents(filename)
490 
491  restraints = []
492  self.missing_atoms = []
493  seq_number = 0
494 
495  for line in assigns:
496 
497  contribs = [del_comment(x).strip() for x in line.split('or')]
498 
499  values, volume = self.extract_target_values(contribs[0])
500  new_contribs = self.extract_contributions(contribs)
501 
502  if not new_contribs:
503  continue
504 
505  if len(new_contribs) > 1:
506  raise ValueError(
507  'Inconsistency in data file, multiple contributions detected.')
508 
509  atoms = self.split_contribution(new_contribs[0])
510  atoms = [self.extract_atom(x) for x in atoms]
511 
512  name = self.resolve_dihedral_name(atoms)
513 
514  r = create_dihedral_restraint(seq_number, name, values, atoms)
515 
516  restraints.append(r)
517  seq_number += 1
518 
519  if restraints:
520  return restraints
521 
522  def read_rdcs(self, filename, key, naming_system=IUPAC_CONVENTION):
523 
524  self.naming_system = naming_system
525 
526  assigns = self.read_contents(filename)
527 
528  restraints = []
529  self.missing_atoms = []
530  seq_number = 0
531 
532  fake_atom_names = ('OO', 'X', 'Y', 'Z')
533 
534  for line in assigns:
535 
536  contribs = [del_comment(x).strip() for x in line.split('or')]
537  distances, volume = self.extract_target_values(contribs[0])
538  new_contribs = self.extract_contributions(contribs)
539 
540  contributions = []
541 
542  for contrib in new_contribs:
543 
544  atoms = self.split_contribution(contrib)
545  atoms = [self.extract_atom(x) for x in atoms]
546 
547  atoms = [a for a in atoms if not a['name'] in fake_atom_names]
548 
549  contributions += self.build_contributions(atoms)
550 
551  if contributions:
552  r = create_rdc_restraint(
553  seq_number,
554  distances[0],
555  contributions)
556 
557  restraints.append(r)
558  seq_number += 1
559 
560  if restraints:
561  return restraints
562 
563 if __name__ == '__main__':
564 
565  noe = 'noe.tbl'
566  sequence = read_sequence_file('seq.dat', first_residue_number=1)
567  reader = TBLReader(sequence, ignore_warnings=True)
568  reader.read_distances(noe, key='test')
kernel::Restraint * create_distance_restraint(const Selection &n0, const Selection &n1, double x0, double k, std::string name="Distance%1%")
std::string get_data_path(std::string file_name)
Return the full path to installed data.
Miscellaneous utilities.
Definition: utils.py:1
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...