IMP  2.1.0
The Integrative Modeling Platform
TBLReader.py
1 #!/usr/bin/env python
2 
3 """@namespace IMP.isd.TBLReader
4  Classes to handle TBL files.
5 """
6 
7 import sys, os
8 import IMP.isd
9 from IMP.isd.utils import Load, read_sequence_file
10 #from Isd.io.nomenclature import IUPAC_CONVENTION
11 IUPAC_CONVENTION='iupac'
12 TYPE_AMINO_ACID = 'AMINO_ACID'
13 pseudoatoms_dict = IMP.isd.get_data_path('CHARMM_pseudoatoms.dict')
14 
15 def del_comment(x):
16 
17  n = x.find('!')
18 
19  if n >= 0:
20  x = x[:n]
21 
22  return x
23 
24 def check_assigns(x):
25  return 'resid' in x and 'name' in x
26 
27 class TBLReader:
28 
29  atom_dict = {'segid': '',
30  'resid': -1,
31  'name': ''}
32 
33  pseudoatom_char = '*', '%', '#'
34 
35  def __init__(self, sequence, ignore_warnings=False, sequence_match=(1,1)):
36 
37  self.sequence = sequence
38  #sequence_match = (a,b) a: NOE numbering, b: sequence numbering
39  self.offset = sequence_match[1]-sequence_match[0]
40  self.ignore = ignore_warnings
41  self.pseudo_dict = Load(pseudoatoms_dict)
42 
43  def extract_contributions(self, contribs):
44 
45  new_contribs = []
46 
47  for c in contribs:
48 
49  if not c:
50  continue
51 
52  c = c[c.find('('):]
53  c = c[:c.rfind(')')+1]
54 
55  new_contribs.append(c)
56 
57  return new_contribs
58 
59  def split_contribution(self, contrib):
60 
61  words = contrib.split('(')
62  atoms = [word.split(')')[0] for word in words if word]
63 
64  return atoms
65 
66  def resolve_pseudoatom(self, residue_type, atom_name):
67 
68  if '*' in atom_name:
69  char = '*'
70  elif '#' in atom_name:
71  char = '#'
72 
73  atom_name = atom_name.replace(char, '%')
74 
75  ## TODO: Assumes that pseudo-atom names are compatible with
76  ## IUPAC name, since pseudo_dict can handle IUPAC
77  ## names only.
78 
79  try:
80  group = self.pseudo_dict[residue_type][atom_name]
81 
82  except:
83 
84  key = atom_name, residue_type
85 
86  if not key in self.missing_atoms:
87 
88  msg = 'Could not resolve pseudoatom %s.%s.' % (residue_type, atom_name)
89 
90  if self.ignore:
91  print msg
92  else:
93  raise KeyError, msg
94 
95  self.missing_atoms.append(key)
96 
97  return atom_name
98 
99  return group
100 
101  def to_iupac(self, residue_type, atom_name):
102 
103  raise NotImplementedError
104 
105  iupac_name = self.thesaurus.convert_atom(residue_type,
106  atom_name,
107  self.naming_system,
108  IUPAC_CONVENTION,
109  TYPE_AMINO_ACID)
110  try:
111  iupac_name = self.thesaurus.convert_atom(residue_type,
112  atom_name,
113  self.naming_system,
114  IUPAC_CONVENTION,
115  TYPE_AMINO_ACID)
116 
117  except:
118 
119  key = atom_name, residue_type
120 
121  if not key in self.missing_atoms:
122 
123  if '*' in atom_name or '#' in atom_name:
124 
125  msg = 'Pseudoatoms not upported: %s' % atom_name
126 
127  if self.ignore:
128  print msg
129 
130  else:
131  raise KeyError, msg
132 
133  elif self.ignore:
134  msg = 'Warning: atom %s not found in residue %s.' % key
135  print msg
136  else:
137  raise KeyError, msg % key
138 
139  self.missing_atoms.append(key)
140 
141  return atom_name
142 
143  return iupac_name
144 
145  def resolve_dihedral_name(self, atoms):
146 
147  raise NotImplementedError
148 
149  names = [a['name'] for a in atoms]
150 
151  try:
152  res_type = self.sequence[atoms[1]['resid']]
153 
154  except IndexError:
155  print 'Residue number overflow in atoms', atoms
156  return ''
157 
158  for dihedral in self.connectivity[res_type].dihedrals.values():
159 
160  keys = [k for k in dihedral.keys() if 'atom' in k]
161  keys.sort()
162 
163  atom_names = []
164 
165  for k in keys:
166  name = dihedral[k]
167  if name[-1] in ('-', '+'):
168  name = name[:-1]
169 
170  atom_names.append(name)
171 
172  if atom_names == names:
173  return dihedral['name']
174 
175  msg = 'Could not determine name of dihedral angles defined by atoms %s.' % str(names)
176 
177  if self.ignore:
178  print msg
179  return ''
180 
181  raise KeyError, msg
182 
183  def extract_atom(self, a):
184 
185  atom = dict(self.atom_dict)
186 
187  words = a.split()
188 
189  skip_next = False
190 
191  ## correct for segid statements
192 
193  words = [x for x in words if x <> '"']
194 
195  for i in range(len(words)):
196 
197  if skip_next:
198  skip_next = False
199  continue
200 
201  word = words[i]
202 
203  if word == 'and':
204  continue
205 
206  for key in atom.keys():
207 
208  if key in word:
209 
210  if key == 'segid':
211  atom[key] = words[i+1][:-1]
212  else:
213  atom[key] = words[i+1]
214 
215  skip_next = True
216  break
217 
218  else:
219  raise KeyError, 'Value or keyword "%s" unknown. Source: "%s", decomposed into "%s"' % \
220  (word, str(a), str(words))
221 
222  atom['resid'] = int(atom['resid']) + self.offset
223  atom['name'] = atom['name'].upper()
224 
225  return atom
226 
227  def build_contributions(self, atoms):
228 
229  groups = []
230 
231  for a in atoms:
232 
233  try:
234  res_type = self.sequence[a['resid']]
235 
236  except IndexError:
237  print 'Residue number overflow in atoms', atoms
238  return []
239 
240  atom_name = a['name']
241 
242  if atom_name[-1] in self.pseudoatom_char:
243  group = self.resolve_pseudoatom(res_type, atom_name)
244 
245  else:
246  #group = [self.to_iupac(res_type, atom_name)]
247  group = [atom_name]
248 
249  groups.append(group)
250 
251  group1, group2 = groups
252 
253  contribs = []
254 
255  res_1 = atoms[0]['resid']
256  res_2 = atoms[1]['resid']
257 
258  for i in range(len(group1)):
259 
260  name_1 = group1[i]
261 
262  for j in range(len(group2)):
263 
264  if (res_1, name_1) <> (res_2, group2[j]):
265  contribs.append(((res_1, name_1), (res_2, group2[j])))
266 
267  return contribs
268 
269  def extract_target_values(self, line):
270 
271  end = line.rfind(')')
272 
273  values = line[end+1:].split()
274 
275  try:
276  distances = [float(x) for x in values[:3]]
277  except:
278  distances = None
279 
280  ## read volume from ARIA 1.x restraint files
281 
282  val = line.split('volume=')
283 
284  if len(val) > 1:
285  volume = float(val[1].split()[0].split(',')[0])
286  else:
287 
288  volume = None
289 
290  return distances, volume
291 
292  def read_contents(self, filename):
293 
294  keywords = 'class',
295 
296  filename = os.path.expanduser(filename)
297 
298  f = open(filename)
299  lines = f.readlines()
300  f.close()
301 
302  all = ''
303 
304  for x in lines:
305 
306  x = x.strip()
307 
308  if not x or x[0] == '!':
309  continue
310 
311  not_valid = [kw for kw in keywords if kw in x]
312 
313  if not_valid:
314  continue
315 
316  all += x.lower() + ' '
317 
318  return [x.strip() for x in all.split('assi')]
319 
320  def find_contributions(self, line):
321 
322  contribs = [del_comment(x).strip() for x in line.split('or')]
323 
324  ## use alternative parser for implicitly listed atom pairs
325 
326  if 1 in [x.count('resid') for x in contribs]:
327 
328  atoms = []
329 
330  while line:
331 
332  start = line.find('(')
333 
334  if start < 0:
335  atoms[-1][-1] += line
336  break
337 
338  stop = line.find(')')
339 
340  selection = [x.strip() for x in line[start:stop+1].split('or')]
341 
342  for i in range(len(selection)):
343 
344  val = selection[i]
345 
346  if not '(' in val:
347  val = '(' + val
348 
349  if not ')' in val:
350  val += ')'
351 
352  selection[i] = val
353 
354  atoms.append(selection)
355 
356  line = line[stop+1:]
357 
358  if len(atoms) <> 2:
359  raise
360 
361  ## find and isolate target distances
362 
363  l = []
364 
365  for i in range(len(atoms)):
366 
367  g = []
368 
369  for atom in atoms[i]:
370 
371  n = atom.rfind(')')
372 
373  if n >= 0 and len(atom[n+1:].strip()) > 3:
374  distances = atom[n+1:].strip()
375  atom = atom[:n+1]
376 
377  g.append(atom)
378 
379  l.append(g)
380 
381  a, b = l
382 
383  if len(a) > len(b):
384  a, b = b, a
385 
386  contribs = []
387 
388  for i in a:
389  for j in b:
390  contribs.append('%s %s' % (i,j))
391 
392  contribs[0] += ' ' + distances
393 
394  return contribs
395 
396  def create_distance_restraint(self, distances, volume, contributions):
397  if distances is None and volume is None:
398  raise ValueError, "could not find either volume or "\
399  "distance: %s %s %s" % (distances,volume,contributions)
400  if distances is None:
401  distances = [volume**(-1./6),0,0]
402  dist = distances[0]
403  if volume is None:
404  volume = dist ** (-6)
405  lower = dist - distances[1]
406  upper = dist + distances[2]
407  return (tuple(contributions), dist, lower, upper, volume)
408 
409  def read_distances(self, filename, key, naming_system=IUPAC_CONVENTION,
410  decompose=False):
411  """reads a tbl file and parses distance restraints.
412  """
413 
414  self.naming_system = naming_system
415 
416  assigns = self.read_contents(filename)
417 
418  restraints = []
419  self.missing_atoms = []
420  seq_number = 0
421 
422  for line in assigns:
423 
424  contribs = self.find_contributions(line)
425 
426  if False in [check_assigns(x) for x in contribs]:
427  continue
428 
429  distances, volume = self.extract_target_values(contribs[0])
430 
431  if (distances is None and volume is None):
432  distances, volume = self.extract_target_values(contribs[-1])
433 
434  new_contribs = self.extract_contributions(contribs)
435 
436  contributions = []
437 
438  for contrib in new_contribs:
439 
440  atoms = self.split_contribution(contrib)
441  atoms = [self.extract_atom(x) for x in atoms]
442 
443  contributions += self.build_contributions(atoms)
444 
445  if contributions:
446  r = self.create_distance_restraint(distances, volume,
447  contributions)
448 
449  restraints.append(r)
450  seq_number += 1
451 
452  if restraints:
453 
454  if decompose:
455 
456  d = decompose_restraints(restraints)
457 
458  for _type in d.keys():
459  if not d[_type]:
460  del d[_type]
461 
462  if len(d) > 1:
463  for _type, val in d.items():
464 
465  if val:
466  new_key = key + '_%s' % _type
467  d[new_key] = val
468 
469  del d[_type]
470 
471  else:
472  d = {key: d.values()[0]}
473  else:
474  d = {key: restraints}
475 
476  return d
477 
478  def read_dihedrals(self, filename, key, naming_system=IUPAC_CONVENTION):
479 
480  self.naming_system = naming_system
481 
482  assigns = self.read_contents(filename)
483 
484  restraints = []
485  self.missing_atoms = []
486  seq_number = 0
487 
488  for line in assigns:
489 
490  contribs = [del_comment(x).strip() for x in line.split('or')]
491 
492  values, volume = self.extract_target_values(contribs[0])
493  new_contribs = self.extract_contributions(contribs)
494 
495  if not new_contribs:
496  continue
497 
498  if len(new_contribs) > 1:
499  raise ValueError, 'Inconsistency in data file, multiple contributions detected.'
500 
501  atoms = self.split_contribution(new_contribs[0])
502  atoms = [self.extract_atom(x) for x in atoms]
503 
504  name = self.resolve_dihedral_name(atoms)
505 
506  r = create_dihedral_restraint(seq_number, name, values, atoms)
507 
508  restraints.append(r)
509  seq_number += 1
510 
511  if restraints:
512  return restraints
513 
514  def read_rdcs(self, filename, key, naming_system=IUPAC_CONVENTION):
515 
516  self.naming_system = naming_system
517 
518  assigns = self.read_contents(filename)
519 
520  restraints = []
521  self.missing_atoms = []
522  seq_number = 0
523 
524  fake_atom_names = ('OO', 'X', 'Y', 'Z')
525 
526  for line in assigns:
527 
528  contribs = [del_comment(x).strip() for x in line.split('or')]
529  distances, volume = self.extract_target_values(contribs[0])
530  new_contribs = self.extract_contributions(contribs)
531 
532  contributions = []
533 
534  for contrib in new_contribs:
535 
536  atoms = self.split_contribution(contrib)
537  atoms = [self.extract_atom(x) for x in atoms]
538 
539  atoms = [a for a in atoms if not a['name'] in fake_atom_names]
540 
541  contributions += self.build_contributions(atoms)
542 
543  if contributions:
544  r = create_rdc_restraint(seq_number, distances[0], contributions)
545 
546  restraints.append(r)
547  seq_number += 1
548 
549  if restraints:
550  return restraints
551 
552 if __name__ == '__main__':
553 
554  noe = 'noe.tbl'
555  sequence = read_sequence_file('seq.dat', first_residue_number=1)
556  reader = TBLReader(sequence, ignore_warnings=True)
557  reader.read_distances(noe, key='test')
kernel::Restraint * create_distance_restraint(const Selection &n0, const Selection &n1, double x0, double k, std::string name="Distance%1%")
std::string get_data_path(std::string file_name)
Return the full path to installed data.
Miscellaneous utilities.
Definition: utils.py:1
See IMP.isd for more information.