3 """@namespace IMP.isd.TBLReader
4 Classes to handle TBL files.
12 IUPAC_CONVENTION =
'iupac'
13 TYPE_AMINO_ACID =
'AMINO_ACID'
28 return 'resid' in x
and 'name' in x
33 atom_dict = {
'segid':
'',
37 pseudoatom_char =
'*',
'%',
'#'
39 def __init__(self, sequence, ignore_warnings=False, sequence_match=(1, 1)):
41 self.sequence = sequence
43 self.offset = sequence_match[1] - sequence_match[0]
44 self.ignore = ignore_warnings
45 self.pseudo_dict = Load(pseudoatoms_dict)
47 def extract_contributions(self, contribs):
57 c = c[:c.rfind(
')') + 1]
59 new_contribs.append(c)
63 def split_contribution(self, contrib):
65 words = contrib.split(
'(')
66 atoms = [word.split(
')')[0]
for word
in words
if word]
70 def resolve_pseudoatom(self, residue_type, atom_name):
74 elif '#' in atom_name:
77 atom_name = atom_name.replace(char,
'%')
84 group = self.pseudo_dict[residue_type][atom_name]
88 key = atom_name, residue_type
90 if not key
in self.missing_atoms:
92 msg =
'Could not resolve pseudoatom %s.%s.' % (
93 residue_type, atom_name)
100 self.missing_atoms.append(key)
106 def to_iupac(self, residue_type, atom_name):
108 raise NotImplementedError
110 iupac_name = self.thesaurus.convert_atom(residue_type,
116 iupac_name = self.thesaurus.convert_atom(residue_type,
124 key = atom_name, residue_type
126 if not key
in self.missing_atoms:
128 if '*' in atom_name
or '#' in atom_name:
130 msg =
'Pseudoatoms not upported: %s' % atom_name
139 msg =
'Warning: atom %s not found in residue %s.' % key
142 raise KeyError(msg % key)
144 self.missing_atoms.append(key)
150 def resolve_dihedral_name(self, atoms):
152 raise NotImplementedError
154 names = [a[
'name']
for a
in atoms]
157 res_type = self.sequence[atoms[1][
'resid']]
160 print 'Residue number overflow in atoms', atoms
163 for dihedral
in self.connectivity[res_type].dihedrals.values():
165 keys = sorted([k
for k
in dihedral.keys()
if 'atom' in k])
171 if name[-1]
in (
'-',
'+'):
174 atom_names.append(name)
176 if atom_names == names:
177 return dihedral[
'name']
179 msg =
'Could not determine name of dihedral angles defined by atoms %s.' % str(
188 def extract_atom(self, a):
190 atom = dict(self.atom_dict)
198 words = [x
for x
in words
if x !=
'"']
200 for i
in range(len(words)):
211 for key
in atom.keys():
216 atom[key] = words[i + 1][:-1]
218 atom[key] = words[i + 1]
225 'Value or keyword "%s" unknown. Source: "%s", decomposed into "%s"' %
226 (word, str(a), str(words)))
228 atom[
'resid'] = int(atom[
'resid']) + self.offset
229 atom[
'name'] = atom[
'name'].upper()
233 def build_contributions(self, atoms):
240 res_type = self.sequence[a[
'resid']]
243 print 'Residue number overflow in atoms', atoms
246 atom_name = a[
'name']
248 if atom_name[-1]
in self.pseudoatom_char:
249 group = self.resolve_pseudoatom(res_type, atom_name)
257 group1, group2 = groups
261 res_1 = atoms[0][
'resid']
262 res_2 = atoms[1][
'resid']
264 for i
in range(len(group1)):
268 for j
in range(len(group2)):
270 if (res_1, name_1) != (res_2, group2[j]):
271 contribs.append(((res_1, name_1), (res_2, group2[j])))
275 def extract_target_values(self, line):
277 end = line.rfind(
')')
279 values = line[end + 1:].split()
282 distances = [float(x)
for x
in values[:3]]
288 val = line.split(
'volume=')
291 volume = float(val[1].split()[0].split(
',')[0])
296 return distances, volume
298 def read_contents(self, filename):
302 filename = os.path.expanduser(filename)
305 lines = f.readlines()
314 if not x
or x[0] ==
'!':
317 not_valid = [kw
for kw
in keywords
if kw
in x]
322 all += x.lower() +
' '
324 return [x.strip()
for x
in all.split(
'assi')]
326 def find_contributions(self, line):
328 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
332 if 1
in [x.count(
'resid')
for x
in contribs]:
338 start = line.find(
'(')
341 atoms[-1][-1] += line
344 stop = line.find(
')')
346 selection = [x.strip()
347 for x
in line[start:stop + 1].split(
'or')]
349 for i
in range(len(selection)):
361 atoms.append(selection)
363 line = line[stop + 1:]
372 for i
in range(len(atoms)):
376 for atom
in atoms[i]:
380 if n >= 0
and len(atom[n + 1:].strip()) > 3:
381 distances = atom[n + 1:].strip()
397 contribs.append(
'%s %s' % (i, j))
399 contribs[0] +=
' ' + distances
404 if distances
is None and volume
is None:
405 raise ValueError(
"could not find either volume or "
406 "distance: %s %s %s" % (distances, volume, contributions))
407 if distances
is None:
408 distances = [volume ** (-1. / 6), 0, 0]
411 volume = dist ** (-6)
412 lower = dist - distances[1]
413 upper = dist + distances[2]
414 return (tuple(contributions), dist, lower, upper, volume)
416 def read_distances(self, filename, key, naming_system=IUPAC_CONVENTION,
418 """reads a tbl file and parses distance restraints.
421 self.naming_system = naming_system
423 assigns = self.read_contents(filename)
426 self.missing_atoms = []
431 contribs = self.find_contributions(line)
433 if False in [check_assigns(x)
for x
in contribs]:
436 distances, volume = self.extract_target_values(contribs[0])
438 if (distances
is None and volume
is None):
439 distances, volume = self.extract_target_values(contribs[-1])
441 new_contribs = self.extract_contributions(contribs)
445 for contrib
in new_contribs:
447 atoms = self.split_contribution(contrib)
448 atoms = [self.extract_atom(x)
for x
in atoms]
450 contributions += self.build_contributions(atoms)
453 r = self.create_distance_restraint(distances, volume,
463 d = decompose_restraints(restraints)
465 for _type
in d.keys():
470 for _type, val
in d.items():
473 new_key = key +
'_%s' % _type
479 d = {key: d.values()[0]}
481 d = {key: restraints}
485 def read_dihedrals(self, filename, key, naming_system=IUPAC_CONVENTION):
487 self.naming_system = naming_system
489 assigns = self.read_contents(filename)
492 self.missing_atoms = []
497 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
499 values, volume = self.extract_target_values(contribs[0])
500 new_contribs = self.extract_contributions(contribs)
505 if len(new_contribs) > 1:
507 'Inconsistency in data file, multiple contributions detected.')
509 atoms = self.split_contribution(new_contribs[0])
510 atoms = [self.extract_atom(x)
for x
in atoms]
512 name = self.resolve_dihedral_name(atoms)
514 r = create_dihedral_restraint(seq_number, name, values, atoms)
522 def read_rdcs(self, filename, key, naming_system=IUPAC_CONVENTION):
524 self.naming_system = naming_system
526 assigns = self.read_contents(filename)
529 self.missing_atoms = []
532 fake_atom_names = (
'OO',
'X',
'Y',
'Z')
536 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
537 distances, volume = self.extract_target_values(contribs[0])
538 new_contribs = self.extract_contributions(contribs)
542 for contrib
in new_contribs:
544 atoms = self.split_contribution(contrib)
545 atoms = [self.extract_atom(x)
for x
in atoms]
547 atoms = [a
for a
in atoms
if not a[
'name']
in fake_atom_names]
549 contributions += self.build_contributions(atoms)
552 r = create_rdc_restraint(
563 if __name__ ==
'__main__':
566 sequence = read_sequence_file(
'seq.dat', first_residue_number=1)
567 reader = TBLReader(sequence, ignore_warnings=
True)
568 reader.read_distances(noe, key=
'test')
kernel::Restraint * create_distance_restraint(const Selection &n0, const Selection &n1, double x0, double k, std::string name="Distance%1%")
std::string get_data_path(std::string file_name)
Return the full path to installed data.
See IMP.isd for more information.