3 """@namespace IMP.isd.TBLReader
4 Classes to handle TBL files.
11 IUPAC_CONVENTION=
'iupac'
12 TYPE_AMINO_ACID =
'AMINO_ACID'
25 return 'resid' in x
and 'name' in x
29 atom_dict = {
'segid':
'',
33 pseudoatom_char =
'*',
'%',
'#'
35 def __init__(self, sequence, ignore_warnings=False, sequence_match=(1,1)):
37 self.sequence = sequence
39 self.offset = sequence_match[1]-sequence_match[0]
40 self.ignore = ignore_warnings
41 self.pseudo_dict = Load(pseudoatoms_dict)
43 def extract_contributions(self, contribs):
53 c = c[:c.rfind(
')')+1]
55 new_contribs.append(c)
59 def split_contribution(self, contrib):
61 words = contrib.split(
'(')
62 atoms = [word.split(
')')[0]
for word
in words
if word]
66 def resolve_pseudoatom(self, residue_type, atom_name):
70 elif '#' in atom_name:
73 atom_name = atom_name.replace(char,
'%')
80 group = self.pseudo_dict[residue_type][atom_name]
84 key = atom_name, residue_type
86 if not key
in self.missing_atoms:
88 msg =
'Could not resolve pseudoatom %s.%s.' % (residue_type, atom_name)
95 self.missing_atoms.append(key)
101 def to_iupac(self, residue_type, atom_name):
103 raise NotImplementedError
105 iupac_name = self.thesaurus.convert_atom(residue_type,
111 iupac_name = self.thesaurus.convert_atom(residue_type,
119 key = atom_name, residue_type
121 if not key
in self.missing_atoms:
123 if '*' in atom_name
or '#' in atom_name:
125 msg =
'Pseudoatoms not upported: %s' % atom_name
134 msg =
'Warning: atom %s not found in residue %s.' % key
137 raise KeyError, msg % key
139 self.missing_atoms.append(key)
145 def resolve_dihedral_name(self, atoms):
147 raise NotImplementedError
149 names = [a[
'name']
for a
in atoms]
152 res_type = self.sequence[atoms[1][
'resid']]
155 print 'Residue number overflow in atoms', atoms
158 for dihedral
in self.connectivity[res_type].dihedrals.values():
160 keys = [k
for k
in dihedral.keys()
if 'atom' in k]
167 if name[-1]
in (
'-',
'+'):
170 atom_names.append(name)
172 if atom_names == names:
173 return dihedral[
'name']
175 msg =
'Could not determine name of dihedral angles defined by atoms %s.' % str(names)
183 def extract_atom(self, a):
185 atom = dict(self.atom_dict)
193 words = [x
for x
in words
if x <>
'"']
195 for i
in range(len(words)):
206 for key
in atom.keys():
211 atom[key] = words[i+1][:-1]
213 atom[key] = words[i+1]
219 raise KeyError,
'Value or keyword "%s" unknown. Source: "%s", decomposed into "%s"' % \
220 (word, str(a), str(words))
222 atom[
'resid'] = int(atom[
'resid']) + self.offset
223 atom[
'name'] = atom[
'name'].upper()
227 def build_contributions(self, atoms):
234 res_type = self.sequence[a[
'resid']]
237 print 'Residue number overflow in atoms', atoms
240 atom_name = a[
'name']
242 if atom_name[-1]
in self.pseudoatom_char:
243 group = self.resolve_pseudoatom(res_type, atom_name)
251 group1, group2 = groups
255 res_1 = atoms[0][
'resid']
256 res_2 = atoms[1][
'resid']
258 for i
in range(len(group1)):
262 for j
in range(len(group2)):
264 if (res_1, name_1) <> (res_2, group2[j]):
265 contribs.append(((res_1, name_1), (res_2, group2[j])))
269 def extract_target_values(self, line):
271 end = line.rfind(
')')
273 values = line[end+1:].split()
276 distances = [float(x)
for x
in values[:3]]
282 val = line.split(
'volume=')
285 volume = float(val[1].split()[0].split(
',')[0])
290 return distances, volume
292 def read_contents(self, filename):
296 filename = os.path.expanduser(filename)
299 lines = f.readlines()
308 if not x
or x[0] ==
'!':
311 not_valid = [kw
for kw
in keywords
if kw
in x]
316 all += x.lower() +
' '
318 return [x.strip()
for x
in all.split(
'assi')]
320 def find_contributions(self, line):
322 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
326 if 1
in [x.count(
'resid')
for x
in contribs]:
332 start = line.find(
'(')
335 atoms[-1][-1] += line
338 stop = line.find(
')')
340 selection = [x.strip()
for x
in line[start:stop+1].split(
'or')]
342 for i
in range(len(selection)):
354 atoms.append(selection)
365 for i
in range(len(atoms)):
369 for atom
in atoms[i]:
373 if n >= 0
and len(atom[n+1:].strip()) > 3:
374 distances = atom[n+1:].strip()
390 contribs.append(
'%s %s' % (i,j))
392 contribs[0] +=
' ' + distances
397 if distances
is None and volume
is None:
398 raise ValueError,
"could not find either volume or "\
399 "distance: %s %s %s" % (distances,volume,contributions)
400 if distances
is None:
401 distances = [volume**(-1./6),0,0]
404 volume = dist ** (-6)
405 lower = dist - distances[1]
406 upper = dist + distances[2]
407 return (tuple(contributions), dist, lower, upper, volume)
409 def read_distances(self, filename, key, naming_system=IUPAC_CONVENTION,
411 """reads a tbl file and parses distance restraints.
414 self.naming_system = naming_system
416 assigns = self.read_contents(filename)
419 self.missing_atoms = []
424 contribs = self.find_contributions(line)
426 if False in [check_assigns(x)
for x
in contribs]:
429 distances, volume = self.extract_target_values(contribs[0])
431 if (distances
is None and volume
is None):
432 distances, volume = self.extract_target_values(contribs[-1])
434 new_contribs = self.extract_contributions(contribs)
438 for contrib
in new_contribs:
440 atoms = self.split_contribution(contrib)
441 atoms = [self.extract_atom(x)
for x
in atoms]
443 contributions += self.build_contributions(atoms)
446 r = self.create_distance_restraint(distances, volume,
456 d = decompose_restraints(restraints)
458 for _type
in d.keys():
463 for _type, val
in d.items():
466 new_key = key +
'_%s' % _type
472 d = {key: d.values()[0]}
474 d = {key: restraints}
478 def read_dihedrals(self, filename, key, naming_system=IUPAC_CONVENTION):
480 self.naming_system = naming_system
482 assigns = self.read_contents(filename)
485 self.missing_atoms = []
490 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
492 values, volume = self.extract_target_values(contribs[0])
493 new_contribs = self.extract_contributions(contribs)
498 if len(new_contribs) > 1:
499 raise ValueError,
'Inconsistency in data file, multiple contributions detected.'
501 atoms = self.split_contribution(new_contribs[0])
502 atoms = [self.extract_atom(x)
for x
in atoms]
504 name = self.resolve_dihedral_name(atoms)
506 r = create_dihedral_restraint(seq_number, name, values, atoms)
514 def read_rdcs(self, filename, key, naming_system=IUPAC_CONVENTION):
516 self.naming_system = naming_system
518 assigns = self.read_contents(filename)
521 self.missing_atoms = []
524 fake_atom_names = (
'OO',
'X',
'Y',
'Z')
528 contribs = [del_comment(x).strip()
for x
in line.split(
'or')]
529 distances, volume = self.extract_target_values(contribs[0])
530 new_contribs = self.extract_contributions(contribs)
534 for contrib
in new_contribs:
536 atoms = self.split_contribution(contrib)
537 atoms = [self.extract_atom(x)
for x
in atoms]
539 atoms = [a
for a
in atoms
if not a[
'name']
in fake_atom_names]
541 contributions += self.build_contributions(atoms)
544 r = create_rdc_restraint(seq_number, distances[0], contributions)
552 if __name__ ==
'__main__':
555 sequence = read_sequence_file(
'seq.dat', first_residue_number=1)
556 reader = TBLReader(sequence, ignore_warnings=
True)
557 reader.read_distances(noe, key=
'test')
kernel::Restraint * create_distance_restraint(const Selection &n0, const Selection &n1, double x0, double k, std::string name="Distance%1%")
std::string get_data_path(std::string file_name)
Return the full path to installed data.
See IMP.isd for more information.