IMP logo
IMP Reference Guide  develop.63b38c487d,2024/12/22
The Integrative Modeling Platform
TALOSReader.py
1 #!/usr/bin/env python
2 
3 """@namespace IMP.isd.TALOSReader
4  Classes to handle TALOS files or folders.
5 """
6 
7 import os
8 from math import pi
9 from IMP.isd.utils import check_residue, read_sequence_file
10 
11 
13 
14  """ reads a TALOS file, or a TALOS folder, and stores the data """
15 
def __init__(self, sequence, detailed_input, keep_all=False,
             sequence_match=(1, 1)):
    """Create an empty reader.

    sequence : dictionary whose keys are sequence numbers and whose
        values are 3-letter residue codes.
    detailed_input : True when the input is predAll.tab or the
        pred/res???.tab files, False when it is pred.tab.
    keep_all : whether outliers are kept when detailed_input is True.
    sequence_match : pair (talos_no, sequence_no) tying the two residue
        numberings together.
    """
    self.data = {}
    self.sequence = sequence
    self.keep_all = keep_all
    self.detailed_input = detailed_input
    # offset is what gets added to a TALOS residue number by the readers
    sequence_no = sequence_match[1]
    talos_no = sequence_match[0]
    self.offset = sequence_no - talos_no
32 
def add_full_datum(self, resno, phi, psi):
    """Store the whole list of predictions made for one residue.

    The entry written to data[resno] contains:
    'full' : always True
    'num' : number of predictions (the length of phi)
    'phi' : the list of predictions for phi
    'psi' : the list of predictions for psi

    Raises RuntimeError if resno already has an entry.
    """
    if resno in self.data:
        raise RuntimeError("would overwrite data for residue %d" % resno)
    entry = dict(full=True, num=len(phi), phi=phi, psi=psi)
    self.data[resno] = entry
47 
def add_mean_datum(self, resno, num, phi, psi):
    """Store the single (average) prediction TALOS gives for one residue.

    The entry written to data[resno] contains:
    'full' : always False
    'num' : the number of matches the average was calculated from
    'phi' : a tuple in the form (mean, error)
    'psi' : same layout as 'phi'

    Raises RuntimeError if resno already has an entry.
    """
    if resno in self.data:
        raise RuntimeError("would overwrite data for residue %d" % resno)
    entry = dict(full=False, num=num, phi=phi, psi=psi)
    self.data[resno] = entry
67 
def _read_one_residue(self, fname):
    """Read every prediction for one residue from a per-residue file.

    The residue number is parsed from characters 3 to 5 of the base name
    of fname (for example 'res012.tab' gives 12) and shifted by
    self.offset. Each line whose first token is all digits contributes
    one phi value (second token) and one psi value (third token), both
    scaled by 2*pi/360, i.e. converted from degrees to radians. Lines
    whose fifth token is below 0.999 are dropped unless self.keep_all is
    set. A line whose second token is 'RESNAMES' is used to check the
    residue name against self.sequence. The collected lists are stored
    with add_full_datum().

    fname : path to the per-residue TALOS file.

    Raises RuntimeError (from add_full_datum) when the residue already
    has data.
    """
    # the context manager closes the file even if parsing raises
    with open(fname) as fl:
        resno = int(os.path.basename(fname)[3:6]) + self.offset
        phi = []
        psi = []
        for line in fl:
            tokens = line.split()
            if not tokens:
                continue
            # a one-token line has no tokens[1]; test the length first
            if len(tokens) > 1 and tokens[1] == 'RESNAMES':
                check_residue(self.sequence[resno], tokens[3])
                continue
            if not tokens[0].isdigit():
                continue
            if float(tokens[4]) < 0.999 and not self.keep_all:
                continue
            phi.append(float(tokens[1]) * 2 * pi / 360.)
            psi.append(float(tokens[2]) * 2 * pi / 360.)
    self.add_full_datum(resno, phi, psi)
87 
def _read_predAll(self, fname):
    """Read the predictions for all residues from a predAll.tab file.

    Only lines whose first token is all digits are used. The second
    token plus self.offset is the residue number; consecutive lines with
    the same residue number are gathered into one phi list (fourth
    token) and one psi list (fifth token), both scaled by 2*pi/360, i.e.
    converted from degrees to radians. Lines whose seventh token is
    below 0.999 are dropped unless self.keep_all is set. Each finished
    group is stored with add_full_datum().

    fname : path to the predAll.tab file.

    Raises RuntimeError (from add_full_datum) when a residue number
    shows up in two separate groups.
    """
    # None (not -1) marks "no residue yet": an offset can legitimately
    # produce -1 as a residue number
    resno = None
    phi = []
    psi = []
    # the context manager closes the file even if parsing raises
    with open(fname) as fl:
        for line in fl:
            tokens = line.split()
            if not tokens or not tokens[0].isdigit():
                continue
            oldresno = resno
            resno = int(tokens[1]) + self.offset
            if resno != oldresno:
                if oldresno is not None:
                    # phi/psi were collected for the previous residue,
                    # so they must be filed under oldresno, not resno
                    self.add_full_datum(oldresno, phi, psi)
                phi = []
                psi = []
            # second character of the third token; presumably the centre
            # of a residue triplet -- TODO confirm against TALOS format
            resname = tokens[2][1]
            check_residue(self.sequence[resno], resname)
            if float(tokens[6]) < 0.999 and not self.keep_all:
                continue
            phi.append(float(tokens[3]) * 2 * pi / 360.)
            psi.append(float(tokens[4]) * 2 * pi / 360.)
    # the loop only stores a group when the next one begins, so the
    # final residue has to be stored here
    if resno is not None:
        self.add_full_datum(resno, phi, psi)
110 
def _read_observations(self, fname):
    """Send a detailed TALOS file to the parser that matches its name.

    A name ending in 'predAll.tab' goes to _read_predAll(); any other
    name goes to _read_one_residue().
    """
    is_pred_all = fname.endswith('predAll.tab')
    parse = self._read_predAll if is_pred_all else self._read_one_residue
    parse(fname)
116 
def _read_averages(self, fname):
    """Read one average prediction per residue from a pred.tab file.

    Only lines whose first token is all digits are used. The first token
    plus self.offset is the residue number and the second token is the
    residue name, which is checked against self.sequence. Tokens 3 to 6
    are read as phi, psi, dphi and dpsi (in that order) and scaled by
    2*pi/360, i.e. converted from degrees to radians. The ninth token is
    the number of matches; residues with zero matches are skipped. Every
    other residue is stored with add_mean_datum() as (phi, dphi) and
    (psi, dpsi).

    fname : path to the pred.tab file.

    Raises RuntimeError (from add_mean_datum) when a residue number
    occurs twice.
    """
    # the context manager closes the file even if parsing raises
    with open(fname) as fl:
        for line in fl:
            tokens = line.split()
            # blank lines have no tokens[0]; skip them like header lines
            if not tokens or not tokens[0].isdigit():
                continue
            resno = int(tokens[0]) + self.offset
            # compare the file's residue name with the expected residue
            # code, as the other readers do, rather than with the number
            check_residue(self.sequence[resno], tokens[1])
            phi, psi, dphi, dpsi = [2 * pi * float(a) / 360.
                                    for a in tokens[2:6]]
            num = int(tokens[8])
            if num == 0:
                continue
            self.add_mean_datum(resno, num, (phi, dphi), (psi, dpsi))
131 
def read(self, fname):
    """Parse the TALOS file fname and store what it contains.

    With detailed_input set the file is handled by _read_observations(),
    otherwise by _read_averages(). Nothing is returned: the parsed
    values end up in self.data (see the add_*_datum methods for the
    layout).
    """
    if not self.detailed_input:
        self._read_averages(fname)
        return
    self._read_observations(fname)
138 
def get_data(self):
    """Return the dictionary of stored data.

    The reader's own dictionary is handed back, not a copy.
    """
    stored = self.data
    return stored
141 
142 
if __name__ == '__main__':
    # Example run: parse the averages file 'pred.tab' against the
    # sequence read from 'seq.dat'.
    talos = 'pred.tab'
    sequence = read_sequence_file('seq.dat', sequence_match=(1, 5))
    # detailed_input has no default, so leaving it out raises TypeError;
    # pred.tab is the averages file, hence False.
    reader = TALOSReader(sequence, detailed_input=False)
    reader.read(talos)
    data = reader.get_data()
def add_full_datum
in the case of a list of predictions for one residue, add an entry to data which is: 'full' : always ...
Definition: TALOSReader.py:33
def __init__
start the TALOSReader sequence : a dictionary of sequence number keys and 3-letter code values...
Definition: TALOSReader.py:21
reads a TALOS file, or a TALOS folder, and stores the data
Definition: TALOSReader.py:12
def read
reads a TALOS file and returns data.
Definition: TALOSReader.py:132
def add_mean_datum
in the case of a single (average) prediction output by talos for a given residue, add an entry to dat...
Definition: TALOSReader.py:48
Miscellaneous utilities.
Definition: utils.py:1