IMP logo
IMP Reference Guide  develop.cb6747d2d1,2024/03/28
The Integrative Modeling Platform
TALOSReader.py
1 #!/usr/bin/env python
2 
3 """@namespace IMP.isd.TALOSReader
4  Classes to handle TALOS files or folders.
5 """
6 
7 from __future__ import print_function
8 import os
9 from math import pi
10 from IMP.isd.utils import check_residue, read_sequence_file
11 
12 
14 
class TALOSReader:
    """Read a TALOS file, or a TALOS folder, and store the data.

    Parsed predictions accumulate in ``self.data``, a dictionary keyed by
    residue number (TALOS number shifted by ``self.offset``). The layout of
    each entry is described in add_full_datum() and add_mean_datum().
    """

    def __init__(self, sequence, detailed_input, keep_all=False,
                 sequence_match=(1, 1)):
        """Start the TALOSReader.

        sequence : a dictionary of sequence number keys and 3-letter code
                   values.
        detailed_input : True if the input will be either predAll.tab or the
                   pred/res???.tab files. False if it's pred.tab
        keep_all : whether to keep outliers or not, when
                   detailed_input==True.
        sequence_match : in the form (talos_no, sequence_no), assigns a
                   correspondence between residue numberings.
        """
        self.detailed_input = detailed_input
        self.data = {}
        self.keep_all = keep_all
        self.sequence = sequence
        # Added to every TALOS residue number to get the sequence number.
        self.offset = sequence_match[1] - sequence_match[0]

    def add_full_datum(self, resno, phi, psi):
        """Store a list of predictions for one residue.

        The entry added to data is:
            'full' : always True
            'num'  : number of predictions
            'phi'  : the list of predictions for phi
            'psi'  : same for psi

        Raises RuntimeError if data already exists for resno.
        """
        if resno in self.data:
            raise RuntimeError("would overwrite data for residue %d" % resno)
        self.data[resno] = {
            'full': True, 'num': len(phi), 'phi': phi, 'psi': psi}

    def add_mean_datum(self, resno, num, phi, psi):
        """Store a single (average) prediction output by talos for a residue.

        The entry added to data is:
            'full' : always False
            'num'  : the number of matches this average was calculated from
            'phi'  : a tuple in the form (mean, error)
            'psi'  : same as phi.

        Raises RuntimeError if data already exists for resno.
        """
        if resno in self.data:
            raise RuntimeError("would overwrite data for residue %d" % resno)
        self.data[resno] = {
            'full': False, 'num': num, 'phi': phi, 'psi': psi}

    def _read_one_residue(self, fname):
        """Read one per-residue file and store its predictions.

        The residue number is taken from characters 3 to 5 of the file's
        base name (e.g. the ``005`` of ``res005.tab``) plus the offset.
        """
        resno = int(os.path.basename(fname)[3:6]) + self.offset
        phi = []
        psi = []
        with open(fname) as fl:
            for line in fl:
                tokens = line.split()
                if not tokens:
                    continue
                # Guard the length so one-token lines cannot raise IndexError.
                if len(tokens) > 1 and tokens[1] == 'RESNAMES':
                    check_residue(self.sequence[resno], tokens[3])
                    continue
                # Only lines starting with an integer carry a prediction.
                if not tokens[0].isdigit():
                    continue
                # Column 4 below 0.999 is treated as an outlier unless
                # keep_all is set.
                if float(tokens[4]) < 0.999 and not self.keep_all:
                    continue
                # Degrees to radians.
                phi.append(float(tokens[1]) * 2 * pi / 360.)
                psi.append(float(tokens[2]) * 2 * pi / 360.)
        self.add_full_datum(resno, phi, psi)

    def _read_predAll(self, fname):
        """Read a predAll.tab file and store the predictions per residue.

        Consecutive data lines sharing a residue number are grouped; each
        group is stored with add_full_datum() under its own residue number.
        """
        current = None  # residue number of the group being accumulated
        phi = []
        psi = []
        with open(fname) as fl:
            for line in fl:
                tokens = line.split()
                if not tokens or not tokens[0].isdigit():
                    continue
                resno = int(tokens[1]) + self.offset
                if resno != current:
                    # A new residue starts: flush the finished group under
                    # the residue it was collected for, not the new one.
                    if current is not None:
                        self.add_full_datum(current, phi, psi)
                    current = resno
                    phi = []
                    psi = []
                # Second character of the third column is the residue code.
                resname = tokens[2][1]
                check_residue(self.sequence[resno], resname)
                if float(tokens[6]) < 0.999 and not self.keep_all:
                    continue
                # Degrees to radians.
                phi.append(float(tokens[3]) * 2 * pi / 360.)
                psi.append(float(tokens[4]) * 2 * pi / 360.)
        # The last group has no following residue to trigger its flush.
        if current is not None:
            self.add_full_datum(current, phi, psi)

    def _read_observations(self, fname):
        """Dispatch to the predAll.tab or the single-residue reader."""
        if fname.endswith('predAll.tab'):
            self._read_predAll(fname)
        else:
            self._read_one_residue(fname)

    def _read_averages(self, fname):
        """Read a file of per-residue averages and store them.

        Residues whose match count (column 8) is zero are skipped.
        """
        with open(fname) as fl:
            for line in fl:
                tokens = line.split()
                # Skip blank lines and anything not starting with a number.
                if not tokens or not tokens[0].isdigit():
                    continue
                resno = int(tokens[0]) + self.offset
                # Compare against the known residue code, as the other
                # readers do, rather than against the residue number.
                check_residue(self.sequence[resno], tokens[1])
                # Columns 2..5 are phi, psi and their errors, in degrees.
                phi, psi, dphi, dpsi = [2 * pi * float(a) / 360.
                                        for a in tokens[2:6]]
                num = int(tokens[8])
                if num == 0:
                    continue
                self.add_mean_datum(resno, num, (phi, dphi), (psi, dpsi))

    def read(self, fname):
        """Read a TALOS file and store its data (see get_data()).

        The format expected depends on detailed_input; the stored entries
        are described in add_full_datum() and add_mean_datum().
        """
        if self.detailed_input:
            self._read_observations(fname)
        else:
            self._read_averages(fname)

    def get_data(self):
        """Return the dictionary of parsed data, keyed by residue number."""
        return self.data
142 
143 
if __name__ == '__main__':

    # Example usage: read the averages file written by TALOS.
    talos = 'pred.tab'
    sequence = read_sequence_file('seq.dat', sequence_match=(1, 5))
    # detailed_input is a required argument; pred.tab is the non-detailed
    # (averages) format, so it must be False here.
    reader = TALOSReader(sequence, detailed_input=False)
    reader.read(talos)
    data = reader.get_data()
def add_full_datum
in the case of a list of predictions for one residue, add an entry to data which is: 'full' : always ...
Definition: TALOSReader.py:34
def __init__
start the TALOSReader sequence : a dictionary of sequence number keys and 3-letter code values...
Definition: TALOSReader.py:22
reads a TALOS file, or a TALOS folder, and stores the data
Definition: TALOSReader.py:13
def read
Reads a TALOS file and stores its data; retrieve it with get_data().
Definition: TALOSReader.py:133
def add_mean_datum
in the case of a single (average) prediction output by talos for a given residue, add an entry to dat...
Definition: TALOSReader.py:49
Miscellaneous utilities.
Definition: utils.py:1