IMP  2.0.0
The Integrative Modeling Platform
TALOSReader.py
1 #!/usr/bin/env python
2 
3 import sys,os
4 from math import pi
5 from IMP.isd.utils import check_residue, read_sequence_file
6 
class TALOSReader:
    """Read TALOS dihedral-angle prediction files and store them per residue.

    Parsed predictions are accumulated in ``self.data``, a dictionary keyed
    by residue number (in the numbering of ``sequence``). See
    add_full_datum and add_mean_datum for the layout of each entry.
    """

    def __init__(self, sequence, detailed_input, keep_all=False,
                 sequence_match=(1, 1)):
        """Start the TALOSReader.

        sequence : a dictionary of sequence number keys and 3-letter code
                   values.
        detailed_input : True if the input will be either predAll.tab or the
                         pred/res???.tab files. False if it's pred.tab
        keep_all : whether to keep outliers or not, when detailed_input==True.
        sequence_match : in the form (talos_no, sequence_no), assigns a
                         correspondence between residue numberings.
        """
        self.detailed_input = detailed_input
        self.data = {}
        self.keep_all = keep_all
        self.sequence = sequence
        # Added to every residue number read from a TALOS file to obtain the
        # matching key of `sequence`.
        self.offset = sequence_match[1] - sequence_match[0]

    def add_full_datum(self, resno, phi, psi):
        """Store a list of predictions for one residue.

        The entry added to data is:
            'full' : always True
            'num'  : number of predictions
            'phi'  : the list of predictions for phi
            'psi'  : same for psi

        Raises RuntimeError if data already holds an entry for resno.
        """
        if resno in self.data:
            raise RuntimeError("would overwrite data for residue %d" % resno)
        self.data[resno] = {'full': True, 'num': len(phi),
                            'phi': phi, 'psi': psi}

    def add_mean_datum(self, resno, num, phi, psi):
        """Store a single (average) prediction output by TALOS for a residue.

        The entry added to data is:
            'full' : always False
            'num'  : the number of matches this average was calculated from
            'phi'  : a tuple in the form (mean, error)
            'psi'  : same as phi.

        Raises RuntimeError if data already holds an entry for resno.
        """
        if resno in self.data:
            raise RuntimeError("would overwrite data for residue %d" % resno)
        self.data[resno] = {'full': False, 'num': num,
                            'phi': phi, 'psi': psi}

    def _read_one_residue(self, fname):
        """Read one per-residue file (res???.tab) of individual predictions.

        The residue number is taken from characters 3 to 5 of the file's
        base name, shifted by self.offset. Angles are converted from degrees
        to radians. Rows whose fifth column is below 0.999 are skipped unless
        keep_all is set.
        """
        resno = int(os.path.basename(fname)[3:6]) + self.offset
        phi = []
        psi = []
        with open(fname) as fl:
            for line in fl:
                tokens = line.split()
                if not tokens:
                    continue
                # Guard the length first: a one-word line has no tokens[1].
                if len(tokens) > 1 and tokens[1] == 'RESNAMES':
                    check_residue(self.sequence[resno], tokens[3])
                    continue
                if not tokens[0].isdigit():
                    continue
                if float(tokens[4]) < 0.999 and not self.keep_all:
                    continue
                phi.append(float(tokens[1]) * 2 * pi / 360.)
                psi.append(float(tokens[2]) * 2 * pi / 360.)
        self.add_full_datum(resno, phi, psi)

    def _read_predAll(self, fname):
        """Read a predAll.tab file holding predictions for many residues.

        Consecutive rows sharing a residue number (second column, shifted by
        self.offset) are gathered into one entry. Angles are converted from
        degrees to radians. Rows whose seventh column is below 0.999 are
        skipped unless keep_all is set.
        """
        current = None  # residue whose rows are being accumulated
        phi = []
        psi = []
        with open(fname) as fl:
            for line in fl:
                tokens = line.split()
                if not tokens or not tokens[0].isdigit():
                    continue
                resno = int(tokens[1]) + self.offset
                if resno != current:
                    # A new residue starts: store what was gathered for the
                    # previous one under the PREVIOUS residue number.
                    if current is not None:
                        self.add_full_datum(current, phi, psi)
                    current = resno
                    phi = []
                    psi = []
                # Second character of the third column is the residue code
                # (presumably the centre of a residue triplet -- confirm
                # against the TALOS format).
                resname = tokens[2][1]
                check_residue(self.sequence[resno], resname)
                if float(tokens[6]) < 0.999 and not self.keep_all:
                    continue
                phi.append(float(tokens[3]) * 2 * pi / 360.)
                psi.append(float(tokens[4]) * 2 * pi / 360.)
        # The last residue has no following row to trigger its storage.
        if current is not None:
            self.add_full_datum(current, phi, psi)

    def _read_observations(self, fname):
        """Dispatch to the predAll.tab reader or the per-residue reader,
        depending on the file name."""
        if fname.endswith('predAll.tab'):
            self._read_predAll(fname)
        else:
            self._read_one_residue(fname)

    def _read_averages(self, fname):
        """Read a pred.tab file holding one averaged prediction per residue.

        Columns 3 to 6 are read as phi, psi, dphi, dpsi in degrees and
        converted to radians; the ninth column is the number of matches.
        Residues with zero matches are not stored.
        """
        with open(fname) as fl:
            for line in fl:
                tokens = line.split()
                if not tokens or not tokens[0].isdigit():
                    continue
                resno = int(tokens[0]) + self.offset
                # Compare against the expected residue name, as the other
                # readers do, not against the residue number.
                check_residue(self.sequence[resno], tokens[1])
                phi, psi, dphi, dpsi = [2 * pi * float(tok) / 360.
                                        for tok in tokens[2:6]]
                num = int(tokens[8])
                if num == 0:
                    continue
                self.add_mean_datum(resno, num, (phi, dphi), (psi, dpsi))

    def read(self, fname):
        """Read a TALOS file and store its contents in this reader.

        Nothing is returned; retrieve the result with get_data. See the
        add_*_datum methods for the layout of the stored entries.
        """
        if self.detailed_input:
            self._read_observations(fname)
        else:
            self._read_averages(fname)

    def get_data(self):
        """Return the dictionary of everything read so far."""
        return self.data
127 
128 
129 
if __name__ == '__main__':
    # Small demo: read averaged TALOS predictions from 'pred.tab' for the
    # sequence stored in 'seq.dat'.
    talos = 'pred.tab'
    sequence = read_sequence_file('seq.dat', sequence_match=(1, 5))
    # detailed_input is a required argument; pred.tab is the averaged
    # output, which corresponds to detailed_input=False.
    reader = TALOSReader(sequence, False)
    reader.read(talos)
    data = reader.get_data()