IMP  2.3.1
The Integrative Modeling Platform
process_output.py
1 #! /usr/bin/env python
2 
3 # This application works with either argparse (python 2.7) or optparse
4 # (python 2.6)
5 
6 argparse = None
7 try:
8  import argparse
9 except ImportError:
10  from optparse import OptionParser
11  from optparse import Option, OptionValueError
12 
13 
14 import difflib
15 
# Build the command-line parser and parse the arguments into `result`.
# argparse is used when it could be imported, otherwise the script falls
# back to optparse. Both branches define the same destinations (filename,
# fields, single_column_field, print_fields, print_header, print_raw_number,
# soft_match, search_field, search_value, nframe).
if argparse:
    class _ExtendFields(argparse.Action):
        """Accumulate the values of every occurrence of a flag in one list.

        With the default "store" action a repeated flag overwrites the
        values given earlier, whereas the help text of '-s' (and the
        optparse branch below) promise that multiple flags append.
        """

        def __call__(self, parser, namespace, values, option_string=None):
            # Copy before extending so a shared default is never mutated.
            collected = list(getattr(namespace, self.dest, None) or [])
            collected.extend(values)
            setattr(namespace, self.dest, collected)

    parser = argparse.ArgumentParser(
        description='Process output data file saved as dictionaries. It has two modality: print selected fields for all lines or print a particular line where a filed has a given value. Example of usage: process_output.py --soft -s To E S -f log.3.native-2-no-red. process_output.py --soft --search_field EV0 --search_value 5.67750116023 -f log.3.native-2-no-red')
    parser.add_argument(
        '-f',
        action="store",
        dest="filename",
        help="file name to process")
    parser.add_argument(
        '-s',
        dest="fields",
        action=_ExtendFields,
        nargs="+",
        help="Specify all fields to be printed. Multiple flags will append a list of fields to be printed")
    parser.add_argument(
        '-t',
        dest="single_column_field",
        help="Specify a single column field to be printed. It will be printed as a column. If the field name is not complete, it will print all fields whose name contain the queried string.")
    parser.add_argument(
        '-p',
        action="store_true",
        dest="print_fields",
        default=False,
        help="print the fields contained in the file")
    parser.add_argument(
        '--head',
        action="store_true",
        dest="print_header",
        default=False,
        help="print the fields contained in the file (only stat2)")
    parser.add_argument(
        '-n',
        action="store",
        dest="print_raw_number",
        help="print the selected raw")
    parser.add_argument(
        '--soft',
        action="store_true",
        dest="soft_match",
        default=False,
        help="Soft match. Closest matching field will be printed, e.g. S will give Step_Number, En will give energy, etc. ")
    parser.add_argument(
        '--search_field',
        dest="search_field",
        help="Search a line from the file. Specify the field to be searched for. ")
    parser.add_argument(
        '--search_value',
        dest="search_value",
        help="Search a line from the file. Specify the value to be searched for. ")
    parser.add_argument(
        '--nframe',
        action="store_true",
        dest="nframe",
        default=False,
        help="Print the frame number as initial column")

    result = parser.parse_args()

else:
    class MultipleOption(Option):
        """optparse Option with an extra "extend" action.

        Every occurrence of an "extend" option appends its value to the
        list stored under the option's destination.
        """
        ACTIONS = Option.ACTIONS + ("extend",)
        STORE_ACTIONS = Option.STORE_ACTIONS + ("extend",)
        TYPED_ACTIONS = Option.TYPED_ACTIONS + ("extend",)
        ALWAYS_TYPED_ACTIONS = Option.ALWAYS_TYPED_ACTIONS + ("extend",)

        def take_action(self, action, dest, opt, value, values, parser):
            """Handle "extend" here; delegate every other action to Option."""
            if action == "extend":
                # ensure_value creates the list the first time it is needed
                values.ensure_value(dest, []).append(value)
            else:
                Option.take_action(
                    self,
                    action,
                    dest,
                    opt,
                    value,
                    values,
                    parser)

    parser = OptionParser(
        option_class=MultipleOption,
        usage='Process output data file saved as dictionaries. It has two modality: print selected fields for all lines or print a particular line where a filed has a given value. Example of usage: process_output.py --soft -s To -s E -s S -f log.3.native-2-no-red. process_output.py --soft --search_field EV0 --search_value 5.67750116023 -f log.3.native-2-no-red')
    parser.add_option(
        '-f',
        action="store",
        dest="filename",
        help="file name to process")
    parser.add_option(
        '-s',
        dest="fields",
        action="extend",
        type="string",
        help="Specify all fields to be printed. Multiple flags will append a list of fields to be printed")
    parser.add_option(
        '-t',
        dest="single_column_field",
        help="Specify a single column field to be printed. It will be printed as a column. If the field name is not complete, it will print all fields whose name contain the queried string.")
    parser.add_option(
        '-p',
        action="store_true",
        dest="print_fields",
        default=False,
        help="print the fields contained in the file")
    parser.add_option(
        '--head',
        action="store_true",
        dest="print_header",
        default=False,
        help="print the fields contained in the file (only stat2)")
    parser.add_option(
        '-n',
        action="store",
        dest="print_raw_number",
        help="print the selected raw")
    parser.add_option(
        '--soft',
        action="store_true",
        dest="soft_match",
        default=False,
        help="Soft match. Closest matching field will be printed, e.g. S will give Step_Number, En will give energy, etc. ")
    parser.add_option(
        '--search_field',
        dest="search_field",
        help="Search a line from the file. Specify the field to be searched for. ")
    parser.add_option(
        '--search_value',
        dest="search_value",
        help="Search a line from the file. Specify the value to be searched for. ")
    parser.add_option(
        '--nframe',
        dest="nframe",
        action="store_true",
        default=False,
        help="Print the frame number as initial column")

    (result, args) = parser.parse_args()
150 
151 
# Detect the file format from the first line and collect the field names.
#
# Sets:
#   isstat1 / isstat2 -- which of the two formats the file uses
#   klist             -- field names (sorted keys for stat1; for stat2 the
#                        values of the header dictionary ordered by value)
#   invstat2_dict     -- stat2 only: maps each header value back to its key
isstat1 = False
isstat2 = False

# Defaults so that an empty input file leaves the later sections with
# defined (empty) lookups instead of undefined names.
klist = []
invstat2_dict = {}

if result.filename is None:
    raise ValueError("No file name provided. Use -h for help")

# Only the first line is needed here, so read just that line; the context
# manager closes the file even if something below raises.
with open(result.filename, "r") as f:
    first_line = f.readline()

if first_line:
    # NOTE(security): eval() executes whatever expression the line contains.
    # Only run this script on trusted files.
    d = eval(first_line)
    # list() so entries can be deleted from d while walking the keys
    klist = list(d.keys())
    # check if it is a stat2 file
    if "STAT2HEADER" in klist:
        import operator
        isstat2 = True
        # header entries are optionally printed and always removed
        for k in klist:
            if "STAT2HEADER" in str(k):
                if result.print_header:
                    print("%s %s" % (k, d[k]))
                del d[k]
        stat2_dict = d
        # (key, value) pairs sorted by value; sort once and reuse
        by_value = sorted(stat2_dict.items(), key=operator.itemgetter(1))
        kkeys = [item[0] for item in by_value]
        klist = [item[1] for item in by_value]
        invstat2_dict = {}
        for k in kkeys:
            invstat2_dict[stat2_dict[k]] = k
    else:
        isstat1 = True
        klist.sort()
189 
# List every field name (option -p); names longer than 100 characters are
# cut at 100 characters and followed by a note.
if result.print_fields:
    truncation_note = "... omitting the rest of the string (>100 characters)"
    for name in klist:
        if len(name) > 100:
            print(name[0:100] + " " + truncation_note)
        else:
            print(name)
197 
198 
# Cutoff handed to difflib.get_close_matches further down: 1.0 is used
# unless --soft was given, in which case the cutoff drops to 0.1.
match_strictness = 0.1 if result.soft_match else 1.0
204 
# Print the queried fields (option -s): a "# ..." header line with the
# resolved field names, then one "> ..." line of values per record.
# With --nframe each value line is prefixed with its line number.
if result.fields is not None:
    field_list = []
    # check whether the fields exist and convert them to the best matching
    # existing field names
    for field in result.fields:
        found_entries = difflib.get_close_matches(
            field,
            klist,
            1,
            match_strictness)
        if not found_entries:
            raise ValueError("field " + field + " non found")
        field_list.append(found_entries[0])

    # print comment line
    s0 = ' '.join(["%20s" % (field) for field in field_list])
    print("# " + s0)

    # print fields values; iterate the file lazily and let the context
    # manager close it even when a record raises
    with open(result.filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            try:
                # NOTE(security): eval() executes the line's content; only
                # use trusted files.
                d = eval(line)
            except Exception:
                # Exception (not a bare except) so Ctrl-C still interrupts
                print("# Warning: skipped line number " + str(line_number) + " not a valid line")
                continue
            if isstat1:
                s0 = ' '.join(["%20s" % (str(d[field])) for field in field_list])
            elif isstat2:
                # the first line of a stat2 file is the header, not data
                if line_number == 1:
                    continue
                s0 = ' '.join(["%20s" % (str(d[invstat2_dict[field]]))
                               for field in field_list])
            if not result.nframe:
                print("> " + s0)
            else:
                print(str(line_number) + " > " + s0)
247 
248 
# Column output (option -t): for every record print one "name value" line
# for each field whose name contains the queried string, then a separator
# line holding a single space.
if result.single_column_field is not None:
    field_list = [k for k in klist if result.single_column_field in k]

    # iterate the file lazily; the context manager closes it on any exit
    with open(result.filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            try:
                # NOTE(security): eval() executes the line's content; only
                # use trusted files.
                d = eval(line)
            except Exception:
                # Exception (not a bare except) so Ctrl-C still interrupts
                print("# Warning: skipped line number " + str(line_number) + " not a valid line")
                continue
            if isstat1:
                for key in field_list:
                    print("%s %s" % (key, d[key]))
            elif isstat2:
                # the first line of a stat2 file is the header, not data
                if line_number == 1:
                    continue
                for key in field_list:
                    print("%s %s" % (key, d[invstat2_dict[key]]))
            print(" ")
274 
# Search mode (--search_field together with --search_value): print every
# field of each record whose searched field, converted to a string, equals
# the requested value.
if result.search_field is not None and result.search_value is not None:
    # check whether the field exists and convert it to the best matching
    # existing field name
    found_entries = difflib.get_close_matches(
        result.search_field,
        klist,
        1,
        match_strictness)
    if not found_entries:
        # was "results.search_field": an undefined name that raised
        # NameError instead of the intended ValueError
        raise ValueError("field " + result.search_field + " non found")
    corrected_field = found_entries[0]

    # print fields values; the context manager closes the file on any exit
    with open(result.filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            try:
                # NOTE(security): eval() executes the line's content; only
                # use trusted files.
                d = eval(line)
            except Exception:
                # Exception (not a bare except) so Ctrl-C still interrupts
                print("# Warning: skipped line number " + str(line_number) + " not a valid line")
                continue

            if isstat1:
                if str(d[corrected_field]) == result.search_value:
                    for key in klist:
                        print("%s %s" % (key, d[key]))
            elif isstat2:
                # the first line of a stat2 file is the header, not data
                # (was "linenumber": an undefined name raising NameError)
                if line_number == 1:
                    continue
                if str(d[invstat2_dict[corrected_field]]) == result.search_value:
                    for key in klist:
                        print("%s %s" % (key, d[invstat2_dict[key]]))
309 
# Row output (option -n): print every field of the requested record.
# For stat1 files the record number is the line number; for stat2 files it
# is offset by one because the first line holds the header.
if result.print_raw_number is not None:
    # convert once instead of on every line of the file
    target_line = int(result.print_raw_number)
    if isstat2:
        target_line += 1

    # the context manager closes the file on any exit
    with open(result.filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            if line_number != target_line:
                continue
            try:
                # NOTE(security): eval() executes the line's content; only
                # use trusted files.
                d = eval(line)
            except Exception:
                # Exception (not a bare except) so Ctrl-C still interrupts
                print("# Warning: skipped line number " + str(line_number) + " not a valid line")
                break
            if isstat1:
                for key in klist:
                    print("%s %s" % (key, d[key]))
            elif isstat2:
                for key in klist:
                    print("%s %s" % (key, d[invstat2_dict[key]]))
            # only one line can match, so stop scanning the file
            break
IMP::kernel::OptionParser OptionParser