IMP  2.3.0
The Integrative Modeling Platform
process_output.py
1 #! /usr/bin/env python
2 
3 # This application works with either argparse (python 2.7) or optparse
4 # (python 2.6)
5 
import difflib
import sys

argparse = None
try:
    import argparse
except ImportError:
    from optparse import OptionParser
    from optparse import Option, OptionValueError
15 
def _add_common_options(add, fields_kwargs):
    """Register every command-line option through *add*.

    *add* is either ``ArgumentParser.add_argument`` or
    ``OptionParser.add_option``; both accept a flag followed by keyword
    arguments, so the option definitions only need to be written once.

    *fields_kwargs* holds the parser-specific keywords of the ``-s`` option,
    the only option whose definition differs between the two back ends.
    """
    add('-f',
        action="store",
        dest="filename",
        help="file name to process")
    add('-s',
        dest="fields",
        help="Specify all fields to be printed. Multiple flags will append a list of fields to be printed",
        **fields_kwargs)
    add('-t',
        dest="single_column_field",
        help="Specify a single column field to be printed. It will be printed as a column. If the field name is not complete, it will print all fields whose name contain the queried string.")
    add('-p',
        action="store_true",
        dest="print_fields",
        default=False,
        help="print the fields contained in the file")
    add('--head',
        action="store_true",
        dest="print_header",
        default=False,
        help="print the fields contained in the file (only stat2)")
    add('-n',
        action="store",
        dest="print_raw_number",
        help="print the selected raw")
    add('--soft',
        action="store_true",
        dest="soft_match",
        default=False,
        help="Soft match. Closest matching field will be printed, e.g. S will give Step_Number, En will give energy, etc. ")
    add('--search_field',
        dest="search_field",
        help="Search a line from the file. Specify the field to be searched for. ")
    add('--search_value',
        dest="search_value",
        help="Search a line from the file. Specify the value to be searched for. ")
    add('--nframe',
        action="store_true",
        dest="nframe",
        default=False,
        help="Print the frame number as initial column")


if argparse:
    parser = argparse.ArgumentParser(
        description='Process output data file saved as dictionaries. It has two modality: print selected fields for all lines or print a particular line where a filed has a given value. Example of usage: process_output.py --soft -s To E S -f log.3.native-2-no-red. process_output.py --soft --search_field EV0 --search_value 5.67750116023 -f log.3.native-2-no-red')
    # nargs="+" accepts several names after one -s; action="append" keeps the
    # names given to earlier -s flags instead of letting the last flag win.
    _add_common_options(parser.add_argument,
                        dict(nargs="+", action="append"))

    result = parser.parse_args()
    # "append" combined with nargs yields a list of lists; flatten it so
    # result.fields is a flat list of names, as with the optparse back end.
    if result.fields is not None:
        result.fields = [name for group in result.fields for name in group]

else:
    class MultipleOption(Option):
        """optparse Option with an extra "extend" action.

        "extend" appends each occurrence of the option to a list stored on
        the destination attribute, so a flag may be repeated.
        """
        ACTIONS = Option.ACTIONS + ("extend",)
        STORE_ACTIONS = Option.STORE_ACTIONS + ("extend",)
        TYPED_ACTIONS = Option.TYPED_ACTIONS + ("extend",)
        ALWAYS_TYPED_ACTIONS = Option.ALWAYS_TYPED_ACTIONS + ("extend",)

        def take_action(self, action, dest, opt, value, values, parser):
            """Handle "extend" here; defer every other action to Option."""
            if action == "extend":
                values.ensure_value(dest, []).append(value)
            else:
                Option.take_action(
                    self, action, dest, opt, value, values, parser)

    parser = OptionParser(
        option_class=MultipleOption,
        usage='Process output data file saved as dictionaries. It has two modality: print selected fields for all lines or print a particular line where a filed has a given value. Example of usage: process_output.py --soft -s To -s E -s S -f log.3.native-2-no-red. process_output.py --soft --search_field EV0 --search_value 5.67750116023 -f log.3.native-2-no-red')
    _add_common_options(parser.add_option,
                        dict(action="extend", type="string"))

    (result, args) = parser.parse_args()
150 
151 
# Exactly one of these becomes True once the first line of the file has been
# inspected (both stay False when the file is empty).
isstat1 = False
isstat2 = False

# a file name is mandatory
if result.filename is None:
    print("Error: No file name provided. Use -h for help")
    sys.exit(1)

# Defaults, so that an empty file means "no fields" rather than a NameError
# in the sections further down.
klist = []
invstat2_dict = {}

# Only the first line is needed to discover the keys; read just that line
# and let the with-statement close the file even if parsing fails.
with open(result.filename, "r") as f:
    first_line = f.readline()

if first_line:
    # NOTE(security): eval() executes whatever expression the file contains.
    # Only run this tool on trusted output files.
    d = eval(first_line)
    # list(...) gives a snapshot, so deleting from d while looping is safe
    klist = list(d.keys())
    # check if it is a stat2 file
    if "STAT2HEADER" in klist:
        import operator
        isstat2 = True
        # drop every header entry, optionally echoing it first (--head)
        for k in klist:
            if "STAT2HEADER" in str(k):
                if result.print_header:
                    print("%s %s" % (k, d[k]))
                del d[k]
        stat2_dict = d
        # (key, value) pairs of the first line, sorted by value
        sorted_items = sorted(stat2_dict.items(), key=operator.itemgetter(1))
        kkeys = [item[0] for item in sorted_items]
        klist = [item[1] for item in sorted_items]
        # reverse lookup: first-line value -> first-line key
        invstat2_dict = dict((value, key) for key, value in sorted_items)
    else:
        isstat1 = True
        klist.sort()
190 
# -p: list every field name, one per line, showing at most the first 100
# characters of each name
if result.print_fields:
    truncation_note = "... omitting the rest of the string (>100 characters)"
    for key in klist:
        if len(key) > 100:
            print("%s %s" % (key[0:100], truncation_note))
            continue
        print(key)
198 
199 
# Cutoff handed to difflib.get_close_matches further down: 1.0 accepts only
# a name identical to the requested one, --soft lowers it to 0.1 so that
# the closest existing field name is accepted.
match_strictness = 0.1 if result.soft_match else 1.0
205 
# -s: print the requested fields, one output line per record of the file
if result.fields is not None:
    # Resolve each requested name to the best matching existing field name;
    # stop with an error as soon as one of them cannot be resolved.
    field_list = []
    for field in result.fields:
        found_entries = difflib.get_close_matches(
            field, klist, 1, match_strictness)
        if not found_entries:
            print("Error: field " + field + " non found")
            sys.exit(1)
        field_list.append(found_entries[0])

    # print comment line
    s0 = ' '.join(["%20s" % (field,) for field in field_list])
    print("# " + s0)

    # print fields values; the with-statement closes the file on any exit
    with open(result.filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            try:
                # NOTE(security): eval() executes whatever expression the
                # line contains. Only run this tool on trusted files.
                d = eval(line)
            except Exception:
                # "Exception" rather than a bare except, so that Ctrl-C is
                # not reported as an invalid line
                print("# Warning: skipped line number " + str(line_number) + " not a valid line")
                continue
            if isstat1:
                s0 = ' '.join(["%20s" % (str(d[field])) for field in field_list])
            elif isstat2:
                # line 1 of a stat2 file is the header, not a record
                if line_number == 1:
                    continue
                s0 = ' '.join(["%20s" % (str(d[invstat2_dict[field]]))
                               for field in field_list])
            if not result.nframe:
                print("> " + s0)
            else:
                print(str(line_number) + " > " + s0)
249 
250 
# -t: for every record print, one per line, each field whose name contains
# the requested string
if result.single_column_field is not None:
    field_list = [k for k in klist if result.single_column_field in k]

    # the with-statement closes the file on any exit from the loop
    with open(result.filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            try:
                # NOTE(security): eval() executes whatever expression the
                # line contains. Only run this tool on trusted files.
                d = eval(line)
            except Exception:
                # "Exception" rather than a bare except, so that Ctrl-C is
                # not reported as an invalid line
                print("# Warning: skipped line number " + str(line_number) + " not a valid line")
                continue
            if isstat1:
                for key in field_list:
                    print("%s %s" % (key, d[key]))
            elif isstat2:
                # line 1 of a stat2 file is the header, not a record
                if line_number == 1:
                    continue
                for key in field_list:
                    print("%s %s" % (key, d[invstat2_dict[key]]))
            # separator line after each record
            print(" ")
276 
# --search_field / --search_value: print all fields of every record whose
# searched field, converted to a string, equals the requested value
if result.search_field is not None and result.search_value is not None:
    # resolve the requested name to the best matching existing field name
    found_entries = difflib.get_close_matches(
        result.search_field, klist, 1, match_strictness)
    if not found_entries:
        # was "results.search_field", a NameError instead of this message
        print("Error: field " + result.search_field + " non found")
        sys.exit(1)
    corrected_field = found_entries[0]

    # the with-statement closes the file on any exit from the loop
    with open(result.filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            try:
                # NOTE(security): eval() executes whatever expression the
                # line contains. Only run this tool on trusted files.
                d = eval(line)
            except Exception:
                # "Exception" rather than a bare except, so that Ctrl-C is
                # not reported as an invalid line
                print("# Warning: skipped line number " + str(line_number) + " not a valid line")
                continue

            if isstat1:
                if str(d[corrected_field]) == result.search_value:
                    for key in klist:
                        print("%s %s" % (key, d[key]))
            elif isstat2:
                # line 1 of a stat2 file is the header, not a record
                # (was "linenumber", a NameError for every stat2 file)
                if line_number == 1:
                    continue
                if str(d[invstat2_dict[corrected_field]]) == result.search_value:
                    for key in klist:
                        print("%s %s" % (key, d[invstat2_dict[key]]))
312 
# -n: print every field of one record, selected by its number
if result.print_raw_number is not None:
    # Convert once, up front, and report a bad value as a usage error
    # instead of an exception raised from inside the read loop.
    try:
        requested_row = int(result.print_raw_number)
    except ValueError:
        print("Error: -n expects an integer, got " + str(result.print_raw_number))
        sys.exit(1)

    # In a stat2 file the requested record is found one line further down
    # (line 1 is the header).
    if isstat2:
        target_line = requested_row + 1
    else:
        target_line = requested_row

    if isstat1 or isstat2:
        # the with-statement closes the file on any exit from the loop
        with open(result.filename, "r") as f:
            for line_number, line in enumerate(f, 1):
                if line_number != target_line:
                    continue
                try:
                    # NOTE(security): eval() executes whatever expression
                    # the line contains. Only run this tool on trusted files.
                    d = eval(line)
                except Exception:
                    print("# Warning: skipped line number " + str(line_number) + " not a valid line")
                    break
                for key in klist:
                    if isstat2:
                        print("%s %s" % (key, d[invstat2_dict[key]]))
                    else:
                        print("%s %s" % (key, d[key]))
                # the requested record has been printed; no need to read on
                break
IMP::kernel::OptionParser OptionParser