IMP logo
IMP Reference Guide  2.5.0
The Integrative Modeling Platform
process_output.py
1 #! /usr/bin/env python
2 
3 # This application works with either argparse (python 2.7) or optparse
4 # (python 2.6)
5 
6 
7 from __future__ import print_function
8 argparse = None
9 try:
10  import argparse
11 except ImportError:
12  from optparse import OptionParser
13  from optparse import Option, OptionValueError
14 
15 
16 import difflib
17 
# Command-line definition.  argparse is used when it was importable (the name
# is left as None otherwise); optparse is the fallback.  Both front ends
# register the same ten options in the same order; only the description
# example and the way -s collects several field names differ.
_usage_head = (
    'Process output data file saved as dictionaries. It has two modality: '
    'print selected fields for all lines or print a particular line where '
    'a filed has a given value. Example of usage: process_output.py --soft ')
_usage_tail = (
    ' -f log.3.native-2-no-red. process_output.py --soft --search_field EV0 '
    '--search_value 5.67750116023 -f log.3.native-2-no-red')
_fields_help = ("Specify all fields to be printed. Multiple flags will "
                "append a list of fields to be printed")

if argparse:
    parser = argparse.ArgumentParser(
        description=_usage_head + '-s To E S' + _usage_tail)
    _register_option = parser.add_argument
    # argparse gathers every name following a single -s flag
    _fields_kwargs = dict(dest="fields", nargs="+", help=_fields_help)
else:
    class MultipleOption(Option):
        """optparse Option with an extra "extend" action.

        The "extend" action appends each occurrence of the flag to a list
        stored under the option's destination.
        """
        ACTIONS = Option.ACTIONS + ("extend",)
        STORE_ACTIONS = Option.STORE_ACTIONS + ("extend",)
        TYPED_ACTIONS = Option.TYPED_ACTIONS + ("extend",)
        ALWAYS_TYPED_ACTIONS = Option.ALWAYS_TYPED_ACTIONS + ("extend",)

        def take_action(self, action, dest, opt, value, values, parser):
            """Handle "extend" here; defer any other action to Option."""
            if action != "extend":
                Option.take_action(
                    self, action, dest, opt, value, values, parser)
                return
            values.ensure_value(dest, []).append(value)

    parser = OptionParser(
        option_class=MultipleOption,
        usage=_usage_head + '-s To -s E -s S' + _usage_tail)
    _register_option = parser.add_option
    # optparse needs one -s flag per field name
    _fields_kwargs = dict(dest="fields", action="extend", type="string",
                          help=_fields_help)

# (flag, keyword arguments) pairs, registered in this order
_option_table = [
    ('-f', dict(action="store", dest="filename",
                help="file name to process")),
    ('-s', _fields_kwargs),
    ('-t', dict(dest="single_column_field",
                help="Specify a single column field to be printed. It will "
                     "be printed as a column. If the field name is not "
                     "complete, it will print all fields whose name contain "
                     "the queried string.")),
    ('-p', dict(action="store_true", dest="print_fields", default=False,
                help="print the fields contained in the file")),
    ('--head', dict(action="store_true", dest="print_header", default=False,
                    help="print the fields contained in the file "
                         "(only stat2)")),
    ('-n', dict(action="store", dest="print_raw_number",
                help="print the selected raw")),
    ('--soft', dict(action="store_true", dest="soft_match", default=False,
                    help="Soft match. Closest matching field will be "
                         "printed, e.g. S will give Step_Number, En will "
                         "give energy, etc. ")),
    ('--search_field', dict(dest="search_field",
                            help="Search a line from the file. Specify the "
                                 "field to be searched for. ")),
    ('--search_value', dict(dest="search_value",
                            help="Search a line from the file. Specify the "
                                 "value to be searched for. ")),
    ('--nframe', dict(action="store_true", dest="nframe", default=False,
                      help="Print the frame number as initial column")),
]
for _flag, _kwargs in _option_table:
    _register_option(_flag, **_kwargs)

# optparse returns (options, positional arguments); argparse only the options
if argparse:
    result = parser.parse_args()
else:
    (result, args) = parser.parse_args()
152 
153 
# File flavour, consulted by every processing step further down:
#   isstat1 - the first line is a dictionary without a "STAT2HEADER" key; its
#             keys are used directly as the field names
#   isstat2 - the first line is a header dictionary containing a
#             "STAT2HEADER" key; its remaining entries map the keys used on
#             the data lines to field names
isstat1 = False
isstat2 = False
# An empty file has no fields.  Starting from an empty list lets later steps
# report "field ... non found" instead of failing on an undefined name.
klist = []

if result.filename is None:
    raise ValueError("No file name provided. Use -h for help")

# Only the first line is needed to learn the keys, so read just that line;
# the with-block closes the file even if something below raises.
with open(result.filename, "r") as f:
    line = f.readline()

if line:
    # NOTE(security): eval() executes whatever expression the line contains;
    # only run this script on trusted output files.
    d = eval(line)
    klist = list(d.keys())
    # check if it is a stat2 file
    if "STAT2HEADER" in klist:
        import operator
        isstat2 = True
        # klist is a copy of the keys, so deleting from d while looping is safe
        for k in klist:
            if "STAT2HEADER" in str(k):
                if result.print_header:
                    print(k, d[k])
                del d[k]
        stat2_dict = d
        # header entries sorted by field name (the dictionary value)
        _by_name = sorted(stat2_dict.items(), key=operator.itemgetter(1))
        kkeys = [item[0] for item in _by_name]
        klist = [item[1] for item in _by_name]
        # reverse lookup: field name -> key used on the data lines
        invstat2_dict = dict((item[1], item[0]) for item in _by_name)
    else:
        isstat1 = True
        klist.sort()
191 
# List the field names found in the file (-p); names longer than 100
# characters are cut at 100 and followed by a truncation notice.
if result.print_fields:
    for key in klist:
        if len(key) > 100:
            print(key[:100],
                  "... omitting the rest of the string (>100 characters)")
        else:
            print(key)
199 
200 
# Cutoff handed to difflib.get_close_matches: 1.0 demands that the requested
# name equals a field name in the file; --soft relaxes it to 0.1.
match_strictness = 0.1 if result.soft_match else 1.0
206 
# print the queried fields (-s): one output row per data line, one
# right-aligned 20-character column per requested field
if result.fields is not None:
    # check whether the fields exist and convert them to the best matching
    # existing field names
    field_list = []
    for field in result.fields:
        found_entries = difflib.get_close_matches(
            field, klist, 1, match_strictness)
        if not found_entries:
            raise ValueError("field " + field + " non found")
        field_list.append(found_entries[0])

    # print comment line
    s0 = ' '.join(["%20s" % field for field in field_list])
    print("# " + s0)

    # print fields values; the with-block closes the file even when a field
    # lookup raises, and the file is streamed instead of loaded whole
    with open(result.filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            try:
                # NOTE(security): eval() executes the line's content; only
                # use on trusted files.
                d = eval(line)
            except Exception:
                # Exception (not a bare except) so Ctrl-C still interrupts
                print("# Warning: skipped line number " + str(line_number) + " not a valid line")
                continue
            if isstat1:
                s0 = ' '.join(["%20s" % str(d[field])
                               for field in field_list])
            elif isstat2:
                # line 1 of a stat2 file is the header, not data
                if line_number == 1:
                    continue
                s0 = ' '.join(["%20s" % str(d[invstat2_dict[field]])
                               for field in field_list])
            if result.nframe:
                print(str(line_number) + " > " + s0)
            else:
                print("> " + s0)
249 
250 
# print as a column (-t) every field whose name contains the queried string
if result.single_column_field is not None:
    field_list = [k for k in klist if result.single_column_field in k]

    # the with-block closes the file even when a field lookup raises
    with open(result.filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            try:
                # NOTE(security): eval() executes the line's content; only
                # use on trusted files.
                d = eval(line)
            except Exception:
                # Exception (not a bare except) so Ctrl-C still interrupts
                print("# Warning: skipped line number " + str(line_number) + " not a valid line")
                continue
            if isstat1:
                for key in field_list:
                    print(key, d[key])
            elif isstat2:
                # line 1 of a stat2 file is the header, not data
                if line_number == 1:
                    continue
                for key in field_list:
                    print(key, d[invstat2_dict[key]])
            # separator printed after each processed data line
            print(" ")
276 
# search mode: print every field of each line whose --search_field value,
# converted to a string, equals --search_value
if result.search_field is not None and result.search_value is not None:
    # check whether the field exists and convert it to the best matching
    # existing field name
    found_entries = difflib.get_close_matches(
        result.search_field, klist, 1, match_strictness)
    if not found_entries:
        # the original referenced the undefined name "results" here, which
        # raised NameError instead of this ValueError
        raise ValueError("field " + result.search_field + " non found")
    corrected_field = found_entries[0]

    # print fields values; the with-block closes the file even when a field
    # lookup raises
    with open(result.filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            try:
                # NOTE(security): eval() executes the line's content; only
                # use on trusted files.
                d = eval(line)
            except Exception:
                # Exception (not a bare except) so Ctrl-C still interrupts
                print("# Warning: skipped line number " + str(line_number) + " not a valid line")
                continue

            if isstat1:
                if str(d[corrected_field]) == result.search_value:
                    for key in klist:
                        print(key, d[key])
            elif isstat2:
                # line 1 of a stat2 file is the header, not data
                if line_number == 1:
                    continue
                if str(d[invstat2_dict[corrected_field]]) == result.search_value:
                    for key in klist:
                        print(key, d[invstat2_dict[key]])
311 
# print every field of one selected line (-n)
if result.print_raw_number is not None:
    # Convert once instead of on every line.  In a stat2 file line 1 is the
    # header, so the requested record sits one line further down.
    target_line = None
    if isstat1:
        target_line = int(result.print_raw_number)
    elif isstat2:
        target_line = int(result.print_raw_number) + 1

    if target_line is not None:
        # the with-block closes the file even when a field lookup raises
        with open(result.filename, "r") as f:
            for line_number, line in enumerate(f, 1):
                if line_number != target_line:
                    continue
                try:
                    # NOTE(security): eval() executes the line's content;
                    # only use on trusted files.
                    d = eval(line)
                except Exception:
                    # Exception (not a bare except) so Ctrl-C still interrupts
                    print("# Warning: skipped line number " + str(line_number) + " not a valid line")
                    break
                for key in klist:
                    if isstat1:
                        print(key, d[key])
                    else:
                        print(key, d[invstat2_dict[key]])
                # only one line can match, so stop reading the file
                break