IMP logo
IMP Reference Guide  2.20.0
The Integrative Modeling Platform
process_output.py
1 #! /usr/bin/env python
2 
3 from __future__ import print_function
4 import argparse
5 import difflib
6 
7 
def _build_parser():
    """Create the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser exposing the options -f, -s, -t,
        -p, --head, -n, --soft, --search_field, --search_value and
        --nframe.
    """
    p = argparse.ArgumentParser(
        description="Process output data file saved as dictionaries. "
                    "It has two modalities: print selected fields for all "
                    "lines or print a particular line where a field has a "
                    "given value. Example of usage: process_output.py "
                    "--soft -s To E S -f log.3.native-2-no-red. "
                    "process_output.py --soft --search_field EV0 "
                    "--search_value 5.67750116023 "
                    "-f log.3.native-2-no-red")
    p.add_argument('-f', action="store", dest="filename",
                   help="file name to process")
    p.add_argument('-s', dest="fields", nargs="+",
                   help="Specify all fields to be printed. Multiple flags "
                        "will append a list of fields to be printed")
    p.add_argument('-t', dest="single_column_field",
                   help="Specify a single column field to be printed. It "
                        "will be printed as a column. If the field name is "
                        "not complete, it will print all fields whose name "
                        "contain the queried string.")
    p.add_argument('-p', action="store_true", dest="print_fields",
                   default=False,
                   help="print the fields contained in the file")
    p.add_argument('--head', action="store_true", dest="print_header",
                   default=False,
                   help="print the fields contained in the file (only stat2)")
    p.add_argument('-n', action="store", dest="print_raw_number",
                   help="print the selected raw")
    p.add_argument('--soft', action="store_true", dest="soft_match",
                   default=False,
                   help="Soft match. Closest matching field will be printed, "
                        "e.g. S will give Step_Number, En will give energy, "
                        "etc. ")
    p.add_argument('--search_field', dest="search_field",
                   help="Search a line from the file. Specify the field to "
                        "be searched for. ")
    p.add_argument('--search_value', dest="search_value",
                   help="Search a line from the file. Specify the value to "
                        "be searched for. ")
    p.add_argument('--nframe', action="store_true", dest="nframe",
                   default=False,
                   help="Print the frame number as initial column")
    return p


def _parse_record(line):
    """Evaluate one line of the data file and return the resulting object.

    SECURITY NOTE(review): the line is handed to eval(), so a data file
    can execute arbitrary Python code. Only run this script on files from
    a trusted source. If records are always plain literals,
    ast.literal_eval would be a safer parser -- confirm against the code
    that writes these files before switching.
    """
    return eval(line)


def _warn_invalid_line(line_number):
    """Print the warning emitted for a line that could not be parsed."""
    print("# Warning: skipped line number " +
          str(line_number) + " not a valid line")


def _read_header(filename, print_header):
    """Inspect the first line of the file to learn the available fields.

    A first line whose keys include "STAT2HEADER" marks a stat2 file: the
    remaining entries of that line map record keys to field names.
    Otherwise the keys of the first line are themselves the field names.

    Args:
        filename: path of the data file.
        print_header: if true, print every header entry whose key
            contains "STAT2HEADER" (stat2 files only).

    Returns:
        A tuple (klist, key_of, is_stat2) where klist is the sorted list
        of field names, key_of maps each field name to the key under which
        its value is stored in a record, and is_stat2 tells whether the
        first line is a header rather than data. For an empty file the
        result is ([], {}, False).
    """
    # Only the first line is needed, so do not load the whole file.
    with open(filename, "r") as f:
        first_line = f.readline()
    if not first_line:
        return [], {}, False

    d = _parse_record(first_line)
    klist = list(d.keys())
    if "STAT2HEADER" not in klist:
        klist.sort()
        return klist, dict((k, k) for k in klist), False

    import operator
    # klist is a snapshot of the keys, so deleting from d here is safe.
    for k in klist:
        if "STAT2HEADER" in str(k):
            if print_header:
                print(k, d[k])
            del d[k]
    # Order the header entries by field name; sort once and reuse.
    by_name = sorted(d.items(), key=operator.itemgetter(1))
    key_of = {}
    for key, name in by_name:
        key_of[name] = key
    return [name for _, name in by_name], key_of, True


def _iter_records(filename, skip_header):
    """Yield (line_number, record) for each parseable line of the file.

    Lines that cannot be parsed are reported on standard output and
    skipped. Line numbers are 1-based and count every line of the file.

    Args:
        filename: path of the data file.
        skip_header: if true, line 1 is skipped without being parsed
            (it is the header of a stat2 file).
    """
    with open(filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            if skip_header and line_number == 1:
                continue
            try:
                d = _parse_record(line)
            except Exception:
                _warn_invalid_line(line_number)
                continue
            yield line_number, d


def _closest_field(name, klist, cutoff):
    """Return the entry of klist that best matches name.

    Raises:
        ValueError: if no entry reaches the similarity cutoff.
    """
    found_entries = difflib.get_close_matches(name, klist, 1, cutoff)
    if not found_entries:
        raise ValueError("field " + name + " non found")
    return found_entries[0]


def main(argv=None):
    """Run the command-line tool.

    Args:
        argv: list of command-line arguments; None means sys.argv[1:].

    Raises:
        ValueError: if no file name is given, or if a requested field
            matches none of the fields found in the file.
    """
    result = _build_parser().parse_args(argv)

    if result.filename is None:
        raise ValueError("No file name provided. Use -h for help")
    filename = result.filename

    # get the field names from the first line
    klist, key_of, is_stat2 = _read_header(filename, result.print_header)

    # print the keys
    if result.print_fields:
        for key in klist:
            if len(key) <= 100:
                print(key)
            else:
                print(key[0:100],
                      "... omitting the rest of the string (>100 characters)")

    # the field string matching is by default strict, i.e., the input
    # string must be the same as the one in the file
    match_strictness = 0.1 if result.soft_match else 1.0

    # print the queried fields
    if result.fields is not None:
        # check whether the fields exist and convert them to the best
        # matching existing field names
        field_list = [_closest_field(field, klist, match_strictness)
                      for field in result.fields]

        # print comment line
        print("# " + ' '.join(["%20s" % (field,) for field in field_list]))

        # print fields values
        for line_number, d in _iter_records(filename, is_stat2):
            s0 = ' '.join(["%20s" % (str(d[key_of[field]]),)
                           for field in field_list])
            if result.nframe:
                print(str(line_number) + " > " + s0)
            else:
                print("> " + s0)

    if result.single_column_field is not None:
        field_list = [k for k in klist if result.single_column_field in k]
        for _, d in _iter_records(filename, is_stat2):
            for key in field_list:
                print(key, d[key_of[key]])
            # separator after every record
            print(" ")

    if (result.search_field is not None) and \
            (result.search_value is not None):
        corrected_field = _closest_field(result.search_field, klist,
                                         match_strictness)
        for _, d in _iter_records(filename, is_stat2):
            if str(d[key_of[corrected_field]]) == result.search_value:
                for key in klist:
                    print(key, d[key_of[key]])

    if result.print_raw_number is not None:
        # In a stat2 file line 1 is the header, so row N is line N + 1.
        target = int(result.print_raw_number) + (1 if is_stat2 else 0)
        with open(filename, "r") as f:
            for line_number, line in enumerate(f, 1):
                if line_number != target:
                    continue
                # Only the requested line is parsed.
                try:
                    d = _parse_record(line)
                except Exception:
                    _warn_invalid_line(line_number)
                else:
                    for key in klist:
                        print(key, d[key_of[key]])
                break


if __name__ == "__main__":
    main()