IMP logo
IMP Reference Guide  2.22.0
The Integrative Modeling Platform
process_output.py
1 #! /usr/bin/env python
2 
3 import argparse
4 import difflib
5 
6 
# Command-line interface of the script. The parser object is kept in the
# module-level name `p`; every option stores its value under the `dest`
# attribute given below.
p = argparse.ArgumentParser(
    description=(
        "Process output data file saved as dictionaries. It has two "
        "modalities: print selected fields for all lines or print a "
        "particular line where a field has a given value. Example of "
        "usage: process_output.py --soft -s To E S "
        "-f log.3.native-2-no-red. process_output.py --soft "
        "--search_field EV0 --search_value 5.67750116023 "
        "-f log.3.native-2-no-red"
    )
)

# Input file.
p.add_argument('-f', dest="filename", help="file name to process")

# Field selection: several fields as one row (-s) or one field pattern
# printed as a column (-t).
p.add_argument(
    '-s', nargs="+", dest="fields",
    help=("Specify all fields to be printed. Multiple flags will append "
          "a list of fields to be printed"))
p.add_argument(
    '-t', dest="single_column_field",
    help=("Specify a single column field to be printed. It will be "
          "printed as a column. If the field name is not complete, it "
          "will print all fields whose name contain the queried string."))

# Inspection switches (store_true options default to False).
p.add_argument(
    '-p', dest="print_fields", action="store_true",
    help="print the fields contained in the file")
p.add_argument(
    '--head', dest="print_header", action="store_true",
    help="print the fields contained in the file (only stat2)")

# Value is kept as a string here; it is converted to int where it is used.
p.add_argument('-n', dest="print_raw_number", help="print the selected raw")

# Approximate matching of field names.
p.add_argument(
    '--soft', dest="soft_match", action="store_true",
    help=("Soft match. Closest matching field will be printed, e.g. S "
          "will give Step_Number, En will give energy, etc. "))

# Search mode: both options must be given for the search to run.
p.add_argument(
    '--search_field', dest="search_field",
    help=("Search a line from the file. Specify the field to be "
          "searched for. "))
p.add_argument(
    '--search_value', dest="search_value",
    help=("Search a line from the file. Specify the value to be "
          "searched for. "))

# Output decoration.
p.add_argument(
    '--nframe', dest="nframe", action="store_true",
    help="Print the frame number as initial column")
44 
# Parse the command line; `result` holds one attribute per option.
result = p.parse_args()

# File flavour flags. Exactly one of them is switched on below once the
# first line of the file has been inspected.
isstat1 = False
isstat2 = False

if result.filename is None:
    raise ValueError("No file name provided. Use -h for help")

# Defaults so that an empty file yields "no fields" instead of leaving
# these names undefined for the code further down.
klist = []
invstat2_dict = {}

# Only the first line is needed to discover the field names, so read just
# that line; the context manager closes the file even if parsing fails.
with open(result.filename, "r") as f:
    first_line = f.readline()

if first_line:
    # NOTE(security): eval() executes whatever expression the line holds.
    # Only run this script on files you trust.
    # NOTE(review): ast.literal_eval may be a safe substitute if the files
    # only ever contain plain literals -- TODO confirm.
    d = eval(first_line)
    klist = list(d.keys())
    # check if it is a stat2 file
    if "STAT2HEADER" in klist:
        import operator
        isstat2 = True
        # Drop (and optionally print) every header entry; klist is a copy
        # of the keys, so deleting from d while looping is safe.
        for k in klist:
            if "STAT2HEADER" in str(k):
                if result.print_header:
                    print(k, d[k])
                del d[k]
        # What is left maps each per-line key to its field name.
        stat2_dict = d
        # Sort once by field name and derive all three views from it.
        ordered = sorted(stat2_dict.items(), key=operator.itemgetter(1))
        kkeys = [key for key, _ in ordered]
        klist = [name for _, name in ordered]
        # Reverse lookup: field name -> key used in the data lines.
        invstat2_dict = {name: key for key, name in ordered}
    else:
        # stat1: every line is a dictionary keyed directly by field name.
        isstat1 = True
        klist.sort()
86 
# With -p, list every field name found in the file, one per line.
if result.print_fields:
    for name in klist:
        if len(name) > 100:
            # Very long names are cut to their first 100 characters.
            print(name[:100],
                  "... omitting the rest of the string (>100 characters)")
            continue
        print(name)
95 
96 
# Cutoff handed to difflib.get_close_matches when resolving field names.
# 1.0 demands that the requested name equals a name in the file; --soft
# lowers it to 0.1 so that the closest name is accepted instead.
match_strictness = 0.1 if result.soft_match else 1.0
102 
# print the queried fields (-s): one output row per line of the file, with
# the requested fields as right-aligned 20-character columns
if result.fields is not None:
    # check whether the fields exist and convert them to best matching
    # existing field names
    field_list = []
    for field in result.fields:
        found_entries = difflib.get_close_matches(
            field, klist, 1, match_strictness)
        if not found_entries:
            raise ValueError("field " + field + " non found")
        field_list.append(found_entries[0])

    # print comment line
    s0 = ' '.join(["%20s" % (field) for field in field_list])
    print("# " + s0)

    # print fields values
    # The context manager closes the file even when a lookup below raises,
    # and iterating the handle avoids loading the whole file into memory.
    with open(result.filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            if isstat2 and line_number == 1:
                # the first line of a stat2 file is the header, not data
                continue
            try:
                # NOTE(security): eval() executes the content of the line;
                # only use this script on trusted files.
                d = eval(line)
            except Exception:
                # Best effort: report the unparsable line and go on.
                # Exception (not a bare except) lets Ctrl-C through.
                print("# Warning: skipped line number " +
                      str(line_number) + " not a valid line")
                continue
            if isstat1:
                s0 = ' '.join(["%20s" % (str(d[field]))
                               for field in field_list])
            elif isstat2:
                # stat2 lines are keyed indirectly: field name -> key
                s0 = ' '.join(["%20s" % (str(d[invstat2_dict[field]]))
                               for field in field_list])
            if not result.nframe:
                print("> " + s0)
            else:
                print(str(line_number) + " > " + s0)
146 
147 
# Column output (-t): for every line of the file print each field whose
# name contains the queried string as a "name value" pair, followed by a
# separator line.
if result.single_column_field is not None:
    field_list = [k for k in klist if result.single_column_field in k]

    # The context manager closes the file even when a lookup below raises,
    # and iterating the handle avoids loading the whole file into memory.
    with open(result.filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            if isstat2 and line_number == 1:
                # the first line of a stat2 file is the header, not data
                continue
            try:
                # NOTE(security): eval() executes the content of the line;
                # only use this script on trusted files.
                d = eval(line)
            except Exception:
                # Best effort: report the unparsable line and go on.
                # Exception (not a bare except) lets Ctrl-C through.
                print("# Warning: skipped line number " +
                      str(line_number) + " not a valid line")
                continue
            if isstat1:
                for key in field_list:
                    print(key, d[key])
            elif isstat2:
                # stat2 lines are keyed indirectly: field name -> key
                for key in field_list:
                    print(key, d[invstat2_dict[key]])
            print(" ")
174 
# Search mode: print all fields of every line whose search field, converted
# to a string, equals --search_value. Runs only when both options are given.
if (result.search_field is not None) and (result.search_value is not None):
    # check whether the field exists and convert it to the best matching
    # existing field name
    found_entries = difflib.get_close_matches(
        result.search_field, klist, 1, match_strictness)
    if not found_entries:
        raise ValueError("field " + result.search_field + " non found")
    corrected_field = found_entries[0]

    # print fields values
    # The context manager closes the file even when a lookup below raises,
    # and iterating the handle avoids loading the whole file into memory.
    with open(result.filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            if isstat2 and line_number == 1:
                # the first line of a stat2 file is the header, not data
                continue
            try:
                # NOTE(security): eval() executes the content of the line;
                # only use this script on trusted files.
                d = eval(line)
            except Exception:
                # Best effort: report the unparsable line and go on.
                # Exception (not a bare except) lets Ctrl-C through.
                print("# Warning: skipped line number " +
                      str(line_number) + " not a valid line")
                continue

            if isstat1:
                if str(d[corrected_field]) == result.search_value:
                    for key in klist:
                        print(key, d[key])
            elif isstat2:
                # stat2 lines are keyed indirectly: field name -> key
                if (str(d[invstat2_dict[corrected_field]])
                        == result.search_value):
                    for key in klist:
                        print(key, d[invstat2_dict[key]])
210 
# Row output (-n): print every field of one selected data row.
if result.print_raw_number is not None:
    # Convert once instead of on every line of the file.
    target_line = int(result.print_raw_number)
    if isstat2:
        # The first line of a stat2 file is the header, so data row N is
        # found on file line N + 1.
        target_line += 1

    # The context manager closes the file even when a lookup below raises.
    with open(result.filename, "r") as f:
        for line_number, line in enumerate(f, 1):
            if line_number != target_line:
                continue
            try:
                # NOTE(security): eval() executes the content of the line;
                # only use this script on trusted files.
                d = eval(line)
            except Exception:
                # Exception (not a bare except) lets Ctrl-C through.
                print("# Warning: skipped line number "
                      + str(line_number) + " not a valid line")
                break
            if isstat1:
                for key in klist:
                    print(key, d[key])
            elif isstat2:
                # stat2 lines are keyed indirectly: field name -> key
                for key in klist:
                    print(key, d[invstat2_dict[key]])
            # The requested row has been handled; no need to read on.
            break