3 from __future__
import print_function
12 def __init__(self, stepno, statline, header):
19 def add(self, ftype, category, *data):
21 self.dumps[category] = data[0]
23 self.trajs[category] = {
'ftype': data[0],
28 raise ValueError(
"unknown file type")
30 def get_stats_header(self):
45 """Manages information on a given simulation.
46 Assumes the existence of a _stats.txt file, and handles more files if
48 folder : the folder which contains _stats.txt
49 prefix : the stats file is supposed to be prefix+'_stats.txt'
52 def __init__(self, folder, prefix):
56 self.stats_file = os.path.join(folder, prefix +
'stats.txt')
57 if not os.path.isfile(self.stats_file):
58 raise ValueError(
'cannot find stats file %s' % self.stats_file)
61 for fl
in os.listdir(folder):
63 match = re.match(prefix +
r'(.*)', fl)
67 if tail ==
'stats.txt':
70 category = tail.split(
'_')[0]
71 if category
not in files:
73 files[category].append(tail)
77 for cat, fnames
in files.items():
79 or os.path.splitext(fnames[0].split(
'_')[-1])[0].isdigit():
81 if cat
not in self.dumpfiles:
82 self.dumpfiles[cat] = []
85 indexno = int(os.path.splitext(fname.split(
'_')[1])[0])
86 self.dumpfiles[cat].append((indexno, fname))
87 self.dumpfiles[cat] = dict(self.dumpfiles[cat])
89 if len(self.dumpfiles[cat]) != \
90 len(set(self.dumpfiles[cat].keys())):
91 raise ValueError(
"found duplicates in %s %s %s"
92 % (folder, prefix, fname))
96 ext = os.path.splitext(fname)[1]
97 if ext.startswith(
'.rmf'):
98 self.trajfiles[cat] = (ext[1:], fname)
100 raise ValueError(
"Unknown extension: %s in file %s"
103 def get_stats_header(self):
104 if not hasattr(self,
'stats_handle'):
105 self.stats_handle = open(self.stats_file)
108 self.stats_handle.readline()
109 self.stats_first_line = self.stats_handle.readline()
110 if self.stats_first_line.startswith(
'#'):
111 raise ValueError(
'stats file must be 1-line only')
112 self.stats_handle = open(self.stats_file)
113 self.stats_header = self.stats_handle.readline()
114 return self.stats_header
116 def get_first_stats_line(self):
118 self.get_stats_header()
119 return self.stats_first_line
121 def _get_next_stats(self):
123 self.get_stats_header()
124 for line
in self.stats_handle:
128 """iterate over all time steps"""
130 for stat
in self._get_next_stats():
132 stepno = int(stat.split()[1])
133 step = LogStep(stepno, stat, self.get_stats_header())
135 for cat, df
in self.dumpfiles.items():
137 fullpath = os.path.join(self.folder,
138 self.prefix + df[stepno])
139 step.add(
'dump', cat, fullpath)
140 for cat, tf
in self.trajfiles.items():
141 fullpath = os.path.join(self.folder,
143 step.add(
'traj', cat, tf[0], fullpath, stepno, tf[1])
150 """uses column to demux a replica trajectory. Assumes column points to a
151 float or integer type, which is allowed to change over time. Attribution is
152 based on order of float params. State 0 will be lowest param etc. Use
153 reverse=True to start with highest.
156 def __init__(self, logs, outfolder, column, reverse=False):
158 self.reverse = reverse
160 self.outfolder = outfolder
161 self.stat_handles = {}
162 self.traj_handles_in = {}
163 self.traj_handles_out = {}
166 if not os.path.isdir(outfolder):
168 for log
in range(len(self.logs)):
169 fname = os.path.join(outfolder,
'p%d' % log)
170 if not os.path.isdir(fname):
172 self.folders[log] = fname
174 h0 = self.logs[0].get_stats_header()
175 for log
in self.logs[1:]:
176 if h0 != log.get_stats_header():
177 raise ValueError(
"headers must be identical!")
179 tokens = [idx
for idx, i
in enumerate(h0.split())
if self.column
in i]
181 raise ValueError(
"column %d not found in this header\n%s"
183 elif len(tokens) > 1:
184 raise ValueError(
"column %d found multiple times!\n%s"
186 self.colno = tokens[0]
def get_param(self, statline):
    """Return the demux parameter found in one line of a stats file.

    The line is split on whitespace and the field at index ``self.colno``
    is converted to ``float``.

    statline : a single line of text taken from a stats file

    Raises IndexError if the line has too few fields, and ValueError if
    the selected field cannot be parsed as a float.
    """
    fields = statline.split()
    return float(fields[self.colno])
191 def _write_step_stats(self, stateno, lstep):
193 if stateno
not in self.stat_handles:
194 self.stat_handles[stateno] = open(
195 os.path.join(self.folders[stateno],
196 str(stateno) +
'_stats.txt'),
'w')
197 self.stat_handles[stateno].write(lstep.get_stats_header())
199 self.stat_handles[stateno].write(lstep.get_stats())
def _write_step_dump(self, stateno, lstep):
    """Copy the dump files of one step into the folder of a demuxed state.

    ``lstep.get_dumps().items()`` yields (category, source path) pairs.
    Each source file is copied into ``self.folders[stateno]`` under the
    name ``<stateno>_<category><suffix>``, where <suffix> is whatever
    follows the category in the source file name.

    stateno : index of the demuxed state; used as key into self.folders
    lstep : step object providing get_dumps()

    Raises IndexError if the category does not occur in the file name.
    """
    for cat, fname in lstep.get_dumps().items():
        # Look at the file name only and split on the first occurrence
        # only. Splitting the whole path would match the category inside
        # a directory name, and an unbounded split would cut the suffix
        # short when the category string occurs again (for instance in
        # the extension, as in 'r0_pdb_3.pdb').
        suffix = os.path.basename(fname).split(cat, 1)[1]
        target = os.path.join(self.folders[stateno],
                              str(stateno) + '_' + cat + suffix)
        shutil.copyfile(fname, target)
208 def _write_traj_rmf(self, infile, instep, outfile, stateno, cat):
211 if infile
not in self.traj_handles_in:
212 src = RMF.open_rmf_file_read_only(infile)
213 self.traj_handles_in[infile] = src
214 src = self.traj_handles_in[infile]
216 if outfile
not in self.traj_handles_out:
217 dest = RMF.create_rmf_file(outfile)
218 self.traj_handles_out[outfile] = dest
219 RMF.clone_file_info(src, dest)
220 RMF.clone_hierarchy(src, dest)
221 RMF.clone_static_frame(src, dest)
222 dest = self.traj_handles_out[outfile]
224 frameid = src.get_frames()[instep - 1]
225 src.set_current_frame(frameid)
226 dest.add_frame(src.get_name(frameid), src.get_type(frameid))
227 RMF.clone_loaded_frame(src, dest)
229 def _write_step_traj(self, stateno, lstep):
231 for cat, data
in lstep.get_trajs().items():
232 destfile = os.path.join(self.outfolder,
'p' + str(stateno),
233 str(stateno) +
'_' + data[
'tail'])
234 if data[
'ftype'].startswith(
'rmf'):
235 self._write_traj_rmf(data[
'fullpath'], data[
'stepno'],
236 destfile, stateno, cat)
238 raise ValueError(
"unknown trajectory file type")
def _write_step(self, stateno, lstep):
    """Write all outputs of one step for one demuxed state.

    Calls, in this order, _write_step_stats, _write_step_dump and
    _write_step_traj, passing both arguments through unchanged.

    stateno : index of the demuxed state the step is attributed to
    lstep : the step object handed to the three writers
    """
    self._write_step_stats(stateno, lstep)
    self._write_step_dump(stateno, lstep)
    self._write_step_traj(stateno, lstep)
247 log_iterators = [list(log.items())
for log
in self.logs]
248 print(
"Demuxing", len(log_iterators),
"replicas")
249 for idx, steps
in enumerate(zip(*log_iterators)):
250 if idx % 10 == 0
and idx > 0:
251 print(
"step", idx,
'\r', end=
' ')
254 params = [(self.get_param(i.get_stats()), i)
for i
in steps]
255 params.sort(reverse=self.reverse)
257 for i
in range(len(params)):
258 self._write_step(i, params[i][1])
262 def get_prefix(folder):
263 rval = [re.match(
r'(.*_)stats.txt', f)
for f
in os.listdir(folder)]
264 rval = [i
for i
in rval
if i]
266 raise ValueError(
"stats file not unique, found %d" % len(rval))
267 return rval[0].group(1)
270 if __name__ ==
'__main__':
271 if len(sys.argv) == 1
or len(sys.argv) > 4:
272 sys.exit(
"""demux_trajs.py column [infolder [outfolder]]
273 expects r?? folders in infolder and will write p?? folders in
274 outfolder. infolder must contain a _stats.txt file which will contain
275 a header. column must be a substring matching to one of the columns in
276 the _stats.txt files. It will typically be a temperature, or a state
277 number. That column will be used for demuxing. Folders are optional
278 and will be taken as ./ if not indicated.
281 if len(sys.argv) == 3:
282 infolder = sys.argv[2]
284 elif len(sys.argv) == 4:
285 infolder = sys.argv[2]
286 outfolder = sys.argv[3]
291 folders = [os.path.join(infolder, f)
292 for f
in os.listdir(infolder)
if re.match(
r'r\d+', f)]
294 for f, prefix
in zip(folders, map(get_prefix, folders))]
295 demux =
Demuxer(replica_logs, outfolder, column, reverse=
True)
def items
iterate over all time steps
Manages information on a given simulation.
uses column to demux a replica trajectory.