3 from __future__
import print_function
12 def __init__(self, stepno, statline, header):
19 def add(self, ftype, category, *data):
21 self.dumps[category] = data[0]
23 self.trajs[category] = {
'ftype': data[0],
28 raise ValueError(
"unknown file type")
30 def get_stats_header(self):
45 """Manages information on a given simulation.
46 Assumes the existence of a _stats.txt file, and handles more files if
48 folder : the folder which contains _stats.txt
49 prefix : the stats file is supposed to be prefix+'_stats.txt'
52 def __init__(self, folder, prefix):
56 self.stats_file = os.path.join(folder, prefix +
'stats.txt')
57 if not os.path.isfile(self.stats_file):
58 raise ValueError(
'cannot find stats file %s' % self.stats_file)
61 for fl
in os.listdir(folder):
63 match = re.match(prefix +
r'(.*)', fl)
67 if tail ==
'stats.txt':
70 category = tail.split(
'_')[0]
71 if not category
in files:
73 files[category].append(tail)
77 for cat, fnames
in files.items():
79 or os.path.splitext(fnames[0].split(
'_')[-1])[0].isdigit():
81 if not cat
in self.dumpfiles:
82 self.dumpfiles[cat] = []
85 indexno = int(os.path.splitext(fname.split(
'_')[1])[0])
86 self.dumpfiles[cat].append((indexno, fname))
87 self.dumpfiles[cat] = dict(self.dumpfiles[cat])
89 if len(self.dumpfiles[cat]) != \
90 len(set(self.dumpfiles[cat].keys())):
91 raise ValueError(
"found duplicates in %s %s %s"
92 % (folder, prefix, fname))
96 ext = os.path.splitext(fname)[1]
97 if ext.startswith(
'.rmf'):
98 self.trajfiles[cat] = (ext[1:], fname)
100 raise ValueError(
"Unknown extension: %s in file %s"
103 def get_stats_header(self):
104 if not hasattr(self,
'stats_handle'):
105 self.stats_handle = open(self.stats_file)
108 self.stats_handle.readline()
109 self.stats_first_line = self.stats_handle.readline()
110 if self.stats_first_line.startswith(
'#'):
111 raise ValueError(
'stats file must be 1-line only')
112 self.stats_handle = open(self.stats_file)
113 self.stats_header = self.stats_handle.readline()
114 return self.stats_header
116 def get_first_stats_line(self):
118 self.get_stats_header()
119 return self.stats_first_line
121 def _get_next_stats(self):
123 self.get_stats_header()
124 for line
in self.stats_handle:
128 """iterate over all time steps"""
130 for stat
in self._get_next_stats():
132 stepno = int(stat.split()[1])
133 step = LogStep(stepno, stat, self.get_stats_header())
135 for cat, df
in self.dumpfiles.items():
137 fullpath = os.path.join(self.folder,
138 self.prefix + df[stepno])
139 step.add(
'dump', cat, fullpath)
140 for cat, tf
in self.trajfiles.items():
141 fullpath = os.path.join(self.folder,
143 step.add(
'traj', cat, tf[0], fullpath, stepno, tf[1])
150 """uses column to demux a replica trajectory. Assumes column points to a
151 float or integer type, which is allowed to change over time. Attribution is
152 based on order of float params. State 0 will be lowest param etc. Use
153 reverse=True to start with highest.
156 def __init__(self, logs, outfolder, column, reverse=False):
158 self.reverse = reverse
160 self.outfolder = outfolder
161 self.stat_handles = {}
162 self.traj_handles_in = {}
163 self.traj_handles_out = {}
166 if not os.path.isdir(outfolder):
168 for l
in range(len(self.logs)):
169 fname = os.path.join(outfolder,
'p%d' % l)
170 if not os.path.isdir(fname):
172 self.folders[l] = fname
174 h0 = self.logs[0].get_stats_header()
175 for log
in self.logs[1:]:
176 if h0 != log.get_stats_header():
177 raise ValueError(
"headers must be identical!")
179 tokens = [idx
for idx, i
in enumerate(h0.split())
if self.column
in i]
181 raise ValueError(
"column %d not found in this header\n%s"
183 elif len(tokens) > 1:
184 raise ValueError(
"column %d found multiple times!\n%s"
186 self.colno = tokens[0]
def get_param(self, statline):
    """Return the demux parameter stored in one stats line.

    statline is split on whitespace and the token at index self.colno
    is converted to float.

    Raises IndexError when the line has fewer tokens than self.colno + 1
    and ValueError when the selected token is not a number.
    """
    fields = statline.split()
    return float(fields[self.colno])
191 def _write_step_stats(self, stateno, lstep):
193 if stateno
not in self.stat_handles:
194 self.stat_handles[stateno] = open(
195 os.path.join(self.folders[stateno],
196 str(stateno) +
'_stats.txt'),
'w')
197 self.stat_handles[stateno].write(lstep.get_stats_header())
199 self.stat_handles[stateno].write(lstep.get_stats())
def _write_step_dump(self, stateno, lstep):
    """Copy every dump file of lstep into the folder of state stateno.

    lstep.get_dumps() maps a category name to the path of a dump file.
    Each file is copied into self.folders[stateno] under the name
    '<stateno>_<category><suffix>', where <suffix> is the part of the
    source path that follows the category name.
    """
    target_dir = self.folders[stateno]
    for cat, fname in lstep.get_dumps().items():
        # Use the LAST occurrence of the category: a directory or file
        # prefix that happens to contain the category name must not end
        # up in the destination file name.
        suffix = fname.rsplit(cat, 1)[1]
        shutil.copyfile(
            fname,
            os.path.join(target_dir, str(stateno) + '_' + cat + suffix))
206 def _write_traj_rmf(self, infile, instep, outfile, stateno, cat):
209 if infile
not in self.traj_handles_in:
210 src = RMF.open_rmf_file_read_only(infile)
211 self.traj_handles_in[infile] = src
212 src = self.traj_handles_in[infile]
214 if outfile
not in self.traj_handles_out:
215 dest = RMF.create_rmf_file(outfile)
216 self.traj_handles_out[outfile] = dest
217 RMF.clone_file_info(src, dest)
218 RMF.clone_hierarchy(src, dest)
219 RMF.clone_static_frame(src, dest)
220 dest = self.traj_handles_out[outfile]
222 frameid = src.get_frames()[instep - 1]
223 src.set_current_frame(frameid)
224 dest.add_frame(src.get_name(frameid), src.get_type(frameid))
225 RMF.clone_loaded_frame(src, dest)
227 def _write_step_traj(self, stateno, lstep):
229 for cat, data
in lstep.get_trajs().items():
230 destfile = os.path.join(self.outfolder,
'p' + str(stateno),
231 str(stateno) +
'_' + data[
'tail'])
232 if data[
'ftype'].startswith(
'rmf'):
233 self._write_traj_rmf(data[
'fullpath'], data[
'stepno'],
234 destfile, stateno, cat)
236 raise ValueError(
"unknown trajectory file type")
def _write_step(self, stateno, lstep):
    """Write everything belonging to one time step of one state.

    Delegates, in this order, to _write_step_stats, _write_step_dump
    and _write_step_traj, passing stateno and lstep through unchanged.
    """
    self._write_step_stats(stateno, lstep)
    self._write_step_dump(stateno, lstep)
    self._write_step_traj(stateno, lstep)
245 log_iterators = [list(l.items())
for l
in self.logs]
246 print(
"Demuxing", len(log_iterators),
"replicas")
247 for idx, steps
in enumerate(zip(*log_iterators)):
248 if idx % 10 == 0
and idx > 0:
249 print(
"step", idx,
'\r', end=
' ')
252 params = [(self.get_param(i.get_stats()), i)
for i
in steps]
253 params.sort(reverse=self.reverse)
255 for i
in range(len(params)):
256 self._write_step(i, params[i][1])
def get_prefix(folder):
    """Return the prefix of the single '<prefix>stats.txt' file in folder.

    The prefix is everything in the file name up to and including the
    last underscore before 'stats.txt' (e.g. 'r01_' for 'r01_stats.txt').

    Raises ValueError unless exactly one such file is found.
    """
    # Escape the dot and anchor the end of the name so that files such as
    # 'r01_stats.txt.bak' or 'r01_statsXtxt' are not counted as stats files.
    pattern = re.compile(r'(.*_)stats\.txt$')
    matches = [m for m in map(pattern.match, os.listdir(folder)) if m]
    # NOTE(review): the "!= 1" guard is inferred from the error message
    # and the unconditional matches[0] below -- confirm against the file.
    if len(matches) != 1:
        raise ValueError("stats file not unique, found %d" % len(matches))
    return matches[0].group(1)
267 if __name__ ==
'__main__':
268 if len(sys.argv) == 1
or len(sys.argv) > 4:
269 sys.exit(
"""demux_trajs.py column [infolder [outfolder]]
270 expects r?? folders in infolder and will write p?? folders in outfolder.
271 infolder must contain a _stats.txt file which will contain a header.
272 column must be a substring matching to one of the columns in the
273 _stats.txt files. It will typically be a temperature, or a state number.
274 That column will be used for demuxing. Folders are optional and will be
275 taken as ./ if not indicated.
278 if len(sys.argv) == 3:
279 infolder = sys.argv[2]
281 elif len(sys.argv) == 4:
282 infolder = sys.argv[2]
283 outfolder = sys.argv[3]
288 folders = [os.path.join(infolder, f)
289 for f
in os.listdir(infolder)
if re.match(
r'r\d+', f)]
291 for f, prefix
in zip(folders, map(get_prefix, folders))]
292 demux =
Demuxer(replica_logs, outfolder, column, reverse=
True)
def items
iterate over all time steps
Manages information on a given simulation.
uses column to demux a replica trajectory.