11 def __init__(self, stepno, statline, header):
18 def add(self, ftype, category, *data):
20 self.dumps[category] = data[0]
22 self.trajs[category] = {
'ftype': data[0],
27 raise ValueError(
"unknown file type")
29 def get_stats_header(self):
44 """Manages information on a given simulation.
45 Assumes the existence of a _stats.txt file, and handles more files if
47 folder : the folder which contains _stats.txt
prefix : the stats file is supposed to be prefix+'stats.txt' (the prefix is expected to already end with '_')
51 def __init__(self, folder, prefix):
55 self.stats_file = os.path.join(folder, prefix +
'stats.txt')
56 if not os.path.isfile(self.stats_file):
57 raise ValueError(
'cannot find stats file %s' % self.stats_file)
60 for fl
in os.listdir(folder):
62 match = re.match(prefix +
r'(.*)', fl)
66 if tail ==
'stats.txt':
69 category = tail.split(
'_')[0]
70 if category
not in files:
72 files[category].append(tail)
76 for cat, fnames
in files.items():
78 or os.path.splitext(fnames[0].split(
'_')[-1])[0].isdigit():
80 if cat
not in self.dumpfiles:
81 self.dumpfiles[cat] = []
84 indexno = int(os.path.splitext(fname.split(
'_')[1])[0])
85 self.dumpfiles[cat].append((indexno, fname))
86 self.dumpfiles[cat] = dict(self.dumpfiles[cat])
88 if len(self.dumpfiles[cat]) != \
89 len(set(self.dumpfiles[cat].keys())):
90 raise ValueError(
"found duplicates in %s %s %s"
91 % (folder, prefix, fname))
95 ext = os.path.splitext(fname)[1]
96 if ext.startswith(
'.rmf'):
97 self.trajfiles[cat] = (ext[1:], fname)
99 raise ValueError(
"Unknown extension: %s in file %s"
def get_stats_header(self):
    """Return the header (first line) of the stats file.

    On the first call the file is probed to check that the header is a
    single line: the second line of the file is stored in
    ``self.stats_first_line`` and must not start with '#'. A handle
    positioned just after the header is then kept in ``self.stats_handle``
    and the header is cached in ``self.stats_header``. Later calls return
    the cached header without reading the file again.

    Raises:
        ValueError: if the second line of the stats file starts with '#'.
    """
    if not hasattr(self, 'stats_handle'):
        # Probe with a short-lived handle: it is closed on success and on
        # failure, instead of being leaked when the file is opened again
        # below.
        with open(self.stats_file) as probe:
            probe.readline()
            self.stats_first_line = probe.readline()
        if self.stats_first_line.startswith('#'):
            raise ValueError('stats file must be 1-line only')
        # Fresh handle, left positioned right after the header line.
        handle = open(self.stats_file)
        self.stats_header = handle.readline()
        # Publish the handle only once the header is cached, so that a
        # failure above cannot leave the object half-initialised.
        self.stats_handle = handle
    return self.stats_header
def get_first_stats_line(self):
    """Return the line stored in ``self.stats_first_line``.

    ``get_stats_header()`` is called first so that the attribute has been
    populated before it is read.
    """
    self.get_stats_header()
    first_line = self.stats_first_line
    return first_line
120 def _get_next_stats(self):
122 self.get_stats_header()
123 for line
in self.stats_handle:
127 """iterate over all time steps"""
129 for stat
in self._get_next_stats():
131 stepno = int(stat.split()[1])
132 step = LogStep(stepno, stat, self.get_stats_header())
134 for cat, df
in self.dumpfiles.items():
136 fullpath = os.path.join(self.folder,
137 self.prefix + df[stepno])
138 step.add(
'dump', cat, fullpath)
139 for cat, tf
in self.trajfiles.items():
140 fullpath = os.path.join(self.folder,
142 step.add(
'traj', cat, tf[0], fullpath, stepno, tf[1])
149 """uses column to demux a replica trajectory. Assumes column points to a
150 float or integer type, which is allowed to change over time. Attribution is
151 based on order of float params. State 0 will be lowest param etc. Use
152 reverse=True to start with highest.
155 def __init__(self, logs, outfolder, column, reverse=False):
157 self.reverse = reverse
159 self.outfolder = outfolder
160 self.stat_handles = {}
161 self.traj_handles_in = {}
162 self.traj_handles_out = {}
165 if not os.path.isdir(outfolder):
167 for log
in range(len(self.logs)):
168 fname = os.path.join(outfolder,
'p%d' % log)
169 if not os.path.isdir(fname):
171 self.folders[log] = fname
173 h0 = self.logs[0].get_stats_header()
174 for log
in self.logs[1:]:
175 if h0 != log.get_stats_header():
176 raise ValueError(
"headers must be identical!")
178 tokens = [idx
for idx, i
in enumerate(h0.split())
if self.column
in i]
180 raise ValueError(
"column %d not found in this header\n%s"
182 elif len(tokens) > 1:
183 raise ValueError(
"column %d found multiple times!\n%s"
185 self.colno = tokens[0]
def get_param(self, statline):
    """Return the demux parameter of a stats line as a float.

    The line is split on whitespace and the field at index ``self.colno``
    is converted with ``float``.
    """
    fields = statline.split()
    return float(fields[self.colno])
def _write_step_stats(self, stateno, lstep):
    """Write the stats line of ``lstep`` to the stats file of ``stateno``.

    The output file ``<stateno>_stats.txt`` inside ``self.folders[stateno]``
    is opened the first time a state is seen, and the header is written
    to it at that point. The handle is cached in ``self.stat_handles``.
    """
    out = self.stat_handles.get(stateno)
    if out is None:
        # First step for this state: create the file and emit the header.
        target = os.path.join(self.folders[stateno],
                              str(stateno) + '_stats.txt')
        out = open(target, 'w')
        self.stat_handles[stateno] = out
        out.write(lstep.get_stats_header())
    out.write(lstep.get_stats())
def _write_step_dump(self, stateno, lstep):
    """Copy every dump file of ``lstep`` into the folder of ``stateno``.

    Each copy is named ``<stateno>_<category><rest>``, where ``<rest>`` is
    the second piece obtained when splitting the source path on the
    category name.
    """
    for category, source in lstep.get_dumps().items():
        target_dir = self.folders[stateno]
        rest = source.split(category)[1]
        target_name = str(stateno) + '_' + category + rest
        shutil.copyfile(source, os.path.join(target_dir, target_name))
def _write_traj_rmf(self, infile, instep, outfile, stateno, cat):
    """Append one frame of the RMF file ``infile`` to the RMF file ``outfile``.

    Input and output handles are cached in ``self.traj_handles_in`` and
    ``self.traj_handles_out``, keyed by file name. A newly created output
    file first receives the file info, hierarchy and static frame cloned
    from the input. The frame copied is the one at index ``instep - 1``
    of ``get_frames()`` on the input. ``stateno`` and ``cat`` are accepted
    but not used in this method.
    """
    readers = self.traj_handles_in
    if infile not in readers:
        readers[infile] = RMF.open_rmf_file_read_only(infile)
    source = readers[infile]

    writers = self.traj_handles_out
    if outfile not in writers:
        # New output file: copy everything that is not per-frame data.
        created = RMF.create_rmf_file(outfile)
        writers[outfile] = created
        RMF.clone_file_info(source, created)
        RMF.clone_hierarchy(source, created)
        RMF.clone_static_frame(source, created)
    sink = writers[outfile]

    frame = source.get_frames()[instep - 1]
    source.set_current_frame(frame)
    sink.add_frame(source.get_name(frame), source.get_type(frame))
    RMF.clone_loaded_frame(source, sink)
228 def _write_step_traj(self, stateno, lstep):
230 for cat, data
in lstep.get_trajs().items():
231 destfile = os.path.join(self.outfolder,
'p' + str(stateno),
232 str(stateno) +
'_' + data[
'tail'])
233 if data[
'ftype'].startswith(
'rmf'):
234 self._write_traj_rmf(data[
'fullpath'], data[
'stepno'],
235 destfile, stateno, cat)
237 raise ValueError(
"unknown trajectory file type")
def _write_step(self, stateno, lstep):
    """Write ``lstep`` for state ``stateno``: stats, then dumps, then trajectories."""
    writers = (
        self._write_step_stats,
        self._write_step_dump,
        self._write_step_traj,
    )
    for write in writers:
        write(stateno, lstep)
246 log_iterators = [list(log.items())
for log
in self.logs]
247 print(
"Demuxing", len(log_iterators),
"replicas")
248 for idx, steps
in enumerate(zip(*log_iterators)):
249 if idx % 10 == 0
and idx > 0:
250 print(
"step", idx,
'\r', end=
' ')
253 params = [(self.get_param(i.get_stats()), i)
for i
in steps]
254 params.sort(reverse=self.reverse)
256 for i
in range(len(params)):
257 self._write_step(i, params[i][1])
261 def get_prefix(folder):
262 rval = [re.match(
r'(.*_)stats.txt', f)
for f
in os.listdir(folder)]
263 rval = [i
for i
in rval
if i]
265 raise ValueError(
"stats file not unique, found %d" % len(rval))
266 return rval[0].group(1)
269 if __name__ ==
'__main__':
270 if len(sys.argv) == 1
or len(sys.argv) > 4:
271 sys.exit(
"""demux_trajs.py column [infolder [outfolder]]
272 expects r?? folders in infolder and will write p?? folders in
273 outfolder. infolder must contain a _stats.txt file which will contain
274 a header. column must be a substring matching to one of the columns in
275 the _stats.txt files. It will typically be a temperature, or a state
276 number. That column will be used for demuxing. Folders are optional
277 and will be taken as ./ if not indicated.
280 if len(sys.argv) == 3:
281 infolder = sys.argv[2]
283 elif len(sys.argv) == 4:
284 infolder = sys.argv[2]
285 outfolder = sys.argv[3]
290 folders = [os.path.join(infolder, f)
291 for f
in os.listdir(infolder)
if re.match(
r'r\d+', f)]
293 for f, prefix
in zip(folders, map(get_prefix, folders))]
294 demux =
Demuxer(replica_logs, outfolder, column, reverse=
True)
def items
iterate over all time steps
Manages information on a given simulation.
uses column to demux a replica trajectory.