11 def __init__(self, stepno, statline, header):
18 def add(self, ftype, category, *data):
20 self.dumps[category] = data[0]
22 self.trajs[category] = {
'ftype': data[0],
27 raise ValueError(
"unknown file type")
29 def get_stats_header(self):
44 """Manages information on a given simulation.
45 Assumes the existence of a _stats.txt file, and handles more files if
47 folder : the folder which contains _stats.txt
48 prefix : the stats file is supposed to be prefix+'_stats.txt'
51 def __init__(self, folder, prefix):
55 self.stats_file = os.path.join(folder, prefix +
'stats.txt')
56 if not os.path.isfile(self.stats_file):
57 raise ValueError(
'cannot find stats file %s' % self.stats_file)
60 for fl
in os.listdir(folder):
62 match = re.match(prefix +
r'(.*)', fl)
66 if tail ==
'stats.txt':
69 category = tail.split(
'_')[0]
70 if not category
in files:
72 files[category].append(tail)
76 for cat, fnames
in files.iteritems():
78 or os.path.splitext(fnames[0].split(
'_')[-1])[0].isdigit():
80 if not cat
in self.dumpfiles:
81 self.dumpfiles[cat] = []
84 indexno = int(os.path.splitext(fname.split(
'_')[1])[0])
85 self.dumpfiles[cat].append((indexno, fname))
86 self.dumpfiles[cat] = dict(self.dumpfiles[cat])
88 if len(self.dumpfiles[cat]) != \
89 len(set(self.dumpfiles[cat].keys())):
90 raise ValueError(
"found duplicates in %s %s %s"
91 % (folder, prefix, fname))
95 ext = os.path.splitext(fname)[1]
96 if ext.startswith(
'.rmf'):
97 self.trajfiles[cat] = (ext[1:], fname)
99 raise ValueError(
"Unknown extension: %s in file %s"
def get_stats_header(self):
    """Return the header (first line) of the stats file.

    On the first call the stats file is inspected and three attributes
    are set: ``stats_first_line`` (the second line of the file),
    ``stats_handle`` (an open handle positioned just after the header)
    and ``stats_header``. Later calls return the cached header without
    touching the file.

    Raises:
        ValueError: if the second line of the file starts with '#'.
    """
    if not hasattr(self, 'stats_handle'):
        # Peek at the first two lines with a short-lived handle so that it
        # is closed again instead of being silently replaced below.
        with open(self.stats_file) as probe:
            probe.readline()
            first_line = probe.readline()
        if first_line.startswith('#'):
            # Nothing has been cached yet, so a retry reports the same
            # error rather than failing on a half-initialised object.
            raise ValueError('stats file must be 1-line only')
        self.stats_first_line = first_line
        # Fresh handle: consume the header so the next read yields data.
        self.stats_handle = open(self.stats_file)
        self.stats_header = self.stats_handle.readline()
    return self.stats_header
def get_first_stats_line(self):
    """Return the line that follows the header in the stats file."""
    # get_stats_header() caches stats_first_line as a side effect, so it
    # has to run before the attribute is read.
    self.get_stats_header()
    first_line = self.stats_first_line
    return first_line
120 def _get_next_stats(self):
122 self.get_stats_header()
123 for line
in self.stats_handle:
127 """iterate over all time steps"""
129 for stat
in self._get_next_stats():
131 stepno = int(stat.split()[1])
132 step = LogStep(stepno, stat, self.get_stats_header())
134 for cat, df
in self.dumpfiles.iteritems():
136 fullpath = os.path.join(self.folder,
137 self.prefix + df[stepno])
138 step.add(
'dump', cat, fullpath)
139 for cat, tf
in self.trajfiles.iteritems():
140 fullpath = os.path.join(self.folder,
142 step.add(
'traj', cat, tf[0], fullpath, stepno, tf[1])
149 """uses column to demux a replica trajectory. Assumes column points to a
150 float or integer type, which is allowed to change over time. Attribution is
151 based on order of float params. State 0 will be lowest param etc. Use
152 reverse=True to start with highest.
155 def __init__(self, logs, outfolder, column, reverse=False):
157 self.reverse = reverse
159 self.outfolder = outfolder
160 self.stat_handles = {}
161 self.traj_handles_in = {}
162 self.traj_handles_out = {}
165 if not os.path.isdir(outfolder):
167 for l
in xrange(len(self.logs)):
168 fname = os.path.join(outfolder,
'p%d' % l)
169 if not os.path.isdir(fname):
171 self.folders[l] = fname
173 h0 = self.logs[0].get_stats_header()
174 for log
in self.logs[1:]:
175 if h0 != log.get_stats_header():
176 raise "headers must be identical!"
178 tokens = [idx
for idx, i
in enumerate(h0.split())
if self.column
in i]
180 raise ValueError(
"column %d not found in this header\n%s"
182 elif len(tokens) > 1:
183 raise ValueError(
"column %d found multiple times!\n%s"
185 self.colno = tokens[0]
def get_param(self, statline):
    """Return the demux parameter of ``statline`` as a float.

    The line is split on whitespace and the token at index ``self.colno``
    is converted.
    """
    fields = statline.split()
    raw_value = fields[self.colno]
    return float(raw_value)
def _write_step_stats(self, stateno, lstep):
    """Append the stats line of ``lstep`` to the stats file of ``stateno``.

    The output file ``<stateno>_stats.txt`` is created in
    ``self.folders[stateno]`` the first time a state is seen, and the
    header is written to it at that point. The handle is kept in
    ``self.stat_handles`` for subsequent steps.
    """
    handles = self.stat_handles
    if stateno not in handles:
        stats_path = os.path.join(self.folders[stateno],
                                  str(stateno) + '_stats.txt')
        handles[stateno] = open(stats_path, 'w')
        # A new file starts with the header.
        handles[stateno].write(lstep.get_stats_header())
    out = handles[stateno]
    out.write(lstep.get_stats())
def _write_step_dump(self, stateno, lstep):
    """Copy every dump file of ``lstep`` into the folder of ``stateno``.

    Each copy is named ``<stateno>_<category><suffix>``, where the suffix
    is whatever follows the category name in the source file name.
    """
    # items() rather than iteritems(): available on Python 2 and 3.
    for cat, fname in lstep.get_dumps().items():
        # Look for the category in the file name only, and use its last
        # occurrence: the folder or the prefix may contain the category
        # name too, which would otherwise yield a wrong suffix.
        _, marker, tail = os.path.basename(fname).rpartition(cat + '_')
        if marker:
            suffix = '_' + tail
        else:
            # Name does not look like '<cat>_...': keep the historical
            # behaviour of splitting the whole path.
            suffix = fname.split(cat)[1]
        shutil.copyfile(fname, os.path.join(self.folders[stateno],
                                            str(stateno) + '_' + cat + suffix))
def _write_traj_rmf(self, infile, instep, outfile, stateno, cat):
    """Copy one frame from the RMF file ``infile`` to ``outfile``.

    Input and output handles are cached in ``self.traj_handles_in`` and
    ``self.traj_handles_out`` so that each file is opened or created only
    once. ``stateno`` and ``cat`` are not used in this method.
    """
    # Input side: open on first use, then reuse the cached handle.
    readers = self.traj_handles_in
    if infile not in readers:
        readers[infile] = RMF.open_rmf_file_read_only(infile)
    src = readers[infile]

    # Output side: create on first use and clone the file info, hierarchy
    # and static frame from the input once.
    writers = self.traj_handles_out
    if outfile not in writers:
        created = RMF.create_rmf_file(outfile)
        writers[outfile] = created
        RMF.clone_file_info(src, created)
        RMF.clone_hierarchy(src, created)
        RMF.clone_static_frame(src, created)
    dest = writers[outfile]

    # NOTE(review): frames are indexed with instep - 1, so instep is
    # presumably 1-based -- confirm against the stats file.
    all_frames = src.get_frames()
    frameid = all_frames[instep - 1]
    src.set_current_frame(frameid)
    frame_name = src.get_name(frameid)
    frame_type = src.get_type(frameid)
    dest.add_frame(frame_name, frame_type)
    RMF.clone_loaded_frame(src, dest)
226 def _write_step_traj(self, stateno, lstep):
228 for cat, data
in lstep.get_trajs().iteritems():
229 destfile = os.path.join(self.outfolder,
'p' + str(stateno),
230 str(stateno) +
'_' + data[
'tail'])
231 if data[
'ftype'].startswith(
'rmf'):
232 self._write_traj_rmf(data[
'fullpath'], data[
'stepno'],
233 destfile, stateno, cat)
235 raise ValueError(
"unknown trajectory file type")
def _write_step(self, stateno, lstep):
    """Write stats, dump files and trajectories of ``lstep`` for ``stateno``.

    The three writers are run in that order.
    """
    writers = (
        self._write_step_stats,
        self._write_step_dump,
        self._write_step_traj,
    )
    for write in writers:
        write(stateno, lstep)
244 log_iterators = [l.items()
for l
in self.logs]
245 print "Demuxing", len(log_iterators),
"replicas"
246 for idx, steps
in enumerate(zip(*log_iterators)):
247 if idx % 10 == 0
and idx > 0:
248 print "step", idx,
'\r',
251 params = [(self.get_param(i.get_stats()), i)
for i
in steps]
252 params.sort(reverse=self.reverse)
254 for i
in xrange(len(params)):
255 self._write_step(i, params[i][1])
259 def get_prefix(folder):
260 rval = [re.match(
r'(.*_)stats.txt', f)
for f
in os.listdir(folder)]
261 rval = [i
for i
in rval
if i]
263 raise ValueError(
"stats file not unique, found %d" % len(rval))
264 return rval[0].group(1)
266 if __name__ ==
'__main__':
267 if len(sys.argv) == 1
or len(sys.argv) > 4:
268 sys.exit(
"""demux_trajs.py column [infolder [outfolder]]
269 expects r?? folders in infolder and will write p?? folders in outfolder.
270 infolder must contain a _stats.txt file which will contain a header.
271 column must be a substring matching to one of the columns in the
272 _stats.txt files. It will typically be a temperature, or a state number.
273 That column will be used for demuxing. Folders are optional and will be
274 taken as ./ if not indicated.
277 if len(sys.argv) == 3:
278 infolder = sys.argv[2]
280 elif len(sys.argv) == 4:
281 infolder = sys.argv[2]
282 outfolder = sys.argv[3]
287 folders = [os.path.join(infolder, f)
288 for f
in os.listdir(infolder)
if re.match(
r'r\d+', f)]
290 for f, prefix
in zip(folders, map(get_prefix, folders))]
291 demux =
Demuxer(replica_logs, outfolder, column, reverse=
True)
def items
iterate over all time steps
Manages information on a given simulation.
uses column to demux a replica trajectory.