"""Statistics gathering and printing class for ISD Gibbs sampling.
Also manages the restart file (TODO).
- prefix: all output files will have this prefix
- rate: print statistics every so many Gibbs sampling steps
- trajrate: print trajectories (pdb) every multiple of rate (default 1).
            Implies that update_coordinates() has been called by that time,
            otherwise writing will not occur.
- statfile: suffix of the statistics file
- num_entries_per_line: number of entries per line in the output. -1 to
                        disable wrapping.
- repeat_title: if 0 (default) only print it in the beginning. Else repeat
                it every 'repeat_title' outputted lines in the statistics
                file.
- separate_lines: If False the entries are not separated (default).
                  If True, the lines are separated with stars.
- compress: If set to a positive number of steps, compress trajectories
            each time so many steps have elapsed, appending the current
            frame number to the filename. Only works in append mode, and
            when it is set to a multiple of rate.

TODO: check if everything was updated nicely
"""
33 def __init__(self, prefix='r01', rate=1, trajrate=1, statfile='_stats.txt',
34 num_entries_per_line=5, repeat_title=0,
35 separate_lines=
False, compress=10000):
38 self.trajrate = trajrate
39 self.statfile = prefix + statfile
40 self.compress = compress
46 self.__counter_pos = 0
54 self.write_title =
True
55 if num_entries_per_line == 0
or num_entries_per_line < -1:
56 raise ValueError(
"number of entries per line is >0 or equal to -1")
57 if num_entries_per_line == -1:
58 self.wrap_stats =
False
60 self.wrap_stats =
True
61 self.num_entries_per_line = num_entries_per_line
62 self.add_numbers_to_titles =
True
64 self.repeat_title = repeat_title
65 self.separate_lines = separate_lines
66 self.comment_marker =
'#'
68 def _get_unique_category_name(self, name):
70 if name
in self.categories.keys():
74 ncat =
''.join([name,
'%d' % i])
75 if ncat
not in self.categories.keys():
84 if ncat
not in self.categories.keys():
"""Creates a logging entry for a simulation substep of the Gibbs
sampler. Each category has its own counter, initialized to zero.
The global category does not need to be created; it is already created
by the init method, and its key is 'global'.
- name: an optional name, must be a string.
Returns: a unique key to refer to this category, which will start with
the provided name.
"""
97 ncat = self._get_unique_category_name(name)
98 self.categories[ncat] = {
'counter': Entry(
'step',
'%10d', 0)}
def _append_to_stats(self, name, entry):
    """Register *entry* in the ordered list of printed statistics.

    Counters are kept together at the front of ``self.entries``, in the
    order they were added; every other entry is appended at the end.

    - name: name of the entry; the literal 'counter' marks a step counter.
    - entry: the object to store in ``self.entries``.

    NOTE(review): the ``else:`` line was absent from the listing this block
    was restored from. It is reinstated here because the original summary
    reads "append to stats, or put in front if entry is a counter".
    """
    if name == 'counter':
        # __counter_pos is the index just past the last counter inserted,
        # so successive counters keep their relative order.
        self.entries.insert(self.__counter_pos, entry)
        self.__counter_pos += 1
    else:
        self.entries.append(entry)
"""Add an entry for the statistics file.
- key: which category it belongs to (key returned by add_category)
You must specify at least one of the two following:
- name: a name for this entry
- entry: an instance of the Entry class.
Arguments: - name only: must already have an entry by that name.
           - entry only: name is set to the entry title and added. If
             it didn't exist before it is stored as well.
           - name and entry: name is used instead of the title.
           - nothing: raises an error.
Currently, not providing entry only makes sense for the counter since
there is no method to create an entry without adding it to the
statistics file.
"""
124 if not entry
and not name:
125 raise ValueError(
"Should specify at least one of name or entry")
128 name = entry.get_title()
129 self._append_to_stats(name, entry)
130 self.categories[key][name] = entry
132 if name
not in self.categories[key]:
133 raise ValueError(
"entry %s:%s does not exist!" % (key, name))
134 self._append_to_stats(name, self.categories[key][name])
137 """updates an entry and change its value to value"""
138 if key
not in self.categories:
139 raise ValueError(
"unknown category: %s" % key)
140 if name
not in self.categories[key]:
141 raise ValueError(
"unknown entry %s:%s" % (key, name))
142 self.categories[key][name].set_value(value)
145 extension=
'pdb', hierarchies=
None, restraints=
None):
"""Adds a placeholder for coordinates.
- format='rmf3':
    will write the whole system as provided, in rmf3 format
    - hierarchies must contain protein hierarchies
    - restraints is a list of restraints
- format='raw':
    will write provided data as-is
    - append: whether to append to a trajectory or to write multiple
      files. With this format, a trajectory is just a string, you can
      stuff whatever you want in it. If append is False, files will be
      numbered according to the counter of their category.
    - extension: the file extension to use
"""
159 if key
not in self.categories:
160 raise ValueError(
"unknown category: %s" % key)
161 self.categories[key][name] =
None
163 self.coordinates.append((key, name,
'raw', (append, extension)))
164 elif format ==
'rmf3':
167 assert hierarchies
is not None
168 rh = RMF.create_rmf_file(self.prefix +
'_' + name +
'_traj.rmf3')
172 self.coordinates.append((key, name,
'rmf3', rh))
174 raise ValueError(
"format can only be rmf3 or raw")
"""Updates the coordinates of the key:name entry. The format should match
the format specified when the coordinates were added (raw or rmf3).
Note that setting value to None is equivalent to not calling this
function.
"""
182 if key
not in self.categories:
183 raise ValueError(
"unknown category: %s" % key)
184 if name
not in self.categories[key]:
185 raise ValueError(
"unknown coordinates %s:%s" % (key, name))
186 self.categories[key][name] = value
189 """increments the counter of category 'key' by 'value' steps."""
190 if key
not in self.categories:
191 raise ValueError(
"unknown category: %s" % key)
192 cnt = self.categories[key][
'counter']
193 cnt.set_value(cnt.get_raw_value() + value)
def get_entry_category(self, entry):
    """Return the key of the category that holds *entry*.

    Scans ``self.categories`` in iteration order and returns the key of the
    first category whose values contain *entry*. Returns None when no
    category contains it.

    NOTE(review): the return line was not visible in the listing this block
    was restored from. Returning the category key matches the caller in
    format_titles, which formats the result as '<category>:<title>'.
    """
    for cat, members in self.categories.items():
        if entry in members.values():
            return cat
    return None
def format_titles(self):
    """Build the column titles for the statistics file.

    Each title has the form '<category>:<entry title>'. When
    ``self.add_numbers_to_titles`` is set, it is prefixed with '<n>:', where
    n is the 1-based position of the entry within its output line if
    ``self.num_entries_per_line`` is positive, or within the whole list of
    entries otherwise.

    Returns: a list of title strings, one per entry in ``self.entries``.

    NOTE(review): the accumulator, the ``else`` branches and the return
    statement were missing from the listing this block was restored from;
    they are reconstructed from the visible branch structure.
    """
    titles = []
    for i, entry in enumerate(self.entries):
        if self.add_numbers_to_titles:
            if self.num_entries_per_line > 0:
                # numbering restarts on every wrapped output line
                title = '%d:' % ((i % self.num_entries_per_line) + 1)
            else:
                title = '%d:' % (i + 1)
        else:
            title = ''
        cat = self.get_entry_category(entry)
        ti = entry.get_title()
        title += '%s:%s' % (cat, ti)
        titles.append(title)
    return titles
def get_formatted_entries(self):
    """Return ``get_value()`` of every entry in ``self.entries``, in order.

    Returns: a list with one element per registered entry.
    """
    return [entry.get_value() for entry in self.entries]
def should_wrap_line(self, pos, line):
    """Tell whether a line break belongs after the token at index *pos*.

    - pos: 0-based index of the token within *line*.
    - line: the full sequence of tokens being written.

    Returns: True when *pos* is the last slot of a group of
    ``self.num_entries_per_line`` tokens and is not the final token of
    *line*; False otherwise.

    NOTE(review): the return statements were not visible in the listing
    this block was restored from. The boolean result is reconstructed from
    the callers, which use it directly in ``if`` / ``if not`` tests.
    """
    num = self.num_entries_per_line
    # With num == -1, pos % num is 0 and num - 1 is -2, so the test never
    # matches and no wrapping is requested.
    return pos % num == num - 1 and pos != len(line) - 1
227 def prepare_line(self, line, marker='L'):
229 out += self.separator
230 for i, tok
in enumerate(line):
232 ln = 2 + (i / self.num_entries_per_line)
233 if self.should_wrap_line(i, line):
234 out +=
'\n%s%d' % (marker, ln)
235 out += self.separator
237 if not self.should_wrap_line(i, line):
def compress_file(self, fname):
    """Gzip *fname* into '<fname>.gz' and delete the uncompressed file.

    - fname: path of the file to compress.

    Raises OSError if *fname* cannot be read, the archive cannot be written,
    or the original cannot be removed.

    NOTE(review): the copy step between opening the two files and removing
    the original was not visible in the listing this block was restored
    from. It is reconstructed as a plain byte-for-byte copy.
    """
    import shutil

    # Open the source first, so a missing input does not leave an empty
    # archive behind. Both handles are closed even if the copy fails.
    with open(fname, 'rb') as src, gzip.open(fname + '.gz', 'wb') as dst:
        shutil.copyfileobj(src, dst)
    # os.remove avoids spawning a shell (the original ran 'rm %s'), so paths
    # containing spaces or shell metacharacters are handled correctly.
    os.remove(fname)
def new_stage(self, name):
    """Append a '### STAGE <name>' marker line to the statistics file.

    - name: label of the stage, formatted with '%s'.

    The file ``self.statfile`` is opened in append mode and is closed before
    returning, even if the write fails.
    """
    with open(self.statfile, 'a') as fl:
        fl.write("### STAGE %s\n" % name)
255 """Writes statistics to the stats file and writes/appends
256 trajectories. Only does that if the global step matches
257 the output rate. Trajectories are written more sparsely, see trajrate.
258 Returns: True if data was written, False if not.
260 stepno = self.categories[
'global'][
'counter'].get_raw_value()
261 if stepno % self.rate != 0:
264 fl = open(self.statfile,
'a')
267 self.write_title =
False
268 titles = self.format_titles()
269 fl.write(self.prepare_line(titles, marker=self.comment_marker))
270 elif self.repeat_title > 0:
271 if (stepno / self.rate) % self.repeat_title == 0:
272 self.write_title =
True
274 entries = self.get_formatted_entries()
275 fl.write(self.prepare_line(entries))
276 if self.separate_lines:
277 fl.write(
'*' * 80 +
'\n')
280 if stepno % (self.rate * self.trajrate) != 0:
282 for key, name, format, args
in self.coordinates:
283 if self.categories[key][name]
is None:
286 do_append, extension = args
288 pdbname = self.prefix +
'_traj.' + extension
289 if self.compress > 0
and stepno % self.compress == 0:
290 newname =
"%s_traj_%d.%s" % (
294 os.system(
'mv %s %s' % (pdbname, newname))
295 self.compress_file(newname)
296 fl = open(pdbname,
'a')
298 num = self.categories[key][
'counter'].get_raw_value()
300 self.prefix + (
'_%s_%010d.%s' %
301 (name, num, extension)),
'w')
302 fl.write(self.categories[key][name])
304 elif format ==
'rmf3':
310 self.categories[key][name] =
None
RMF::FrameID save_frame(RMF::FileHandle file, std::string name="")
Save the current state of the linked objects as a new RMF frame.
Classes to handle ISD statistics files.
def increment_counter
increments the counter of category 'key' by 'value' steps.
void add_hierarchies(RMF::NodeHandle fh, const atom::Hierarchies &hs)
def update
updates an entry and change its value to value
def write_stats
Writes statistics to the stats file and writes/appends trajectories.
void add_restraints(RMF::NodeHandle fh, const Restraints &hs)
def add_category
creates a logging entry for a simulation substep of the gibbs sampler.
def update_coordinates
updates the coordinates of key:name entry.
Support for the RMF file format for storing hierarchical molecular data and markup.
def add_coordinates
adds a placeholder for coordinates
def add_entry
add an entry for the statistics file