3 from __future__
import print_function
"""Statistics gathering and printing class for ISD gibbs sampling.
Also manages the restart file (TODO).
- prefix: all outputted files will have this prefix
- rate: print statistics every so many gibbs sampling steps
- trajrate: print trajectories (pdb) every multiple of rate (default 1).
            This requires that update_coordinates() has been called by
            that time; otherwise writing will not occur.
- statfile: suffix of the statistics file
- num_entries_per_line: number of entries per line in the output. -1 to
                        disable wrapping.
- repeat_title: if 0 (default) only print it in the beginning. Else repeat
                it every 'repeat_title' outputted lines in the statistics
                file.
- separate_lines: If False the entries are not separated (default).
                  If True, the lines are separated with stars.
- compress: If set to a positive number of steps, compress trajectories
            each time so many steps have elapsed, appending the current
            frame number to the filename. Only works in append mode, and
            when it is set to a multiple of rate.

TODO: check if everything was updated nicely
"""
34 def __init__(self, prefix='r01', rate=1, trajrate=1, statfile='_stats.txt',
35 num_entries_per_line=5, repeat_title=0,
36 separate_lines=
False, compress=10000):
39 self.trajrate = trajrate
40 self.statfile = prefix + statfile
41 self.compress = compress
47 self.__counter_pos = 0
55 self.write_title =
True
56 if num_entries_per_line == 0
or num_entries_per_line < -1:
57 raise ValueError(
"number of entries per line is >0 or equal to -1")
58 if num_entries_per_line == -1:
59 self.wrap_stats =
False
61 self.wrap_stats =
True
62 self.num_entries_per_line = num_entries_per_line
63 self.add_numbers_to_titles =
True
65 self.repeat_title = repeat_title
66 self.separate_lines = separate_lines
67 self.comment_marker =
'#'
69 def _get_unique_category_name(self, name):
71 if name
in self.categories.keys():
75 ncat =
''.join([name,
'%d' % i])
76 if ncat
not in self.categories.keys():
85 if ncat
not in self.categories.keys():
90 """creates a logging entry for a simulation substep of the gibbs
91 sampler. Each category has its own counter, initialized to zero.
92 The global category does not need to be created, it's already created
93 by the init method, and its key is 'global'.
94 - name: an optional name, must be string.
95 Returns: a unique key to refer to this category, which will start with
98 ncat = self._get_unique_category_name(name)
99 self.categories[ncat] = {
'counter': Entry(
'step',
'%10d', 0)}
def _append_to_stats(self, name, entry):
    """append to stats, or put in front if entry is a counter

    - name: name of the entry; the special name 'counter' marks a counter
    - entry: the object stored in self.entries
    Counters are kept together at the front of self.entries, in the order
    in which they were added; any other entry goes to the end of the list.
    """
    if name == 'counter':
        # __counter_pos is the index just past the last counter inserted
        self.entries.insert(self.__counter_pos, entry)
        self.__counter_pos += 1
    else:
        self.entries.append(entry)
111 """add an entry for the statistics file
112 - key: which category it belongs to (key returned by add_category)
113 You must specify at least one of the two following:
114 - name: a name for this entry
115 - entry: an instance of the Entry class.
116 Arguments: - name only: must already have an entry by that name.
117 - entry only: name is set to the entry title and added. If
118 it didn't exist before it is stored as well.
119 - name and entry: name is used instead of the title.
120 - nothing: raises an error.
121 Currently, not providing entry only makes sense for the counter since
122 there is no method to create an entry without adding it to the
125 if not entry
and not name:
126 raise ValueError(
"Should specify at least one of name or entry")
129 name = entry.get_title()
130 self._append_to_stats(name, entry)
131 self.categories[key][name] = entry
133 if name
not in self.categories[key]:
134 raise ValueError(
"entry %s:%s does not exist!" % (key, name))
135 self._append_to_stats(name, self.categories[key][name])
138 """updates an entry and change its value to value"""
139 if key
not in self.categories:
140 raise ValueError(
"unknown category: %s" % key)
141 if name
not in self.categories[key]:
142 raise ValueError(
"unknown entry %s:%s" % (key, name))
143 self.categories[key][name].set_value(value)
146 extension=
'pdb', hierarchies=
None, restraints=
None):
147 """adds a placeholder for coordinates
149 will write the whole system as provided, in rmf3 format
150 - hierarchies must contain protein hierarchies
151 - restraints is a list of restraints
153 will write provided data as-is
154 - append: whether to append to a trajectory or to write multiple
155 files. With this format, a trajectory is just a string, you can
156 stuff whatever you want in it. If append is False, files will be
157 numbered according to the counter of their category.
158 - extension: the file extension to use
160 if key
not in self.categories:
161 raise ValueError(
"unknown category: %s" % key)
162 self.categories[key][name] =
None
164 self.coordinates.append((key, name,
'raw', (append, extension)))
165 elif format ==
'rmf3':
168 assert hierarchies
is not None
169 rh = RMF.create_rmf_file(self.prefix +
'_' + name +
'_traj.rmf3')
173 self.coordinates.append((key, name,
'rmf3', rh))
175 raise ValueError(
"format can only be rmf3 or raw")
178 """updates the coordinates of key:name entry. Format should match with
179 the format specified at init time (raw or rmf3)
180 note that setting value to None is equivalent to not calling this
183 if key
not in self.categories:
184 raise ValueError(
"unknown category: %s" % key)
185 if name
not in self.categories[key]:
186 raise ValueError(
"unknown coordinates %s:%s" % (key, name))
187 self.categories[key][name] = value
190 """increments the counter of category 'key' by 'value' steps."""
191 if key
not in self.categories:
192 raise ValueError(
"unknown category: %s" % key)
193 cnt = self.categories[key][
'counter']
194 cnt.set_value(cnt.get_raw_value() + value)
def get_entry_category(self, entry):
    """find the category an entry belongs to

    - entry: the object to look for among the values of each category
    Returns: the key of the first category of self.categories that holds
    entry, or None if no category contains it.
    """
    # linear scan over every category; categories maps key -> {name: entry}
    for cat, members in self.categories.items():
        if entry in members.values():
            return cat
    return None
def format_titles(self):
    """build the list of column titles, one per entry of self.entries

    Each title reads 'category:title'. When self.add_numbers_to_titles is
    set, it is prefixed with 'N:' where N is the 1-based column number:
    the position within the output line if num_entries_per_line > 0,
    else the position within the whole list of entries.
    Returns: a list of strings, in the same order as self.entries.
    """
    titles = []
    for i, entry in enumerate(self.entries):
        if not self.add_numbers_to_titles:
            number = ''
        elif self.num_entries_per_line > 0:
            # numbering restarts on every wrapped output line
            number = '%d:' % ((i % self.num_entries_per_line) + 1)
        else:
            number = '%d:' % (i + 1)
        cat = self.get_entry_category(entry)
        titles.append('%s%s:%s' % (number, cat, entry.get_title()))
    return titles
def get_formatted_entries(self):
    """return the result of get_value() for every entry of self.entries,
    in order."""
    return [ent.get_value() for ent in self.entries]
def should_wrap_line(self, pos, line):
    """tell whether a line break must follow the token at index pos

    - pos: index of the token within line
    - line: the full list of tokens being written
    Returns: True if pos is the last slot of an output row of
    num_entries_per_line tokens and is not the last token of line,
    False otherwise (always False when wrapping is disabled).
    """
    if not self.wrap_stats:
        return False
    num = self.num_entries_per_line
    # the very last token never wraps: the caller ends the line itself
    return pos % num == num - 1 and pos != len(line) - 1
def prepare_line(self, line, marker='L'):
    """join tokens into the text written to the statistics file

    - line: list of strings (titles or formatted entry values)
    - marker: prefix of every output row, followed by the row number
    Every token is followed by self.separator. Whenever should_wrap_line()
    says so, a new row is started, prefixed with marker and its number
    (rows are numbered from 1). An empty line yields just the first row
    prefix.
    Returns: the formatted text, terminated by a newline.
    """
    # NOTE(review): the first-row prefix marker + '1' is reconstructed from
    # the continuation rows being numbered from 2 -- confirm against upstream
    out = marker + '1'
    out += self.separator
    wrapped_last = False
    for i, tok in enumerate(line):
        out += tok
        wrapped_last = self.should_wrap_line(i, line)
        if wrapped_last:
            # integer division: row number of the row that starts here
            out += '\n%s%d' % (marker, 2 + i // self.num_entries_per_line)
        out += self.separator
    # don't add a newline if we just started a new row
    if not wrapped_last:
        out += '\n'
    return out
def compress_file(self, fname):
    """gzip a file and delete the uncompressed original

    - fname: path of the file to compress; the result is written to
             fname + '.gz'
    Raises: IOError/OSError if fname cannot be read, the archive cannot be
    written, or the original cannot be removed.
    """
    import shutil

    # context managers make sure both handles are closed even on error
    with open(fname, 'rb') as src, gzip.open(fname + '.gz', 'wb') as dst:
        shutil.copyfileobj(src, dst)
    # os.remove instead of a shell 'rm': safe for names holding spaces or
    # shell metacharacters, and does not depend on a POSIX shell
    os.remove(fname)
def new_stage(self, name):
    """append a '### STAGE <name>' marker line to the statistics file

    - name: label of the stage, formatted with %s
    """
    # 'with' closes the file even if the write fails
    with open(self.statfile, 'a') as fl:
        fl.write("### STAGE %s\n" % name)
256 """Writes statistics to the stats file and writes/appends
257 trajectories. Only does that if the global step matches
258 the output rate. Trajectories are written more sparsely, see trajrate.
259 Returns: True if data was written, False if not.
261 stepno = self.categories[
'global'][
'counter'].get_raw_value()
262 if stepno % self.rate != 0:
265 fl = open(self.statfile,
'a')
268 self.write_title =
False
269 titles = self.format_titles()
270 fl.write(self.prepare_line(titles, marker=self.comment_marker))
271 elif self.repeat_title > 0:
272 if (stepno / self.rate) % self.repeat_title == 0:
273 self.write_title =
True
275 entries = self.get_formatted_entries()
276 fl.write(self.prepare_line(entries))
277 if self.separate_lines:
278 fl.write(
'*' * 80 +
'\n')
281 if stepno % (self.rate * self.trajrate) != 0:
283 for key, name, format, args
in self.coordinates:
284 if self.categories[key][name]
is None:
287 do_append, extension = args
289 pdbname = self.prefix +
'_traj.' + extension
290 if self.compress > 0
and stepno % self.compress == 0:
291 newname =
"%s_traj_%d.%s" % (
295 os.system(
'mv %s %s' % (pdbname, newname))
296 self.compress_file(newname)
297 fl = open(pdbname,
'a')
299 num = self.categories[key][
'counter'].get_raw_value()
301 self.prefix + (
'_%s_%010d.%s' %
302 (name, num, extension)),
'w')
303 fl.write(self.categories[key][name])
305 elif format ==
'rmf3':
311 self.categories[key][name] =
None
RMF::FrameID save_frame(RMF::FileHandle file, std::string name="")
Save the current state of the linked objects as a new RMF frame.
Classes to handle ISD statistics files.
def increment_counter
increments the counter of category 'key' by 'value' steps.
void add_hierarchies(RMF::NodeHandle fh, const atom::Hierarchies &hs)
def update
updates an entry and change its value to value
def write_stats
Writes statistics to the stats file and writes/appends trajectories.
void add_restraints(RMF::NodeHandle fh, const Restraints &hs)
def add_category
creates a logging entry for a simulation substep of the gibbs sampler.
def update_coordinates
updates the coordinates of key:name entry.
Support for the RMF file format for storing hierarchical molecular data and markup.
def add_coordinates
adds a placeholder for coordinates
def add_entry
add an entry for the statistics file