10 """Statistics gathering and printing class for ISD gibbs sampling.
11 Also manages the restart file (TODO).
12 - prefix: all outputted files will have this prefix
13 - rate: print statistics every so many gibbs sampling steps
14 - trajrate: print trajectories (pdb) every multiple of rate (default 1).
15 implies that update_coordinates() has been called by that time
16 otherwise writing will not occur.
17 - statfile: suffix of the statistics file
18 - num_entries_per_line: number of entries per line in the output. -1 to disable wrapping.
20 - repeat_title: if 0 (default) only print it in the beginning. Else repeat
21 it every 'repeat_title' outputted lines in the statistics file.
22 - separate_lines: If False the entries are not separated (default). If True,
23 the lines are separated with stars.
24 - compress: If set to a positive number of steps, compress trajectories each
25 time so many steps have elapsed, appending the current frame
26 number to the filename. Only works in append mode, and when it
27 is set to a multiple of rate.
29 TODO: check if everything was updated nicely
32 def __init__(self, prefix='r01', rate=1, trajrate=1, statfile='_stats.txt',
33 num_entries_per_line=5, repeat_title=0,
34 separate_lines=
False, compress=10000):
37 self.trajrate = trajrate
38 self.statfile = prefix + statfile
39 self.compress = compress
45 self.__counter_pos = 0
53 self.write_title =
True
54 if num_entries_per_line == 0
or num_entries_per_line < -1:
55 raise ValueError(
"number of entries per line is >0 or equal to -1")
56 if num_entries_per_line == -1:
57 self.wrap_stats =
False
59 self.wrap_stats =
True
60 self.num_entries_per_line = num_entries_per_line
61 self.add_numbers_to_titles =
True
63 self.repeat_title = repeat_title
64 self.separate_lines = separate_lines
65 self.comment_marker =
'#'
67 def _get_unique_category_name(self, name):
69 if name
in self.categories.keys():
73 ncat =
''.join([name,
'%d' % i])
74 if ncat
not in self.categories.keys():
83 if ncat
not in self.categories.keys():
88 """creates a logging entry for a simulation substep of the gibbs
89 sampler. Each category has its own counter, initialized to zero.
90 The global category does not need to be created, it's already created by
91 the init method, and its key is 'global'.
92 - name: an optional name, must be string.
93 Returns: a unique key to refer to this category, which will start with the provided name.
96 ncat = self._get_unique_category_name(name)
97 self.categories[ncat] = {
'counter': Entry(
'step',
'%10d', 0)}
100 def _append_to_stats(self, name, entry):
101 """append to stats, or put in front if entry is a counter"""
102 if name ==
'counter':
103 self.entries.insert(self.__counter_pos, entry)
104 self.__counter_pos += 1
106 self.entries.append(entry)
109 """add an entry for the statistics file
110 - key: which category it belongs to (key returned by add_category)
111 You must specify at least one of the two following:
112 - name: a name for this entry
113 - entry: an instance of the Entry class.
114 Arguments: - name only: must already have an entry by that name.
115 - entry only: name is set to the entry title and added. If
116 it didn't exist before it is stored as well.
117 - name and entry: name is used instead of the title.
118 - nothing: raises an error.
119 Currently, not providing entry only makes sense for the counter since
120 there is no method to create an entry without adding it to the
123 if not entry
and not name:
124 raise ValueError(
"Should specify at least one of name or entry")
127 name = entry.get_title()
128 self._append_to_stats(name, entry)
129 self.categories[key][name] = entry
131 if not name
in self.categories[key]:
132 raise ValueError(
"entry %s:%s does not exist!" % (key, name))
133 self._append_to_stats(name, self.categories[key][name])
136 """updates an entry and change its value to value"""
137 if not key
in self.categories:
138 raise ValueError(
"unknown category: %s" % key)
139 if not name
in self.categories[key]:
140 raise ValueError(
"unknown entry %s:%s" % (key, name))
141 self.categories[key][name].set_value(value)
144 extension=
'pdb', hierarchies=
None, restraints=
None):
145 """adds a placeholder for coordinates
147 will write the whole system as provided, in rmf3 format
148 - hierarchies must contain protein hierarchies
149 - restraints is a list of restraints
151 will write provided data as-is
152 - append: whether to append to a trajectory or to write multiple
153 files. With this format, a trajectory is just a string, you can
154 stuff whatever you want in it. If append is False, files will be
155 numbered according to the counter of their category.
156 - extension: the file extension to use
158 if not key
in self.categories:
159 raise ValueError(
"unknown category: %s" % key)
160 self.categories[key][name] =
None
162 self.coordinates.append((key, name,
'raw', (append, extension)))
163 elif format ==
'rmf3':
166 assert hierarchies
is not None
167 rh = RMF.create_rmf_file(self.prefix +
'_' + name +
'_traj.rmf3')
171 self.coordinates.append((key, name,
'rmf3', rh))
173 raise ValueError,
"format can only be rmf3 or raw"
176 """updates the coordinates of key:name entry. Format should match with
177 the format specified at init time (raw or rmf3)
178 note that setting value to None is equivalent to not calling this function.
181 if not key
in self.categories:
182 raise ValueError(
"unknown category: %s" % key)
183 if not name
in self.categories[key]:
184 raise ValueError(
"unknown coordinates %s:%s" % (key, name))
185 self.categories[key][name] = value
188 """increments the counter of category 'key' by 'value' steps."""
189 if not key
in self.categories:
190 raise ValueError(
"unknown category: %s" % key)
191 cnt = self.categories[key][
'counter']
192 cnt.set_value(cnt.get_raw_value() + value)
194 def get_entry_category(self, entry):
196 for cat
in self.categories:
197 if entry
in self.categories[cat].values():
200 def format_titles(self):
202 for (i, entry)
in enumerate(self.entries):
203 if self.add_numbers_to_titles:
204 if self.num_entries_per_line>0:
205 title =
'%d:' % ((i % self.num_entries_per_line) + 1)
207 title =
'%d:' % (i + 1)
210 cat = self.get_entry_category(entry)
211 ti = entry.get_title()
212 title +=
'%s:%s' % (cat, ti)
def get_formatted_entries(self):
    """Collect the value of every registered entry, in output order.

    Returns: a new list holding the result of get_value() for each item
    of self.entries, in the same order as self.entries.
    """
    values = []
    for item in self.entries:
        values.append(item.get_value())
    return values
219 def should_wrap_line(self, pos, line):
221 num = self.num_entries_per_line
222 if pos % num == num - 1
and pos != len(line) - 1:
226 def prepare_line(self, line, marker='L'):
228 out += self.separator
229 for i, tok
in enumerate(line):
231 ln = 2 + (i / self.num_entries_per_line)
232 if self.should_wrap_line(i, line):
233 out +=
'\n%s%d' % (marker, ln)
234 out += self.separator
236 if not self.should_wrap_line(i, line):
def compress_file(self, fname):
    """Gzip fname into fname + '.gz' and remove the uncompressed original.

    - fname: path of the file to compress.

    Raises: IOError/OSError if fname cannot be read, the archive cannot be
    written, or the original cannot be removed.

    NOTE(review): the statements between opening the two files and the
    removal of the original were not visible when this was reviewed; the
    transfer is assumed to be a plain byte-for-byte copy -- confirm.
    """
    import shutil

    # Open the source first so that a missing input does not leave an empty
    # archive behind; the context managers close both handles even if the
    # copy fails part-way.
    with open(fname, 'rb') as fl:
        with gzip.open(fname + '.gz', 'wb') as gz:
            shutil.copyfileobj(fl, gz)
    # os.remove instead of a shell 'rm': no subprocess is spawned, and file
    # names containing spaces or shell metacharacters are handled correctly.
    os.remove(fname)
def new_stage(self, name):
    """Append a stage marker line to the statistics file.

    Writes the line '### STAGE <name>' at the end of self.statfile.

    - name: label of the stage; interpolated into the marker with %s.
    """
    # The context manager closes (and therefore flushes) the handle even if
    # the write raises, so no descriptor is left open on the stats file.
    with open(self.statfile, 'a') as fl:
        fl.write("### STAGE %s\n" % name)
254 """Writes statistics to the stats file and writes/appends
255 trajectories. Only does that if the global step matches
256 the output rate. Trajectories are written more sparsely, see trajrate.
257 Returns: True if data was written, False if not.
259 stepno = self.categories[
'global'][
'counter'].get_raw_value()
260 if stepno % self.rate != 0:
263 fl = open(self.statfile,
'a')
266 self.write_title =
False
267 titles = self.format_titles()
268 fl.write(self.prepare_line(titles, marker=self.comment_marker))
269 elif self.repeat_title > 0:
270 if (stepno / self.rate) % self.repeat_title == 0:
271 self.write_title =
True
273 entries = self.get_formatted_entries()
274 fl.write(self.prepare_line(entries))
275 if self.separate_lines:
276 fl.write(
'*' * 80 +
'\n')
279 if stepno % (self.rate * self.trajrate) != 0:
281 for key, name, format, args
in self.coordinates:
282 if self.categories[key][name]
is None:
285 do_append, extension = args
287 pdbname = self.prefix +
'_traj.' + extension
288 if self.compress > 0
and stepno % self.compress == 0:
289 newname =
"%s_traj_%d.%s" % (self.prefix, stepno, extension)
290 os.system(
'mv %s %s' % (pdbname, newname))
291 self.compress_file(newname)
292 fl = open(pdbname,
'a')
294 num = self.categories[key][
'counter'].get_raw_value()
295 fl = open(self.prefix + (
'_%s_%010d.%s' % (name, num, extension)),
'w')
296 fl.write(self.categories[key][name])
298 elif format ==
'rmf3':
304 self.categories[key][name] =
None
void save_frame(RMF::FileHandle file, unsigned int, std::string name="")
void add_restraints(RMF::NodeHandle fh, const kernel::Restraints &hs)
Classes to handle ISD statistics files.
def increment_counter
increments the counter of category 'key' by 'value' steps.
void add_hierarchies(RMF::NodeHandle fh, const atom::Hierarchies &hs)
def update
updates an entry and change its value to value
def write_stats
Writes statistics to the stats file and writes/appends trajectories.
def add_category
creates a logging entry for a simulation substep of the gibbs sampler.
def update_coordinates
updates the coordinates of key:name entry.
See IMP.rmf for more information.
def add_coordinates
adds a placeholder for coordinates
def add_entry
add an entry for the statistics file