8 """Statistics gathering and printing class for ISD gibbs sampling.
9 Also manages the restart file (TODO).
10 - prefix: all outputted files will have this prefix
11 - rate: print statistics every so many gibbs sampling steps
12 - trajrate: print trajectories (pdb) every multiple of rate (default 1).
13 - statfile: suffix of the statistics file
14 - append: whether to append to a trajectory or to write multiple files.
15 For the statistics class, a trajectory is just a string, you can
16 stuff whatever you want in it. If append is False, files will be
17 numbered according to the counter of their category.
18 - num_entries_per_line: number of entries per line in the output. -1 to
20 - repeat_title: if 0 (default) only print it in the beginning. Else repeat
21 it every 'repeat_title' outputted lines in the statistics file.
22 - separate_lines: If False the entries are not separated (default). If True,
23 the lines are separated with stars.
24 - compress: If set to a positive number of steps, compress trajectories each
25 time so many steps have elapsed, appending the current frame
26 number to the filename. Only works in append mode, and when it
27 is set to a multiple of rate.
29 TODO: check if everything was updated nicely
32 def __init__(self, prefix='r01', rate=1, trajrate=1, statfile='_stats.txt',
33 append=
True, num_entries_per_line=5, repeat_title=0,
34 separate_lines=
False,compress=10000):
37 self.trajrate=trajrate
38 self.statfile=prefix+statfile
40 self.compress=compress
46 self.__counter_pos = 0
55 if num_entries_per_line == 0
or num_entries_per_line < -1:
56 raise ValueError,
"number of entries per line is >0 or equal to -1"
57 if num_entries_per_line == -1:
58 self.wrap_stats =
False
60 self.wrap_stats =
True
61 self.num_entries_per_line = num_entries_per_line
62 self.add_numbers_to_titles =
True
64 self.repeat_title = repeat_title
65 self.separate_lines = separate_lines
66 self.comment_marker=
'#'
68 def _get_unique_category_name(self, name):
70 if name
in self.categories.keys():
74 ncat=
''.join([name,
'%d' % i])
75 if ncat
not in self.categories.keys():
84 if ncat
not in self.categories.keys():
89 """creates a logging entry for a simulation substep of the gibbs
90 sampler. Each category has its own counter, initialized to zero.
91 The global category does not need to be created, it's already created by
92 the init method, and its key is 'global'.
93 - name: an optional name, must be string.
94 Returns: a unique key to refer to this category, which will start with
97 ncat = self._get_unique_category_name(name)
98 self.categories[ncat]={
'counter':Entry(
'step',
'%10d', 0)}
def _append_to_stats(self, name, entry):
    """Register an entry in the ordered list of printed entries.

    - name: name under which the entry is registered; the special name
            'counter' marks a category counter.
    - entry: the entry object to add to self.entries.
    Counters are placed in front of all other entries, in the order in
    which they were added. Any other entry is appended at the end.
    """
    if name == 'counter':
        # __counter_pos is the index just past the counters inserted so far,
        # so successive counters stay grouped at the head of the list.
        self.entries.insert(self.__counter_pos, entry)
        self.__counter_pos += 1
    else:
        self.entries.append(entry)
def add_entry(self, key, name=None, entry=None):
    """Add an entry to the statistics file.

    - key: which category it belongs to (key returned by add_category).
    You must specify at least one of the two following:
    - name: a name for this entry.
    - entry: an instance of the Entry class.
    Behaviour depending on the arguments:
    - name only: the category must already hold an entry by that name;
                 that entry is added to the printed entries.
    - entry only: name is set to the entry title, the entry is stored in
                  the category and added to the printed entries.
    - name and entry: same, but name is used instead of the title.
    - nothing: raises ValueError.
    Currently, not providing entry only makes sense for the counter, which
    is stored in its category without being added to the printed entries.
    Raises ValueError if neither name nor entry is given, or if only name
    is given and the category has no entry by that name.
    """
    if not entry and not name:
        raise ValueError("Should specify at least one of name or entry")
    if entry:
        if not name:
            name = entry.get_title()
        self._append_to_stats(name, entry)
        self.categories[key][name] = entry
    else:
        if name not in self.categories[key]:
            raise ValueError("entry %s:%s does not exist!" % (key, name))
        self._append_to_stats(name, self.categories[key][name])
def update(self, key, name, value):
    """Update an existing entry, changing its value to value.

    - key: category the entry belongs to.
    - name: name of the entry inside that category.
    - value: new value, passed to the entry's set_value().
    Raises ValueError if the category or the entry is unknown.
    """
    if key not in self.categories:
        raise ValueError("unknown category: %s" % key)
    if name not in self.categories[key]:
        raise ValueError("unknown entry %s:%s" % (key, name))
    self.categories[key][name].set_value(value)
145 """adds a placeholder for coordinates"""
146 if not key
in self.categories:
147 raise ValueError,
"unknown category: %s" % key
148 self.categories[key][name]=
None
149 self.coordinates.append((key,name))
152 """updates the coordinates of key:name entry. Format should match with
153 the format specified at init time (pdb or cdf)
155 if not key
in self.categories:
156 raise ValueError,
"unknown category: %s" % key
157 if not name
in self.categories[key]:
158 raise ValueError,
"unknown coordinates %s:%s" % (key,name)
159 self.categories[key][name]=value
162 """increments the counter of category 'key' by 'value' steps."""
163 if not key
in self.categories:
164 raise ValueError,
"unknown category: %s" % key
165 cnt=self.categories[key][
'counter']
166 cnt.set_value(cnt.get_raw_value() + value)
168 def get_entry_category(self, entry):
170 for cat
in self.categories:
171 if entry
in self.categories[cat].values():
174 def format_titles(self):
176 for (i,entry)
in enumerate(self.entries):
177 if self.add_numbers_to_titles:
178 title=
'%d:' % ( (i % self.num_entries_per_line) + 1 )
181 cat = self.get_entry_category(entry)
182 ti = entry.get_title()
183 title +=
'%s:%s' % (cat,ti)
def get_formatted_entries(self):
    """Collect get_value() of every entry in self.entries, in list order."""
    formatted = []
    for item in self.entries:
        formatted.append(item.get_value())
    return formatted
190 def should_wrap_line(self, pos, line):
192 num = self.num_entries_per_line
193 if pos % num == num - 1
and pos != len(line)-1:
197 def prepare_line(self, line, marker='L'):
199 out += self.separator
200 for i,tok
in enumerate(line):
202 ln = 2 + (i / self.num_entries_per_line)
203 if self.should_wrap_line(i,line):
204 out +=
'\n%s%d' % (marker,ln)
205 out += self.separator
207 if not self.should_wrap_line(i,line):
211 def compress_file(self, fname):
212 gz=gzip.open(fname+
'.gz',
'wb')
217 os.system(
'rm %s' % fname)
def new_stage(self, name):
    """Append a stage marker line to the statistics file.

    - name: label of the stage; written as '### STAGE <name>'.
    The file self.statfile is opened in append mode and closed again
    before returning, even if the write fails.
    """
    with open(self.statfile, 'a') as fl:
        fl.write("### STAGE %s\n" % name)
225 """Writes statistics to the stats file and writes/appends
226 trajectories. Only does that if the global step matches
227 the output rate. Trajectories are written more sparsely, see trajrate.
228 Returns: True if data was written, False if not.
230 stepno = self.categories[
'global'][
'counter'].get_raw_value()
231 if stepno % self.rate != 0:
234 fl=open(self.statfile,
'a')
237 self.write_title =
False
238 titles = self.format_titles()
239 fl.write(self.prepare_line(titles, marker=self.comment_marker))
240 elif self.repeat_title > 0:
241 if (stepno/self.rate) % self.repeat_title == 0:
242 self.write_title =
True
244 entries = self.get_formatted_entries()
245 fl.write(self.prepare_line(entries))
246 if self.separate_lines:
247 fl.write(
'*'*80+
'\n')
250 if stepno % (self.rate*self.trajrate) != 0:
252 for key,name
in self.coordinates:
253 if self.categories[key][name]
is None:
254 raise ValueError,
"The trajectory was not passed to the stats class!"
256 pdbname=self.prefix+
'_traj.pdb'
257 if self.compress > 0
and stepno % self.compress == 0:
258 newname =
"%s_traj_%d.pdb" % (self.prefix, stepno)
259 os.system(
'mv %s %s' % (pdbname, newname))
260 self.compress_file(newname)
261 fl=open(pdbname,
'a')
263 num=self.categories[key][
'counter'].get_raw_value()
264 fl=open(self.prefix + (
'_%s_%010d.pdb' % (name,num)),
'w')
265 fl.write(self.categories[key][name])