10     """Statistics gathering and printing class for ISD gibbs sampling. 
   11     Also manages the restart file (TODO). 
   12     - prefix: all outputted files will have this prefix 
   13     - rate: print statistics every so many gibbs sampling steps 
   14     - trajrate: print trajectories (pdb) every multiple of rate (default 1). 
   15                 implies that update_coordinates() has been called by that time 
   16                 otherwise writing will not occur. 
   17     - statfile: suffix of the statistics file 
   18     - num_entries_per_line: number of entries per line in the output. -1 to disable wrapping.
   20     - repeat_title: if 0 (default) only print it in the beginning. Else repeat 
   21                     it every 'repeat_title' outputted lines in the statistics file.
   23     - separate_lines: If False the entries are not separated (default). 
   24                       If True, the lines are separated with stars. 
   25     - compress: If set to a positive number of steps, compress trajectories 
   26                 each time so many steps have elapsed, appending the current 
   27                 frame number to the filename. Only works in append mode, and 
   28                 when it is set to a multiple of rate. 
   30     TODO: check if everything was updated nicely 
   33     def __init__(self, prefix='r01', rate=1, trajrate=1, statfile='_stats.txt',
 
   34                  num_entries_per_line=5, repeat_title=0,
 
   35                  separate_lines=
False, compress=10000):
 
   38         self.trajrate = trajrate
 
   39         self.statfile = prefix + statfile
 
   40         self.compress = compress
 
   46         self.__counter_pos = 0
 
   54         self.write_title = 
True 
   55         if num_entries_per_line == 0 
or num_entries_per_line < -1:
 
   56             raise ValueError(
"number of entries per line is >0 or equal to -1")
 
   57         if num_entries_per_line == -1:
 
   58             self.wrap_stats = 
False 
   60             self.wrap_stats = 
True 
   61         self.num_entries_per_line = num_entries_per_line
 
   62         self.add_numbers_to_titles = 
True 
   64         self.repeat_title = repeat_title
 
   65         self.separate_lines = separate_lines
 
   66         self.comment_marker = 
'#' 
   68     def _get_unique_category_name(self, name):
 
   70             if name 
in self.categories.keys():
 
   74                     ncat = 
''.join([name, 
'%d' % i])
 
   75                     if ncat 
not in self.categories.keys():
 
   84                 if ncat 
not in self.categories.keys():
 
   89         """creates a logging entry for a simulation substep of the gibbs 
   90         sampler. Each category has its own counter, initialized to zero. 
   91         The global category does not need to be created, it's already created 
   92         by the init method, and its key is 'global'. 
   93         - name: an optional name, must be string. 
   94         Returns: a unique key to refer to this category, which will start with 
   97         ncat = self._get_unique_category_name(name)
 
   98         self.categories[ncat] = {
'counter': Entry(
'step', 
'%10d', 0)}
 
  101     def _append_to_stats(self, name, entry):
 
  102         """append to stats, or put in front if entry is a counter""" 
  103         if name == 
'counter':
 
  104             self.entries.insert(self.__counter_pos, entry)
 
  105             self.__counter_pos += 1
 
  107             self.entries.append(entry)
 
  110         """add an entry for the statistics file 
  111         - key: which category it belongs to (key returned by add_category) 
  112         You must specify at least one of the two following: 
  113         - name: a name for this entry 
  114         - entry: an instance of the Entry class. 
  115         Arguments:  - name only: must already have an entry by that name. 
  116                     - entry only: name is set to the entry title and added. If 
  117                       it didn't exist before it is stored as well. 
  118                     - name and entry: name is used instead of the title. 
  119                     - nothing: raises an error. 
  120         Currently, not providing entry only makes sense for the counter since 
  121         there is no method to create an entry without adding it to the 
  124         if not entry 
and not name:
 
  125             raise ValueError(
"Should specify at least one of name or entry")
 
  128                 name = entry.get_title()
 
  129             self._append_to_stats(name, entry)
 
  130             self.categories[key][name] = entry
 
  132             if name 
not in self.categories[key]:
 
  133                 raise ValueError(
"entry %s:%s does not exist!" % (key, name))
 
  134             self._append_to_stats(name, self.categories[key][name])
 
  137         """updates an entry and change its value to value""" 
  138         if key 
not in self.categories:
 
  139             raise ValueError(
"unknown category: %s" % key)
 
  140         if name 
not in self.categories[key]:
 
  141             raise ValueError(
"unknown entry %s:%s" % (key, name))
 
  142         self.categories[key][name].set_value(value)
 
  145                         extension=
'pdb', hierarchies=
None, restraints=
None):
 
  146         """adds a placeholder for coordinates 
  148             will write the whole system as provided, in rmf3 format 
  149             - hierarchies must contain protein hierarchies 
  150             - restraints is a list of restraints 
  152             will write provided data as-is 
  153             - append: whether to append to a trajectory or to write multiple 
  154               files. With this format, a trajectory is just a string, you can 
  155               stuff whatever you want in it. If append is False, files will be 
  156               numbered according to the counter of their category. 
  157             - extension: the file extension to use 
  159         if key 
not in self.categories:
 
  160             raise ValueError(
"unknown category: %s" % key)
 
  161         self.categories[key][name] = 
None 
  163             self.coordinates.append((key, name, 
'raw', (append, extension)))
 
  164         elif format == 
'rmf3':
 
  167             assert hierarchies 
is not None 
  168             rh = RMF.create_rmf_file(self.prefix + 
'_' + name + 
'_traj.rmf3')
 
  172             self.coordinates.append((key, name, 
'rmf3', rh))
 
  174             raise ValueError(
"format can only be rmf3 or raw")
 
  177         """updates the coordinates of key:name entry. Format should match with 
  178         the format specified at init time (raw or rmf3) 
  179         note that setting value to None is equivalent to not calling this 
  182         if key 
not in self.categories:
 
  183             raise ValueError(
"unknown category: %s" % key)
 
  184         if name 
not in self.categories[key]:
 
  185             raise ValueError(
"unknown coordinates %s:%s" % (key, name))
 
  186         self.categories[key][name] = value
 
  189         """increments the counter of category 'key' by 'value' steps.""" 
  190         if key 
not in self.categories:
 
  191             raise ValueError(
"unknown category: %s" % key)
 
  192         cnt = self.categories[key][
'counter']
 
  193         cnt.set_value(cnt.get_raw_value() + value)
 
  195     def get_entry_category(self, entry):
 
  197         for cat 
in self.categories:
 
  198             if entry 
in self.categories[cat].values():
 
  201     def format_titles(self):
 
  203         for (i, entry) 
in enumerate(self.entries):
 
  204             if self.add_numbers_to_titles:
 
  205                 if self.num_entries_per_line > 0:
 
  206                     title = 
'%d:' % ((i % self.num_entries_per_line) + 1)
 
  208                     title = 
'%d:' % (i + 1)
 
  211             cat = self.get_entry_category(entry)
 
  212             ti = entry.get_title()
 
  213             title += 
'%s:%s' % (cat, ti)
 
  217     def get_formatted_entries(self):
 
  218         return [ent.get_value() 
for ent 
in self.entries]
 
  220     def should_wrap_line(self, pos, line):
 
  222             num = self.num_entries_per_line
 
  223             if pos % num == num - 1 
and pos != len(line) - 1:
 
  227     def prepare_line(self, line, marker='L'):
 
  229         out += self.separator
 
  230         for i, tok 
in enumerate(line):
 
  232             ln = 2 + (i / self.num_entries_per_line)
 
  233             if self.should_wrap_line(i, line):
 
  234                 out += 
'\n%s%d' % (marker, ln)
 
  235             out += self.separator
 
  237         if not self.should_wrap_line(i, line):
 
  241     def compress_file(self, fname):
 
  242         gz = gzip.open(fname + 
'.gz', 
'wb')
 
  243         fl = open(fname, 
'rb')
 
  247         os.system(
'rm %s' % fname)
 
  249     def new_stage(self, name):
 
  250         fl = open(self.statfile, 
'a')
 
  251         fl.write(
"### STAGE %s\n" % name)
 
  255         """Writes statistics to the stats file and writes/appends 
  256         trajectories. Only does that if the global step matches 
  257         the output rate. Trajectories are written more sparsely, see trajrate. 
  258         Returns: True if data was written, False if not. 
  260         stepno = self.categories[
'global'][
'counter'].get_raw_value()
 
  261         if stepno % self.rate != 0:
 
  264         fl = open(self.statfile, 
'a')
 
  267             self.write_title = 
False 
  268             titles = self.format_titles()
 
  269             fl.write(self.prepare_line(titles, marker=self.comment_marker))
 
  270         elif self.repeat_title > 0:
 
  271             if (stepno / self.rate) % self.repeat_title == 0:
 
  272                 self.write_title = 
True 
  274         entries = self.get_formatted_entries()
 
  275         fl.write(self.prepare_line(entries))
 
  276         if self.separate_lines:
 
  277             fl.write(
'*' * 80 + 
'\n')
 
  280         if stepno % (self.rate * self.trajrate) != 0:
 
  282         for key, name, format, args 
in self.coordinates:
 
  283             if self.categories[key][name] 
is None:
 
  286                 do_append, extension = args
 
  288                     pdbname = self.prefix + 
'_traj.' + extension
 
  289                     if self.compress > 0 
and stepno % self.compress == 0:
 
  290                         newname = 
"%s_traj_%d.%s" % (
 
  294                         os.system(
'mv %s %s' % (pdbname, newname))
 
  295                         self.compress_file(newname)
 
  296                     fl = open(pdbname, 
'a')
 
  298                     num = self.categories[key][
'counter'].get_raw_value()
 
  300                         self.prefix + (
'_%s_%010d.%s' %
 
  301                                        (name, num, extension)), 
'w')
 
  302                 fl.write(self.categories[key][name])
 
  304             elif format == 
'rmf3':
 
  310             self.categories[key][name] = 
None 
RMF::FrameID save_frame(RMF::FileHandle file, std::string name="")
Save the current state of the linked objects as a new RMF frame. 
 
Classes to handle ISD statistics files. 
 
def increment_counter
increments the counter of category 'key' by 'value' steps. 
 
void add_hierarchies(RMF::NodeHandle fh, const atom::Hierarchies &hs)
 
def update
Updates an entry, changing its value to the given value.
 
def write_stats
Writes statistics to the stats file and writes/appends trajectories. 
 
void add_restraints(RMF::NodeHandle fh, const Restraints &hs)
 
def add_category
creates a logging entry for a simulation substep of the gibbs sampler. 
 
def update_coordinates
updates the coordinates of key:name entry. 
 
Support for the RMF file format for storing hierarchical molecular data and markup. 
 
def add_coordinates
adds a placeholder for coordinates 
 
def add_entry
add an entry for the statistics file