IMP logo
IMP Reference Guide  2.20.2
The Integrative Modeling Platform
Statistics.py
1 #!/usr/bin/env python
2 
3 from __future__ import print_function
4 from IMP.isd.Entry import Entry
5 import gzip
6 import os
7 
8 
class Statistics:

    """Statistics gathering and printing class for ISD gibbs sampling.
    Also manages the restart file (TODO).

    Constructor arguments:
    - prefix: all outputted files will have this prefix
    - rate: print statistics every so many gibbs sampling steps
    - trajrate: print trajectories (pdb) every multiple of rate (default 1).
                implies that update_coordinates() has been called by that
                time, otherwise writing will not occur.
    - statfile: suffix of the statistics file
    - num_entries_per_line: number of entries per line in the output. -1 to
                            disable wrapping.
    - repeat_title: if 0 (default) only print it in the beginning. Else
                    repeat it every 'repeat_title' outputted lines in the
                    statistics file.
    - separate_lines: If False the entries are not separated (default).
                      If True, the lines are separated with stars.
    - compress: If set to a positive number of steps, compress trajectories
                each time so many steps have elapsed, appending the current
                frame number to the filename. Only works in append mode,
                and when it is set to a multiple of rate.

    Entries are grouped in categories (see add_category); every category
    owns a 'counter' entry, and the 'global' category is created by the
    constructor.

    TODO: check if everything was updated nicely
    """

    def __init__(self, prefix='r01', rate=1, trajrate=1,
                 statfile='_stats.txt', num_entries_per_line=5,
                 repeat_title=0, separate_lines=False, compress=10000):
        """Set up the output options and create the 'global' category.

        Raises ValueError if num_entries_per_line is 0 or smaller than -1.
        """
        self.prefix = prefix
        self.rate = rate
        self.trajrate = trajrate
        self.statfile = prefix + statfile
        self.compress = compress
        # list of the things that will be printed to the stats file, in order.
        self.entries = []
        # list of coordinate records: (key, name, format, format-specific args)
        self.coordinates = []
        # internal: position at which the next counter entry is inserted, so
        # that all counters come first in the table
        self.__counter_pos = 0
        # the same entries but sorted by category
        self.categories = {}
        # create the global category along with the global counter
        self.add_category('global')
        # that makes it the first entry of the table.
        self.add_entry('global', name='counter')
        # output-specific flags
        self.write_title = True
        if num_entries_per_line == 0 or num_entries_per_line < -1:
            raise ValueError("number of entries per line is >0 or equal to -1")
        # -1 is the only accepted way of turning wrapping off
        self.wrap_stats = num_entries_per_line != -1
        self.num_entries_per_line = num_entries_per_line
        self.add_numbers_to_titles = True
        self.separator = ' '
        self.repeat_title = repeat_title
        self.separate_lines = separate_lines
        self.comment_marker = '#'

    def _get_unique_category_name(self, name):
        """Return a category key that is not in use yet.

        - name: requested name. If it is empty or None, the key is a bare
          number. If it is already taken, the smallest integer suffix
          (starting at 1) that makes it unique is appended.
        """
        if name and name not in self.categories:
            return name
        base = name if name else ''
        i = 1
        while base + '%d' % i in self.categories:
            i += 1
        return base + '%d' % i

    def add_category(self, name=None):
        """creates a logging entry for a simulation substep of the gibbs
        sampler. Each category has its own counter, initialized to zero.
        The global category does not need to be created, it's already created
        by the init method, and its key is 'global'.
        - name: an optional name, must be string.
        Returns: a unique key to refer to this category, which will start with
        the optional name.
        """
        ncat = self._get_unique_category_name(name)
        self.categories[ncat] = {'counter': Entry('step', '%10d', 0)}
        return ncat

    def _append_to_stats(self, name, entry):
        """append to stats, or put in front if entry is a counter"""
        if name == 'counter':
            # counters are kept together at the beginning of the table, in
            # the order in which they were added
            self.entries.insert(self.__counter_pos, entry)
            self.__counter_pos += 1
        else:
            self.entries.append(entry)

    def add_entry(self, key, name=None, entry=None):
        """add an entry for the statistics file
        - key: which category it belongs to (key returned by add_category)
        You must specify at least one of the two following:
        - name: a name for this entry
        - entry: an instance of the Entry class.
        Arguments: - name only: must already have an entry by that name.
                   - entry only: name is set to the entry title and added. If
                     it didn't exist before it is stored as well.
                   - name and entry: name is used instead of the title.
                   - nothing: raises an error.
        Currently, not providing entry only makes sense for the counter since
        there is no method to create an entry without adding it to the
        statistics file.

        Raises: ValueError if neither name nor entry is given, or if only a
        name is given and the category has no entry by that name; KeyError
        if the category key is unknown.
        """
        if entry is None and not name:
            raise ValueError("Should specify at least one of name or entry")
        # look the category up first: an unknown key must fail before the
        # list of printed entries is modified
        category = self.categories[key]
        if entry is not None:
            if not name:
                name = entry.get_title()
            category[name] = entry
        elif name not in category:
            raise ValueError("entry %s:%s does not exist!" % (key, name))
        self._append_to_stats(name, category[name])

    def update(self, key, name, value):
        """updates an entry and change its value to value

        Raises ValueError if the category or the entry is unknown.
        """
        if key not in self.categories:
            raise ValueError("unknown category: %s" % key)
        if name not in self.categories[key]:
            raise ValueError("unknown entry %s:%s" % (key, name))
        self.categories[key][name].set_value(value)

    def add_coordinates(self, key, name, format='raw', append=True,
                        extension='pdb', hierarchies=None, restraints=None):
        """adds a placeholder for coordinates
        - format = rmf3:
            will write the whole system as provided, in rmf3 format
            - hierarchies must contain protein hierarchies
            - restraints is a list of restraints
        - format = raw:
            will write provided data as-is
            - append: whether to append to a trajectory or to write multiple
              files. With this format, a trajectory is just a string, you can
              stuff whatever you want in it. If append is False, files will be
              numbered according to the counter of their category.
            - extension: the file extension to use

        Raises ValueError if the category is unknown, if format is neither
        'raw' nor 'rmf3', or if format is 'rmf3' and no hierarchies are
        given. Nothing is registered when an error is raised.
        """
        if key not in self.categories:
            raise ValueError("unknown category: %s" % key)
        if format not in ('raw', 'rmf3'):
            raise ValueError("format can only be rmf3 or raw")
        if format == 'raw':
            record = (key, name, 'raw', (append, extension))
        else:
            # explicit check instead of an assert, which python -O strips
            if hierarchies is None:
                raise ValueError("hierarchies are required for rmf3 format")
            # imported here so that raw output works without RMF installed
            import RMF
            import IMP.rmf
            rh = RMF.create_rmf_file(self.prefix + '_' + name + '_traj.rmf3')
            IMP.rmf.add_hierarchies(rh, hierarchies)
            if restraints:
                IMP.rmf.add_restraints(rh, restraints)
            record = (key, name, 'rmf3', rh)
        # None means "nothing to write yet", see update_coordinates()
        self.categories[key][name] = None
        self.coordinates.append(record)

    def update_coordinates(self, key, name, value=True):
        """updates the coordinates of key:name entry. Format should match with
        the format specified at init time (raw or rmf3)
        note that setting value to None is equivalent to not calling this
        function

        Raises ValueError if the category or the coordinates are unknown.
        """
        if key not in self.categories:
            raise ValueError("unknown category: %s" % key)
        if name not in self.categories[key]:
            raise ValueError("unknown coordinates %s:%s" % (key, name))
        self.categories[key][name] = value

    def increment_counter(self, key, value):
        """increments the counter of category 'key' by 'value' steps.

        Raises ValueError if the category is unknown.
        """
        if key not in self.categories:
            raise ValueError("unknown category: %s" % key)
        cnt = self.categories[key]['counter']
        cnt.set_value(cnt.get_raw_value() + value)

    def get_entry_category(self, entry):
        """Return the key of the first category that holds 'entry', or None
        if no category does."""
        # ugly, find something better
        for cat, members in self.categories.items():
            if entry in members.values():
                return cat
        return None

    def format_titles(self):
        """Return the list of column titles, one per printed entry.

        Each title reads 'category:title'. When add_numbers_to_titles is
        set it is prefixed with 'N:', N being the 1-based position of the
        entry within its output line (or within the whole table when
        wrapping is disabled).
        """
        titles = []
        for i, entry in enumerate(self.entries):
            if not self.add_numbers_to_titles:
                number = ''
            elif self.num_entries_per_line > 0:
                number = '%d:' % ((i % self.num_entries_per_line) + 1)
            else:
                number = '%d:' % (i + 1)
            cat = self.get_entry_category(entry)
            titles.append(number + '%s:%s' % (cat, entry.get_title()))
        return titles

    def get_formatted_entries(self):
        """Return the value of every printed entry, as given by get_value()."""
        return [ent.get_value() for ent in self.entries]

    def should_wrap_line(self, pos, line):
        """Tell whether a line break must follow the token at index 'pos'.

        True only when wrapping is enabled, 'pos' is the last slot of an
        output line and it is not the last token of 'line'.
        """
        if not self.wrap_stats:
            return False
        num = self.num_entries_per_line
        return pos % num == num - 1 and pos != len(line) - 1

    def prepare_line(self, line, marker='L'):
        """Join the string tokens of 'line' into the text written to the
        stats file.

        Every output line starts with 'marker' followed by its 1-based
        number, every token is followed by the separator, and the result
        always ends with a newline. An empty 'line' yields just the header
        of the first line.
        """
        parts = [marker + '1', self.separator]
        for i, tok in enumerate(line):
            parts.append(tok)
            if self.should_wrap_line(i, line):
                # integer division: number of the output line that begins
                # after token i
                ln = 2 + i // self.num_entries_per_line
                parts.append('\n%s%d' % (marker, ln))
            parts.append(self.separator)
        # should_wrap_line() is never true for the last token, so the text
        # cannot already end with a line break here
        parts.append('\n')
        return ''.join(parts)

    def compress_file(self, fname):
        """Gzip 'fname' into 'fname.gz', then delete 'fname'."""
        # open the source first so that a missing file does not leave an
        # empty archive behind
        with open(fname, 'rb') as src:
            with gzip.open(fname + '.gz', 'wb') as dst:
                dst.writelines(src)
        # os.remove instead of a shell 'rm': safe for names with spaces or
        # shell metacharacters, and failures raise instead of being ignored
        os.remove(fname)

    def new_stage(self, name):
        """Append a '### STAGE name' marker line to the statistics file."""
        with open(self.statfile, 'a') as fl:
            fl.write("### STAGE %s\n" % name)

    def write_stats(self):
        """Writes statistics to the stats file and writes/appends
        trajectories. Only does that if the global step matches
        the output rate. Trajectories are written more sparsely, see trajrate.
        Coordinates whose value is None (not updated since they were last
        written) are skipped, and written coordinates are reset to None.
        Returns: True if data was written, False if not.
        """
        stepno = self.categories['global']['counter'].get_raw_value()
        if stepno % self.rate != 0:
            return False
        # stats file
        with open(self.statfile, 'a') as fl:
            # do title if necessary
            if self.write_title:
                self.write_title = False
                titles = self.format_titles()
                fl.write(self.prepare_line(titles,
                                           marker=self.comment_marker))
            elif self.repeat_title > 0:
                # number of lines written so far; when it hits a multiple
                # of repeat_title the title is scheduled for the next call
                if (stepno // self.rate) % self.repeat_title == 0:
                    self.write_title = True
            # write stats
            fl.write(self.prepare_line(self.get_formatted_entries()))
            if self.separate_lines:
                fl.write('*' * 80 + '\n')
        # write trajs
        if stepno % (self.rate * self.trajrate) != 0:
            return True
        for key, name, fmt, args in self.coordinates:
            data = self.categories[key][name]
            if data is None:
                continue
            if fmt == 'raw':
                do_append, extension = args
                if do_append:
                    outname = self.prefix + '_traj.' + extension
                    mode = 'a'
                    # rotate and compress what was accumulated so far; skip
                    # it when no trajectory exists yet (e.g. at step 0)
                    if (self.compress > 0 and stepno % self.compress == 0
                            and os.path.isfile(outname)):
                        newname = "%s_traj_%d.%s" % (
                            self.prefix, stepno, extension)
                        os.rename(outname, newname)
                        self.compress_file(newname)
                else:
                    # one file per frame, numbered by the category counter
                    num = self.categories[key]['counter'].get_raw_value()
                    outname = self.prefix + ('_%s_%010d.%s' %
                                             (name, num, extension))
                    mode = 'w'
                with open(outname, mode) as fl:
                    fl.write(data)
            elif fmt == 'rmf3':
                import IMP.rmf
                # args is the RMF file handle created by add_coordinates()
                IMP.rmf.save_frame(args)
                args.flush()
            else:
                raise RuntimeError("unknown coordinates format: %s" % fmt)
            self.categories[key][name] = None
        return True
RMF::FrameID save_frame(RMF::FileHandle file, std::string name="")
Save the current state of the linked objects as a new RMF frame.
Classes to handle ISD statistics files.
Definition: Entry.py:1
def increment_counter
increments the counter of category 'key' by 'value' steps.
Definition: Statistics.py:189
void add_hierarchies(RMF::NodeHandle fh, const atom::Hierarchies &hs)
def update
updates an entry and change its value to value
Definition: Statistics.py:137
def write_stats
Writes statistics to the stats file and writes/appends trajectories.
Definition: Statistics.py:255
void add_restraints(RMF::NodeHandle fh, const Restraints &hs)
def add_category
creates a logging entry for a simulation substep of the gibbs sampler.
Definition: Statistics.py:89
def update_coordinates
updates the coordinates of key:name entry.
Definition: Statistics.py:177
Support for the RMF file format for storing hierarchical molecular data and markup.
def add_coordinates
adds a placeholder for coordinates
Definition: Statistics.py:145
def add_entry
add an entry for the statistics file
Definition: Statistics.py:110