IMP logo
IMP Reference Guide  develop.63b38c487d,2024/12/21
The Integrative Modeling Platform
cache.h
Go to the documentation of this file.
1 /**
2  * \file IMP/cache.h
3  * \brief Helper classes for various types of caching
4  *
5  * Copyright 2007-2022 IMP Inventors. All rights reserved.
6  *
7  */
8 
9 #ifndef IMPKERNEL_CACHE_H
10 #define IMPKERNEL_CACHE_H
11 
12 #include <IMP/kernel_config.h>
13 #include "internal/cache.h"
14 #include "check_macros.h"
15 #include "log_macros.h"
16 #include "Vector.h"
17 #include <IMP/hash.h>
18 #include <boost/multi_index_container.hpp>
19 #include <boost/multi_index/sequenced_index.hpp>
20 #include <boost/multi_index/hashed_index.hpp>
21 #include <boost/multi_index/global_fun.hpp>
22 #include "Array.h"
23 #include <functional>
24 
25 #ifdef _MSC_VER
26 #include <cstddef> // for offsetof
27 #endif
28 
29 IMPKERNEL_BEGIN_NAMESPACE
30 
31 //! Helper class for writing memoizers
32 /** A memoizer stores the results of a computation to look up later.
33  The result type must support
34  - \c operator=
35  - \c operator==
36  - default construction
37  */
38 template <class Generator,
39  class Checker = std::equal_to<typename Generator::result_type> >
40 class Memoizer {
41  Generator gen_;
42  Checker checker_;
43  mutable bool has_result_;
44  mutable typename Generator::result_type result_;
45  mutable int num_misses_;
46  mutable int num_stats_;
47 
48  public:
49  typedef typename Generator::result_type Value;
50  typedef typename Generator::argument_type Key;
51  Memoizer(const Generator &gen, const Checker &checker = Checker())
52  : gen_(gen),
53  checker_(checker),
54  has_result_(false),
55  num_misses_(0),
56  num_stats_(0) {}
57  Generator &access_generator() { return gen_; }
58  const Generator &get_generator() const { return gen_; }
59  void reset() {
60  has_result_ = false;
61  result_ = Value();
62  }
63  const Value &get() const {
64  if (!has_result_) {
65  result_ = gen_();
66  has_result_ = true;
67  ++num_misses_;
68  }
69  ++num_stats_;
70  IMP_INTERNAL_CHECK(checker_(result_, gen_()),
71  "Wrong result returned from generator");
72  return result_;
73  }
74  //! Update the stored result manually
75  void set(const Value &v) const {
76  IMP_INTERNAL_CHECK(checker_(v, gen_()), "Wrong result passed on set");
77  has_result_ = true;
78  result_ = v;
79  }
80  double get_hit_rate() const {
81  return 1.0 - static_cast<double>(num_misses_) / num_stats_;
82  }
83 };
84 
85 /** Implement a cache on sparse pairs of values. The cache
86  is infinite (or at least n^2).
87 */
88 template <class Generator, class Checker>
90  public:
91  typedef typename Generator::argument_type::value_type Key;
92  typedef typename Generator::result_type::value_type Entry;
94 
95  private:
96  Generator gen_;
97  Checker checker_;
98 
99  static Key get_0(Entry e) { return std::get<0>(e); }
100  static Key get_1(Entry e) { return std::get<1>(e); }
101 
102  // The This:: is needed to make certain gcc versions (4.7) happier
103  typedef boost::multi_index::global_fun<Entry, Key, &This::get_0> P0Member;
104  typedef boost::multi_index::global_fun<Entry, Key, &This::get_1> P1Member;
105  typedef boost::multi_index::hashed_non_unique<P0Member> Hash0Index;
106  typedef boost::multi_index::hashed_non_unique<P1Member> Hash1Index;
107  typedef boost::multi_index::indexed_by<Hash0Index, Hash1Index> IndexBy;
108  typedef boost::multi_index_container<Entry, IndexBy> Cache;
109  typedef typename boost::multi_index::nth_index<Cache, 0>::type::const_iterator
110  Hash0Iterator;
111  typedef typename boost::multi_index::nth_index<Cache, 1>::type::const_iterator
112  Hash1Iterator;
113  Cache cache_;
114  Vector<Key> cleared_, domain_;
115 
116  struct EntryEqual {
117  Array<2, Key> v;
118  EntryEqual(Key t0, Key t1) : v(t0, t1) {}
119  template <class O>
120  bool operator()(const O &o) const {
121  return v[0] == o[0] && v[1] == o[1];
122  }
123  };
124 
125  Hash0Iterator get(Key t0, Key t1) const {
126  Hash0Iterator b, e;
127  boost::tie(b, e) = cache_.template get<0>().equal_range(t0);
128  /*IMP_LOG_VERBOSE( "Found first matches "
129  << Vector<Entry>(b,e) << " for " << t0 << std::endl);*/
130  Hash0Iterator f = std::find_if(b, e, EntryEqual(t0, t1));
131  // otherwise it returns something not equal end()
132  if (f == e)
133  return cache_.template get<0>().end();
134  else
135  return f;
136  }
137 
138  void check_it() const {
139 #if IMP_HAS_CHECKS >= IMP_INTERNAL
140  Vector<Entry> cur(cache_.begin(), cache_.end());
141  IMP_INTERNAL_CHECK(checker_(cur), "Cached and newly computed don't match: "
142  << cur << " vs "
143  << gen_(domain_, this)
144  << " and cleared is " << cleared_);
145  for (Hash0Iterator c = cache_.template get<0>().begin();
146  c != cache_.template get<0>().end(); ++c) {
148  get(c->operator[](1), c->operator[](0)) ==
149  cache_.template get<0>().end(),
150  "Both an entry and its flip are in the table: " << *c << ": " << cur);
151  }
152 #endif
153  }
154  void fill_it() {
156  for (unsigned int i = 0; i < cleared_.size(); ++i) {
157  {
158  Hash0Iterator b, e;
159  boost::tie(b, e) = cache_.template get<0>().equal_range(cleared_[i]);
160  IMP_INTERNAL_CHECK(b == e, "Cleared entry " << cleared_[i]
161  << " was not cleared.");
162  }
163  {
164  Hash1Iterator b, e;
165  boost::tie(b, e) = cache_.template get<1>().equal_range(cleared_[i]);
166  IMP_INTERNAL_CHECK(b == e, "Cleared entry " << cleared_[i]
167  << " was not cleared.");
168  }
169  }
170  }
171  IMP_LOG_VERBOSE("Filling from " << cleared_ << std::endl);
172  Vector<Entry> nv = gen_(cleared_, *this);
173  IMP_LOG_VERBOSE("Inserting " << nv << " into pair memoizer" << std::endl);
175  for (unsigned int i = 0; i < nv.size(); ++i) {
177  std::find_if(nv.begin(), nv.end(),
178  EntryEqual(nv[i][1], nv[i][0])) == nv.end(),
179  "An entry and its flip are already in list: " << nv);
180  }
181  }
182  cache_.insert(nv.begin(), nv.end());
183  check_it();
184  IMP_LOG_VERBOSE("To get " << typename Generator::result_type(cache_.begin(),
185  cache_.end())
186  << std::endl);
187  cleared_.clear();
188  }
189  template <class F, class It>
190  F do_apply(It b, It e, F f) const {
191  for (It c = b; c != e; ++c) {
192  f(*c);
193  }
194  return f;
195  }
196 
197  public:
199  const Generator &gen = Generator(),
200  const Checker &check = Checker())
201  : gen_(gen), checker_(check), cleared_(domain), domain_(domain) {
202  IMP_LOG_TERSE("Domain for memoizer is " << domain << std::endl);
203  }
204  template <class F>
205  F apply(F f) {
207  if (!cleared_.empty()) fill_it();
208  check_it();
209  return do_apply(cache_.begin(), cache_.end(), f);
210  }
211  /** Apply a function to the current (unfilled) state of the memoizer.*/
212  template <class F>
215  return do_apply(cache_.begin(), cache_.end(), f);
216  }
217  //! Clear all entries involve the Key
218  /** The removed entries are returned */
219  void remove(const Key &a) {
220  if (std::find(cleared_.begin(), cleared_.end(), a) != cleared_.end()) {
221  return;
222  }
223  Vector<Entry> ret;
224  cleared_.push_back(a);
225  {
226  Hash0Iterator b, e;
227  boost::tie(b, e) = cache_.template get<0>().equal_range(a);
228  cache_.template get<0>().erase(b, e);
229  }
230  {
231  Hash1Iterator b, e;
232  boost::tie(b, e) = cache_.template get<1>().equal_range(a);
233  cache_.template get<1>().erase(b, e);
234  }
235  }
236  void insert(const Entry &e) { cache_.insert(e); }
237  void clear() {
238  cache_.clear();
239  cleared_ = domain_;
240  }
241  const Generator &get_generator() const { return gen_; }
242  Generator &access_generator() const { return gen_; }
243 };
244 
245 //! Implement a simple least recently used cache.
246 /** As with the Memoizer, it is parameterized by a generator that is
247  used to generate values if they are not in the cache.
248 
249  The Generator should have a method:
250  - Generator::operator()(Key, Cache);
251 */
252 template <class Generator,
253  class Checker = std::equal_to<typename Generator::result_type> >
254 class LRUCache {
255  public:
256  typedef typename Generator::result_type Value;
257  typedef typename Generator::argument_type Key;
258 
259  private:
260  Generator gen_;
261  Checker checker_;
262  unsigned int max_size_;
263  mutable int num_stats_;
264  mutable int num_misses_;
265  struct KVP {
266  Key key;
267  Value value;
268  KVP(const Key &k, const Value &v) : key(k), value(v) {}
269  };
270  typedef boost::multi_index::member<KVP, Key, &KVP::key> KeyMember;
271  typedef boost::multi_index::hashed_unique<KeyMember> HashIndex;
272  typedef boost::multi_index::sequenced<> Sequenced;
273  typedef boost::multi_index_container<
274  KVP, boost::multi_index::indexed_by<HashIndex, Sequenced> > Map;
275  mutable Map map_;
276  typedef typename boost::multi_index::template nth_index<
277  Map, 0>::type::const_iterator LookupIterator;
278  typedef typename boost::multi_index::template nth_index<
279  Map, 1>::type::const_iterator OrderIterator;
280  Value add_value(const Key &k) const {
281  Value v = gen_(k, *this);
282  map_.template get<1>().push_front(KVP(k, v));
283  while (map_.size() > max_size_) {
284  IMP_LOG_VERBOSE("Cache overflow" << std::endl);
285  map_.template get<1>().pop_back();
286  }
287  return v;
288  }
289 
290  public:
291  LRUCache(const Generator &gen, unsigned int size,
292  const Checker checker = Checker())
293  : gen_(gen),
294  checker_(checker),
295  max_size_(size),
296  num_stats_(0),
297  num_misses_(0) {}
298  Value get(const Key &k) const {
299  LookupIterator it = map_.template get<0>().find(k);
300  ++num_stats_;
301  if (it == map_.template get<0>().end()) {
302  IMP_LOG_VERBOSE("Cache miss on " << k << std::endl);
303  ++num_misses_;
304  Value v = add_value(k);
305  IMP_INTERNAL_CHECK(max_size_ == 0 || map_.template get<0>().find(k) !=
306  map_.template get<0>().end(),
307  "Failed to insert into cache");
308  return v;
309  } else {
310  map_.template get<1>().relocate(map_.template project<1>(it),
311  map_.template get<1>().begin());
312  // not good with floating point values
313  /*IMP_INTERNAL_CHECK(checker_(it->value,
314  gen_(k, *this)),
315  "Results don't match: " << it->value << " != "
316  << gen_(k, *this));*/
318  map_.template get<0>().find(k) != map_.template get<0>().end(),
319  "Gone, gone I tell you");
320  return it->value;
321  }
322  }
323  double get_hit_rate() const {
324  return 1.0 - static_cast<double>(num_misses_) / num_stats_;
325  }
326  Vector<Key> get_keys() const {
327  Vector<Key> ret;
328  for (OrderIterator it = map_.template get<1>().begin();
329  it != map_.template get<1>().end(); ++it) {
330  ret.push_back(it->key);
331  }
332  return ret;
333  }
334  typedef OrderIterator ContentIterator;
335  ContentIterator contents_begin() const {
336  return map_.template get<1>().begin();
337  }
338  ContentIterator contents_end() const { return map_.template get<1>().end(); }
339  void insert(Key k, Value v) {
340  LookupIterator it = map_.template get<0>().find(k);
341  if (it == map_.template get<0>().end()) {
342  map_.template get<1>().push_front(KVP(k, v));
343  while (map_.size() > max_size_) {
344  map_.template get<1>().pop_back();
345  }
346  }
347  }
348  unsigned int size() const { return map_.size(); }
349  Generator &access_generator() { return gen_; }
350  const Generator &get_generator() const { return gen_; }
351 };
352 
353 IMPKERNEL_END_NAMESPACE
354 
355 #endif /* IMPKERNEL_CACHE_H */
Helper class for writing memoizers.
Definition: cache.h:40
#define IMP_IF_CHECK(level)
Execute the code block if a certain level checks are on.
Definition: check_macros.h:104
#define IMP_FUNCTION_LOG
Beginning logging for a non-member function.
Definition: log_macros.h:293
Helper functions for implementing hashes.
#define IMP_LOG_VERBOSE(expr)
Definition: log_macros.h:83
Classes to handle static sized arrays of things.
#define IMP_LOG_TERSE(expr)
Definition: log_macros.h:72
void set(const Value &v) const
Update the stored result manually.
Definition: cache.h:75
#define IMP_INTERNAL_CHECK(expr, message)
An assertion to check for internal errors in IMP. An IMP::ErrorException will be thrown.
Definition: check_macros.h:139
Logging and error reporting support.
Implement a simple least recently used cache.
Definition: cache.h:254
A class for storing lists of IMP items.
Helper macros for throwing and handling exceptions.