IMP  2.0.0
The Integrative Modeling Platform
cache.h
Go to the documentation of this file.
1 /**
2  * \file IMP/base/cache.h \brief Various general useful functions for IMP.
3  *
4  * Copyright 2007-2013 IMP Inventors. All rights reserved.
5  *
6  */
7 
8 #ifndef IMPBASE_CACHE_H
9 #define IMPBASE_CACHE_H
10 
11 #include <IMP/base/base_config.h>
12 #include "internal/cache.h"
13 #include "check_macros.h"
14 #include "log_macros.h"
15 #include "Vector.h"
16 #include <IMP/base/hash.h>
17 #include <boost/multi_index_container.hpp>
18 #include <boost/multi_index/sequenced_index.hpp>
19 #include <boost/multi_index/hashed_index.hpp>
20 #include <boost/multi_index/global_fun.hpp>
21 #include "Array.h"
22 #include <functional>
23 
24 #ifdef _MSC_VER
25 #include <cstddef> // for offsetof
26 #endif
27 
28 IMPBASE_BEGIN_NAMESPACE
29 
30 
31 
32 /** This is a helper class for writing memoizers: things that
33  store the results of a computation to look up later. The result
34  type must support
35  - \c operator=
36  - \c operator==
37  - default construction
38 */
39 template <class Generator,
40  class Checker=std::equal_to<typename Generator::result_type> >
41 class Memoizer {
42  Generator gen_;
43  Checker checker_;
44  mutable bool has_result_;
45  mutable typename Generator::result_type result_;
46  mutable int num_misses_;
47  mutable int num_stats_;
48 public:
49  typedef typename Generator::result_type Value;
50  typedef typename Generator::argument_type Key;
51  Memoizer(const Generator &gen,
52  const Checker &checker=Checker()): gen_(gen),
53  checker_(checker),
54  has_result_(false),
55  num_misses_(0),
56  num_stats_(0){}
57  Generator &access_generator() {return gen_;}
58  const Generator &get_generator() const {return gen_;}
59  void reset() {
60  has_result_=false;
61  result_= Value();
62  }
63  const Value &get() const {
64  if (!has_result_) {
65  result_= gen_();
66  has_result_=true;
67  ++num_misses_;
68  }
69  ++num_stats_;
70  IMP_INTERNAL_CHECK(checker_(result_,
71  gen_()),
72  "Wrong result returned from generator");
73  return result_;
74  }
75  //! Update the stored result manually
76  void set(const Value &v) const {
77  IMP_INTERNAL_CHECK(checker_(v, gen_()),
78  "Wrong result passed on set");
79  has_result_=true;
80  result_=v;
81  }
82  double get_hit_rate() const {
83  return 1.0-static_cast<double>(num_misses_)/num_stats_;
84  }
85 };
86 
87 
88 /** Implement a cache on sparse pairs of values. The cache
89  is infinite (or at least n^2).
90 */
91 template <class Generator, class Checker>
93 public:
94  typedef typename Generator::argument_type::value_type Key;
95  typedef typename Generator::result_type::value_type Entry;
97 private:
98  Generator gen_;
99  Checker checker_;
100 
101  static Key get_0( Entry e) {return e[0];}
102  static Key get_1( Entry e) {return e[1];}
103 
104  // The This:: is needed to make certain gcc versions (4.7) happier
105  typedef boost::multi_index::global_fun<Entry,
106  Key,
107  &This::get_0 > P0Member;
108  typedef boost::multi_index::global_fun<Entry,
109  Key,
110  &This::get_1 > P1Member;
111  typedef boost::multi_index::hashed_non_unique<P0Member> Hash0Index;
112  typedef boost::multi_index::hashed_non_unique<P1Member> Hash1Index;
113  typedef boost::multi_index::indexed_by<Hash0Index,
114  Hash1Index > IndexBy;
115  typedef boost::multi_index_container<Entry,
116  IndexBy> Cache;
117  typedef typename boost::multi_index::nth_index<Cache, 0>
118  ::type::const_iterator Hash0Iterator;
119  typedef typename boost::multi_index::nth_index<Cache, 1>
120  ::type::const_iterator Hash1Iterator;
121  Cache cache_;
122  Vector<Key> cleared_, domain_;
123 
124  struct EntryEqual {
125  Array<2, Key> v;
126  EntryEqual(Key t0, Key t1):
127  v(t0, t1){}
128  template <class O>
129  bool operator()(const O &o) const {
130  return v[0]==o[0] && v[1] == o[1];
131  }
132  };
133 
134  Hash0Iterator get(Key t0, Key t1) const {
135  Hash0Iterator b,e;
136  boost::tie(b,e)=cache_.template get<0>().equal_range(t0);
137  /*IMP_LOG_VERBOSE( "Found first matches "
138  << Vector<Entry>(b,e) << " for " << t0 << std::endl);*/
139  Hash0Iterator f= std::find_if(b,e, EntryEqual(t0, t1));
140  // otherwise it returns something not equal end()
141  if (f==e) return cache_.template get<0>().end();
142  else return f;
143  }
144 
145  void check_it() const {
146 #if IMP_HAS_CHECKS >= IMP_INTERNAL
147  Vector<Entry> cur(cache_.begin(), cache_.end());
148  IMP_INTERNAL_CHECK(checker_(cur),
149  "Cached and newly computed don't match: "
150  << cur << " vs " << gen_(domain_, this)
151  << " and cleared is " << cleared_);
152  for (Hash0Iterator c= cache_.template get<0>().begin();
153  c != cache_.template get<0>().end(); ++c) {
154  IMP_INTERNAL_CHECK(get(c->operator[](1), c->operator[](0))
155  == cache_.template get<0>().end(),
156  "Both an entry and its flip are in the table: "
157  << *c << ": " << cur);
158  }
159 #endif
160  }
161  void fill_it() {
163  for (unsigned int i=0; i< cleared_.size(); ++i) {
164  {
165  Hash0Iterator b,e;
166  boost::tie(b,e)=cache_.template get<0>().equal_range(cleared_[i]);
167  IMP_INTERNAL_CHECK(b==e, "Cleared entry " << cleared_[i]
168  << " was not cleared.");
169  }
170  {
171  Hash1Iterator b,e;
172  boost::tie(b,e)=cache_.template get<1>().equal_range(cleared_[i]);
173  IMP_INTERNAL_CHECK(b==e, "Cleared entry " << cleared_[i]
174  << " was not cleared.");
175  }
176  }
177  }
178  IMP_LOG_VERBOSE( "Filling from " << cleared_ << std::endl);
179  Vector<Entry> nv= gen_(cleared_, *this);
180  IMP_LOG_VERBOSE( "Inserting " << nv << " into pair memoizer" << std::endl);
182  for (unsigned int i=0; i< nv.size(); ++i) {
183  IMP_INTERNAL_CHECK(std::find_if(nv.begin(), nv.end(),
184  EntryEqual(nv[i][1],
185  nv[i][0]))
186  == nv.end(),
187  "An entry and its flip are already in list: "
188  << nv);
189 
190  }
191  }
192  cache_.insert(nv.begin(), nv.end());
193  check_it();
194  IMP_LOG_VERBOSE( "To get "
195  << typename Generator::result_type(cache_.begin(),
196  cache_.end())
197  << std::endl);
198  cleared_.clear();
199  }
200  template <class F, class It>
201  F do_apply( It b, It e, F f) const {
202  for (It c=b; c!= e; ++c) {
203  f(*c);
204  }
205  return f;
206  }
207 public:
209  const Generator &gen= Generator(),
210  const Checker &check= Checker()):
211  gen_(gen),
212  checker_(check),
213  cleared_(domain),
214  domain_(domain){
215  IMP_LOG_TERSE( "Domain for memoizer is " << domain << std::endl);
216  }
217  template <class F>
218  F apply(F f) {
220  if (!cleared_.empty()) fill_it();
221  check_it();
222  return do_apply(cache_.begin(), cache_.end(), f);
223  }
224  /** Apply a function to the current (unfilled) state of the memoizer.*/
225  template <class F>
228  return do_apply(cache_.begin(), cache_.end(), f);
229  }
230  //! Clear all entries involve the Key
231  /** The removed entries are returned */
232  void remove(const Key &a) {
233  if (std::find(cleared_.begin(), cleared_.end(), a) != cleared_.end()) {
234  return;
235  }
236  Vector<Entry> ret;
237  cleared_.push_back(a);
238  {
239  Hash0Iterator b,e;
240  boost::tie(b,e)=cache_.template get<0>().equal_range(a);
241  cache_.template get<0>().erase(b,e);
242  }
243  {
244  Hash1Iterator b,e;
245  boost::tie(b,e)=cache_.template get<1>().equal_range(a);
246  cache_.template get<1>().erase(b,e);
247  }
248  }
249  void insert(const Entry &e) {
250  cache_.insert(e);
251  }
252  void clear() {
253  cache_.clear();
254  cleared_=domain_;
255  }
256  const Generator &get_generator() const {return gen_;}
257  Generator &access_generator() const {return gen_;}
258 };
259 
260 
261 /** Implement a simple least recently used cache. As with
262  the Memoizer, it is parameterized by a generator that is
263  used to generate values if they are not in the cache.
264 
265  The Generator should have a method:
266  - Generator::operator()(Key, Cache);
267 */
268 template <class Generator,
269  class Checker=std::equal_to<typename Generator::result_type> >
270 class LRUCache {
271 public:
272  typedef typename Generator::result_type Value;
273  typedef typename Generator::argument_type Key;
274 private:
275  Generator gen_;
276  Checker checker_;
277  unsigned int max_size_;
278  mutable int num_stats_;
279  mutable int num_misses_;
280  struct KVP {
281  Key key;
282  Value value;
283  KVP(const Key &k, const Value &v): key(k), value(v){}
284  };
285  typedef boost::multi_index::member<KVP,
286  Key,
287  &KVP::key > KeyMember;
288  typedef boost::multi_index::hashed_unique<KeyMember> HashIndex;
289  typedef boost::multi_index::sequenced< > Sequenced;
290  typedef boost::multi_index_container<KVP,
291  boost::multi_index::indexed_by<HashIndex,
292  Sequenced > > Map;
293  mutable Map map_;
294  typedef typename boost::multi_index::template nth_index<Map, 0>
295  ::type::const_iterator LookupIterator;
296  typedef typename boost::multi_index::template nth_index<Map, 1>
297  ::type::const_iterator OrderIterator;
298  Value add_value(const Key &k) const {
299  Value v= gen_(k, *this);
300  map_.template get<1>().push_front(KVP(k, v));
301  while (map_.size() > max_size_) {
302  IMP_LOG_VERBOSE( "Cache overflow" << std::endl);
303  map_.template get<1>().pop_back();
304  }
305  return v;
306  }
307 public:
308  LRUCache(const Generator &gen, unsigned int size,
309  const Checker checker=Checker()): gen_(gen),
310  checker_(checker),
311  max_size_(size),
312  num_stats_(0),
313  num_misses_(0){}
314  Value get(const Key &k) const {
315  LookupIterator it=map_.template get<0>().find(k);
316  ++num_stats_;
317  if (it == map_.template get<0>().end()) {
318  IMP_LOG_VERBOSE( "Cache miss on " << k << std::endl);
319  ++num_misses_;
320  Value v=add_value(k);
321  IMP_INTERNAL_CHECK(max_size_==0 || map_.template get<0>().find(k)
322  != map_.template get<0>().end(),
323  "Failed to insert into cache");
324  return v;
325  } else {
326  map_.template get<1>().relocate(map_.template project<1>(it),
327  map_.template get<1>().begin());
328  // not good with floating point values
329  /*IMP_INTERNAL_CHECK(checker_(it->value,
330  gen_(k, *this)),
331  "Results don't match: " << it->value << " != "
332  << gen_(k, *this));*/
333  IMP_INTERNAL_CHECK(map_.template get<0>().find(k)
334  != map_.template get<0>().end(),
335  "Gone, gone I tell you");
336  return it->value;
337  }
338 
339  }
340  double get_hit_rate() const {
341  return 1.0-static_cast<double>(num_misses_)/num_stats_;
342  }
343  Vector<Key> get_keys() const {
344  Vector<Key> ret;
345  for (OrderIterator it= map_.template get<1>().begin();
346  it != map_.template get<1>().end(); ++it) {
347  ret.push_back(it->key);
348  }
349  return ret;
350  }
351  typedef OrderIterator ContentIterator;
352  ContentIterator contents_begin() const {
353  return map_.template get<1>().begin();
354  }
355  ContentIterator contents_end() const {
356  return map_.template get<1>().end();
357  }
358  void insert(Key k, Value v) {
359  LookupIterator it=map_.template get<0>().find(k);
360  if (it == map_.template get<0>().end()) {
361  map_.template get<1>().push_front(KVP(k, v));
362  while (map_.size() > max_size_) {
363  map_.template get<1>().pop_back();
364  }
365  }
366  }
367  unsigned int size() const {
368  return map_.size();
369  }
370  Generator &access_generator() {
371  return gen_;
372  }
373  const Generator &get_generator() const {
374  return gen_;
375  }
376 };
377 
378 
379 IMPBASE_END_NAMESPACE
380 
381 #endif /* IMPBASE_CACHE_H */