IMP  2.0.1
The Integrative Modeling Platform
assignment_containers.h
Go to the documentation of this file.
1 /**
2  * \file IMP/domino/assignment_containers.h
3  * \brief A Bayesian inference-based sampler.
4  *
5  * Copyright 2007-2013 IMP Inventors. All rights reserved.
6  *
7  */
8 
9 #ifndef IMPDOMINO_ASSIGNMENT_CONTAINERS_H
10 #define IMPDOMINO_ASSIGNMENT_CONTAINERS_H
11 
12 #include <IMP/domino/domino_config.h>
13 #include "Assignment.h"
14 #include "Order.h"
15 #include "subset_scores.h"
16 #include <IMP/base/map.h>
18 #if IMP_DOMINO_HAS_RMF
19 #include <RMF/HDF5/Group.h>
20 #include <RMF/HDF5/File.h>
21 #endif
22 #include <boost/shared_array.hpp>
23 #include <algorithm>
24 #include <IMP/base/hash.h>
25 #include <IMP/base/Vector.h>
26 
27 #include <queue>
28 #include <IMP/random.h>
29 #include <boost/random.hpp>
30 #include <cstdio>
31 
32 #ifdef _MSC_VER
33 #include <io.h>
34 #endif
35 
36 IMPDOMINO_BEGIN_NAMESPACE
37 
38 
39 /** Abstract base class for containers of assignments. Assignments are
40  stored in these rather than as Assignments to help increase efficiency
41  as well as provide flexibility as to how and where they are stored.
42 */
43 class IMPDOMINOEXPORT AssignmentContainer: public IMP::base::Object {
44  public:
45  AssignmentContainer(std::string name="AssignmentsContainer %1%");
46  virtual unsigned int get_number_of_assignments() const=0;  //!< Number of assignments currently held
47  virtual Assignment get_assignment(unsigned int i) const=0;  //!< The assignment at index i
48  virtual Assignments get_assignments(IntRange ir) const=0;  //!< Assignments selected by ir (presumably an index range; confirm bounds convention)
49  virtual Assignments get_assignments() const=0;  //!< All assignments in the container
50  virtual void add_assignment(const Assignment& a)=0;  //!< Add one assignment
51  virtual void add_assignments(const Assignments &as)=0;  //!< Add several assignments
52  //! Get all the assignments for the ith particle
53  virtual Ints get_particle_assignments(unsigned int i) const=0;
54 
55  virtual ~AssignmentContainer();
56 };
57 
58 
60 
61 /** Store a set of assignments in a somewhat more compact form in memory
62  than the ListAssignmentContainer.
63  */
64 class IMPDOMINOEXPORT PackedAssignmentContainer: public AssignmentContainer {
65  // all assignments flattened into one vector, width_ consecutive entries each
66  Ints d_;
67  int width_;  // entries per assignment; the inline members below treat -1 as "not set yet"
68  public:
69  PackedAssignmentContainer(std::string name="PackedAssignmentsContainer %1%");
71 };  // NOTE(review): listing line 70 is absent from this extract, so the member declarations defined inline below are not visible here
72 
73 #ifndef IMP_DOXYGEN
74 
75 inline unsigned int  // Number of stored assignments: flat storage length divided by the per-assignment width.
76 PackedAssignmentContainer::get_number_of_assignments() const {
77  if (width_==-1) return 0;  // width not recorded yet, so report an empty container
78  return d_.size()/width_;  // NOTE(review): width_==0 would divide by zero; confirm it cannot occur
79 }
80 
81 inline Assignment  // Build the i-th assignment from its width_-long slice of d_.
82 PackedAssignmentContainer::get_assignment(unsigned int i) const {
83  IMP_USAGE_CHECK(i < get_number_of_assignments(),
84  "Invalid assignment requested: " << i);
85  IMP_USAGE_CHECK(width_ >0, "Uninitualized PackedAssignmentContainer.");
86  return Assignment(d_.begin()+i*width_,  // slice covers [i*width_, (i+1)*width_)
87  d_.begin()+(i+1)*width_);
88 }
89 
90 inline void PackedAssignmentContainer::add_assignment(const Assignment& a) {  // Append a's values to the end of the flat storage.
92  if (width_==-1) {  // the first assignment added fixes the width for all later ones
93  width_=a.size();
94  }
95  IMP_USAGE_CHECK(static_cast<int>(a.size())== width_,
96  "Sizes don't match " << width_
97  << " vs " << a.size());
99  for (unsigned int i=0; i< get_number_of_assignments(); ++i) {  // linear scan of existing entries for a duplicate of a
100  IMP_INTERNAL_CHECK(get_assignment(i) != a,
101  "Assignment " << a << " already here.");
102  }
103  }  // NOTE(review): listing lines 91 and 98 are absent from this extract; this brace presumably closes a block opened on the missing line 98 - confirm against the original header
104  d_.insert(d_.end(), a.begin(), a.end());
105 }
106 #endif
107 
108 /** Simple storage of a set of Assignments. Prefer PackedAssignmentContainer,
109  I think.
110  */
111 class IMPDOMINOEXPORT ListAssignmentContainer: public AssignmentContainer {
112  // one Assignment object per stored assignment, in insertion order
113  Assignments d_;
114  public:
115  ListAssignmentContainer(std::string name="ListAssignmentsContainer %1%");
117 };  // NOTE(review): listing line 116 is absent from this extract, so the member declarations defined inline below are not visible here
118 
119 #ifndef IMP_DOXYGEN
120 inline unsigned int ListAssignmentContainer::get_number_of_assignments() const {  // Number of stored assignments.
121  return d_.size();
122 }
123 
124 inline Assignment  // Return a copy of the i-th stored assignment.
125 ListAssignmentContainer::get_assignment(unsigned int i) const {
126  return d_[i];  // NOTE(review): no IMP_USAGE_CHECK on i here, unlike PackedAssignmentContainer::get_assignment
127 }
128 
129 inline void ListAssignmentContainer::add_assignment(const Assignment& a) {  // Append a copy of a; no duplicate or size check is made here.
130  d_.push_back(a);
131 }
132 #endif
133 
134 
135 
136 
137 
138 /** Store a list of k assignments chosen from all of the ones added to this
139  table. The states are chosen uniformly.
140 
141  This doesn't seem very useful
142  */
143 class IMPDOMINOEXPORT SampleAssignmentContainer: public AssignmentContainer {
144  // kept assignments flattened into one vector, width_ consecutive entries each
145  Ints d_;
146  int width_;  // entries per assignment (used as the slice length by the inline accessors)
147  unsigned int k_;  // presumably the sample size k given to the constructor - TODO confirm
148  unsigned int i_;  // NOTE(review): purpose not visible in this header; possibly a count of assignments offered so far - confirm
149  boost::uniform_real<double> select_;  // presumably decides whether a new assignment is kept - TODO confirm
150  boost::uniform_int<> place_;  // presumably picks which kept slot is replaced - TODO confirm
151  public:
152  SampleAssignmentContainer(unsigned int k,
153  std::string name="SampleAssignmentsContainer %1%");
155 };  // NOTE(review): listing line 154 is absent from this extract, so the member declarations defined inline below are not visible here
156 
157 #ifndef IMP_DOXYGEN
158 inline unsigned int  // Number of kept assignments: flat storage length divided by the per-assignment width.
159 SampleAssignmentContainer::get_number_of_assignments() const {
160  return d_.size()/width_;  // NOTE(review): no width_==-1 guard here, unlike PackedAssignmentContainer::get_number_of_assignments - confirm width_ is always valid when this is called
161 }
162 
163 inline Assignment  // Build the i-th kept assignment from its width_-long slice of d_.
164 SampleAssignmentContainer::get_assignment(unsigned int i) const {
165  return Assignment(d_.begin()+i*width_,  // slice covers [i*width_, (i+1)*width_); NOTE(review): i is not range-checked here
166  d_.begin()+(i+1)*width_);
167 }
168 #endif
169 
170 
171 
172 
173 
174 #if IMP_DOMINO_HAS_RMF || defined(IMP_DOXYGEN)
175 /** Store the assignments in an HDF5DataSet. Make sure to delete this
176  container before trying to read from the same data set (unless
177  you pass the data set explicitly, in which case it may be OK).
178 
179  The format on disk should
180  not, yet, be considered stable.
181  */
182 class IMPDOMINOEXPORT WriteHDF5AssignmentContainer: public AssignmentContainer {
183  RMF::HDF5::IndexDataSet2D ds_;  // the 2D index data set backing this container
184  Order order_;  // presumably maps the subset's particle order to the stored order - TODO confirm
185  Ints cache_;  // presumably values buffered in memory until flush() - TODO confirm
186  unsigned int max_cache_;  // presumably the cache limit set through set_cache_size() - TODO confirm
187  void flush();  // defined out of line; also passed as the third macro argument below
188  public:
189  WriteHDF5AssignmentContainer(RMF::HDF5::Group parent,  // overload taking a parent group (presumably the data set is created inside it - confirm)
190  const Subset &s,
191  const ParticlesTemp &all_particles,
192  std::string name);
193 
194  WriteHDF5AssignmentContainer(RMF::HDF5::IndexDataSet2D dataset,  // overload taking the data set explicitly
195  const Subset &s,
196  const ParticlesTemp &all_particles,
197  std::string name);
198  void set_cache_size(unsigned int words);  // the parameter name indicates the size is given in words
199  IMP_ASSIGNMENT_CONTAINER_INLINE(WriteHDF5AssignmentContainer,
200  IMP_UNUSED(out),flush());
201 };
202 
203 /** Read assignments stored in an HDF5DataSet. The format on disk should not,
204  yet, be considered stable.
205  */
206 class IMPDOMINOEXPORT ReadHDF5AssignmentContainer: public AssignmentContainer {
207  RMF::HDF5::IndexConstDataSet2D ds_;  // const (read-only) 2D index data set backing this container
208  Order order_;  // presumably maps the stored order back to the subset's particle order - TODO confirm
209  Ints cache_;  // presumably an in-memory read buffer - TODO confirm
210  unsigned int max_cache_;  // presumably the cache limit set through set_cache_size() - TODO confirm
211  void flush();  // NOTE(review): declared here but not passed to the macro below; confirm it is needed on the read side
212  public:
213 
214  ReadHDF5AssignmentContainer(RMF::HDF5::IndexConstDataSet2D dataset,
215  const Subset &s,
216  const ParticlesTemp &all_particles,
217  std::string name);
218  void set_cache_size(unsigned int words);  // the parameter name indicates the size is given in words
219  IMP_ASSIGNMENT_CONTAINER_INLINE(ReadHDF5AssignmentContainer,
220  IMP_UNUSED(out),);
221 };
222 #endif
223 
224 /** Store the assignments on disk as binary data. Use a ReadAssignmentContainer
225  to read them back. The resulting file is not guaranteed to work on any
226  platform other than the one it was created on and the format may change.
227  */
228 class IMPDOMINOEXPORT WriteAssignmentContainer: public AssignmentContainer {
229  int f_;  // descriptor handed to close()/_close() in the macro argument below
230  Order order_;  // presumably maps the subset's particle order to the stored order - TODO confirm
231  Ints cache_;  // presumably values buffered in memory until flush() - TODO confirm
232  unsigned int max_cache_;  // presumably the cache limit set through set_cache_size() - TODO confirm
233  int number_;  // NOTE(review): purpose not visible here; possibly the count of assignments written - confirm
234  void flush();  // defined out of line; invoked before the descriptor is closed below
235  public:
236  WriteAssignmentContainer(std::string out_file,
237  const Subset &s,
238  const ParticlesTemp &all_particles,
239  std::string name);
240  void set_cache_size(unsigned int words);  // the parameter name indicates the size is given in words
241 #ifdef _MSC_VER
242  IMP_ASSIGNMENT_CONTAINER_INLINE(WriteAssignmentContainer,
243  IMP_UNUSED(out),{
244  flush(); _close(f_);
245  });
246 #else
247  IMP_ASSIGNMENT_CONTAINER_INLINE(WriteAssignmentContainer,
248  IMP_UNUSED(out),{
249  flush(); close(f_);
250  });
251 #endif
252 };
253 
254 /** Read the assignments from binary data on disk. Use a
255  WriteAssignmentContainer to write them. Make sure to destroy the
256  WriteAssignmentContainer before trying to read from the file.
257  */
258 class IMPDOMINOEXPORT ReadAssignmentContainer: public AssignmentContainer {
259  int f_;  // descriptor handed to close()/_close() in the macro argument below
260  Order order_;  // presumably maps the stored order back to the subset's particle order - TODO confirm
261  mutable Ints cache_;  // mutable, so const members can modify it; presumably a read buffer - TODO confirm
262  unsigned int max_cache_;  // presumably the cache limit set through set_cache_size() - TODO confirm
263  mutable int offset_;  // mutable, so const members can modify it; presumably where cache_ starts in the file - TODO confirm
264  int size_;  // NOTE(review): purpose not visible here; possibly the number of stored assignments - confirm
265  public:
266  ReadAssignmentContainer(std::string out_file,  // NOTE(review): named out_file although this class reads the file
267  const Subset &s,
268  const ParticlesTemp &all_particles,
269  std::string name);
270  void set_cache_size(unsigned int words);  // the parameter name indicates the size is given in words
271 #ifdef _MSC_VER
272  IMP_ASSIGNMENT_CONTAINER_INLINE(ReadAssignmentContainer,
273  IMP_UNUSED(out),{_close(f_);});
274 #else
275  IMP_ASSIGNMENT_CONTAINER_INLINE(ReadAssignmentContainer,
276  IMP_UNUSED(out),{close(f_);});
277 #endif
278 };
279 
280 /** Expose a range [begin, end) of an inner assignment container to
281  consumers. One cannot add assignments to this container.
282  */
283 class IMPDOMINOEXPORT RangeViewAssignmentContainer: public AssignmentContainer {
284  Pointer<AssignmentContainer> inner_;  // the wrapped container
285  int begin_, end_;  // bounds of the exposed range; NOTE(review): stored as int although the visible parameters are unsigned int
286  public:
288  unsigned int begin, unsigned int end);  // NOTE(review): tail of a declaration whose first line (listing line 287) is absent from this extract
290 };  // NOTE(review): listing line 289 is also absent from this extract
291 
292 
293 /** Store a set of k top scoring assignments
294  */
295 class IMPDOMINOEXPORT HeapAssignmentContainer: public AssignmentContainer {
296  typedef std::pair<Assignment,double> AP;  // an assignment paired with a double (presumably its score)
297  struct GreaterSecond {  // comparator on the pair's second member; NOTE(review): despite the name it evaluates a.second < b.second
298  bool operator()(const AP &a,
299  const AP &b) {  // NOTE(review): not const-qualified; confirm no caller needs to invoke it on a const comparator
300  return a.second < b.second;
301  }
302  };
303  typedef base::Vector<AP> C;
304  C d_;  // the stored (assignment, double) pairs
305  Subset subset_;
306  Slices slices_;  // NOTE(review): use not visible in this header
307  Restraints rs_;  // NOTE(review): use not visible in this header
308  unsigned int k_; // max number of assignments (heap size)
309  Pointer<RestraintCache> rssf_;//to score candidate assignments
310  public:
311  HeapAssignmentContainer(Subset subset, unsigned int k,
312  RestraintCache *rssf,
313  std::string name="HeapAssignmentsContainer %1%");
315 };  // NOTE(review): listing line 314 is absent from this extract, so the member function declarations are not visible here
316 
317 /** This is a wrapper for an AssignmentContainer that throws a ValueException
318  if more than a certain number of states are added.*/
319 class IMPDOMINOEXPORT CappedAssignmentContainer:
320  public AssignmentContainer {
321  typedef AssignmentContainer P;
322  Pointer<AssignmentContainer> contained_;  // the wrapped container
323  unsigned int max_;  // presumably the cap taken from max_states - TODO confirm; NOTE(review): unsigned here, int in the visible parameter
324  void check_number() const;  // defined out of line; presumably enforces the cap described above - TODO confirm
325  public:
327  int max_states,  // NOTE(review): tail of a declaration whose first line (listing line 326) is absent from this extract
328  std::string name);
329  IMP_ASSIGNMENT_CONTAINER_INLINE(CappedAssignmentContainer,
330  IMP_UNUSED(out),{
331  });
332 };
333 
334 
335 
336 IMPDOMINO_END_NAMESPACE
337 
338 #endif /* IMPDOMINO_ASSIGNMENT_CONTAINERS_H */