IMP logo
IMP Reference Guide  develop.7cb8855c60,2024/10/08
The Integrative Modeling Platform
subset_filters.h
Go to the documentation of this file.
1 /**
2  * \file IMP/domino/subset_filters.h
3  * \brief Filters, and the tables that create them, for rejecting Assignments of a Subset.
4  *
5  * Copyright 2007-2022 IMP Inventors. All rights reserved.
6  *
7  */
8 
9 #ifndef IMPDOMINO_SUBSET_FILTERS_H
10 #define IMPDOMINO_SUBSET_FILTERS_H
11 
12 #include <IMP/domino/domino_config.h>
13 #include "particle_states.h"
14 #include "Assignment.h"
15 #include "particle_states.h"
16 #include "Subset.h"
17 #include "domino_macros.h"
18 #include "subset_scores.h"
19 #include <IMP/Object.h>
20 #include <IMP/Pointer.h>
21 #include <boost/unordered_map.hpp>
22 #include <IMP/Configuration.h>
23 #include <IMP/Model.h>
24 #include <IMP/macros.h>
25 #include <boost/dynamic_bitset.hpp>
26 #include <IMP/utility_macros.h>
27 #include <boost/property_map/property_map.hpp>
28 
29 #include <boost/pending/disjoint_sets.hpp>
30 
31 IMPDOMINO_BEGIN_NAMESPACE
32 
33 /** An instance of this type is created by the
34  SubsetFilterTable::get_subset_filter() method. Its job
35  is to reject some of the Assignments corresponding to the
36  Subset it was created with. It has one
37  method of interest, get_is_ok(), which returns true if the state
38  passes the filter.
39 
40  The passed Assignment has the particles ordered in the
41  same order as they were in the Subset that was passed to the
42  table in order to create the filter.
43 */
44 class IMPDOMINOEXPORT SubsetFilter : public IMP::Object {
45  public:
46  SubsetFilter(std::string name = "SubsetFilter%1%");
47  //! Return true if the given state passes this filter for the Subset
48  //! that the filter was created with
49  virtual bool get_is_ok(const Assignment &state) const = 0;
50 
51  //! Return the next state index to try for the particle at position pos
52  /** The default implementation returns state[pos] + 1. An override
53  must never skip over a state that could be valid.
54 
55  The method can assume \c !get_is_ok(state) and that the state
56  with position pos removed is ok.
57  */
58  virtual int get_next_state(int pos, const Assignment &state) const {
59  return state[pos] + 1;
60  }
61 
62  virtual ~SubsetFilter();
63 };
64 
66 
67 /** A SubsetFilterTable class which produces SubsetFilter objects upon
68  demand. When the get_subset_filter() method is called, it is passed
69  the Subset that is to be filtered. It is also passed subsets of
70  that Subset which have previously been filtered (and so don't need
71  to be checked again).
72 
73  For example, if the passed set is {a,b,c} and the prior_subsets
74  are {a,b} and {b,c}, then only properties that involve a and c need
75  to be checked, as ones involving a and b, or b and c, have already been
76  checked previously.
77 */
78 class IMPDOMINOEXPORT SubsetFilterTable : public IMP::Object {
79  public:
80  SubsetFilterTable(std::string name = "SubsetFilterTable%1%") : Object(name) {}
81  /** Return a SubsetFilter which acts on the Subset s, given that all
82  the prior_subsets have already been filtered. Implementations should
83  return nullptr if there is no filtering to be done.
84  */
85  virtual SubsetFilter *get_subset_filter(
86  const Subset &s, const Subsets &prior_subsets) const = 0;
87 
88  //! The strength is a rough metric of how much this filter restricts the subset
89  /** The definition is loose, but as a rough guide it should be
90  the fraction of the states that are eliminated by the filter.
91  */
92  virtual double get_strength(const Subset &s,
93  const Subsets &prior_subsets) const = 0;
94 
95  virtual ~SubsetFilterTable();
96 };
97 
99 
100 //! Filter a configuration of the subset using the Model thresholds
101 /** This filter table creates filters using the maximum scores
102  set in the Model for various restraints.
103  */
104 class IMPDOMINOEXPORT RestraintScoreSubsetFilterTable
105  : public SubsetFilterTable {
107  mutable Restraints rs_;
108 
109  public:
111  /** Create the RestraintCache internally with unbounded size.*/
113  ParticleStatesTable *pst);
115  const IMP::domino::Subset &s, const IMP::domino::Subsets &excluded) const
116  override;
117  virtual double get_strength(const IMP::domino::Subset &s,
118  const IMP::domino::Subsets &excluded) const
119  override;
121 };
122 
124 
125 //! Filter a configuration of the subset by the number of violated restraints
126 /** Filter based on an allowed number of failures for the restraints
127  in a list passed.
128  */
130  : public SubsetFilterTable {
132  Restraints rs_;
133  int max_violated_;
135  const Subsets &excluded) const;
136 
137  public:
139  RestraintCache *rc,
140  int max_number_allowed_violations);
141  int get_maximum_number_of_violated_restraints() const {
142  return max_violated_;
143  }
145  const IMP::domino::Subset &s, const IMP::domino::Subsets &excluded) const
146  override;
147  virtual double get_strength(const IMP::domino::Subset &s,
148  const IMP::domino::Subsets &excluded) const
149  override;
151 };
152 
155 
156 /** \brief A base class for filter tables that act on disjoint sets of particles
157 
158  A number of filters work on disjoint sets of the input particles.
159  These can be specified in several different ways:
160  - implicitly via having the same ParticleStates objects
161  - as a list of particle equivalencies
162  - as a list of disjoint sets of equivalent particles
163  */
164 class IMPDOMINOEXPORT DisjointSetsSubsetFilterTable : public SubsetFilterTable {
166  ParticlesTemp elements_;
167  boost::vector_property_map<int> parent_, rank_;
168  mutable boost::disjoint_sets<boost::vector_property_map<int>,
169  boost::vector_property_map<int> > disjoint_sets_;
170  boost::unordered_map<const Particle *, int> index_;
 // NOTE(review): sets_ and set_indexes_ are mutable, presumably so the const
 // build_sets() can (re)fill them on demand - confirm in the .cpp.
171  mutable Vector<ParticlesTemp> sets_;
172  mutable boost::unordered_map<const Particle *, int> set_indexes_;
173 
174  int get_index(Particle *p);
175 
176  void build_sets() const;
177 
178  protected:
 //! Return the number of sets; build_sets() is called first.
179  unsigned int get_number_of_sets() const {
180  build_sets();
181  return sets_.size();
182  }
 //! Return a copy of the i'th set. Unlike get_number_of_sets(), this does
 //! not call build_sets() and does not range-check i.
183  ParticlesTemp get_set(unsigned int i) const { return sets_[i]; }
184  DisjointSetsSubsetFilterTable(ParticleStatesTable *pst, std::string name);
185  DisjointSetsSubsetFilterTable(std::string name);
186 #ifndef IMP_DOXYGEN
187  void get_indexes(const Subset &s, const Subsets &excluded,
188  Vector<Ints> &ret, int lb, Ints &used) const;
 //! Return the value stored for p in set_indexes_, or -1 if p has no entry.
189  int get_index_in_set(Particle *p) const {
190  const auto it = set_indexes_.find(p);  // one lookup, reused below
191  if (it == set_indexes_.end()) {
192  return -1;
193  }
194  return it->second;
195  }
196 #endif
197 
198  public:
199  void add_set(const ParticlesTemp &ps);
200  void add_pair(const ParticlePair &pp);
201 };
202 
203 #if !defined(SWIG) && !defined(IMP_DOXYGEN)
// Constructor taking a ParticleStatesTable: forwards name to the base class,
// stores pst in pst_, and builds disjoint_sets_ over the rank_ and parent_
// property maps.
204 inline DisjointSetsSubsetFilterTable::DisjointSetsSubsetFilterTable(
205  ParticleStatesTable *pst, std::string name)
206  : SubsetFilterTable(name), pst_(pst), disjoint_sets_(rank_, parent_) {}
// Constructor without a ParticleStatesTable: pst_ is not mentioned in the
// initializer list; disjoint_sets_ is built the same way as above.
207 inline DisjointSetsSubsetFilterTable::DisjointSetsSubsetFilterTable(
208  std::string name)
209  : SubsetFilterTable(name), disjoint_sets_(rank_, parent_) {}
210 #endif
211 
212 /** \brief Do not allow two particles to be in the same state.
213 
214  If a ParticleStatesTable is passed, then two particles cannot
215  be in the same state if they have the same ParticleStates,
216  otherwise, if a ParticlePairs is passed then pairs found in the
217  list are not allowed to have the same state index.
218  */
220 
221 /** \brief Require two particles to be in the same state.
222 
223  If a ParticleStatesTable is passed, then two particles must
224  be in the same state if they have the same ParticleStates,
225  otherwise, if a ParticlePairs is passed then pairs found in the
226  list must have the same state index.
227  */
229 
230 /** \brief Define sets of equivalent particles
231 
232  Particles in an equivalency set are assumed to be equivalent under
233  exchange. Given that, one should only generate each of the equivalent
234  conformations once. More specifically, given equivalent particles
235  p0 and p1, if p0 is given state s0 and p1 is given state s1, then
236  p1 will never be given state s0 when p0 is given the state s1.
237 */
239 
240 /** \brief Define sets of equivalent and exclusive particles
241 
242  This is equivalent to having both an EquivalenceSubsetFilterTable
243  and an ExclusionSubsetFilterTable on the same particles, but faster.
244 */
245 IMP_DISJOINT_SUBSET_FILTER_TABLE_DECL(EquivalenceAndExclusion);
246 
247 /** \brief Maintain an explicit list of what states each particle
248  is allowed to have.
249 
250  This filter maintains a list for each particle storing whether
251  that particle is allowed to be in a certain state or not.
252  */
253 class IMPDOMINOEXPORT ListSubsetFilterTable : public SubsetFilterTable {
254  public:
255 #if !defined(IMP_DOXYGEN) && !defined(SWIG)
256  boost::unordered_map<Particle *, int> map_;
 // Counters read by get_ok_rate(); NOTE(review): mutable, presumably so that
 // const filtering code can update them - confirm in the .cpp.
259  mutable double num_ok_, num_test_;
260  int get_index(Particle *p) const;
261  void load_indexes(const Subset &s, Ints &indexes) const;
262  void mask_allowed_states(Particle *p,
263  const boost::dynamic_bitset<> &bs);
264 #endif
265  public:
 //! Return num_ok_ divided by num_test_.
 /** NOTE(review): the division is not guarded against num_test_ == 0, so
 the result is not a usable rate in that case - confirm that callers only
 query it after tests have been counted.
 */
267  double get_ok_rate() const { return num_ok_ / num_test_; }
 //! Return the number of states recorded for the particle p
 /** If get_index(p) is -1, the count is taken from the ParticleStates that
 pst_ holds for p; otherwise it is the size of the states_ entry at that
 index.
 */
268  unsigned int get_number_of_particle_states(Particle *p) const {
269  int i = get_index(p);
270  if (i == -1) {
271  return pst_->get_particle_states(p)->get_number_of_particle_states();
272  }
 // -1 was handled above, so this check only fires for other negative indexes.
273  IMP_USAGE_CHECK(i >= 0,
274  "Particle " << p->get_name()
275  << " is unknown. It probably is not in the "
276  << " ParticleStatesTable. Boom.");
277  return states_[i].size();
278  }
279  void set_allowed_states(Particle *p, const Ints &states);
280  virtual IMP::domino::SubsetFilter *get_subset_filter(
281  const IMP::domino::Subset &s, const IMP::domino::Subsets &excluded) const
282  override;
283  virtual double get_strength(const IMP::domino::Subset &s,
284  const IMP::domino::Subsets &excluded) const
285  override;
287 };
288 
290 
291 /** For provided pairs of particles, only allow them to be in certain
292  explicitly listed pairs of states. That is, if the particle
293  pair (p0, p1) is added, with the list [(0,1), (3,4)], then
294  (p0, p1) can only be in (0,1) or (3,4). Note, this class
295  assumes that the single particles are handled appropriately.
296  That is, that something else is restricting p0 to only 0 or 3.
297 */
298 class IMPDOMINOEXPORT PairListSubsetFilterTable : public SubsetFilterTable {
 // Maps a particle pair to a list of state-index pairs.
 // NOTE(review): presumably filled by set_allowed_states() - confirm in the .cpp.
299  boost::unordered_map<ParticlePair, IntPairs> allowed_;
300  void fill(const Subset &s, const Subsets &e, IntPairs &indexes,
301  Vector<IntPairs> &allowed) const;
302 
303  public:
 //! Specify the list of state pairs that the particle pair p may be in
305  void set_allowed_states(ParticlePair p, const IntPairs &states);
306  virtual IMP::domino::SubsetFilter *get_subset_filter(
307  const IMP::domino::Subset &s, const IMP::domino::Subsets &excluded) const
308  override;
309  virtual double get_strength(const IMP::domino::Subset &s,
310  const IMP::domino::Subsets &excluded) const
311  override;
313 };
314 
316 
317 /** Randomly reject some of the states. The purpose of this is
318  to try to generate a sampling of the total states when there
319  are a very large number of acceptable states.
320 */
321 class IMPDOMINOEXPORT ProbabilisticSubsetFilterTable
322  : public SubsetFilterTable {
 // NOTE(review): presumably hold the constructor arguments p and
 // leaves_only - confirm in the .cpp.
323  double p_;
324  bool leaves_only_;
325 
326  public:
327  /** \param[in] p Probability with which each state is allowed to pass
328  \param[in] leaves_only If true, only filter the leaves of
329  the merge tree.
330  */
331  ProbabilisticSubsetFilterTable(double p, bool leaves_only = false);
332  virtual IMP::domino::SubsetFilter *get_subset_filter(
333  const IMP::domino::Subset &s, const IMP::domino::Subsets &excluded) const
334  override;
335  virtual double get_strength(const IMP::domino::Subset &s,
336  const IMP::domino::Subsets &excluded) const
337  override;
339 };
340 
342 
343 IMPDOMINO_END_NAMESPACE
344 
345 #endif /* IMPDOMINO_SUBSET_FILTERS_H */
Maintain an explicit list of what states each particle is allowed to have.
virtual SubsetFilter * get_subset_filter(const Subset &s, const Subsets &prior_subsets) const =0
A Bayesian inference-based sampler.
RestraintsTemp get_restraints(const Subset &s, const ParticleStatesTable *pst, const DependencyGraph &dg, RestraintSet *rs)
A class to store a fixed array of same-typed values.
Definition: Array.h:40
#define IMP_OBJECT_METHODS(Name)
Define the basic things needed by any Object.
Definition: object_macros.h:25
Storage of a model, its restraints, constraints and particles.
Filter a configuration of the subset using the Model thresholds.
Represent a subset of the particles being optimized.
Definition: Subset.h:33
Various general useful macros for IMP.
A more IMP-like version of the std::vector.
Definition: Vector.h:50
A smart pointer to a reference counted object.
Definition: Pointer.h:87
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Common base class for heavy weight IMP objects.
Definition: Object.h:111
Provide a consistent interface for things that take Restraints as arguments.
Definition: Restraint.h:362
Various general useful macros for IMP.
Various important macros for implementing decorators.
A smart pointer to a ref-counted Object that is a class member.
Definition: Pointer.h:143
virtual double get_strength(const Subset &s, const Subsets &prior_subsets) const =0
The strength is a rough metric of how this filter restricts the subset.
A Bayesian inference-based sampler.
#define IMP_OBJECTS(Name, PluralName)
Define the types for storing lists of object pointers.
Definition: object_macros.h:44
A nullptr-initialized pointer to an IMP Object.
A shared base class to help in debugging and things.
Store a configuration of a subset.
Definition: Assignment.h:35
Object(std::string name)
Construct an object with the given name.
Class to handle individual particles of a Model object.
Definition: Particle.h:43
#define IMP_USAGE_CHECK(expr, message)
A runtime test for incorrect usage of a class or method.
Definition: check_macros.h:168
A Bayesian inference-based sampler.
Store a set of configurations of the model.
Filter a configuration of the subset using the Model thresholds.
virtual int get_next_state(int pos, const Assignment &state) const
Return a next possible acceptable state for the particle in pos.
A Bayesian inference-based sampler.
ParticleIndexes get_indexes(const ParticlesTemp &ps)
Get the indexes from a list of particles.
#define IMP_DISJOINT_SUBSET_FILTER_TABLE_DECL(Name)
Definition: domino_macros.h:19