00001 /** 00002 * \file KMCentersNodeSplit.h 00003 * \brief a split node in the kc-tree with two children 00004 * 00005 * Copyright 2007-2010 IMP Inventors. All rights reserved. 00006 * 00007 */ 00008 00009 #ifndef IMPSTATISTICS_KM_CENTERS_NODE_SPLIT_H 00010 #define IMPSTATISTICS_KM_CENTERS_NODE_SPLIT_H 00011 00012 #include "KMCentersNode.h" 00013 #include "KMData.h" 00014 #include "KMRectangle.h" 00015 IMPSTATISTICS_BEGIN_NAMESPACE 00016 00017 #ifndef IMP_DOXYGEN 00018 00019 //! kc-tree splitting node. 00020 /** Splitting nodes contain a cutting dimension and a cutting value. These 00021 indicate the axis-parellel plane which subdivide the box for this node. 00022 The extent of the bounding box along the cutting dimension is maintained 00023 (this is used to speed up point to box distance calculations) [we do not 00024 store the entire bounding box since this may be wasteful of space in 00025 high dimensions]. We also store pointers to the 2 children. 00026 \unstable{KMCentersNodeSplit} 00027 */ 00028 class IMPSTATISTICSEXPORT KMCentersNodeSplit : public KMCentersNode 00029 { 00030 public: 00031 KMCentersNodeSplit(){} 00032 //!Constractor 00033 /** 00034 \param[in] dim the dimension of the points 00035 \param[in] bb the bounding box 00036 \param[in] cd the cutting dimension 00037 \param[in] cv the cutting value 00038 */ 00039 KMCentersNodeSplit( 00040 int level,KMRectangle &bb,KMCenters *centers,int cd,double cv, double lv, 00041 double hv, KMCentersNode * lc=NULL, KMCentersNode * hc=NULL) 00042 : KMCentersNode(bb, centers,level) { 00043 cut_dim_ = cd; 00044 cut_val_ = cv; 00045 cd_bnds_[0] = lv; // lower bound for rectangle 00046 cd_bnds_[1] = hv; // upper bound for rectangle 00047 children_[0]= lc; // left child 00048 children_[1] = hc; // right child 00049 } 00050 virtual ~KMCentersNodeSplit(); 00051 //! Compute the sums of a split node. 00052 /** The sums of such a node derive from the children. 00053 The sum on a specific dimension is the sum of the sums of the children 00054 on that dimension. 00055 */ 00056 void compute_sums(); 00057 //! Compute neighbors for centers 00058 void get_neighbors(const std::vector<int> &cands, 00059 KMPointArray *sums, KMPoint *sum_sqs,std::vector<int> *weights); 00060 void get_assignments(const std::vector<int> &cands, 00061 std::vector<int> &close_center); 00062 /** Let m denote the number of data points 00063 descended from this node. Then with probability 1/(2m-1), this cell is 00064 chosen. Otherwise, let mL and mR denote the number of points associated 00065 with the left and right subtrees, respectively. We sample from the left 00066 subtree with probability (2mL-1)/(2m-1) and sample from the right subtree 00067 with probability (2mR-1)/(2m-1). 00068 00069 The rationale is that, assuming there is exactly one point per leaf node, 00070 a subtree with m points has exactly 2m-1 total nodes. (This should be 00071 true for this implementation, but it depends in general on the fact that 00072 there is exactly one point per leaf node.) Hence, the root should be 00073 sampled with probability 1/(2m-1), and the subtrees should be sampled 00074 with the given probabilities. 00075 */ 00076 KMPoint sample_center(); 00077 00078 //! Print node 00079 void show(std::ostream &out = std::cout) const; 00080 protected: 00081 int cut_dim_; // dim orthogonal to cutting plane 00082 double cut_val_; // location of cutting plane 00083 double cd_bnds_[2]; // lower and upper bounds of 00084 // rectangle along cut_dim 00085 KMCentersNode *children_[2]; // left and right children 00086 }; 00087 00088 #endif 00089 00090 IMPSTATISTICS_END_NAMESPACE 00091 #endif /* IMPSTATISTICS_KM_CENTERS_NODE_SPLIT_H */