00001 /** 00002 * \file KMLocalSearch.h \brief Generic algorithm from k-means 00003 * clustering by local search 00004 * 00005 * Copyright 2007-2010 IMP Inventors. All rights reserved. 00006 * 00007 */ 00008 00009 #ifndef IMPSTATISTICS_KM_LOCAL_SEARCH_H 00010 #define IMPSTATISTICS_KM_LOCAL_SEARCH_H 00011 00012 #include "statistics_config.h" 00013 #include "KMFilterCenters.h" 00014 #include "KMCenters.h" 00015 #include "KMTerminationCondition.h" 00016 #include <IMP/base_types.h> 00017 #include <vector> 00018 00019 IMPSTATISTICS_BEGIN_NAMESPACE 00020 00021 #ifndef IMP_DOXYGEN 00022 00023 //! KMLocalSearch 00024 /** A generic algorithm for k-means clustering by local search. 00025 The generic algorithm begins by generating an initial solution "curr" and 00026 saving it in "best". The value of "curr" reflects the current solution and 00027 "best" reflects the best solution seen so far. 00028 The algorithm consists of some number of basic iterations, called "stages" 00029 (for example one stage of Lloyd algorithm). Stages are grouped into "runs". 00030 Intuitively, a run involves a (small) number of stages in search of a better 00031 solution. A run might end, say, because a better solution was found or a fixed 00032 number of stages have been performed without any improvement. 00033 After a run is finished, we check to see whether we want to "accept" the 00034 solution. Presumably this happens if the cost is lower, but it may happen even 00035 if the cost is inferior in other circumstances (e.g., as part of a simulated 00036 annealing approach). Accepting a solution means copying the current solution to 00037 the saved solution. In some cases, the acceptance may involve reseting the 00038 current solution to a random solution. 00039 The generic algorithm: 00040 \verbatim 00041 reset() // resets curr and best 00042 while ( !is_done() ) { // while not done 00043 begin_run() // begin a new run 00044 do{ // do while run is not done 00045 begin_stage() // end of stage processing 00046 preform_stage() //apply a stage 00047 end_stage() // end of stage processing 00048 } while ( !is_run_done() ) // while run is not done 00049 try_acceptance() // accept if appropriate 00050 endRun() // end of run processing 00051 } 00052 return best // return best solution 00053 \endverbatim 00054 \unstable{KMLocalSearch} 00055 */ 00056 class IMPSTATISTICSEXPORT KMLocalSearch { 00057 public: 00058 KMLocalSearch(KMFilterCenters *sol, KMTerminationCondition *term); 00059 00060 virtual ~KMLocalSearch(){} 00061 //! Execute the k-mean clustering 00062 virtual void execute(); 00063 //!Return total number of stages 00064 int get_total_number_of_stages() const { 00065 return stage_num_; 00066 } 00067 const KMFilterCentersResults &get_best() const {return best_;} 00068 protected: 00069 virtual void reset(); 00070 virtual bool is_done() const; 00071 virtual void begin_run(); 00072 //apply stage 00073 virtual void preform_stage() {} 00074 virtual void begin_stage() {} 00075 //! End of stage processing 00076 virtual void end_stage(); 00077 virtual bool is_run_done() { 00078 return is_done(); 00079 } 00080 virtual void end_run() {} 00081 //! Test acceptance 00082 virtual void try_acceptance(); 00083 virtual void log_stage(std::ostream &out=std::cout); 00084 00085 Int num_of_data_points_; 00086 Int num_of_centers_; 00087 Int dim_; 00088 Int stage_num_; 00089 Int run_init_stage_; // the stage at which a new run started 00090 KMFilterCenters *curr_; //current solution 00091 KMFilterCentersResults best_; //best solution so far 00092 KMTerminationCondition *term_; 00093 }; 00094 00095 #endif 00096 00097 IMPSTATISTICS_END_NAMESPACE 00098 #endif /* IMPSTATISTICS_KM_LOCAL_SEARCH_H */