9 #ifndef IMPEM2D_HIERARCHICAL_CLUSTERING_H
10 #define IMPEM2D_HIERARCHICAL_CLUSTERING_H
12 #include "IMP/em2d/em2d_config.h"
22 IMPEM2D_BEGIN_NAMESPACE
24 typedef std::pair<unsigned int ,double> pair_cluster_id_distance;
25 typedef std::list< pair_cluster_id_distance > list_cluster_id_distance;
32 void print_vector(
const std::vector<T> &v) {
33 for (
unsigned int i=0;i<v.size();++i) {
34 std::cout << v[i] <<
" , ";
36 std::cout << std::endl;
40 class IMPEM2DEXPORT ClusterSet {
46 ClusterSet(
unsigned int N);
54 void do_join_clusters(
unsigned int cluster_id1,
55 unsigned int cluster_id2,
56 double distance_between_clusters);
63 Ints get_cluster_elements(
unsigned int id)
const;
72 Ints get_clusters_below_cutoff(
double cutoff)
const;
77 Ints get_cluster_formed_at_step(
unsigned int step)
const;
80 double get_distance_at_step(
unsigned int step)
const;
82 unsigned int get_id_for_cluster_at_step(
unsigned int step)
const {
83 return step+n_elements_;
102 FloatsList get_linkage_matrix_in_matlab_format()
const;
105 unsigned int get_number_of_steps()
const {
109 void show(std::ostream &out)
const;
113 void check_step_value(
unsigned int s)
const;
114 unsigned int get_step_from_id(
unsigned int id)
const {
115 return (
id-n_elements_);
119 unsigned int n_elements_;
120 Ints joined_ids1_,joined_ids2_;
121 Floats cluster_distances_;
132 class IMPEM2DEXPORT SingleLinkage {
145 double operator()(
unsigned int id1,
147 const ClusterSet &cluster_set,
150 void show(std::ostream &out)
const {
151 out <<
"SingleLinkage";
159 class IMPEM2DEXPORT CompleteLinkage {
168 double operator()(
unsigned int id1,
170 const ClusterSet &cluster_set,
173 void show(std::ostream &out)
const {
174 out <<
"CompleteLinkage";
184 class IMPEM2DEXPORT AverageDistanceLinkage {
186 AverageDistanceLinkage(){}
192 double operator()(
unsigned int id1,
194 const ClusterSet &cluster_set,
197 void show(std::ostream &out)
const {
198 out <<
"AverageDistanceLinkage";
214 template<
class LinkageFunction>
222 "starting hierarchical_agglomerative_clustering " << std::endl);
224 unsigned int N = distances.size();
227 std::vector< list_cluster_id_distance > lists(N);
231 std::vector<bool> active_list(N);
232 std::fill(active_list.begin(),active_list.end(),
true);
234 for (
unsigned int n=0;n<N;++n) {
235 for(
unsigned int i=0;i<N;++i) {
237 lists[n].push_back(std::make_pair(i,distances[n][i]));
245 ClusterSet cluster_set(N);
246 LinkageFunction linkage_function;
247 unsigned int steps = N-1;
249 for (
unsigned int k=0;k<steps;++k) {
252 double minimum_distance=std::numeric_limits<double>::max();
255 for (
unsigned int j=0;j<N;++j) {
256 if(active_list[j]==
true) {
258 if(lists[j].front().second < minimum_distance) {
259 minimum_distance=lists[j].front().second;
265 unsigned int l2=lists[l1].front().first;
266 minimum_distance=lists[l2].front().second;
267 IMP_LOG_TERSE(
"step " << k <<
": joining clusters " << cluster_id[l1]
268 <<
" and " << cluster_id[l2]
269 <<
" to form cluster "
270 << cluster_set.get_id_for_cluster_at_step(k) <<
" distance = "
271 << minimum_distance << std::endl);
272 cluster_set.do_join_clusters(cluster_id[l1],
275 active_list[l2]=
false;
276 cluster_id[l1]=cluster_set.get_id_for_cluster_at_step(k);
280 for (
unsigned int i=0;i<N;++i) {
281 IMP_LOG_TERSE(
"Updating list of distances " << i << std::endl);
282 if(active_list[i]==
true && i!=l1) {
285 list_cluster_id_distance::iterator it;
286 for (it=lists[i].begin() ; it!=lists[i].end() ; ++it) {
287 if((*it).first == l1 || (*it).first == l2 ) {
293 double dist = linkage_function(cluster_id[l1],
297 IMP_LOG_TERSE(
"Distance by linkage function "<< dist<< std::endl);
298 lists[i].push_back(std::make_pair(l1,dist));
299 lists[l1].push_back(std::make_pair(i,dist));
303 for (
unsigned int i=0;i<N;++i) {
304 if(active_list[i]==
true) {
312 IMPEM2D_END_NAMESPACE