9 #ifndef IMPEM2D_HIERARCHICAL_CLUSTERING_H
10 #define IMPEM2D_HIERARCHICAL_CLUSTERING_H
12 #include "IMP/em2d/em2d_config.h"
22 IMPEM2D_BEGIN_NAMESPACE
24 typedef std::pair<unsigned int, double> pair_cluster_id_distance;
25 typedef std::list<pair_cluster_id_distance> list_cluster_id_distance;
30 void print_vector(
const std::vector<T> &v) {
31 for (
unsigned int i = 0; i < v.size(); ++i) {
32 std::cout << v[i] <<
" , ";
34 std::cout << std::endl;
50 void do_join_clusters(
unsigned int cluster_id1,
unsigned int cluster_id2,
51 double distance_between_clusters);
57 Ints get_cluster_elements(
unsigned int id)
const;
64 Ints get_clusters_below_cutoff(
double cutoff)
const;
67 Ints get_cluster_formed_at_step(
unsigned int step)
const;
70 double get_distance_at_step(
unsigned int step)
const;
72 unsigned int get_id_for_cluster_at_step(
unsigned int step)
const {
73 return step + n_elements_;
91 FloatsList get_linkage_matrix_in_matlab_format()
const;
96 void show(std::ostream &out)
const;
99 void check_step_value(
unsigned int s)
const;
100 unsigned int get_step_from_id(
unsigned int id)
const {
101 return (
id - n_elements_);
105 unsigned int n_elements_;
106 Ints joined_ids1_, joined_ids2_;
107 Floats cluster_distances_;
129 double operator()(
unsigned int id1,
unsigned int id2,
133 void show(std::ostream &out)
const {
134 out <<
"SingleLinkage";
149 double operator()(
unsigned int id1,
unsigned int id2,
152 void show(std::ostream &out)
const {
153 out <<
"CompleteLinkage";
167 double operator()(
unsigned int id1,
unsigned int id2,
170 void show(std::ostream &out)
const {
171 out <<
"AverageDistanceLinkage";
183 template <
class LinkageFunction>
190 IMP_LOG_TERSE(
"starting hierarchical_agglomerative_clustering " << std::endl);
192 unsigned int N = distances.size();
195 std::vector<list_cluster_id_distance> lists(N);
199 std::vector<bool> active_list(N);
200 std::fill(active_list.begin(), active_list.end(),
true);
202 for (
unsigned int n = 0; n < N; ++n) {
203 for (
unsigned int i = 0; i < N; ++i) {
205 lists[n].push_back(std::make_pair(i, distances[n][i]));
214 LinkageFunction linkage_function;
215 unsigned int steps = N - 1;
217 for (
unsigned int k = 0; k < steps; ++k) {
220 double minimum_distance = std::numeric_limits<double>::max();
223 for (
unsigned int j = 0; j < N; ++j) {
224 if (active_list[j] ==
true) {
226 if (lists[j].front().second < minimum_distance) {
227 minimum_distance = lists[j].front().second;
233 unsigned int l2 = lists[l1].front().first;
234 minimum_distance = lists[l2].front().second;
235 IMP_LOG_TERSE(
"step " << k <<
": joining clusters " << cluster_id[l1]
236 <<
" and " << cluster_id[l2] <<
" to form cluster "
237 << cluster_set.get_id_for_cluster_at_step(k)
238 <<
" distance = " << minimum_distance << std::endl);
241 active_list[l2] =
false;
242 cluster_id[l1] = cluster_set.get_id_for_cluster_at_step(k);
246 for (
unsigned int i = 0; i < N; ++i) {
247 IMP_LOG_TERSE(
"Updating list of distances " << i << std::endl);
248 if (active_list[i] ==
true && i != l1) {
251 list_cluster_id_distance::iterator it;
252 for (it = lists[i].begin(); it != lists[i].end(); ++it) {
253 if ((*it).first == l1 || (*it).first == l2) {
259 double dist = linkage_function(cluster_id[l1], cluster_id[i],
260 cluster_set, distances);
261 IMP_LOG_TERSE(
"Distance by linkage function " << dist << std::endl);
262 lists[i].push_back(std::make_pair(l1, dist));
263 lists[l1].push_back(std::make_pair(i, dist));
267 for (
unsigned int i = 0; i < N; ++i) {
268 if (active_list[i] ==
true) {
276 IMPEM2D_END_NAMESPACE
ClusterSet do_hierarchical_agglomerative_clustering(const FloatsList &distances)
Function to perform agglomerative clustering.
Functor for hierarchical clustering based on single linkage.
#define IMP_LOG_TERSE(expr)
Scoring functions for 2D Copyright 2007-2016 IMP Inventors. All rights reserved.
void do_join_clusters(unsigned int cluster_id1, unsigned int cluster_id2, double distance_between_clusters)
join operation
IMP::Vector< Floats > FloatsList
Standard way to pass a bunch of Floats values.
#define IMP_VALUES(Name, PluralName)
Define the type for storing sets of values.
A class to store the clusters generated during hierarchical clustering.
IMP::Vector< Ints > IntsList
Standard way to pass a bunch of Ints values.
std::ostream & show(Hierarchy h, std::ostream &out=std::cout)
Print the hierarchy using a given decorator to display each node.
Functor for hierarchical clustering based on complete linkage.
Functor for hierarchical clustering based on average-linkage.
Logging and error reporting support.
unsigned int get_number_of_steps() const
Returns the number of steps of clustering recorded.
Comparison of pairs by checking the second element.