9 #ifndef IMPEM2D_HIERARCHICAL_CLUSTERING_H
10 #define IMPEM2D_HIERARCHICAL_CLUSTERING_H
12 #include "IMP/em2d/em2d_config.h"
14 #include <IMP/em2d/internal/clustering_helper.h>
23 IMPEM2D_BEGIN_NAMESPACE
26 void print_vector(
const std::vector<T> &v) {
27 for (
unsigned int i = 0; i < v.size(); ++i) {
28 std::cout << v[i] <<
" , ";
30 std::cout << std::endl;
46 void do_join_clusters(
unsigned int cluster_id1,
unsigned int cluster_id2,
47 double distance_between_clusters);
53 Ints get_cluster_elements(
unsigned int id)
const;
60 Ints get_clusters_below_cutoff(
double cutoff)
const;
63 Ints get_cluster_formed_at_step(
unsigned int step)
const;
66 double get_distance_at_step(
unsigned int step)
const;
68 unsigned int get_id_for_cluster_at_step(
unsigned int step)
const {
69 return step + n_elements_;
87 FloatsList get_linkage_matrix_in_matlab_format()
const;
92 void show(std::ostream &out)
const;
95 void check_step_value(
unsigned int s)
const;
96 unsigned int get_step_from_id(
unsigned int id)
const {
97 return (
id - n_elements_);
101 unsigned int n_elements_;
102 Ints joined_ids1_, joined_ids2_;
103 Floats cluster_distances_;
125 double operator()(
unsigned int id1,
unsigned int id2,
129 void show(std::ostream &out)
const {
130 out <<
"SingleLinkage";
145 double operator()(
unsigned int id1,
unsigned int id2,
148 void show(std::ostream &out)
const {
149 out <<
"CompleteLinkage";
163 double operator()(
unsigned int id1,
unsigned int id2,
166 void show(std::ostream &out)
const {
167 out <<
"AverageDistanceLinkage";
179 template <
class LinkageFunction>
186 IMP_LOG_TERSE(
"starting hierarchical_agglomerative_clustering " << std::endl);
188 unsigned int N = distances.size();
191 std::vector<internal::list_cluster_id_distance> lists(N);
195 std::vector<bool> active_list(N);
196 std::fill(active_list.begin(), active_list.end(),
true);
198 for (
unsigned int n = 0; n < N; ++n) {
199 for (
unsigned int i = 0; i < N; ++i) {
201 lists[n].push_back(std::make_pair(i, distances[n][i]));
210 LinkageFunction linkage_function;
211 unsigned int steps = N - 1;
213 for (
unsigned int k = 0; k < steps; ++k) {
216 double minimum_distance = std::numeric_limits<double>::max();
219 for (
unsigned int j = 0; j < N; ++j) {
220 if (active_list[j] ==
true) {
222 if (lists[j].front().second < minimum_distance) {
223 minimum_distance = lists[j].front().second;
229 unsigned int l2 = lists[l1].front().first;
230 minimum_distance = lists[l2].front().second;
231 IMP_LOG_TERSE(
"step " << k <<
": joining clusters " << cluster_id[l1]
232 <<
" and " << cluster_id[l2] <<
" to form cluster "
233 << cluster_set.get_id_for_cluster_at_step(k)
234 <<
" distance = " << minimum_distance << std::endl);
237 active_list[l2] =
false;
238 cluster_id[l1] = cluster_set.get_id_for_cluster_at_step(k);
242 for (
unsigned int i = 0; i < N; ++i) {
243 IMP_LOG_TERSE(
"Updating list of distances " << i << std::endl);
244 if (active_list[i] ==
true && i != l1) {
247 lists[i].erase(std::remove_if(lists[i].begin(), lists[i].end(),
248 internal::ListHasDistance(l1, l2)),
251 double dist = linkage_function(cluster_id[l1], cluster_id[i],
252 cluster_set, distances);
253 IMP_LOG_TERSE(
"Distance by linkage function " << dist << std::endl);
254 lists[i].push_back(std::make_pair(l1, dist));
255 lists[l1].push_back(std::make_pair(i, dist));
259 for (
unsigned int i = 0; i < N; ++i) {
260 if (active_list[i] ==
true) {
268 IMPEM2D_END_NAMESPACE
ClusterSet do_hierarchical_agglomerative_clustering(const FloatsList &distances)
Function to perform agglomerative clustering.
Functor for hierarchical clustering based on single linkage.
#define IMP_LOG_TERSE(expr)
Scoring functions for 2D Copyright 2007-2020 IMP Inventors. All rights reserved.
void do_join_clusters(unsigned int cluster_id1, unsigned int cluster_id2, double distance_between_clusters)
join operation
#define IMP_VALUES(Name, PluralName)
Define the type for storing sets of values.
A class to store the clusters generated during hierarchical clustering.
std::ostream & show(Hierarchy h, std::ostream &out=std::cout)
Print the hierarchy using a given decorator to display each node.
Functor for hierarchical clustering based on complete linkage.
Functor for hierarchical clustering based on average-linkage.
Logging and error reporting support.
unsigned int get_number_of_steps() const
Returns the number of steps of clustering recorded.
Comparison of pairs by checking the second element.