9 #ifndef IMPEM2D_HIERARCHICAL_CLUSTERING_H
10 #define IMPEM2D_HIERARCHICAL_CLUSTERING_H
12 #include <IMP/em2d/em2d_config.h>
14 #include <IMP/em2d/internal/clustering_helper.h>
22 #include <cereal/access.hpp>
24 IMPEM2D_BEGIN_NAMESPACE
27 void print_vector(
const std::vector<T> &v) {
28 for (
unsigned int i = 0; i < v.size(); ++i) {
29 std::cout << v[i] <<
" , ";
31 std::cout << std::endl;
49 void do_join_clusters(
unsigned int cluster_id1,
unsigned int cluster_id2,
50 double distance_between_clusters);
56 Ints get_cluster_elements(
unsigned int id)
const;
63 Ints get_clusters_below_cutoff(
double cutoff)
const;
66 Ints get_cluster_formed_at_step(
unsigned int step)
const;
69 double get_distance_at_step(
unsigned int step)
const;
71 unsigned int get_id_for_cluster_at_step(
unsigned int step)
const {
72 return step + n_elements_;
90 FloatsList get_linkage_matrix_in_matlab_format()
const;
95 void show(std::ostream &out)
const;
98 void check_step_value(
unsigned int s)
const;
99 unsigned int get_step_from_id(
unsigned int id)
const {
100 return (
id - n_elements_);
104 unsigned int n_elements_;
105 Ints joined_ids1_, joined_ids2_;
106 Floats cluster_distances_;
112 friend class cereal::access;
114 template<
class Archive>
void serialize(Archive &ar) {
115 ar(steps_, n_elements_, joined_ids1_, joined_ids2_,
116 cluster_distances_, clusters_elements_);
136 double operator()(
unsigned int id1,
unsigned int id2,
140 void show(std::ostream &out)
const {
141 out <<
"SingleLinkage";
144 friend class cereal::access;
146 template<
class Archive>
void serialize(Archive &) {}
160 double operator()(
unsigned int id1,
unsigned int id2,
163 void show(std::ostream &out)
const {
164 out <<
"CompleteLinkage";
167 friend class cereal::access;
169 template<
class Archive>
void serialize(Archive &) {}
182 double operator()(
unsigned int id1,
unsigned int id2,
185 void show(std::ostream &out)
const {
186 out <<
"AverageDistanceLinkage";
189 friend class cereal::access;
191 template<
class Archive>
void serialize(Archive &) {}
202 template <
class LinkageFunction>
209 IMP_LOG_TERSE(
"starting hierarchical_agglomerative_clustering " << std::endl);
211 unsigned int N = distances.size();
214 std::vector<internal::list_cluster_id_distance> lists(N);
218 std::vector<bool> active_list(N);
219 std::fill(active_list.begin(), active_list.end(),
true);
221 for (
unsigned int n = 0; n < N; ++n) {
222 for (
unsigned int i = 0; i < N; ++i) {
224 lists[n].push_back(std::make_pair(i, distances[n][i]));
233 LinkageFunction linkage_function;
234 unsigned int steps = N - 1;
236 for (
unsigned int k = 0; k < steps; ++k) {
239 double minimum_distance = std::numeric_limits<double>::max();
242 for (
unsigned int j = 0; j < N; ++j) {
243 if (active_list[j] ==
true) {
245 if (lists[j].front().second < minimum_distance) {
246 minimum_distance = lists[j].front().second;
252 unsigned int l2 = lists[l1].front().first;
253 minimum_distance = lists[l2].front().second;
254 IMP_LOG_TERSE(
"step " << k <<
": joining clusters " << cluster_id[l1]
255 <<
" and " << cluster_id[l2] <<
" to form cluster "
256 << cluster_set.get_id_for_cluster_at_step(k)
257 <<
" distance = " << minimum_distance << std::endl);
260 active_list[l2] =
false;
261 cluster_id[l1] = cluster_set.get_id_for_cluster_at_step(k);
265 for (
unsigned int i = 0; i < N; ++i) {
266 IMP_LOG_TERSE(
"Updating list of distances " << i << std::endl);
267 if (active_list[i] ==
true && i != l1) {
270 lists[i].erase(std::remove_if(lists[i].begin(), lists[i].end(),
271 internal::ListHasDistance(l1, l2)),
274 double dist = linkage_function(cluster_id[l1], cluster_id[i],
275 cluster_set, distances);
276 IMP_LOG_TERSE(
"Distance by linkage function " << dist << std::endl);
277 lists[i].push_back(std::make_pair(l1, dist));
278 lists[l1].push_back(std::make_pair(i, dist));
282 for (
unsigned int i = 0; i < N; ++i) {
283 if (active_list[i] ==
true) {
291 IMPEM2D_END_NAMESPACE
ClusterSet do_hierarchical_agglomerative_clustering(const FloatsList &distances)
Function to perform agglomerative clustering.
Functor for hierarchical clustering based on single linkage.
#define IMP_LOG_TERSE(expr)
Scoring functions for 2D Copyright 2007-2022 IMP Inventors. All rights reserved.
void do_join_clusters(unsigned int cluster_id1, unsigned int cluster_id2, double distance_between_clusters)
join operation
#define IMP_VALUES(Name, PluralName)
Define the type for storing sets of values.
A class to store the clusters generated during hierarchical clustering.
std::ostream & show(Hierarchy h, std::ostream &out=std::cout)
Print the hierarchy using a given decorator to display each node.
Functor for hierarchical clustering based on complete linkage.
Functor for hierarchical clustering based on average-linkage.
Logging and error reporting support.
unsigned int get_number_of_steps() const
Returns the number of steps of clustering recorded.
Comparison of pairs by checking the second element.