9 #ifndef IMPEM2D_HIERARCHICAL_CLUSTERING_H
10 #define IMPEM2D_HIERARCHICAL_CLUSTERING_H
12 #include "IMP/em2d/em2d_config.h"
22 IMPEM2D_BEGIN_NAMESPACE
24 typedef std::pair<unsigned int, double> pair_cluster_id_distance;
25 typedef std::list<pair_cluster_id_distance> list_cluster_id_distance;
30 void print_vector(
const std::vector<T> &v) {
31 for (
unsigned int i = 0; i < v.size(); ++i) {
32 std::cout << v[i] <<
" , ";
34 std::cout << std::endl;
38 class IMPEM2DEXPORT ClusterSet {
43 ClusterSet(
unsigned int N);
51 void do_join_clusters(
unsigned int cluster_id1,
unsigned int cluster_id2,
52 double distance_between_clusters);
59 Ints get_cluster_elements(
unsigned int id)
const;
67 Ints get_clusters_below_cutoff(
double cutoff)
const;
72 Ints get_cluster_formed_at_step(
unsigned int step)
const;
75 double get_distance_at_step(
unsigned int step)
const;
77 unsigned int get_id_for_cluster_at_step(
unsigned int step)
const {
78 return step + n_elements_;
96 FloatsList get_linkage_matrix_in_matlab_format()
const;
99 unsigned int get_number_of_steps()
const {
return steps_; }
101 void show(std::ostream &out)
const;
104 void check_step_value(
unsigned int s)
const;
105 unsigned int get_step_from_id(
unsigned int id)
const {
106 return (
id - n_elements_);
110 unsigned int n_elements_;
111 Ints joined_ids1_, joined_ids2_;
112 Floats cluster_distances_;
120 class IMPEM2DEXPORT SingleLinkage {
134 double operator()(
unsigned int id1,
unsigned int id2,
135 const ClusterSet &cluster_set,
138 void show(std::ostream &out)
const {
139 out <<
"SingleLinkage";
145 class IMPEM2DEXPORT CompleteLinkage {
154 double operator()(
unsigned int id1,
unsigned int id2,
155 const ClusterSet &cluster_set,
const FloatsList &distances);
157 void show(std::ostream &out)
const {
158 out <<
"CompleteLinkage";
164 class IMPEM2DEXPORT AverageDistanceLinkage {
166 AverageDistanceLinkage() {}
172 double operator()(
unsigned int id1,
unsigned int id2,
173 const ClusterSet &cluster_set,
const FloatsList &distances);
175 void show(std::ostream &out)
const {
176 out <<
"AverageDistanceLinkage";
188 template <
class LinkageFunction>
195 IMP_LOG_TERSE(
"starting hierarchical_agglomerative_clustering " << std::endl);
197 unsigned int N = distances.size();
200 std::vector<list_cluster_id_distance> lists(N);
204 std::vector<bool> active_list(N);
205 std::fill(active_list.begin(), active_list.end(),
true);
207 for (
unsigned int n = 0; n < N; ++n) {
208 for (
unsigned int i = 0; i < N; ++i) {
210 lists[n].push_back(std::make_pair(i, distances[n][i]));
218 ClusterSet cluster_set(N);
219 LinkageFunction linkage_function;
220 unsigned int steps = N - 1;
222 for (
unsigned int k = 0; k < steps; ++k) {
225 double minimum_distance = std::numeric_limits<double>::max();
228 for (
unsigned int j = 0; j < N; ++j) {
229 if (active_list[j] ==
true) {
231 if (lists[j].front().second < minimum_distance) {
232 minimum_distance = lists[j].front().second;
238 unsigned int l2 = lists[l1].front().first;
239 minimum_distance = lists[l2].front().second;
240 IMP_LOG_TERSE(
"step " << k <<
": joining clusters " << cluster_id[l1]
241 <<
" and " << cluster_id[l2] <<
" to form cluster "
242 << cluster_set.get_id_for_cluster_at_step(k)
243 <<
" distance = " << minimum_distance << std::endl);
244 cluster_set.do_join_clusters(cluster_id[l1], cluster_id[l2],
246 active_list[l2] =
false;
247 cluster_id[l1] = cluster_set.get_id_for_cluster_at_step(k);
251 for (
unsigned int i = 0; i < N; ++i) {
252 IMP_LOG_TERSE(
"Updating list of distances " << i << std::endl);
253 if (active_list[i] ==
true && i != l1) {
256 list_cluster_id_distance::iterator it;
257 for (it = lists[i].begin(); it != lists[i].end(); ++it) {
258 if ((*it).first == l1 || (*it).first == l2) {
264 double dist = linkage_function(cluster_id[l1], cluster_id[i],
265 cluster_set, distances);
266 IMP_LOG_TERSE(
"Distance by linkage function " << dist << std::endl);
267 lists[i].push_back(std::make_pair(l1, dist));
268 lists[l1].push_back(std::make_pair(i, dist));
272 for (
unsigned int i = 0; i < N; ++i) {
273 if (active_list[i] ==
true) {
281 IMPEM2D_END_NAMESPACE
Import IMP/kernel/base_types.h in the namespace.
ClusterSet do_hierarchical_agglomerative_clustering(const FloatsList &distances)
#define IMP_VALUES(Name, PluralName)
Define the type for storing sets of values.
IMP::base::Vector< Floats > FloatsList
Standard way to pass a bunch of Floats values.
Scoring functions for 2D Copyright 2007-2014 IMP Inventors. All rights reserved.
#define IMP_LOG_TERSE(expr)
IMP::base::Vector< Ints > IntsList
Standard way to pass a bunch of Ints values.
void show(Hierarchy h, std::ostream &out=std::cout)
Print out a molecular hierarchy.
IMP::base::Vector< Float > Floats
Standard way to pass a bunch of Float values.
Logging and error reporting support.
Comparison of pairs by checking the second element.
IMP::base::Vector< Int > Ints
Standard way to pass a bunch of Int values.