9 #ifndef IMPEM2D_HIERARCHICAL_CLUSTERING_H 
   10 #define IMPEM2D_HIERARCHICAL_CLUSTERING_H 
   12 #include <IMP/em2d/em2d_config.h> 
   14 #include <IMP/em2d/internal/clustering_helper.h> 
   22 #include <cereal/access.hpp> 
   24 IMPEM2D_BEGIN_NAMESPACE
 
   27 void print_vector(
const std::vector<T> &v) {
 
   28   for (
unsigned int i = 0; i < v.size(); ++i) {
 
   29     std::cout << v[i] << 
" , ";
 
   31   std::cout << std::endl;
 
   49   void do_join_clusters(
unsigned int cluster_id1, 
unsigned int cluster_id2,
 
   50                         double distance_between_clusters);
 
   56   Ints get_cluster_elements(
unsigned int id) 
const;
 
   63   Ints get_clusters_below_cutoff(
double cutoff) 
const;
 
   66   Ints get_cluster_formed_at_step(
unsigned int step) 
const;
 
   69   double get_distance_at_step(
unsigned int step) 
const;
 
   71   unsigned int get_id_for_cluster_at_step(
unsigned int step)
 const {
 
   72     return step + n_elements_;
 
   90   FloatsList get_linkage_matrix_in_matlab_format() 
const;
 
   95   void show(std::ostream &out) 
const;
 
   98   void check_step_value(
unsigned int s) 
const;
 
   99   unsigned int get_step_from_id(
unsigned int id)
 const {
 
  100     return (
id - n_elements_);
 
  104   unsigned int n_elements_;  
 
  105   Ints joined_ids1_, joined_ids2_;
 
  106   Floats cluster_distances_;
 
  112   friend class cereal::access;
 
  114   template<
class Archive> 
void serialize(Archive &ar) {
 
  115     ar(steps_, n_elements_, joined_ids1_, joined_ids2_,
 
  116        cluster_distances_, clusters_elements_);
 
  136   double operator()(
unsigned int id1, 
unsigned int id2,
 
  140   void show(std::ostream &out)
 const {
 
  141     out << 
"SingleLinkage";
 
  144   friend class cereal::access;
 
  146   template<
class Archive> 
void serialize(Archive &) {}
 
  160   double operator()(
unsigned int id1, 
unsigned int id2,
 
  163   void show(std::ostream &out)
 const {
 
  164     out << 
"CompleteLinkage";
 
  167   friend class cereal::access;
 
  169   template<
class Archive> 
void serialize(Archive &) {}
 
  182   double operator()(
unsigned int id1, 
unsigned int id2,
 
  185   void show(std::ostream &out)
 const {
 
  186     out << 
"AverageDistanceLinkage";
 
  189   friend class cereal::access;
 
  191   template<
class Archive> 
void serialize(Archive &) {}
 
  202 template <
class LinkageFunction>
 
  209   IMP_LOG_TERSE(
"starting hierarchical_agglomerative_clustering " << std::endl);
 
  211   unsigned int N = distances.size();  
 
  214   std::vector<internal::list_cluster_id_distance> lists(N);
 
  218   std::vector<bool> active_list(N);
 
  219   std::fill(active_list.begin(), active_list.end(), 
true);
 
  221   for (
unsigned int n = 0; n < N; ++n) {
 
  222     for (
unsigned int i = 0; i < N; ++i) {
 
  224         lists[n].push_back(std::make_pair(i, distances[n][i]));
 
  233   LinkageFunction linkage_function;
 
  234   unsigned int steps = N - 1;  
 
  236   for (
unsigned int k = 0; k < steps; ++k) {
 
  239     double minimum_distance = std::numeric_limits<double>::max();
 
  242     for (
unsigned int j = 0; j < N; ++j) {
 
  243       if (active_list[j] == 
true) {
 
  245         if (lists[j].front().second < minimum_distance) {
 
  246           minimum_distance = lists[j].front().second;
 
  252     unsigned int l2 = lists[l1].front().first;
 
  253     minimum_distance = lists[l2].front().second;
 
  254     IMP_LOG_TERSE(
"step " << k << 
": joining clusters " << cluster_id[l1]
 
  255                           << 
" and " << cluster_id[l2] << 
" to form cluster " 
  256                           << cluster_set.get_id_for_cluster_at_step(k)
 
  257                           << 
" distance = " << minimum_distance << std::endl);
 
  260     active_list[l2] = 
false;
 
  261     cluster_id[l1] = cluster_set.get_id_for_cluster_at_step(k);
 
  265     for (
unsigned int i = 0; i < N; ++i) {
 
  266       IMP_LOG_TERSE(
"Updating list of distances " << i << std::endl);
 
  267       if (active_list[i] == 
true && i != l1) {
 
  270         lists[i].erase(std::remove_if(lists[i].begin(), lists[i].end(),
 
  271                                       internal::ListHasDistance(l1, l2)),
 
  274         double dist = linkage_function(cluster_id[l1], cluster_id[i],
 
  275                                        cluster_set, distances);
 
  276         IMP_LOG_TERSE(
"Distance by linkage function " << dist << std::endl);
 
  277         lists[i].push_back(std::make_pair(l1, dist));
 
  278         lists[l1].push_back(std::make_pair(i, dist));
 
  282     for (
unsigned int i = 0; i < N; ++i) {
 
  283       if (active_list[i] == 
true) {
 
  291 IMPEM2D_END_NAMESPACE
 
ClusterSet do_hierarchical_agglomerative_clustering(const FloatsList &distances)
Function to perform agglomerative clustering. 
 
Functor for hierarchical clustering based on single linkage. 
 
#define IMP_LOG_TERSE(expr)
 
Scoring functions for 2D Copyright 2007-2022 IMP Inventors. All rights reserved. 
 
void do_join_clusters(unsigned int cluster_id1, unsigned int cluster_id2, double distance_between_clusters)
join operation 
 
#define IMP_VALUES(Name, PluralName)
Define the type for storing sets of values. 
 
A class to store the clusters generated during hierarchical clustering. 
 
std::ostream & show(Hierarchy h, std::ostream &out=std::cout)
Print the hierarchy using a given decorator to display each node. 
 
Functor for hierarchical clustering based on complete linkage. 
 
Functor for hierarchical clustering based on average-linkage. 
 
Logging and error reporting support. 
 
unsigned int get_number_of_steps() const 
Returns the number of steps of clustering recorded. 
 
Comparison of pairs by checking the second element.