IMP logo
IMP Reference Guide  develop.7cb8855c60,2024/10/04
The Integrative Modeling Platform
point_clustering.h
Go to the documentation of this file.
1 /**
2  * \file IMP/statistics/point_clustering.h
3  * \brief Cluster sets of points.
4  *
5  * Copyright 2007-2022 IMP Inventors. All rights reserved.
6  *
7  */
8 
9 #ifndef IMPSTATISTICS_POINT_CLUSTERING_H
10 #define IMPSTATISTICS_POINT_CLUSTERING_H
11 
12 #include <IMP/statistics/statistics_config.h>
14 #include "Embedding.h"
15 
16 IMPSTATISTICS_BEGIN_NAMESPACE
17 
18 /** Return a k-means clustering of all points contained in the
19  embedding (i.e. [0... embedding->get_number_of_embeddings())).
20  These points are then clustered into k clusters. More iterations
21  take longer but produce a better clustering.
22 
23  The algorithm uses algebra::EuclideanVectorKDMetric for computing
24  distances between embeddings and cluster centers. This can be
25  parameterized if desired.
26 */
27 IMPSTATISTICSEXPORT PartitionalClusteringWithCenter *create_lloyds_kmeans(
28  Embedding *embedding, unsigned int k, unsigned int iterations);
29 
30 /** Two points \f$p_i\f$ and \f$p_j\f$ are in the same cluster if
31  there is a sequence of points \f$\left(p^{ij}_{0}\dots p^{ij}_k\right)\f$
32  from \f$p_i\f$ to \f$p_j\f$ such that \f$\forall l ||p^{ij}_l-p^{ij}_{l+1}|| < d\f$.
33  */
34 IMPSTATISTICSEXPORT PartitionalClusteringWithCenter *
35  create_connectivity_clustering(Embedding *embed, double dist);
36 
37 /** The space is gridded into bins of side length \c side, and all points
38  that fall in the same grid bin are made part of the same cluster.
39 */
40 IMPSTATISTICSEXPORT PartitionalClusteringWithCenter *
41  create_bin_based_clustering(Embedding *embed, double side);
42 
43 /** Cluster by repeatedly removing edges that have many
44  shortest paths passing through them. The process
45  terminates once a set number of connected components
46  has been reached. Other termination criteria
47  can be added if someone proposes them.
48  */
49 IMPSTATISTICSEXPORT PartitionalClustering *create_centrality_clustering(
50  Embedding *d, double far, int k);
51 
52 IMPSTATISTICS_END_NAMESPACE
53 
54 #endif /* IMPSTATISTICS_POINT_CLUSTERING_H */
PartitionalClustering * create_centrality_clustering(Embedding *d, double far, int k)
PartitionalClusteringWithCenter * create_lloyds_kmeans(Embedding *embedding, unsigned int k, unsigned int iterations)
PartitionalClusteringWithCenter * create_connectivity_clustering(Embedding *embed, double dist)
PartitionalClusteringWithCenter * create_bin_based_clustering(Embedding *embed, double side)
Cluster sets of points.
Cluster sets of points.