ATTPCROOT  0.3.0-alpha
A ROOT-based framework for analyzing data from active target detectors
fastcluster.cxx
Go to the documentation of this file.
1 //
2 // C++ standalone verion of fastcluster by Daniel Müllner
3 //
4 // Copyright: Christoph Dalitz, 2018
5 // Daniel Müllner, 2011
6 // License: BSD style license
7 // (see the file LICENSE for details)
8 //
9 
10 #include "fastcluster.h"
11 
12 #include <algorithm>
13 #include <vector>
14 
15 // Code by Daniel Müllner
16 // workaround to make it usable as a standalone version (without R)
17 bool fc_isnan(double x)
18 {
19  return false;
20 }
21 // clang-format off
22 #include "fastcluster_dm.cxx" // These headers need to be included in this spefic order
23 #include "fastcluster_R_dm.cxx"
24 // clang-format on
25 
26 //
27 // Assigns cluster labels (0, ..., nclust-1) to the n points such
28 // that the cluster result is split into nclust clusters.
29 //
30 // Input arguments:
31 // n = number of observables
32 // merge = clustering result in R format
33 // nclust = number of clusters
34 // Output arguments:
35 // labels = allocated integer array of size n for result
36 //
37 void cutree_k(int n, const int *merge, int nclust, int *labels)
38 {
39 
40  int k, m1, m2, j, l;
41 
42  if (nclust > n || nclust < 2) {
43  for (j = 0; j < n; j++)
44  labels[j] = 0;
45  return;
46  }
47 
48  // assign to each observable the number of its last merge step
49  // beware: indices of observables in merge start at 1 (R convention)
50  std::vector<int> last_merge(n, 0);
51  for (k = 1; k <= (n - nclust); k++) {
52  // (m1,m2) = merge[k,]
53  m1 = merge[k - 1];
54  m2 = merge[n - 1 + k - 1];
55  if (m1 < 0 && m2 < 0) { // both single observables
56  last_merge[-m1 - 1] = last_merge[-m2 - 1] = k;
57  } else if (m1 < 0 || m2 < 0) { // one is a cluster
58  if (m1 < 0) {
59  j = -m1;
60  m1 = m2;
61  } else
62  j = -m2;
63  // merging single observable and cluster
64  for (l = 0; l < n; l++)
65  if (last_merge[l] == m1)
66  last_merge[l] = k;
67  last_merge[j - 1] = k;
68  } else { // both cluster
69  for (l = 0; l < n; l++) {
70  if (last_merge[l] == m1 || last_merge[l] == m2)
71  last_merge[l] = k;
72  }
73  }
74  }
75 
76  // assign cluster labels
77  int label = 0;
78  std::vector<int> z(n, -1);
79  for (j = 0; j < n; j++) {
80  if (last_merge[j] == 0) { // still singleton
81  labels[j] = label++;
82  } else {
83  if (z[last_merge[j]] < 0) {
84  z[last_merge[j]] = label++;
85  }
86  labels[j] = z[last_merge[j]];
87  }
88  }
89 }
90 
91 //
92 // Assigns cluster labels (0, ..., nclust-1) to the n points such
93 // that the hierarchical clustering is stopped when cluster distance >= cdist
94 //
95 // Input arguments:
96 // n = number of observables
97 // merge = clustering result in R format
98 // height = cluster distance at each merge step
99 // cdist = cutoff cluster distance
100 // Output arguments:
101 // labels = allocated integer array of size n for result
102 //
103 void cutree_cdist(int n, const int *merge, double *height, double cdist, int *labels)
104 {
105 
106  int k;
107 
108  for (k = 0; k < (n - 1); k++) {
109  if (height[k] >= cdist) {
110  break;
111  }
112  }
113  cutree_k(n, merge, n - k, labels);
114 }
115 
116 //
117 // Hierarchical clustering with one of Daniel Muellner's fast algorithms
118 //
119 // Input arguments:
120 // n = number of observables
121 // distmat = condensed distance matrix, i.e. an n*(n-1)/2 array representing
122 // the upper triangle (without diagonal elements) of the distance
123 // matrix, e.g. for n=4:
124 // d00 d01 d02 d03
125 // d10 d11 d12 d13 -> d02 d02 d03 d12 d13 d23
126 // d20 d21 d22 d23
127 // d30 d31 d32 d33
128 // method = cluster metric (see enum method_code)
129 // Output arguments:
130 // merge = allocated (n-1)x2 matrix (2*(n-1) array) for storing result.
131 // Result follows R hclust convention:
132 // - observabe indices start with one
133 // - merge[i][] contains the merged nodes in step i
134 // - merge[i][j] is negative when the node is an atom
135 // height = allocated (n-1) array with distances at each merge step
136 // Return code:
137 // 0 = ok
138 // 1 = invalid method
139 //
140 int hclust_fast(int n, double *distmat, int method, int *merge, double *height)
141 {
142 
143  // call appropriate culstering function
144  cluster_result Z2(n - 1);
145  if (method == HCLUST_METHOD_SINGLE) {
146  // single link
147  MST_linkage_core(n, distmat, Z2);
148  } else if (method == HCLUST_METHOD_COMPLETE) {
149  // complete link
150  NN_chain_core<METHOD_METR_COMPLETE, t_float>(n, distmat, NULL, Z2);
151  } else if (method == HCLUST_METHOD_AVERAGE) {
152  // best average distance
153  double *members = new double[n];
154  for (int i = 0; i < n; i++)
155  members[i] = 1;
156  NN_chain_core<METHOD_METR_AVERAGE, t_float>(n, distmat, members, Z2);
157  delete[] members;
158  } else if (method == HCLUST_METHOD_MEDIAN) {
159  // best median distance (beware: O(n^3))
160  generic_linkage<METHOD_METR_MEDIAN, t_float>(n, distmat, NULL, Z2);
161  } else {
162  return 1;
163  }
164 
165  int *order = new int[n];
166  if (method == HCLUST_METHOD_MEDIAN) {
167  generate_R_dendrogram<true>(merge, height, order, Z2, n);
168  } else {
169  generate_R_dendrogram<false>(merge, height, order, Z2, n);
170  }
171 
172  delete[] order; // only needed for visualization
173 
174  return 0;
175 }
fastcluster_dm.cxx
fc_isnan
bool fc_isnan(double x)
Definition: fastcluster.cxx:17
cluster_result
Definition: fastcluster_dm.cxx:223
HCLUST_METHOD_AVERAGE
@ HCLUST_METHOD_AVERAGE
Definition: fastcluster.h:68
HCLUST_METHOD_MEDIAN
@ HCLUST_METHOD_MEDIAN
Definition: fastcluster.h:69
fastcluster.h
fastcluster_R_dm.cxx
cutree_k
void cutree_k(int n, const int *merge, int nclust, int *labels)
Definition: fastcluster.cxx:38
cutree_cdist
void cutree_cdist(int n, const int *merge, double *height, double cdist, int *labels)
Definition: fastcluster.cxx:104
hclust_fast
int hclust_fast(int n, double *distmat, int method, int *merge, double *height)
Definition: fastcluster.cxx:141
HCLUST_METHOD_COMPLETE
@ HCLUST_METHOD_COMPLETE
Definition: fastcluster.h:67
HCLUST_METHOD_SINGLE
@ HCLUST_METHOD_SINGLE
Definition: fastcluster.h:66