ATTPCROOT  0.3.0-alpha
A ROOT-based framework for analyzing data from active target detectors
fastcluster.cxx
Go to the documentation of this file.
1 //
2 // C++ standalone verion of fastcluster by Daniel Müllner
3 //
4 // Copyright: Christoph Dalitz, 2018
5 // Daniel Müllner, 2011
6 // License: BSD style license
7 // (see the file LICENSE for details)
8 //
9 
10 #include "fastcluster.h"
11 
12 #include <algorithm>
13 #include <vector>
14 
15 // Code by Daniel Müllner
16 // workaround to make it usable as a standalone version (without R)
17 bool fc_isnan(double x)
18 {
19  return false;
20 }
21 
22 // clang-format off
23 #include "fastcluster_dm.cxx"
24 #include "fastcluster_R_dm.cxx"
25 // clang-format on
26 
27 //
28 // Assigns cluster labels (0, ..., nclust-1) to the n points such
29 // that the cluster result is split into nclust clusters.
30 //
31 // Input arguments:
32 // n = number of observables
33 // merge = clustering result in R format
34 // nclust = number of clusters
35 // Output arguments:
36 // labels = allocated integer array of size n for result
37 //
38 void cutree_k(int n, const int *merge, int nclust, int *labels)
39 {
40 
41  int k, m1, m2, j, l;
42 
43  if (nclust > n || nclust < 2) {
44  for (j = 0; j < n; j++)
45  labels[j] = 0;
46  return;
47  }
48 
49  // assign to each observable the number of its last merge step
50  // beware: indices of observables in merge start at 1 (R convention)
51  std::vector<int> last_merge(n, 0);
52  for (k = 1; k <= (n - nclust); k++) {
53  // (m1,m2) = merge[k,]
54  m1 = merge[k - 1];
55  m2 = merge[n - 1 + k - 1];
56  if (m1 < 0 && m2 < 0) { // both single observables
57  last_merge[-m1 - 1] = last_merge[-m2 - 1] = k;
58  } else if (m1 < 0 || m2 < 0) { // one is a cluster
59  if (m1 < 0) {
60  j = -m1;
61  m1 = m2;
62  } else
63  j = -m2;
64  // merging single observable and cluster
65  for (l = 0; l < n; l++)
66  if (last_merge[l] == m1)
67  last_merge[l] = k;
68  last_merge[j - 1] = k;
69  } else { // both cluster
70  for (l = 0; l < n; l++) {
71  if (last_merge[l] == m1 || last_merge[l] == m2)
72  last_merge[l] = k;
73  }
74  }
75  }
76 
77  // assign cluster labels
78  int label = 0;
79  std::vector<int> z(n, -1);
80  for (j = 0; j < n; j++) {
81  if (last_merge[j] == 0) { // still singleton
82  labels[j] = label++;
83  } else {
84  if (z[last_merge[j]] < 0) {
85  z[last_merge[j]] = label++;
86  }
87  labels[j] = z[last_merge[j]];
88  }
89  }
90 }
91 
92 //
93 // Assigns cluster labels (0, ..., nclust-1) to the n points such
94 // that the hierarchical clustering is stopped when cluster distance >= cdist
95 //
96 // Input arguments:
97 // n = number of observables
98 // merge = clustering result in R format
99 // height = cluster distance at each merge step
100 // cdist = cutoff cluster distance
101 // Output arguments:
102 // labels = allocated integer array of size n for result
103 //
104 void cutree_cdist(int n, const int *merge, double *height, double cdist, int *labels)
105 {
106 
107  int k;
108 
109  for (k = 0; k < (n - 1); k++) {
110  if (height[k] >= cdist) {
111  break;
112  }
113  }
114  cutree_k(n, merge, n - k, labels);
115 }
116 
117 //
118 // Hierarchical clustering with one of Daniel Muellner's fast algorithms
119 //
120 // Input arguments:
121 // n = number of observables
122 // distmat = condensed distance matrix, i.e. an n*(n-1)/2 array representing
123 // the upper triangle (without diagonal elements) of the distance
124 // matrix, e.g. for n=4:
125 // d00 d01 d02 d03
126 // d10 d11 d12 d13 -> d02 d02 d03 d12 d13 d23
127 // d20 d21 d22 d23
128 // d30 d31 d32 d33
129 // method = cluster metric (see enum method_code)
130 // Output arguments:
131 // merge = allocated (n-1)x2 matrix (2*(n-1) array) for storing result.
132 // Result follows R hclust convention:
133 // - observabe indices start with one
134 // - merge[i][] contains the merged nodes in step i
135 // - merge[i][j] is negative when the node is an atom
136 // height = allocated (n-1) array with distances at each merge step
137 // Return code:
138 // 0 = ok
139 // 1 = invalid method
140 //
141 int hclust_fast(int n, double *distmat, int method, int *merge, double *height)
142 {
143 
144  // call appropriate culstering function
145  cluster_result Z2(n - 1);
146  if (method == HCLUST_METHOD_SINGLE) {
147  // single link
148  MST_linkage_core(n, distmat, Z2);
149  } else if (method == HCLUST_METHOD_COMPLETE) {
150  // complete link
151  NN_chain_core<METHOD_METR_COMPLETE, t_float>(n, distmat, nullptr, Z2);
152  } else if (method == HCLUST_METHOD_AVERAGE) {
153  // best average distance
154  auto *members = new double[n]; // NOLINT
155  for (int i = 0; i < n; i++)
156  members[i] = 1;
157  NN_chain_core<METHOD_METR_AVERAGE, t_float>(n, distmat, members, Z2);
158  delete[] members; // NOLINT
159  } else if (method == HCLUST_METHOD_MEDIAN) {
160  // best median distance (beware: O(n^3))
161  generic_linkage<METHOD_METR_MEDIAN, t_float>(n, distmat, nullptr, Z2);
162  } else {
163  return 1;
164  }
165 
166  int *order = new int[n]; // NOLINT
167  if (method == HCLUST_METHOD_MEDIAN) {
168  generate_R_dendrogram<true>(merge, height, order, Z2, n);
169  } else {
170  generate_R_dendrogram<false>(merge, height, order, Z2, n);
171  }
172 
173  delete[] order; // NOLINT only needed for visualization
174 
175  return 0;
176 }
fastcluster_dm.cxx
fc_isnan
bool fc_isnan(double x)
Definition: fastcluster.cxx:17
cluster_result
Definition: fastcluster_dm.cxx:223
HCLUST_METHOD_AVERAGE
@ HCLUST_METHOD_AVERAGE
Definition: fastcluster.h:68
HCLUST_METHOD_MEDIAN
@ HCLUST_METHOD_MEDIAN
Definition: fastcluster.h:69
fastcluster.h
fastcluster_R_dm.cxx
cutree_k
void cutree_k(int n, const int *merge, int nclust, int *labels)
Definition: fastcluster.cxx:38
cutree_cdist
void cutree_cdist(int n, const int *merge, double *height, double cdist, int *labels)
Definition: fastcluster.cxx:104
hclust_fast
int hclust_fast(int n, double *distmat, int method, int *merge, double *height)
Definition: fastcluster.cxx:141
HCLUST_METHOD_COMPLETE
@ HCLUST_METHOD_COMPLETE
Definition: fastcluster.h:67
HCLUST_METHOD_SINGLE
@ HCLUST_METHOD_SINGLE
Definition: fastcluster.h:66