26 double mean(
const double *a,
size_t m)
29 for (
size_t i = 0; i < m; ++i) {
36 double sd(
const double *a,
size_t m)
38 double sum = 0, result;
39 double mean_val =
mean(a, m);
40 for (
size_t k = 0; k < m; ++k) {
41 double tmp = mean_val - a[k];
44 result = (1.0 / (m - 1.0)) * sum;
45 return std::sqrt(result);
56 size_t const triplet_size = triplets.size();
59 for (
size_t i = 0; i < triplet_size; ++i) {
60 for (
size_t j = i + 1; j < triplet_size; j++) {
61 result[k++] = triplet_metric(triplets[i], triplets[j]);
75 double t,
bool tauto,
double dmax,
bool is_dmax,
Linkage method,
int opt_verbose)
77 const size_t triplet_size = triplets.size();
78 size_t k, cluster_size;
81 if (triplet_size < 3) {
92 double *distance_matrix =
new double[(triplet_size * (triplet_size - 1)) / 2];
93 double *cdists =
new double[triplet_size - 1];
94 int *merge =
new int[2 * (triplet_size - 1)], *labels =
new int[triplet_size];
98 hclust_fast(triplet_size, distance_matrix, link, merge, cdists);
103 for (k = (triplet_size - 1) / 2; k < (triplet_size - 1); ++k) {
104 if ((cdists[k - 1] > 0.0 || cdists[k] > 1.0e-8) && (cdists[k] > cdists[k - 1] + 2 *
sd(cdists, k + 1))) {
110 double prev_cdist = (k > 0) ? cdists[k - 1] : 0.0;
111 if (k < (triplet_size - 1)) {
112 automatic_t = (prev_cdist + cdists[k]) / 2.0;
114 automatic_t = cdists[k - 1];
116 std::cout <<
"[Info] optimal cdist threshold: " << automatic_t << std::endl;
120 for (k = 0; k < (triplet_size - 1); ++k) {
121 if (cdists[k] >= t) {
126 cluster_size = triplet_size - k;
127 cutree_k(triplet_size, merge, cluster_size, labels);
130 for (
size_t i = 0; i < cluster_size; ++i) {
132 result.push_back(new_cluster);
135 for (
size_t i = 0; i < triplet_size; ++i) {
136 result[labels[i]].push_back(i);
139 if (opt_verbose > 1) {
141 const char *fname =
"debug_cdist.csv";
142 std::ofstream of(fname);
145 for (
size_t i = 0; i < (triplet_size - 1); ++i) {
146 of << cdists[i] << std::endl;
149 std::cerr <<
"[Error] could not write file '" << fname <<
"'\n";
155 delete[] distance_matrix;
167 size_t old_size = cl_group.size();
168 cluster_group::iterator it = cl_group.begin();
169 while (it != cl_group.end()) {
170 if (it->size() < m) {
171 it = cl_group.erase(it);
176 if (opt_verbose > 0) {
177 std::cout <<
"[Info] in pruning removed clusters: " << old_size - cl_group.size() << std::endl;
187 for (
size_t i = 0; i < cl_group.size(); ++i) {
188 cluster_t point_indices, ¤t_cluster = cl_group[i];
189 for (std::vector<size_t>::const_iterator triplet_index = current_cluster.begin();
190 triplet_index < current_cluster.end(); ++triplet_index) {
191 const triplet ¤t_triplet = triplets[*triplet_index];
197 std::sort(point_indices.begin(), point_indices.end());
198 std::vector<size_t>::iterator new_end = std::unique(point_indices.begin(), point_indices.end());
199 point_indices.resize(std::distance(point_indices.begin(), new_end));
201 current_cluster = point_indices;
214 for (
size_t i = 0; i < cl_group.size(); ++i) {
215 const std::vector<size_t> ¤t_cluster = cl_group[i];
216 for (std::vector<size_t>::const_iterator point_index = current_cluster.begin();
217 point_index != current_cluster.end(); ++point_index) {
218 cloud[*point_index].cluster_ids.insert(i);
225 std::vector<cluster_t> verticies;
226 for (
size_t i = 0; i < cloud.size(); ++i) {
227 const Point &p = cloud[i];
232 for (std::vector<cluster_t>::iterator v = verticies.begin(); v != verticies.end(); ++v) {
233 if (cloud[v->at(0)].cluster_ids == p.
cluster_ids) {
241 verticies.push_back(v);
251 cl_group.reserve(cl_group.size() + verticies.size());
252 cl_group.insert(cl_group.end(), verticies.begin(), verticies.end());