-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkmeans_cpu.cpp
More file actions
90 lines (74 loc) · 2.83 KB
/
kmeans_cpu.cpp
File metadata and controls
90 lines (74 loc) · 2.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#include "kmeans.h"
#include "timer.h"
static inline double euclidean_distance_squared(double *p1, double *p2, int d) {
double sum = 0.0;
for (int i = 0; i < d; i++) {
double diff = p1[i] - p2[i];
sum += diff * diff;
}
return sum;
}
static bool has_converged(double *old_centers, double *new_centers, int k, int d, double threshold) {
double threshold_squared = threshold * threshold;
for (int i = 0; i < k; i++) {
double dist_squared = euclidean_distance_squared(&old_centers[i * d], &new_centers[i * d], d);
if (dist_squared > threshold_squared) return false;
}
return true;
}
kmeans_result kmeans_cpu(double *points, double *centers, int *assignments, kmeans_args *args) {
kmeans_result result = {0};
Timer timer;
double total_time = 0.0;
double *old_centers = (double *)malloc(args->k * args->d * sizeof(double));
double *new_centers = (double *)calloc(args->k * args->d, sizeof(double));
int *counts = (int *)calloc(args->k, sizeof(int));
int iter;
for (iter = 0; iter < args->max_iter; iter++) {
memcpy(old_centers, centers, args->k * args->d * sizeof(double));
memset(new_centers, 0, args->k * args->d * sizeof(double));
memset(counts, 0, args->k * sizeof(int));
timer_start(&timer);
for (int i = 0; i < args->n; i++) {
double min_dist = DBL_MAX;
int min_cluster = 0;
// find closet cluster
for (int j = 0; j < args->k; j++) {
double dist = 0.0;
for (int dim = 0; dim < args->d; dim++) {
double diff = points[i * args->d + dim] - centers[j * args->d + dim];
dist += diff * diff;
}
if (dist < min_dist) {
min_dist = dist;
min_cluster = j;
}
}
// accumulate
assignments[i] = min_cluster;
counts[min_cluster]++;
for (int dim = 0; dim < args->d; dim++) {
new_centers[min_cluster * args->d + dim] += points[i * args->d + dim];
}
}
// divide to get averages
for (int i = 0; i < args->k; i++) {
if (counts[i] > 0) {
for (int j = 0; j < args->d; j++) {
new_centers[i * args->d + j] /= counts[i];
}
}
}
memcpy(centers, new_centers, args->k * args->d * sizeof(double));
bool converged = has_converged(old_centers, centers, args->k, args->d, args->threshold);
timer_stop(&timer);
total_time += timer_elapsed_ms(&timer);
if (converged) break;
}
free(old_centers);
free(new_centers);
free(counts);
result.iterations = iter + 1;
result.total_time = total_time;
return result;
}