`sum_points` is not recognized as a reduction by Numba. Keep this issue open to track the upstream Numba issue; once that is fixed, we will presumably need some additional work on the Bodo side to support the new reduction type.
import numpy as np
import time
from bodo import jit, prange
@jit
def kmeans(num_points, num_features, k, num_iterations):
    """Run ``num_iterations`` rounds of k-means on data read from ``kmeans_data``.

    The first ``num_points * num_features`` float64 values of the file
    ``kmeans_data`` are reshaped into a ``(num_points, num_features)`` array.
    Initial centroids are ``k * num_features`` evenly spaced values in
    ``[0, 1]`` reshaped to ``(k, num_features)``.

    Files written: ``orig_centroids.out`` (initial centroids), ``sum_points``,
    ``count_points``, ``centroids.out`` and ``labels.out`` (state after the
    last iteration). The time spent in the iteration loop is printed.

    Args:
        num_points: Number of rows to take from the input file.
        num_features: Number of columns per row.
        k: Number of clusters.
        num_iterations: Number of assignment/update rounds to execute.
    """
    # NOTE(review): the loop nesting below was reconstructed from a source
    # whose indentation had been lost -- confirm against the original script.
    raw = np.fromfile("kmeans_data", dtype=np.float64)
    flat = raw[:num_points*num_features]
    data = np.ascontiguousarray(flat.reshape((num_points, num_features)))

    centroids = np.linspace(0, 1, k * num_features).reshape(k, num_features)
    centroids.tofile("orig_centroids.out")

    labels = np.zeros(num_points, dtype=np.int32)

    start_time = time.time()
    for _iteration in range(num_iterations):
        # Assignment step: each point takes the index of its nearest centroid.
        for p in prange(num_points):
            point = data[p]
            distances = np.array(
                [np.linalg.norm(point - centroid) for centroid in centroids]
            )
            labels[p] = np.argmin(distances)

        # Accumulators are rebuilt from scratch on every iteration.
        sum_points = np.zeros((k, num_features))
        count_points = np.zeros(k, dtype=np.int32)

        # Accumulation step: indexed in-place updates inside a prange loop.
        # This is the pattern the compiler is expected to treat as a
        # reduction; it is intentionally kept in this exact form.
        for p in prange(num_points):
            owner = labels[p]
            sum_points[owner, :] += data[p, :]
            count_points[owner] += 1

        # Update step: clusters that received no points keep their centroid.
        for c in range(k):
            if count_points[c] <= 0:
                continue
            centroids[c] = sum_points[c] / count_points[c]

    elapsed = time.time() - start_time
    print("internal time", elapsed)

    sum_points.tofile("sum_points")
    count_points.tofile("count_points")
    centroids.tofile("centroids.out")
    labels.tofile("labels.out")
`sum_points` is not recognized as a reduction by Numba. Keep this issue open to track the upstream Numba issue; once that is fixed, we will presumably need some additional work on the Bodo side to support the new reduction type.
import numpy as np
import time
from bodo import jit, prange
@jit
def kmeans(num_points, num_features, k, num_iterations):
data = np.fromfile("kmeans_data", dtype=np.float64)
data = np.ascontiguousarray(data[:num_points*num_features].reshape((num_points, num_features)))
centroids = np.linspace(0, 1, k * num_features).reshape(k, num_features)
centroids.tofile("orig_centroids.out")
labels = np.zeros(num_points, dtype=np.int32)