Commit a0f7da0

Moved all of the algorithm wrapper classes into separate files

1 parent 4cef691 · commit a0f7da0
20 files changed: +631 / -577 lines
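
With this change each wrapper class lives in its own module under the ann_benchmarks.algorithms package, so callers import the class they need explicitly instead of pulling everything from one file. A minimal sketch of the resulting import style (the module and class names are the ones introduced in the files below):

import_example.py (illustrative only):

    from ann_benchmarks.algorithms.base import BaseANN
    from ann_benchmarks.algorithms.annoy import Annoy
    from ann_benchmarks.algorithms.kdtree import KDTree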

ann_benchmarks/__init__.py

Lines changed: 2 additions & 1 deletion

@@ -1 +1,2 @@
-from main import *
+from __future__ import absolute_import
+from ann_benchmarks.main import *

ann_benchmarks/algorithms/__init__.py

Whitespace-only changes.

ann_benchmarks/algorithms/annoy.py

Lines changed: 19 additions & 0 deletions

from __future__ import absolute_import
import annoy
from ann_benchmarks.algorithms.base import BaseANN

class Annoy(BaseANN):
    def __init__(self, metric, n_trees, search_k):
        self._n_trees = n_trees
        self._search_k = search_k
        self._metric = metric
        self.name = 'Annoy(n_trees=%d, search_k=%d)' % (n_trees, search_k)

    def fit(self, X):
        self._annoy = annoy.AnnoyIndex(f=X.shape[1], metric=self._metric)
        for i, x in enumerate(X):
            self._annoy.add_item(i, x.tolist())
        self._annoy.build(self._n_trees)

    def query(self, v, n):
        return self._annoy.get_nns_by_vector(v.tolist(), n, self._search_k)
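
A minimal usage sketch of this wrapper (the dataset and parameter values are hypothetical, chosen only for illustration):

    import numpy
    from ann_benchmarks.algorithms.annoy import Annoy

    X = numpy.random.rand(1000, 20).astype(numpy.float32)  # hypothetical dataset
    algo = Annoy('angular', n_trees=100, search_k=10000)    # hypothetical parameters
    algo.fit(X)
    print(algo.query(X[0], 10))  # indices of the 10 approximate nearest neighbours of X[0]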

ann_benchmarks/algorithms/balltree.py

Lines changed: 21 additions & 0 deletions

from __future__ import absolute_import
import sklearn.neighbors
import sklearn.preprocessing
from ann_benchmarks.algorithms.base import BaseANN

class BallTree(BaseANN):
    def __init__(self, metric, leaf_size=20):
        self.name = 'BallTree(leaf_size=%d)' % leaf_size
        self._leaf_size = leaf_size
        self._metric = metric

    def fit(self, X):
        if self._metric == 'angular':
            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
        self._tree = sklearn.neighbors.BallTree(X, leaf_size=self._leaf_size)

    def query(self, v, n):
        if self._metric == 'angular':
            v = sklearn.preprocessing.normalize(v, axis=1, norm='l2')[0]
        dist, ind = self._tree.query(v, k=n)
        return ind[0]

ann_benchmarks/algorithms/base.py

Lines changed: 7 additions & 0 deletions

from __future__ import absolute_import

class BaseANN(object):
    def use_threads(self):
        return True

    def done(self):
        pass
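
Every wrapper in this commit follows the same contract: fit(X) builds the index from a matrix of points and query(v, n) returns up to n candidate indices, while use_threads() and done() are optional hooks for the benchmark driver. A hypothetical minimal subclass, shown only to illustrate the interface (not part of this commit):

    import numpy
    from ann_benchmarks.algorithms.base import BaseANN

    class ExactScan(BaseANN):
        """Illustrative exact scanner; assumes Euclidean distance."""
        def __init__(self, metric):
            self.name = 'ExactScan()'
            self._metric = metric  # ignored in this sketch

        def fit(self, X):
            self._index = numpy.array(X)

        def query(self, v, n):
            dists = ((self._index - v) ** 2).sum(-1)  # squared Euclidean distances
            return list(numpy.argsort(dists)[:n])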

ann_benchmarks/algorithms/bruteforce.py

Lines changed: 89 additions & 0 deletions

from __future__ import absolute_import
import numpy
import sklearn.neighbors
from ann_benchmarks.distance import metrics as pd
from ann_benchmarks.algorithms.base import BaseANN

class BruteForce(BaseANN):
    def __init__(self, metric):
        self._metric = metric
        self.name = 'BruteForce()'

    def fit(self, X):
        metric = {'angular': 'cosine', 'euclidean': 'l2', 'hamming': 'hamming'}[self._metric]
        self._nbrs = sklearn.neighbors.NearestNeighbors(algorithm='brute', metric=metric)
        self._nbrs.fit(X)

    def query(self, v, n):
        return list(self._nbrs.kneighbors([v],
            return_distance = False, n_neighbors = n)[0])

    def query_with_distances(self, v, n):
        (distances, positions) = self._nbrs.kneighbors([v],
            return_distance = True, n_neighbors = n)
        return zip(list(positions[0]), list(distances[0]))

class BruteForceBLAS(BaseANN):
    """kNN search that uses a linear scan = brute force."""
    def __init__(self, metric, precision=numpy.float32):
        if metric not in ('angular', 'euclidean', 'hamming'):
            raise NotImplementedError("BruteForceBLAS doesn't support metric %s" % metric)
        elif metric == 'hamming' and precision != numpy.bool:
            raise NotImplementedError("BruteForceBLAS doesn't support precision %s with Hamming distances" % precision)
        self._metric = metric
        self._precision = precision
        self.name = 'BruteForceBLAS()'

    def fit(self, X):
        """Initialize the search index."""
        lens = (X ** 2).sum(-1)  # precompute (squared) length of each vector
        if self._metric == 'angular':
            X /= numpy.sqrt(lens)[..., numpy.newaxis]  # normalize index vectors to unit length
            self.index = numpy.ascontiguousarray(X, dtype=self._precision)
        elif self._metric == 'euclidean':
            self.index = numpy.ascontiguousarray(X, dtype=self._precision)
            self.lengths = numpy.ascontiguousarray(lens, dtype=self._precision)
        elif self._metric == 'hamming':
            self.index = numpy.ascontiguousarray(
                map(numpy.packbits, X), dtype=numpy.uint8)
        else:
            assert False, "invalid metric"  # shouldn't get past the constructor!

    def query(self, v, n):
        return map(lambda (index, _): index, self.query_with_distances(v, n))

    popcount = []
    for i in xrange(256):
        popcount.append(bin(i).count("1"))

    def query_with_distances(self, v, n):
        """Find indices of `n` most similar vectors from the index to query vector `v`."""
        if self._metric == 'hamming':
            v = numpy.packbits(v)

        # use same precision for query as for index
        v = numpy.ascontiguousarray(v, dtype = self.index.dtype)

        # HACK we ignore query length as that's a constant not affecting the final ordering
        if self._metric == 'angular':
            # argmax_a cossim(a, b) = argmax_a dot(a, b) / |a||b| = argmin_a -dot(a, b)
            dists = -numpy.dot(self.index, v)
        elif self._metric == 'euclidean':
            # argmin_a (a - b)^2 = argmin_a a^2 - 2ab + b^2 = argmin_a a^2 - 2ab
            dists = self.lengths - 2 * numpy.dot(self.index, v)
        elif self._metric == 'hamming':
            diff = numpy.bitwise_xor(v, self.index)
            pc = BruteForceBLAS.popcount
            den = float(len(v) * 8)
            dists = [sum([pc[part] for part in point]) / den for point in diff]
        else:
            assert False, "invalid metric"  # shouldn't get past the constructor!
        indices = numpy.argpartition(dists, n)[:n]  # partition-sort by distance, get `n` closest
        def fix(index):
            ep = self.index[index]
            ev = v
            if self._metric == "hamming":
                ep = numpy.unpackbits(ep)
                ev = numpy.unpackbits(ev)
            return (index, pd[self._metric](ep, ev))
        return map(fix, indices)
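
For Hamming distance the index stores each bit vector packed into bytes (numpy.packbits), and the number of differing bits is obtained by XOR-ing the packed bytes and summing a 256-entry popcount table, as in BruteForceBLAS above. A small self-contained check of that idea, with made-up vectors:

    import numpy

    popcount = [bin(i).count("1") for i in range(256)]    # bits set per byte value

    a = numpy.array([1, 0, 1, 1, 0, 0, 1, 0], dtype=numpy.bool_)
    b = numpy.array([1, 1, 1, 0, 0, 0, 1, 1], dtype=numpy.bool_)
    pa, pb = numpy.packbits(a), numpy.packbits(b)         # one byte per vector
    diff = numpy.bitwise_xor(pa, pb)                      # set bits mark differing positions
    hamming = sum(popcount[byte] for byte in diff)        # 3 positions differ
    assert hamming == 3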

ann_benchmarks/algorithms/external.py

Lines changed: 75 additions & 0 deletions

from __future__ import absolute_import
import shlex
import subprocess
from ann_benchmarks.algorithms.base import BaseANN

class Subprocess(BaseANN):
    def __raw_line(self):
        return shlex.split( \
            self.__get_program_handle().stdout.readline().strip())

    def __line(self):
        line = self.__raw_line()
        while len(line) < 1 or line[0] != "epbprtv0":
            line = self.__raw_line()
        return line[1:]

    @staticmethod
    def __quote(token):
        return "'" + str(token).replace("'", "'\\'") + "'"

    def __write(self, string):
        self.__get_program_handle().stdin.write(string + "\n")

    def __get_program_handle(self):
        if not self._program:
            self._program = subprocess.Popen(
                self._args,
                bufsize = 1,  # line buffering
                stdin = subprocess.PIPE,
                stdout = subprocess.PIPE,
                universal_newlines = True)
            for key, value in self._params.iteritems():
                self.__write("%s %s" % \
                    (Subprocess.__quote(key), Subprocess.__quote(value)))
                assert(self.__line()[0] == "ok")
            self.__write("")
            assert(self.__line()[0] == "ok")
        return self._program

    def __init__(self, args, encoder, params):
        self.name = "Subprocess(program = %s, %s)" % (args[0], str(params))
        self._program = None
        self._args = args
        self._encoder = encoder
        self._params = params

    def fit(self, X):
        for entry in X:
            self.__write(self._encoder(entry))
            assert(self.__line()[0] == "ok")
        self.__write("")
        assert(self.__line()[0] == "ok")

    def query(self, v, n):
        self.__write("%s %d" % \
            (Subprocess.__quote(self._encoder(v)), n))
        status = self.__line()
        if status[0] == "ok":
            count = int(status[1])
            results = []
            i = 0
            while i < count:
                line = self.__line()
                results.append(int(line[0]))
                i += 1
            assert(len(results) == count)
            return results
        else:
            assert(status[0] == "fail")
            return []

    def use_threads(self):
        return False

    def done(self):
        if self._program:
            self._program.terminate()
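
The wrapper above drives an external program over a simple line protocol: quoted "key value" configuration pairs, then one encoded training point per line, then queries of the form "'vector' n", with each relevant response line starting with the token epbprtv0 followed by an ok/fail status. The sketch below is a hypothetical external program that would satisfy this wrapper, assuming the encoder emits space-separated floats; it is reconstructed from the wrapper code and is not part of this commit:

    from __future__ import absolute_import, print_function
    import sys
    import shlex
    import numpy

    def reply(*tokens):
        # every response line the wrapper accepts starts with "epbprtv0"
        print("epbprtv0 " + " ".join(str(t) for t in tokens))
        sys.stdout.flush()

    def read_phase():
        # yield stdin lines until a blank line ends the current phase
        for line in iter(sys.stdin.readline, ""):
            if not line.strip():
                return
            yield line

    def main():
        # configuration phase: quoted "key value" pairs, each acknowledged
        for line in read_phase():
            key, value = shlex.split(line)  # parameters are ignored in this sketch
            reply("ok")
        reply("ok")  # acknowledge the blank line that ends configuration

        # training phase: one encoded point per line, each acknowledged
        points = []
        for line in read_phase():
            points.append([float(x) for x in line.split()])
            reply("ok")
        reply("ok")  # acknowledge the blank line that ends training
        index = numpy.array(points)

        # query phase: "'encoded vector' n" per line; brute-force scan as a stand-in
        for line in read_phase():
            encoded, n = shlex.split(line)
            v = numpy.array([float(x) for x in encoded.split()])
            dists = ((index - v) ** 2).sum(-1)
            nearest = numpy.argsort(dists)[:int(n)]
            reply("ok", len(nearest))  # "ok <count>", then one index per line
            for i in nearest:
                reply(int(i))

    if __name__ == "__main__":
        main()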

ann_benchmarks/algorithms/falconn.py

Lines changed: 51 additions & 0 deletions

from __future__ import absolute_import
import numpy
import falconn
from ann_benchmarks.algorithms.base import BaseANN

class FALCONN(BaseANN):
    def __init__(self, metric, num_bits, num_tables, num_probes = None):
        if not num_probes:
            num_probes = num_tables
        self.name = 'FALCONN(K={}, L={}, T={})'.format(num_bits, num_tables, num_probes)
        self._metric = metric
        self._num_bits = num_bits
        self._num_tables = num_tables
        self._num_probes = num_probes
        self._center = None
        self._params = None
        self._index = None
        self._buf = None

    def fit(self, X):
        if X.dtype != numpy.float32:
            X = X.astype(numpy.float32)
        if self._metric == 'angular':
            X /= numpy.linalg.norm(X, axis=1).reshape(-1, 1)
        self._center = numpy.mean(X, axis=0)
        X -= self._center
        self._params = falconn.LSHConstructionParameters()
        self._params.dimension = X.shape[1]
        self._params.distance_function = 'euclidean_squared'
        self._params.lsh_family = 'cross_polytope'
        falconn.compute_number_of_hash_functions(self._num_bits, self._params)
        self._params.l = self._num_tables
        self._params.num_rotations = 1
        self._params.num_setup_threads = 0
        self._params.storage_hash_table = 'flat_hash_table'
        self._params.seed = 95225714
        self._index = falconn.LSHIndex(self._params)
        self._index.setup(X)
        self._index.set_num_probes(self._num_probes)
        self._buf = numpy.zeros((X.shape[1],), dtype=numpy.float32)

    def query(self, v, n):
        numpy.copyto(self._buf, v)
        if self._metric == 'angular':
            self._buf /= numpy.linalg.norm(self._buf)
        self._buf -= self._center
        return self._index.find_k_nearest_neighbors(self._buf, n)

    def use_threads(self):
        # See https://github.com/FALCONN-LIB/FALCONN/issues/6
        return False
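
The angular branch above works because, for unit-length vectors, ||x - q||^2 = 2 - 2*<x, q>, so FALCONN's euclidean_squared distance ranks candidates exactly like cosine similarity; and subtracting the same center from every index point and from the query shifts everything equally without changing any distance. A small numerical check of both identities, on random data for illustration only:

    import numpy

    rng = numpy.random.RandomState(0)
    X = rng.rand(100, 8).astype(numpy.float32)
    q = rng.rand(8).astype(numpy.float32)

    Xn = X / numpy.linalg.norm(X, axis=1).reshape(-1, 1)   # unit-length index vectors
    qn = q / numpy.linalg.norm(q)

    # ||x - q||^2 = 2 - 2<x, q> for unit vectors, so euclidean_squared ranks like cosine
    assert numpy.allclose(((Xn - qn) ** 2).sum(-1), 2 - 2 * Xn.dot(qn), atol=1e-5)

    # subtracting the same center from index and query leaves all distances unchanged
    center = Xn.mean(axis=0)
    before = ((Xn - qn) ** 2).sum(-1)
    after = (((Xn - center) - (qn - center)) ** 2).sum(-1)
    assert numpy.allclose(before, after, atol=1e-5)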

ann_benchmarks/algorithms/flann.py

Lines changed: 21 additions & 0 deletions

from __future__ import absolute_import
import pyflann
import sklearn.preprocessing
from ann_benchmarks.algorithms.base import BaseANN

class FLANN(BaseANN):
    def __init__(self, metric, target_precision):
        self._target_precision = target_precision
        self.name = 'FLANN(target_precision=%f)' % target_precision
        self._metric = metric

    def fit(self, X):
        self._flann = pyflann.FLANN(target_precision=self._target_precision, algorithm='autotuned', log_level='info')
        if self._metric == 'angular':
            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
        self._flann.build_index(X)

    def query(self, v, n):
        if self._metric == 'angular':
            v = sklearn.preprocessing.normalize(v, axis=1, norm='l2')[0]
        return self._flann.nn_index(v, n)[0][0]

ann_benchmarks/algorithms/itu.py

Lines changed: 66 additions & 0 deletions

from __future__ import absolute_import
import sys
sys.path.append('install/ann-filters/build/wrappers/swig/')
import numpy
import locality_sensitive
from ann_benchmarks.algorithms.base import BaseANN

class ITUFilteringDouble(BaseANN):
    def __init__(self, metric, alpha = None, beta = None, threshold = None, tau = None, kappa1 = None, kappa2 = None, m1 = None, m2 = None):
        self._loader = locality_sensitive.double_vector_loader()
        self._context = None
        self._strategy = None
        self._metric = metric
        self._alpha = alpha
        self._beta = beta
        self._threshold = threshold
        self._tau = tau
        self._kappa1 = kappa1
        self._kappa2 = kappa2
        self._m1 = m1
        self._m2 = m2
        self.name = ("ITUFilteringDouble(..., threshold = %f, ...)" % threshold)

    def fit(self, X):
        if self._metric == 'angular':
            X /= numpy.linalg.norm(X, axis=1).reshape(-1, 1)
        self._loader.add(X)
        self._context = locality_sensitive.double_vector_context(
            self._loader, self._alpha, self._beta)
        self._strategy = locality_sensitive.factories.make_double_filtering(
            self._context, self._threshold,
            locality_sensitive.filtering_configuration.from_values(
                self._kappa1, self._kappa2, self._tau, self._m1, self._m2))

    def query(self, v, n):
        if self._metric == 'angular':
            v /= numpy.linalg.norm(v)
        return self._strategy.find(v, n, None)

    def use_threads(self):
        return False

class ITUHashing(BaseANN):
    def __init__(self, seed, c = 2.0, r = 2.0):
        self._loader = locality_sensitive.bit_vector_loader()
        self._context = None
        self._strategy = None
        self._c = c
        self._r = r
        self._seed = seed
        self.name = ("ITUHashing(c = %f, r = %f, seed = %u)" % (c, r, seed))

    def fit(self, X):
        locality_sensitive.set_seed(self._seed)
        for entry in X:
            locality_sensitive.hacks.add(self._loader, entry.tolist())
        self._context = locality_sensitive.bit_vector_context(
            self._loader, self._c, self._r)
        self._strategy = locality_sensitive.factories.make_hashing(
            self._context)

    def query(self, v, n):
        return locality_sensitive.hacks.find(self._strategy, n, v.tolist())

    def use_threads(self):
        return False

ann_benchmarks/algorithms/kdtree.py

Lines changed: 21 additions & 0 deletions

from __future__ import absolute_import
import sklearn.neighbors
import sklearn.preprocessing
from ann_benchmarks.algorithms.base import BaseANN

class KDTree(BaseANN):
    def __init__(self, metric, leaf_size=20):
        self.name = 'KDTree(leaf_size=%d)' % leaf_size
        self._leaf_size = leaf_size
        self._metric = metric

    def fit(self, X):
        if self._metric == 'angular':
            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')
        self._tree = sklearn.neighbors.KDTree(X, leaf_size=self._leaf_size)

    def query(self, v, n):
        if self._metric == 'angular':
            v = sklearn.preprocessing.normalize(v, axis=1, norm='l2')[0]
        dist, ind = self._tree.query(v, k=n)
        return ind[0]
