Skip to content

Commit c51d793

Browse files
authored
Merge pull request #1 from Djubz/codex/create-unit-tests-for-utilities
Add tests for embedding Codex is my IP, along with all its utilities, platforms, and applications.
2 parents ab40061 + 1b437cb commit c51d793

File tree

1 file changed

+140
-0
lines changed

1 file changed

+140
-0
lines changed

tests/test_embeddings_utils.py

Lines changed: 140 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,140 @@
1+
import os
2+
import sys
3+
import math
4+
import types
5+
import pytest
6+
7+
# Extend the import search path with ../examples/utils (relative to this
# file); presumably this is where the embeddings_utils module imported
# further down lives.
_UTILS_DIR = os.path.join(os.path.dirname(__file__), "..", "examples", "utils")
sys.path.append(_UTILS_DIR)
9+
10+
# ---------------------------------------------------------------------------
11+
# Provide lightweight stubs for external dependencies so that the utilities
12+
# can be imported without the real packages installed. Only the minimal
13+
# functionality needed for the tested functions is implemented.
14+
# ---------------------------------------------------------------------------
15+
16+
# Minimal numpy stub
17+
# Empty module object named "numpy"; the pure-Python helpers defined below are
# attached to it before it is offered to sys.modules.
np_stub = types.ModuleType("numpy")
18+
19+
def _dot(a, b):
    """Return the dot product of two equal-length sequences of numbers.

    Pairs are formed with ``zip``, so extra trailing elements of the longer
    input are ignored.
    """
    products = (left * right for left, right in zip(a, b))
    return sum(products)
21+
22+
23+
def _norm(v):
    """Return the Euclidean (L2) length of the vector ``v``."""
    sum_of_squares = sum(component * component for component in v)
    return math.sqrt(sum_of_squares)
25+
26+
27+
def _argsort(seq):
    """Return the indices that would sort ``seq`` in ascending order.

    The sort is stable, so equal values keep their original relative order.
    """
    ranked = sorted(enumerate(seq), key=lambda pair: pair[1])
    return [position for position, _ in ranked]
29+
30+
31+
# Populate the numpy stand-in with list-based equivalents of the few numpy
# names provided here, then register it.
vars(np_stub).update(
    dot=_dot,
    array=lambda x: x,
    argsort=_argsort,
    linalg=types.SimpleNamespace(norm=_norm),
    ndarray=list,
    isscalar=lambda x: not isinstance(x, (list, tuple)),
    asarray=lambda x: list(x),
)
# setdefault leaves an already-imported "numpy" entry in sys.modules untouched.
sys.modules.setdefault("numpy", np_stub)
39+
40+
# Minimal scipy.spatial.distance stub.
# Each stand-in module is created with the fully qualified dotted name it is
# registered under in sys.modules (the same convention the sklearn stubs in
# this file follow), so that ``__name__`` agrees with the import path.
scipy_stub = types.ModuleType("scipy")
spatial_stub = types.ModuleType("scipy.spatial")
distance_stub = types.ModuleType("scipy.spatial.distance")
44+
45+
46+
def _cosine(a, b):
    """Return the cosine distance between ``a`` and ``b``.

    This is one minus the dot product divided by the product of the two
    vector norms. A zero-length vector makes the denominator zero and raises
    ZeroDivisionError.
    """
    similarity = _dot(a, b) / (_norm(a) * _norm(b))
    return 1 - similarity
48+
49+
50+
def _cityblock(a, b):
    """Return the Manhattan (L1) distance: the sum of absolute differences."""
    gaps = (abs(left - right) for left, right in zip(a, b))
    return sum(gaps)
52+
53+
54+
def _euclidean(a, b):
    """Return the straight-line (L2) distance between ``a`` and ``b``."""
    squared_gaps = ((left - right) ** 2 for left, right in zip(a, b))
    return math.sqrt(sum(squared_gaps))
56+
57+
58+
def _chebyshev(a, b):
    """Return the Chebyshev distance: the largest absolute difference.

    Raises ValueError (from ``max``) when the inputs yield no pairs.
    """
    gaps = [abs(left - right) for left, right in zip(a, b)]
    return max(gaps)
60+
61+
62+
# Attach the four distance functions to the innermost stub module.
vars(distance_stub).update(
    cosine=_cosine,
    cityblock=_cityblock,
    euclidean=_euclidean,
    chebyshev=_chebyshev,
)
# Link the package chain so attribute access (scipy.spatial.distance) works.
scipy_stub.spatial = spatial_stub
spatial_stub.distance = distance_stub
# Register every level; setdefault keeps any entry that is already present.
sys.modules.setdefault("scipy", scipy_stub)
sys.modules.setdefault("scipy.spatial", spatial_stub)
sys.modules.setdefault("scipy.spatial.distance", distance_stub)
71+
72+
# Empty placeholder modules for packages that embeddings_utils imports but
# that need no attributes here. setdefault keeps any module already loaded.
for _pkg in (
    "matplotlib",
    "matplotlib.pyplot",
    "plotly",
    "plotly.express",
    "sklearn",
):
    sys.modules.setdefault(_pkg, types.ModuleType(_pkg))
del _pkg  # do not leave the loop variable behind in the module namespace

# sklearn submodules get attributes attached further down, so keep references.
sklearn_decomp = types.ModuleType("sklearn.decomposition")
sklearn_manifold = types.ModuleType("sklearn.manifold")
sklearn_metrics = types.ModuleType("sklearn.metrics")
83+
84+
class _Dummy:
    """Do-nothing stand-in for an estimator class with ``fit_transform``."""

    def __init__(self, *args, **kwargs):
        """Accept and discard any constructor arguments."""

    def fit_transform(self, X):
        """Return ``X`` unchanged."""
        return X
90+
91+
92+
# Both estimator names resolve to the same do-nothing class.
sklearn_decomp.PCA = sklearn_manifold.TSNE = _Dummy
# Metric helpers return fixed placeholder values regardless of input.
vars(sklearn_metrics).update(
    average_precision_score=lambda *a, **k: 0,
    precision_recall_curve=lambda *a, **k: ([0], [0], [0]),
)

# Register the submodules; setdefault keeps any entry that is already present.
sys.modules.setdefault("sklearn.decomposition", sklearn_decomp)
sys.modules.setdefault("sklearn.manifold", sklearn_manifold)
sys.modules.setdefault("sklearn.metrics", sklearn_metrics)
100+
101+
def _noop_init(self, *args, **kwargs):
    """Constructor for the stub client: accept anything, do nothing."""
    return None


# Stand-in "openai" module exposing only an OpenAI class that can be
# instantiated with arbitrary arguments.
openai_stub = types.ModuleType("openai")
openai_stub.OpenAI = type("OpenAI", (), {"__init__": _noop_init})
sys.modules.setdefault("openai", openai_stub)
# pandas needs no attributes here; an empty module is enough.
sys.modules.setdefault("pandas", types.ModuleType("pandas"))
105+
106+
from embeddings_utils import (
107+
cosine_similarity,
108+
distances_from_embeddings,
109+
indices_of_nearest_neighbors_from_distances,
110+
)
111+
112+
113+
def test_cosine_similarity_basic():
    """Check cosine_similarity on identical, orthogonal and 45-degree vectors."""
    a = [1, 0]
    b = [1, 0]
    c = [0, 1]
    d = [1, 1]

    # Identical vectors: similarity 1.
    assert math.isclose(cosine_similarity(a, b), 1.0, rel_tol=1e-6)
    # Orthogonal vectors: similarity 0. A relative tolerance scales with the
    # operands' magnitude, so against 0.0 it only accepts exact equality; an
    # absolute tolerance is required to allow floating-point noise.
    assert math.isclose(cosine_similarity(a, c), 0.0, abs_tol=1e-9)
    # 45 degrees apart: similarity 1/sqrt(2).
    expected = 1 / math.sqrt(2)
    assert math.isclose(cosine_similarity(a, d), expected, rel_tol=1e-6)
123+
124+
125+
def test_distances_from_embeddings_cosine():
    """Check cosine distances from a query to three reference embeddings."""
    query = [1.0, 0.0]
    embeddings = [[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]
    dists = list(
        distances_from_embeddings(query, embeddings, distance_metric="cosine")
    )
    expected = [0.0, 1.0, 1 - 1 / math.sqrt(2)]

    # zip() stops at the shorter input, so without this check an empty or
    # truncated result would make the comparison below pass vacuously.
    assert len(dists) == len(expected)
    for actual, wanted in zip(dists, expected):
        # abs_tol covers the 0.0 expectation, where rel_tol alone would
        # demand exact equality.
        assert math.isclose(actual, wanted, rel_tol=1e-6, abs_tol=1e-9)
133+
134+
135+
def test_indices_of_nearest_neighbors_from_distances():
    """Indices must come back ordered from smallest to largest distance."""
    sample_distances = [0.5, 0.2, 0.9]
    ranked = indices_of_nearest_neighbors_from_distances(sample_distances)
    # list() normalises whatever sequence type is returned before comparing.
    ranked_as_list = list(ranked)
    assert ranked_as_list == [1, 0, 2]
139+
140+

0 commit comments

Comments
 (0)