Skip to content

Commit f229f45

Browse files
authored
Add grpc unit test run, expand testing of VectorFactory (#326)
## Problem

A small number of unit tests should only run when the grpc dependencies are installed. These were previously omitted from CI by mistake.

## Solution

Add a step to run these grpc tests. Make it conditional on the `use_grpc` test matrix param.

## Type of Change

- [x] Infrastructure change (CI configs, etc.)
1 parent 58ea88c commit f229f45

13 files changed

+449
-237
lines changed

.github/workflows/testing-unit.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ jobs:
77
name: Unit tests
88
runs-on: ubuntu-latest
99
strategy:
10+
fail-fast: false
1011
matrix:
1112
python-version:
1213
- 3.8
@@ -28,8 +29,11 @@ jobs:
2829
with:
2930
include_grpc: '${{ matrix.use_grpc }}'
3031
include_types: true
31-
- name: Run unit tests
32+
- name: Run unit tests (REST)
3233
run: poetry run pytest --cov=pinecone --timeout=120 tests/unit
34+
- name: Run unit tests (GRPC)
35+
if: ${{ matrix.use_grpc == true }}
36+
run: poetry run pytest --cov=pinecone/grpc --timeout=120 tests/unit_grpc
3337
- name: mypy check
3438
env:
3539
INCLUDE_GRPC: '${{ matrix.use_grpc }}'

pinecone/data/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .index import *
2-
from .vector_factory import (
2+
from .errors import (
33
VectorDictionaryMissingKeysError,
44
VectorDictionaryExcessKeysError,
55
VectorTupleLengthError,

pinecone/data/errors.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from ..utils.constants import REQUIRED_VECTOR_FIELDS, OPTIONAL_VECTOR_FIELDS
2+
3+
class VectorDictionaryMissingKeysError(ValueError):
4+
def __init__(self, item):
5+
message = f"Vector dictionary is missing required fields: {list(REQUIRED_VECTOR_FIELDS - set(item.keys()))}"
6+
super().__init__(message)
7+
8+
class VectorDictionaryExcessKeysError(ValueError):
9+
def __init__(self, item):
10+
invalid_keys = list(set(item.keys()) - (REQUIRED_VECTOR_FIELDS | OPTIONAL_VECTOR_FIELDS))
11+
message = f"Found excess keys in the vector dictionary: {invalid_keys}. The allowed keys are: {list(REQUIRED_VECTOR_FIELDS | OPTIONAL_VECTOR_FIELDS)}"
12+
super().__init__(message)
13+
14+
class VectorTupleLengthError(ValueError):
15+
def __init__(self, item):
16+
message = f"Found a tuple of length {len(item)} which is not supported. Vectors can be represented as tuples either the form (id, values, metadata) or (id, values). To pass sparse values please use either dicts or Vector objects as inputs."
17+
super().__init__(message)
18+
19+
class SparseValuesTypeError(ValueError, TypeError):
20+
def __init__(self):
21+
message = "Found unexpected data in column `sparse_values`. Expected format is `'sparse_values': {'indices': List[int], 'values': List[float]}`."
22+
super().__init__(message)
23+
24+
class SparseValuesMissingKeysError(ValueError):
25+
def __init__(self, sparse_values_dict):
26+
message = f"Missing required keys in data in column `sparse_values`. Expected format is `'sparse_values': {{'indices': List[int], 'values': List[float]}}`. Found keys {list(sparse_values_dict.keys())}"
27+
super().__init__(message)
28+
29+
class SparseValuesDictionaryExpectedError(ValueError, TypeError):
30+
def __init__(self, sparse_values_dict):
31+
message = f"Column `sparse_values` is expected to be a dictionary, found {type(sparse_values_dict)}"
32+
super().__init__(message)
33+
34+
class MetadataDictionaryExpectedError(ValueError, TypeError):
35+
def __init__(self, item):
36+
message = f"Column `metadata` is expected to be a dictionary, found {type(item['metadata'])}"
37+
super().__init__(message)

pinecone/data/sparse_vector_factory.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import numbers
2+
3+
from collections.abc import Mapping
4+
from typing import Union, Dict
5+
6+
from ..utils import convert_to_list
7+
8+
from .errors import (
9+
SparseValuesTypeError,
10+
SparseValuesMissingKeysError,
11+
SparseValuesDictionaryExpectedError
12+
)
13+
14+
from pinecone.core.client.models import (
15+
SparseValues
16+
)
17+
18+
class SparseValuesFactory:
19+
@staticmethod
20+
def build(input: Union[Dict, SparseValues]) -> SparseValues:
21+
if input is None:
22+
return input
23+
if isinstance(input, SparseValues):
24+
return input
25+
if not isinstance(input, Mapping):
26+
raise SparseValuesDictionaryExpectedError(input)
27+
if not {"indices", "values"}.issubset(input):
28+
raise SparseValuesMissingKeysError(input)
29+
30+
indices = SparseValuesFactory._convert_to_list(input.get("indices"), int)
31+
values = SparseValuesFactory._convert_to_list(input.get("values"), float)
32+
33+
if len(indices) != len(values):
34+
raise ValueError("Sparse values indices and values must have the same length")
35+
36+
try:
37+
return SparseValues(indices=indices, values=values)
38+
except TypeError as e:
39+
raise SparseValuesTypeError() from e
40+
41+
@staticmethod
42+
def _convert_to_list(input, expected_type):
43+
try:
44+
converted = convert_to_list(input)
45+
except TypeError as e:
46+
raise SparseValuesTypeError() from e
47+
48+
SparseValuesFactory._validate_list_items_type(converted, expected_type)
49+
return converted
50+
51+
@staticmethod
52+
def _validate_list_items_type(input, expected_type):
53+
if len(input) > 0 and not isinstance(input[0], expected_type):
54+
raise SparseValuesTypeError()

pinecone/data/vector_factory.py

Lines changed: 11 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -5,47 +5,19 @@
55

66
from ..utils import fix_tuple_length, convert_to_list
77
from ..utils.constants import REQUIRED_VECTOR_FIELDS, OPTIONAL_VECTOR_FIELDS
8+
from .sparse_vector_factory import SparseValuesFactory
89

910
from pinecone.core.client.models import (
1011
Vector,
1112
SparseValues
1213
)
1314

14-
class VectorDictionaryMissingKeysError(ValueError):
15-
def __init__(self, item):
16-
message = f"Vector dictionary is missing required fields: {list(REQUIRED_VECTOR_FIELDS - set(item.keys()))}"
17-
super().__init__(message)
18-
19-
class VectorDictionaryExcessKeysError(ValueError):
20-
def __init__(self, item):
21-
invalid_keys = list(set(item.keys()) - (REQUIRED_VECTOR_FIELDS | OPTIONAL_VECTOR_FIELDS))
22-
message = f"Found excess keys in the vector dictionary: {invalid_keys}. The allowed keys are: {list(REQUIRED_VECTOR_FIELDS | OPTIONAL_VECTOR_FIELDS)}"
23-
super().__init__(message)
24-
25-
class VectorTupleLengthError(ValueError):
26-
def __init__(self, item):
27-
message = f"Found a tuple of length {len(item)} which is not supported. Vectors can be represented as tuples either the form (id, values, metadata) or (id, values). To pass sparse values please use either dicts or Vector objects as inputs."
28-
super().__init__(message)
29-
30-
class SparseValuesTypeError(ValueError, TypeError):
31-
def __init__(self):
32-
message = "Found unexpected data in column `sparse_values`. Expected format is `'sparse_values': {'indices': List[int], 'values': List[float]}`."
33-
super().__init__(message)
34-
35-
class SparseValuesMissingKeysError(ValueError):
36-
def __init__(self, sparse_values_dict):
37-
message = f"Missing required keys in data in column `sparse_values`. Expected format is `'sparse_values': {{'indices': List[int], 'values': List[float]}}`. Found keys {list(sparse_values_dict.keys())}"
38-
super().__init__(message)
39-
40-
class SparseValuesDictionaryExpectedError(ValueError, TypeError):
41-
def __init__(self, sparse_values_dict):
42-
message = f"Column `sparse_values` is expected to be a dictionary, found {type(sparse_values_dict)}"
43-
super().__init__(message)
44-
45-
class MetadataDictionaryExpectedError(ValueError, TypeError):
46-
def __init__(self, item):
47-
message = f"Column `metadata` is expected to be a dictionary, found {type(item['metadata'])}"
48-
super().__init__(message)
15+
from .errors import (
16+
VectorDictionaryMissingKeysError,
17+
VectorDictionaryExcessKeysError,
18+
VectorTupleLengthError,
19+
MetadataDictionaryExpectedError,
20+
)
4921

5022
class VectorFactory:
5123
@staticmethod
@@ -84,8 +56,10 @@ def _dict_to_vector(item, check_type: bool) -> Vector:
8456
item["values"] = convert_to_list(values)
8557

8658
sparse_values = item.get("sparse_values")
87-
if sparse_values and not isinstance(sparse_values, SparseValues):
88-
item["sparse_values"] = VectorFactory._dict_to_sparse_values(sparse_values, check_type)
59+
if sparse_values is None:
60+
item.pop("sparse_values", None)
61+
else:
62+
item["sparse_values"] = SparseValuesFactory.build(sparse_values)
8963

9064
metadata = item.get("metadata")
9165
if metadata and not isinstance(metadata, Mapping):
@@ -97,18 +71,3 @@ def _dict_to_vector(item, check_type: bool) -> Vector:
9771
if not isinstance(item["values"], Iterable) or not isinstance(item["values"].__iter__().__next__(), numbers.Real):
9872
raise TypeError(f"Column `values` is expected to be a list of floats")
9973
raise e
100-
101-
@staticmethod
102-
def _dict_to_sparse_values(sparse_values_dict: Dict, check_type: bool) -> SparseValues:
103-
if not isinstance(sparse_values_dict, Mapping):
104-
raise SparseValuesDictionaryExpectedError(sparse_values_dict)
105-
if not {"indices", "values"}.issubset(sparse_values_dict):
106-
raise SparseValuesMissingKeysError(sparse_values_dict)
107-
108-
indices = convert_to_list(sparse_values_dict.get("indices"))
109-
values = convert_to_list(sparse_values_dict.get("values"))
110-
111-
try:
112-
return SparseValues(indices=indices, values=values, _check_type=check_type)
113-
except TypeError:
114-
raise SparseValuesTypeError()

pinecone/exceptions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ class PineconeProtocolError(PineconeException):
1717
class PineconeConfigurationError(PineconeException):
1818
"""Raised when a configuration error occurs."""
1919

20+
class ListConversionException(PineconeException, TypeError):
21+
def __init__(self, message):
22+
super().__init__(message)
23+
2024
__all__ = [
2125
"PineconeConfigurationError",
2226
"PineconeProtocolError",
@@ -30,4 +34,5 @@ class PineconeConfigurationError(PineconeException):
3034
"UnauthorizedException",
3135
"ForbiddenException",
3236
"ServiceException",
37+
"ListConversionException"
3338
]

pinecone/grpc/sparse_values_factory.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import numbers
2+
3+
from collections.abc import Mapping
4+
from typing import Union, Dict
5+
6+
from ..utils import convert_to_list
7+
8+
from ..data import (
9+
SparseValuesTypeError,
10+
SparseValuesMissingKeysError,
11+
SparseValuesDictionaryExpectedError
12+
)
13+
14+
from pinecone.core.grpc.protos.vector_service_pb2 import (
15+
SparseValues as GRPCSparseValues,
16+
)
17+
from pinecone import (
18+
SparseValues as NonGRPCSparseValues
19+
)
20+
21+
class SparseValuesFactory:
22+
@staticmethod
23+
def build(input: Union[Dict, GRPCSparseValues, NonGRPCSparseValues]) -> GRPCSparseValues:
24+
if input is None:
25+
return input
26+
if isinstance(input, GRPCSparseValues):
27+
return input
28+
if isinstance(input, NonGRPCSparseValues):
29+
return GRPCSparseValues(indices=input.indices, values=input.values)
30+
if not isinstance(input, Mapping):
31+
raise SparseValuesDictionaryExpectedError(input)
32+
if not {"indices", "values"}.issubset(input):
33+
raise SparseValuesMissingKeysError(input)
34+
35+
indices = SparseValuesFactory._convert_to_list(input.get("indices"), int)
36+
values = SparseValuesFactory._convert_to_list(input.get("values"), float)
37+
38+
if len(indices) != len(values):
39+
raise ValueError("Sparse values indices and values must have the same length")
40+
41+
try:
42+
return GRPCSparseValues(indices=indices, values=values)
43+
except TypeError as e:
44+
raise SparseValuesTypeError() from e
45+
46+
@staticmethod
47+
def _convert_to_list(input, expected_type):
48+
try:
49+
converted = convert_to_list(input)
50+
except TypeError as e:
51+
raise SparseValuesTypeError() from e
52+
53+
SparseValuesFactory._validate_list_items_type(converted, expected_type)
54+
return converted
55+
56+
@staticmethod
57+
def _validate_list_items_type(input, expected_type):
58+
if len(input) > 0 and not isinstance(input[0], expected_type):
59+
raise SparseValuesTypeError()

pinecone/grpc/vector_factory_grpc.py

Lines changed: 4 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,9 @@
1212
VectorDictionaryMissingKeysError,
1313
VectorDictionaryExcessKeysError,
1414
VectorTupleLengthError,
15-
SparseValuesTypeError,
16-
SparseValuesMissingKeysError,
17-
SparseValuesDictionaryExpectedError,
1815
MetadataDictionaryExpectedError
1916
)
17+
from .sparse_values_factory import SparseValuesFactory
2018

2119
from pinecone.core.grpc.protos.vector_service_pb2 import (
2220
Vector as GRPCVector,
@@ -73,8 +71,8 @@ def _dict_to_vector(item) -> GRPCVector:
7371
raise TypeError(f"Column `values` is expected to be a list of floats") from e
7472

7573
sparse_values = item.get("sparse_values")
76-
if sparse_values and not isinstance(sparse_values, GRPCSparseValues):
77-
item["sparse_values"] = VectorFactoryGRPC._dict_to_sparse_values(sparse_values)
74+
if sparse_values != None and not isinstance(sparse_values, GRPCSparseValues):
75+
item["sparse_values"] = SparseValuesFactory.build(sparse_values)
7876

7977
metadata = item.get("metadata")
8078
if metadata:
@@ -90,32 +88,4 @@ def _dict_to_vector(item) -> GRPCVector:
9088
except TypeError as e:
9189
if not isinstance(item["values"], Iterable) or not isinstance(item["values"].__iter__().__next__(), numbers.Real):
9290
raise TypeError(f"Column `values` is expected to be a list of floats")
93-
raise e
94-
95-
@staticmethod
96-
def _dict_to_sparse_values(sparse_values_dict: Union[Dict, GRPCSparseValues, NonGRPCSparseValues]) -> GRPCSparseValues:
97-
if isinstance(sparse_values_dict, GRPCSparseValues):
98-
return sparse_values_dict
99-
if isinstance(sparse_values_dict, NonGRPCSparseValues):
100-
return GRPCSparseValues(indices=sparse_values_dict.indices, values=sparse_values_dict.values)
101-
102-
if not isinstance(sparse_values_dict, Mapping):
103-
raise SparseValuesDictionaryExpectedError(sparse_values_dict)
104-
if not {"indices", "values"}.issubset(sparse_values_dict):
105-
raise SparseValuesMissingKeysError(sparse_values_dict)
106-
107-
108-
try:
109-
indices = convert_to_list(sparse_values_dict.get("indices"))
110-
except TypeError as e:
111-
raise SparseValuesTypeError() from e
112-
113-
try:
114-
values = convert_to_list(sparse_values_dict.get("values"))
115-
except TypeError as e:
116-
raise SparseValuesTypeError() from e
117-
118-
try:
119-
return GRPCSparseValues(indices=indices, values=values)
120-
except TypeError as e:
121-
raise SparseValuesTypeError() from e
91+
raise e

pinecone/utils/convert_to_list.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,18 @@
1+
from ..exceptions import ListConversionException
2+
13
def convert_to_list(obj):
24
class_name = obj.__class__.__name__
35

46
if class_name == 'list':
57
return obj
68
elif hasattr(obj, 'tolist') and callable(getattr(obj, 'tolist')):
79
return obj.tolist()
10+
elif obj is None or isinstance(obj, str) or isinstance(obj, dict):
11+
# The string and dictionary classes in python can be passed to list()
12+
# but they're not going to yield sensible results for our use case.
13+
raise ListConversionException(f"Expected a list or list-like data structure, but got: {obj}")
814
else:
9-
return list(obj)
15+
try:
16+
return list(obj)
17+
except Exception as e:
18+
raise ListConversionException(f"Expected a list or list-like data structure, but got: {obj}") from e

0 commit comments

Comments
 (0)