|
1 | 1 | import logging |
2 | 2 | from dataclasses import fields |
3 | | -from typing import List |
| 3 | +from typing import Any, Dict, List |
4 | 4 |
|
5 | 5 | import numpy as np |
6 | 6 | import pytest |
@@ -77,6 +77,42 @@ def test_run(self, document_store: MilvusDocumentStore): |
77 | 77 | assert len(res["documents"]) == 10 |
78 | 78 | assert_docs_equal_except_score(res["documents"][0], documents[5]) |
79 | 79 |
|
def test_run_using_filters(self, document_store: MilvusDocumentStore):
    """Check that filters passed to ``run`` restrict dense retrieval results."""
    # Ten documents: numbers 0-4 carry chapter "intro", numbers 5-9 "outro".
    corpus = [
        Document(
            content=f"Document {idx}",
            meta={
                "name": f"name_{idx}",
                "page": str(100 + idx),
                "chapter": "intro" if idx < 5 else "outro",
                "number": idx,
                "date": "1969-07-21T20:17:40",
            },
            embedding=l2_normalization([0.5] * 63 + [0.1 * idx]),
        )
        for idx in range(10)
    ]
    document_store.write_documents(corpus)

    retriever = MilvusEmbeddingRetriever(document_store)
    query_vector = l2_normalization([0.5] * 64)

    # Equality filter on a string field: exactly the five "intro" documents.
    chapter_filter: Dict[str, Any] = {"field": "chapter", "operator": "==", "value": "intro"}
    intro_hits = retriever.run(query_vector, filters=chapter_filter)["documents"]
    assert len(intro_hits) == 5
    assert all(hit.meta["chapter"] == "intro" for hit in intro_hits)

    # Range filter on a numeric field: exactly the five documents numbered 5-9.
    number_filter: Dict[str, Any] = {"field": "number", "operator": ">=", "value": 5}
    upper_hits = retriever.run(query_vector, filters=number_filter)["documents"]
    assert len(upper_hits) == 5
    assert all(hit.meta["number"] >= 5 for hit in upper_hits)
| 115 | + |
80 | 116 | def test_to_dict(self, document_store: MilvusDocumentStore): |
81 | 117 | expected_dict = { |
82 | 118 | "type": "src.milvus_haystack.document_store.MilvusDocumentStore", |
@@ -210,6 +246,43 @@ def test_run(self, document_store: MilvusDocumentStore, documents: List[Document |
210 | 246 | assert len(res["documents"]) == 10 |
211 | 247 | assert_docs_equal_except_score(res["documents"][0], documents[5]) |
212 | 248 |
|
def test_run_using_filters(self, document_store: MilvusDocumentStore):
    """Check that filters passed to ``run`` restrict sparse retrieval results."""
    # Ten documents: numbers 0-4 carry chapter "intro", numbers 5-9 "outro".
    # Every document shares sparse indices 0 and 1; the third index is 2 + number.
    corpus = [
        Document(
            content=f"Document {idx}",
            meta={
                "name": f"name_{idx}",
                "page": str(100 + idx),
                "chapter": "intro" if idx < 5 else "outro",
                "number": idx,
                "date": "1969-07-21T20:17:40",
            },
            embedding=l2_normalization([0.5] * 64),
            sparse_embedding=SparseEmbedding(indices=[0, 1, 2 + idx], values=[1.0, 2.0, 3.0]),
        )
        for idx in range(10)
    ]
    document_store.write_documents(corpus)

    retriever = MilvusSparseEmbeddingRetriever(document_store)
    # Same sparse vector as document number 5 (third index 2 + 5 == 7).
    sparse_query = SparseEmbedding(indices=[0, 1, 7], values=[1.0, 2.0, 3.0])

    # Equality filter on a string field: exactly the five "outro" documents.
    chapter_filter: Dict[str, Any] = {"field": "chapter", "operator": "==", "value": "outro"}
    outro_hits = retriever.run(sparse_query, filters=chapter_filter)["documents"]
    assert len(outro_hits) == 5
    assert all(hit.meta["chapter"] == "outro" for hit in outro_hits)

    # Range filter on a numeric field: exactly the documents numbered 0, 1 and 2.
    number_filter: Dict[str, Any] = {"field": "number", "operator": "<", "value": 3}
    low_hits = retriever.run(sparse_query, filters=number_filter)["documents"]
    assert len(low_hits) == 3
    assert all(hit.meta["number"] < 3 for hit in low_hits)
| 285 | + |
213 | 286 | def test_fail_without_sparse_field(self, documents: List[Document]): |
214 | 287 | document_store = MilvusDocumentStore( |
215 | 288 | connection_args=DEFAULT_CONNECTION_ARGS, |
@@ -366,6 +439,52 @@ def test_run(self, document_store: MilvusDocumentStore, documents: List[Document |
366 | 439 | assert len(res["documents"]) == 10 |
367 | 440 | assert_docs_equal_except_score(res["documents"][0], documents[5]) |
368 | 441 |
|
def test_run_using_filters(self, document_store: MilvusDocumentStore):
    """Check that filters passed to ``run`` restrict hybrid retrieval results."""
    # Ten documents: numbers 0-4 carry chapter "intro", numbers 5-9 "outro".
    # Each has both a dense and a sparse embedding that vary with its number.
    corpus = [
        Document(
            content=f"Hybrid Document {idx}",
            meta={
                "name": f"name_{idx}",
                "page": str(100 + idx),
                "chapter": "intro" if idx < 5 else "outro",
                "number": idx,
                "date": "1969-07-21T20:17:40",
            },
            embedding=l2_normalization([0.5] * 63 + [0.45 + 0.01 * idx]),
            sparse_embedding=SparseEmbedding(indices=[0, 1, 2 + idx], values=[1.0, 2.0, 3.0]),
        )
        for idx in range(10)
    ]
    document_store.write_documents(corpus)

    retriever = MilvusHybridRetriever(document_store)
    # Both searches reuse the same dense/sparse query pair; only the filter changes.
    query_kwargs = {
        "query_embedding": l2_normalization([0.5] * 64),
        "query_sparse_embedding": SparseEmbedding(indices=[0, 1, 7], values=[1.0, 2.0, 3.0]),
    }

    # Equality filter on a string field: exactly the five "intro" documents.
    chapter_filter: Dict[str, Any] = {"field": "chapter", "operator": "==", "value": "intro"}
    intro_hits = retriever.run(**query_kwargs, filters=chapter_filter)["documents"]
    assert len(intro_hits) == 5
    assert all(hit.meta["chapter"] == "intro" for hit in intro_hits)

    # Membership filter on a numeric field: exactly the four listed numbers.
    wanted_numbers = [2, 4, 6, 8]
    number_filter: Dict[str, Any] = {"field": "number", "operator": "in", "value": wanted_numbers}
    member_hits = retriever.run(**query_kwargs, filters=number_filter)["documents"]
    assert len(member_hits) == 4
    assert all(hit.meta["number"] in [2, 4, 6, 8] for hit in member_hits)
| 487 | + |
369 | 488 | def test_fail_without_sparse_field(self, documents: List[Document]): |
370 | 489 | document_store = MilvusDocumentStore( |
371 | 490 | connection_args=DEFAULT_CONNECTION_ARGS, |
|
0 commit comments