|
9 | 9 | from chromadb.api.types import GetResult, QueryResult |
10 | 10 | from haystack import default_from_dict, default_to_dict, logging |
11 | 11 | from haystack.dataclasses import Document |
| 12 | +from haystack.document_stores.errors import DocumentStoreError |
12 | 13 | from haystack.document_stores.types import DuplicatePolicy |
13 | 14 |
|
14 | 15 | from .filters import _convert_filters |
@@ -113,6 +114,8 @@ def _ensure_initialized(self): |
113 | 114 | # Local persistent storage |
114 | 115 | client = chromadb.PersistentClient(path=self._persist_path) |
115 | 116 |
|
| 117 | + self._client = client # store client for potential future use |
| 118 | + |
116 | 119 | self._metadata = self._metadata or {} |
117 | 120 | if "hnsw:space" not in self._metadata: |
118 | 121 | self._metadata["hnsw:space"] = self._distance_function |
@@ -149,6 +152,8 @@ async def _ensure_initialized_async(self): |
149 | 152 | port=self._port, |
150 | 153 | ) |
151 | 154 |
|
| 155 | + self._async_client = client # store client for potential future use |
| 156 | + |
152 | 157 | self._metadata = self._metadata or {} |
153 | 158 | if "hnsw:space" not in self._metadata: |
154 | 159 | self._metadata["hnsw:space"] = self._distance_function |
@@ -408,6 +413,86 @@ async def delete_documents_async(self, document_ids: List[str]) -> None: |
408 | 413 |
|
409 | 414 | await self._async_collection.delete(ids=document_ids) |
410 | 415 |
|
def delete_all_documents(self, *, recreate_index: bool = False) -> None:
    """
    Deletes all documents in the document store.

    A fast way to clear all documents from the document store while preserving any collection settings and mappings.

    :param recreate_index: If `True`, the collection is dropped and created again under the same name, reusing
        the metadata and embedding function of the old collection. If `False`, the ids of all stored documents
        are fetched and deleted, leaving the collection itself in place.
    :raises DocumentStoreError: If any step of the deletion fails; the original exception is chained as the cause.
    """
    self._ensure_initialized()  # _ensure_initialized ensures _client is not None and a collection exists
    assert self._collection is not None

    try:
        if recreate_index:
            # Capture everything needed to rebuild the collection before it is dropped.
            metadata = self._collection.metadata
            embedding_function = self._collection._embedding_function
            collection_name = self._collection_name

            # Delete the collection
            self._client.delete_collection(name=collection_name)

            # Recreate the collection with previous metadata
            self._collection = self._client.create_collection(
                name=collection_name,
                metadata=metadata,
                embedding_function=embedding_function,
            )

        else:
            # Only the ids are needed, so ask Chroma not to return documents or metadata.
            ids = self._collection.get(include=[]).get("ids") or []
            # Chroma refuses a delete call that carries neither ids nor filters, so an already
            # empty collection is treated as a no-op instead of being reported as a failure.
            if ids:
                self._collection.delete(ids=ids)  # type: ignore
            logger.info(
                "Deleted all the {n_docs} documents from the collection '{name}'.",
                name=self._collection_name,
                n_docs=len(ids),
            )
    except Exception as e:
        msg = f"Failed to delete all documents from ChromaDB: {e!s}"
        raise DocumentStoreError(msg) from e
| 455 | + |
async def delete_all_documents_async(self, *, recreate_index: bool = False) -> None:
    """
    Asynchronously deletes all documents in the document store.

    A fast way to clear all documents from the document store while preserving any collection settings and mappings.

    :param recreate_index: If `True`, the collection is dropped and created again under the same name, reusing
        the metadata and embedding function of the old collection. If `False`, the ids of all stored documents
        are fetched and deleted, leaving the collection itself in place.
    :raises DocumentStoreError: If any step of the deletion fails; the original exception is chained as the cause.
    """
    await self._ensure_initialized_async()  # ensures _async_client is not None
    assert self._async_collection is not None

    try:
        if recreate_index:
            # Capture everything needed to rebuild the collection before it is dropped.
            metadata = self._async_collection.metadata
            embedding_function = self._async_collection._embedding_function
            collection_name = self._collection_name

            # Delete the collection
            await self._async_client.delete_collection(name=collection_name)

            # Recreate the collection with previous metadata
            self._async_collection = await self._async_client.create_collection(
                name=collection_name,
                metadata=metadata,
                embedding_function=embedding_function,
            )
        else:
            # Only the ids are needed, so ask Chroma not to return documents or metadata.
            result = await self._async_collection.get(include=[])
            ids = result.get("ids") or []
            # Chroma refuses a delete call that carries neither ids nor filters, so an already
            # empty collection is treated as a no-op instead of being reported as a failure.
            if ids:
                await self._async_collection.delete(ids=ids)  # type: ignore
            logger.info(
                "Deleted all the {n_docs} documents from the collection '{name}'.",
                name=self._collection_name,
                n_docs=len(ids),
            )

    except Exception as e:
        msg = f"Failed to delete all documents from ChromaDB: {e!s}"
        raise DocumentStoreError(msg) from e
| 495 | + |
411 | 496 | def search( |
412 | 497 | self, |
413 | 498 | queries: List[str], |
|
0 commit comments