1 file changed: +10 -1 lines changed
Original file line number | Diff line number | Diff line change
1+ import logging
12from dataclasses import dataclass
23
34import tiktoken
45
56from ..indexing .document import Document
67
8+ logger = logging .getLogger (__name__ )
9+
710
811@dataclass
912class ContextWindow :
@@ -40,9 +43,10 @@ def assemble_context(
4043 Assemble a context window from documents, staying within token limit.
4144
4245 Documents should be pre-sorted by relevance.
46+ Duplicate documents will be filtered out and a warning will be logged.
4347 """
4448 total_tokens = 0
45- included_docs = []
49+ included_docs : list [ Document ] = []
4650 context_parts = []
4751 truncated = False
4852
@@ -64,6 +68,11 @@ def assemble_context(
6468 formatted_doc = self ._format_document (doc )
6569 doc_tokens = self ._count_tokens (formatted_doc )
6670
71+ # check if document content is duplicate
72+ if doc .content in [d .content for d in included_docs ]:
73+ logger .warning (f"Duplicate document found: { doc .metadata ['source' ]} " )
74+ continue
75+
6776 if total_tokens + doc_tokens > self .max_tokens :
6877 truncated = True
6978 break
You can’t perform that action at this time.
0 commit comments