Skip to content

Commit 9080fa6

Browse files
committed
feat: improve search results and file handling
- Improve search results formatting with better indentation and code blocks
- Change 'filename' to 'source' in metadata for consistency
- Enhance gitignore pattern handling with more default patterns
- Remove redundant ignore patterns from watch command (now using gitignore)
- Use code blocks with source as language in context assembly
1 parent aebaa50 commit 9080fa6

File tree

3 files changed

+27
-12
lines changed

3 files changed

+27
-12
lines changed

gptme_rag/cli.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -97,31 +97,31 @@ def search(
9797
# Show a summary of the most relevant documents
9898
console.print("\n[bold]Most Relevant Documents:[/bold]")
9999
for i, doc in enumerate(documents):
100-
filename = doc.metadata.get("filename", "unknown")
100+
source = doc.metadata.get("source", "unknown")
101101
distance = distances[i]
102102
relevance = 1 - distance # Convert distance to similarity score
103103

104104
# Show document header with relevance score
105105
console.print(
106-
f"\n[cyan]{i+1}. {filename}[/cyan] [yellow](relevance: {relevance:.2f})[/yellow]"
106+
f"\n[cyan]{i+1}. {source}[/cyan] [yellow](relevance: {relevance:.2f})[/yellow]"
107107
)
108108

109109
# Extract first meaningful section (after headers)
110110
lines = doc.content.split("\n")
111111
content = []
112112
for line in lines:
113113
if line.strip() and not line.startswith("#"):
114-
content.append(line.strip())
115-
if len(" ".join(content)) > 200:
114+
content.append(" " + line + "\n")
115+
if len("".join(content)) > 200:
116116
break
117117

118118
# Show the first paragraph or meaningful content
119119
content_preview = (
120-
" ".join(content)[:200] + "..."
121-
if len(" ".join(content)) > 200
122-
else " ".join(content)
120+
"".join(content)[:200] + "..."
121+
if len("".join(content)) > 200
122+
else "".join(content)
123123
)
124-
console.print(f" {content_preview}")
124+
console.print(f"{content_preview}")
125125

126126
# Assemble context window
127127
context = assembler.assemble_context(documents, user_query=query)
@@ -158,7 +158,7 @@ def search(
158158
"--ignore-patterns",
159159
"-i",
160160
multiple=True,
161-
default=[".git", "__pycache__", "*.pyc"],
161+
default=[],
162162
help="Glob patterns to ignore",
163163
)
164164
def watch(directory: Path, pattern: str, persist_dir: Path, ignore_patterns: list[str]):
@@ -174,6 +174,7 @@ def watch(directory: Path, pattern: str, persist_dir: Path, ignore_patterns: lis
174174
console.print("Starting file watcher...")
175175

176176
try:
177+
# TODO: FileWatcher should use same gitignore patterns as indexer
177178
file_watcher = FileWatcher(
178179
indexer, [str(directory)], pattern, ignore_patterns
179180
)

gptme_rag/indexing/indexer.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,17 @@ def add_documents(self, documents: list[Document], batch_size: int = 100) -> Non
172172

173173
def _load_gitignore(self, directory: Path) -> list[str]:
174174
"""Load gitignore patterns from all .gitignore files up to root."""
175-
patterns: list[str] = [".git/", ".sqlite3", ".db"]
175+
# arguably only .git/** should be here, with the rest in system global gitignore (which should be respected)
176+
patterns: list[str] = [
177+
".git/**",
178+
"*.sqlite3",
179+
"*.db",
180+
"*.pyc",
181+
"__pycache__",
182+
".*cache/**",
183+
"*.lock",
184+
".DS_Store",
185+
]
176186
current_dir = directory.resolve()
177187
max_depth = 10 # Limit traversal to avoid infinite loops
178188

@@ -201,7 +211,11 @@ def _is_ignored(self, file_path: Path, gitignore_patterns: list[str]) -> bool:
201211
rel_path = str(file_path)
202212

203213
for pattern in gitignore_patterns:
204-
if fnmatch(rel_path, pattern) or fnmatch(rel_path, f"**/{pattern}"):
214+
if (
215+
fnmatch(rel_path, pattern)
216+
or fnmatch(rel_path, f"**/{pattern}")
217+
or fnmatch(rel_path, f"**/{pattern}/**")
218+
):
205219
return True
206220
return False
207221

gptme_rag/query/context_assembler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def _count_tokens(self, text: str) -> int:
2828
def _format_document(self, doc: Document) -> str:
2929
"""Format a document for inclusion in the context window."""
3030
source = doc.metadata.get("source", "unknown")
31-
return f"--- From {source} ---\n{doc.content}\n"
31+
return f"```{source}\n{doc.content}\n```"
3232

3333
def assemble_context(
3434
self,

0 commit comments

Comments
 (0)