@@ -57,26 +57,68 @@ def index(paths: list[Path], pattern: str, persist_dir: Path):
57
57
try :
58
58
indexer = Indexer (persist_directory = persist_dir , enable_persist = True )
59
59
60
- # First, collect all documents
60
+ # Get existing files and their metadata from the index, using absolute paths
61
+ existing_docs = indexer .get_all_documents ()
62
+ logger .debug ("Found %d existing documents in index" , len (existing_docs ))
63
+
64
+ existing_files = {}
65
+ for doc in existing_docs :
66
+ if "source" in doc .metadata :
67
+ abs_path = os .path .abspath (doc .metadata ["source" ])
68
+ mtime = doc .metadata .get ("mtime" , 0 )
69
+ existing_files [abs_path ] = mtime
70
+ logger .debug ("Existing file: %s (mtime: %s)" , abs_path , mtime )
71
+
72
+ logger .debug ("Loaded %d existing files from index" , len (existing_files ))
73
+
74
+ # First, collect all documents and filter for new/modified
61
75
all_documents = []
62
76
with console .status ("Collecting documents..." ) as status :
63
77
for path in paths :
64
78
if path .is_file ():
65
79
status .update (f"Processing file: { path } " )
66
80
else :
67
81
status .update (f"Processing directory: { path } " )
82
+
68
83
documents = indexer .collect_documents (path )
69
- all_documents .extend (documents )
84
+
85
+ # Filter for new or modified documents
86
+ filtered_documents = []
87
+ for doc in documents :
88
+ source = doc .metadata .get ("source" )
89
+ if source :
90
+ # Resolve to absolute path for consistent comparison
91
+ abs_source = os .path .abspath (source )
92
+ doc .metadata ["source" ] = abs_source
93
+ current_mtime = os .path .getmtime (abs_source )
94
+ doc .metadata ["mtime" ] = current_mtime
95
+
96
+ # Include if file is new or modified
97
+ if abs_source not in existing_files :
98
+ logger .debug ("New file: %s" , abs_source )
99
+ filtered_documents .append (doc )
100
+ elif current_mtime > existing_files [abs_source ]:
101
+ logger .debug (
102
+ "Modified file: %s (current: %s, stored: %s)" ,
103
+ abs_source ,
104
+ current_mtime ,
105
+ existing_files [abs_source ],
106
+ )
107
+ filtered_documents .append (doc )
108
+ else :
109
+ logger .debug ("Unchanged file: %s" , abs_source )
110
+
111
+ all_documents .extend (filtered_documents )
70
112
71
113
if not all_documents :
72
- console .print ("No documents found to index" , style = "yellow" )
114
+ console .print ("No new or modified documents to index" , style = "yellow" )
73
115
return
74
116
75
117
# Then process them with a progress bar
76
118
n_files = len (set (doc .metadata .get ("source" , "" ) for doc in all_documents ))
77
119
n_chunks = len (all_documents )
78
120
79
- logger .info (f"Found { n_files } files to index ({ n_chunks } chunks)" )
121
+ logger .info (f"Found { n_files } new/modified files to index ({ n_chunks } chunks)" )
80
122
81
123
with tqdm (
82
124
total = n_chunks ,
0 commit comments