1818import { DEFAULT_COMPARE_OPTIONS } from '../utils.js'
1919import { ReverseIndex } from '../indexes/reverse-index.js'
2020import { hasVirtualPropPath } from '../virtual-props.js'
21+ import { Func } from '../query/ir.js'
2122import type { CompareOptions } from '../query/builder/types.js'
2223import type { IndexInterface , IndexOperation } from '../indexes/base-index.js'
2324import type { BasicExpression } from '../query/ir.js'
2425import type { CollectionLike } from '../types.js'
2526
/**
 * Outcome of attempting to answer a query expression through indexes.
 *
 * `matchingKeys` is exact only when `residualPredicate` is `undefined`.
 * If a residual is present, `matchingKeys` is a candidate set: the caller
 * MUST evaluate the residual against each candidate and drop the rows that
 * fail. This is what allows an AND expression to be partially optimized,
 * with the indexed branches narrowing the candidates and the remaining
 * branches re-checked afterwards.
 */
export interface OptimizationResult<TKey> {
  /** Whether the expression could be (at least partially) served by indexes. */
  canOptimize: boolean
  /** Keys selected by the index lookups; exact or candidate, see above. */
  matchingKeys: Set<TKey>
  /** Predicate still to be applied to the candidates, if any. */
  residualPredicate?: BasicExpression
}
3342
3443/**
@@ -138,7 +147,7 @@ function optimizeQueryRecursive<T extends object, TKey extends string | number>(
138147}
139148
140149/**
141- * Checks if an expression can be optimized
150+ * Checks if an expression can be optimized (possibly with a residual predicate)
142151 */
143152export function canOptimizeExpression <
144153 T extends object ,
@@ -167,27 +176,71 @@ export function canOptimizeExpression<
167176 return false
168177}
169178
/**
 * Reports whether an expression can be matched entirely by index lookups,
 * leaving no residual predicate behind.
 *
 * - Non-function expressions and unrecognised function names: `false`.
 * - `eq` / `gt` / `gte` / `lt` / `lte`: decided by `canOptimizeSimpleComparison`.
 * - `in`: decided by `canOptimizeInArrayExpression`.
 * - `and`: needs at least two args, and every arg must itself be fully
 *   optimizable. A partially optimized AND (one that would yield a residual)
 *   does not qualify.
 * - `or`: decided by `canOptimizeOrExpression`, which is already strict.
 *
 * The strict-OR contract relies on this check to decide whether an OR branch
 * is safe to union by index alone.
 */
function canFullyOptimizeExpression<
  T extends object,
  TKey extends string | number,
>(expression: BasicExpression, collection: CollectionLike<T, TKey>): boolean {
  if (expression.type !== `func`) {
    return false
  }

  const operator = expression.name

  const isSimpleComparison =
    operator === `eq` ||
    operator === `gt` ||
    operator === `gte` ||
    operator === `lt` ||
    operator === `lte`
  if (isSimpleComparison) {
    return canOptimizeSimpleComparison(expression, collection)
  }

  if (operator === `in`) {
    return canOptimizeInArrayExpression(expression, collection)
  }

  if (operator === `and`) {
    // Too few operands is rejected before any child is inspected.
    if (expression.args.length < 2) {
      return false
    }
    return expression.args.every((child) =>
      canFullyOptimizeExpression(child, collection),
    )
  }

  if (operator === `or`) {
    // The OR predicate is strict on its own and recurses back into this check.
    return canOptimizeOrExpression(expression, collection)
  }

  return false
}
215+
/**
 * Result of a successful compound range optimization.
 *
 * `optimizeCompoundRangeQuery` returns `undefined` instead of this object
 * when it cannot fuse any range operations.
 */
interface CompoundRangeQueryResult<TKey> {
  /** Keys returned by the single fused range lookup. */
  matchingKeys: Set<TKey>
  /**
   * The argument expressions whose range operations were folded into the
   * lookup. Callers skip these when processing the remaining arguments.
   */
  consumedArgs: Set<BasicExpression>
}

/**
 * Optimizes compound range queries on the same field
 * Example: WHERE age > 5 AND age < 10
 */
174225function optimizeCompoundRangeQuery <
175226 T extends object ,
176227 TKey extends string | number ,
177228> (
178229 expression : BasicExpression ,
179230 collection : CollectionLike < T , TKey > ,
180- ) : OptimizationResult < TKey > {
231+ ) : CompoundRangeQueryResult < TKey > | undefined {
181232 if ( expression . type !== `func` || expression . args . length < 2 ) {
182- return { canOptimize : false , matchingKeys : new Set ( ) }
233+ return undefined
183234 }
184235
185- // Group range operations by field
236+ // Group range operations by field, tracking which arg produced each entry so
237+ // we can report back exactly which branches the compound range consumed.
186238 const fieldOperations = new Map <
187239 string ,
188240 Array < {
189241 operation : `gt` | `gte` | `lt` | `lte`
190242 value : any
243+ arg : BasicExpression
191244 } >
192245 > ( )
193246
@@ -238,7 +291,7 @@ function optimizeCompoundRangeQuery<
238291 if ( ! fieldOperations . has ( fieldKey ) ) {
239292 fieldOperations . set ( fieldKey , [ ] )
240293 }
241- fieldOperations . get ( fieldKey ) ! . push ( { operation, value } )
294+ fieldOperations . get ( fieldKey ) ! . push ( { operation, value, arg } )
242295 }
243296 }
244297 }
@@ -293,12 +346,15 @@ function optimizeCompoundRangeQuery<
293346 toInclusive,
294347 } )
295348
296- return { canOptimize : true , matchingKeys }
349+ const consumedArgs = new Set < BasicExpression > (
350+ operations . map ( ( op ) => op . arg ) ,
351+ )
352+ return { matchingKeys, consumedArgs }
297353 }
298354 }
299355 }
300356
301- return { canOptimize : false , matchingKeys : new Set ( ) }
357+ return undefined
302358}
303359
304360/**
@@ -415,30 +471,52 @@ function optimizeAndExpression<T extends object, TKey extends string | number>(
415471 return { canOptimize : false , matchingKeys : new Set ( ) }
416472 }
417473
418- // First, try to optimize compound range queries on the same field
474+ // Compound range queries fuse multiple range ops on the same field into a
475+ // single index lookup. When present, treat the fused result as one optimized
476+ // branch and run the normal partition over the remaining args, so any
477+ // non-range siblings (indexed or not) still participate in the residual.
419478 const compoundRangeResult = optimizeCompoundRangeQuery ( expression , collection )
420- if ( compoundRangeResult . canOptimize ) {
421- return compoundRangeResult
422- }
423479
424- const results : Array < OptimizationResult < TKey > > = [ ]
480+ const optimizedResults : Array < OptimizationResult < TKey > > = [ ]
481+ const residualBranches : Array < BasicExpression > = [ ]
425482
426- // Try to optimize each part, keep the optimizable ones
483+ if ( compoundRangeResult ) {
484+ optimizedResults . push ( {
485+ canOptimize : true ,
486+ matchingKeys : compoundRangeResult . matchingKeys ,
487+ } )
488+ }
489+
490+ // Partition branches into ones we can use indexes for, and ones that need to
491+ // be re-evaluated as a residual predicate on the resulting candidate set.
427492 for ( const arg of expression . args ) {
493+ if ( compoundRangeResult ?. consumedArgs . has ( arg ) ) continue
428494 const result = optimizeQueryRecursive ( arg , collection )
429495 if ( result . canOptimize ) {
430- results . push ( result )
496+ optimizedResults . push ( result )
497+ // A partially-optimized child carries its own residual; fold it in.
498+ if ( result . residualPredicate ) {
499+ residualBranches . push ( result . residualPredicate )
500+ }
501+ } else {
502+ residualBranches . push ( arg )
431503 }
432504 }
433505
434- if ( results . length > 0 ) {
435- // Use intersectSets utility for AND logic
436- const allMatchingSets = results . map ( ( r ) => r . matchingKeys )
437- const intersectedKeys = intersectSets ( allMatchingSets )
438- return { canOptimize : true , matchingKeys : intersectedKeys }
506+ if ( optimizedResults . length === 0 ) {
507+ return { canOptimize : false , matchingKeys : new Set ( ) }
439508 }
440509
441- return { canOptimize : false , matchingKeys : new Set ( ) }
510+ const allMatchingSets = optimizedResults . map ( ( r ) => r . matchingKeys )
511+ const intersectedKeys = intersectSets ( allMatchingSets )
512+ const residualPredicate =
513+ residualBranches . length === 0
514+ ? undefined
515+ : residualBranches . length === 1
516+ ? residualBranches [ 0 ]
517+ : new Func ( `and` , residualBranches )
518+
519+ return { canOptimize : true , matchingKeys : intersectedKeys , residualPredicate }
442520}
443521
444522/**
@@ -467,11 +545,26 @@ function optimizeOrExpression<T extends object, TKey extends string | number>(
467545 return { canOptimize : false , matchingKeys : new Set ( ) }
468546 }
469547
470- const results : Array < OptimizationResult < TKey > > = [ ]
548+ // Strict-OR: every branch must be fully optimizable (no residual). A residual
549+ // on any branch would mean some rows match only via re-evaluation against a
550+ // candidate set, and candidates from OR siblings don't include those. Check
551+ // up front using the cheap predicate so we don't run wasted index lookups on
552+ // earlier branches before discovering a later one cannot be optimized.
553+ if (
554+ ! expression . args . every ( ( arg ) =>
555+ canFullyOptimizeExpression ( arg , collection ) ,
556+ )
557+ ) {
558+ return { canOptimize : false , matchingKeys : new Set ( ) }
559+ }
471560
561+ const results : Array < OptimizationResult < TKey > > = [ ]
472562 for ( const arg of expression . args ) {
473563 const result = optimizeQueryRecursive ( arg , collection )
474- if ( ! result . canOptimize ) {
564+ // Defensive: canFullyOptimizeExpression guarantees both invariants above,
565+ // but assert here so any future divergence between the two paths surfaces
566+ // as a soft fallback rather than an unsound union.
567+ if ( ! result . canOptimize || result . residualPredicate !== undefined ) {
475568 return { canOptimize : false , matchingKeys : new Set ( ) }
476569 }
477570 results . push ( result )
@@ -493,8 +586,12 @@ function canOptimizeOrExpression<
493586 return false
494587 }
495588
496- // All branches must be optimizable — partial OR optimization is unsound
497- return expression . args . every ( ( arg ) => canOptimizeExpression ( arg , collection ) )
589+ // Strict OR: every branch must be FULLY optimizable (no residual). Using
590+ // canOptimizeExpression here would let partial-AND children slip through,
591+ // disagreeing with optimizeOrExpression's runtime contract.
592+ return expression . args . every ( ( arg ) =>
593+ canFullyOptimizeExpression ( arg , collection ) ,
594+ )
498595}
499596
500597/**
0 commit comments