Skip to content

Commit 4a831d0

Browse files
authored
[9.x] Add an integration test to verify DirectIO is used for BBQ rescoring (#128465)
Port DirectIOIT from lucene_snapshot to main. relates #128370
1 parent 0aae7f6 commit 4a831d0

File tree

3 files changed

+124
-3
lines changed

3 files changed

+124
-3
lines changed
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.store;
11+
12+
import org.apache.logging.log4j.Level;
13+
import org.apache.lucene.misc.store.DirectIODirectory;
14+
import org.apache.lucene.store.Directory;
15+
import org.apache.lucene.store.FSDirectory;
16+
import org.apache.lucene.store.IOContext;
17+
import org.apache.lucene.store.IndexOutput;
18+
import org.apache.lucene.tests.util.LuceneTestCase;
19+
import org.elasticsearch.common.settings.Settings;
20+
import org.elasticsearch.plugins.Plugin;
21+
import org.elasticsearch.search.vectors.KnnSearchBuilder;
22+
import org.elasticsearch.search.vectors.VectorData;
23+
import org.elasticsearch.test.ESIntegTestCase;
24+
import org.elasticsearch.test.InternalSettingsPlugin;
25+
import org.elasticsearch.test.MockLog;
26+
import org.elasticsearch.test.junit.annotations.TestLogging;
27+
import org.junit.BeforeClass;
28+
29+
import java.io.IOException;
30+
import java.nio.file.Path;
31+
import java.util.Collection;
32+
import java.util.List;
33+
import java.util.OptionalLong;
34+
import java.util.stream.IntStream;
35+
36+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
37+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
38+
39+
@LuceneTestCase.SuppressCodecs("*") // only use our own codecs
40+
public class DirectIOIT extends ESIntegTestCase {
41+
42+
@BeforeClass
43+
public static void checkSupported() throws IOException {
44+
Path path = createTempDir("directIOProbe");
45+
try (Directory dir = open(path); IndexOutput out = dir.createOutput("out", IOContext.DEFAULT)) {
46+
out.writeString("test");
47+
} catch (IOException e) {
48+
assumeNoException("test requires filesystem that supports Direct IO", e);
49+
}
50+
}
51+
52+
static DirectIODirectory open(Path path) throws IOException {
53+
return new DirectIODirectory(FSDirectory.open(path)) {
54+
@Override
55+
protected boolean useDirectIO(String name, IOContext context, OptionalLong fileLength) {
56+
return true;
57+
}
58+
};
59+
}
60+
61+
@Override
62+
protected Collection<Class<? extends Plugin>> nodePlugins() {
63+
return List.of(InternalSettingsPlugin.class);
64+
}
65+
66+
private void indexVectors() {
67+
assertAcked(
68+
prepareCreate("foo-vectors").setSettings(Settings.builder().put(InternalSettingsPlugin.USE_COMPOUND_FILE.getKey(), false))
69+
.setMapping("""
70+
{
71+
"properties": {
72+
"fooVector": {
73+
"type": "dense_vector",
74+
"dims": 64,
75+
"element_type": "float",
76+
"index": true,
77+
"similarity": "l2_norm",
78+
"index_options": {
79+
"type": "bbq_flat"
80+
}
81+
}
82+
}
83+
}
84+
""")
85+
);
86+
ensureGreen("foo-vectors");
87+
88+
for (int i = 0; i < 1000; i++) {
89+
indexDoc("foo-vectors", Integer.toString(i), "fooVector", IntStream.range(0, 64).mapToDouble(d -> randomFloat()).toArray());
90+
}
91+
refresh();
92+
}
93+
94+
@TestLogging(value = "org.elasticsearch.index.store.FsDirectoryFactory:DEBUG", reason = "to capture trace logging for direct IO")
95+
public void testDirectIOUsed() {
96+
try (MockLog mockLog = MockLog.capture(FsDirectoryFactory.class)) {
97+
// we're just looking for some evidence direct IO is used
98+
mockLog.addExpectation(
99+
new MockLog.PatternSeenEventExpectation(
100+
"Direct IO used",
101+
FsDirectoryFactory.class.getCanonicalName(),
102+
Level.DEBUG,
103+
"Opening .*\\.vec with direct IO"
104+
)
105+
);
106+
107+
indexVectors();
108+
109+
// do a search
110+
var knn = List.of(new KnnSearchBuilder("fooVector", new VectorData(null, new byte[64]), 10, 20, null, null));
111+
assertHitCount(prepareSearch("foo-vectors").setKnnSearch(knn), 10);
112+
mockLog.assertAllExpectationsMatched();
113+
}
114+
}
115+
116+
@Override
117+
protected boolean addMockFSIndexStore() {
118+
return false; // we require to always use the "real" hybrid directory
119+
}
120+
}

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsReader.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.apache.lucene.index.VectorSimilarityFunction;
3737
import org.apache.lucene.internal.hppc.IntObjectHashMap;
3838
import org.apache.lucene.store.ChecksumIndexInput;
39+
import org.apache.lucene.store.FilterDirectory;
3940
import org.apache.lucene.store.IOContext;
4041
import org.apache.lucene.store.IndexInput;
4142
import org.apache.lucene.store.ReadAdvice;
@@ -86,7 +87,7 @@ public DirectIOLucene99FlatVectorsReader(SegmentReadState state, FlatVectorsScor
8687
}
8788

8889
public static boolean shouldUseDirectIO(SegmentReadState state) {
89-
return USE_DIRECT_IO && state.directory instanceof DirectIOIndexInputSupplier;
90+
return USE_DIRECT_IO && FilterDirectory.unwrap(state.directory) instanceof DirectIOIndexInputSupplier;
9091
}
9192

9293
private int readMetadata(SegmentReadState state) throws IOException {
@@ -126,7 +127,7 @@ private static IndexInput openDataInput(
126127
) throws IOException {
127128
String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
128129
// use direct IO for accessing raw vector data for searches
129-
IndexInput in = USE_DIRECT_IO && state.directory instanceof DirectIOIndexInputSupplier did
130+
IndexInput in = USE_DIRECT_IO && FilterDirectory.unwrap(state.directory) instanceof DirectIOIndexInputSupplier did
130131
? did.openInputDirect(fileName, context)
131132
: state.directory.openInput(fileName, context);
132133
boolean success = false;

server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ public IndexInput openInputDirect(String name, IOContext context) throws IOExcep
195195
// we need to do these checks on the outer directory since the inner doesn't know about pending deletes
196196
ensureOpen();
197197
ensureCanRead(name);
198-
198+
Log.debug("Opening {} with direct IO", name);
199199
return directIODelegate.openInput(name, context);
200200
}
201201

0 commit comments

Comments
 (0)