Skip to content

Commit fe2c7fa

Browse files
committed
Fix missing basedirs implementation for multilevel, add tests
1 parent 43089ab commit fe2c7fa

File tree

2 files changed

+145
-7
lines changed

2 files changed

+145
-7
lines changed

src/cache/multilevel.rs

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -286,31 +286,42 @@ pub struct MultiLevelStorage {
286286
write_policy: WritePolicy,
287287
/// Lock-free atomic statistics per level
288288
atomic_stats: Vec<Arc<AtomicLevelStats>>,
289+
/// Base directories for path normalization, propagated to compiler pipeline
290+
basedirs: Vec<Vec<u8>>,
289291
}
290292

291293
impl MultiLevelStorage {
294+
/// Collect and deduplicate basedirs from all cache levels.
295+
fn collect_basedirs(levels: &[Arc<dyn Storage>]) -> Vec<Vec<u8>> {
296+
let mut seen = Vec::new();
297+
for level in levels {
298+
for basedir in level.basedirs() {
299+
if !seen.contains(basedir) {
300+
seen.push(basedir.clone());
301+
}
302+
}
303+
}
304+
seen
305+
}
306+
292307
/// Create a new multi-level storage from a list of storage backends.
293308
///
294309
/// Levels are checked in order (L0, L1, L2, ...) during reads.
295310
/// All levels receive writes in parallel.
296311
pub fn new(levels: Vec<Arc<dyn Storage>>) -> Self {
297-
let atomic_stats = AtomicLevelStats::from_levels(&levels);
298-
299-
MultiLevelStorage {
300-
levels,
301-
write_policy: WritePolicy::default(),
302-
atomic_stats,
303-
}
312+
Self::with_write_policy(levels, WritePolicy::default())
304313
}
305314

306315
/// Create a new multi-level storage with explicit write policy.
307316
pub fn with_write_policy(levels: Vec<Arc<dyn Storage>>, write_policy: WritePolicy) -> Self {
308317
let atomic_stats = AtomicLevelStats::from_levels(&levels);
318+
let basedirs = Self::collect_basedirs(&levels);
309319

310320
MultiLevelStorage {
311321
levels,
312322
write_policy,
313323
atomic_stats,
324+
basedirs,
314325
}
315326
}
316327

@@ -837,6 +848,10 @@ impl Storage for MultiLevelStorage {
837848
.unwrap_or_default()
838849
}
839850

851+
fn basedirs(&self) -> &[Vec<u8>] {
852+
&self.basedirs
853+
}
854+
840855
async fn get_preprocessor_cache_entry(
841856
&self,
842857
key: &str,

tests/integration/scripts/test-basedirs.sh

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,129 @@ test_backend "webdav" \
116116
"SCCACHE_WEBDAV_USERNAME=bar" \
117117
"SCCACHE_WEBDAV_PASSWORD=baz"
118118

119+
# Function to test basedirs with multi-level cache (disk + remote)
# Tests that basedirs normalization works across cache levels and backfill
#
# Usage: test_multilevel_backend <backend_name> [ENV_VAR=value ...]
# The extra arguments are exported for the duration of the test and
# unset again during cleanup.
test_multilevel_backend() {
    local backend_name="$1"
    # SCCACHE_MULTILEVEL_CHAIN names the Azure backend "azure" while the
    # test matrix calls it "azblob"; translate between the two.
    local level_name="$backend_name"
    if [ "$backend_name" = "azblob" ]; then
        level_name="azure"
    fi
    shift
    # Two identical source trees: cross-directory cache hits only happen
    # when basedirs normalization strips the differing path prefixes.
    cp -r /sccache/tests/integration/basedirs-autotools /build/dir1
    cp -r /sccache/tests/integration/basedirs-autotools /build/dir2

    echo ""
    echo "=========================================="
    echo "Testing multilevel basedirs: disk + $backend_name"
    echo "=========================================="

    # Stop any running sccache server
    "$SCCACHE" --stop-server 2>/dev/null || true

    # Set backend-specific environment variables (passed as arguments)
    for env_var in "$@"; do
        export "${env_var?}"
    done

    # Configure basedirs and multi-level cache
    export SCCACHE_BASEDIRS="/build/dir1:/build/dir2"
    export SCCACHE_MULTILEVEL_CHAIN="disk,$level_name"
    export SCCACHE_DIR="/build/sccache-ml-basedirs"
    rm -rf /build/sccache-ml-basedirs
    mkdir -p /build/sccache-ml-basedirs

    # Start sccache server
    "$SCCACHE" --start-server

    # Verify multi-level is active
    STATS_JSON=$("$SCCACHE" --show-stats --stats-format=json)
    CACHE_LOCATION=$(echo "$STATS_JSON" | python3 -c "import sys, json; print(json.load(sys.stdin).get('cache_location', ''))" || echo "unknown")
    echo "Cache location: $CACHE_LOCATION"

    if ! echo "$CACHE_LOCATION" | grep -qi "Multi-level"; then
        echo "✗ FAIL: Multi-level cache not detected in cache_location: $CACHE_LOCATION"
        exit 1
    fi

    echo "Test 1: Compile from first directory (cache miss, populates L0 disk + L1 $backend_name)"
    autotools /build/dir1

    STATS_JSON=$("$SCCACHE" --show-stats --stats-format=json)
    FIRST_MISSES=$(echo "$STATS_JSON" | python3 -c "import sys, json; stats = json.load(sys.stdin).get('stats', {}); print(stats.get('cache_misses', {}).get('counts', {}).get('C/C++', 0))")
    echo "Cache misses after first build: $FIRST_MISSES"

    echo ""
    echo "Test 2: Compile from second directory (cache hit expected via basedirs)"
    autotools /build/dir2

    STATS_JSON=$("$SCCACHE" --show-stats --stats-format=json)
    CACHE_HITS=$(echo "$STATS_JSON" | python3 -c "import sys, json; stats = json.load(sys.stdin).get('stats', {}); print(stats.get('cache_hits', {}).get('counts', {}).get('C/C++', 0))")
    SECOND_MISSES=$(echo "$STATS_JSON" | python3 -c "import sys, json; stats = json.load(sys.stdin).get('stats', {}); print(stats.get('cache_misses', {}).get('counts', {}).get('C/C++', 0))")
    echo "Cache hits: $CACHE_HITS, misses: $SECOND_MISSES (first build: $FIRST_MISSES)"

    # The second build must not add misses: every object should be served
    # from cache because basedirs maps /build/dir2 onto the same keys.
    if [ "$FIRST_MISSES" != "$SECOND_MISSES" ]; then
        echo "✗ FAIL: multilevel disk+$backend_name - Cache misses increased from $FIRST_MISSES to $SECOND_MISSES"
        echo "$STATS_JSON" | python3 -m json.tool
        exit 1
    fi

    echo ""
    echo "Test 3: Clear L0 (disk), rebuild from dir1 (should hit L1 $backend_name and backfill)"
    "$SCCACHE" --stop-server 2>/dev/null || true
    rm -rf /build/sccache-ml-basedirs
    mkdir -p /build/sccache-ml-basedirs
    rm -rf /build/dir1
    cp -r /sccache/tests/integration/basedirs-autotools /build/dir1
    "$SCCACHE" --start-server

    autotools /build/dir1

    STATS_JSON=$("$SCCACHE" --show-stats --stats-format=json)
    THIRD_MISSES=$(echo "$STATS_JSON" | python3 -c "import sys, json; stats = json.load(sys.stdin).get('stats', {}); print(stats.get('cache_misses', {}).get('counts', {}).get('C/C++', 0))")
    echo "Cache misses after L0 clear and rebuild: $THIRD_MISSES (should be 0)"

    if [ "$THIRD_MISSES" -gt 0 ]; then
        echo "✗ FAIL: multilevel disk+$backend_name - Misses after L0 clear ($THIRD_MISSES), L1 should have served data"
        echo "$STATS_JSON" | python3 -m json.tool
        exit 1
    fi

    # Give backfill time to complete
    sleep 2

    echo ""
    echo "Test 4: Rebuild from dir2 (should hit backfilled L0 via basedirs)"
    rm -rf /build/dir2
    cp -r /sccache/tests/integration/basedirs-autotools /build/dir2
    autotools /build/dir2

    STATS_JSON=$("$SCCACHE" --show-stats --stats-format=json)
    FOURTH_MISSES=$(echo "$STATS_JSON" | python3 -c "import sys, json; stats = json.load(sys.stdin).get('stats', {}); print(stats.get('cache_misses', {}).get('counts', {}).get('C/C++', 0))")

    if [ "$FOURTH_MISSES" -gt 0 ]; then
        echo "✗ FAIL: multilevel disk+$backend_name - Misses on build 4, basedirs + backfill should provide hits"
        echo "$STATS_JSON" | python3 -m json.tool
        exit 1
    fi

    echo "✓ PASS: multilevel disk+$backend_name - Basedirs + multilevel + backfill all working"

    # Clean up for next test
    rm -rf /build/dir1 /build/dir2 /build/sccache-ml-basedirs
    # Portable redirection (the original used the bash-only `&>` form here,
    # inconsistent with the `2>/dev/null` used by the other stop-server calls).
    "$SCCACHE" --stop-server >/dev/null 2>&1 || true

    # Unset environment variables
    for env_var in "$@"; do
        unset "${env_var%%=*}"
    done
    unset SCCACHE_BASEDIRS SCCACHE_MULTILEVEL_CHAIN SCCACHE_DIR
}

# Test multilevel basedirs with redis
test_multilevel_backend "redis" "SCCACHE_REDIS_ENDPOINT=tcp://redis:6379"
241+
119242
echo ""
120243
echo "=========================================="
121244
echo "All basedir tests completed successfully!"

0 commit comments

Comments (0)