Skip to content

Commit 5cea6f1

Browse files
authored
Add benchmark on collection load time (qdrant#204)
* Recover collection from a snapshot
* Push init_time_ms values into postgres
* Collect telemetry separately
* Add dedicated benchmark strategy
* Use new benchmark-server-3
* Run in parallel
1 parent 6bab477 commit 5cea6f1

File tree

8 files changed

+257
-35
lines changed

8 files changed

+257
-35
lines changed

.github/workflows/continuous-benchmark.yaml

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ jobs:
9191
9292
export ENGINE_NAME="qdrant-all-on-disk-scalar-q"
9393
export DATASETS="random-768-100-tenants"
94+
export BENCHMARK_STRATEGY="tenants"
9495
export CONTAINER_MEM_LIMIT=160mb
9596
9697
# Benchmark the dev branch:
@@ -105,6 +106,82 @@ jobs:
105106
- name: Fail job if any of the benches failed
106107
if: steps.benches.outputs.failed == 'error' || steps.benches.outputs.failed == 'timeout'
107108
run: exit 1
109+
- name: Send Notification
110+
if: failure() || cancelled()
111+
uses: slackapi/[email protected]
112+
with:
113+
payload: |
114+
{
115+
"text": "CI tenants benchmarks run status: ${{ job.status }}",
116+
"blocks": [
117+
{
118+
"type": "section",
119+
"text": {
120+
"type": "mrkdwn",
121+
"text": "CI tenants benchmarks failed because of ${{ steps.benches.outputs.failed }}.\nView the results <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|here>"
122+
}
123+
}
124+
]
125+
}
126+
env:
127+
SLACK_WEBHOOK_URL: ${{ secrets.CI_ALERTS_CHANNEL_WEBHOOK_URL }}
128+
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK
129+
runLoadTimeBenchmark:
130+
runs-on: ubuntu-latest
131+
needs: runBenchmark
132+
if: ${{ always() }}
133+
steps:
134+
- uses: actions/checkout@v3
135+
- uses: webfactory/[email protected]
136+
with:
137+
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
138+
- name: Benches
139+
id: benches
140+
run: |
141+
export HCLOUD_TOKEN=${{ secrets.HCLOUD_TOKEN }}
142+
export POSTGRES_PASSWORD=${{ secrets.POSTGRES_PASSWORD }}
143+
export POSTGRES_HOST=${{ secrets.POSTGRES_HOST }}
144+
export SERVER_NAME="benchmark-server-3"
145+
bash -x tools/setup_ci.sh
146+
147+
# Benchmark collection load time: every dataset is recovered from its snapshot
# and tools/run_ci.sh is run against both the dev and the master image.

# Disable errexit so that a failing or timed-out run does not abort the
# remaining runs; it is switched back on once the loop has finished.
set +e

export BENCHMARK_STRATEGY="collection-reload"
export STORAGE_URL="https://storage.googleapis.com/qdrant-benchmark-snapshots/all-payloads"

# dataset name -> engine configuration name
declare -A DATASET_TO_ENGINE=(
  ["all-payloads-default"]="qdrant-continuous-benchmark-snapshot"
  ["all-payloads-on-disk"]="qdrant-continuous-benchmark-snapshot"
  ["all-payloads-default-sparse"]="qdrant-continuous-benchmark-snapshot"
  ["all-payloads-on-disk-sparse"]="qdrant-continuous-benchmark-snapshot"
)

# dataset name -> snapshot to recover the collection from
declare -A DATASET_TO_URL=(
  ["all-payloads-default"]="${STORAGE_URL}/benchmark-all-payloads-500k-768-default.snapshot"
  ["all-payloads-on-disk"]="${STORAGE_URL}/benchmark-all-payloads-500k-768-on-disk.snapshot"
  ["all-payloads-default-sparse"]="${STORAGE_URL}/benchmark-all-payloads-500k-sparse-default.snapshot"
  ["all-payloads-on-disk-sparse"]="${STORAGE_URL}/benchmark-all-payloads-500k-sparse-on-disk.snapshot"
)

for dataset in "${!DATASET_TO_ENGINE[@]}"; do
  export ENGINE_NAME="${DATASET_TO_ENGINE[$dataset]}"
  export DATASETS="$dataset"
  export SNAPSHOT_URL="${DATASET_TO_URL[$dataset]}"

  # The dev branch (ghcr/dev) is benchmarked first, then master (docker/master).
  for QDRANT_VERSION in ghcr/dev docker/master; do
    export QDRANT_VERSION
    timeout 30m bash -x tools/run_ci.sh
  done
done

set -e
182+
- name: Fail job if any of the benches failed
183+
if: steps.benches.outputs.failed == 'error' || steps.benches.outputs.failed == 'timeout'
184+
run: exit 1
108185
- name: Send Notification
109186
if: failure() || cancelled()
110187
uses: slackapi/[email protected]
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
version: '3.7'
2+
3+
services:
4+
qdrant_bench:
5+
image: ${CONTAINER_REGISTRY:-docker.io}/qdrant/qdrant:${QDRANT_VERSION}
6+
container_name: qdrant-continuous
7+
environment:
8+
QDRANT_NUM_CPUS: 4
9+
ports:
10+
- "6333:6333"
11+
- "6334:6334"
12+
volumes:
13+
- qdrant_storage:/qdrant/storage
14+
logging:
15+
driver: "json-file"
16+
options:
17+
max-file: 1
18+
max-size: 10m
19+
deploy:
20+
resources:
21+
limits:
22+
memory: ${CONTAINER_MEM_LIMIT:-25Gb}
23+
24+
volumes:
25+
qdrant_storage:
26+
name: "qdrant_storage"

tools/qdrant_collect_stats.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,7 @@ echo "$RSS_ANON_MEMORY_USAGE" > results/rss-anon-memory-usage-"${CURRENT_DATE}".
2828
ROOT_API_RESPONSE=$(ssh -t "${SERVER_USERNAME}@${IP_OF_THE_SERVER}" "curl -s http://localhost:6333/")
2929

3030
echo "$ROOT_API_RESPONSE" > results/root-api-"${CURRENT_DATE}".json
31+
32+
TELEMETRY_API_RESPONSE=$(ssh -t "${SERVER_USERNAME}@${IP_OF_THE_SERVER}" "curl -s http://localhost:6333/telemetry?details_level=10")
33+
34+
echo "$TELEMETRY_API_RESPONSE" > results/telemetry-api-"${CURRENT_DATE}".json

tools/run_ci.sh

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,23 @@ trap 'handle_term' TERM
1818

1919
# Script, that runs benchmark within the GitHub Actions CI environment
2020

21+
BENCHMARK_STRATEGY=${BENCHMARK_STRATEGY:-"default"}
22+
2123
SCRIPT=$(realpath "$0")
2224
SCRIPT_PATH=$(dirname "$SCRIPT")
2325

2426
bash -x "${SCRIPT_PATH}/run_remote_benchmark.sh"
2527

2628
# Upload to postgres
2729
# -t sorts by modification time
28-
export SEARCH_RESULTS_FILE=$(ls -t results/*-search-*.json | head -n 1)
29-
export UPLOAD_RESULTS_FILE=$(ls -t results/*-upload-*.json | head -n 1)
30+
# Export the newest result files that match the strategy which has just run
# (ls -t lists the most recently modified file first).
case "$BENCHMARK_STRATEGY" in
  "collection-reload")
    # collection-reload: use the latest telemetry API response
    export TELEMETRY_API_RESPONSE_FILE=$(ls -t results/telemetry-api-*.json | head -n 1)
    ;;
  *)
    # every other strategy is considered to have search & upload results
    export SEARCH_RESULTS_FILE=$(ls -t results/*-search-*.json | head -n 1)
    export UPLOAD_RESULTS_FILE=$(ls -t results/*-upload-*.json | head -n 1)
    ;;
esac
37+
3038
export VM_RSS_MEMORY_USAGE_FILE=$(ls -t results/vm-rss-memory-usage-*.txt | head -n 1)
3139
export RSS_ANON_MEMORY_USAGE_FILE=$(ls -t results/rss-anon-memory-usage-*.txt | head -n 1)
3240
export ROOT_API_RESPONSE_FILE=$(ls -t results/root-api-*.json | head -n 1)

tools/run_client_script.sh

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,38 @@ BENCH_CLIENT_NAME=${CLIENT_NAME:-"benchmark-client-1"}
1717

1818
IP_OF_THE_CLIENT=$(bash "${SCRIPT_PATH}/${CLOUD_NAME}/get_public_ip.sh" "$BENCH_CLIENT_NAME")
1919

20-
scp "${SCRIPT_PATH}/run_experiment.sh" "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}:~/run_experiment.sh"
21-
2220
ENGINE_NAME=${ENGINE_NAME:-"qdrant-continuous-benchmark"}
2321

2422
DATASETS=${DATASETS:-"laion-small-clip"}
2523

24+
SNAPSHOT_URL=${SNAPSHOT_URL:-""}
25+
2626
PRIVATE_IP_OF_THE_SERVER=$(bash "${SCRIPT_PATH}/${CLOUD_NAME}/get_private_ip.sh" "$BENCH_SERVER_NAME")
2727

28-
RUN_EXPERIMENT="ENGINE_NAME=${ENGINE_NAME} DATASETS=${DATASETS} PRIVATE_IP_OF_THE_SERVER=${PRIVATE_IP_OF_THE_SERVER} EXPERIMENT_MODE=${EXPERIMENT_MODE} bash ~/run_experiment.sh"
28+
if [[ "$EXPERIMENT_MODE" == "snapshot" ]]; then
29+
scp "${SCRIPT_PATH}/run_experiment.sh" "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}:~/run_experiment_snapshot.sh"
30+
31+
RUN_EXPERIMENT="ENGINE_NAME=${ENGINE_NAME} \
32+
DATASETS=${DATASETS} \
33+
PRIVATE_IP_OF_THE_SERVER=${PRIVATE_IP_OF_THE_SERVER} \
34+
EXPERIMENT_MODE=${EXPERIMENT_MODE} \
35+
SNAPSHOT_URL=${SNAPSHOT_URL} \
36+
bash ~/run_experiment_snapshot.sh"
37+
38+
ssh -tt -o ServerAliveInterval=120 -o ServerAliveCountMax=10 "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "${RUN_EXPERIMENT}"
2939

30-
ssh -tt -o ServerAliveInterval=60 -o ServerAliveCountMax=3 "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "${RUN_EXPERIMENT}"
40+
else
41+
scp "${SCRIPT_PATH}/run_experiment.sh" "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}:~/run_experiment.sh"
42+
43+
RUN_EXPERIMENT="ENGINE_NAME=${ENGINE_NAME} \
44+
DATASETS=${DATASETS} \
45+
PRIVATE_IP_OF_THE_SERVER=${PRIVATE_IP_OF_THE_SERVER} \
46+
EXPERIMENT_MODE=${EXPERIMENT_MODE} \
47+
bash ~/run_experiment.sh"
48+
49+
ssh -tt -o ServerAliveInterval=60 -o ServerAliveCountMax=3 "${SERVER_USERNAME}@${IP_OF_THE_CLIENT}" "${RUN_EXPERIMENT}"
50+
51+
fi
3152

3253
echo "Gather experiment results..."
3354
result_files_arr=()

tools/run_experiment.sh

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ PRIVATE_IP_OF_THE_SERVER=${PRIVATE_IP_OF_THE_SERVER:-""}
1111

1212
EXPERIMENT_MODE=${EXPERIMENT_MODE:-"full"}
1313

14+
SNAPSHOT_URL=${SNAPSHOT_URL:-""}
15+
1416
if [[ -z "$ENGINE_NAME" ]]; then
1517
echo "ENGINE_NAME is not set"
1618
exit 1
@@ -27,13 +29,21 @@ if [[ -z "$PRIVATE_IP_OF_THE_SERVER" ]]; then
2729
fi
2830

2931
if [[ -z "$EXPERIMENT_MODE" ]]; then
30-
echo "EXPERIMENT_MODE is not set, possible values are: full | upload | search"
32+
echo "EXPERIMENT_MODE is not set, possible values are: full | upload | search | snapshot"
33+
exit 1
34+
fi
35+
36+
if [[ "$EXPERIMENT_MODE" == "snapshot" ]] && [[ -z "$SNAPSHOT_URL" ]]; then
37+
echo "EXPERIMENT_MODE is 'snapshot' but SNAPSHOT_URL is not set"
3138
exit 1
3239
fi
33-
docker container rm -f ci-benchmark-upload || true
34-
docker container rm -f ci-benchmark-search || true
3540

36-
docker rmi --force qdrant/vector-db-benchmark:latest || true
41+
if [[ "$EXPERIMENT_MODE" != "snapshot" ]]; then
42+
docker container rm -f ci-benchmark-upload || true
43+
docker container rm -f ci-benchmark-search || true
44+
45+
docker rmi --force qdrant/vector-db-benchmark:latest || true
46+
fi
3747

3848
if [[ "$EXPERIMENT_MODE" == "full" ]] || [[ "$EXPERIMENT_MODE" == "upload" ]]; then
3949
echo "EXPERIMENT_MODE=$EXPERIMENT_MODE"
@@ -63,3 +73,28 @@ if [[ "$EXPERIMENT_MODE" == "full" ]] || [[ "$EXPERIMENT_MODE" == "search" ]]; t
6373
qdrant/vector-db-benchmark:latest \
6474
python run.py --engines "${ENGINE_NAME}" --datasets "${DATASETS}" --host "${PRIVATE_IP_OF_THE_SERVER}" --no-skip-if-exists --skip-upload
6575
fi
76+
77+
78+
#######################################
# Polls a Qdrant collection until it reports status "green".
# Arguments:
#   $1 - collection info URL
#   $2 - number of retries after the first check (default: 5)
#   $3 - seconds to sleep between checks (default: 1)
# Returns:
#   0 as soon as the status is "green", 1 if it never turns green.
#######################################
wait_for_green_collection() {
  local url=$1
  local retries=${2:-5}
  local interval=${3:-1}
  local status attempt

  for (( attempt = 0; attempt <= retries; attempt++ )); do
    # A failed request or an unexpected body simply yields a non-green status
    # and is retried on the next iteration.
    status=$(curl -s "$url" | jq -r '.result.status')
    if [[ "$status" == "green" ]]; then
      return 0
    fi
    # No point in sleeping after the final check.
    if (( attempt < retries )); then
      sleep "$interval"
    fi
  done

  return 1
}

# Snapshot mode: recover the "benchmark" collection from SNAPSHOT_URL and
# make sure it is ready before reporting the stage as done.
if [[ "$EXPERIMENT_MODE" == "snapshot" ]]; then
  echo "EXPERIMENT_MODE=$EXPERIMENT_MODE"

  collection_url="http://${PRIVATE_IP_OF_THE_SERVER}:6333/collections/benchmark"

  # Let jq build the request body so the URL always ends up as a correctly
  # escaped JSON string.
  recover_payload=$(jq -cn --arg location "$SNAPSHOT_URL" '{location: $location}')

  # The body is declared as JSON (curl's default for --data-raw is form
  # encoding) and --fail turns an HTTP error response into a non-zero exit
  # status, so a failed recovery stops the experiment right here instead of
  # being judged by whatever collection state is left on the server.
  if ! curl --fail -X PUT \
    -H "Content-Type: application/json" \
    --data-raw "$recover_payload" \
    "${collection_url}/snapshots/recover"; then
    echo "Experiment interrupted: snapshot recovery request failed."
    exit 1
  fi

  if wait_for_green_collection "$collection_url"; then
    echo "Experiment stage: Done"
  else
    echo "Experiment interrupted: collection is not ready."
    exit 1
  fi
fi

tools/run_remote_benchmark.sh

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,14 @@ trap 'cleanup' EXIT
3131
#SERVER_NAME=$BENCH_CLIENT_NAME SERVER_TYPE='cpx11' bash -x "${SCRIPT_PATH}/${CLOUD_NAME}/create_and_install.sh"
3232
#wait $SERVER_CREATION_PID
3333

34+
BENCHMARK_STRATEGY=${BENCHMARK_STRATEGY:-"default"}
35+
3436
SERVER_NAME=$BENCH_SERVER_NAME bash -x "${SCRIPT_PATH}/${CLOUD_NAME}/check_ssh_connection.sh"
3537
SERVER_NAME=$BENCH_CLIENT_NAME bash -x "${SCRIPT_PATH}/${CLOUD_NAME}/check_ssh_connection.sh"
3638

37-
if [[ -z "${CONTAINER_MEM_LIMIT:-}" ]]; then
38-
echo "CONTAINER_MEM_LIMIT is not set, run without memory limit"
39+
case "$BENCHMARK_STRATEGY" in
40+
"default")
41+
echo "Default benchmark, no volume, no memory limit"
3942

4043
SERVER_CONTAINER_NAME=${SERVER_CONTAINER_NAME:-"qdrant-continuous-benchmarks"}
4144

@@ -44,9 +47,14 @@ if [[ -z "${CONTAINER_MEM_LIMIT:-}" ]]; then
4447
bash -x "${SCRIPT_PATH}/run_client_script.sh"
4548

4649
bash -x "${SCRIPT_PATH}/qdrant_collect_stats.sh" "$SERVER_CONTAINER_NAME"
50+
;;
51+
"tenants")
52+
if [[ -z "${CONTAINER_MEM_LIMIT:-}" ]]; then
53+
echo "Tenants benchmark, but CONTAINER_MEM_LIMIT is not set!"
54+
exit 2
55+
fi
4756

48-
else
49-
echo "CONTAINER_MEM_LIMIT is set, run search with memory limit: ${CONTAINER_MEM_LIMIT}"
57+
echo "Tenants benchmark, run search with memory limit: ${CONTAINER_MEM_LIMIT}"
5058

5159
SERVER_CONTAINER_NAME=${SERVER_CONTAINER_NAME:-"qdrant-continuous-benchmarks-with-volume"}
5260

@@ -59,6 +67,26 @@ else
5967
bash -x "${SCRIPT_PATH}/run_client_script.sh" "search"
6068

6169
bash -x "${SCRIPT_PATH}/qdrant_collect_stats.sh" "$SERVER_CONTAINER_NAME"
70+
;;
71+
72+
"collection-reload")
73+
echo "Collection load time benchmark"
74+
75+
SERVER_CONTAINER_NAME=${SERVER_CONTAINER_NAME:-"qdrant-continuous-benchmarks-snapshot"}
76+
77+
bash -x "${SCRIPT_PATH}/run_server_container_with_volume.sh" "$SERVER_CONTAINER_NAME"
6278

63-
fi
79+
bash -x "${SCRIPT_PATH}/run_client_script.sh" "snapshot"
80+
81+
bash -x "${SCRIPT_PATH}/run_server_container_with_volume.sh" "$SERVER_CONTAINER_NAME" "25Gb" "continue"
82+
83+
sleep 10
84+
85+
bash -x "${SCRIPT_PATH}/qdrant_collect_stats.sh" "$SERVER_CONTAINER_NAME"
86+
;;
6487

88+
*)
89+
echo "Invalid BENCHMARK_STRATEGY value: $BENCHMARK_STRATEGY"
90+
exit 1
91+
;;
92+
esac

0 commit comments

Comments
 (0)