Skip to content

Commit c0298b2

Browse files
committed
update experiment scripts
1 parent e3a8d4d commit c0298b2

File tree

7 files changed

+61
-34
lines changed

7 files changed

+61
-34
lines changed

benchmarks/autoscaling/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,10 @@ For example,
4343
There are two plots that you can plot.
4444

4545
### Generating report
46-
`python <aibrix_root_repo>/benchmarks/plot/plot-everything.py <experiment_home_dir>`
46+
`python plot-everything.py <experiment_home_dir>`
4747

4848
For example,
49-
`python <aibrix_root_repo>/benchmarks/plot/plot-everything.py experiment_results/25min_test`
49+
`python plot-everything.py experiment_results/25min_test`
5050

5151
The directories should look like
5252
```bash

benchmarks/autoscaling/plot-everything.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env python3
22
import os
33
import json
4+
import re
45
import pandas as pd
56
import matplotlib.pyplot as plt
67
import numpy as np
@@ -24,10 +25,15 @@ def parse_experiment_output(lines):
2425
continue
2526
try:
2627
data = json.loads(line.strip())
27-
required_fields = ['status_code', 'start_time', 'end_time', 'latency', 'throughput',
28+
# required_fields = ['status_code', 'start_time', 'end_time', 'latency', 'throughput',
29+
# 'prompt_tokens', 'output_tokens', 'total_tokens', 'input', 'output']
30+
required_fields = ['start_time', 'end_time', 'latency', 'throughput',
2831
'prompt_tokens', 'output_tokens', 'total_tokens', 'input', 'output']
2932
if any(field not in data for field in required_fields):
33+
missingfields = [field not in data for field in required_fields]
34+
print(missingfields)
3035
continue
36+
data['status_code'] = 200
3137
results.append(data)
3238
except json.JSONDecodeError:
3339
continue
@@ -52,15 +58,27 @@ def parse_experiment_output(lines):
5258

5359
def get_autoscaler_name(output_dir):
5460
autoscaling = None
55-
with open(f"{output_dir}/output.txt", 'r', encoding='utf-8') as f_:
56-
lines = f_.readlines()
57-
for line in lines:
58-
if "autoscaler" in line:
59-
autoscaling = line.split(":")[-1].strip()
60-
break
61+
print(f"output_dir: {output_dir}")
62+
# Extract the last part of the path after the last slash
63+
filename = output_dir.split("/")[-1]
64+
65+
# Regular expression to match the autoscaler name
66+
match = re.search(r"^[^-]+-[^-]+-([^-]+(?:-[^-]+)*)-\d{8}-\d{6}$", filename)
67+
68+
if match:
69+
print(match)
70+
autoscaling = match.group(1)
71+
72+
# with open(f"{output_dir}/output.txt", 'r', encoding='utf-8') as f_:
73+
# lines = f_.readlines()
74+
# for line in lines:
75+
# if "autoscaler" in line:
76+
# autoscaling = line.split(":")[-1].strip()
77+
# break
6178
if autoscaling == None:
6279
print(f"Invalid parsed autoscaling name: {autoscaling}")
6380
assert False
81+
print(autoscaling)
6482
return autoscaling.upper()
6583

6684
def parse_performance_stats(file_content):

benchmarks/autoscaling/run-test.sh

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ autoscaler=$2
55
aibrix_repo=$3 # root dir of aibrix repo
66
api_key=$4 # set your api key
77
kube_context=$5
8+
workload_type=$6
9+
810
k8s_yaml_dir="deepseek-llm-7b-chat"
911
target_deployment="deepseek-llm-7b-chat" # "aibrix-model-deepseek-llm-7b-chat"
1012
target_ai_model=deepseek-llm-7b-chat
@@ -38,7 +40,7 @@ fi
3840

3941
# Setup experiment directory
4042
workload_name=$(echo $input_workload_path | tr '/' '\n' | grep .jsonl | cut -d '.' -f 1)
41-
experiment_result_dir="experiment_results/${workload_name}-${autoscaler}-$(date +%Y%m%d-%H%M%S)"
43+
experiment_result_dir="experiment_results/${workload_type}/${workload_name}-${workload_type}-${autoscaler}-$(date +%Y%m%d-%H%M%S)"
4244
if [ ! -d ${experiment_result_dir} ]; then
4345
echo "output directory does not exist. Create the output directory (${experiment_result_dir})"
4446
mkdir -p ${experiment_result_dir}
@@ -83,9 +85,9 @@ kubectl rollout restart deploy ${target_deployment} -n default
8385
sleep_before_pod_check=20
8486
echo "Sleep for ${sleep_before_pod_check} seconds after restarting deployment"
8587
sleep ${sleep_before_pod_check}
86-
python3 ${aibrix_repo}/benchmarks/utils/check_k8s_is_ready.py ${target_deployment}
87-
python3 ${aibrix_repo}/benchmarks/utils/check_k8s_is_ready.py aibrix-controller-manager
88-
python3 ${aibrix_repo}/benchmarks/utils/check_k8s_is_ready.py aibrix-gateway-plugins
88+
python3 ${aibrix_repo}/benchmarks/utils/check_k8s_is_ready.py ${target_deployment} ${kube_context}
89+
python3 ${aibrix_repo}/benchmarks/utils/check_k8s_is_ready.py aibrix-controller-manager ${kube_context}
90+
python3 ${aibrix_repo}/benchmarks/utils/check_k8s_is_ready.py aibrix-gateway-plugins ${kube_context}
8991

9092
# Start pod log monitoring
9193
pod_log_dir="${experiment_result_dir}/pod_logs"
@@ -95,7 +97,7 @@ mkdir -p ${pod_log_dir}
9597
cp ${input_workload_path} ${experiment_result_dir}
9698

9799
# Start pod counter. It will run on background until the end of the experiment.
98-
python3 ${aibrix_repo}/benchmarks/utils/count_num_pods.py ${target_deployment} ${experiment_result_dir} &
100+
python3 ${aibrix_repo}/benchmarks/utils/count_num_pods.py ${target_deployment} ${experiment_result_dir} ${kube_context} &
99101
COUNT_NUM_POD_PID=$!
100102
echo "started count_num_pods.py with PID: $COUNT_NUM_POD_PID"
101103

@@ -106,13 +108,13 @@ python3 ${aibrix_repo}/benchmarks/utils/streaming_pod_log_to_file.py aibrix-gate
106108

107109
# Run experiment!!!
108110
output_jsonl_path=${experiment_result_dir}/output.jsonl
109-
python3 ${aibrix_repo}/benchmarks/generator/client.py \
111+
python3 ${aibrix_repo}/benchmarks/client/client.py \
110112
--workload-path ${input_workload_path} \
111-
--endpoint "localhost:8888" \
113+
--endpoint "http://localhost:8888" \
112114
--model ${target_ai_model} \
113115
--api-key ${api_key} \
114-
--output-dir ${experiment_result_dir} \
115-
--output-file-path ${output_jsonl_path}
116+
--output-file-path ${output_jsonl_path} \
117+
#--output-dir ${experiment_result_dir} \
116118

117119
echo "Experiment is done. date: $(date)"
118120

@@ -124,7 +126,7 @@ sleep 1
124126

125127
# Cleanup
126128
kubectl delete podautoscaler --all --all-namespaces
127-
python3 ${aibrix_repo}/benchmarks/utils/set_num_replicas.py --deployment ${target_deployment} --replicas 1
129+
python3 ${aibrix_repo}/benchmarks/utils/set_num_replicas.py --deployment ${target_deployment} --replicas 1 --context ${kube_context}
128130
kubectl delete -f ${k8s_yaml_dir}/deploy.yaml
129131

130132
# Stop monitoring processes

benchmarks/autoscaling/run.sh

100644100755
Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
#!/bin/bash
2-
2+
set -x
33
#./run.sh workload/workload/25min_up_and_down/25min_up_and_down.jsonl
44

55
export KUBECONFIG=~/.kube/config-vke
6-
export aibrix_repo="/root/aibrix"
6+
export aibrix_repo="/root/aibrix-local"
77
export api_key="sk-kFJ12nKsFVfVmGpj3QzX65s4RbN2xJqWzPYCjYu7wT3BlbLi"
88
export kube_context="ccr3aths9g2gqedu8asdg@35122069-kcu0n2lfb7pjdd83330h0"
99

@@ -23,11 +23,16 @@ do
2323
echo "started experiment at $(date)"
2424
echo autoscaler: ${autoscaler}
2525
echo workload: ${workload_path}
26-
echo "The stdout/stderr is being logged in ./output.txt"
27-
./run-test.sh ${workload_path} ${autoscaler} ${aibrix_repo} ${api_key} ${kube_context} &> output-${WORKLOAD_TYPE}.txt
26+
echo "The stdout/stderr is being logged in output-${WORKLOAD_TYPE}.txt"
27+
./run-test.sh ${workload_path} ${autoscaler} ${aibrix_repo} ${api_key} ${kube_context} ${WORKLOAD_TYPE} > output-${WORKLOAD_TYPE}.txt 2>&1
2828
end_time=$(date +%s)
2929
echo "Done: Time taken: $((end_time-start_time)) seconds"
3030
echo "--------------------------------"
3131
sleep 10
3232
done
33-
done
33+
done
34+
35+
# for WORKLOAD_TYPE in "T_HighSlow_I_HighSlow_O_HighFast" "T_HighSlow_I_HighSlow_O_HighSlow" "T_HighSlow_I_LowFast_O_HighSlow" "T_HighSlow_I_LowSlow_O_HighSlow"
36+
# do
37+
# python plot-everything.py experiment_results/${WORKLOAD_TYPE}
38+
# done

benchmarks/utils/check_k8s_is_ready.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ def wait_for_all_podautoscaler_ready(namespace="default"):
5858

5959
if __name__ == "__main__":
6060
target_deployment = sys.argv[1]
61-
config.load_kube_config(context="ccr3aths9g2gqedu8asdg@41073177-kcu0mslcp5mhjsva38rpg")
61+
kube_context = sys.argv[2]
62+
config.load_kube_config(context=kube_context)
6263
wait_for_pods_ready(target_deployment)
6364
print("All pods are ready")

benchmarks/utils/count_num_pods.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
import os
77
import asyncio
88

9-
def get_pod_status_counts(deployment_name, namespace="default"):
10-
config.load_kube_config(context="ccr3aths9g2gqedu8asdg@41073177-kcu0mslcp5mhjsva38rpg")
9+
def get_pod_status_counts(deployment_name, kube_context, namespace="default"):
10+
config.load_kube_config(context=kube_context)
1111
v1 = client.CoreV1Api()
1212
pods = v1.list_namespaced_pod(namespace)
1313
filtered_pods = [pod for pod in pods.items if deployment_name in pod.metadata.name]
@@ -39,12 +39,13 @@ def main():
3939
parser = argparse.ArgumentParser()
4040
parser.add_argument("deployment", help="Deployment name")
4141
parser.add_argument("output_dir", help="Output directory")
42+
parser.add_argument("kube_context", help="Kube context")
4243
args = parser.parse_args()
4344

4445
filename = f"{args.output_dir}/pod_count.csv"
4546
idx = 0
4647
while True:
47-
status_counts = get_pod_status_counts(args.deployment)
48+
status_counts = get_pod_status_counts(args.deployment, args.kube_context)
4849
write_to_csv(args.deployment, status_counts, filename, idx)
4950
time.sleep(1)
5051
idx += 1

benchmarks/utils/streaming_pod_log_to_file.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,12 @@ def signal_handler(sig, frame):
5757
target_deployment = sys.argv[1]
5858
namespace = sys.argv[2]
5959
pod_log_dir = sys.argv[3]
60-
include = sys.argv[4]
61-
exclude = sys.argv[5]
62-
if include == "none":
63-
include = None
64-
if exclude == "none":
65-
exclude = None
60+
include = None
61+
exclude = None
62+
if len(sys.argv) > 4:
63+
include = sys.argv[4]
64+
if len(sys.argv) > 5:
65+
exclude = sys.argv[5]
6666

6767
running_processes = []
6868
signal.signal(signal.SIGINT, signal_handler)

0 commit comments

Comments (0)