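"""Convert benchmark result JSON files into a single markdown report.

Collects the latency, throughput, and serving benchmark results from a
results folder and renders them as markdown tables into a report based on
a template file.
"""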
import argparse
import json
import os
from pathlib import Path

import pandas as pd
from tabulate import tabulate

CUR_PATH = Path(__file__).parent.resolve()
# latency results and the keys that will be printed into markdown
latency_results = []
latency_column_mapping = {
    "test_name": "Test name",
    "avg_latency": "Mean latency (ms)",
    "P50": "Median latency (ms)",
    "P99": "P99 latency (ms)",
}

# throughput tests and the keys that will be printed into markdown
throughput_results = []
throughput_results_column_mapping = {
    "test_name": "Test name",
    "num_requests": "Num of reqs",
    "total_num_tokens": "Total num of tokens",
    "elapsed_time": "Elapsed time (s)",
    "requests_per_second": "Tput (req/s)",
    "tokens_per_second": "Tput (tok/s)",
}

# serving results and the keys that will be printed into markdown
serving_results = []
serving_column_mapping = {
    "test_name": "Test name",
    "request_rate": "Request rate (req/s)",
    "request_throughput": "Tput (req/s)",
    "output_throughput": "Output Tput (tok/s)",
    "median_ttft_ms": "TTFT (ms)",
    "median_tpot_ms": "TPOT (ms)",
    "median_itl_ms": "ITL (ms)",
}


def read_markdown(file):
    if os.path.exists(file):
        with open(file) as f:
            return f.read() + "\n"
    else:
        return f"{file} not found.\n"


def results_to_json(latency, throughput, serving):
    return json.dumps({
        'latency': latency.to_dict(),
        'throughput': throughput.to_dict(),
        'serving': serving.to_dict()
    })


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Process the results of the benchmark tests.")
    parser.add_argument(
        "--results_folder",
        type=str,
        default="../results/",
        help="The folder where the benchmark results are stored.")
    parser.add_argument(
        "--output_folder",
        type=str,
        default="../results/",
        help="The folder where the markdown report will be written.")
    parser.add_argument("--markdown_template",
                        type=str,
                        default="./perf_result_template.md",
                        help="The template file for the markdown report.")
    parser.add_argument("--tag",
                        default="main",
                        help="Tag to be used for release message.")
    parser.add_argument("--commit_id",
                        default="",
                        help="Commit ID to be used for release message.")

    args = parser.parse_args()
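    # resolve all paths relative to this script's directory rather than the
    # current working directory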
    results_folder = (CUR_PATH / args.results_folder).resolve()
    output_folder = (CUR_PATH / args.output_folder).resolve()
    markdown_template = (CUR_PATH / args.markdown_template).resolve()

    # collect results
    for test_file in results_folder.glob("*.json"):

        with open(test_file) as f:
            raw_result = json.loads(f.read())

        if "serving" in str(test_file):
            # this result is generated via `benchmark_serving.py`

            # update the test name of this result
            raw_result.update({"test_name": test_file.stem})

            # add the result to serving_results
            serving_results.append(raw_result)
            continue

        elif "latency" in str(test_file):
            # this result is generated via `benchmark_latency.py`

            # update the test name of this result
            raw_result.update({"test_name": test_file.stem})

            # get different percentiles
            for perc in [10, 25, 50, 75, 90, 99]:
                # multiply by 1000 to convert the time unit from s to ms
                raw_result.update(
                    {f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]})
            raw_result["avg_latency"] = raw_result["avg_latency"] * 1000

            # add the result to latency_results
            latency_results.append(raw_result)
            continue

        elif "throughput" in str(test_file):
            # this result is generated via `benchmark_throughput.py`

            # update the test name of this result
            raw_result.update({"test_name": test_file.stem})

            # add the result to throughput_results
            throughput_results.append(raw_result)
            continue

        print(f"Skipping {test_file}")
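    # sort serving results by test name (shorter names first, then
    # alphabetically) so the table rows appear in a deterministic order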
    serving_results.sort(key=lambda x: (len(x['test_name']), x['test_name']))

    latency_results = pd.DataFrame.from_dict(latency_results)
    serving_results = pd.DataFrame.from_dict(serving_results)
    throughput_results = pd.DataFrame.from_dict(throughput_results)

    raw_results_json = results_to_json(latency_results, throughput_results,
                                       serving_results)

    # rename the columns to human-readable headers for the markdown tables
    if not latency_results.empty:
        latency_results = latency_results[list(
            latency_column_mapping.keys())].rename(
                columns=latency_column_mapping)
    if not serving_results.empty:
        serving_results = serving_results[list(
            serving_column_mapping.keys())].rename(
                columns=serving_column_mapping)
    if not throughput_results.empty:
        throughput_results = throughput_results[list(
            throughput_results_column_mapping.keys())].rename(
                columns=throughput_results_column_mapping)

    processed_results_json = results_to_json(latency_results,
                                             throughput_results,
                                             serving_results)

    # get markdown tables
    latency_md_table = tabulate(latency_results,
                                headers='keys',
                                tablefmt='pipe',
                                showindex=False)
    serving_md_table = tabulate(serving_results,
                                headers='keys',
                                tablefmt='pipe',
                                showindex=False)
    throughput_md_table = tabulate(throughput_results,
                                   headers='keys',
                                   tablefmt='pipe',
                                   showindex=False)

    # write the markdown report
    print(output_folder)
    with open(output_folder / "benchmark_results.md", "w") as f:
        results = read_markdown(markdown_template)
        results = results.format(
            latency_tests_markdown_table=latency_md_table,
            throughput_tests_markdown_table=throughput_md_table,
            serving_tests_markdown_table=serving_md_table,
            benchmarking_results_in_json_string=processed_results_json)
        f.write(results)