Skip to content

Commit 4a4c463

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 7fd455c commit 4a4c463

File tree

2 files changed

+133
-107
lines changed

2 files changed

+133
-107
lines changed

ChatQnA/benchmark/performance/kubernetes/intel/gaudi/deploy.py

Lines changed: 89 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,18 @@
1-
import os
2-
import subprocess
3-
import json
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
44
import argparse
5-
import yaml
65
import glob
6+
import json
7+
import os
78
import shutil
9+
import subprocess
810
import sys
11+
12+
import yaml
913
from generate_helm_values import generate_helm_values
1014

15+
1116
def run_kubectl_command(command):
1217
"""Run a kubectl command and return the output."""
1318
try:
@@ -17,12 +22,14 @@ def run_kubectl_command(command):
1722
print(f"Error running command: {command}\n{e.stderr}")
1823
exit(1)
1924

25+
2026
def get_all_nodes():
2127
"""Get the list of all nodes in the Kubernetes cluster."""
2228
command = ["kubectl", "get", "nodes", "-o", "json"]
2329
output = run_kubectl_command(command)
2430
nodes = json.loads(output)
25-
return [node['metadata']['name'] for node in nodes['items']]
31+
return [node["metadata"]["name"] for node in nodes["items"]]
32+
2633

2734
def add_label_to_node(node_name, label):
2835
"""Add a label to the specified node."""
@@ -31,6 +38,7 @@ def add_label_to_node(node_name, label):
3138
run_kubectl_command(command)
3239
print(f"Label {label} added to node {node_name} successfully.")
3340

41+
3442
def add_labels_to_nodes(node_count=None, label=None, node_names=None):
3543
"""Add a label to the specified number of nodes or to specified nodes."""
3644

@@ -49,9 +57,10 @@ def add_labels_to_nodes(node_count=None, label=None, node_names=None):
4957
for node_name in selected_nodes:
5058
add_label_to_node(node_name, label)
5159

60+
5261
def clear_labels_from_nodes(label, node_names=None):
5362
"""Clear the specified label from specific nodes if provided, otherwise from all nodes."""
54-
label_key = label.split('=')[0] # Extract key from 'key=value' format
63+
label_key = label.split("=")[0] # Extract key from 'key=value' format
5564

5665
# If specific nodes are provided, use them; otherwise, get all nodes
5766
nodes_to_clear = node_names if node_names else get_all_nodes()
@@ -63,7 +72,7 @@ def clear_labels_from_nodes(label, node_names=None):
6372
node_metadata = json.loads(node_info)
6473

6574
# Check if the label exists on this node
66-
labels = node_metadata['metadata'].get('labels', {})
75+
labels = node_metadata["metadata"].get("labels", {})
6776
if label_key in labels:
6877
# Remove the label from the node
6978
command = ["kubectl", "label", "node", node_name, f"{label_key}-"]
@@ -73,6 +82,7 @@ def clear_labels_from_nodes(label, node_names=None):
7382
else:
7483
print(f"Label {label_key} not found on node {node_name}, skipping.")
7584

85+
7686
def add_helm_repo(repo_name, repo_url):
7787
# Add the repo if it does not exist
7888
add_command = ["helm", "repo", "add", repo_name, repo_url]
@@ -82,6 +92,7 @@ def add_helm_repo(repo_name, repo_url):
8292
except subprocess.CalledProcessError as e:
8393
print(f"Failed to add Helm repo {repo_name}: {e}")
8494

95+
8596
def delete_helm_repo(repo_name):
8697
"""Delete Helm repo if it exists."""
8798
command = ["helm", "repo", "remove", repo_name]
@@ -91,29 +102,29 @@ def delete_helm_repo(repo_name):
91102
except subprocess.CalledProcessError:
92103
print(f"Failed to delete Helm repo {repo_name}. It may not exist.")
93104

105+
94106
def configmap_exists(name, namespace):
95107
"""Check if a ConfigMap exists in the specified namespace."""
96108
check_command = ["kubectl", "get", "configmap", name, "-n", namespace]
97-
result = subprocess.run(
98-
check_command,
99-
check=False,
100-
stdout=subprocess.DEVNULL,
101-
stderr=subprocess.DEVNULL
102-
)
109+
result = subprocess.run(check_command, check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
103110
return result.returncode == 0
104111

112+
105113
def create_configmap(name, namespace, data):
106114
"""Create a ConfigMap if it does not already exist."""
107115
if configmap_exists(name, namespace):
108116
print(f"ConfigMap '{name}' already exists in namespace '{namespace}', skipping creation.")
109117
else:
110-
create_command = ["kubectl", "create", "configmap", name] + [
111-
f"--from-literal={k}={v}" for k, v in data.items()
112-
] + ["-n", namespace]
118+
create_command = (
119+
["kubectl", "create", "configmap", name]
120+
+ [f"--from-literal={k}={v}" for k, v in data.items()]
121+
+ ["-n", namespace]
122+
)
113123
print(f"Creating ConfigMap '{name}' in namespace '{namespace}'...")
114124
subprocess.run(create_command, check=True)
115125
print(f"ConfigMap '{name}' created successfully.")
116126

127+
117128
def delete_configmap(name, namespace):
118129
"""Delete a ConfigMap if it exists."""
119130
if configmap_exists(name, namespace):
@@ -124,9 +135,9 @@ def delete_configmap(name, namespace):
124135
else:
125136
print(f"ConfigMap '{name}' does not exist in namespace '{namespace}', skipping deletion.")
126137

138+
127139
def install_helm_release(release_name, chart_name, namespace, values_file, device_type):
128-
"""
129-
Deploy a Helm release with a specified name and chart.
140+
"""Deploy a Helm release with a specified name and chart.
130141
131142
Parameters:
132143
- release_name: The name of the Helm release.
@@ -162,7 +173,7 @@ def install_helm_release(release_name, chart_name, namespace, values_file, devic
162173
subprocess.run(["helm", "pull", chart_name, "--untar"], check=True)
163174

164175
# Determine the directory name (get the actual chart_name if chart_name is in the format 'repo_name/chart_name', else use chart_name directly)
165-
chart_dir_name = chart_name.split('/')[-1] if '/' in chart_name else chart_name
176+
chart_dir_name = chart_name.split("/")[-1] if "/" in chart_name else chart_name
166177

167178
# Find the untarred directory (assumes only one directory matches chart_dir_name)
168179
untar_dirs = glob.glob(f"{chart_dir_name}*")
@@ -175,10 +186,7 @@ def install_helm_release(release_name, chart_name, namespace, values_file, devic
175186
return
176187

177188
# Prepare the Helm install command
178-
command = [
179-
"helm", "install", release_name, chart_name,
180-
"--namespace", namespace
181-
]
189+
command = ["helm", "install", release_name, chart_name, "--namespace", namespace]
182190

183191
# Append additional values file for gaudi if it exists
184192
if hw_values_file:
@@ -201,6 +209,7 @@ def install_helm_release(release_name, chart_name, namespace, values_file, devic
201209
shutil.rmtree(untar_dir)
202210
print("Temporary directory removed successfully.")
203211

212+
204213
def uninstall_helm_release(release_name, namespace=None):
205214
"""Uninstall a Helm release and clean up resources, optionally delete the namespace if not 'default'."""
206215
# Default to 'default' namespace if none is specified
@@ -229,48 +238,61 @@ def uninstall_helm_release(release_name, namespace=None):
229238
except subprocess.CalledProcessError as e:
230239
print(f"Error occurred while uninstalling Helm release or deleting namespace: {e}")
231240

241+
232242
def main():
233243
parser = argparse.ArgumentParser(description="Manage Helm Deployment.")
234-
parser.add_argument("--release-name", type=str, default="chatqna",
235-
help="The Helm release name created during deployment (default: chatqna).")
236-
parser.add_argument("--chart-name", type=str, default="opea/chatqna",
237-
help="The chart name to deploy, composed of repo name and chart name (default: opea/chatqna).")
238-
parser.add_argument("--namespace", default="default",
239-
help="Kubernetes namespace (default: default).")
240-
parser.add_argument("--hf-token",
241-
help="Hugging Face API token.")
242-
parser.add_argument("--model-dir",
243-
help="Model directory, mounted as volumes for service access to pre-downloaded models")
244-
parser.add_argument("--repo-name", default="opea",
245-
help="Helm repo name to add/delete (default: opea).")
246-
parser.add_argument("--repo-url", default="https://opea-project.github.io/GenAIInfra",
247-
help="Helm repository URL (default: https://opea-project.github.io/GenAIInfra).")
248-
parser.add_argument("--user-values",
249-
help="Path to a user-specified values.yaml file.")
250-
parser.add_argument("--create-values-only", action="store_true",
251-
help="Only create the values.yaml file without deploying.")
252-
parser.add_argument("--uninstall", action="store_true",
253-
help="Uninstall the Helm release.")
254-
parser.add_argument("--num-nodes", type=int, default=1,
255-
help="Number of nodes to use (default: 1).")
256-
parser.add_argument("--node-names", nargs='*',
257-
help="Optional specific node names to label.")
258-
parser.add_argument("--add-label", action="store_true",
259-
help="Add label to specified nodes if this flag is set.")
260-
parser.add_argument("--delete-label", action="store_true",
261-
help="Delete label from specified nodes if this flag is set.")
262-
parser.add_argument("--label", default="node-type=opea-benchmark",
263-
help="Label to add/delete (default: node-type=opea-benchmark).")
264-
parser.add_argument("--with-rerank", action="store_true",
265-
help="Include rerank service in the deployment.")
266-
parser.add_argument("--tuned", action="store_true",
267-
help="Modify resources for services and change extraCmdArgs when creating values.yaml.")
268-
parser.add_argument("--add-repo", action="store_true",
269-
help="Add the Helm repo specified by --repo-url.")
270-
parser.add_argument("--delete-repo", action="store_true",
271-
help="Delete the Helm repo specified by --repo-name.")
272-
parser.add_argument("--device-type", type=str, choices=["cpu", "gaudi"], default="gaudi",
273-
help="Specify the device type for deployment (choices: 'cpu', 'gaudi'; default: gaudi).")
244+
parser.add_argument(
245+
"--release-name",
246+
type=str,
247+
default="chatqna",
248+
help="The Helm release name created during deployment (default: chatqna).",
249+
)
250+
parser.add_argument(
251+
"--chart-name",
252+
type=str,
253+
default="opea/chatqna",
254+
help="The chart name to deploy, composed of repo name and chart name (default: opea/chatqna).",
255+
)
256+
parser.add_argument("--namespace", default="default", help="Kubernetes namespace (default: default).")
257+
parser.add_argument("--hf-token", help="Hugging Face API token.")
258+
parser.add_argument(
259+
"--model-dir", help="Model directory, mounted as volumes for service access to pre-downloaded models"
260+
)
261+
parser.add_argument("--repo-name", default="opea", help="Helm repo name to add/delete (default: opea).")
262+
parser.add_argument(
263+
"--repo-url",
264+
default="https://opea-project.github.io/GenAIInfra",
265+
help="Helm repository URL (default: https://opea-project.github.io/GenAIInfra).",
266+
)
267+
parser.add_argument("--user-values", help="Path to a user-specified values.yaml file.")
268+
parser.add_argument(
269+
"--create-values-only", action="store_true", help="Only create the values.yaml file without deploying."
270+
)
271+
parser.add_argument("--uninstall", action="store_true", help="Uninstall the Helm release.")
272+
parser.add_argument("--num-nodes", type=int, default=1, help="Number of nodes to use (default: 1).")
273+
parser.add_argument("--node-names", nargs="*", help="Optional specific node names to label.")
274+
parser.add_argument("--add-label", action="store_true", help="Add label to specified nodes if this flag is set.")
275+
parser.add_argument(
276+
"--delete-label", action="store_true", help="Delete label from specified nodes if this flag is set."
277+
)
278+
parser.add_argument(
279+
"--label", default="node-type=opea-benchmark", help="Label to add/delete (default: node-type=opea-benchmark)."
280+
)
281+
parser.add_argument("--with-rerank", action="store_true", help="Include rerank service in the deployment.")
282+
parser.add_argument(
283+
"--tuned",
284+
action="store_true",
285+
help="Modify resources for services and change extraCmdArgs when creating values.yaml.",
286+
)
287+
parser.add_argument("--add-repo", action="store_true", help="Add the Helm repo specified by --repo-url.")
288+
parser.add_argument("--delete-repo", action="store_true", help="Delete the Helm repo specified by --repo-name.")
289+
parser.add_argument(
290+
"--device-type",
291+
type=str,
292+
choices=["cpu", "gaudi"],
293+
default="gaudi",
294+
help="Specify the device type for deployment (choices: 'cpu', 'gaudi'; default: gaudi).",
295+
)
274296

275297
args = parser.parse_args()
276298

@@ -309,25 +331,25 @@ def main():
309331
else:
310332
if not args.hf_token:
311333
parser.error("--hf-token are required")
312-
node_selector = {args.label.split('=')[0]: args.label.split('=')[1]}
334+
node_selector = {args.label.split("=")[0]: args.label.split("=")[1]}
313335
values_file_path = generate_helm_values(
314336
with_rerank=args.with_rerank,
315337
num_nodes=args.num_nodes,
316338
hf_token=args.hf_token,
317339
model_dir=args.model_dir,
318340
node_selector=node_selector,
319-
tune=args.tuned
341+
tune=args.tuned,
320342
)
321343

322344
# Read back the generated YAML file for verification
323-
with open(values_file_path, 'r') as file:
345+
with open(values_file_path, "r") as file:
324346
print("Generated YAML contents:")
325347
print(file.read())
326348

327-
328349
# Deploy unless --create-values-only is specified
329350
if not args.create_values_only:
330351
install_helm_release(args.release_name, args.chart_name, args.namespace, values_file_path, args.device_type)
331352

353+
332354
if __name__ == "__main__":
333355
main()

0 commit comments

Comments
 (0)