1- import os
2- import subprocess
3- import json
1+ # Copyright (C) 2024 Intel Corporation
2+ # SPDX-License-Identifier: Apache-2.0
3+
44import argparse
5- import yaml
65import glob
6+ import json
7+ import os
78import shutil
9+ import subprocess
810import sys
11+
12+ import yaml
913from generate_helm_values import generate_helm_values
1014
15+
1116def run_kubectl_command (command ):
1217 """Run a kubectl command and return the output."""
1318 try :
@@ -17,12 +22,14 @@ def run_kubectl_command(command):
1722 print (f"Error running command: { command } \n { e .stderr } " )
1823 exit (1 )
1924
25+
def get_all_nodes():
    """Return the names of all nodes reported by the Kubernetes cluster.

    Runs ``kubectl get nodes -o json`` through ``run_kubectl_command``, parses
    the JSON output, and collects ``metadata.name`` from every entry in ``items``.
    """
    raw_listing = run_kubectl_command(["kubectl", "get", "nodes", "-o", "json"])
    node_names = []
    for entry in json.loads(raw_listing)["items"]:
        node_names.append(entry["metadata"]["name"])
    return node_names
32+
2633
2734def add_label_to_node (node_name , label ):
2835 """Add a label to the specified node."""
@@ -31,6 +38,7 @@ def add_label_to_node(node_name, label):
3138 run_kubectl_command (command )
3239 print (f"Label { label } added to node { node_name } successfully." )
3340
41+
3442def add_labels_to_nodes (node_count = None , label = None , node_names = None ):
3543 """Add a label to the specified number of nodes or to specified nodes."""
3644
@@ -49,9 +57,10 @@ def add_labels_to_nodes(node_count=None, label=None, node_names=None):
4957 for node_name in selected_nodes :
5058 add_label_to_node (node_name , label )
5159
60+
5261def clear_labels_from_nodes (label , node_names = None ):
5362 """Clear the specified label from specific nodes if provided, otherwise from all nodes."""
54- label_key = label .split ('=' )[0 ] # Extract key from 'key=value' format
63+ label_key = label .split ("=" )[0 ] # Extract key from 'key=value' format
5564
5665 # If specific nodes are provided, use them; otherwise, get all nodes
5766 nodes_to_clear = node_names if node_names else get_all_nodes ()
@@ -63,7 +72,7 @@ def clear_labels_from_nodes(label, node_names=None):
6372 node_metadata = json .loads (node_info )
6473
6574 # Check if the label exists on this node
66- labels = node_metadata [' metadata' ].get (' labels' , {})
75+ labels = node_metadata [" metadata" ].get (" labels" , {})
6776 if label_key in labels :
6877 # Remove the label from the node
6978 command = ["kubectl" , "label" , "node" , node_name , f"{ label_key } -" ]
@@ -73,6 +82,7 @@ def clear_labels_from_nodes(label, node_names=None):
7382 else :
7483 print (f"Label { label_key } not found on node { node_name } , skipping." )
7584
85+
7686def add_helm_repo (repo_name , repo_url ):
7787 # Add the repo if it does not exist
7888 add_command = ["helm" , "repo" , "add" , repo_name , repo_url ]
@@ -82,6 +92,7 @@ def add_helm_repo(repo_name, repo_url):
8292 except subprocess .CalledProcessError as e :
8393 print (f"Failed to add Helm repo { repo_name } : { e } " )
8494
95+
8596def delete_helm_repo (repo_name ):
8697 """Delete Helm repo if it exists."""
8798 command = ["helm" , "repo" , "remove" , repo_name ]
@@ -91,29 +102,29 @@ def delete_helm_repo(repo_name):
91102 except subprocess .CalledProcessError :
92103 print (f"Failed to delete Helm repo { repo_name } . It may not exist." )
93104
105+
def configmap_exists(name, namespace):
    """Report whether a ConfigMap named ``name`` is present in ``namespace``.

    Invokes ``kubectl get configmap`` with stdout and stderr discarded and
    returns True only when the command exits with status 0.
    """
    probe = subprocess.run(
        ["kubectl", "get", "configmap", name, "-n", namespace],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,  # a non-zero exit is the "not found" answer, not an error
    )
    return probe.returncode == 0
104111
112+
def create_configmap(name, namespace, data):
    """Create a ConfigMap from ``data`` unless one with that name already exists.

    Parameters:
    - name: Name of the ConfigMap.
    - namespace: Namespace passed to kubectl via ``-n``.
    - data: Mapping whose items become ``--from-literal=key=value`` arguments.

    Raises subprocess.CalledProcessError if ``kubectl create`` exits non-zero.
    """
    # Guard clause: nothing to do when the ConfigMap is already present.
    if configmap_exists(name, namespace):
        print(f"ConfigMap '{name}' already exists in namespace '{namespace}', skipping creation.")
        return

    literal_args = [f"--from-literal={key}={value}" for key, value in data.items()]
    kubectl_args = ["kubectl", "create", "configmap", name, *literal_args, "-n", namespace]
    print(f"Creating ConfigMap '{name}' in namespace '{namespace}'...")
    subprocess.run(kubectl_args, check=True)
    print(f"ConfigMap '{name}' created successfully.")
116126
127+
117128def delete_configmap (name , namespace ):
118129 """Delete a ConfigMap if it exists."""
119130 if configmap_exists (name , namespace ):
@@ -124,9 +135,9 @@ def delete_configmap(name, namespace):
124135 else :
125136 print (f"ConfigMap '{ name } ' does not exist in namespace '{ namespace } ', skipping deletion." )
126137
138+
127139def install_helm_release (release_name , chart_name , namespace , values_file , device_type ):
128- """
129- Deploy a Helm release with a specified name and chart.
140+ """Deploy a Helm release with a specified name and chart.
130141
131142 Parameters:
132143 - release_name: The name of the Helm release.
@@ -162,7 +173,7 @@ def install_helm_release(release_name, chart_name, namespace, values_file, devic
162173 subprocess .run (["helm" , "pull" , chart_name , "--untar" ], check = True )
163174
164175 # Determine the directory name (get the actual chart_name if chart_name is in the format 'repo_name/chart_name', else use chart_name directly)
165- chart_dir_name = chart_name .split ('/' )[- 1 ] if '/' in chart_name else chart_name
176+ chart_dir_name = chart_name .split ("/" )[- 1 ] if "/" in chart_name else chart_name
166177
167178 # Find the untarred directory (assumes only one directory matches chart_dir_name)
168179 untar_dirs = glob .glob (f"{ chart_dir_name } *" )
@@ -175,10 +186,7 @@ def install_helm_release(release_name, chart_name, namespace, values_file, devic
175186 return
176187
177188 # Prepare the Helm install command
178- command = [
179- "helm" , "install" , release_name , chart_name ,
180- "--namespace" , namespace
181- ]
189+ command = ["helm" , "install" , release_name , chart_name , "--namespace" , namespace ]
182190
183191 # Append additional values file for gaudi if it exists
184192 if hw_values_file :
@@ -201,6 +209,7 @@ def install_helm_release(release_name, chart_name, namespace, values_file, devic
201209 shutil .rmtree (untar_dir )
202210 print ("Temporary directory removed successfully." )
203211
212+
204213def uninstall_helm_release (release_name , namespace = None ):
205214 """Uninstall a Helm release and clean up resources, optionally delete the namespace if not 'default'."""
206215 # Default to 'default' namespace if none is specified
@@ -229,48 +238,61 @@ def uninstall_helm_release(release_name, namespace=None):
229238 except subprocess .CalledProcessError as e :
230239 print (f"Error occurred while uninstalling Helm release or deleting namespace: { e } " )
231240
241+
232242def main ():
233243 parser = argparse .ArgumentParser (description = "Manage Helm Deployment." )
234- parser .add_argument ("--release-name" , type = str , default = "chatqna" ,
235- help = "The Helm release name created during deployment (default: chatqna)." )
236- parser .add_argument ("--chart-name" , type = str , default = "opea/chatqna" ,
237- help = "The chart name to deploy, composed of repo name and chart name (default: opea/chatqna)." )
238- parser .add_argument ("--namespace" , default = "default" ,
239- help = "Kubernetes namespace (default: default)." )
240- parser .add_argument ("--hf-token" ,
241- help = "Hugging Face API token." )
242- parser .add_argument ("--model-dir" ,
243- help = "Model directory, mounted as volumes for service access to pre-downloaded models" )
244- parser .add_argument ("--repo-name" , default = "opea" ,
245- help = "Helm repo name to add/delete (default: opea)." )
246- parser .add_argument ("--repo-url" , default = "https://opea-project.github.io/GenAIInfra" ,
247- help = "Helm repository URL (default: https://opea-project.github.io/GenAIInfra)." )
248- parser .add_argument ("--user-values" ,
249- help = "Path to a user-specified values.yaml file." )
250- parser .add_argument ("--create-values-only" , action = "store_true" ,
251- help = "Only create the values.yaml file without deploying." )
252- parser .add_argument ("--uninstall" , action = "store_true" ,
253- help = "Uninstall the Helm release." )
254- parser .add_argument ("--num-nodes" , type = int , default = 1 ,
255- help = "Number of nodes to use (default: 1)." )
256- parser .add_argument ("--node-names" , nargs = '*' ,
257- help = "Optional specific node names to label." )
258- parser .add_argument ("--add-label" , action = "store_true" ,
259- help = "Add label to specified nodes if this flag is set." )
260- parser .add_argument ("--delete-label" , action = "store_true" ,
261- help = "Delete label from specified nodes if this flag is set." )
262- parser .add_argument ("--label" , default = "node-type=opea-benchmark" ,
263- help = "Label to add/delete (default: node-type=opea-benchmark)." )
264- parser .add_argument ("--with-rerank" , action = "store_true" ,
265- help = "Include rerank service in the deployment." )
266- parser .add_argument ("--tuned" , action = "store_true" ,
267- help = "Modify resources for services and change extraCmdArgs when creating values.yaml." )
268- parser .add_argument ("--add-repo" , action = "store_true" ,
269- help = "Add the Helm repo specified by --repo-url." )
270- parser .add_argument ("--delete-repo" , action = "store_true" ,
271- help = "Delete the Helm repo specified by --repo-name." )
272- parser .add_argument ("--device-type" , type = str , choices = ["cpu" , "gaudi" ], default = "gaudi" ,
273- help = "Specify the device type for deployment (choices: 'cpu', 'gaudi'; default: gaudi)." )
244+ parser .add_argument (
245+ "--release-name" ,
246+ type = str ,
247+ default = "chatqna" ,
248+ help = "The Helm release name created during deployment (default: chatqna)." ,
249+ )
250+ parser .add_argument (
251+ "--chart-name" ,
252+ type = str ,
253+ default = "opea/chatqna" ,
254+ help = "The chart name to deploy, composed of repo name and chart name (default: opea/chatqna)." ,
255+ )
256+ parser .add_argument ("--namespace" , default = "default" , help = "Kubernetes namespace (default: default)." )
257+ parser .add_argument ("--hf-token" , help = "Hugging Face API token." )
258+ parser .add_argument (
259+ "--model-dir" , help = "Model directory, mounted as volumes for service access to pre-downloaded models"
260+ )
261+ parser .add_argument ("--repo-name" , default = "opea" , help = "Helm repo name to add/delete (default: opea)." )
262+ parser .add_argument (
263+ "--repo-url" ,
264+ default = "https://opea-project.github.io/GenAIInfra" ,
265+ help = "Helm repository URL (default: https://opea-project.github.io/GenAIInfra)." ,
266+ )
267+ parser .add_argument ("--user-values" , help = "Path to a user-specified values.yaml file." )
268+ parser .add_argument (
269+ "--create-values-only" , action = "store_true" , help = "Only create the values.yaml file without deploying."
270+ )
271+ parser .add_argument ("--uninstall" , action = "store_true" , help = "Uninstall the Helm release." )
272+ parser .add_argument ("--num-nodes" , type = int , default = 1 , help = "Number of nodes to use (default: 1)." )
273+ parser .add_argument ("--node-names" , nargs = "*" , help = "Optional specific node names to label." )
274+ parser .add_argument ("--add-label" , action = "store_true" , help = "Add label to specified nodes if this flag is set." )
275+ parser .add_argument (
276+ "--delete-label" , action = "store_true" , help = "Delete label from specified nodes if this flag is set."
277+ )
278+ parser .add_argument (
279+ "--label" , default = "node-type=opea-benchmark" , help = "Label to add/delete (default: node-type=opea-benchmark)."
280+ )
281+ parser .add_argument ("--with-rerank" , action = "store_true" , help = "Include rerank service in the deployment." )
282+ parser .add_argument (
283+ "--tuned" ,
284+ action = "store_true" ,
285+ help = "Modify resources for services and change extraCmdArgs when creating values.yaml." ,
286+ )
287+ parser .add_argument ("--add-repo" , action = "store_true" , help = "Add the Helm repo specified by --repo-url." )
288+ parser .add_argument ("--delete-repo" , action = "store_true" , help = "Delete the Helm repo specified by --repo-name." )
289+ parser .add_argument (
290+ "--device-type" ,
291+ type = str ,
292+ choices = ["cpu" , "gaudi" ],
293+ default = "gaudi" ,
294+ help = "Specify the device type for deployment (choices: 'cpu', 'gaudi'; default: gaudi)." ,
295+ )
274296
275297 args = parser .parse_args ()
276298
@@ -309,25 +331,25 @@ def main():
309331 else :
310332 if not args .hf_token :
311333 parser .error ("--hf-token are required" )
312- node_selector = {args .label .split ('=' )[0 ]: args .label .split ('=' )[1 ]}
334+ node_selector = {args .label .split ("=" )[0 ]: args .label .split ("=" )[1 ]}
313335 values_file_path = generate_helm_values (
314336 with_rerank = args .with_rerank ,
315337 num_nodes = args .num_nodes ,
316338 hf_token = args .hf_token ,
317339 model_dir = args .model_dir ,
318340 node_selector = node_selector ,
319- tune = args .tuned
341+ tune = args .tuned ,
320342 )
321343
322344 # Read back the generated YAML file for verification
323- with open (values_file_path , 'r' ) as file :
345+ with open (values_file_path , "r" ) as file :
324346 print ("Generated YAML contents:" )
325347 print (file .read ())
326348
327-
328349 # Deploy unless --create-values-only is specified
329350 if not args .create_values_only :
330351 install_helm_release (args .release_name , args .chart_name , args .namespace , values_file_path , args .device_type )
331352
353+
332354if __name__ == "__main__" :
333355 main ()
0 commit comments