Skip to content

Commit 1827c82

Browse files
committed
chore(master): merge maint-0.9 (#497)
ci(runners): upgrade CI runners to Ubuntu 22.04 (#486) build(deps): update reana-auth-vomsproxy to 1.3.1 (#486) feat(kubernetes): add resource settings in Kubernetes jobs (#484)
2 parents f014442 + bfbbd7c commit 1827c82

File tree

7 files changed

+367
-23
lines changed

7 files changed

+367
-23
lines changed

docs/openapi.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,22 @@
7878
"kerberos": {
7979
"type": "boolean"
8080
},
81+
"kubernetes_cpu_limit": {
82+
"type": "string"
83+
},
84+
"kubernetes_cpu_request": {
85+
"type": "string"
86+
},
8187
"kubernetes_job_timeout": {
8288
"format": "int32",
8389
"type": "integer"
8490
},
8591
"kubernetes_memory_limit": {
8692
"type": "string"
8793
},
94+
"kubernetes_memory_request": {
95+
"type": "string"
96+
},
8897
"kubernetes_uid": {
8998
"format": "int32",
9099
"type": "integer"

reana_job_controller/config.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,59 @@
117117
IMAGE_PULL_SECRETS = os.getenv("IMAGE_PULL_SECRETS", "").split(",")
118118
"""Docker image pull secrets which allow the usage of private images."""
119119

120+
REANA_KUBERNETES_JOBS_CPU_REQUEST = os.getenv("REANA_KUBERNETES_JOBS_CPU_REQUEST")
121+
"""Default CPU request for user job containers.
122+
123+
Please see the following URL for possible values
124+
https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#meaning-of-cpu.
125+
"""
126+
127+
REANA_KUBERNETES_JOBS_CPU_LIMIT = os.getenv("REANA_KUBERNETES_JOBS_CPU_LIMIT")
128+
"""Default CPU limit for user job containers. Containers that try to use more CPU than this limit are throttled, not terminated.
129+
130+
Please see the following URL for possible values
131+
https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#meaning-of-cpu.
132+
"""
133+
134+
REANA_KUBERNETES_JOBS_MEMORY_REQUEST = os.getenv("REANA_KUBERNETES_JOBS_MEMORY_REQUEST")
135+
"""Default memory request for user job containers.
136+
137+
Please see the following URL for possible values
138+
https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#meaning-of-memory.
139+
"""
140+
120141
REANA_KUBERNETES_JOBS_MEMORY_LIMIT = os.getenv("REANA_KUBERNETES_JOBS_MEMORY_LIMIT")
121-
"""Maximum default memory limit for user job containers. Exceeding this limit will terminate the container.
142+
"""Default memory limit for user job containers. Exceeding this limit will terminate the container.
143+
144+
Please see the following URL for possible values
145+
https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#meaning-of-memory.
146+
"""
147+
148+
REANA_KUBERNETES_JOBS_MAX_USER_CPU_REQUEST = os.getenv(
149+
"REANA_KUBERNETES_JOBS_MAX_USER_CPU_REQUEST"
150+
)
151+
"""Maximum custom CPU request that users can assign to their job containers via
152+
``kubernetes_cpu_request`` in reana.yaml.
153+
154+
Please see the following URL for possible values
155+
https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#meaning-of-cpu.
156+
"""
157+
158+
REANA_KUBERNETES_JOBS_MAX_USER_CPU_LIMIT = os.getenv(
159+
"REANA_KUBERNETES_JOBS_MAX_USER_CPU_LIMIT"
160+
)
161+
"""Maximum custom CPU limit that users can assign to their job containers via
162+
``kubernetes_cpu_limit`` in reana.yaml. Job requests asking for a higher CPU limit are rejected.
163+
164+
Please see the following URL for possible values
165+
https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#meaning-of-cpu.
166+
"""
167+
168+
REANA_KUBERNETES_JOBS_MAX_USER_MEMORY_REQUEST = os.getenv(
169+
"REANA_KUBERNETES_JOBS_MAX_USER_MEMORY_REQUEST"
170+
)
171+
"""Maximum custom memory request that users can assign to their job containers via
172+
``kubernetes_memory_request`` in reana.yaml.
122173
123174
Please see the following URL for possible values
124175
https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#meaning-of-memory.

reana_job_controller/kubernetes_job_manager.py

Lines changed: 141 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# This file is part of REANA.
2-
# Copyright (C) 2019, 2020, 2021, 2022, 2023, 2024 CERN.
2+
# Copyright (C) 2019, 2020, 2021, 2022, 2023, 2024, 2025 CERN.
33
#
44
# REANA is free software; you can redistribute it and/or modify it
55
# under the terms of the MIT License; see LICENSE file for more details.
@@ -29,10 +29,15 @@
2929
from reana_commons.errors import (
3030
REANAKubernetesMemoryLimitExceeded,
3131
REANAKubernetesWrongMemoryFormat,
32+
REANAKubernetesCPULimitExceeded,
33+
REANAKubernetesWrongCPUFormat,
34+
REANAKubernetesRequestExceedsLimit,
3235
)
3336
from reana_commons.job_utils import (
3437
validate_kubernetes_memory,
3538
kubernetes_memory_to_bytes,
39+
validate_kubernetes_cpu,
40+
kubernetes_cpu_to_millicores,
3641
)
3742
from reana_commons.k8s.api_client import (
3843
current_k8s_batchv1_api_client,
@@ -49,7 +54,13 @@
4954
from retrying import retry
5055

5156
from reana_job_controller.config import (
57+
REANA_KUBERNETES_JOBS_CPU_REQUEST,
58+
REANA_KUBERNETES_JOBS_CPU_LIMIT,
59+
REANA_KUBERNETES_JOBS_MEMORY_REQUEST,
5260
REANA_KUBERNETES_JOBS_MEMORY_LIMIT,
61+
REANA_KUBERNETES_JOBS_MAX_USER_CPU_REQUEST,
62+
REANA_KUBERNETES_JOBS_MAX_USER_CPU_LIMIT,
63+
REANA_KUBERNETES_JOBS_MAX_USER_MEMORY_REQUEST,
5364
REANA_KUBERNETES_JOBS_MAX_USER_MEMORY_LIMIT,
5465
REANA_USER_ID,
5566
USE_KUEUE,
@@ -80,6 +91,9 @@ def __init__(
8091
job_name=None,
8192
kerberos=False,
8293
kubernetes_uid=None,
94+
kubernetes_cpu_request=None,
95+
kubernetes_cpu_limit=None,
96+
kubernetes_memory_request=None,
8397
kubernetes_memory_limit=None,
8498
voms_proxy=False,
8599
rucio=False,
@@ -141,10 +155,13 @@ def __init__(
141155
self.voms_proxy = voms_proxy
142156
self.rucio = rucio
143157
self.set_user_id(kubernetes_uid)
144-
self.set_memory_limit(kubernetes_memory_limit)
145158
self.workflow_uuid = workflow_uuid
146159
self.kubernetes_job_timeout = kubernetes_job_timeout
147160
self._secrets: Optional[UserSecrets] = secrets
161+
self.set_cpu_request(kubernetes_cpu_request)
162+
self.set_cpu_limit(kubernetes_cpu_limit)
163+
self.set_memory_request(kubernetes_memory_request)
164+
self.set_memory_limit(kubernetes_memory_limit)
148165

149166
@property
150167
def secrets(self):
@@ -214,7 +231,7 @@ def execute(self):
214231
for var, value in self.env_vars.items():
215232
job_spec["containers"][0]["env"].append({"name": var, "value": value})
216233

217-
self.add_memory_limit(job_spec)
234+
self.add_resource_requests_and_limits(job_spec)
218235
self.add_hostpath_volumes()
219236
self.add_workspace_volume()
220237
self.add_shared_volume()
@@ -436,20 +453,46 @@ def add_image_pull_secrets(self):
436453

437454
self.job["spec"]["template"]["spec"]["imagePullSecrets"] = image_pull_secrets
438455

439-
def add_memory_limit(self, job_spec):
440-
"""Add limits.memory to job accordingly."""
456+
def validate_resources(self):
457+
"""Validate that resource requests are less than or equal to limits."""
458+
if self.kubernetes_cpu_request and self.kubernetes_cpu_limit:
459+
cpu_request = kubernetes_cpu_to_millicores(self.kubernetes_cpu_request)
460+
cpu_limit = kubernetes_cpu_to_millicores(self.kubernetes_cpu_limit)
461+
if cpu_request > cpu_limit:
462+
raise REANAKubernetesRequestExceedsLimit(
463+
f"ERROR: CPU request ({self.kubernetes_cpu_request}) cannot be greater than limit ({self.kubernetes_cpu_limit}). If you are overriding the values, please check the default and maximum values for requests and limits with 'reana-client info' command."
464+
)
441465

442-
def _set_job_memory_limit(job_spec, memory_limit):
443-
job_spec["containers"][0]["resources"] = {
444-
"limits": {
445-
"memory": memory_limit,
446-
}
447-
}
466+
if self.kubernetes_memory_request and self.kubernetes_memory_limit:
467+
memory_request = kubernetes_memory_to_bytes(self.kubernetes_memory_request)
468+
memory_limit = kubernetes_memory_to_bytes(self.kubernetes_memory_limit)
469+
if memory_request > memory_limit:
470+
raise REANAKubernetesRequestExceedsLimit(
471+
f"ERROR: Memory request ({self.kubernetes_memory_request}) cannot be greater than limit ({self.kubernetes_memory_limit}). If you are overriding the values, please check the default and maximum values for requests and limits with 'reana-client info' command."
472+
)
473+
474+
def add_resource_requests_and_limits(self, job_spec):
475+
"""Add resource requests and limits to job accordingly."""
476+
self.validate_resources()
477+
478+
resources = {}
479+
480+
if self.kubernetes_cpu_request or self.kubernetes_memory_request:
481+
resources["requests"] = {}
482+
if self.kubernetes_cpu_request:
483+
resources["requests"]["cpu"] = self.kubernetes_cpu_request
484+
if self.kubernetes_memory_request:
485+
resources["requests"]["memory"] = self.kubernetes_memory_request
486+
487+
if self.kubernetes_cpu_limit or self.kubernetes_memory_limit:
488+
resources["limits"] = {}
489+
if self.kubernetes_cpu_limit:
490+
resources["limits"]["cpu"] = self.kubernetes_cpu_limit
491+
if self.kubernetes_memory_limit:
492+
resources["limits"]["memory"] = self.kubernetes_memory_limit
448493

449-
if self.kubernetes_memory_limit:
450-
_set_job_memory_limit(job_spec, self.kubernetes_memory_limit)
451-
elif REANA_KUBERNETES_JOBS_MEMORY_LIMIT:
452-
_set_job_memory_limit(job_spec, REANA_KUBERNETES_JOBS_MEMORY_LIMIT)
494+
if resources:
495+
job_spec["containers"][0]["resources"] = resources
453496

454497
def add_hostpath_volumes(self):
455498
"""Add hostPath mounts from configuration to job."""
@@ -693,11 +736,89 @@ def set_user_id(self, kubernetes_uid):
693736
else:
694737
self.kubernetes_uid = WORKFLOW_RUNTIME_USER_UID
695738

739+
def set_cpu_request(self, kubernetes_cpu_request):
    """Store the CPU request to apply to the job pod.

    A user-supplied value is checked for format and, when a maximum is
    configured, compared against that maximum before being stored. When no
    value is supplied, the configured default is stored instead.

    :param kubernetes_cpu_request: CPU request asked for by the user, or a
        falsy value to fall back to ``REANA_KUBERNETES_JOBS_CPU_REQUEST``.
    :raises REANAKubernetesWrongCPUFormat: if ``validate_kubernetes_cpu``
        rejects the supplied value.
    :raises REANAKubernetesCPULimitExceeded: if the supplied value is larger
        than ``REANA_KUBERNETES_JOBS_MAX_USER_CPU_REQUEST``.
    """
    if not kubernetes_cpu_request:
        # Nothing supplied by the user: use the configured default.
        self.kubernetes_cpu_request = REANA_KUBERNETES_JOBS_CPU_REQUEST
        return

    if not validate_kubernetes_cpu(kubernetes_cpu_request):
        msg = f'The "kubernetes_cpu_request" requested {kubernetes_cpu_request} has wrong format.'
        logging.error("Error while validating Kubernetes CPU request: {}".format(msg))
        raise REANAKubernetesWrongCPUFormat(msg)

    # The ceiling is optional; skip the comparison when it is not configured.
    ceiling = REANA_KUBERNETES_JOBS_MAX_USER_CPU_REQUEST
    if ceiling:
        wanted_millicores = kubernetes_cpu_to_millicores(kubernetes_cpu_request)
        ceiling_millicores = kubernetes_cpu_to_millicores(ceiling)
        if wanted_millicores > ceiling_millicores:
            raise REANAKubernetesCPULimitExceeded(
                f'The "kubernetes_cpu_request" requested ({kubernetes_cpu_request}) exceeds the limit ({ceiling}).'
            )

    self.kubernetes_cpu_request = kubernetes_cpu_request
763+
764+
def set_cpu_limit(self, kubernetes_cpu_limit):
    """Store the CPU limit to apply to the job pod.

    A user-supplied value is checked for format and, when a maximum is
    configured, compared against that maximum before being stored. When no
    value is supplied, the configured default is stored instead.

    :param kubernetes_cpu_limit: CPU limit asked for by the user, or a
        falsy value to fall back to ``REANA_KUBERNETES_JOBS_CPU_LIMIT``.
    :raises REANAKubernetesWrongCPUFormat: if ``validate_kubernetes_cpu``
        rejects the supplied value.
    :raises REANAKubernetesCPULimitExceeded: if the supplied value is larger
        than ``REANA_KUBERNETES_JOBS_MAX_USER_CPU_LIMIT``.
    """
    if not kubernetes_cpu_limit:
        # Nothing supplied by the user: use the configured default.
        self.kubernetes_cpu_limit = REANA_KUBERNETES_JOBS_CPU_LIMIT
        return

    if not validate_kubernetes_cpu(kubernetes_cpu_limit):
        msg = f'The "kubernetes_cpu_limit" requested {kubernetes_cpu_limit} has wrong format.'
        logging.error("Error while validating Kubernetes CPU limit: {}".format(msg))
        raise REANAKubernetesWrongCPUFormat(msg)

    # The ceiling is optional; skip the comparison when it is not configured.
    ceiling = REANA_KUBERNETES_JOBS_MAX_USER_CPU_LIMIT
    if ceiling:
        wanted_millicores = kubernetes_cpu_to_millicores(kubernetes_cpu_limit)
        ceiling_millicores = kubernetes_cpu_to_millicores(ceiling)
        if wanted_millicores > ceiling_millicores:
            raise REANAKubernetesCPULimitExceeded(
                f'The "kubernetes_cpu_limit" requested ({kubernetes_cpu_limit}) exceeds the limit ({ceiling}).'
            )

    self.kubernetes_cpu_limit = kubernetes_cpu_limit
788+
789+
def set_memory_request(self, kubernetes_memory_request):
    """Store the memory request to apply to the job pod.

    A user-supplied value is checked for format and, when a maximum is
    configured, compared (in bytes) against that maximum before being
    stored. When no value is supplied, the configured default is stored
    instead.

    :param kubernetes_memory_request: memory request asked for by the user,
        or a falsy value to fall back to
        ``REANA_KUBERNETES_JOBS_MEMORY_REQUEST``.
    :raises REANAKubernetesWrongMemoryFormat: if
        ``validate_kubernetes_memory`` rejects the supplied value.
    :raises REANAKubernetesMemoryLimitExceeded: if the supplied value is
        larger than ``REANA_KUBERNETES_JOBS_MAX_USER_MEMORY_REQUEST``.
    """
    if not kubernetes_memory_request:
        # Nothing supplied by the user: use the configured default.
        self.kubernetes_memory_request = REANA_KUBERNETES_JOBS_MEMORY_REQUEST
        return

    if not validate_kubernetes_memory(kubernetes_memory_request):
        msg = f'The "kubernetes_memory_request" requested {kubernetes_memory_request} has wrong format.'
        logging.error(
            "Error while validating Kubernetes memory request: {}".format(msg)
        )
        raise REANAKubernetesWrongMemoryFormat(msg)

    # The ceiling is optional; skip the comparison when it is not configured.
    ceiling = REANA_KUBERNETES_JOBS_MAX_USER_MEMORY_REQUEST
    if ceiling:
        wanted_bytes = kubernetes_memory_to_bytes(kubernetes_memory_request)
        ceiling_bytes = kubernetes_memory_to_bytes(ceiling)
        if wanted_bytes > ceiling_bytes:
            raise REANAKubernetesMemoryLimitExceeded(
                f'The "kubernetes_memory_request" requested ({kubernetes_memory_request}) exceeds the limit ({ceiling}).'
            )

    self.kubernetes_memory_request = kubernetes_memory_request
816+
696817
def set_memory_limit(self, kubernetes_memory_limit):
697818
"""Set memory limit for job pods. Validate if provided format is correct."""
698819
if kubernetes_memory_limit:
699820
if not validate_kubernetes_memory(kubernetes_memory_limit):
700-
msg = f'The "kubernetes_memory_limit" provided {kubernetes_memory_limit} has wrong format.'
821+
msg = f'The "kubernetes_memory_limit" requested {kubernetes_memory_limit} has wrong format.'
701822
logging.error(
702823
"Error while validating Kubernetes memory limit: {}".format(msg)
703824
)
@@ -711,7 +832,9 @@ def set_memory_limit(self, kubernetes_memory_limit):
711832
REANA_KUBERNETES_JOBS_MAX_USER_MEMORY_LIMIT
712833
)
713834
if custom_job_memory_limit_bytes > max_custom_job_memory_limit_bytes:
714-
msg = f'The "kubernetes_memory_limit" provided ({kubernetes_memory_limit}) exceeds the limit ({REANA_KUBERNETES_JOBS_MAX_USER_MEMORY_LIMIT}).'
835+
msg = f'The "kubernetes_memory_limit" requested ({kubernetes_memory_limit}) exceeds the limit ({REANA_KUBERNETES_JOBS_MAX_USER_MEMORY_LIMIT}).'
715836
raise REANAKubernetesMemoryLimitExceeded(msg)
716837

717-
self.kubernetes_memory_limit = kubernetes_memory_limit
838+
self.kubernetes_memory_limit = (
839+
kubernetes_memory_limit or REANA_KUBERNETES_JOBS_MEMORY_LIMIT
840+
)

reana_job_controller/rest.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
from reana_commons.errors import (
1919
REANAKubernetesMemoryLimitExceeded,
2020
REANAKubernetesWrongMemoryFormat,
21+
REANAKubernetesCPULimitExceeded,
22+
REANAKubernetesWrongCPUFormat,
2123
)
2224

2325
from reana_db.models import JobStatus
@@ -291,6 +293,10 @@ def create_job(): # noqa
291293
# to the k8s API when many jobs are executed at the same time
292294
secrets=get_cached_user_secrets(),
293295
)
296+
except REANAKubernetesCPULimitExceeded as e:
297+
return jsonify({"message": e.message}), 403
298+
except REANAKubernetesWrongCPUFormat as e:
299+
return jsonify({"message": e.message}), 400
294300
except REANAKubernetesMemoryLimitExceeded as e:
295301
return jsonify({"message": e.message}), 403
296302
except REANAKubernetesWrongMemoryFormat as e:

reana_job_controller/schemas.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# -*- coding: utf-8 -*-
33
#
44
# This file is part of REANA.
5-
# Copyright (C) 2017, 2018, 2019, 2020, 2021, 2022 CERN.
5+
# Copyright (C) 2017, 2018, 2019, 2020, 2021, 2022, 2025 CERN.
66
#
77
# REANA is free software; you can redistribute it and/or modify it
88
# under the terms of the MIT License; see LICENSE file for more details.
@@ -49,6 +49,9 @@ class JobRequest(Schema):
4949
voms_proxy = fields.Bool(required=False)
5050
rucio = fields.Bool(required=False)
5151
kubernetes_uid = fields.Int(required=False)
52+
kubernetes_cpu_request = fields.Str(required=False)
53+
kubernetes_cpu_limit = fields.Str(required=False)
54+
kubernetes_memory_request = fields.Str(required=False)
5255
kubernetes_memory_limit = fields.Str(required=False)
5356
kubernetes_job_timeout = fields.Int(required=False)
5457
unpacked_img = fields.Bool(required=False)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
"jinja2<3.1.0",
6565
"fs>=2.0",
6666
"marshmallow>2.13.0,<3.0.0", # same upper pin as reana-server
67-
"reana-commons[kubernetes]>=0.95.0a7,<0.96.0",
67+
"reana-commons[kubernetes]>=0.95.0a11,<0.96.0",
6868
"reana-db>=0.95.0a5,<0.96.0",
6969
"retrying>=1.3.3",
7070
]

0 commit comments

Comments
 (0)