From ec8be036afa42b905386cbbddb3a11e5b373ffd3 Mon Sep 17 00:00:00 2001
From: Alexander Jipa
Date: Fri, 26 Apr 2024 15:24:32 -0400
Subject: [PATCH] feat: add privileged option to local_docker (#897)

---
 torchx/schedulers/docker_scheduler.py         |  9 ++++++++
 .../schedulers/test/docker_scheduler_test.py  | 23 +++++++++++++------
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/torchx/schedulers/docker_scheduler.py b/torchx/schedulers/docker_scheduler.py
index b56e4381b..fe36a2a49 100644
--- a/torchx/schedulers/docker_scheduler.py
+++ b/torchx/schedulers/docker_scheduler.py
@@ -124,6 +124,7 @@ def ensure_network(client: Optional["DockerClient"] = None) -> None:
 class DockerOpts(TypedDict, total=False):
     copy_env: Optional[List[str]]
     env: Optional[Dict[str, str]]
+    privileged: bool
 
 
 class DockerScheduler(DockerWorkspaceMixin, Scheduler[DockerOpts]):
@@ -287,6 +288,7 @@ def _submit_dryrun(self, app: AppDef, cfg: DockerOpts) -> AppDryRunInfo[DockerJo
                             LABEL_REPLICA_ID: str(replica_id),
                         },
                         "hostname": name,
+                        "privileged": cfg.get("privileged", False),
                         "network": NETWORK,
                         "mounts": mounts,
                         "devices": devices,
@@ -374,6 +376,13 @@ def _run_opts(self) -> runopts:
             (e.g. ENV1:v1,ENV2:v2,ENV3:v3 or ENV1:V1;ENV2:V2).
             Environment variables from env will be applied on top of the ones from copy_env""",
         )
+        opts.add(
+            "privileged",
+            type_=bool,
+            default=False,
+            help="If true runs the container with elevated permissions."
+            " Equivalent to running with `docker run --privileged`.",
+        )
         return opts
 
     def _get_app_state(self, container: "Container") -> AppState:
diff --git a/torchx/schedulers/test/docker_scheduler_test.py b/torchx/schedulers/test/docker_scheduler_test.py
index 846cd190a..f07083dd1 100644
--- a/torchx/schedulers/test/docker_scheduler_test.py
+++ b/torchx/schedulers/test/docker_scheduler_test.py
@@ -71,7 +71,7 @@ def test_submit_dryrun(self) -> None:
         app = _test_app()
         with patch("torchx.schedulers.docker_scheduler.make_unique") as make_unique_ctx:
             make_unique_ctx.return_value = "app_name_42"
-            info = self.scheduler._submit_dryrun(app, cfg={})
+            info = self.scheduler.submit_dryrun(app, cfg={})
 
         want = DockerJob(
             "app_name_42",
@@ -109,6 +109,7 @@ def test_submit_dryrun(self) -> None:
                         },
                         "mem_limit": "3000m",
                         "shm_size": "3000m",
+                        "privileged": False,
                         "name": "app_name_42-trainer-0",
                         "hostname": "app_name_42-trainer-0",
                         "nano_cpus": int(2e9),
@@ -137,7 +138,7 @@ def test_volume_mounts(self) -> None:
             specs.VolumeMount(src="name", dst_path="/tmp", read_only=True),
         ]
 
-        info = self.scheduler._submit_dryrun(app, cfg={})
+        info = self.scheduler.submit_dryrun(app, cfg={})
         want = [
             Mount(
                 target="/tmp",
@@ -154,7 +155,7 @@ def test_device_mounts(self) -> None:
             specs.DeviceMount(src_path="foo", dst_path="bar"),
         ]
 
-        info = self.scheduler._submit_dryrun(app, cfg={})
+        info = self.scheduler.submit_dryrun(app, cfg={})
         self.assertEqual(info.request.containers[0].kwargs["devices"], ["foo:bar:rwm"])
 
     def test_resource_devices(self) -> None:
@@ -162,7 +163,7 @@ def test_resource_devices(self) -> None:
         app.roles[0].mounts = []
         app.roles[0].resource.devices = {"vpc.amazonaws.com/efa": 1}
 
-        info = self.scheduler._submit_dryrun(app, cfg={})
+        info = self.scheduler.submit_dryrun(app, cfg={})
         self.assertEqual(
             info.request.containers[0].kwargs["devices"],
             ["/dev/infiniband/uverbs0:/dev/infiniband/uverbs0:rwm"],
@@ -174,7 +175,7 @@ def test_copy_env(self) -> None:
         cfg = DockerOpts({"copy_env": ["FOO_*", "BAR_*"]})
         with patch("torchx.schedulers.docker_scheduler.make_unique") as make_unique_ctx:
             make_unique_ctx.return_value = "app_name_42"
-            info = self.scheduler._submit_dryrun(app, cfg)
+            info = self.scheduler.submit_dryrun(app, cfg)
         self.assertEqual(
             info.request.containers[0].kwargs["environment"],
             {
@@ -190,7 +191,7 @@ def test_env(self) -> None:
         cfg = DockerOpts({"env": {"FOO_1": "BAR_1"}})
         with patch("torchx.schedulers.docker_scheduler.make_unique") as make_unique_ctx:
             make_unique_ctx.return_value = "app_name_42"
-            info = self.scheduler._submit_dryrun(app, cfg)
+            info = self.scheduler.submit_dryrun(app, cfg)
         self.assertEqual(
             info.request.containers[0].kwargs["environment"],
             {
@@ -200,13 +201,21 @@ def test_env(self) -> None:
             },
         )
 
+    def test_privileged(self) -> None:
+        app = _test_app()
+        cfg = DockerOpts({"privileged": True})
+        with patch("torchx.schedulers.docker_scheduler.make_unique") as make_unique_ctx:
+            make_unique_ctx.return_value = "app_name_42"
+            info = self.scheduler.submit_dryrun(app, cfg)
+        self.assertTrue(info.request.containers[0].kwargs["privileged"])
+
     def test_long_hostname(self) -> None:
         app = _test_app()
         for role in app.roles:
             role.name = "ethology_explore_magic_calliope_divisive_whirl_dealt_lotus_oncology_facet_deerskin_blum_elective_spill_trammel_trainer"
         with patch("torchx.schedulers.docker_scheduler.make_unique") as make_unique_ctx:
             make_unique_ctx.return_value = "ethology_explore_magic_calliope_divisive_whirl_dealt_lotus_oncology_facet_deerskin_blum_elective_spill_trammel_12345"
-            info = self.scheduler._submit_dryrun(app, DockerOpts())
+            info = self.scheduler.submit_dryrun(app, DockerOpts())
         for container in info.request.containers:
             assert "name" in container.kwargs
             name = container.kwargs["name"]