From c8fa7ffc35ae82925a8631fe213ddd198bb58cc2 Mon Sep 17 00:00:00 2001 From: Agoston Peto Date: Mon, 3 Nov 2025 20:49:54 +0000 Subject: [PATCH 1/6] Prevent parallel remote testplan executions on same runpath. --- .../3283_changed.prevent_parallel_run.rst | 1 + testplan/common/remote/remote_resource.py | 44 +++++++++++++++++++ testplan/runnable/base.py | 4 ++ 3 files changed, 49 insertions(+) create mode 100755 doc/newsfragments/3283_changed.prevent_parallel_run.rst diff --git a/doc/newsfragments/3283_changed.prevent_parallel_run.rst b/doc/newsfragments/3283_changed.prevent_parallel_run.rst new file mode 100755 index 000000000..abfcb2729 --- /dev/null +++ b/doc/newsfragments/3283_changed.prevent_parallel_run.rst @@ -0,0 +1 @@ +Prevent parallel remote testplan executions on same runpath. \ No newline at end of file diff --git a/testplan/common/remote/remote_resource.py b/testplan/common/remote/remote_resource.py index 6354c4ae8..00e6b1eb2 100644 --- a/testplan/common/remote/remote_resource.py +++ b/testplan/common/remote/remote_resource.py @@ -204,6 +204,7 @@ def _prepare_remote(self) -> None: self._check_remote_os() self._define_remote_dirs() + self._check_remote_pidfile() self._create_remote_dirs() if self.cfg.push: @@ -233,6 +234,9 @@ def _define_remote_dirs(self) -> None: self._remote_runid_file = os.path.join( self._remote_plan_runpath, self._get_plan().runid_filename ) + self._remote_pid_file = os.path.join( + self._remote_plan_runpath, "testplan.pid" + ) self._remote_resource_runpath = rebase_path( self.runpath, @@ -267,6 +271,41 @@ def _remote_working_dir(self) -> None: self._workspace_paths.remote, ) + def _check_remote_pidfile(self) -> None: + """Check remote PID file and block if another testplan process is running on the same runpath.""" + + status, stdout, _ = self._ssh_client.exec_command( + cmd=f"/bin/cat {self._remote_pid_file}", + label="read remote pid file", + check=False, + ) + + if status: + # No PID file found, no other process is using the runpath + return + + hostname, pid = ( + stdout.strip().split(";") + if ";" in stdout + else (None, stdout.strip()) + ) + if pid and pid.isdigit(): + if hostname: + ssh_client = SSHClient(hostname) + else: + ssh_client = self._ssh_client + + check_status, _, _ = ssh_client.exec_command( + cmd=f"/bin/ps -p {pid}", + label="check remote pid running", + check=False, + ) + + if check_status == 0: + raise RuntimeError( + f"Another testplan instance (hostname: {hostname or self.cfg.remote_host}, pid: {pid}) is already using the same runpath" + ) + def _create_remote_dirs(self) -> None: """Create mandatory directories in remote host.""" @@ -298,6 +337,11 @@ def _create_remote_dirs(self) -> None: label="create remote runid file", ) + self._ssh_client.exec_command( + cmd=f'/bin/echo "{os.uname()[1]};{os.getpid()}" > {self._remote_pid_file}', + label="create initial pid file", + ) + # TODO: remote venv setup # TODO: testplan_lib will resolved to site-packages under venv, # TODO: while rpyc_classic.py under bin isn't included diff --git a/testplan/runnable/base.py b/testplan/runnable/base.py index 9d72f34aa..f82a8e731 100644 --- a/testplan/runnable/base.py +++ b/testplan/runnable/base.py @@ -1316,6 +1316,10 @@ def make_runpath_dirs(self): makeemptydirs(self._runpath) makeemptydirs(self._scratch) + self.pidfile_path = os.path.join(self._runpath, "testplan.pid") + with open(self.pidfile_path, "w") as pid_file: + pid_file.write(str(os.getpid())) + with open( os.path.join(self._runpath, self.runid_filename), "wb" ) as fp: From f9e503ace468dca491dc563365df3f883b5d576b Mon Sep 17 00:00:00 2001 From: Agoston Peto Date: Wed, 12 Nov 2025 08:11:02 +0000 Subject: [PATCH 2/6] revert remote changes, add pid check for testrunner --- .../3283_changed.prevent_parallel_run.rst | 2 +- testplan/common/entity/base.py | 2 + testplan/common/remote/remote_resource.py | 41 ------------------- testplan/runnable/base.py | 36 +++++++++++++++- 4 files changed, 37 insertions(+), 44 deletions(-) diff --git a/doc/newsfragments/3283_changed.prevent_parallel_run.rst b/doc/newsfragments/3283_changed.prevent_parallel_run.rst index abfcb2729..e57eeb1ce 100755 --- a/doc/newsfragments/3283_changed.prevent_parallel_run.rst +++ b/doc/newsfragments/3283_changed.prevent_parallel_run.rst @@ -1 +1 @@ -Prevent parallel remote testplan executions on same runpath. \ No newline at end of file +Prevent parallel testplan executions on same runpath. \ No newline at end of file diff --git a/testplan/common/entity/base.py b/testplan/common/entity/base.py index 45c1e0673..f5061d4be 100644 --- a/testplan/common/entity/base.py +++ b/testplan/common/entity/base.py @@ -1125,6 +1125,8 @@ def _execute_step(self, step, *args, **kwargs): res = None try: res = step(*args, **kwargs) + except RuntimeError as exc: + raise exc except Exception as exc: self.logger.error( "Exception on %s, step %s - %s", diff --git a/testplan/common/remote/remote_resource.py b/testplan/common/remote/remote_resource.py index 9aa5f8d8a..075925d90 100644 --- a/testplan/common/remote/remote_resource.py +++ b/testplan/common/remote/remote_resource.py @@ -247,7 +247,6 @@ def _prepare_remote(self) -> None: self._check_remote_os() self._define_remote_dirs() - self._check_remote_pidfile() self._create_remote_dirs() if self.cfg.push: @@ -321,41 +320,6 @@ def _remote_working_dir(self) -> str: self._workspace_paths.remote, ) - def _check_remote_pidfile(self) -> None: - """Check remote PID file and block if another testplan process is running on the same runpath.""" - - status, stdout, _ = self._ssh_client.exec_command( - cmd=f"/bin/cat {self._remote_pid_file}", - label="read remote pid file", - check=False, - ) - - if status: - # No PID file found, no other process is using the runpath - return - - hostname, pid = ( - stdout.strip().split(";") - if ";" in stdout - else (None, stdout.strip()) - ) - if pid and pid.isdigit(): - if hostname: - ssh_client = SSHClient(hostname) - else: - ssh_client = self._ssh_client - - check_status, _, _ = ssh_client.exec_command( - cmd=f"/bin/ps -p {pid}", - label="check remote pid running", - check=False, - ) - - if check_status == 0: - raise RuntimeError( - f"Another testplan instance (hostname: {hostname or self.cfg.remote_host}, pid: {pid}) is already using the same runpath" - ) - def _create_remote_dirs(self) -> None: """Create mandatory directories in remote host.""" @@ -386,11 +350,6 @@ def _create_remote_dirs(self) -> None: label="create remote runid file", ) - self._ssh_client.exec_command( - cmd=f'/bin/echo "{os.uname()[1]};{os.getpid()}" > {self._remote_pid_file}', - label="create initial pid file", - ) - # corner cases: # - local workspace under remote runpath # more? diff --git a/testplan/runnable/base.py b/testplan/runnable/base.py index f82a8e731..7f31e24a4 100644 --- a/testplan/runnable/base.py +++ b/testplan/runnable/base.py @@ -1291,6 +1291,36 @@ def _should_task_running(self, task_info: TaskInformation) -> bool: return should_run + def _check_pidfile(self): + """ + Check if a PID file exists and if the process is still running. + Raises RuntimeError if another testplan instance is using this runpath. + """ + + if not os.path.exists(self._pidfile_path): + self.logger.debug(f"PID file does not exist: {self._pidfile_path}") + return + + with open(self._pidfile_path, "r") as pid_file: + pid_content = pid_file.read().strip() + + if not pid_content or not pid_content.isdigit(): + self.logger.debug(f"Invalid PID format in file: {pid_content}") + return + + pid = int(pid_content) + + try: + # Signal 0 doesn't kill, just checks if process exists + os.kill(pid, 0) + raise RuntimeError( + f"Another testplan instance with PID {pid} is already using runpath: {self._runpath}" + ) + except OSError: + self.logger.debug( + f"Stale PID file found with PID {pid}, continuing" + ) + def make_runpath_dirs(self): """ Creates runpath related directories. @@ -1300,6 +1330,9 @@ def make_runpath_dirs(self): "{} runpath cannot be None".format(self.__class__.__name__) ) + self._pidfile_path = os.path.join(self._runpath, "testplan.pid") + self._check_pidfile() + self.logger.user_info( "Testplan[%s] has runpath: %s and pid %s", self.cfg.name, @@ -1316,8 +1349,7 @@ def make_runpath_dirs(self): makeemptydirs(self._runpath) makeemptydirs(self._scratch) - self.pidfile_path = os.path.join(self._runpath, "testplan.pid") - with open(self.pidfile_path, "w") as pid_file: + with open(self._pidfile_path, "w") as pid_file: pid_file.write(str(os.getpid())) with open( From d50898fd9527de4b726ff0c10fc82ee5ac9917aa Mon Sep 17 00:00:00 2001 From: Agoston Peto Date: Wed, 12 Nov 2025 08:14:06 +0000 Subject: [PATCH 3/6] revert remote changes, add pid check for testrunner --- testplan/common/remote/remote_resource.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/testplan/common/remote/remote_resource.py b/testplan/common/remote/remote_resource.py index 075925d90..8c9248873 100644 --- a/testplan/common/remote/remote_resource.py +++ b/testplan/common/remote/remote_resource.py @@ -273,9 +273,6 @@ def _define_remote_dirs(self) -> None: self._remote_runid_file = os.path.join( self._remote_plan_runpath, self._get_plan().runid_filename ) - self._remote_pid_file = os.path.join( - self._remote_plan_runpath, "testplan.pid" - ) self._remote_resource_runpath = rebase_path( self.runpath, From bfcb2a9efac9aa3212f127a087371f939d60453b Mon Sep 17 00:00:00 2001 From: Agoston Peto Date: Fri, 14 Nov 2025 12:08:20 +0000 Subject: [PATCH 4/6] revert remote changes, add pid check for testrunner --- testplan/runnable/base.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/testplan/runnable/base.py b/testplan/runnable/base.py index 7f31e24a4..4340b6451 100644 --- a/testplan/runnable/base.py +++ b/testplan/runnable/base.py @@ -3,6 +3,7 @@ import inspect import math import os +import psutil import random import re import sys @@ -1298,28 +1299,20 @@ def _check_pidfile(self): """ if not os.path.exists(self._pidfile_path): - self.logger.debug(f"PID file does not exist: {self._pidfile_path}") return with open(self._pidfile_path, "r") as pid_file: pid_content = pid_file.read().strip() if not pid_content or not pid_content.isdigit(): - self.logger.debug(f"Invalid PID format in file: {pid_content}") return pid = int(pid_content) - try: - # Signal 0 doesn't kill, just checks if process exists - os.kill(pid, 0) + if psutil.pid_exists(pid): raise RuntimeError( f"Another testplan instance with PID {pid} is already using runpath: {self._runpath}" ) - except OSError: - self.logger.debug( - f"Stale PID file found with PID {pid}, continuing" - ) def make_runpath_dirs(self): """ From d73016653c0533d06882246a5c22a25538114b53 Mon Sep 17 00:00:00 2001 From: Agoston Peto Date: Tue, 18 Nov 2025 07:32:46 +0000 Subject: [PATCH 5/6] Add unit tests --- testplan/runnable/base.py | 2 +- tests/unit/testplan/runnable/test_base.py | 89 ++++++++++++++++++++++- 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/testplan/runnable/base.py b/testplan/runnable/base.py index 4340b6451..278a7ba3c 100644 --- a/testplan/runnable/base.py +++ b/testplan/runnable/base.py @@ -1309,7 +1309,7 @@ def _check_pidfile(self): pid = int(pid_content) - if psutil.pid_exists(pid): + if psutil.pid_exists(pid) and pid != os.getpid(): raise RuntimeError( f"Another testplan instance with PID {pid} is already using runpath: {self._runpath}" ) diff --git a/tests/unit/testplan/runnable/test_base.py b/tests/unit/testplan/runnable/test_base.py index 67f8b04d1..30a3db748 100644 --- a/tests/unit/testplan/runnable/test_base.py +++ b/tests/unit/testplan/runnable/test_base.py @@ -1,8 +1,10 @@ import pytest +import os +import psutil from testplan.common.report import ReportCategories from testplan.report.testing import TestGroupReport -from testplan.runnable.base import collate_for_merging +from testplan.runnable.base import collate_for_merging, TestRunner @pytest.mark.parametrize( @@ -22,3 +24,88 @@ def test_collate_for_merging(entries): assert all( [t[0].category != ReportCategories.SYNTHESIZED for t in res if t] ) + + +class TestPidFileCheck: + """Tests for PID file checking functionality""" + + def test_check_pidfile_no_file(self, tmpdir): + """Test when no PID file exists""" + plan = TestRunner(name="test", parse_cmdline=False) + plan._runpath = str(tmpdir) + plan._pidfile_path = os.path.join(plan._runpath, "testplan.pid") + plan._check_pidfile() + + def test_check_pidfile_empty_file(self, tmpdir): + """Test when PID file is empty""" + plan = TestRunner(name="test", parse_cmdline=False) + plan._runpath = str(tmpdir) + plan._pidfile_path = os.path.join(plan._runpath, "testplan.pid") + + with open(plan._pidfile_path, "w") as f: + f.write("") + + plan._check_pidfile() + + def test_check_pidfile_invalid_content(self, tmpdir): + """Test when PID file contains invalid content""" + plan = TestRunner(name="test", parse_cmdline=False) + plan._runpath = str(tmpdir) + plan._pidfile_path = os.path.join(plan._runpath, "testplan.pid") + + with open(plan._pidfile_path, "w") as f: + f.write("not_a_number") + + plan._check_pidfile() + + def test_check_pidfile_current_process(self, tmpdir): + """Test when PID file contains current process PID""" + plan = TestRunner(name="test", parse_cmdline=False) + plan._runpath = str(tmpdir) + plan._pidfile_path = os.path.join(plan._runpath, "testplan.pid") + + current_pid = os.getpid() + with open(plan._pidfile_path, "w") as f: + f.write(str(current_pid)) + + plan._check_pidfile() + + def test_check_pidfile_stale_pid(self, tmpdir): + """Test when PID file contains a non-existent PID""" + plan = TestRunner(name="test", parse_cmdline=False) + plan._runpath = str(tmpdir) + plan._pidfile_path = os.path.join(plan._runpath, "testplan.pid") + + fake_pid = 999999 + while psutil.pid_exists(fake_pid): + fake_pid += 1 + + with open(plan._pidfile_path, "w") as f: + f.write(str(fake_pid)) + + plan._check_pidfile() + + def test_check_pidfile_another_process(self, tmpdir): + """Test when PID file contains another running process PID""" + plan = TestRunner(name="test", parse_cmdline=False) + plan._runpath = str(tmpdir) + plan._pidfile_path = os.path.join(plan._runpath, "testplan.pid") + + current_pid = os.getpid() + other_pid = None + for proc in psutil.process_iter(["pid"]): + if proc.pid != current_pid and proc.pid != 1: + other_pid = proc.pid + break + + if other_pid is None: + pytest.skip("Could not find another running process for testing") + + with open(plan._pidfile_path, "w") as f: + f.write(str(other_pid)) + + with pytest.raises( + RuntimeError, + match=f"Another testplan instance with PID {other_pid}", + ): + plan._check_pidfile() From 9fd52ecd2de38957aae2aae99bd694d99f6a7af3 Mon Sep 17 00:00:00 2001 From: Agoston Peto Date: Wed, 19 Nov 2025 08:05:21 +0000 Subject: [PATCH 6/6] Use custom exception for runpath conflict --- testplan/common/entity/base.py | 3 ++- testplan/common/utils/exceptions.py | 8 ++++++++ testplan/runnable/base.py | 3 ++- tests/unit/testplan/runnable/test_base.py | 3 ++- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/testplan/common/entity/base.py b/testplan/common/entity/base.py index f5061d4be..5cd1b355c 100644 --- a/testplan/common/entity/base.py +++ b/testplan/common/entity/base.py @@ -29,6 +29,7 @@ from testplan.common.config import Config, ConfigOption from testplan.common.report import Status from testplan.common.utils import logger +from testplan.common.utils.exceptions import RunpathInUseError from testplan.common.utils.path import default_runpath, makedirs, makeemptydirs from testplan.common.utils.strings import slugify, uuid4 from testplan.common.utils.thread import execute_as_thread, interruptible_join @@ -1125,7 +1126,7 @@ def _execute_step(self, step, *args, **kwargs): res = None try: res = step(*args, **kwargs) - except RuntimeError as exc: + except RunpathInUseError as exc: raise exc except Exception as exc: self.logger.error( diff --git a/testplan/common/utils/exceptions.py b/testplan/common/utils/exceptions.py index 72356efd4..73a1de981 100644 --- a/testplan/common/utils/exceptions.py +++ b/testplan/common/utils/exceptions.py @@ -7,6 +7,14 @@ LOGGER = logging.getLogger(__name__) +class RunpathInUseError(RuntimeError): + """ + Exception raised when a runpath is already in use by another testplan instance. + """ + + pass + + def should_raise(exception, item, args=None, kwargs=None, pattern=None): """ "Utility that validates callable should raise specific exception. diff --git a/testplan/runnable/base.py b/testplan/runnable/base.py index 278a7ba3c..3aa111da7 100644 --- a/testplan/runnable/base.py +++ b/testplan/runnable/base.py @@ -52,6 +52,7 @@ from testplan.common.report import MergeError from testplan.common.utils import logger, strings +from testplan.common.utils.exceptions import RunpathInUseError from testplan.common.utils.package import import_tmp_module from testplan.common.utils.path import default_runpath, makedirs, makeemptydirs from testplan.common.utils.selector import Expr as SExpr @@ -1310,7 +1311,7 @@ def _check_pidfile(self): pid = int(pid_content) if psutil.pid_exists(pid) and pid != os.getpid(): - raise RuntimeError( + raise RunpathInUseError( f"Another testplan instance with PID {pid} is already using runpath: {self._runpath}" ) diff --git a/tests/unit/testplan/runnable/test_base.py b/tests/unit/testplan/runnable/test_base.py index 30a3db748..17107d2f6 100644 --- a/tests/unit/testplan/runnable/test_base.py +++ b/tests/unit/testplan/runnable/test_base.py @@ -3,6 +3,7 @@ import psutil from testplan.common.report import ReportCategories +from testplan.common.utils.exceptions import RunpathInUseError from testplan.report.testing import TestGroupReport from testplan.runnable.base import collate_for_merging, TestRunner @@ -105,7 +106,7 @@ def test_check_pidfile_another_process(self, tmpdir): f.write(str(other_pid)) with pytest.raises( - RuntimeError, + RunpathInUseError, match=f"Another testplan instance with PID {other_pid}", ): plan._check_pidfile()