Skip to content

Add appdef metadata to torchx event #947

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion torchx/runner/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,12 @@ def run_component(
parent_run_id=parent_run_id,
)
handle = self.schedule(dryrun_info)
app = none_throws(dryrun_info._app)
ctx._torchx_event.workspace = workspace
ctx._torchx_event.scheduler = none_throws(dryrun_info._scheduler)
ctx._torchx_event.app_image = none_throws(dryrun_info._app).roles[0].image
ctx._torchx_event.app_image = app.roles[0].image
ctx._torchx_event.app_id = parse_app_handle(handle)[2]
ctx._torchx_event.app_metadata = app.metadata
return handle

def dryrun_component(
Expand Down Expand Up @@ -263,6 +265,7 @@ def run(
ctx._torchx_event.scheduler = none_throws(dryrun_info._scheduler)
ctx._torchx_event.app_image = none_throws(dryrun_info._app).roles[0].image
ctx._torchx_event.app_id = parse_app_handle(handle)[2]
ctx._torchx_event.app_metadata = app.metadata
return handle

def schedule(self, dryrun_info: AppDryRunInfo) -> AppHandle:
Expand Down
6 changes: 5 additions & 1 deletion torchx/runner/events/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import time
import traceback
from types import TracebackType
from typing import Optional, Type
from typing import Dict, Optional, Type

from torchx.runner.events.handlers import get_logging_handler

Expand Down Expand Up @@ -84,6 +84,7 @@ def __init__(
scheduler: Optional[str] = None,
app_id: Optional[str] = None,
app_image: Optional[str] = None,
app_metadata: Optional[Dict[str, str]] = None,
runcfg: Optional[str] = None,
workspace: Optional[str] = None,
) -> None:
Expand All @@ -92,6 +93,7 @@ def __init__(
scheduler or "",
app_id,
app_image=app_image,
app_metadata=app_metadata,
runcfg=runcfg,
workspace=workspace,
)
Expand Down Expand Up @@ -128,6 +130,7 @@ def _generate_torchx_event(
scheduler: str,
app_id: Optional[str] = None,
app_image: Optional[str] = None,
app_metadata: Optional[Dict[str, str]] = None,
runcfg: Optional[str] = None,
source: SourceType = SourceType.UNKNOWN,
workspace: Optional[str] = None,
Expand All @@ -138,6 +141,7 @@ def _generate_torchx_event(
api=api,
app_id=app_id,
app_image=app_image,
app_metadata=app_metadata,
runcfg=runcfg,
source=source,
workspace=workspace,
Expand Down
6 changes: 5 additions & 1 deletion torchx/runner/events/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import json
from dataclasses import asdict, dataclass
from enum import Enum
from typing import Optional, Union
from typing import Dict, Optional, Union


class SourceType(str, Enum):
Expand All @@ -30,17 +30,21 @@ class TorchxEvent:
api: Api name
app_id: Unique id that is set by the underlying scheduler
image: Image/container bundle that is used to execute request.
app_metadata: metadata to the app (treatment of metadata is scheduler dependent)
runcfg: Run config that was used to schedule app.
source: Type of source the event is generated.
cpu_time_usec: CPU time spent in usec
wall_time_usec: Wall time spent in usec
start_epoch_time_usec: Epoch time in usec when runner event starts
Workspace: Track how different workspaces/no workspace affects build and scheduler
"""

session: str
scheduler: str
api: str
app_id: Optional[str] = None
app_image: Optional[str] = None
app_metadata: Optional[Dict[str, str]] = None
runcfg: Optional[str] = None
raw_exception: Optional[str] = None
source: SourceType = SourceType.UNKNOWN
Expand Down
17 changes: 15 additions & 2 deletions torchx/runner/events/test/lib_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def assert_event(
self.assertEqual(actual_event.app_image, expected_event.app_image)
self.assertEqual(actual_event.runcfg, expected_event.runcfg)
self.assertEqual(actual_event.source, expected_event.source)
self.assertEqual(actual_event.app_metadata, expected_event.app_metadata)

@patch("torchx.runner.events.get_logging_handler")
def test_get_or_create_logger(self, logging_handler_mock: MagicMock) -> None:
Expand All @@ -41,11 +42,13 @@ def test_get_or_create_logger(self, logging_handler_mock: MagicMock) -> None:
self.assertIsInstance(logger.handlers[0], logging.NullHandler)

def test_event_created(self) -> None:
test_metadata = {"test_key": "test_value"}
event = TorchxEvent(
session="test_session",
scheduler="test_scheduler",
api="test_api",
app_image="test_app_image",
app_metadata=test_metadata,
workspace="test_workspace",
)
self.assertEqual("test_session", event.session)
Expand All @@ -54,13 +57,16 @@ def test_event_created(self) -> None:
self.assertEqual("test_app_image", event.app_image)
self.assertEqual(SourceType.UNKNOWN, event.source)
self.assertEqual("test_workspace", event.workspace)
self.assertEqual(test_metadata, event.app_metadata)

def test_event_deser(self) -> None:
test_metadata = {"test_key": "test_value"}
event = TorchxEvent(
session="test_session",
scheduler="test_scheduler",
api="test_api",
app_image="test_app_image",
app_metadata=test_metadata,
workspace="test_workspace",
source=SourceType.EXTERNAL,
)
Expand All @@ -78,14 +84,17 @@ def assert_torchx_event(self, expected: TorchxEvent, actual: TorchxEvent) -> Non
self.assertEqual(expected.app_image, actual.app_image)
self.assertEqual(expected.source, actual.source)
self.assertEqual(expected.workspace, actual.workspace)
self.assertEqual(expected.app_metadata, actual.app_metadata)

def test_create_context(self, _) -> None:
cfg = json.dumps({"test_key": "test_value"})
test_dict = {"test_key": "test_value"}
cfg = json.dumps(test_dict)
context = log_event(
"test_call",
"local",
"test_app_id",
app_image="test_app_image_id",
app_metadata=test_dict,
runcfg=cfg,
workspace="test_workspace",
)
Expand All @@ -95,19 +104,22 @@ def test_create_context(self, _) -> None:
"test_call",
"test_app_id",
app_image="test_app_image_id",
app_metadata=test_dict,
runcfg=cfg,
workspace="test_workspace",
)

self.assert_torchx_event(expected_torchx_event, context._torchx_event)

def test_record_event(self, record_mock: MagicMock) -> None:
cfg = json.dumps({"test_key": "test_value"})
test_dict = {"test_key": "test_value"}
cfg = json.dumps(test_dict)
with log_event(
"test_call",
"local",
"test_app_id",
app_image="test_app_image_id",
app_metadata=test_dict,
runcfg=cfg,
workspace="test_workspace",
) as ctx:
Expand All @@ -119,6 +131,7 @@ def test_record_event(self, record_mock: MagicMock) -> None:
"test_call",
"test_app_id",
app_image="test_app_image_id",
app_metadata=test_dict,
runcfg=cfg,
workspace="test_workspace",
cpu_time_usec=ctx._torchx_event.cpu_time_usec,
Expand Down
Loading