Skip to content

Commit abb2fa1

Browse files
authored
Add option to pass credentials name when creating a job (#1287)
1 parent f933a3e commit abb2fa1

5 files changed

Lines changed: 25 additions & 4 deletions

File tree

docs/commands/job/run.md

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ This command runs a job in Studio using the specified query file. You can config
3232
* `--env-file ENV_FILE` - File with environment variables for the job
3333
* `--env ENV` - Environment variables in KEY=VALUE format
3434
* `--cluster CLUSTER` - Compute cluster to run the job on
35+
* `-c, --credentials-name CREDENTIALS_NAME` - Name of the credentials to use for the job
3536
* `--workers WORKERS` - Number of workers for the job
3637
* `--files FILES` - Additional files to include in the job
3738
* `--python-version PYTHON_VERSION` - Python version for the job (e.g., 3.9, 3.10, 3.11)
@@ -97,7 +98,12 @@ datachain job clusters
9798
datachain job run --cluster 1 query.py
9899
```
99100

100-
9. Schedule a job to run once at a specific time
101+
9. Run a job with specific credentials
102+
```bash
103+
datachain job run --credentials-name my-aws-credentials query.py
104+
```
105+
106+
10. Schedule a job to run once at a specific time
101107
```bash
102108
# Run job tomorrow at 3pm
103109
datachain job run --start-time "tomorrow 3pm" query.py
@@ -112,7 +118,7 @@ datachain job run --start-time "monday 9am" query.py
112118
datachain job run --start-time "2024-01-15 14:30:00" query.py
113119
```
114120

115-
10. Schedule a recurring job using cron expression
121+
11. Schedule a recurring job using cron expression
116122
```bash
117123
# Run job daily at midnight
118124
datachain job run --cron "0 0 * * *" query.py
@@ -127,13 +133,13 @@ datachain job run --cron "0 * * * *" query.py
127133
datachain job run --cron "@monthly" query.py
128134
```
129135

130-
11. Schedule a recurring job with a start time
136+
12. Schedule a recurring job with a start time
131137
```bash
132138
# Start the recurring (cron) schedule only after 3pm tomorrow
133139
datachain job run --start-time "tomorrow 3pm" --cron "0 0 * * *" query.py
134140
```
135141

136-
12. Start the job and do not wait for the job to complete
142+
13. Start the job and do not wait for the job to complete
137143
```bash
138144
# Do not follow or tail the logs from Studio.
139145
datachain job run query.py --no-wait

src/datachain/cli/parser/job.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,13 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
6363
help="Compute cluster to run the job on",
6464
)
6565

66+
studio_run_parser.add_argument(
67+
"-c",
68+
"--credentials-name",
69+
action="store",
70+
help="Name of the credentials to use for the job",
71+
)
72+
6673
studio_run_parser.add_argument(
6774
"--workers",
6875
type=int,

src/datachain/remote/studio.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,7 @@ def create_job(
431431
cluster: Optional[str] = None,
432432
start_time: Optional[str] = None,
433433
cron: Optional[str] = None,
434+
credentials_name: Optional[str] = None,
434435
) -> Response[JobData]:
435436
data = {
436437
"query": query,
@@ -446,6 +447,7 @@ def create_job(
446447
"compute_cluster_name": cluster,
447448
"start_after": start_time,
448449
"cron_expression": cron,
450+
"credentials_name": credentials_name,
449451
}
450452
return self._send_request("datachain/job", data)
451453

src/datachain/studio.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def process_jobs_args(args: "Namespace"):
5050
args.start_time,
5151
args.cron,
5252
args.no_wait,
53+
args.credentials_name,
5354
)
5455

5556
if args.cmd == "cancel":
@@ -356,6 +357,7 @@ def create_job(
356357
start_time: Optional[str] = None,
357358
cron: Optional[str] = None,
358359
no_wait: Optional[bool] = False,
360+
credentials_name: Optional[str] = None,
359361
):
360362
query_type = "PYTHON" if query_file.endswith(".py") else "SHELL"
361363
with open(query_file) as f:
@@ -393,6 +395,7 @@ def create_job(
393395
cluster=cluster,
394396
start_time=parsed_start_time,
395397
cron=cron,
398+
credentials_name=credentials_name,
396399
)
397400
if not response.ok:
398401
raise DataChainError(response.message)

tests/test_cli_studio.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,8 @@ def test_studio_run(capsys, mocker, tmp_dir):
401401
"https://github.com/iterative/datachain",
402402
"--cluster",
403403
"default",
404+
"--credentials-name",
405+
"my-credentials",
404406
]
405407
)
406408
== 0
@@ -442,6 +444,7 @@ def test_studio_run(capsys, mocker, tmp_dir):
442444
"compute_cluster_name": "default",
443445
"start_after": None,
444446
"cron_expression": None,
447+
"credentials_name": "my-credentials",
445448
}
446449

447450

0 commit comments

Comments
 (0)