Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ SHAREDIR ?= ${PREFIX}/share
PYTHON ?= $(shell command -v python3 python|head -n1)
DESTDIR ?= /
PATH := $(PATH):$(HOME)/.local/bin
MYPIP ?= pip
MYPIP ?= uv pip
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pushed a fix in #2247

IMAGE ?= ramalama
PROJECT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
EXCLUDE_DIRS := .venv venv .tox build
Expand Down
13 changes: 7 additions & 6 deletions docs/ramalama-convert.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,15 @@ Image to use when converting to GGUF format (when the `--gguf` option has been
executable and available in the `PATH`. The script is available from the `llama.cpp` GitHub repo. Defaults to the current
`quay.io/ramalama/ramalama-rag` image.

#### **--type**=*raw* | *car*
#### **--type**=*artifact* | *raw* | *car*

type of OCI Model Image to convert.
Convert the MODEL to the specified OCI Object

| Type | Description |
| ---- | ------------------------------------------------------------- |
| car | Includes base image with the model stored in a /models subdir |
| raw | Only the model and a link file model.file to it stored at / |
| Type | Description |
| -------- | ------------------------------------------------------------- |
| artifact | Store AI Models as artifacts |
| car | Traditional OCI image including base image with the model stored in a /models subdir |
| raw | Traditional OCI image including only the model and a link file `model.file` pointing to it, stored at / |

## EXAMPLE

Expand Down
8 changes: 8 additions & 0 deletions docs/ramalama.conf
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@
#
#carimage = "registry.access.redhat.com/ubi10-micro:latest"

# Convert the MODEL to the specified OCI Object
# Options: artifact, car, raw
#
# artifact: Store AI Models as artifacts
# car: Traditional OCI image including base image with the model stored in a /models subdir
# raw: Traditional OCI image including only the model and a link file `model.file` pointing to it, stored at /
#convert_type = "raw"

# Run RamaLama in the default container.
#
#container = true
Expand Down
12 changes: 12 additions & 0 deletions docs/ramalama.conf.5.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,18 @@ Min chunk size to attempt reusing from the cache via KV shifting
Run RamaLama in the default container.
RAMALAMA_IN_CONTAINER environment variable overrides this field.

**convert_type**="raw"

Convert the MODEL to the specified OCI Object
Options: artifact, car, raw

| Type | Description |
| -------- | ------------------------------------------------------------- |
| artifact | Store AI Models as artifacts |
| car | Traditional OCI image including base image with the model stored in a /models subdir |
| raw | Traditional OCI image including only the model and a link file `model.file` pointing to it, stored at / |


**ctx_size**=0

Size of the prompt context (0 = loaded from model)
Expand Down
70 changes: 50 additions & 20 deletions ramalama/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -734,11 +734,12 @@ def convert_parser(subparsers):
)
parser.add_argument(
"--type",
default="raw",
choices=["car", "raw"],
default=CONFIG.convert_type,
choices=["artifact", "car", "raw"],
help="""\
type of OCI Model Image to push.

Model "artifact" stores the AI Model as an OCI Artifact.
Model "car" includes base image with the model stored in a /models subdir.
Model "raw" contains the model and a link file model.file to it stored at /.""",
)
Expand Down Expand Up @@ -775,11 +776,12 @@ def push_parser(subparsers):
add_network_argument(parser)
parser.add_argument(
"--type",
default="raw",
choices=["car", "raw"],
default=CONFIG.convert_type,
choices=["artifact", "car", "raw"],
help="""\
type of OCI Model Image to push.

Model "artifact" stores the AI Model as an OCI Artifact.
Model "car" includes base image with the model stored in a /models subdir.
Model "raw" contains the model and a link file model.file to it stored at /.""",
)
Expand All @@ -794,19 +796,25 @@ def push_parser(subparsers):
parser.set_defaults(func=push_cli)


def _get_source_model(args):
def _get_source_model(args, transport=None):
src = shortnames.resolve(args.SOURCE)
smodel = New(src, args)
smodel = New(src, args, transport=transport)
if smodel.type == "OCI":
if not args.TARGET:
return smodel
raise ValueError(f"converting from an OCI based image {src} is not supported")
if not smodel.exists() and not args.dryrun:
smodel.pull(args)
return smodel


def push_cli(args):
source_model = _get_source_model(args)
target = args.SOURCE
transport = None
if not args.TARGET:
transport = "oci"
source_model = _get_source_model(args, transport=transport)

if args.TARGET:
target = shortnames.resolve(args.TARGET)
target_model = New(target, args)
Expand Down Expand Up @@ -1189,9 +1197,14 @@ def serve_cli(args):
model.ensure_model_exists(args)
except KeyError as e:
try:
if "://" in args.MODEL:
raise e
args.quiet = True
model = TransportFactory(args.MODEL, args, ignore_stderr=True).create_oci()
model.ensure_model_exists(args)
# Since this is an OCI model, prepend oci://
args.MODEL = f"oci://{args.MODEL}"

except Exception:
raise e

Expand Down Expand Up @@ -1432,27 +1445,42 @@ def rm_parser(subparsers):
parser.set_defaults(func=rm_cli)


# Best-effort removal of `model` through the OCI transport.
#
# Builds an OCI transport for `model` via TransportFactory (with
# ignore_stderr=True) and returns the result of its remove(args) call.
# Any exception raised while creating the transport or removing the model
# is swallowed and reported as False, so callers can use this as a
# fallback without wrapping it in their own try/except.
def _rm_oci_model(model, args) -> bool:
# attempt to remove as a container image
try:
m = TransportFactory(model, args, ignore_stderr=True).create_oci()
return m.remove(args)
except Exception:
# deliberate catch-all: failure here just means "not removed"
return False


def _rm_model(models, args):
exceptions = []
for model in models:
model = shortnames.resolve(model)

try:
m = New(model, args)
m.remove(args)
except KeyError as e:
if m.remove(args):
continue
# Failed to remove and might be OCI so attempt to remove OCI
if args.ignore:
_rm_oci_model(model, args)
continue
except (KeyError, subprocess.CalledProcessError) as e:
for prefix in MODEL_TYPES:
if model.startswith(prefix + "://"):
if not args.ignore:
raise e
try:
# attempt to remove as a container image
m = TransportFactory(model, args, ignore_stderr=True).create_oci()
m.remove(args)
return
except Exception:
pass
if not args.ignore:
raise e
# attempt to remove as a container image
if _rm_oci_model(model, args) or args.ignore:
continue
exceptions.append(e)

if len(exceptions) > 0:
for exception in exceptions[1:]:
perror("Error: " + str(exception).strip("'\""))
raise exceptions[0]


def rm_cli(args):
Expand Down Expand Up @@ -1512,7 +1540,7 @@ def inspect_parser(subparsers):
def inspect_cli(args):
args.pull = "never"
model = New(args.MODEL, args)
model.inspect(args.all, args.get == "all", args.get, args.json, args.dryrun)
print(model.inspect(args.all, args.get == "all", args.get, args.json, args.dryrun))


def main() -> None:
Expand Down Expand Up @@ -1544,9 +1572,11 @@ def eprint(e, exit_code):
args.func(args)
except urllib.error.HTTPError as e:
eprint(f"pulling {e.geturl()} failed: {e}", errno.EINVAL)
except FileNotFoundError as e:
eprint(e, errno.ENOENT)
except HelpException:
parser.print_help()
except (ConnectionError, IndexError, KeyError, ValueError, NoRefFileFound) as e:
except (IsADirectoryError, ConnectionError, IndexError, KeyError, ValueError, NoRefFileFound) as e:
eprint(e, errno.EINVAL)
except NotImplementedError as e:
eprint(e, errno.ENOSYS)
Expand Down
2 changes: 1 addition & 1 deletion ramalama/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ def verify_checksum(filename: str) -> bool:


def genname():
return "ramalama_" + "".join(random.choices(string.ascii_letters + string.digits, k=10))
return "ramalama-" + "".join(random.choices(string.ascii_letters + string.digits, k=10))


def engine_version(engine: SUPPORTED_ENGINES) -> str:
Expand Down
1 change: 1 addition & 0 deletions ramalama/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ class BaseConfig:
carimage: str = "registry.access.redhat.com/ubi10-micro:latest"
container: bool = None # type: ignore
ctx_size: int = 0
convert_type: Literal["artifact", "car", "raw"] = "raw"
default_image: str = DEFAULT_IMAGE
default_rag_image: str = DEFAULT_RAG_IMAGE
dryrun: bool = False
Expand Down
20 changes: 12 additions & 8 deletions ramalama/kube.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import platform
from typing import Optional, Tuple

from ramalama.common import MNT_DIR, RAG_DIR, genname, get_accel_env_vars
from ramalama.common import MNT_DIR, RAG_DIR, get_accel_env_vars
from ramalama.file import PlainFile
from ramalama.path_utils import normalize_host_path_for_container
from ramalama.version import version
Expand All @@ -17,6 +17,7 @@ def __init__(
mmproj_paths: Optional[Tuple[str, str]],
args,
exec_args,
artifact: bool,
):
self.src_model_path, self.dest_model_path = model_paths
self.src_chat_template_path, self.dest_chat_template_path = (
Expand All @@ -29,27 +30,30 @@ def __init__(
if getattr(args, "name", None):
self.name = args.name
else:
self.name = genname()
self.name = "ramalama"

self.args = args
self.exec_args = exec_args
self.image = args.image
self.artifact = artifact

def _gen_volumes(self):
mounts = """\
volumeMounts:"""

volumes = """
volumes:"""

if os.path.exists(self.src_model_path):
m, v = self._gen_path_volume()
mounts += m
volumes += v
else:
subPath = ""
if not self.artifact:
subPath = """
subPath: /models"""
mounts += f"""
- mountPath: {MNT_DIR}
subPath: /models
- mountPath: {MNT_DIR}{subPath}
name: model"""
volumes += self._gen_oci_volume()

Expand Down Expand Up @@ -104,7 +108,7 @@ def _gen_path_volume(self):
def _gen_oci_volume(self):
return f"""
- image:
reference: {self.ai_image}
reference: {self.src_model_path}
pullPolicy: IfNotPresent
name: model"""

Expand Down Expand Up @@ -176,7 +180,7 @@ def __gen_env_vars():
for k, v in env_vars.items():
env_spec += f"""
- name: {k}
value: {v}"""
value: \"{v}\""""

return env_spec

Expand All @@ -191,7 +195,7 @@ def generate(self) -> PlainFile:
# it into Kubernetes.
#
# Created with ramalama-{_version}
apiVersion: v1
apiVersion: apps/v1
kind: Deployment
metadata:
name: {self.name}
Expand Down
Loading
Loading