Skip to content

Commit fea9908

Browse files
authored
Merge branch 'main' into kvcache3
2 parents 4f046d9 + f99ca4e commit fea9908

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+999
-209
lines changed

.azure/gpu-test.yml

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@ jobs:
1717
strategy:
1818
matrix:
1919
"ordinary":
20-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.2-cuda12.1.0"
20+
#image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.7-cuda12.6.3"
2121
dependency: ""
2222
"w. Thunder":
23-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.2-cuda12.1.0"
23+
#image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.7-cuda12.6.3"
2424
dependency: "compiler"
2525
variables:
2626
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
@@ -29,8 +29,14 @@ jobs:
2929
HF_HOME: "/var/tmp/hf/home"
3030
HF_HUB_CACHE: "/var/tmp/hf/hub"
3131
CI: "true"
32+
PYTHON_VERSION: "3.10"
33+
CUDA_VERSION: "12.6.3"
34+
TORCH_VERSION: "2.7.0"
35+
CUDNN_FRONTEND_VERSION: "1.10.0"
3236
container:
33-
image: $(image)
37+
# image: "pytorchlightning/pytorch_lightning:base-cuda-py$(PYTHON_VERSION)-torch$(TORCH_VERSION)-cuda$(CUDA_VERSION)"
38+
# pytorchlightning/lightning-thunder:ubuntu22.04-cuda12.1.1-cudnn-fe1.5.0-py3.10-pt_main-dev
39+
image: "pytorchlightning/lightning-thunder:ubuntu24.04-cuda$(CUDA_VERSION)-cudnn-fe$(CUDNN_FRONTEND_VERSION)-py$(PYTHON_VERSION)-pt_$(TORCH_VERSION)-dev"
3440
options: "--gpus=all --shm-size=8gb -v /var/tmp:/var/tmp"
3541
workspace:
3642
clean: all
@@ -55,54 +61,51 @@ jobs:
5561
5662
- script: |
5763
pip install --upgrade pip
58-
pip install '.[extra,test]' -U
59-
displayName: "Install dependencies"
64+
pip install '.[extra,test]' cffi -U
65+
displayName: "Install package & dependencies"
6066
6167
- script: |
6268
set -e
6369
pip uninstall -y torchvision torchaudio
64-
pip install --pre 'nvfuser-cu121[torch]' --extra-index-url https://pypi.nvidia.com
6570
pip install '.[compiler]'
6671
python -c "from thunder.executors import nvfuser_available ; assert nvfuser_available(), 'nvFuser is missing!'"
6772
python -c "from thunder.executors.triton_utils import triton_version ; assert triton_version() is not None, 'triton is missing!'"
6873
condition: eq(variables['dependency'], 'compiler')
69-
displayName: "Install nvFuser & Thunder"
74+
displayName: "Install `compiler` [nvFuser & Thunder]"
7075
7176
- bash: |
7277
set -e
7378
pip list
7479
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'"
80+
python -c "from torch import __version__ as ver ; assert str(ver).split('+')[0] == '$(TORCH_VERSION)', f'PyTorch: installed {ver} but expected $(TORCH_VERSION)'"
7581
displayName: "Env details"
7682
77-
- bash: |
78-
pytest -v \
79-
--ignore-glob="tests/test_thunder*" \
80-
--ignore="tests/test_unsloth_executor.py"
81-
displayName: "Ordinary tests"
82-
condition: ne(variables['dependency'], 'compiler')
83-
timeoutInMinutes: "5"
83+
- bash: pytest -v
84+
displayName: "All tests"
85+
#condition: eq(variables['dependency'], 'compiler')
86+
timeoutInMinutes: "15"
8487

8588
- bash: |
86-
# install thunder from source, so that, thunder.tests will be available
87-
pip install -U "thunder[test] @ git+https://github.com/Lightning-AI/lightning-thunder.git"
88-
PL_RUN_CUDA_TESTS=0 pytest tests/ext_thunder/test_thunder_networks.py -v # without env var, it filters out all tests
89-
displayName: "Extra tests w. Thunder [main branch]"
90-
condition: eq(variables['dependency'], 'compiler')
89+
wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/run_standalone_tests.sh
90+
bash run_standalone_tests.sh "tests"
91+
displayName: "Standalone tests"
9192
env:
92-
PL_RUN_CUDA_TESTS: "0"
93+
PL_RUN_STANDALONE_TESTS: "1"
94+
# NUM_PARALLEL_TESTS: "10"
9395
timeoutInMinutes: "10"
9496
9597
- bash: |
96-
pytest -v
97-
displayName: "All tests"
98+
pip uninstall -y lightning-thunder
99+
# install Thunder from source so that `thunder.tests` is available
100+
pip install -U "lightning-thunder[test] @ git+https://github.com/Lightning-AI/lightning-thunder.git"
101+
displayName: "Re-install Thunder [main branch]"
98102
condition: eq(variables['dependency'], 'compiler')
99-
timeoutInMinutes: "5"
100103
101104
- bash: |
102-
wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/run_standalone_tests.sh
103-
bash run_standalone_tests.sh "tests"
104-
displayName: "Standalone tests"
105+
# without env var, it filters out all tests
106+
PL_RUN_CUDA_TESTS=0 pytest tests/ext_thunder/test_thunder_networks.py -v
107+
displayName: "Extra tests for Thunder [main branch]"
108+
condition: eq(variables['dependency'], 'compiler')
105109
env:
106-
PL_RUN_STANDALONE_TESTS: "1"
107-
# NUM_PARALLEL_TESTS: "10"
110+
TORCHDYNAMO_VERBOSE: "1"
108111
timeoutInMinutes: "10"

.devcontainer/Dockerfile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# See here for image contents: https://github.com/devcontainers/images/blob/main/src/python/.devcontainer/Dockerfile
2+
3+
# [Choice] Python version (use -bookworm or -bullseye variants on local arm64/Apple Silicon): 3, 3.12, 3.11, 3.10, 3.9, 3.8, 3-bookworm, 3.12-bookworm, 3.11-bookworm, 3.10-bookworm, 3.9-bookworm, 3.8-bookworm, 3-bullseye, 3.12-bullseye, 3.11-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3-buster, 3.12-buster, 3.11-buster, 3.10-buster, 3.9-buster, 3.8-buster
4+
ARG VARIANT=3-bookworm
5+
FROM mcr.microsoft.com/devcontainers/python:1-${VARIANT}
6+
7+
# Temporary: Upgrade python packages due to https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-40897
8+
# They are installed by the base image (python) which does not have the patch.
9+
RUN python3 -m pip install --upgrade pip setuptools

.devcontainer/devcontainer.json

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
2+
// https://github.com/microsoft/vscode-dev-containers/tree/v0.194.0/containers/python-3
3+
{
4+
"name": "Python 3 (litgpt)",
5+
"build": {
6+
"dockerfile": "Dockerfile",
7+
"context": "..",
8+
"args": {
9+
"VARIANT": "3.11-bookworm"
10+
}
11+
},
12+
"runArgs": [
13+
// Enable GPU passthrough, requires WSL2 on Windows
14+
//"--gpus=all",
15+
// One of the following options is required for torch multiprocessing
16+
//"--ipc=host",
17+
//"--shm-size=4gb",
18+
],
19+
// Features to add to the dev container. More info: https://containers.dev/features.
20+
"features": {
21+
"ghcr.io/devcontainers/features/git:1": {},
22+
"ghcr.io/devcontainers/features/git-lfs:1": {},
23+
//"ghcr.io/devcontainers/features/nvidia-cuda:1": {},
24+
"ghcr.io/devcontainers-extra/features/actionlint:1": {},
25+
"ghcr.io/devcontainers-extra/features/pre-commit:2": {},
26+
"ghcr.io/dhoeric/features/act:1": {},
27+
"ghcr.io/devcontainers/features/docker-in-docker:2": {
28+
"version": "latest",
29+
"moby": true
30+
}
31+
},
32+
// Set *default* container specific settings.json values on container create.
33+
"customizations": {
34+
"vscode": {
35+
"settings": {
36+
"editor.tabSize": 4,
37+
"editor.renderWhitespace": "all",
38+
"editor.formatOnSave": true,
39+
"editor.rulers": [120],
40+
"files.exclude": {
41+
"**/__pycache__": true
42+
},
43+
"python.pythonPath": "/usr/local/bin/python",
44+
"python.defaultInterpreterPath": "/usr/local/bin/python",
45+
"python.languageServer": "Pylance",
46+
"python.analysis.autoImportCompletions": true,
47+
"python.analysis.completeFunctionParens": true,
48+
"python.analysis.autoSearchPaths": true,
49+
"python.testing.pytestArgs": ["tests"],
50+
"python.testing.unittestEnabled": false,
51+
"python.testing.pytestEnabled": true,
52+
"code-eol.highlightNonDefault": true,
53+
"code-eol.highlightExtraWhitespace": true,
54+
"autoDocstring.docstringFormat": "google-notypes",
55+
"autoDocstring.guessTypes": true,
56+
"autoDocstring.generateDocstringOnEnter": true,
57+
"autoDocstring.startOnNewLine": true,
58+
"telemetry.telemetryLevel": "off",
59+
"[python]": {
60+
"editor.formatOnSave": true,
61+
"editor.defaultFormatter": "charliermarsh.ruff",
62+
"editor.codeActionsOnSave": {
63+
"source.organizeImports": "always",
64+
"source.fixAll": "always"
65+
}
66+
}
67+
},
68+
// Add the IDs of extensions you want installed when the container is created.
69+
"extensions": [
70+
"ms-python.python",
71+
"ms-python.vscode-pylance",
72+
"ms-toolsai.jupyter",
73+
"GitHub.copilot",
74+
"GitHub.copilot-chat",
75+
"github.vscode-github-actions",
76+
"SanjulaGanepola.github-local-actions",
77+
"charliermarsh.ruff",
78+
"esbenp.prettier-vscode",
79+
"ms-vscode.test-adapter-converter",
80+
"njqdev.vscode-python-typehint",
81+
"KevinRose.vsc-python-indent",
82+
"medo64.render-crlf",
83+
"shardulm94.trailing-spaces",
84+
"nhoizey.gremlins",
85+
"wayou.vscode-todo-highlight",
86+
"Gruntfuggly.todo-tree",
87+
"njpwerner.autodocstring",
88+
"rodolphebarbanneau.python-docstring-highlighter",
89+
"mechatroner.rainbow-csv",
90+
"uctakeoff.vscode-counter",
91+
"bierner.github-markdown-preview",
92+
"yahyabatulu.vscode-markdown-alert",
93+
"ms-vscode-remote.vscode-remote-extensionpack",
94+
"ms-azuretools.vscode-docker",
95+
"redhat.vscode-yaml"
96+
]
97+
}
98+
},
99+
// Use 'forwardPorts' to make a list of ports inside the container available locally.
100+
// "forwardPorts": [],
101+
// Use 'postCreateCommand' to run commands after the container is created.
102+
"postCreateCommand": "pre-commit install && pip install '.[extra,compiler,test]' -U",
103+
// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
104+
"remoteUser": "vscode"
105+
}

.github/workflows/cpu-tests.yml

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,15 @@ jobs:
2929
strategy:
3030
fail-fast: false
3131
matrix:
32-
os: ["ubuntu-22.04", "macOS-14", "windows-2022"]
32+
os: ["ubuntu-22.04", "ubuntu-24.04", "macOS-14", "windows-2022"]
3333
python-version: ["3.10"]
3434
timeout-minutes: 10
3535
steps:
36-
- uses: actions/checkout@v4
36+
- name: Checkout generic
37+
uses: actions/checkout@v4
3738
if: github.event_name != 'pull_request_target'
38-
- uses: actions/checkout@v4
39+
- name: Checkout for `pull_request_target`
40+
uses: actions/checkout@v4
3941
if: github.event_name == 'pull_request_target'
4042
with:
4143
ref: ${{ github.event.pull_request.head.sha }}
@@ -60,6 +62,13 @@ jobs:
6062
python -c "$modules"
6163
6264
pytester:
65+
# skip the PR trigger when secrets are not shared, which is the case for all PRs from forks
66+
if: |
67+
github.event_name != 'pull_request' ||
68+
(
69+
github.event_name == 'pull_request' &&
70+
contains('OWNER,MEMBER,COLLABORATOR', github.event.pull_request.author_association)
71+
)
6372
runs-on: ${{ matrix.os }}
6473
strategy:
6574
fail-fast: false
@@ -71,9 +80,11 @@ jobs:
7180
- { os: "windows-2022", python-version: "3.9" }
7281
timeout-minutes: 25
7382
steps:
74-
- uses: actions/checkout@v4
83+
- name: Checkout generic
84+
uses: actions/checkout@v4
7585
if: github.event_name != 'pull_request_target'
76-
- uses: actions/checkout@v4
86+
- name: Checkout for `pull_request_target`
87+
uses: actions/checkout@v4
7788
if: github.event_name == 'pull_request_target'
7889
with:
7990
ref: ${{ github.event.pull_request.head.sha }}
@@ -113,7 +124,12 @@ jobs:
113124
testing-guardian:
114125
runs-on: ubuntu-latest
115126
needs: [pytester, testing-imports]
116-
if: always()
127+
if: |
128+
github.event_name == 'pull_request_target' ||
129+
(
130+
github.event_name == 'pull_request' &&
131+
contains('OWNER,MEMBER,COLLABORATOR', github.event.pull_request.author_association)
132+
)
117133
steps:
118134
- run: echo "${{ needs.pytester.result }}"
119135
- name: failing...

README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
<pre>
99
✅ From scratch implementations ✅ No abstractions ✅ Beginner friendly
1010
✅ Flash attention ✅ FSDP ✅ LoRA, QLoRA, Adapter
11-
✅ Reduce GPU memory (fp4/8/16/32) ✅ 1-1000+ GPUs/TPUs ✅ 20+ LLMs
11+
✅ Reduce GPU memory (fp4/8/16/32) ✅ 1-1000+ GPUs/TPUs ✅ 20+ LLMs
1212
</pre>
1313

1414

@@ -53,7 +53,7 @@ Every LLM is implemented from scratch with **no abstractions** and **full contro
5353
# Quick start
5454
Install LitGPT
5555
```
56-
pip install 'litgpt[all]'
56+
pip install 'litgpt[extra]'
5757
```
5858

5959
Load and use any of the [20+ LLMs](#choose-from-20-llms):
@@ -139,13 +139,17 @@ Every model is written from scratch to maximize performance and remove layers of
139139
| Phi 3 | 3.8B | Microsoft Research | [Abdin et al. 2024](https://arxiv.org/abs/2404.14219) |
140140
| Phi 4 | 14B | Microsoft Research | [Abdin et al. 2024](https://arxiv.org/abs/2412.08905) |
141141
| Phi 4 Mini Instruct | 3.8B | Microsoft Research | [Microsoft 2025](https://arxiv.org/abs/2503.01743) |
142+
| Phi 4 Mini Reasoning | 3.8B | Microsoft Research | [Xu, Peng et al. 2025](https://arxiv.org/abs/2504.21233) |
143+
| Phi 4 Reasoning | 14B | Microsoft Research | [Abdin et al. 2025](https://arxiv.org/abs/2504.21318) |
144+
| Phi 4 Reasoning Plus | 14B | Microsoft Research | [Abdin et al. 2025](https://arxiv.org/abs/2504.21318) |
142145
| Platypus | 7B, 13B, 70B | Lee et al. | [Lee, Hunter, and Ruiz 2023](https://arxiv.org/abs/2308.07317) |
143146
| Pythia | {14,31,70,160,410}M, {1,1.4,2.8,6.9,12}B | EleutherAI | [Biderman et al. 2023](https://arxiv.org/abs/2304.01373) |
144147
| Qwen2.5 | 0.5B, 1.5B, 3B, 7B, 14B, 32B, 72B | Alibaba Group | [Qwen Team 2024](https://qwenlm.github.io/blog/qwen2.5/) |
145148
| Qwen2.5 Coder | 0.5B, 1.5B, 3B, 7B, 14B, 32B | Alibaba Group | [Hui, Binyuan et al. 2024](https://arxiv.org/abs/2409.12186) |
146149
| Qwen2.5 Math | 1.5B, 7B, 72B | Alibaba Group | [An, Yang et al. 2024](https://arxiv.org/abs/2409.12122) |
147150
| QwQ | 32B | Alibaba Group | [Qwen Team 2025](https://qwenlm.github.io/blog/qwq-32b/) |
148151
| QwQ-Preview | 32B | Alibaba Group | [Qwen Team 2024](https://qwenlm.github.io/blog/qwq-32b-preview/) |
152+
| Qwen3 | 0.6B, 1.7B, 4B, 8B, 14B, 32B | Alibaba Group | [Qwen Team 2025](https://arxiv.org/abs/2505.09388) |
149153
| R1 Distill Llama | 8B, 70B | DeepSeek AI | [DeepSeek AI 2025](https://github.com/deepseek-ai/DeepSeek-R1/blob/main/DeepSeek_R1.pdf) |
150154
| SmolLM2 | 135M, 360M, 1.7B | Hugging Face | [Hugging Face 2024](https://github.com/huggingface/smollm) |
151155
| Salamandra | 2B, 7B | Barcelona Supercomputing Centre | [BSC-LTC 2024](https://github.com/BSC-LTC/salamandra) |

extensions/thunder/pretrain.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pprint
66
import sys
77
import time
8+
from dataclasses import asdict
89
from datetime import timedelta
910
from functools import partial
1011
from pathlib import Path
@@ -20,7 +21,7 @@
2021
from typing_extensions import Literal
2122

2223
from litgpt import Tokenizer
23-
from litgpt.args import EvalArgs, TrainArgs
24+
from litgpt.args import EvalArgs, LogArgs, TrainArgs
2425
from litgpt.data import DataModule, TinyLlama
2526
from litgpt.model import GPT, Block, CausalSelfAttention, Config, LLaMAMLP
2627
from litgpt.utils import (
@@ -70,6 +71,7 @@ def setup(
7071
tie_embeddings=False,
7172
),
7273
eval: EvalArgs = EvalArgs(interval=1000, max_iters=100),
74+
log: LogArgs = LogArgs(),
7375
optimizer: Union[str, Dict] = "AdamW",
7476
devices: Union[int, str] = "auto",
7577
num_nodes: int = 1,
@@ -121,7 +123,12 @@ def setup(
121123
tokenizer = Tokenizer(tokenizer_dir) if tokenizer_dir is not None else None
122124

123125
logger = choose_logger(
124-
logger_name, out_dir, name=f"pretrain-{config.name}", resume=bool(resume), log_interval=train.log_interval
126+
logger_name,
127+
out_dir,
128+
name=f"pretrain-{config.name}",
129+
resume=bool(resume),
130+
log_interval=train.log_interval,
131+
log_args=asdict(log),
125132
)
126133

127134
if devices * num_nodes > 1:

extensions/xla/generate/adapter.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def setup(
2626
prompt: str = "What food do llamas eat?",
2727
*,
2828
input: str = "",
29+
sys_prompt: Optional[str] = None,
2930
adapter_path: Path = Path("out/adapter/alpaca/lit_model_adapter_finetuned.pth"),
3031
checkpoint_dir: Path = Path("checkpoints/tiiuae/falcon-7b"),
3132
max_new_tokens: int = 100,
@@ -40,6 +41,7 @@ def setup(
4041
Args:
4142
prompt: The prompt/instruction (Alpaca style).
4243
input: Optional input (Alpaca style).
44+
sys_prompt: Optional system prompt.
4345
adapter_path: Path to the checkpoint with trained adapter weights, which are the output of
4446
`xla/finetune/adapter.py`.
4547
checkpoint_dir: The path to the checkpoint folder with pretrained model weights.
@@ -52,13 +54,14 @@ def setup(
5254
devices = XLAAccelerator.auto_device_count()
5355
strategy = XLAFSDPStrategy(auto_wrap_policy={Block}) if devices > 1 else "auto"
5456
fabric = L.Fabric(devices=devices, precision=precision, strategy=strategy)
55-
fabric.launch(main, prompt, input, adapter_path, checkpoint_dir, max_new_tokens, top_k, temperature)
57+
fabric.launch(main, prompt, input, sys_prompt, adapter_path, checkpoint_dir, max_new_tokens, top_k, temperature)
5658

5759

5860
def main(
5961
fabric: L.Fabric,
6062
prompt: str,
6163
input: str,
64+
sys_prompt: Optional[str],
6265
adapter_path: Path,
6366
checkpoint_dir: Path,
6467
max_new_tokens: int,
@@ -90,7 +93,7 @@ def main(
9093
tokenizer = Tokenizer(checkpoint_dir)
9194
# TODO: Load prompt style from checkpoint and apply it here
9295
prompt_style = Alpaca()
93-
prompt = prompt_style.apply(prompt, input=input)
96+
prompt = prompt_style.apply(prompt, sys_prompt=sys_prompt, input=input)
9497
encoded = tokenizer.encode(prompt, device=fabric.device)
9598
prompt_length = encoded.size(0)
9699
max_returned_tokens = prompt_length + max_new_tokens

0 commit comments

Comments
 (0)