Skip to content

Commit 4ea2542

Browse files
authored
fix: resolve failing CI (#1944)
1 parent 3b7f5bb commit 4ea2542

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+332
-241
lines changed

.github/azure-gpu-test-with-thunder.yml renamed to .azure/gpu-test-with-thunder.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,21 +44,21 @@ jobs:
4444
4545
- script: |
4646
pip install --upgrade pip
47-
pip install '.[all,test]'
47+
pip install '.[extra,all,test]'
4848
displayName: 'Install dependencies'
4949
5050
- script: |
5151
pip uninstall -y torchvision torchaudio
5252
pip install --pre 'nvfuser-cu121[torch]' --extra-index-url https://pypi.nvidia.com
53-
displayName: 'Install PyTorch nightly'
53+
displayName: 'Install nvFuser'
5454
5555
- bash: |
5656
set -e
5757
pip list
5858
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'"
5959
displayName: "Env details"
6060
61-
- bash: pytest -v --disable-pytest-warnings --strict-markers --color=yes
61+
- bash: pytest -v
6262
displayName: 'Ordinary tests'
6363
env:
6464
PL_RUN_CUDA_TESTS: "1"
File renamed without changes.

.github/workflows/cpu-tests.yml

Lines changed: 53 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,47 @@ env:
1818
HF_TOKEN: ${{ secrets.HF_TOKEN }}
1919

2020
jobs:
21-
cpu-tests:
21+
testing-imports:
2222
runs-on: ${{ matrix.os }}
2323
strategy:
2424
fail-fast: false
2525
matrix:
26+
os: [ "ubuntu-22.04", "macOS-14", "windows-2022" ]
27+
python-version: [ "3.10" ]
28+
timeout-minutes: 10
29+
30+
steps:
31+
- uses: actions/checkout@v4
32+
- uses: actions/setup-python@v5
33+
with:
34+
python-version: ${{ matrix.python-version }}
35+
36+
- name: Install minimal dependencies
37+
run: |
38+
pip install .
39+
pip list
40+
41+
- name: Testing package imports
42+
# make sure all modules are still importable with only the minimal dependencies available
43+
run: |
44+
modules=$(
45+
find litgpt -type f -name "*.py" | \
46+
sed 's/\.py$//' | sed 's/\//./g' | \
47+
sed 's/.__init__//g' | xargs -I {} echo "import {};"
48+
)
49+
echo "$modules"
50+
python -c "$modules"
51+
52+
pytester:
53+
runs-on: ${{ matrix.os }}
54+
strategy:
55+
fail-fast: false
56+
matrix:
57+
os: ["ubuntu-22.04"]
58+
python-version: ["3.9", "3.10", "3.11"]
2659
include:
27-
- {os: "macOS-14", python-version: "3.10"}
28-
- {os: "ubuntu-22.04", python-version: "3.11"}
29-
- {os: "ubuntu-22.04", python-version: "3.10"}
30-
- {os: "ubuntu-22.04", python-version: "3.9"}
31-
- {os: "windows-2022", python-version: "3.9"}
60+
- {os: "macOS-14", python-version: "3.9"} # without Thunder
61+
- {os: "windows-2022", python-version: "3.9"} # without Thunder
3262
timeout-minutes: 25
3363

3464
steps:
@@ -42,25 +72,24 @@ jobs:
4272
cache-dependency-path: |
4373
pyproject.toml
4474
45-
- name: Install minimal dependencies
46-
run: |
47-
# python -m pip install --upgrade pip
48-
pip install .
49-
pip list
50-
# make sure all modules are still importable with only the minimal dependencies available
51-
modules=$(
52-
find litgpt -type f -name "*.py" | \
53-
sed 's/\.py$//' | sed 's/\//./g' | \
54-
sed 's/.__init__//g' | xargs -I {} echo "import {};"
55-
)
56-
echo "$modules"
57-
python -c "$modules"
58-
59-
- name: Install all dependencies
75+
- name: Install dependencies
6076
run: |
61-
pip install '.[all,test]'
77+
pip install '.[extra,all,test]'
6278
pip list
6379
6480
- name: Run tests
65-
run: |
66-
pytest -v --disable-pytest-warnings --strict-markers --color=yes --timeout 120
81+
run: pytest -v litgpt/ tests/ --timeout 120
82+
83+
testing-guardian:
84+
runs-on: ubuntu-latest
85+
needs: [pytester, testing-imports]
86+
if: always()
87+
steps:
88+
- run: echo "${{ needs.pytester.result }}"
89+
- name: failing...
90+
if: needs.pytester.result == 'failure'
91+
run: exit 1
92+
- name: cancelled or skipped...
93+
if: contains(fromJSON('["cancelled", "skipped"]'), needs.pytester.result)
94+
timeout-minutes: 1
95+
run: sleep 90

extensions/thunder/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ After applying the DDP transformation, the backward trace will include the expec
460460
With `L.Fabric`, this is how to use them:
461461

462462
```python
463-
from extensions.thunder.strategies import ThunderFSDPStrategy, ThunderDDPStrategy
463+
from extensions.extensions.thunder.strategies import ThunderFSDPStrategy, ThunderDDPStrategy
464464

465465
# fully-sharded data parallel
466466
strategy = ThunderFSDPStrategy(

extensions/thunder/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import sys
2+
from pathlib import Path
3+
4+
# support running without installing as a package, adding extensions to the Python path
5+
wd = Path(__file__).parent.parent.resolve()
6+
sys.path.append(str(wd))

extensions/thunder/strategies/thunder_ddp.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,17 @@
2222
_sync_ddp_if_available,
2323
)
2424
from lightning.fabric.utilities.rank_zero import rank_zero_only
25-
from lightning_utilities.core.imports import RequirementCache
2625
from lightning_utilities.core.rank_zero import rank_zero_only as utils_rank_zero_only
2726
from torch import Tensor
2827
from torch.nn import Module
2928
from typing_extensions import override
3029

30+
from litgpt.utils import _THUNDER_AVAILABLE
31+
3132
if TYPE_CHECKING:
3233
from thunder import Executor
3334

3435

35-
_THUNDER_AVAILABLE = RequirementCache("lightning-thunder", "thunder")
36-
3736

3837
class ThunderDDPStrategy(ParallelStrategy):
3938
def __init__(

extensions/thunder/strategies/thunder_fsdp.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@
2525
from lightning.fabric.utilities.rank_zero import rank_zero_only
2626
from lightning.fabric.utilities.seed import reset_seed
2727
from lightning.fabric.utilities.types import _PATH, _Stateful
28-
from lightning_utilities.core.imports import RequirementCache
2928
from lightning_utilities.core.rank_zero import rank_zero_only as utils_rank_zero_only
3029
from torch import Tensor
3130
from torch.nn import Module
3231
from torch.optim import Optimizer
3332
from typing_extensions import override
33+
from litgpt.utils import _THUNDER_AVAILABLE
3434
from extensions.thunder.strategies.thunder_ddp import _ThunderDataParalellBackwardSyncControl
3535

3636
if TYPE_CHECKING:
@@ -42,9 +42,6 @@
4242
_BUCKETING_STRATEGY = Union[FSDPBucketingStrategy, Literal["NONE", "LAYER", "BLOCK"]]
4343

4444

45-
_THUNDER_AVAILABLE = RequirementCache("lightning-thunder", "thunder")
46-
47-
4845
class ThunderFSDPStrategy(ParallelStrategy, _Sharded):
4946
def __init__(
5047
self,

extensions/thunder/unsloth/__init__.py

Whitespace-only changes.

extensions/thunder/unsloth/executor.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
# Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.
22
import sys
3+
import torch
34
from pathlib import Path
45
from typing import Optional, Tuple
5-
6-
import thunder
7-
import thunder.torch as ltorch
8-
import torch
96
from thunder.core.proxies import TensorProxy
107
from thunder.core.transforms import get_grad, mean_backward, put_grads
118
from thunder.extend import OperatorExecutor, register_executor
129
from thunder.torch import ne, sum, true_divide
1310
from torch import Tensor
1411

1512
import litgpt.model
13+
from litgpt.utils import _THUNDER_AVAILABLE
14+
15+
if _THUNDER_AVAILABLE:
16+
import thunder
17+
import thunder.torch as ltorch
1618

1719
sys.path.append(str(Path(__file__).parent))
1820

extensions/thunder/unsloth/kernels/cross_entropy_loss.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,14 @@
1313
# limitations under the License.
1414

1515
import torch
16-
import triton
17-
import triton.language as tl
1816

17+
from litgpt.utils import _TRITON_AVAILABLE
1918
from .utils import MAX_FUSED_SIZE, calculate_settings
2019

20+
if _TRITON_AVAILABLE:
21+
import triton
22+
import triton.language as tl
23+
2124

2225
@triton.jit
2326
def _cross_entropy_forward(

extensions/thunder/unsloth/kernels/rope_embedding.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import triton
16-
import triton.language as tl
17-
import torch
15+
from litgpt.utils import _TRITON_AVAILABLE
1816
from .utils import calculate_settings
1917

18+
if _TRITON_AVAILABLE:
19+
import triton
20+
import triton.language as tl
21+
2022
ROPE_GROUP_SIZE = 4
2123

2224
@triton.heuristics({"BACKWARD_PASS": lambda args: args["BACKWARD_PASS"],})

extensions/thunder/unsloth/kernels/swiglu.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,12 @@
1313
# limitations under the License.
1414

1515
import torch
16-
import triton
17-
import triton.language as tl
16+
17+
from litgpt.utils import _TRITON_AVAILABLE
18+
19+
if _TRITON_AVAILABLE:
20+
import triton
21+
import triton.language as tl
1822

1923

2024
@triton.jit

extensions/thunder/unsloth/kernels/utils.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import triton
15+
16+
from litgpt.utils import _TRITON_AVAILABLE
17+
18+
if _TRITON_AVAILABLE:
19+
import triton
1620

1721
MAX_FUSED_SIZE = 65536 # 2**16
1822
next_power_of_2 = triton.next_power_of_2

extensions/xla/__init__

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import sys
2+
from pathlib import Path
3+
4+
# support running without installing as a package, adding extensions to the Python path
5+
wd = Path(__file__).parent.parent.resolve()
6+
sys.path.append(str(wd))

extensions/xla/finetune/__init__

Whitespace-only changes.

extensions/xla/finetune/adapter.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@
2222
wd = Path(__file__).parents[3].resolve()
2323
sys.path.append(str(wd))
2424

25-
from extensions.xla.generate.base import generate
26-
from extensions.xla.scripts.prepare_alpaca import generate_prompt
27-
from extensions.xla.utils import rank_print, sequential_load_and_fsdp_wrap
25+
from xla.generate.base import generate
26+
from xla.scripts.prepare_alpaca import generate_prompt
27+
from xla.utils import rank_print, sequential_load_and_fsdp_wrap
2828

2929
eval_interval = 200
3030
save_interval = 200

extensions/xla/generate/__init__

Whitespace-only changes.

extensions/xla/generate/adapter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
wd = Path(__file__).parents[3].resolve()
1919
sys.path.append(str(wd))
2020

21-
from extensions.xla.generate.base import generate
22-
from extensions.xla.utils import rank_print
21+
from xla.generate.base import generate
22+
from xla.utils import rank_print
2323

2424

2525
def setup(

extensions/xla/generate/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
wd = Path(__file__).parents[3].resolve()
2020
sys.path.append(str(wd))
2121

22-
from extensions.xla.utils import rank_print
22+
from xla.utils import rank_print
2323

2424

2525
# xla does not support `inference_mode`: RuntimeError: Cannot set version_counter for inference tensor

extensions/xla/scripts/__init__

Whitespace-only changes.

litgpt/utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import sys
1313
from dataclasses import asdict, is_dataclass
1414
from io import BytesIO
15+
16+
from lightning_utilities.core.imports import package_available
1517
from packaging import version
1618
from pathlib import Path
1719
import subprocess
@@ -35,6 +37,9 @@
3537
if TYPE_CHECKING:
3638
from litgpt import GPT, Config
3739

40+
_THUNDER_AVAILABLE = package_available("thunder")
41+
_TRITON_AVAILABLE = package_available("triton")
42+
3843

3944
def init_out_dir(out_dir: Path) -> Path:
4045
if not isinstance(out_dir, Path):
@@ -815,3 +820,17 @@ def select_sft_generate_example(eval, data):
815820
else:
816821
raise ValueError(f"Unknown evaluation example type: {eval.evaluate_example}")
817822
return instruction
823+
824+
825+
826+
def _RunIf(thunder: bool = False, **kwargs):
827+
import pytest
828+
from lightning.fabric.utilities.testing import _runif_reasons
829+
830+
reasons, marker_kwargs = _runif_reasons(**kwargs)
831+
832+
if thunder and not package_available("thunder"):
833+
# if we require Thunder, but it's not available, we should skip
834+
reasons.append("Thunder")
835+
836+
return pytest.mark.skipif(condition=len(reasons) > 0, reason=f"Requires: [{' + '.join(reasons)}]", **marker_kwargs)

0 commit comments

Comments
 (0)