
Commit 194b47f

Author: morelos (committed)
Update base for Update on "[ET-VK][Ops] affine quantization operators registration"
# Context

In order to enable dynamic quantization, especially for the source transform method using `Int8DynActInt4WeightQuantizer`, we need Vulkan versions of `quantize_affine`, `dequantize_affine`, and `choose_qparams_affine`. We currently do not have a shader that performs the block-based quantization these operators expect, so we delegate to the per_tensor variant to get unblocked. At a later stage this will likely be developed further to avoid excessive accuracy loss.

# Changes

This creates a schema reference in the TorchAO library for out variants of these operators, followed by a VK_REGISTER_OP on each so they are properly registered when lowering the ET model with Vulkan.

The vulkan_quantizer is also changed to support a dynamic quantization config, so we are no longer limited to static quantization. We keep `_annotate_for_static_quantization_config` for parity/legacy reasons and add an equivalent method for the dynamic quantization config.

We also changed `Linear.cpp` to allow a passthrough for weight_data, since during dynamic quantization it may be a tensor_data rather than a tensor_ref.

Differential Revision: [D78035354](https://our.internmc.facebook.com/intern/diff/D78035354/)

[ghstack-poisoned]
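The reason delegating to the per_tensor shader is a workable stopgap: block-based affine quantization reduces to per-tensor quantization when the block size spans the whole tensor, so a single (scale, zero_point) pair is produced. Below is a minimal sketch of that equivalence using torchao's quant_primitives helpers; the function names come from torchao, but exact keyword signatures may differ across torchao versions, so treat it as illustrative rather than as the code added in this commit.

```python
# Sketch: block-based affine quantization collapses to per-tensor quantization
# when block_size covers the entire tensor. Assumes torchao's quant_primitives
# API; signatures may vary between torchao versions.
import torch
from torchao.quantization.quant_primitives import (
    MappingType,
    choose_qparams_affine,
    quantize_affine,
    dequantize_affine,
)

x = torch.randn(4, 8)

# One block spanning the whole tensor -> a single (scale, zero_point) pair,
# i.e. the per_tensor behavior the Vulkan shaders currently delegate to.
block_size = tuple(x.shape)

scale, zero_point = choose_qparams_affine(
    x,
    MappingType.ASYMMETRIC,  # illustrative mapping choice
    block_size,
    target_dtype=torch.int8,
    quant_min=-128,
    quant_max=127,
)

q = quantize_affine(x, block_size, scale, zero_point, torch.int8,
                    quant_min=-128, quant_max=127)
x_hat = dequantize_affine(q, block_size, scale, zero_point, torch.int8,
                          quant_min=-128, quant_max=127)

print("max abs reconstruction error:", (x - x_hat).abs().max().item())
```

A block-based shader would instead compute one (scale, zero_point) pair per block (e.g. per group of weight columns), which is what a later iteration of these Vulkan operators would need to recover the accuracy of group-wise schemes like Int8DynActInt4Weight.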
2 parents 37ab037 + 1540659 commit 194b47f

File tree

69 files changed (+4514, −171 lines)


.ci/scripts/setup-emscripten.sh

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+
+set -ex
+
+install_emscripten() {
+  git clone https://github.com/emscripten-core/emsdk.git
+  pushd emsdk || return
+  ./emsdk install 4.0.10
+  ./emsdk activate 4.0.10
+  source ./emsdk_env.sh
+  popd || return
+}
+
+install_emscripten

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 2 additions & 2 deletions
@@ -35,15 +35,15 @@ cmake -DPYTHON_EXECUTABLE=python \
     -DEXECUTORCH_BUILD_XNNPACK=OFF \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
     -Bcmake-out .
 cmake --build cmake-out -j16 --target install --config Release
 
 # Install llama runner with torchao
 cmake -DPYTHON_EXECUTABLE=python \
     -DBUILD_TESTING=OFF \
     -DCMAKE_BUILD_TYPE=Release \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_XNNPACK=OFF \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \

.ci/scripts/test_llava.sh

Lines changed: 2 additions & 2 deletions
@@ -39,7 +39,7 @@ EXECUTORCH_COMMON_CMAKE_ARGS=" \
   -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
   -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
   -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-  -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+  -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
   -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
   -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
   -DEXECUTORCH_BUILD_XNNPACK=ON \
@@ -69,7 +69,7 @@ LLAVA_COMMON_CMAKE_ARGS=" \
   -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
   -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
   -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-  -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+  -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
   -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
   -DEXECUTORCH_BUILD_XNNPACK=ON"
 

.ci/scripts/test_phi_3_mini.sh

Lines changed: 2 additions & 2 deletions
@@ -33,7 +33,7 @@ cmake_install_executorch_libraries() {
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
     -B${BUILD_DIR} .
 
   cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
@@ -43,7 +43,7 @@ cmake_build_phi_3_mini() {
   cmake -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
     -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
     -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \

.github/workflows/apple.yml

Lines changed: 1 addition & 1 deletion
@@ -152,7 +152,7 @@ jobs:
           "backend_coreml"
           "backend_mps"
           "backend_xnnpack"
-          "kernels_custom"
+          "kernels_llm"
           "kernels_optimized"
           "kernels_quantized"
           "threadpool"

.github/workflows/pull.yml

Lines changed: 28 additions & 0 deletions
@@ -734,3 +734,31 @@ jobs:
 
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-openvino.sh
       PYTHON_EXECUTABLE=python bash .ci/scripts/test_openvino.sh
+
+  test-build-wasm-linux:
+    name: test-build-wasm-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        BUILD_TOOL="cmake"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+
+        # Install Node.js and Emscripten
+        source .ci/scripts/setup-emscripten.sh
+
+        # Test selective build
+        PYTHON_EXECUTABLE=python bash examples/wasm/test_build_wasm.sh

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
@@ -645,7 +645,7 @@ jobs:
             -DEXECUTORCH_BUILD_XNNPACK=ON \
             -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
             -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-            -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+            -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
             -DEXECUTORCH_BUILD_DEVTOOLS=ON \
             -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
             -Bcmake-out .

.gitmodules

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
 	url = https://github.com/pytorch/cpuinfo.git
 [submodule "backends/xnnpack/third-party/pthreadpool"]
 	path = backends/xnnpack/third-party/pthreadpool
-	url = https://github.com/Maratyszcza/pthreadpool.git
+	url = https://github.com/google/pthreadpool.git
 [submodule "extension/llm/tokenizers"]
 	path = extension/llm/tokenizers
 	url = https://github.com/pytorch-labs/tokenizers.git
