Skip to content

Commit f5b711f

Browse files
authored
Update default models to be benchmarked continuously (#11610)
### Summary

- Promoted Qwen3-0.6B to be the default model: it is not only small enough to run quickly, but it also covers most of the advanced changes in both etLLM and [optimum-executorch](https://github.com/huggingface/optimum-executorch).
- Removed tinyllama, as nobody would care about its perf. We shouldn't use the device farm for correctness testing.
- ~~Added google/gemma-3-1b-it to apple perf (private)~~
1 parent 9f443ad commit f5b711f

File tree

4 files changed

+12
-12
lines changed

4 files changed

+12
-12
lines changed

.github/workflows/android-perf-private-device-experiment.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ on:
1818
description: Models to be benchmarked
1919
required: false
2020
type: string
21-
default: google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf
21+
default: Qwen/Qwen3-0.6B
2222
devices:
2323
description: Target devices to run benchmark
2424
required: false
@@ -34,7 +34,7 @@ on:
3434
description: Models to be benchmarked
3535
required: false
3636
type: string
37-
default: google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf
37+
default: Qwen/Qwen3-0.6B
3838
devices:
3939
description: Target devices to run benchmark
4040
required: false
@@ -57,6 +57,6 @@ jobs:
5757
id-token: write
5858
contents: read
5959
with:
60-
models: ${{ inputs.models || 'Qwen/Qwen3-0.6B' }}
60+
models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
6161
devices: samsung_galaxy_s22_private
6262
benchmark_configs: ${{ inputs.benchmark_configs }}

.github/workflows/android-perf.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ on:
2222
description: Models to be benchmarked
2323
required: false
2424
type: string
25-
default: llama
25+
default: Qwen/Qwen3-0.6B
2626
devices:
2727
description: Target devices to run benchmark
2828
required: false
@@ -38,7 +38,7 @@ on:
3838
description: Models to be benchmarked
3939
required: false
4040
type: string
41-
default: llama
41+
default: Qwen/Qwen3-0.6B
4242
devices:
4343
description: Target devices to run benchmark
4444
required: false
@@ -72,7 +72,7 @@ jobs:
7272
# Separate default values from the workflow dispatch. To ensure defaults are accessible
7373
# during scheduled runs and to provide flexibility for different defaults between
7474
# on-demand and periodic benchmarking.
75-
CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'llama,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf' || 'llama' }}
75+
CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
7676
CRON_DEFAULT_DEVICES: samsung_galaxy_s22
7777
run: |
7878
set -eux

.github/workflows/apple-perf-private-device-experiment.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ on:
1818
description: Models to be benchmarked
1919
required: false
2020
type: string
21-
default: google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf
21+
default: Qwen/Qwen3-0.6B
2222
devices:
2323
description: Target devices to run benchmark
2424
required: false
@@ -34,7 +34,7 @@ on:
3434
description: Models to be benchmarked
3535
required: false
3636
type: string
37-
default: Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf
37+
default: Qwen/Qwen3-0.6B
3838
devices:
3939
description: Target devices to run benchmark
4040
required: false
@@ -57,6 +57,6 @@ jobs:
5757
id-token: write
5858
contents: read
5959
with:
60-
models: ${{ inputs.models || 'Qwen/Qwen3-0.6B' }}
60+
models: ${{ inputs.models || github.event_name == 'schedule' && 'Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
6161
devices: apple_iphone_15_private
6262
benchmark_configs: ${{ inputs.benchmark_configs }}

.github/workflows/apple-perf.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ on:
2222
description: Models to be benchmarked
2323
required: false
2424
type: string
25-
default: llama
25+
default: Qwen/Qwen3-0.6B
2626
devices:
2727
description: Target devices to run benchmark
2828
required: false
@@ -38,7 +38,7 @@ on:
3838
description: Models to be benchmarked
3939
required: false
4040
type: string
41-
default: llama
41+
default: Qwen/Qwen3-0.6B
4242
devices:
4343
description: Target devices to run benchmark
4444
required: false
@@ -72,7 +72,7 @@ jobs:
7272
# Separate default values from the workflow dispatch. To ensure defaults are accessible
7373
# during scheduled runs and to provide flexibility for different defaults between
7474
# on-demand and periodic benchmarking.
75-
CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'llama,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'llama' }}
75+
CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
7676
CRON_DEFAULT_DEVICES: apple_iphone_15
7777
run: |
7878
set -eux

0 commit comments

Comments
 (0)