Update default models to be benchmarked continuously

guangy10 · guangy10 · commit 52769340230e · 2025-06-12T13:30:12.000-07:00
diff --git a/.github/workflows/android-perf-private-device-experiment.yml b/.github/workflows/android-perf-private-device-experiment.yml
@@ -18,7 +18,7 @@ on:
         description: Models to be benchmarked
         required: false
         type: string
-        default: google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf
+        default: Qwen/Qwen3-0.6B
       devices:
         description: Target devices to run benchmark
         required: false
@@ -34,7 +34,7 @@ on:
         description: Models to be benchmarked
         required: false
         type: string
-        default: google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf
+        default: Qwen/Qwen3-0.6B
       devices:
         description: Target devices to run benchmark
         required: false
@@ -57,6 +57,6 @@ jobs:
       id-token: write
       contents: read
     with:
-      models: ${{ inputs.models || 'Qwen/Qwen3-0.6B' }}
+      models: ${{ inputs.models || github.event_name == 'schedule' && 'google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
       devices: samsung_galaxy_s22_private
       benchmark_configs: ${{ inputs.benchmark_configs }}
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
@@ -20,7 +20,7 @@ on:
         description: Models to be benchmarked
         required: false
         type: string
-        default: llama
+        default: Qwen/Qwen3-0.6B
       devices:
         description: Target devices to run benchmark
         required: false
@@ -36,7 +36,7 @@ on:
         description: Models to be benchmarked
         required: false
         type: string
-        default: llama
+        default: Qwen/Qwen3-0.6B
       devices:
         description: Target devices to run benchmark
         required: false
@@ -70,7 +70,7 @@ jobs:
           # Separate default values from the workflow dispatch. To ensure defaults are accessible
           # during scheduled runs and to provide flexibility for different defaults between
           # on-demand and periodic benchmarking.
-          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'llama,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf' || 'llama' }}
+          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
           CRON_DEFAULT_DEVICES: samsung_galaxy_s22
         run: |
           set -eux
diff --git a/.github/workflows/apple-perf-private-device-experiment.yml b/.github/workflows/apple-perf-private-device-experiment.yml
@@ -18,7 +18,7 @@ on:
         description: Models to be benchmarked
         required: false
         type: string
-        default: google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf
+        default: Qwen/Qwen3-0.6B
       devices:
         description: Target devices to run benchmark
         required: false
@@ -34,7 +34,7 @@ on:
         description: Models to be benchmarked
         required: false
         type: string
-        default: Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf
+        default: Qwen/Qwen3-0.6B
       devices:
         description: Target devices to run benchmark
         required: false
@@ -57,6 +57,6 @@ jobs:
       id-token: write
       contents: read
     with:
-      models: ${{ inputs.models || 'Qwen/Qwen3-0.6B' }}
+      models: ${{ inputs.models || github.event_name == 'schedule' && 'google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
       devices: apple_iphone_15_private
       benchmark_configs: ${{ inputs.benchmark_configs }}
diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml
@@ -20,7 +20,7 @@ on:
         description: Models to be benchmarked
         required: false
         type: string
-        default: llama
+        default: Qwen/Qwen3-0.6B
       devices:
         description: Target devices to run benchmark
         required: false
@@ -36,7 +36,7 @@ on:
         description: Models to be benchmarked
         required: false
         type: string
-        default: llama
+        default: Qwen/Qwen3-0.6B
       devices:
         description: Target devices to run benchmark
         required: false
@@ -70,7 +70,7 @@ jobs:
           # Separate default values from the workflow dispatch. To ensure defaults are accessible
           # during scheduled runs and to provide flexibility for different defaults between
           # on-demand and periodic benchmarking.
-          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'llama,mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'llama' }}
+          CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'mv3,mv2,ic4,ic3,resnet50,edsr,mobilebert,w2l,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8,google/gemma-3-1b-it,Qwen/Qwen3-0.6B,HuggingFaceTB/SmolLM2-135M,meta-llama/Llama-3.2-1B,allenai/OLMo-1B-hf' || 'Qwen/Qwen3-0.6B' }}
           CRON_DEFAULT_DEVICES: apple_iphone_15
         run: |
           set -eux