27 changes: 27 additions & 0 deletions examples/aishell2/asr/README.md
@@ -0,0 +1,27 @@
## Tutorial

First, prepare the training data `data/train.jsonl`; each line is a JSON object like:

```
{"key": "IC0001W0001", "wav": "AISHELL-2/iOS/data/wav/C0001/IC0001W0001.wav", "txt": "厨房用具"}
{"key": "IC0001W0002", "wav": "AISHELL-2/iOS/data/wav/C0001/IC0001W0002.wav", "txt": "电压力锅"}
```
where `key` is the utterance ID, `wav` is the path to the audio file, and `txt` is its transcript.
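
If you are starting from a raw AISHELL-2 download, here is a minimal sketch for building the jsonl. It assumes the stock `trans.txt` (one `<key> <transcript>` pair per line) and that the speaker directory is characters 2-6 of the key, as in the examples above:

``` python
import json
import os

root = "AISHELL-2/iOS/data"  # adjust to your AISHELL-2 location

os.makedirs("data", exist_ok=True)
with open(os.path.join(root, "trans.txt"), encoding="utf-8") as fin, \
        open("data/train.jsonl", "w", encoding="utf-8") as fout:
    for line in fin:
        key, txt = line.strip().split(maxsplit=1)
        # e.g. key "IC0001W0001" -> speaker directory "C0001"
        wav = os.path.join(root, "wav", key[1:6], key + ".wav")
        fout.write(json.dumps({"key": key, "wav": wav, "txt": txt},
                              ensure_ascii=False) + "\n")
```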

To train the model, just run

``` shell
bash run.sh --stage train
```

To decode, prepare the test data `data/test.jsonl` in the same format as the training data, then run

``` shell
bash run.sh --stage decode
```
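
Decoding writes the hypotheses to `result.jsonl` under the checkpoint directory, then scores them against the references with `tools/compute_wer.py` at the character level; the CER report lands in `result.wer` (see `run.sh`).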

## Results

| LLM        | Speech Encoder | LoRA | Test CER (%) | Details                                   |
|------------|----------------|------|--------------|-------------------------------------------|
| Qwen3-1.7B | firered        | No   | 5.41         | 4 A800 GPUs, pack_size 18000, 10000 steps |
3 changes: 3 additions & 0 deletions examples/aishell2/asr/conf/accelerator_config.json
@@ -0,0 +1,3 @@
{
"dispatch_batches": false
}
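
`dispatch_batches: false` makes HF Accelerate let each process iterate its own dataloader, instead of fetching batches on the main process and broadcasting slices; this is the usual setting for iterable or packed datasets.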
54 changes: 54 additions & 0 deletions examples/aishell2/asr/conf/ds_config_zero2.json
@@ -0,0 +1,54 @@
{
"fp16": {
"enabled": "auto",
"loss_scale": 0,
"loss_scale_window": 1000,
"initial_scale_power": 16,
"hysteresis": 2,
"min_loss_scale": 1
},
"bf16": {
"enabled": "auto"
},
"optimizer": {
"type": "AdamW",
"params": {
"lr": "auto",
"betas": "auto",
"eps": "auto",
"weight_decay": "auto"
}
},

"scheduler": {
"type": "WarmupLR",
"params": {
"warmup_min_lr": "auto",
"warmup_max_lr": "auto",
"warmup_num_steps": "auto"
}
},

"zero_optimization": {
"stage": 2,
"offload_optimizer": {
"device": "none",
"pin_memory": true
},
"offload_param": {
"device": "none",
"pin_memory": true
},
"overlap_comm": true,
"contiguous_gradients": true,
"sub_group_size": 1e9,
"reduce_bucket_size": "auto"
},

"gradient_accumulation_steps": "auto",
"gradient_clipping": "auto",
"steps_per_print": 100,
"train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"wall_clock_breakdown": false
}
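
The `"auto"` values here are resolved from the HF `TrainingArguments` at launch. Otherwise the file selects ZeRO stage 2, which shards optimizer states and gradients across GPUs while keeping parameters replicated, with both offload targets disabled (`"device": "none"`).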
9 changes: 9 additions & 0 deletions examples/aishell2/asr/conf/generation_config.json
@@ -0,0 +1,9 @@
{
"do_sample": false,
"eos_token_id": [
151645,
151643
],
"max_new_tokens": 50,
"transformers_version": "4.37.0"
}
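
The two `eos_token_id` entries are Qwen's `<|im_end|>` (151645) and `<|endoftext|>` (151643), so generation stops at the end of the assistant turn; with `do_sample` disabled, decoding is greedy and capped at 50 new tokens per utterance.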
17 changes: 17 additions & 0 deletions examples/aishell2/asr/conf/qwen3-1.7b_firered.json
@@ -0,0 +1,17 @@
{
"architectures": [
"TouchASU"
],
"encoder_ds_rate": 4,
"encoder_projector_ds_rate": 2,
"hidden_size": 2048,
"llm_model_name_or_path": "Qwen/Qwen3-1.7B",
"lora_config": null,
"max_speech_frames": 2000,
"min_speech_frames": 20,
"model_type": "touch_asu",
"projector_hidden_size": 2048,
"torch_dtype": "bfloat16",
"transformers_version": "4.52.3",
"wenet_model_name_or_path": "firered"
}
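
Reading the fields: speech features are downsampled 4x by the encoder and a further 2x by the projector (8x overall) before reaching the LLM; utterances are limited to 20-2000 speech frames; the backbone is `Qwen/Qwen3-1.7B` with LoRA disabled (`lora_config: null`), matching the "No" in the results table.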
63 changes: 63 additions & 0 deletions examples/aishell2/asr/run.sh
@@ -0,0 +1,63 @@
#!/usr/bin/env bash
# Copyright 2025 Binbin Zhang([email protected])

# Symlink the west package and helper tools into this example directory
[ ! -s west ] && ln -s ../../../west
[ ! -s tools ] && ln -s ../../../tools
export PYTHONPATH=$PYTHONPATH:$PWD
# Change this to all your available gpus, such as "0,1,2,3"
export CUDA_VISIBLE_DEVICES="0,1,2,3"
num_gpus=$(echo $CUDA_VISIBLE_DEVICES | awk -F ',' '{print NF}')

stage=train # one of: data, train, decode, all
data=data
dir=exp/Qwen3-1.7B-firered
steps=5000 # training steps

model_conf=conf/qwen3-1.7b_firered.json
decode_conf=conf/generation_config.json

# Consume command-line overrides such as "--stage decode"
# (WeNet-style helper, assumed to be provided by the symlinked tools/ dir)
[ -f tools/parse_options.sh ] && . tools/parse_options.sh

if [ $stage == "data" ] || [ $stage == "all" ]; then
  # Data preparation is manual for now: create $data/train.jsonl and
  # $data/test.jsonl in the format described in README.md
  echo "Prepare required data"
fi


if [ $stage == "train" ] || [ $stage == "all" ]; then
  torchrun --standalone --nnodes=1 --nproc_per_node=$num_gpus west/bin/train.py \
    --model_config_or_dir $model_conf \
    --data_path $data/train.jsonl \
    --output_dir $dir \
    --pack_size 8192 \
    --bf16 True \
    --max_steps $steps \
    --num_data_cycles 1000 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 4 \
    --save_strategy "steps" \
    --save_steps 100 \
    --save_total_limit 100 \
    --learning_rate 3e-4 \
    --weight_decay 0.01 \
    --adam_beta2 0.95 \
    --warmup_ratio 0.5 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --report_to "tensorboard" \
    --gradient_checkpointing \
    --dataloader_num_workers 2 \
    --dataloader_prefetch_factor 10 \
    --deepspeed conf/ds_config_zero2.json \
    --accelerator_config conf/accelerator_config.json
fi


if [ $stage == "decode" ] || [ $stage == "all" ]; then
  mdir=$dir/checkpoint-${steps}
  # Copy the generation config into the checkpoint so decoding picks it up
  cp $decode_conf $mdir
  python west/bin/decode.py \
    --data_path $data/test.jsonl \
    --model_dir $mdir \
    --result_path $mdir/result.jsonl
  # Character-level error rate (CER) scoring
  python tools/compute_wer.py --char=1 --v=1 \
    $data/test.jsonl $mdir/result.jsonl > $mdir/result.wer
fi