# Setup environment for Sketch-Mode Cold Start and SketchJudge Reward Model SFT
conda create -n LLaMA-Factory python=3.10
conda activate LLaMA-Factory
cd LLaMA-Factory
pip install -e ".[torch,metrics]" --no-build-isolation
# Setup environment for Sketch-Thinking Reinforcement Learning
conda create -n EasyR1 python=3.10
conda activate EasyR1
cd EasyR1
pip install -e .
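After installing either environment, a quick sanity check can confirm that PyTorch sees the GPU before launching any training job. This is a minimal sketch (not part of the official setup); it only probes packages that the installs above pull in.

```python
# Minimal environment sanity check (illustrative, not part of the official setup).
# Run inside the activated conda environment (LLaMA-Factory or EasyR1).
import importlib.util

import torch

print(f"torch {torch.__version__}, CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Both training stacks depend on these core libraries; confirm they are importable.
for pkg in ("transformers", "datasets"):
    found = importlib.util.find_spec(pkg) is not None
    print(f"{pkg}: {'found' if found else 'MISSING'}")
```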
- Download LLaVA-CoT-10k-image.zip, Vision-R1-10k-image.zip, llava_cot_10k.json, and vision_r1_10k.json
- Put them under LLaMA-Factory/data
- Unzip LLaVA-CoT-10k-image.zip to LLaMA-Factory/data/LLaVA-CoT-10k-image and Vision-R1-10k-image.zip to LLaMA-Factory/data/Vision-R1-10k-image (a quick layout check is sketched after this list)
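Before registering the datasets, it can help to verify that the files above ended up where LLaMA-Factory will look for them. The sketch below only checks the names listed in the steps above and assumes it is run from the directory that contains the LLaMA-Factory clone; adjust the root path otherwise.

```python
# Quick check that the downloaded SFT data is laid out as described above.
# Assumes the current directory contains the LLaMA-Factory clone.
from pathlib import Path

data_root = Path("LLaMA-Factory/data")
expected = [
    data_root / "llava_cot_10k.json",
    data_root / "vision_r1_10k.json",
    data_root / "LLaVA-CoT-10k-image",
    data_root / "Vision-R1-10k-image",
]
for path in expected:
    status = "ok" if path.exists() else "MISSING"
    print(f"{status:7s} {path}")
```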
Add the following data config to LLaMA-Factory/data/dataset_info.json:
"llava_cot_10k": {
"file_name": "llava_cot_10k.json",
"formatting": "sharegpt",
"columns": {
"messages": "messages",
"images": "images"
},
"tags": {
"role_tag": "role",
"content_tag": "content",
"user_tag": "user",
"assistant_tag": "assistant"
}
},
"vision_r1_10k": {
"file_name": "vision_r1_10k.json",
"formatting": "sharegpt",
"columns": {
"messages": "messages",
"images": "images"
},
"tags": {
"role_tag": "role",
"content_tag": "content",
"user_tag": "user",
"assistant_tag": "assistant"
}
}
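The config above tells LLaMA-Factory to read each sample's conversation from a `messages` list (with `role`/`content` fields) and its image paths from an `images` list. As a quick sanity check, a sketch along these lines can confirm the downloaded JSON files actually expose those keys; the record contents themselves are whatever the released files contain. The path again assumes you run it from the directory containing the LLaMA-Factory clone.

```python
# Sanity-check that the downloaded JSON files match the sharegpt columns/tags
# registered in dataset_info.json (messages/images with role/content fields).
import json

for name in ("llava_cot_10k.json", "vision_r1_10k.json"):
    with open(f"LLaMA-Factory/data/{name}") as f:
        samples = json.load(f)
    first = samples[0]
    assert "messages" in first and "images" in first, f"unexpected keys in {name}: {list(first)}"
    roles = [m["role"] for m in first["messages"]]
    print(f"{name}: {len(samples)} samples, first-sample roles = {roles}")
```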
# Conduct Sketch-Mode Cold Start on Qwen2.5-VL-7B-Instruct
llamafactory-cli train examples/train_lora/cold_start_20k_7b.yaml
llamafactory-cli export examples/merge_lora/cold_start_20k_7b.yaml
# Conduct Sketch-Mode Cold Start on Qwen2.5-VL-3B-Instruct
llamafactory-cli train examples/train_lora/cold_start_20k_3b.yaml
llamafactory-cli export examples/merge_lora/cold_start_20k_3b.yaml
- Download sketch_judge.json
- Put it under LLaMA-Factory/data
Add the following data config to LLaMA-Factory/data/dataset_info.json:
"sketch_judge": {
"file_name": "sketch_judge.json"
}
llamafactory-cli train examples/train_lora/sketch_judge.yaml
llamafactory-cli export examples/merge_lora/sketch_judge.yaml
bash sketch_judge_server.sh
Replace /path/to/sketch_judge_checkpoint in sketch_judge_server.sh with the path to your merged SketchJudge checkpoint.
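sketch_judge_server.sh serves the fine-tuned SketchJudge so that the RL reward function can query it. Assuming the script exposes an OpenAI-compatible chat endpoint on localhost (the port, model name, and prompt below are placeholders, not taken from the repo), a quick client-side check might look like the following sketch:

```python
# Illustrative client-side check of the SketchJudge server.
# ASSUMPTIONS: the server speaks the OpenAI chat-completions protocol on
# localhost:8000 and registers the model under the checkpoint path; adjust
# both to match sketch_judge_server.sh.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
response = client.chat.completions.create(
    model="/path/to/sketch_judge_checkpoint",
    messages=[{"role": "user", "content": "Judge this sketch-style reasoning: ..."}],
    max_tokens=64,
)
print(response.choices[0].message.content)
```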
# Training scripts for SketchThinker-R1-7B and SketchThinker-R1-3B
bash examples/sketch_thinker_r1_7b.sh
bash examples/sketch_thinker_r1_3b.sh
# Training scripts for baselines
bash examples/vanilla_r1_7b.sh
bash examples/vanilla_r1_3b.sh
Note that you should also replace /path/to/sketch_judge_checkpoint in EasyR1/examples/reward_function/r1v.py with the path to your merged SketchJudge checkpoint.
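The actual reward logic lives in EasyR1/examples/reward_function/r1v.py and is not reproduced here. Purely as an illustration of how a judge-based reward can be wired up, the sketch below mixes an exact-match answer score with a score returned by a judge callable; the function names, weighting, and answer-extraction pattern are all assumptions, not the repo's implementation.

```python
# Illustrative judge-based reward (NOT the repo's r1v.py): combines a simple
# exact-match answer score with a score produced by an external judge callable.
import re
from typing import Callable


def compute_reward(
    response: str,
    ground_truth: str,
    judge_fn: Callable[[str], float],  # e.g. a client that queries the SketchJudge server
    judge_weight: float = 0.5,  # assumed weighting, purely for illustration
) -> float:
    # Extract the final answer from a \boxed{...} span if present (assumed format).
    match = re.search(r"\\boxed\{(.+?)\}", response)
    answer = match.group(1).strip() if match else response.strip()
    accuracy = 1.0 if answer == ground_truth.strip() else 0.0
    judge_score = judge_fn(response)  # expected to lie in [0, 1]
    return (1.0 - judge_weight) * accuracy + judge_weight * judge_score
```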
We also provide the generation code for our cold start data and SketchJudge fine-tuning data.
cd data_generation
export OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxx
# Generation process
python cold_start_llava_cot.py --dataset Xkev/LLaVA-CoT-100k --split train --model gpt-5
python cold_start_vision_r1.py --dataset Osilly/Vision-R1-cold --split train --model gpt-5
# Convert to LLaMA-Factory training data format
python convert_llava_cot.py --dataset_name Xkev/LLaVA-CoT-100k --sketch_reasoning_file outputs/llava_cot/merged.jsonl --output_file outputs/llava_cot/llava_cot_10k.jsonl
python convert_vision_r1.py --dataset_name Osilly/Vision-R1-cold --sketch_reasoning_file outputs/vision_r1/merged.jsonl --output_file outputs/vision_r1/vision_r1_10k.jsonl
Before running cold_start_vision_r1.py, download vision_r1_llava_cot_full.json and put it under data_generation/data.
python convert_sketch_judge.py
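After the conversion scripts finish, a quick look at the produced JSONL files can confirm they carry the `messages`/`images` fields that the dataset_info.json entries above expect. The sketch below only reads the first record of each file; the paths come from the command lines above.

```python
# Inspect the first converted record of each output file (paths from the
# conversion commands above) and report its top-level keys.
import json

for path in (
    "outputs/llava_cot/llava_cot_10k.jsonl",
    "outputs/vision_r1/vision_r1_10k.jsonl",
):
    with open(path) as f:
        first = json.loads(f.readline())
    print(f"{path}: keys = {sorted(first.keys())}")
```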
We release our SketchThinker-R1-7B, SketchThinker-R1-3B, and SketchJudge checkpoints on Hugging Face.
| Model | Hugging Face Path |
|---|---|
| SketchThinker-R1-7B | Ruiyang-061X/SketchThinker-R1-7B |
| SketchThinker-R1-3B | Ruiyang-061X/SketchThinker-R1-3B |
| SketchJudge | Ruiyang-061X/Qwen2.5-7B-Instruct_sketch_judge |
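Since SketchThinker-R1-7B is trained from Qwen2.5-VL-7B-Instruct, it can be loaded with the standard Qwen2.5-VL classes in transformers. The sketch below is a minimal inference example under that assumption; the image path, prompt, and generation settings are placeholders.

```python
# Minimal inference sketch for the released checkpoint (assumes a recent
# transformers with Qwen2.5-VL support; image path and prompt are placeholders).
import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

model_id = "Ruiyang-061X/SketchThinker-R1-7B"
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)
processor = AutoProcessor.from_pretrained(model_id)

image = Image.open("example.jpg")  # placeholder image
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "Solve the problem shown in the image."},
        ],
    }
]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=1024)
print(processor.batch_decode(
    output_ids[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True
)[0])
```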
@article{zhang2025sketch,
title={SketchThinker-R1: Towards Efficient Sketch-Style Reasoning in Large Multimodal Models},
author={Zhang, Ruiyang and Zhou, Dongzhan and Zheng, Zhedong},
journal={arXiv},
year={2025}
}