Skip to content

Commit 1ebda4e

Browse files
authored
update scripts to support deepseekr1 (#3495)
1 parent 25fe8d0 commit 1ebda4e

File tree

7 files changed

+18
-7
lines changed

7 files changed

+18
-7
lines changed

examples/cpu/llm/inference/distributed/run_accuracy_with_deepspeed.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,11 @@ def get_repo_root(model_name_or_path):
402402
def get_checkpoint_files(model_name_or_path):
403403
cached_repo_dir = get_repo_root(model_name_or_path)
404404
glob_pattern = "*.[bp][it][n]"
405-
if re.search("deepseek-v2", model_name_or_path, re.IGNORECASE):
405+
if (
406+
re.search("deepseek-v2", model_name_or_path, re.IGNORECASE)
407+
or re.search("deepseek-v3", model_name_or_path, re.IGNORECASE)
408+
or re.search("deepseek-r1", model_name_or_path, re.IGNORECASE)
409+
):
406410
glob_pattern = "*.[sbp][ait][fn][e][t][e][n][s][o][r][s]"
407411
# extensions: .bin | .pt
408412
# creates a list of paths from all downloaded files in cache dir

examples/cpu/llm/inference/distributed/run_generation_with_deepspeed.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -294,8 +294,10 @@ def get_repo_root(model_name_or_path):
294294
def get_checkpoint_files(model_name_or_path):
295295
cached_repo_dir = get_repo_root(model_name_or_path)
296296
glob_pattern = "*.[bp][it][n]"
297-
if re.search("deepseek-v2", model_name_or_path, re.IGNORECASE) or re.search(
298-
"deepseek-v3", model_name_or_path, re.IGNORECASE
297+
if (
298+
re.search("deepseek-v2", model_name_or_path, re.IGNORECASE)
299+
or re.search("deepseek-v3", model_name_or_path, re.IGNORECASE)
300+
or re.search("deepseek-r1", model_name_or_path, re.IGNORECASE)
299301
):
300302
glob_pattern = "*.[sbp][ait][fn][e][t][e][n][s][o][r][s]"
301303
# extensions: .bin | .pt
@@ -328,7 +330,7 @@ def get_checkpoint_files(model_name_or_path):
328330
model_type = next((x for x in MODEL_CLASSES.keys() if x in model_name.lower()), "auto")
329331
if model_type == "llama" and args.vision_text_model:
330332
model_type = "mllama"
331-
if model_type in ["maira-2", "deepseek-v2", "deepseek-v3"]:
333+
if model_type in ["maira-2", "deepseek-v2", "deepseek-v3", "deepseek-r1"]:
332334
model_type = model_type.replace("-", "")
333335
model_class = MODEL_CLASSES[model_type]
334336
tokenizer = model_class[1].from_pretrained(model_name, trust_remote_code=True)

examples/cpu/llm/inference/run.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
595595
"jamba": ("/jamba_local_shard"),
596596
"deepseek-v2": ("/deepseekv2_local_shard"),
597597
"deepseek-v3": ("/deepseekv3_local_shard"),
598+
"deepseek-r1": ("/deepseekr1_local_shard"),
598599
}
599600
model_type = next(
600601
(

examples/cpu/llm/inference/single_instance/run_generation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@
140140
)
141141
if model_type == "llama" and args.vision_text_model:
142142
model_type = "mllama"
143-
if model_type in ["maira-2", "deepseek-v2", "deepseek-v3"]:
143+
if model_type in ["maira-2", "deepseek-v2", "deepseek-v3", "deepseek-r1"]:
144144
model_type = model_type.replace("-", "")
145145
model_class = MODEL_CLASSES[model_type]
146146
if args.config_file is None:

examples/cpu/llm/inference/single_instance/run_quantization.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,8 @@ def download_and_open(url: str) -> Image.Image:
441441
model = DeepseekV2Config(args.model_id)
442442
elif re.search("deepseekv3", config.architectures[0], re.IGNORECASE):
443443
model = DeepseekV3Config(args.model_id)
444+
if "deepseek-r1" in args.model_id.lower() or "deepseekr1" in args.model_id.lower():
445+
model.name = "deepseekr1"
444446
else:
445447
raise AssertionError("Not support %s." % (args.model_id))
446448

examples/cpu/llm/inference/utils/create_shard_model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
)
5454
if model_type == "llama" and args.vision_text_model:
5555
model_type = "mllama"
56-
if model_type in ["maira-2", "deepseek-v2", "deepseek-v3"]:
56+
if model_type in ["maira-2", "deepseek-v2", "deepseek-v3", "deepseek-r1"]:
5757
model_type = model_type.replace("-", "")
5858
model_class = MODEL_CLASSES[model_type]
5959
load_dtype = torch.float32
@@ -83,7 +83,7 @@
8383
tokenizer.save_pretrained(save_directory=args.save_path)
8484
if model_type == "llava":
8585
image_processor.save_pretrained(save_directory=args.save_path)
86-
if model_type in ["maira2", "deepseekv2", "deepseekv3"]:
86+
if model_type in ["maira2", "deepseekv2", "deepseekv3", "deepseekr1"]:
8787
import inspect
8888
import shutil
8989

examples/cpu/llm/inference/utils/supported_models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,10 @@
3737
"jamba": (AutoModelForCausalLM, AutoTokenizer),
3838
"deepseek-v2": (AutoModelForCausalLM, AutoTokenizer),
3939
"deepseek-v3": (AutoModelForCausalLM, AutoTokenizer),
40+
"deepseek-r1": (AutoModelForCausalLM, AutoTokenizer),
4041
"deepseekv2": (AutoModelForCausalLM, AutoTokenizer),
4142
"deepseekv3": (AutoModelForCausalLM, AutoTokenizer),
43+
"deepseekr1": (AutoModelForCausalLM, AutoTokenizer),
4244
"auto": (AutoModelForCausalLM, AutoTokenizer),
4345
}
4446

0 commit comments

Comments (0)