
Commit cd37c95

fix lint

Signed-off-by: wangli <[email protected]>

1 parent: 2e7eeb5

File tree: 3 files changed, +5 -3 lines

  docs/source/index.md
  docs/source/user_guide/sleep_mode.md
  examples/offline_inference_sleep_mode_npu.py

docs/source/index.md
Lines changed: 1 addition & 0 deletions

@@ -47,6 +47,7 @@ user_guide/suppoted_features
 user_guide/supported_models
 user_guide/env_vars
 user_guide/additional_config
+user_guide/sleep_mode
 user_guide/graph_mode.md
 user_guide/quantization.md
 user_guide/release_notes

docs/source/user_guide/sleep_mode.md
Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@ This module provides a custom memory allocator for Ascend NPUs using the [CANN](
 +-------------------+        +---------------------------+      +----------------------------+
 | Python Layer      | -----> | CaMemAllocator (class)    | ---> | C Extension (vllm_ascend_C)|
 +-------------------+        +---------------------------+      +----------------------------+
-     ⬇ Registers               ⬇ Tracks & Tags                  ⬇ Calls into CANN
+     ⬇ Registers               ⬇ Tracks & Tags                  ⬇ Calls into CANN
   init_module(malloc, free)    pointer_to_data[ptr] = data      aclrtMallocPhysical, aclrtMapMem, etc.
 ```

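For orientation, the registration-and-tagging flow in the diagram above can be sketched in a few lines of Python. This is a rough illustration only, not the actual vllm_ascend code: `init_module` and `pointer_to_data` come from the diagram, while `AllocationData`, the callback signatures, and the class name are assumptions.

```python
# Rough sketch of the "Registers / Tracks & Tags" flow shown above. The names
# init_module and pointer_to_data come from the diagram; AllocationData and the
# callback signatures are assumptions, not the real vllm_ascend_C interface.
from dataclasses import dataclass
from typing import Callable, Dict, Optional


@dataclass
class AllocationData:
    size: int
    tag: Optional[str]  # e.g. "weights" or "kv_cache"; hypothetical labels


class CaMemAllocatorSketch:
    def __init__(self, init_module: Callable[..., None]):
        self.pointer_to_data: Dict[int, AllocationData] = {}
        self.current_tag: Optional[str] = None
        # "Registers": hand the C extension our Python malloc/free hooks.
        init_module(self.python_malloc_callback, self.python_free_callback)

    def python_malloc_callback(self, ptr: int, size: int) -> None:
        # "Tracks & Tags": remember every allocation under the active tag.
        self.pointer_to_data[ptr] = AllocationData(size=size, tag=self.current_tag)

    def python_free_callback(self, ptr: int) -> None:
        self.pointer_to_data.pop(ptr, None)
```

The per-pointer tags are presumably what lets a later sleep call treat allocations differently, for example offloading tagged weights while simply discarding cache buffers.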

examples/offline_inference_sleep_mode_npu.py
Lines changed: 3 additions & 2 deletions

@@ -22,7 +22,6 @@
 from vllm import LLM, SamplingParams
 from vllm.utils import GiB_bytes
 
-
 os.environ["VLLM_USE_V1"] = "1"
 os.environ["VLLM_USE_MODELSCOPE"] = "True"
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
@@ -41,7 +40,9 @@
 llm.sleep(level=1)
 
 free_npu_bytes_after_sleep, total = torch.npu.mem_get_info()
-print(f"Free memory after sleep: {free_npu_bytes_after_sleep / 1024 ** 3:.2f} GiB")
+print(
+    f"Free memory after sleep: {free_npu_bytes_after_sleep / 1024 ** 3:.2f} GiB"
+)
 used_bytes = total - free_npu_bytes_after_sleep - used_bytes_baseline
 # now the memory usage should be less than the model weights
 # (0.5B model, 1GiB weights)
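Taken together, the two hunks trace vLLM's standard sleep-mode flow on an NPU: record a memory baseline, run a prompt, call llm.sleep(level=1), and verify that most of the device memory has been released. The sketch below assembles that flow end to end; apart from the lines visible in the diff, everything here (model name, prompt, enable_sleep_mode, the main guard, the wake-up check) is an assumption filled in for illustration, not quoted from the example file.

```python
# End-to-end sketch stitching the two hunks above together. Only the lines that
# appear in the diff are taken from the real file; the model name, prompt,
# enable_sleep_mode flag, main guard, and the wake-up check are assumptions.
import os

import torch  # torch.npu comes from torch_npu, which vllm-ascend loads on Ascend
from vllm import LLM, SamplingParams
from vllm.utils import GiB_bytes

os.environ["VLLM_USE_V1"] = "1"
os.environ["VLLM_USE_MODELSCOPE"] = "True"
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

if __name__ == "__main__":
    prompt = "How are you?"
    free, total = torch.npu.mem_get_info()
    used_bytes_baseline = total - free  # memory already held by other processes

    llm = LLM("Qwen/Qwen2.5-0.5B-Instruct", enable_sleep_mode=True)
    sampling_params = SamplingParams(temperature=0, max_tokens=10)
    first_output = llm.generate(prompt, sampling_params)

    # Level 1 sleep offloads the weights and drops the KV cache.
    llm.sleep(level=1)

    free_npu_bytes_after_sleep, total = torch.npu.mem_get_info()
    print(
        f"Free memory after sleep: {free_npu_bytes_after_sleep / 1024 ** 3:.2f} GiB"
    )
    used_bytes = total - free_npu_bytes_after_sleep - used_bytes_baseline
    # Now the memory usage should be less than the model weights
    # (0.5B model, roughly 1 GiB of weights).
    assert used_bytes < 1 * GiB_bytes

    llm.wake_up()
    second_output = llm.generate(prompt, sampling_params)
    # After waking up, generation should be unchanged.
    assert first_output[0].outputs[0].text == second_output[0].outputs[0].text
```

Running the sketch requires an Ascend machine with vllm-ascend and torch_npu installed.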
