77import shortuuid
88
99from lmdeploy .serve .openai .api_server import VariableInterface
10- from lmdeploy .serve .openai .protocol import (
11- ChatCompletionRequest , ChatCompletionResponse ,
12- ChatCompletionResponseChoice , ChatCompletionResponseStreamChoice ,
13- ChatCompletionStreamResponse , ChatMessage , DeltaMessage , DeltaToolCall ,
14- UsageInfo )
15- from lmdeploy .serve .openai .tool_parser .qwen3coder_parser import (
16- Qwen3CoderToolParser )
10+ from lmdeploy .serve .openai .protocol import (ChatCompletionRequest , ChatCompletionResponse , ChatCompletionResponseChoice ,
11+ ChatCompletionResponseStreamChoice , ChatCompletionStreamResponse ,
12+ ChatMessage , DeltaMessage , DeltaToolCall , UsageInfo )
13+ from lmdeploy .serve .openai .tool_parser .qwen3coder_parser import Qwen3CoderToolParser
1714
1815TestExpects = collections .namedtuple ('TestExpects' , 'func_name kwargs' )
1916
@@ -57,30 +54,26 @@ def encode(self, text: str) -> List[int]:
5754
5855
5956def _chat_completion_v1 (
60- request : ChatCompletionRequest , text_sequence : List [str ]
61- ) -> Union [ChatCompletionResponse , Generator [ChatCompletionStreamResponse ,
62- None , None ]]:
57+ request : ChatCompletionRequest ,
58+ text_sequence : List [str ]) -> Union [ChatCompletionResponse , Generator [ChatCompletionStreamResponse , None , None ]]:
6359 request_id = f'chat-{ shortuuid .random ()} '
6460 created_time = int (time .time ())
6561 model_name = request .model
6662 if request .stream :
6763
68- def completion_stream_generator (
69- ) -> Generator [ChatCompletionStreamResponse , None , None ]:
64+ def completion_stream_generator () -> Generator [ChatCompletionStreamResponse , None , None ]:
7065 previous_text = ''
7166 current_text = ''
7267 finish_reason = 'stop'
73- has_parser = (VariableInterface .tool_parser is not None
74- or VariableInterface .reasoning_parser is not None )
68+ has_parser = (VariableInterface .tool_parser is not None or VariableInterface .reasoning_parser is not None )
7569 for text in text_sequence :
7670 logprobs , usage = None , None
7771 delta_message = DeltaMessage (role = 'assistant' , content = text )
7872 if has_parser :
7973 current_text = current_text + text
8074 has_tool = VariableInterface .tool_parser is not None
8175 if request .tool_choice != 'none' and has_tool :
82- tool_delta = VariableInterface .tool_parser \
83- .extract_tool_calls_streaming (
76+ tool_delta = VariableInterface .tool_parser .extract_tool_calls_streaming (
8477 previous_text = previous_text ,
8578 current_text = current_text ,
8679 delta_text = delta_message .content ,
@@ -93,25 +86,22 @@ def completion_stream_generator(
9386 delta_message .content = tool_delta .content or ''
9487 if VariableInterface .reasoning_parser is not None :
9588 parser = VariableInterface .reasoning_parser
96- reasoning_delta = parser .extract_reasoning_content_streaming (
97- previous_text = previous_text ,
98- current_text = current_text ,
99- delta_text = delta_message .content ,
100- previous_token_ids = [],
101- current_token_ids = [],
102- delta_token_ids = [])
89+ reasoning_delta = parser .extract_reasoning_content_streaming (previous_text = previous_text ,
90+ current_text = current_text ,
91+ delta_text = delta_message .content ,
92+ previous_token_ids = [],
93+ current_token_ids = [],
94+ delta_token_ids = [])
10395 if reasoning_delta is not None :
104- delta_message .reasoning_content = (
105- reasoning_delta .reasoning_content )
96+ delta_message .reasoning_content = (reasoning_delta .reasoning_content )
10697 delta_message .content = reasoning_delta .content or ''
10798 if has_parser :
10899 previous_text = current_text
109100
110- choice_data = ChatCompletionResponseStreamChoice (
111- index = 0 ,
112- delta = delta_message ,
113- finish_reason = finish_reason ,
114- logprobs = logprobs )
101+ choice_data = ChatCompletionResponseStreamChoice (index = 0 ,
102+ delta = delta_message ,
103+ finish_reason = finish_reason ,
104+ logprobs = logprobs )
115105 response = ChatCompletionStreamResponse (
116106 id = request_id ,
117107 created = created_time ,
@@ -129,25 +119,20 @@ def completion_stream_generator(
129119 finish_reason = 'stop'
130120 has_tool = VariableInterface .tool_parser is not None
131121 if request .tool_choice != 'none' and has_tool :
132- tool_call_info = VariableInterface .tool_parser .extract_tool_calls (
133- text , request = request )
122+ tool_call_info = VariableInterface .tool_parser .extract_tool_calls (text , request = request )
134123 text , tool_calls = tool_call_info .content , tool_call_info .tool_calls
135124 if isinstance (tool_calls , List ) and len (tool_calls ):
136125 if finish_reason == 'stop' :
137126 finish_reason = 'tool_calls'
138127
139128 if VariableInterface .reasoning_parser is not None :
140129 parser = VariableInterface .reasoning_parser
141- reasoning_content , text = parser .extract_reasoning_content (
142- text , request )
130+ reasoning_content , text = parser .extract_reasoning_content (text , request )
143131
144132 choices = []
145133 choice_data = ChatCompletionResponseChoice (
146134 index = 0 ,
147- message = ChatMessage (role = 'assistant' ,
148- content = text ,
149- tool_calls = tool_calls ,
150- reasoning_content = reasoning_content ),
135+ message = ChatMessage (role = 'assistant' , content = text , tool_calls = tool_calls , reasoning_content = reasoning_content ),
151136 finish_reason = finish_reason ,
152137 )
153138 choices .append (choice_data )
@@ -161,9 +146,7 @@ def completion_stream_generator(
161146 )
162147
163148
164- def _stream_parse (
165- request : ChatCompletionRequest ,
166- text_sequence : List [str ]) -> Tuple [str , str , List [DeltaToolCall ]]:
149+ def _stream_parse (request : ChatCompletionRequest , text_sequence : List [str ]) -> Tuple [str , str , List [DeltaToolCall ]]:
167150 content = ''
168151 reasoning_content = ''
169152 tool_calls = {}
@@ -184,19 +167,16 @@ def _stream_parse(
184167 if c .function .name :
185168 existing_call .function .name = c .function .name
186169 if c .function .arguments :
187- existing_call .function .arguments = (
188- existing_call .function .arguments or '' )
170+ existing_call .function .arguments = (existing_call .function .arguments or '' )
189171 existing_call .function .arguments += c .function .arguments
190- return content , reasoning_content , list (
191- sorted (tool_calls .values (), key = lambda x : x .index ))
172+ return content , reasoning_content , list (sorted (tool_calls .values (), key = lambda x : x .index ))
192173
193174
194175@pytest .mark .parametrize (('text_sequence' , 'expects' ), [
195- (DELTA_TEXT_SEQUENCE ,
196- [TestExpects ('get_weather' , {
197- 'location' : '北京' ,
198- 'unit' : 'celsius'
199- })]),
176+ (DELTA_TEXT_SEQUENCE , [TestExpects ('get_weather' , {
177+ 'location' : '北京' ,
178+ 'unit' : 'celsius'
179+ })]),
200180 (DELTA_TEXT_SEQUENCE_MULTIPLE_CALLS , [
201181 TestExpects ('get_weather' , {
202182 'location' : '北京' ,
@@ -209,11 +189,8 @@ def test_parser_stream(text_sequence: List[str], expects: List[TestExpects]):
209189 tokenizer = DummyTokenizer ()
210190 VariableInterface .tool_parser = Qwen3CoderToolParser (tokenizer = tokenizer )
211191 VariableInterface .reasoning_parser = None
212- request = ChatCompletionRequest (model = 'qwen3coder' ,
213- messages = [],
214- stream = True )
215- content , reasoning_content , tool_calls = _stream_parse (
216- request , text_sequence )
192+ request = ChatCompletionRequest (model = 'qwen3coder' , messages = [], stream = True )
193+ content , reasoning_content , tool_calls = _stream_parse (request , text_sequence )
217194 assert len (tool_calls ) == len (expects )
218195 for parsed_call , expected_call in zip (tool_calls , expects ):
219196 assert parsed_call .function .name == expected_call .func_name
@@ -223,11 +200,10 @@ def test_parser_stream(text_sequence: List[str], expects: List[TestExpects]):
223200
224201
225202@pytest .mark .parametrize (('text_sequence' , 'expects' ), [
226- (DELTA_TEXT_SEQUENCE ,
227- [TestExpects ('get_weather' , {
228- 'location' : '北京' ,
229- 'unit' : 'celsius'
230- })]),
203+ (DELTA_TEXT_SEQUENCE , [TestExpects ('get_weather' , {
204+ 'location' : '北京' ,
205+ 'unit' : 'celsius'
206+ })]),
231207 (DELTA_TEXT_SEQUENCE_MULTIPLE_CALLS , [
232208 TestExpects ('get_weather' , {
233209 'location' : '北京' ,
@@ -236,14 +212,12 @@ def test_parser_stream(text_sequence: List[str], expects: List[TestExpects]):
236212 TestExpects ('get_weather' , {'location' : '上海' })
237213 ]),
238214])
239- def test_parser_nonstream (text_sequence : List [str ],
240- expects : List [TestExpects ]):
215+ def test_parser_nonstream (text_sequence : List [str ], expects : List [TestExpects ]):
241216 tokenizer = DummyTokenizer ()
242217 VariableInterface .tool_parser = Qwen3CoderToolParser (tokenizer = tokenizer )
243218 VariableInterface .reasoning_parser = None
244219 resp : ChatCompletionResponse = _chat_completion_v1 (
245- ChatCompletionRequest (model = 'qwen3coder' , messages = [], stream = False ),
246- text_sequence )
220+ ChatCompletionRequest (model = 'qwen3coder' , messages = [], stream = False ), text_sequence )
247221
248222 assert len (resp .choices ) == 1
249223 first_message = resp .choices [0 ].message
@@ -273,8 +247,7 @@ def test_no_think_nonstream():
273247 VariableInterface .tool_parser = Qwen3CoderToolParser (tokenizer = tokenizer )
274248 VariableInterface .reasoning_parser = None
275249 resp : ChatCompletionResponse = _chat_completion_v1 (
276- ChatCompletionRequest (model = 'qwen3coder' , messages = [], stream = False ),
277- text_sequence )
250+ ChatCompletionRequest (model = 'qwen3coder' , messages = [], stream = False ), text_sequence )
278251
279252 assert len (resp .choices ) == 1
280253 first_message = resp .choices [0 ].message
0 commit comments