@@ -64,6 +64,7 @@ class PossibleSystemPromptException(Exception):
64
64
def inference (latest_message , history ):
65
65
# Allow mutating global variable
66
66
global BACKEND_INITIALISED
67
+ log .debug ("Inference request received with history: %s" , history )
67
68
68
69
try :
69
70
context = []
@@ -86,21 +87,27 @@ def inference(latest_message, history):
86
87
87
88
log .debug ("Chat context: %s" , context )
88
89
89
-
90
90
response = ""
91
+ thinking = False
92
+
91
93
for chunk in llm .stream (context ):
92
94
# If this is our first successful response from the backend
93
95
# then update the status variable to allow future error messages
94
96
# to be more informative
95
97
if not BACKEND_INITIALISED and len (response ) > 0 :
96
98
BACKEND_INITIALISED = True
97
99
98
- # NOTE(sd109): For some reason the '>' character breaks the UI
99
- # so we need to escape it here.
100
- # response += chunk.content.replace('>', '\>')
101
- # UPDATE(sd109): Above bug seems to have been fixed as of gradio 4.15.0
102
- # but keeping this note here incase we enounter it again
103
- response += chunk .content
100
+ # The "think" tags mark the chatbot's reasoning. Remove the content
101
+ # and replace with "Thinking..." until the closing tag is found.
102
+ content = chunk .content
103
+ if '<think>' in content or thinking :
104
+ thinking = True
105
+ response = "Thinking..."
106
+ if '</think>' in content :
107
+ thinking = False
108
+ response = ""
109
+ else :
110
+ response += content
104
111
yield response
105
112
106
113
# Handle any API errors here. See OpenAI Python client for possible error responses
@@ -171,7 +178,12 @@ def inference_wrapper(*args):
171
178
inference_wrapper ,
172
179
type = "messages" ,
173
180
analytics_enabled = False ,
174
- chatbot = gr .Chatbot (show_copy_button = True ),
181
+ chatbot = gr .Chatbot (
182
+ show_copy_button = True ,
183
+ height = "75vh" ,
184
+ resizable = True ,
185
+ sanitize_html = True ,
186
+ ),
175
187
)
176
188
177
189
0 commit comments