Skip to content

Commit 51506a3

Browse files
fix(chat): normalize AI text output and enforce backend reply path
1 parent a2cc49a commit 51506a3

File tree

13 files changed

+241
-38
lines changed

13 files changed

+241
-38
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,5 @@ npm-debug.log*
1212
yarn-debug.log*
1313
yarn-error.log*
1414

15+
# Local QA artifacts
16+
screenshots/

assets/index-DBWXNU1C.js

Lines changed: 86 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

assets/index-DmnBBc78.css

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backend/services/llmScorer.js

Lines changed: 80 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,63 @@ BẮT BUỘC: Bạn phải luôn trả lời bằng định dạng JSON sau:
1717
"suggested_questions": ["Câu hỏi gợi ý 1", "Câu hỏi gợi ý 2"]
1818
}`;
1919

20+
/**
 * Count how many U+FFFD (REPLACEMENT CHARACTER) code units a value contains.
 *
 * @param {*} value - Any value; falsy input is treated as an empty string.
 * @returns {number} Number of U+FFFD occurrences in the stringified value.
 */
function countReplacementChars(value) {
  const matches = String(value || '').match(/\uFFFD/g);
  return matches ? matches.length : 0;
}
23+
24+
/**
 * Best-effort repair of text whose UTF-8 bytes were mis-decoded as Latin-1.
 *
 * The text is re-encoded as Latin-1 bytes and decoded again as UTF-8. The
 * repaired string is returned only when it differs from the input and does not
 * contain more U+FFFD replacement characters than the input did.
 *
 * @param {*} value - Any value; falsy input yields an empty string.
 * @returns {string} The repaired text, or the original text when no safe repair applies.
 */
function fixCommonMojibake(value) {
  const text = String(value || '');
  if (!text) return '';

  // Heuristic: common UTF-8 -> Latin-1 mojibake prefixes.
  if (!/[ÃÂâ]/.test(text)) return text;

  // The 'latin1' encoding keeps only the low byte of each code unit. Text that
  // already holds characters above U+00FF is not pure Latin-1 mojibake, and
  // truncating it could "repair" valid text into unrelated characters.
  if (/[^\u0000-\u00FF]/.test(text)) return text;

  try {
    const repaired = Buffer.from(text, 'latin1').toString('utf8');
    if (!repaired || repaired === text) return text;
    if (countReplacementChars(repaired) <= countReplacementChars(text)) {
      return repaired;
    }
  } catch {
    // Ignore and keep original text.
  }

  return text;
}
43+
44+
/**
 * Decode literal backslash escape sequences left inside a text value.
 *
 * Handled sequences: `\uXXXX`, `\r\n`, `\n`, `\r` (all line breaks become "\n"),
 * `\t`, `\"` and `\\`. Decoding happens in a single left-to-right pass so an
 * escaped backslash followed by a letter (for example `\\n`) yields a literal
 * backslash plus that letter instead of being decoded a second time.
 *
 * @param {*} value - Any value; falsy input is treated as an empty string.
 * @returns {string} The text with the recognised escape sequences decoded.
 */
function decodeEscapedText(value) {
  const simpleEscapes = { n: '\n', r: '\n', t: '\t', '"': '"', '\\': '\\' };

  return String(value || '').replace(
    /\\(?:u([0-9a-fA-F]{4})|(r\\n)|([nrt"\\]))/g,
    (match, hex, crlf, simple) => {
      if (hex !== undefined) return String.fromCharCode(Number.parseInt(hex, 16));
      // An escaped CRLF pair collapses into a single line feed.
      if (crlf !== undefined) return '\n';
      return simpleEscapes[simple];
    },
  );
}
54+
55+
/**
 * Normalize a reply string before it is returned to callers.
 *
 * Steps, in order: strip U+FEFF and surrounding whitespace, decode literal
 * escape sequences, attempt mojibake repair, remove one pair of matching
 * wrapping quotes, then drop NUL characters and trim again.
 *
 * @param {*} value - Any value; falsy input yields an empty string.
 * @returns {string} The cleaned text, possibly empty.
 */
function normalizeReplyText(value) {
  let text = String(value || '').replace(/\uFEFF/g, '').trim();
  if (!text) return '';

  text = decodeEscapedText(text);
  text = fixCommonMojibake(text);

  // Only unwrap when there are two distinct quote characters; a lone quote is
  // content, not a wrapper, and must not be sliced away to an empty reply.
  const first = text[0];
  const isWrapped = text.length >= 2 && (first === '"' || first === "'") && text.endsWith(first);
  if (isWrapped) {
    text = text.slice(1, -1).trim();
  }

  return text.replace(/\u0000/g, '').trim();
}
68+
69+
/**
 * Normalize a list of suggested questions.
 *
 * Non-array input yields an empty list. Object and function items are dropped
 * so they are never stringified into entries such as "[object Object]". The
 * remaining items are cleaned with normalizeReplyText, empty results are
 * removed, and at most five entries are kept.
 *
 * @param {*} value - Expected to be an array of question strings.
 * @returns {string[]} Up to five non-empty, normalized questions.
 */
function normalizeSuggestedQuestions(value) {
  if (!Array.isArray(value)) return [];

  const isPrimitive = (item) => typeof item !== 'object' && typeof item !== 'function';

  return value
    .filter(isPrimitive)
    .map((item) => normalizeReplyText(item))
    .filter(Boolean)
    .slice(0, 5);
}
76+
2077
/**
 * Report whether GROQ_API_KEY holds a truthy value.
 *
 * @returns {boolean} True when GROQ_API_KEY is truthy, false otherwise.
 */
function isEnabled() {
  return !!GROQ_API_KEY;
}
@@ -77,9 +134,12 @@ function postJson(url, payload, timeoutMs) {
77134
Authorization: `Bearer ${GROQ_API_KEY}`
78135
}
79136
}, (res) => {
80-
let raw = '';
81-
res.on('data', (chunk) => { raw += chunk; });
137+
const chunks = [];
138+
res.on('data', (chunk) => {
139+
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
140+
});
82141
res.on('end', () => {
142+
const raw = Buffer.concat(chunks).toString('utf8');
83143
try {
84144
if (res.statusCode !== 200) {
85145
console.error(`[LLM Error] Status: ${res.statusCode}, Body: ${raw}`);
@@ -109,13 +169,21 @@ function postJson(url, payload, timeoutMs) {
109169

110170
function safeParseChatReply(content) {
111171
if (!content) return null;
112-
const text = String(content).trim();
172+
const text = String(content).replace(/\uFEFF/g, '').trim();
113173
const candidates = [];
114174

115175
const tryParseJson = (value) => {
116176
if (!value) return null;
117177
try {
118-
return JSON.parse(value);
178+
const parsed = JSON.parse(value);
179+
if (typeof parsed === 'string') {
180+
try {
181+
return JSON.parse(parsed);
182+
} catch {
183+
return parsed;
184+
}
185+
}
186+
return parsed;
119187
} catch {
120188
return null;
121189
}
@@ -159,20 +227,21 @@ function safeParseChatReply(content) {
159227

160228
for (const obj of candidates) {
161229
if (!obj || typeof obj !== 'object') continue;
162-
let bot = String(obj.bot_reply || '').trim();
230+
let bot = normalizeReplyText(obj.bot_reply);
163231
if (!bot) continue;
164232

165233
// Nếu model trả về chuỗi template mặc định, bỏ qua đối tượng này
166234
if (bot.toLowerCase().includes('nội dung phản hồi')) continue;
167-
if (!Array.isArray(obj.suggested_questions)) {
168-
obj.suggested_questions = [];
169-
}
170-
return obj;
235+
return {
236+
bot_reply: bot,
237+
suggested_questions: normalizeSuggestedQuestions(obj.suggested_questions)
238+
};
171239
}
172240

173241
// Fallback: dùng raw text như phản hồi để tránh null
174-
if (text && !text.toLowerCase().includes('nội dung phản hồi')) {
175-
return { bot_reply: text, suggested_questions: [] };
242+
const normalizedText = normalizeReplyText(text);
243+
if (normalizedText && !normalizedText.toLowerCase().includes('nội dung phản hồi')) {
244+
return { bot_reply: normalizedText, suggested_questions: [] };
176245
}
177246

178247
return null;
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// Template: copy this file to runtime-config.js and point it at your production backend.
// Example: window.__API_BASE__ = 'https://your-backend.example.com';

// Keep a value that was set before this script ran; otherwise default to an empty base URL.
window.__API_BASE__ = window.__API_BASE__ || '';

// When no offline flag was set beforehand, default to offline only if there is no
// API base and the page is served from a github.io host.
window.__IS_OFFLINE__ =
  window.__IS_OFFLINE__ ??
  (!window.__API_BASE__ && /(^|\.)github\.io$/i.test(window.location.hostname));
8+

frontend/public/runtime-config.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
window.__API_BASE__ = window.__API_BASE__ || '';
2-
window.__IS_OFFLINE__ = window.__IS_OFFLINE__ ?? (!window.__API_BASE__ && !/^(localhost|127\.0\.0\.1)$/i.test(window.location.hostname));
2+
window.__IS_OFFLINE__ = window.__IS_OFFLINE__ ?? (!window.__API_BASE__ && /(^|\.)github\.io$/i.test(window.location.hostname));

frontend/src/api.js

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -76,15 +76,7 @@ export const api = {
7676
});
7777
},
7878
async sendMessage(payload, token) {
79-
if (IS_OFFLINE) {
80-
return offlineApi.sendMessage({
81-
conversation_id: payload.conversation_id,
82-
message: payload.message,
83-
user_id: payload.user_id,
84-
user_type: payload.user_type,
85-
request_more: payload.request_more
86-
});
87-
}
79+
// Backend-only: chatbot replies must always come from server APIs.
8880
return request('/api/chat/message', {
8981
method: 'POST',
9082
headers: {

frontend/src/config.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@ const runtimeApiBase = normalizeBaseUrl(
88
const envApiBase = normalizeBaseUrl(import.meta.env.VITE_API_BASE);
99
const hostname = typeof window !== 'undefined' ? window.location.hostname : '';
1010
const isLocalhost = /^(localhost|127\.0\.0\.1)$/i.test(hostname);
11+
const isGithubPages = /(^|\.)github\.io$/i.test(hostname);
1112

1213
const fallbackApiBase = isLocalhost ? 'http://localhost:3001' : '';
1314
const configuredApiBase = runtimeApiBase || envApiBase || fallbackApiBase;
1415

1516
const runtimeOffline = typeof window !== 'undefined' ? window.__IS_OFFLINE__ : undefined;
1617
// VITE_OFFLINE_MODE is optional. Leave envOffline undefined when the variable is
// unset or empty so the `??` chain that computes IS_OFFLINE can fall through to
// autoOfflineWithoutApi; a strict boolean is never nullish and would make that
// fallback unreachable.
const rawEnvOffline = import.meta.env.VITE_OFFLINE_MODE;
const envOffline = rawEnvOffline == null || rawEnvOffline === '' ? undefined : rawEnvOffline === 'true';
17-
const autoOfflineWithoutApi = !configuredApiBase && !isLocalhost;
18+
const autoOfflineWithoutApi = !configuredApiBase && !isLocalhost && !isGithubPages;
1819

1920
export const API_BASE = configuredApiBase;
2021
export const IS_OFFLINE = Boolean(runtimeOffline ?? envOffline ?? autoOfflineWithoutApi);

frontend/src/index.css

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,8 @@ body::before {
351351
}
352352

353353
.border-\[\#E8E2D8\],
354-
.border-\[\#E2D8C8\] {
354+
.border-\[\#E2D8C8\],
355+
.border-\[\#EFEAE2\] {
355356
border-color: var(--c-border) !important;
356357
}
357358

@@ -448,6 +449,11 @@ body::before {
448449
border-color: var(--c-border) !important;
449450
}
450451

452+
[data-theme="dark"] .border-\[\#E2D8C8\],
453+
[data-theme="dark"] .border-\[\#EFEAE2\] {
454+
border-color: var(--c-border) !important;
455+
}
456+
451457
[data-theme="dark"] .bg-\[\#F7F5F2\] {
452458
background-color: var(--c-surface-2) !important;
453459
}

frontend/src/sanitizeHtml.js

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,26 @@ const ALLOWED_URI_REGEXP = /^(?:(?:https?|mailto|tel):|[^a-z]|[a-z+\.\-]+(?:[^a-
9191

9292
let hooksInitialized = false;
9393

94+
/**
 * Count how many U+FFFD (REPLACEMENT CHARACTER) code units a value contains.
 *
 * @param {*} value - Any value; falsy input is treated as an empty string.
 * @returns {number} Number of U+FFFD occurrences in the stringified value.
 */
function countReplacementChars(value) {
  const matches = String(value || '').match(/\uFFFD/g);
  return matches ? matches.length : 0;
}
97+
98+
/**
 * Best-effort repair of text whose UTF-8 bytes were mis-decoded as Latin-1.
 *
 * Each character is mapped back to a single byte and the byte sequence is
 * decoded as UTF-8. The repaired string is returned only when it differs from
 * the input and does not contain more U+FFFD replacement characters than the
 * input did.
 *
 * @param {*} value - Any value; null/undefined are treated as an empty string.
 * @returns {string} The repaired text, or the original text when no safe repair applies.
 */
function fixCommonMojibake(value) {
  const text = String(value ?? '');
  if (!text || !/[ÃÂâ]/.test(text)) return text;

  // Characters above U+00FF do not fit in one byte. Masking them to their low
  // byte could turn valid text into unrelated characters, so such text is
  // left untouched.
  if (/[^\u0000-\u00FF]/.test(text)) return text;

  try {
    const bytes = new Uint8Array(Array.from(text, (char) => char.charCodeAt(0)));
    const repaired = new TextDecoder('utf-8', { fatal: false }).decode(bytes);
    if (!repaired || repaired === text) return text;
    if (countReplacementChars(repaired) <= countReplacementChars(text)) {
      return repaired;
    }
  } catch {
    // Keep original if conversion fails.
  }
  return text;
}
113+
94114
function extractJsonSlice(text) {
95115
const start = text.indexOf('{');
96116
const end = text.lastIndexOf('}');
@@ -100,29 +120,39 @@ function extractJsonSlice(text) {
100120

101121
/**
 * Decode literal backslash escape sequences in chat text, then clean it up.
 *
 * Handled sequences: `\uXXXX`, `\r\n`, `\n`, `\r` (all line breaks become "\n"),
 * `\t`, `\"` and `\\`. Decoding is a single left-to-right pass so an escaped
 * backslash followed by a letter (for example `\\n`) yields a literal backslash
 * plus that letter instead of being decoded twice. The result is passed
 * through fixCommonMojibake and NUL characters are removed.
 *
 * The earlier escape-then-JSON.parse round trip was dropped: it escaped every
 * backslash before parsing, so it always reproduced its input unchanged.
 *
 * @param {*} value - Any value; null/undefined are treated as an empty string.
 * @returns {string} The decoded and cleaned text.
 */
function decodeEscapes(value) {
  const stringValue = String(value ?? '');
  if (!stringValue) return '';

  const simpleEscapes = { n: '\n', r: '\n', t: '\t', '"': '"', '\\': '\\' };

  const decoded = stringValue.replace(
    /\\(?:u([0-9a-fA-F]{4})|(r\\n)|([nrt"\\]))/g,
    (match, hex, crlf, simple) => {
      if (hex !== undefined) return String.fromCharCode(Number.parseInt(hex, 16));
      // An escaped CRLF pair collapses into a single line feed.
      if (crlf !== undefined) return '\n';
      return simpleEscapes[simple];
    },
  );

  return fixCommonMojibake(decoded).replace(/\u0000/g, '');
}
120150

121151
function normalizeChatText(input) {
122152
if (input == null) return '';
123153
if (typeof input === 'object') {
124154
if (Object.prototype.hasOwnProperty.call(input, 'bot_reply')) {
125-
return String(input?.bot_reply ?? '');
155+
return decodeEscapes(String(input?.bot_reply ?? ''));
126156
}
127157
try {
128158
return JSON.stringify(input, null, 2);
@@ -132,15 +162,15 @@ function normalizeChatText(input) {
132162
}
133163
const raw = String(input ?? '');
134164
const trimmed = raw.trim();
135-
if (!trimmed) return raw;
136-
if (!trimmed.includes('"bot_reply"')) return raw;
165+
if (!trimmed) return decodeEscapes(raw);
166+
if (!trimmed.includes('"bot_reply"')) return decodeEscapes(raw);
137167

138168
const slice = extractJsonSlice(trimmed);
139169
if (slice) {
140170
try {
141171
const parsed = JSON.parse(slice);
142172
if (parsed && Object.prototype.hasOwnProperty.call(parsed, 'bot_reply')) {
143-
return String(parsed.bot_reply ?? '');
173+
return decodeEscapes(String(parsed.bot_reply ?? ''));
144174
}
145175
} catch {
146176
// Ignore JSON parse errors and fallback to regex extraction.
@@ -155,7 +185,7 @@ function normalizeChatText(input) {
155185
return decodeEscapes(stripped);
156186
}
157187

158-
return raw;
188+
return decodeEscapes(raw);
159189
}
160190

161191
function initHooks() {

0 commit comments

Comments
 (0)