Skip to content

Commit 62b39cd

Browse files
committed
Unicode character support in screen tab names
1 parent 6f948d3 commit 62b39cd

File tree

4 files changed

+284
-3
lines changed

4 files changed

+284
-3
lines changed

Action.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,8 @@ Htop_Reaction Action_setScreenTab(State* st, int x) {
411411
int rem = x - SCREEN_TAB_MARGIN_LEFT;
412412
for (unsigned int i = 0; i < settings->nScreens; i++) {
413413
const char* tab = settings->screens[i]->heading;
414-
int width = rem >= bracketWidth ? (int)strnlen(tab, rem - bracketWidth + 1) : 0;
414+
const char* ptr = tab;
415+
int width = rem >= bracketWidth ? String_mbswidth(&ptr, SIZE_MAX, rem - bracketWidth + 1) : 0;
415416
if (width >= rem - bracketWidth + 1) {
416417
settings->ssIndex = i;
417418
setActiveScreen(settings, st, i);

ScreenManager.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,9 +169,10 @@ static inline bool drawTab(const int* y, int* x, int l, const char* name, bool c
169169
(*x)++;
170170
if (*x >= l)
171171
return false;
172-
int nameWidth = (int)strnlen(name, l - *x);
172+
const char* ptr = name;
173+
int nameWidth = String_mbswidth(&ptr, (size_t)INT_MAX, l - *x);
173174
attrset(CRT_colors[cur ? SCREENS_CUR_TEXT : SCREENS_OTH_TEXT]);
174-
mvaddnstr(*y, *x, name, nameWidth);
175+
mvaddnstr(*y, *x, name, (int)(ptr - name));
175176
*x += nameWidth;
176177
if (*x >= l)
177178
return false;

XUtils.c

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@ in the source distribution for its full text.
1010
#include "XUtils.h"
1111

1212
#include <assert.h>
13+
#include <ctype.h> // IWYU pragma: keep
1314
#include <errno.h>
1415
#include <fcntl.h>
16+
#include <limits.h> // IWYU pragma: keep
1517
#include <math.h>
1618
#include <stdarg.h>
1719
#include <stdint.h>
@@ -235,6 +237,246 @@ size_t strnlen(const char* str, size_t maxLen) {
235237
}
236238
#endif
237239

240+
#ifdef HAVE_LIBNCURSESW
241+
static void String_encodeWChar(WCharEncoderState* ps, wchar_t wc) {
242+
assert(!ps->buf || ps->pos < ps->size);
243+
244+
char tempBuf[MB_LEN_MAX];
245+
char* dest = ps->buf ? (char*)ps->buf + ps->pos : tempBuf;
246+
247+
// It is unnecessarily expensive to fix the output string if the caller
248+
// gives an incorrect buffer size. This function would not support any
249+
// truncation of the output string.
250+
size_t len = wcrtomb(dest, wc, &ps->mbState);
251+
assert(len > 0);
252+
if (len == (size_t)-1) {
253+
assert(len != (size_t)-1);
254+
fail();
255+
}
256+
if (ps->buf && len > ps->size - ps->pos) {
257+
assert(!ps->buf || len <= ps->size - ps->pos);
258+
fail();
259+
}
260+
261+
ps->pos += len;
262+
}
263+
#else
264+
static void String_encodeWChar(WCharEncoderState* ps, int c) {
265+
assert(!ps->buf || ps->pos < ps->size);
266+
267+
char* buf = ps->buf;
268+
if (buf) {
269+
buf[ps->pos] = (char)c;
270+
}
271+
272+
ps->pos += 1;
273+
}
274+
#endif
275+
276+
// Walks at most maxLen bytes of src (stopping early at a NUL character) and
// hands a printable version of the text to encodeWChar, one character per
// call, finishing with a NUL terminator.
//
// Characters that cannot be decoded or that are not printable are replaced
// by a replacement character; a run of consecutive replaced characters
// yields only one replacement. An incomplete multibyte sequence at the end
// of the input is replaced as well, and errno is set to EILSEQ.
//
// ps:          encoder state passed through to encodeWChar unchanged.
// src:         input bytes; may be NULL only when maxLen is 0.
// maxLen:      maximum number of input bytes to read.
// encodeWChar: callback receiving each output character.
void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar) {
   assert(src || maxLen == 0);

#ifdef HAVE_LIBNCURSESW
   const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?';
   wchar_t ch;

   mbstate_t decState;
   memset(&decState, 0, sizeof(decState));
#else
   const char replacementChar = '?';
   char ch;
#endif

   size_t offset = 0;
   bool prevReplaced = false;

   for (;;) {
      size_t consumed = 0;
      bool replace = false;
      ch = 0;

      if (offset < maxLen) {
         // Read the next character from the byte sequence
#ifdef HAVE_LIBNCURSESW
         // Decode with a copy of the state so that a failed conversion
         // leaves the last known-good state untouched.
         mbstate_t trialState;
         memcpy(&trialState, &decState, sizeof(trialState));
         consumed = mbrtowc(&ch, &src[offset], maxLen - offset, &trialState);

         assert(consumed != 0 || ch == 0);
         if (consumed == (size_t)-2) {
            // Incomplete sequence: swallow the rest of the input
            errno = EILSEQ;
            replace = true;
            consumed = maxLen - offset;
         } else if (consumed == (size_t)-1) {
            // Invalid sequence: skip a single byte and retry from there
            replace = true;
            consumed = 1;
         } else {
            memcpy(&decState, &trialState, sizeof(decState));
         }
#else
         consumed = 1;
         ch = src[offset];
#endif
      }

      offset += consumed;

      // Filter unprintable characters
      if (!replace && ch != 0) {
#ifdef HAVE_LIBNCURSESW
         replace = !iswprint(ch);
#else
         replace = !isprint((unsigned char)ch);
#endif
      }

      if (replace) {
         ch = replacementChar;
      }

      // Emit the character unless it would repeat a replacement that was
      // just written.
      if (!(replace && prevReplaced)) {
         prevReplaced = replace;
         encodeWChar(ps, ch);
      }

      // The terminator has been emitted; nothing more to do.
      if (ch == 0) {
         break;
      }
   }
}
349+
350+
char* String_makePrintable(const char* str, size_t maxLen) {
351+
WCharEncoderState encState;
352+
353+
memset(&encState, 0, sizeof(encState));
354+
EncodePrintableString(&encState, str, maxLen, String_encodeWChar);
355+
size_t size = encState.pos;
356+
assert(size > 0);
357+
358+
memset(&encState, 0, sizeof(encState));
359+
char* buf = xMalloc(size);
360+
encState.size = size;
361+
encState.buf = buf;
362+
EncodePrintableString(&encState, str, maxLen, String_encodeWChar);
363+
assert(encState.pos == size);
364+
365+
return buf;
366+
}
367+
368+
// Decodes the next character from the string tracked by ps.
//
// On success the character is stored in ps->ch, ps->str and ps->maxLen are
// advanced past it, and true is returned. Decoding the terminating NUL also
// returns true (with ps->ch == 0) and sets ps->str to NULL so that later
// calls return false.
//
// Returns false when there is no input left, when the input ends in an
// incomplete multibyte sequence (the remaining bytes are consumed), or when
// an invalid sequence is found. In the last case ps->ch is set to WEOF and
// every later call returns false until the caller resets the state.
bool String_decodeNextWChar(MBStringDecoderState* ps) {
   if (!ps->str || ps->maxLen == 0) {
      return false;
   }

   // If the previous call of this function encounters an invalid sequence,
   // do not continue (because the "mbState" object for mbrtowc() is
   // undefined). The caller is supposed to reset the state.
#ifdef HAVE_LIBNCURSESW
   bool isStateDefined = ps->ch != WEOF;
#else
   bool isStateDefined = ps->ch != EOF;
#endif
   if (!isStateDefined) {
      return false;
   }

#ifdef HAVE_LIBNCURSESW
   wchar_t wc;
   size_t len = mbrtowc(&wc, ps->str, ps->maxLen, &ps->mbState);
   switch (len) {
      case (size_t)-1:
         // Invalid sequence
         ps->ch = WEOF;
         return false;

      case (size_t)-2:
         // Incomplete sequence
         ps->str += ps->maxLen;
         ps->maxLen = 0;
         return false;

      case 0:
         assert(wc == 0);

         ps->str = NULL;
         ps->maxLen = 0;
         ps->ch = wc;
         return true;

      default:
         ps->str += len;
         ps->maxLen -= len;
         ps->ch = wc;
   }
   return true;
#else
   // Convert through unsigned char: where plain char is signed, a byte
   // such as 0xFF would otherwise become -1 and be mistaken for the EOF
   // "state undefined" marker on the next call.
   ps->ch = (unsigned char)*ps->str;
   if (ps->ch == 0) {
      ps->str = NULL;
      ps->maxLen = 0;
   } else {
      ps->str++;
      ps->maxLen--;
   }
   return true;
#endif
}
426+
427+
// Measures how many terminal columns a string prefix occupies.
//
// str:      in/out. On entry points at the string; on return it has been
//           advanced past the characters that fit within the limits, so
//           (new *str - old *str) is the byte length of that prefix.
// maxLen:   maximum number of bytes to examine.
// maxWidth: maximum number of columns; a negative value means no limit.
//
// Returns the total width, in columns, of the characters accepted.
int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
   assert(*str || maxLen == 0);

   const int widthLimit = (maxWidth < 0) ? INT_MAX : maxWidth;

#ifdef HAVE_LIBNCURSESW
   MBStringDecoderState dec;
   memset(&dec, 0, sizeof(dec));
   dec.str = *str;
   dec.maxLen = maxLen;

   int used = 0;

   for (;;) {
      if (!String_decodeNextWChar(&dec) || dec.ch == 0)
         break;

      int cols = wcwidth((wchar_t)dec.ch);
      if (cols < 0) {
         assert(cols >= 0);
         break;
      }

      // Stop before a character that would exceed the column budget.
      if (cols > widthLimit - used)
         break;

      used += cols;

      // A zero-column character is included in the substring only when the
      // working encoding is UTF-8. In Unicode, combining characters always
      // follow the base character, but some legacy 8-bit encodings place
      // them before it, so there the end of the substring is left where it
      // was.
      if (cols > 0 || CRT_utf8) {
         *str = dec.str;
      }
   }

   assert(dec.ch != WEOF);
   return used;
#else
   // One byte is one column here, so the width limit also caps the length.
   const size_t byteLimit = MINIMUM((unsigned int)widthLimit, maxLen);
   size_t len = strnlen(*str, byteLimit);
   *str += len;
   return (int)len;
#endif
}
479+
238480
int xAsprintf(char** strp, const char* fmt, ...) {
239481
va_list vl;
240482
va_start(vl, fmt);

XUtils.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,32 @@ in the source distribution for its full text.
2323

2424
#include "Compat.h"
2525
#include "Macros.h"
26+
#include "ProvideCurses.h"
27+
28+
29+
// Output state handed to an EncodeWChar callback while a string is being
// encoded character by character.
typedef struct WCharEncoderState_ {
30+
// Number of bytes produced so far; also the offset in "buf" at which the
// next character is stored.
size_t pos;
31+
// Capacity of "buf" in bytes.
size_t size;
32+
// Output buffer, or NULL to only count bytes in "pos" without storing.
void* buf;
33+
// Conversion state passed to wcrtomb() by the wide-character encoder.
mbstate_t mbState;
34+
} WCharEncoderState;
35+
36+
// Input state for String_decodeNextWChar(). String_mbswidth() zero-fills
// the structure and then sets "str" and "maxLen" before the first call.
typedef struct MBStringDecoderState_ {
37+
// Next byte to decode; set to NULL once the terminating NUL was decoded.
const char* str;
38+
// Number of bytes that may still be read starting at "str".
size_t maxLen;
39+
#ifdef HAVE_LIBNCURSESW
40+
// Most recently decoded character. WEOF marks that an invalid sequence
// was seen and decoding must not continue until the state is reset.
wint_t ch;
41+
// Conversion state passed to mbrtowc().
mbstate_t mbState;
42+
#else
43+
// Most recently decoded character. The decoder refuses to continue
// while this holds EOF.
int ch;
44+
#endif
45+
} MBStringDecoderState;
2646

47+
// Callback type used by EncodePrintableString(): it is invoked once for
// every output character, the final NUL terminator included. The character
// type is wchar_t with wide-character curses and int otherwise.
#ifdef HAVE_LIBNCURSESW
48+
typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, wchar_t wc);
49+
#else
50+
typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, int c);
51+
#endif
2752

2853
ATTR_NORETURN
2954
void fail(void);
@@ -106,6 +131,18 @@ size_t String_safeStrncpy(char* restrict dest, const char* restrict src, size_t
106131
size_t strnlen(const char* str, size_t maxLen);
107132
#endif
108133

134+
// Feeds a printable version of at most maxLen bytes of src to encodeWChar,
// one character per call, ending with a NUL terminator. Undecodable or
// unprintable characters are replaced; src may be NULL only if maxLen is 0.
ATTR_NONNULL_N(1, 4) ATTR_ACCESS2_W(1) ATTR_ACCESS3_R(2, 3)
135+
void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar);
136+
137+
// Returns a newly allocated printable copy of at most maxLen bytes of str.
ATTR_RETNONNULL ATTR_MALLOC ATTR_ACCESS3_R(1, 2)
138+
char* String_makePrintable(const char* str, size_t maxLen);
139+
140+
// Decodes the next character into ps->ch and advances ps->str. Returns
// false when no character could be decoded.
ATTR_NONNULL
141+
bool String_decodeNextWChar(MBStringDecoderState* ps);
142+
143+
// Returns the column width of the prefix of *str that fits in maxLen bytes
// and maxWidth columns (negative maxWidth: no limit), and advances *str
// past that prefix.
ATTR_NONNULL ATTR_ACCESS2_RW(1)
144+
int String_mbswidth(const char** str, size_t maxLen, int maxWidth);
145+
109146
ATTR_FORMAT(printf, 2, 3) ATTR_NONNULL_N(1, 2)
110147
int xAsprintf(char** strp, const char* fmt, ...);
111148

0 commit comments

Comments
 (0)