Skip to content

Unicode character support in screen tab names #1642

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Action.c
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,8 @@ Htop_Reaction Action_setScreenTab(State* st, int x) {
int rem = x - SCREEN_TAB_MARGIN_LEFT;
for (unsigned int i = 0; i < settings->nScreens; i++) {
const char* tab = settings->screens[i]->heading;
int width = rem >= bracketWidth ? (int)strnlen(tab, rem - bracketWidth + 1) : 0;
const char* ptr = tab;
int width = rem >= bracketWidth ? String_mbswidth(&ptr, SIZE_MAX, rem - bracketWidth + 1) : 0;
if (width >= rem - bracketWidth + 1) {
settings->ssIndex = i;
setActiveScreen(settings, st, i);
Expand Down
5 changes: 3 additions & 2 deletions ScreenManager.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,10 @@ static inline bool drawTab(const int* y, int* x, int l, const char* name, bool c
(*x)++;
if (*x >= l)
return false;
int nameWidth = (int)strnlen(name, l - *x);
const char* ptr = name;
int nameWidth = String_mbswidth(&ptr, (size_t)INT_MAX, l - *x);
attrset(CRT_colors[cur ? SCREENS_CUR_TEXT : SCREENS_OTH_TEXT]);
mvaddnstr(*y, *x, name, nameWidth);
mvaddnstr(*y, *x, name, (int)(ptr - name));
*x += nameWidth;
if (*x >= l)
return false;
Expand Down
289 changes: 289 additions & 0 deletions XUtils.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ in the source distribution for its full text.
#include "XUtils.h"

#include <assert.h>
#include <ctype.h> // IWYU pragma: keep
#include <errno.h>
#include <fcntl.h>
#include <limits.h> // IWYU pragma: keep
#include <math.h>
#include <stdarg.h>
#include <stdint.h>
Expand Down Expand Up @@ -235,6 +237,293 @@ size_t strnlen(const char* str, size_t maxLen) {
}
#endif

#ifdef HAVE_LIBNCURSESW
/*
 * Encodes the wide character "wc" with wcrtomb() and appends the resulting
 * bytes to the encoder output described by "ps".
 *
 * When ps->buf is NULL nothing is stored and only ps->pos is advanced, which
 * lets a caller do a dry run to measure the required buffer size.
 *
 * Truncation of the output is not supported: the program is terminated
 * through fail() if the character cannot be encoded in the current locale
 * or if the encoded bytes do not fit in the remaining buffer space.
 */
static void String_encodeWChar(WCharEncoderState* ps, wchar_t wc) {
   assert(!ps->buf || ps->pos < ps->size);

   // Always encode into a scratch buffer first. wcrtomb() may store up to
   // MB_CUR_MAX bytes, so encoding straight into the caller's buffer could
   // write past its end before the size check gets a chance to run.
   char tempBuf[MB_LEN_MAX];
   size_t len = wcrtomb(tempBuf, wc, &ps->mbState);
   assert(len > 0);
   if (len == (size_t)-1) {
      assert(len != (size_t)-1);
      fail();
   }

   if (ps->buf) {
      // Check "pos" against "size" first so that the subtraction below can
      // never wrap around.
      if (ps->pos >= ps->size || len > ps->size - ps->pos) {
         assert(ps->pos < ps->size && len <= ps->size - ps->pos);
         fail();
      }
      memcpy((char*)ps->buf + ps->pos, tempBuf, len);
   }

   ps->pos += len;
}
#else
/*
 * Single-byte variant used when wide-character curses is not available.
 * Appends the byte "c" to the encoder output described by "ps".
 *
 * When ps->buf is NULL nothing is stored and only ps->pos is advanced, which
 * lets a caller do a dry run to measure the required buffer size.
 *
 * Truncation is not supported: the program is terminated through fail() if
 * the output buffer has no room left.
 */
static void String_encodeWChar(WCharEncoderState* ps, int c) {
   assert(!ps->buf || ps->pos < ps->size);

   char* buf = ps->buf;
   if (buf) {
      // Keep the bounds check in release builds too (the assertion above is
      // compiled out there), as the wide-character variant does.
      if (ps->pos >= ps->size) {
         fail();
      }
      buf[ps->pos] = (char)c;
   }

   ps->pos += 1;
}
#endif

/*
 * Reads up to "maxLen" bytes of "src" one character at a time and hands a
 * printable version of each character to the "encodeWChar" callback,
 * together with the encoder state "ps".
 *
 * - Characters that are not printable, and byte sequences that cannot be
 *   decoded, are replaced with a replacement character (U+FFFD when CRT_utf8
 *   is set, '?' otherwise; always '?' without wide-character support).
 * - A run of consecutive replaced characters produces only one replacement
 *   character.
 * - The last character passed to the callback is always 0, emitted either
 *   when a NUL is read from "src" or when "maxLen" bytes have been consumed.
 */
void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar) {
assert(src || maxLen == 0);

size_t pos = 0;
// True when the previously processed character was replaced; used to
// collapse runs of replacements.
bool wasReplaced = false;

#ifdef HAVE_LIBNCURSESW
const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?';
wchar_t ch;

// Decoder state as of the last successfully decoded character.
mbstate_t decState;
memset(&decState, 0, sizeof(decState));
#else
const char replacementChar = '?';
char ch;
#endif

do {
size_t len = 0;
bool shouldReplace = false;
// Stays 0 when the input is exhausted, which emits the terminator and
// ends the loop.
ch = 0;

if (pos < maxLen) {
// Read the next character from the byte sequence
#ifdef HAVE_LIBNCURSESW
// Decode with a copy of the state so that a failed conversion does not
// clobber "decState".
mbstate_t newState;
memcpy(&newState, &decState, sizeof(newState));
len = mbrtowc(&ch, &src[pos], maxLen - pos, &newState);

assert(len != 0 || ch == 0);
switch (len) {
case (size_t)-2:
// Incomplete sequence: the remaining bytes do not form a whole
// character. Consume all of them and emit a replacement.
errno = EILSEQ;
shouldReplace = true;
len = maxLen - pos;
break;

case (size_t)-1:
// Invalid sequence: skip a single byte and retry from the next one,
// keeping the last good decoder state.
shouldReplace = true;
len = 1;
break;

default:
// Successful conversion: commit the updated decoder state.
memcpy(&decState, &newState, sizeof(decState));
}
#else
len = 1;
ch = src[pos];
#endif
}

pos += len;

// Filter unprintable characters
if (!shouldReplace && ch != 0) {
#ifdef HAVE_LIBNCURSESW
shouldReplace = !iswprint(ch);
#else
shouldReplace = !isprint((unsigned char)ch);
#endif
}

if (shouldReplace) {
ch = replacementChar;
if (wasReplaced) {
// A replacement was already emitted for this run; skip this one.
// "ch" is non-zero here, so the loop condition keeps iterating.
continue;
}
}
wasReplaced = shouldReplace;

encodeWChar(ps, ch);
} while (ch != 0);
}

/*
 * Builds a printable copy of "str" (at most "maxLen" bytes are read) in a
 * buffer obtained from xMalloc() and returns it.
 *
 * The conversion is done by EncodePrintableString() in two passes: the first
 * one only counts the bytes needed, the second one fills the buffer.
 */
char* String_makePrintable(const char* str, size_t maxLen) {
   // Pass 1: no output buffer, so the encoder only counts bytes. The count
   // includes the terminating NUL, hence it can never be zero.
   WCharEncoderState counter;
   memset(&counter, 0, sizeof(counter));
   EncodePrintableString(&counter, str, maxLen, String_encodeWChar);

   const size_t needed = counter.pos;
   assert(needed > 0);

   // Pass 2: repeat the conversion from a fresh state, this time writing
   // into a buffer of exactly the measured size.
   char* out = xMalloc(needed);

   WCharEncoderState writer;
   memset(&writer, 0, sizeof(writer));
   writer.buf = out;
   writer.size = needed;
   EncodePrintableString(&writer, str, maxLen, String_encodeWChar);
   assert(writer.pos == needed);

   return out;
}

/*
 * Decodes the next character of the string tracked by "ps".
 *
 * On success the character is stored in ps->ch, ps->str / ps->maxLen are
 * advanced past it and true is returned. Decoding the terminating NUL also
 * returns true (with ps->ch == 0) and sets ps->str to NULL so that any
 * further call returns false.
 *
 * Returns false when there is nothing left to decode (ps->str is NULL or
 * ps->maxLen is 0), when the remaining bytes form an incomplete sequence, or
 * when an invalid sequence is found. In the invalid case ps->ch is set to
 * WEOF and every later call keeps returning false until the caller resets
 * the state.
 */
bool String_decodeNextWChar(MBStringDecoderState* ps) {
   if (!ps->str || ps->maxLen == 0) {
      return false;
   }

   // If the previous call of this function encountered an invalid sequence,
   // do not continue (because the "mbState" object for mbrtowc() is
   // undefined). The caller is supposed to reset the state.
#ifdef HAVE_LIBNCURSESW
   bool isStateDefined = ps->ch != WEOF;
#else
   bool isStateDefined = ps->ch != EOF;
#endif
   if (!isStateDefined) {
      return false;
   }

#ifdef HAVE_LIBNCURSESW
   wchar_t wc;
   size_t len = mbrtowc(&wc, ps->str, ps->maxLen, &ps->mbState);
   switch (len) {
   case (size_t)-1:
      // Invalid sequence
      ps->ch = WEOF;
      return false;

   case (size_t)-2:
      // Incomplete sequence: all remaining bytes have been consumed
      ps->str += ps->maxLen;
      ps->maxLen = 0;
      return false;

   case 0:
      // Terminating NUL character
      assert(wc == 0);

      ps->str = NULL;
      ps->maxLen = 0;
      ps->ch = wc;
      return true;

   default:
      ps->str += len;
      ps->maxLen -= len;
      ps->ch = wc;
   }
   return true;
#else
   // Convert through "unsigned char" so that bytes >= 0x80 are never stored
   // as negative values. Where plain "char" is signed, the byte 0xFF would
   // otherwise become -1, which equals EOF and would be mistaken for the
   // "invalid sequence" marker on the next call.
   ps->ch = (unsigned char)*ps->str;
   if (ps->ch == 0) {
      ps->str = NULL;
      ps->maxLen = 0;
   } else {
      ps->str++;
      ps->maxLen--;
   }
   return true;
#endif
}

/*
 * Measures how much of "*str" fits in a line of "maxWidth" terminal columns,
 * preferring to end the line at a "separator" character.
 *
 * At most "maxLen" bytes are read. A negative "maxWidth" means no limit.
 *
 * Scanning stops at the end of the string, at a character that cannot be
 * decoded, or at the first character that no longer fits. In the last case,
 * if a break position was seen earlier, the result is rewound to it:
 * - with separator ' ', a break position is recorded in front of each run of
 *   spaces that follows a non-space character (leading spaces are ignored);
 * - with any other separator, a break position is recorded right after each
 *   occurrence of the separator.
 * A separator of '\0' never matches, so no break positions are recorded.
 *
 * On return "*str" points just past the measured substring, and the return
 * value is the width of that substring in columns.
 */
int String_lineBreakWidth(const char** str, size_t maxLen, int maxWidth, char separator) {
   assert(*str || maxLen == 0);

   if (maxWidth < 0)
      maxWidth = INT_MAX;

   MBStringDecoderState state;
   memset(&state, 0, sizeof(state));
   state.str = *str;
   state.maxLen = maxLen;

   int totalWidth = 0;
   int breakWidth = 0;

   const char* breakPos = NULL;
   // Starts as true so that leading spaces never create a break position.
   bool inSpaces = true;

   while (String_decodeNextWChar(&state)) {
      if (state.ch == 0)
         break;

      if (state.ch == ' ' && separator == ' ' && !inSpaces) {
         // First space after a word: "*str" still points in front of it.
         breakWidth = totalWidth;
         breakPos = *str;
         inSpaces = true;
      }

#ifdef HAVE_LIBNCURSESW
      int w = wcwidth((wchar_t)state.ch);
      if (w < 0) {
         // This function should not be used with strings containing
         // unprintable characters. Tolerate them on release build, however.
         assert(w >= 0);
         break;
      }
#else
      // <ctype.h> functions require a value representable as unsigned char
      // (or EOF), so never hand them a possibly negative character value.
      assert(isprint((unsigned char)state.ch));
      int w = 1;
#endif

      if (w > maxWidth - totalWidth) {
         // This character cannot fit the line with the given maxWidth.
         if (breakPos) {
            // Rewind the scanning state to the last found separator.
            totalWidth = breakWidth;
            *str = breakPos;
         }
         break;
      }

#ifdef HAVE_LIBNCURSESW
      // If the character takes zero columns, include the character in the
      // substring if the working encoding is UTF-8, and ignore it otherwise.
      // In Unicode, combining characters are always placed after the base
      // character, but some legacy 8-bit encodings instead place combining
      // characters before the base character.
      if (w <= 0 && !CRT_utf8) {
         continue;
      }
#endif

      totalWidth += w;

      // (*str - start) will represent the length of the substring bounded
      // by the width limit.
      *str = state.str;

      if (state.ch != ' ')
         inSpaces = false;

#ifdef HAVE_LIBNCURSESW
      const bool isSeparator = state.ch == (wint_t)separator;
#else
      // Compare as bytes so that the result does not depend on the
      // signedness of plain "char".
      const bool isSeparator = (unsigned char)state.ch == (unsigned char)separator;
#endif
      if (isSeparator && separator != ' ') {
         // Break right after the separator character.
         breakWidth = totalWidth;
         breakPos = *str;
      }
   }

   return totalWidth;
}

/*
 * Measures the prefix of "*str" that fits in "maxWidth" terminal columns.
 *
 * At most "maxLen" bytes are read; a negative "maxWidth" means no limit.
 * On return "*str" points just past the measured prefix, and the return
 * value is the width of that prefix in columns.
 */
int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
#ifdef HAVE_LIBNCURSESW
   // A '\0' separator never matches a character, so this is a plain width
   // measurement without any line-break handling.
   return String_lineBreakWidth(str, maxLen, maxWidth, '\0');
#else
   assert(*str || maxLen == 0);

   // Without wide-character support every byte occupies one column, so the
   // column limit is also a byte limit.
   const int widthLimit = (maxWidth < 0) ? INT_MAX : maxWidth;
   const size_t byteLimit = MINIMUM((size_t)widthLimit, maxLen);

   const size_t consumed = strnlen(*str, byteLimit);
   *str += consumed;
   return (int)consumed;
#endif
}

int xAsprintf(char** strp, const char* fmt, ...) {
va_list vl;
va_start(vl, fmt);
Expand Down
40 changes: 40 additions & 0 deletions XUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,32 @@ in the source distribution for its full text.

#include "Compat.h"
#include "Macros.h"
#include "ProvideCurses.h"


/* Output state shared between EncodePrintableString() and its callback. */
typedef struct WCharEncoderState_ {
// Number of bytes produced so far; also the offset of the next write in "buf"
size_t pos;
// Capacity of "buf" in bytes
size_t size;
// Output buffer, or NULL to only count the bytes in "pos"
void* buf;
// Conversion state passed to wcrtomb() by the wide-character encoder
mbstate_t mbState;
} WCharEncoderState;

/* Input state for String_decodeNextWChar(). Zero it before the first call. */
typedef struct MBStringDecoderState_ {
// Next byte to decode; set to NULL once the terminating NUL has been decoded
const char* str;
// Number of bytes that may still be read from "str"
size_t maxLen;
#ifdef HAVE_LIBNCURSESW
// Last decoded character; WEOF after an invalid sequence was encountered
wint_t ch;
// Conversion state passed to mbrtowc()
mbstate_t mbState;
#else
// Last decoded character; EOF makes the decoder refuse to continue
int ch;
#endif
} MBStringDecoderState;

/*
 * Callback invoked by EncodePrintableString() once for every output
 * character, including the final 0 terminator. The character type is
 * wchar_t with wide-character curses and int otherwise.
 */
#ifdef HAVE_LIBNCURSESW
typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, wchar_t wc);
#else
typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, int c);
#endif

ATTR_NORETURN
void fail(void);
Expand Down Expand Up @@ -106,6 +131,21 @@ size_t String_safeStrncpy(char* restrict dest, const char* restrict src, size_t
size_t strnlen(const char* str, size_t maxLen);
#endif

/*
 * Passes a printable version of "src" (at most "maxLen" bytes are read) to
 * "encodeWChar", one character per call and ending with a 0 character.
 * Unprintable characters and undecodable bytes are substituted with a
 * replacement character; a run of them yields a single replacement.
 */
ATTR_NONNULL_N(1, 4) ATTR_ACCESS2_W(1) ATTR_ACCESS3_R(2, 3)
void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar);

/*
 * Returns a printable copy of "str" (at most "maxLen" bytes are read) in a
 * buffer allocated with xMalloc().
 * NOTE(review): presumably the caller is responsible for freeing it.
 */
ATTR_RETNONNULL ATTR_MALLOC ATTR_ACCESS3_R(1, 2)
char* String_makePrintable(const char* str, size_t maxLen);

/*
 * Decodes the next character of ps->str into ps->ch and advances the state.
 * Returns false when no further character can be decoded (end of input,
 * incomplete sequence or invalid sequence).
 */
ATTR_NONNULL
bool String_decodeNextWChar(MBStringDecoderState* ps);

/*
 * Measures how much of "*str" fits in "maxWidth" columns, preferring to end
 * at a "separator" when the text has to be cut. At most "maxLen" bytes are
 * read and a negative "maxWidth" means no limit. "*str" is advanced past
 * the measured substring; the return value is its width in columns.
 */
ATTR_NONNULL ATTR_ACCESS2_RW(1)
int String_lineBreakWidth(const char** str, size_t maxLen, int maxWidth, char separator);

/*
 * Measures the prefix of "*str" that fits in "maxWidth" columns. At most
 * "maxLen" bytes are read and a negative "maxWidth" means no limit. "*str"
 * is advanced past the measured prefix; the return value is its width in
 * columns.
 */
ATTR_NONNULL ATTR_ACCESS2_RW(1)
int String_mbswidth(const char** str, size_t maxLen, int maxWidth);

ATTR_FORMAT(printf, 2, 3) ATTR_NONNULL_N(1, 2)
int xAsprintf(char** strp, const char* fmt, ...);

Expand Down
Loading