From 12117776a0ba4dd0864e617f3c8d3bdb958b8019 Mon Sep 17 00:00:00 2001 From: Enno Rehling Date: Thu, 8 Aug 2019 18:23:31 +0200 Subject: [PATCH] ignore some more code points, but leave emoji intact. --- src/util/unicode.c | 15 ++++++++++++--- src/util/unicode.test.c | 11 +++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/util/unicode.c b/src/util/unicode.c index 3d11ed230..d09476318 100644 --- a/src/util/unicode.c +++ b/src/util/unicode.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -33,6 +34,14 @@ #define B00000011 0x03 #define B00000001 0x01 +static bool char_trimmed(wint_t wc) { + if (wc >= 0x2000 && wc <= 0x200f) { + /* only weird stuff here */ + return true; + } + return iswspace(wc) || iswcntrl(wc); +} + size_t unicode_utf8_trim(char *buf) { int result = 0, ts = 0; @@ -56,15 +65,15 @@ size_t unicode_utf8_trim(char *buf) ++result; } } - if (op == buf && (iswspace(wc) || !iswprint(wc))) { + if (op == buf && char_trimmed(wc)) { result += size; } - else if (wc>255 || !iscntrl(wc)) { + else if (wc>255 || !iswcntrl(wc)) { if (op != ip) { memmove(op, ip, size); } op += size; - if (iswspace(wc) || !iswprint(wc)) { + if (char_trimmed(wc)) { ts += size; } else { diff --git a/src/util/unicode.test.c b/src/util/unicode.test.c index 17c453296..8a5d77488 100644 --- a/src/util/unicode.test.c +++ b/src/util/unicode.test.c @@ -173,12 +173,23 @@ static void test_unicode_trim_ltrm(CuTest *tc) { CuAssertStrEquals(tc, expect, name); } +static void test_unicode_trim_emoji(CuTest *tc) { + const char clock[] = { 0xE2, 0x8F, 0xB0, 0x00 }; + char name[64]; + char expect[64]; + snprintf(name, sizeof(name), "%s Alarm%sClock %s", clock, clock, clock); + strcpy(expect, name); + CuAssertIntEquals(tc, 0, unicode_utf8_trim(name)); + CuAssertStrEquals(tc, expect, name); +} + CuSuite *get_unicode_suite(void) { CuSuite *suite = CuSuiteNew(); SUITE_ADD_TEST(suite, test_unicode_trim); SUITE_ADD_TEST(suite, test_unicode_trim_zwnj); 
SUITE_ADD_TEST(suite, test_unicode_trim_ltrm); + SUITE_ADD_TEST(suite, test_unicode_trim_emoji); SUITE_ADD_TEST(suite, test_unicode_utf8_to_other); SUITE_ADD_TEST(suite, test_unicode_utf8_to_ucs); SUITE_ADD_TEST(suite, test_unicode_compare);