Ignore some more code points, but leave emoji intact.

2019-08-08 18:23:31 +02:00 · 2019-08-08 18:23:31 +02:00 · 12117776a0
commit 12117776a0
parent a0bd0378c3
2 changed files with 23 additions and 3 deletions
--- a/src/util/unicode.c
+++ b/src/util/unicode.c
@ -13,6 +13,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <stdbool.h>
 #include <string.h>
 #include <wctype.h>
 #include <ctype.h>
@ -33,6 +34,14 @@
 #define B00000011 0x03
 #define B00000001 0x01
 /*
  * Tell whether a wide character is one that the trimming code should
  * treat as removable.
  *
  * Returns true when wc lies in U+2000..U+200F (in Unicode: the assorted
  * spaces, zero-width characters and directional marks at the start of the
  * General Punctuation block), or when the current locale classifies it as
  * whitespace (iswspace) or as a control character (iswcntrl).
  * Returns false for everything else.
  */
 static bool char_trimmed(wint_t wc) {
    /* the range test comes first so the locale lookups are skipped for it */
    const bool in_special_range = (wc >= 0x2000 && wc <= 0x200f);

    return in_special_range || iswspace(wc) || iswcntrl(wc);
 }
 size_t unicode_utf8_trim(char *buf)
 {
    int result = 0, ts = 0;
@ -56,15 +65,15 @@ size_t unicode_utf8_trim(char *buf)
                ++result;
            }
        }
-        if (op == buf && (iswspace(wc) || !iswprint(wc))) {
+        if (op == buf && char_trimmed(wc)) {
            result += size;
        }
-        else if (wc>255 || !iscntrl(wc)) {
+        else if (wc>255 || !iswcntrl(wc)) {
            if (op != ip) {
                memmove(op, ip, size);
            }
            op += size;
-            if (iswspace(wc) || !iswprint(wc)) {
+            if (char_trimmed(wc)) {
                ts += size;
            }
            else {
--- a/src/util/unicode.test.c
+++ b/src/util/unicode.test.c
@ -173,12 +173,23 @@ static void test_unicode_trim_ltrm(CuTest *tc) {
    CuAssertStrEquals(tc, expect, name);
 }
 /*
  * An emoji at the start, in the middle and at the end of a string must not
  * be touched by unicode_utf8_trim(): the call is expected to return 0 and
  * to leave the buffer byte-for-byte unchanged.
  */
 static void test_unicode_trim_emoji(CuTest *tc) {
    /*
     * UTF-8 encoding of U+23F0 (ALARM CLOCK). Written as a string literal
     * with hex escapes: initialising plain char from 0xE2 etc. is an
     * out-of-range, implementation-defined conversion where char is signed.
     * The name avoids shadowing the standard clock() function.
     */
    const char alarm_clock[] = "\xE2\x8F\xB0";
    char name[64];
    char expect[64];

    snprintf(name, sizeof(name), "%s Alarm%sClock %s",
        alarm_clock, alarm_clock, alarm_clock);
    /* keep a pristine copy to compare against after trimming */
    strcpy(expect, name);
    CuAssertIntEquals(tc, 0, unicode_utf8_trim(name));
    CuAssertStrEquals(tc, expect, name);
 }
 CuSuite *get_unicode_suite(void)
 {
    CuSuite *suite = CuSuiteNew();
    SUITE_ADD_TEST(suite, test_unicode_trim);
    SUITE_ADD_TEST(suite, test_unicode_trim_zwnj);
    SUITE_ADD_TEST(suite, test_unicode_trim_ltrm);
    SUITE_ADD_TEST(suite, test_unicode_trim_emoji);
    SUITE_ADD_TEST(suite, test_unicode_utf8_to_other);
    SUITE_ADD_TEST(suite, test_unicode_utf8_to_ucs);
    SUITE_ADD_TEST(suite, test_unicode_compare);