Ignore some more code points, but leave emoji intact.

This commit is contained in:
Enno Rehling 2019-08-08 18:23:31 +02:00
parent a0bd0378c3
commit 12117776a0
2 changed files with 23 additions and 3 deletions

View file

@ -13,6 +13,7 @@
#include <assert.h> #include <assert.h>
#include <errno.h> #include <errno.h>
#include <stdbool.h>
#include <string.h> #include <string.h>
#include <wctype.h> #include <wctype.h>
#include <ctype.h> #include <ctype.h>
@ -33,6 +34,14 @@
#define B00000011 0x03 #define B00000011 0x03
#define B00000001 0x01 #define B00000001 0x01
/**
 * Tells whether a wide character belongs to the set that is stripped
 * when trimming a string.
 *
 * A character is trimmed when it lies in U+2000..U+200F (various-width
 * spaces, zero-width characters and direction marks), or when the
 * C library classifies it as whitespace or as a control character.
 *
 * @param wc  the decoded code point to classify
 * @return    true if the character should be trimmed, false otherwise
 */
static bool char_trimmed(wint_t wc) {
    /* the whole U+2000..U+200F block is invisible or spacing only */
    const bool in_blank_block = (wc >= 0x2000 && wc <= 0x200f);

    if (in_blank_block) {
        return true;
    }
    if (iswspace(wc)) {
        return true;
    }
    return iswcntrl(wc) != 0;
}
size_t unicode_utf8_trim(char *buf) size_t unicode_utf8_trim(char *buf)
{ {
int result = 0, ts = 0; int result = 0, ts = 0;
@ -56,15 +65,15 @@ size_t unicode_utf8_trim(char *buf)
++result; ++result;
} }
} }
if (op == buf && (iswspace(wc) || !iswprint(wc))) { if (op == buf && char_trimmed(wc)) {
result += size; result += size;
} }
else if (wc>255 || !iscntrl(wc)) { else if (wc>255 || !iswcntrl(wc)) {
if (op != ip) { if (op != ip) {
memmove(op, ip, size); memmove(op, ip, size);
} }
op += size; op += size;
if (iswspace(wc) || !iswprint(wc)) { if (char_trimmed(wc)) {
ts += size; ts += size;
} }
else { else {

View file

@ -173,12 +173,23 @@ static void test_unicode_trim_ltrm(CuTest *tc) {
CuAssertStrEquals(tc, expect, name); CuAssertStrEquals(tc, expect, name);
} }
/**
 * Checks that unicode_utf8_trim() leaves emoji untouched, whether they
 * appear at the start, in the middle, or at the end of the string.
 *
 * The input is built from U+23F0 (ALARM CLOCK), whose UTF-8 encoding is
 * the three bytes E2 8F B0. The call is expected to return 0 and to
 * leave the buffer byte-for-byte identical.
 */
static void test_unicode_trim_emoji(CuTest *tc) {
    /*
     * A string literal with hex escapes is used instead of a brace list of
     * integer constants: values like 0xE2 do not fit a signed char and would
     * rely on an implementation-defined conversion. The name avoids shadowing
     * the standard clock() function.
     */
    static const char alarm_clock[] = "\xE2\x8F\xB0";
    char name[64];
    char expect[64];

    snprintf(name, sizeof(name), "%s Alarm%sClock %s",
        alarm_clock, alarm_clock, alarm_clock);
    /* both buffers have the same size, so this copy cannot overflow */
    strcpy(expect, name);
    CuAssertIntEquals(tc, 0, unicode_utf8_trim(name));
    CuAssertStrEquals(tc, expect, name);
}
CuSuite *get_unicode_suite(void) CuSuite *get_unicode_suite(void)
{ {
CuSuite *suite = CuSuiteNew(); CuSuite *suite = CuSuiteNew();
SUITE_ADD_TEST(suite, test_unicode_trim); SUITE_ADD_TEST(suite, test_unicode_trim);
SUITE_ADD_TEST(suite, test_unicode_trim_zwnj); SUITE_ADD_TEST(suite, test_unicode_trim_zwnj);
SUITE_ADD_TEST(suite, test_unicode_trim_ltrm); SUITE_ADD_TEST(suite, test_unicode_trim_ltrm);
SUITE_ADD_TEST(suite, test_unicode_trim_emoji);
SUITE_ADD_TEST(suite, test_unicode_utf8_to_other); SUITE_ADD_TEST(suite, test_unicode_utf8_to_other);
SUITE_ADD_TEST(suite, test_unicode_utf8_to_ucs); SUITE_ADD_TEST(suite, test_unicode_utf8_to_ucs);
SUITE_ADD_TEST(suite, test_unicode_compare); SUITE_ADD_TEST(suite, test_unicode_compare);