ignore some more code points, but leave emoji intact.

This commit is contained in:
Enno Rehling 2019-08-08 18:23:31 +02:00
parent a0bd0378c3
commit 12117776a0
2 changed files with 23 additions and 3 deletions

View File

@ -13,6 +13,7 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <string.h>
#include <wctype.h>
#include <ctype.h>
@ -33,6 +34,14 @@
#define B00000011 0x03
#define B00000001 0x01
/**
 * Tell whether a code point counts as trimmable filler.
 *
 * @param wc  code point to classify
 * @return true if wc lies in U+2000..U+200F, or if iswspace() or
 *         iswcntrl() classifies it as whitespace or a control
 *         character; false otherwise.
 */
static bool char_trimmed(wint_t wc) {
    /* Everything in U+2000..U+200F is treated as trimmable outright,
     * without consulting the <wctype.h> classifiers below. */
    const bool in_trim_block = (wc >= 0x2000) && (wc <= 0x200f);

    if (in_trim_block || iswspace(wc) != 0) {
        return true;
    }
    return iswcntrl(wc) != 0;
}
size_t unicode_utf8_trim(char *buf)
{
int result = 0, ts = 0;
@ -56,15 +65,15 @@ size_t unicode_utf8_trim(char *buf)
++result;
}
}
if (op == buf && (iswspace(wc) || !iswprint(wc))) {
if (op == buf && char_trimmed(wc)) {
result += size;
}
else if (wc>255 || !iscntrl(wc)) {
else if (wc>255 || !iswcntrl(wc)) {
if (op != ip) {
memmove(op, ip, size);
}
op += size;
if (iswspace(wc) || !iswprint(wc)) {
if (char_trimmed(wc)) {
ts += size;
}
else {

View File

@ -173,12 +173,23 @@ static void test_unicode_trim_ltrm(CuTest *tc) {
CuAssertStrEquals(tc, expect, name);
}
/*
 * Checks that unicode_utf8_trim() leaves emoji alone: a name that starts
 * with, contains and ends with U+23F0 must come back byte-for-byte
 * unchanged, and the call is expected to return 0.
 */
static void test_unicode_trim_emoji(CuTest *tc) {
    /* UTF-8 encoding of U+23F0 (ALARM CLOCK). Written as a string literal
     * so that no out-of-range integer constants are converted to plain
     * char, and named so it does not shadow clock() from <time.h>. */
    static const char alarm_clock[] = "\xE2\x8F\xB0";
    char name[64];
    char expect[64];

    snprintf(name, sizeof(name), "%s Alarm%sClock %s",
             alarm_clock, alarm_clock, alarm_clock);
    /* Keep a pristine copy to compare against after trimming in place;
     * both buffers have the same size, so the copy cannot overflow. */
    strcpy(expect, name);
    CuAssertIntEquals(tc, 0, unicode_utf8_trim(name));
    CuAssertStrEquals(tc, expect, name);
}
CuSuite *get_unicode_suite(void)
{
CuSuite *suite = CuSuiteNew();
SUITE_ADD_TEST(suite, test_unicode_trim);
SUITE_ADD_TEST(suite, test_unicode_trim_zwnj);
SUITE_ADD_TEST(suite, test_unicode_trim_ltrm);
SUITE_ADD_TEST(suite, test_unicode_trim_emoji);
SUITE_ADD_TEST(suite, test_unicode_utf8_to_other);
SUITE_ADD_TEST(suite, test_unicode_utf8_to_ucs);
SUITE_ADD_TEST(suite, test_unicode_compare);