Fix truncated names that split a UTF-8 sequence.

This commit is contained in:
Enno Rehling 2016-11-20 14:50:38 +01:00
parent a08563e846
commit 55a0388eb2
2 changed files with 26 additions and 5 deletions

View file

@ -40,13 +40,20 @@ int unicode_utf8_trim(utf8_t *buf)
size_t size = 1; size_t size = 1;
wint_t wc = *ip; wint_t wc = *ip;
if (wc & 0x80) { if (wc & 0x80) {
ucs4_t ucs; ucs4_t ucs = 0;
if (ip[1]) {
int ret = unicode_utf8_to_ucs4(&ucs, ip, &size); int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
if (ret != 0) { if (ret != 0) {
return ret; return ret;
} }
wc = (wint_t)ucs; wc = (wint_t)ucs;
} }
else {
wc = *op = '?';
size = 1;
++result;
}
}
if (op == buf && iswspace(wc)) { if (op == buf && iswspace(wc)) {
++result; ++result;
} }

View file

@ -28,6 +28,10 @@ static void test_unicode_trim(CuTest * tc)
strcpy(buffer, " \t Hello Word"); strcpy(buffer, " \t Hello Word");
CuAssertIntEquals(tc, 3, unicode_utf8_trim(buffer)); CuAssertIntEquals(tc, 3, unicode_utf8_trim(buffer));
CuAssertStrEquals(tc, "Hello Word", buffer); CuAssertStrEquals(tc, "Hello Word", buffer);
buffer[9] = 0xc3;
CuAssertIntEquals(tc, 1, unicode_utf8_trim(buffer));
CuAssertStrEquals(tc, "Hello Wor?", buffer);
} }
static void test_unicode_tolower(CuTest * tc) static void test_unicode_tolower(CuTest * tc)
@ -87,11 +91,21 @@ static void test_unicode_utf8_to_other(CuTest *tc)
CuAssertIntEquals(tc, 'l', ch); CuAssertIntEquals(tc, 'l', ch);
} }
/*
 * Checks the simplest decode path of unicode_utf8_to_ucs4(): for the
 * one-byte input "a" the call must return 0, store 'a' as the decoded
 * character and report a size of 1.
 */
static void test_unicode_utf8_to_ucs(CuTest *tc) {
    /*
     * Both out-parameters start at a known value so that no assertion can
     * ever compare an indeterminate object, mirroring the `ucs = 0`
     * initialisation used by unicode_utf8_trim().
     */
    ucs4_t ucs = 0;
    size_t sz = 0;

    CuAssertIntEquals(tc, 0, unicode_utf8_to_ucs4(&ucs, "a", &sz));
    /* CuAssertIntEquals compares ints; make the narrowing explicit. */
    CuAssertIntEquals(tc, 'a', (int)ucs);
    CuAssertIntEquals(tc, 1, (int)sz);
}
CuSuite *get_unicode_suite(void) CuSuite *get_unicode_suite(void)
{ {
CuSuite *suite = CuSuiteNew(); CuSuite *suite = CuSuiteNew();
SUITE_ADD_TEST(suite, test_unicode_tolower); SUITE_ADD_TEST(suite, test_unicode_tolower);
SUITE_ADD_TEST(suite, test_unicode_trim); SUITE_ADD_TEST(suite, test_unicode_trim);
SUITE_ADD_TEST(suite, test_unicode_utf8_to_other); SUITE_ADD_TEST(suite, test_unicode_utf8_to_other);
SUITE_ADD_TEST(suite, test_unicode_utf8_to_ucs);
return suite; return suite;
} }