From f68fcf40a658671507ea044b41a4eb8fbabcfb22 Mon Sep 17 00:00:00 2001 From: Enno Rehling Date: Tue, 8 Nov 2016 22:54:47 +0100 Subject: [PATCH] custom function to make "valid" names. remove any non-printable characters from a string. TODO: test with utf-8 values. TODO: remove leading/trainling whitespace. --- src/util/unicode.c | 32 ++++++++++++++++++++++++++++++++ src/util/unicode.h | 4 ++-- src/util/unicode.test.c | 12 ++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/util/unicode.c b/src/util/unicode.c index f7ea2653b..dd8e03814 100644 --- a/src/util/unicode.c +++ b/src/util/unicode.c @@ -14,6 +14,7 @@ #include #include #include +#include #define B00000000 0x00 #define B10000000 0x80 @@ -31,6 +32,37 @@ #define B00000011 0x03 #define B00000001 0x01 +int unicode_utf8_mkname(utf8_t * op, size_t outlen, const utf8_t * ip) +{ + while (*ip) { + ucs4_t ucs = *ip; + size_t size = 1; + bool isp = false; +// bool iss = false; + if (ucs & 0x80) { + int ret = unicode_utf8_to_ucs4(&ucs, ip, &size); + if (ret !=0) { + return ret; + } + isp = iswprint(ucs); +// iss = iswspace(ucs); + } else { + isp = isprint(ucs); +// iss = isspace(ucs); + } + if (size > outlen) { + return ENOMEM; + } + if (isp) { + memcpy(op, ip, size); + op += size; + outlen -= size; + } + ip += size; + } + return 0; +} + int unicode_utf8_tolower(utf8_t * op, size_t outlen, const utf8_t * ip) { while (*ip) { diff --git a/src/util/unicode.h b/src/util/unicode.h index df68ade02..c817a04e3 100644 --- a/src/util/unicode.h +++ b/src/util/unicode.h @@ -41,8 +41,8 @@ extern "C" { int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b); int unicode_latin1_to_utf8(utf8_t * out, size_t * outlen, const char *in, size_t * inlen); - int unicode_utf8_tolower(utf8_t * out, size_t outlen, - const utf8_t * in); + int unicode_utf8_tolower(utf8_t *op, size_t outlen, const utf8_t *ip); + int unicode_utf8_mkname(utf8_t *op, size_t outlen, const utf8_t *ip); #ifdef __cplusplus } diff --git a/src/util/unicode.test.c b/src/util/unicode.test.c index 410e8e68a..3cfa0615c 100644 --- a/src/util/unicode.test.c +++ b/src/util/unicode.test.c @@ -5,6 +5,17 @@ #include #include +static void test_unicode_mkname(CuTest * tc) +{ + char buffer[32]; + CuAssertIntEquals(tc, 0, unicode_utf8_mkname(buffer, sizeof(buffer), "HeLlO\nW0Rld")); + CuAssertStrEquals(tc, "HeLlOW0Rld", buffer); + memset(buffer, 0, sizeof(buffer)); + buffer[5] = 'X'; + CuAssertIntEquals(tc, ENOMEM, unicode_utf8_mkname(buffer, 5, "HeLl\n W0Rld")); + CuAssertStrEquals(tc, "HeLl X", buffer); +} + static void test_unicode_tolower(CuTest * tc) { char buffer[32]; @@ -66,6 +77,7 @@ CuSuite *get_unicode_suite(void) { CuSuite *suite = CuSuiteNew(); SUITE_ADD_TEST(suite, test_unicode_tolower); + SUITE_ADD_TEST(suite, test_unicode_mkname); SUITE_ADD_TEST(suite, test_unicode_utf8_to_other); return suite; }