custom function to make "valid" names.

remove any non-printable characters from a string.
TODO: test with utf-8 values.
TODO: remove leading/trailing whitespace.
This commit is contained in:
Enno Rehling 2016-11-08 22:54:47 +01:00
parent b9f6a3f620
commit f68fcf40a6
3 changed files with 46 additions and 2 deletions

View file

@ -14,6 +14,7 @@
#include <errno.h> #include <errno.h>
#include <string.h> #include <string.h>
#include <wctype.h> #include <wctype.h>
#include <ctype.h>
#define B00000000 0x00 #define B00000000 0x00
#define B10000000 0x80 #define B10000000 0x80
@ -31,6 +32,37 @@
#define B00000011 0x03 #define B00000011 0x03
#define B00000001 0x01 #define B00000001 0x01
/**
 * Copy a UTF-8 string while dropping every non-printable character.
 *
 * ASCII bytes are classified with isprint(); bytes with the high bit set
 * are decoded through unicode_utf8_to_ucs4() and classified with iswprint().
 * Printable characters are copied to the output unchanged.
 *
 * @param op      destination buffer
 * @param outlen  capacity of the destination in bytes, including room for
 *                the terminating NUL
 * @param ip      NUL-terminated input string
 * @return 0 on success (output is NUL-terminated), ENOMEM if the output
 *         including its terminator does not fit into outlen bytes (no
 *         terminator is written in that case), or the non-zero result of
 *         unicode_utf8_to_ucs4() if decoding fails.
 *
 * TODO: test with UTF-8 values.
 * TODO: remove leading/trailing whitespace.
 */
int unicode_utf8_mkname(utf8_t * op, size_t outlen, const utf8_t * ip)
{
    while (*ip) {
        ucs4_t ucs = *ip;
        size_t size = 1;
        bool isp;

        if (ucs & 0x80) {
            /* high bit set: let the decoder produce the code point and,
             * presumably, the byte length of the sequence in size */
            int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
            if (ret != 0) {
                return ret;
            }
            isp = iswprint((wint_t)ucs) != 0;
        }
        else {
            /* plain ASCII; <ctype.h> requires an unsigned char value */
            isp = isprint((unsigned char)ucs) != 0;
        }
        if (isp) {
            /* only characters that are actually written consume space */
            if (size > outlen) {
                return ENOMEM;
            }
            memcpy(op, ip, size);
            op += size;
            outlen -= size;
        }
        ip += size;
    }
    /* the result is used as a C string, so it needs a terminator */
    if (outlen == 0) {
        return ENOMEM;
    }
    *op = 0;
    return 0;
}
int unicode_utf8_tolower(utf8_t * op, size_t outlen, const utf8_t * ip) int unicode_utf8_tolower(utf8_t * op, size_t outlen, const utf8_t * ip)
{ {
while (*ip) { while (*ip) {

View file

@ -41,8 +41,8 @@ extern "C" {
int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b); int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b);
int unicode_latin1_to_utf8(utf8_t * out, size_t * outlen, int unicode_latin1_to_utf8(utf8_t * out, size_t * outlen,
const char *in, size_t * inlen); const char *in, size_t * inlen);
int unicode_utf8_tolower(utf8_t * out, size_t outlen, int unicode_utf8_tolower(utf8_t *op, size_t outlen, const utf8_t *ip);
const utf8_t * in); int unicode_utf8_mkname(utf8_t *op, size_t outlen, const utf8_t *ip);
#ifdef __cplusplus #ifdef __cplusplus
} }

View file

@ -5,6 +5,17 @@
#include <string.h> #include <string.h>
#include <errno.h> #include <errno.h>
/*
 * Exercises unicode_utf8_mkname(): non-printable characters must be dropped
 * from the output, and a too-small buffer must yield ENOMEM without anything
 * being written past the given length.
 */
static void test_unicode_mkname(CuTest * tc)
{
    char buffer[32];

    /* Start from a zeroed buffer so the string comparison below never reads
     * indeterminate bytes, whether or not the callee terminates its output. */
    memset(buffer, 0, sizeof(buffer));
    CuAssertIntEquals(tc, 0, unicode_utf8_mkname(buffer, sizeof(buffer), "HeLlO\nW0Rld"));
    CuAssertStrEquals(tc, "HeLlOW0Rld", buffer);

    /* Limit the output to 5 bytes; the 'X' sentinel at index 5 must survive,
     * showing that nothing is written beyond the stated capacity. */
    memset(buffer, 0, sizeof(buffer));
    buffer[5] = 'X';
    CuAssertIntEquals(tc, ENOMEM, unicode_utf8_mkname(buffer, 5, "HeLl\n W0Rld"));
    CuAssertStrEquals(tc, "HeLl X", buffer);
}
static void test_unicode_tolower(CuTest * tc) static void test_unicode_tolower(CuTest * tc)
{ {
char buffer[32]; char buffer[32];
@ -66,6 +77,7 @@ CuSuite *get_unicode_suite(void)
{ {
CuSuite *suite = CuSuiteNew(); CuSuite *suite = CuSuiteNew();
SUITE_ADD_TEST(suite, test_unicode_tolower); SUITE_ADD_TEST(suite, test_unicode_tolower);
SUITE_ADD_TEST(suite, test_unicode_mkname);
SUITE_ADD_TEST(suite, test_unicode_utf8_to_other); SUITE_ADD_TEST(suite, test_unicode_utf8_to_other);
return suite; return suite;
} }