fix bad names.

trim leading whitespace and non-printable characters from existing
unit, region and faction names/descriptions.
This commit is contained in:
Enno Rehling 2016-11-11 00:30:49 +01:00
parent 7b412399b7
commit 97e91fd8eb
4 changed files with 88 additions and 17 deletions

View File

@ -69,6 +69,7 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <util/resolve.h> #include <util/resolve.h>
#include <util/rng.h> #include <util/rng.h>
#include <util/umlaut.h> #include <util/umlaut.h>
#include <util/unicode.h>
#include <quicklist.h> #include <quicklist.h>
#include <stream.h> #include <stream.h>
@ -747,12 +748,18 @@ unit *read_unit(struct gamedata *data)
} }
READ_STR(data->store, obuf, sizeof(obuf)); READ_STR(data->store, obuf, sizeof(obuf));
if (unicode_utf8_trim(obuf)!=0) {
log_error("trim unit %s name to '%s'", itoa36(u->no), obuf);
};
u->_name = obuf[0] ? _strdup(obuf) : 0; u->_name = obuf[0] ? _strdup(obuf) : 0;
if (lomem) { if (lomem) {
READ_STR(data->store, NULL, 0); READ_STR(data->store, NULL, 0);
} }
else { else {
READ_STR(data->store, obuf, sizeof(obuf)); READ_STR(data->store, obuf, sizeof(obuf));
if (unicode_utf8_trim(obuf)!=0) {
log_error("trim unit %s info to '%s'", itoa36(u->no), obuf);
};
u->display = obuf[0] ? _strdup(obuf) : 0; u->display = obuf[0] ? _strdup(obuf) : 0;
} }
READ_INT(data->store, &number); READ_INT(data->store, &number);
@ -986,6 +993,9 @@ static region *readregion(struct gamedata *data, int x, int y)
else { else {
char info[DISPLAYSIZE]; char info[DISPLAYSIZE];
READ_STR(data->store, info, sizeof(info)); READ_STR(data->store, info, sizeof(info));
if (unicode_utf8_trim(info)!=0) {
log_error("trim region %d info to '%s'", uid, info);
};
region_setinfo(r, info); region_setinfo(r, info);
} }
@ -1003,6 +1013,9 @@ static region *readregion(struct gamedata *data, int x, int y)
if (fval(r->terrain, LAND_REGION)) { if (fval(r->terrain, LAND_REGION)) {
r->land = calloc(1, sizeof(land_region)); r->land = calloc(1, sizeof(land_region));
READ_STR(data->store, name, sizeof(name)); READ_STR(data->store, name, sizeof(name));
if (unicode_utf8_trim(name)!=0) {
log_error("trim region %d name to '%s'", uid, name);
};
r->land->name = _strdup(name); r->land->name = _strdup(name);
} }
if (r->land) { if (r->land) {
@ -1386,8 +1399,14 @@ faction *readfaction(struct gamedata * data)
} }
READ_STR(data->store, name, sizeof(name)); READ_STR(data->store, name, sizeof(name));
if (unicode_utf8_trim(name)!=0) {
log_error("trim faction %s name to '%s'", itoa36(f->no), name);
};
f->name = _strdup(name); f->name = _strdup(name);
READ_STR(data->store, name, sizeof(name)); READ_STR(data->store, name, sizeof(name));
if (unicode_utf8_trim(name)!=0) {
log_error("trim faction %s banner to '%s'", itoa36(f->no), name);
};
f->banner = _strdup(name); f->banner = _strdup(name);
log_debug(" - Lese Partei %s (%s)", f->name, factionid(f)); log_debug(" - Lese Partei %s (%s)", f->name, factionid(f));
@ -1686,7 +1705,6 @@ int read_game(gamedata *data) {
bp = &r->buildings; bp = &r->buildings;
while (--p >= 0) { while (--p >= 0) {
b = (building *)calloc(1, sizeof(building)); b = (building *)calloc(1, sizeof(building));
READ_INT(store, &b->no); READ_INT(store, &b->no);
*bp = b; *bp = b;

View File

@ -32,6 +32,36 @@
#define B00000011 0x03 #define B00000011 0x03
#define B00000001 0x01 #define B00000001 0x01
/**
 * Removes leading whitespace and every non-printable character from a
 * NUL-terminated UTF-8 string, compacting the string in place.
 *
 * Characters are classified with iswspace() and iswprint(); printable
 * whitespace that follows the first kept character is preserved.
 * NOTE(review): those classifiers follow the active LC_CTYPE locale, so
 * which non-ASCII characters count as printable depends on how the
 * program set its locale - confirm against the callers' setup.
 *
 * @param buf  string to clean; modified in place.
 * @return 0 if nothing was removed, otherwise the number of characters
 *         removed. If unicode_utf8_to_ucs4() rejects a sequence, its
 *         non-zero result is returned instead; in that case buf holds the
 *         text cleaned so far followed by the unprocessed remainder.
 *         Both outcomes are non-zero, so the return value alone does not
 *         tell a removal count from a decoder error.
 */
int unicode_utf8_trim(utf8_t *buf)
{
    int result = 0;
    utf8_t *op = buf, *ip = buf;

    while (*ip) {
        ucs4_t ucs = *ip;
        size_t size = 1;

        if (ucs & 0x80) {
            /* lead byte of a multi-byte sequence: decode it to learn its length */
            int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
            if (ret != 0) {
                /* Close the gap left by earlier removals so the caller does
                 * not see stale, duplicated bytes between op and ip. */
                if (op != ip) {
                    memmove(op, ip, strlen((const char *)ip) + 1);
                }
                return ret;
            }
        }
        if (op == buf && iswspace(ucs)) {
            /* nothing has been kept yet, so this is leading whitespace */
            ++result;
        }
        else if (iswprint(ucs)) {
            if (op != ip) {
                /* source and destination share one buffer and can overlap
                 * when the gap is smaller than the sequence: memmove, not
                 * memcpy */
                memmove(op, ip, size);
            }
            op += size;
        }
        else {
            /* control or otherwise non-printable character: drop it */
            ++result;
        }
        ip += size;
    }
    *op = '\0';
    return result;
}
int unicode_utf8_mkname(utf8_t * op, size_t outlen, const utf8_t * ip) int unicode_utf8_mkname(utf8_t * op, size_t outlen, const utf8_t * ip)
{ {
int ret = 0; int ret = 0;
@ -40,22 +70,22 @@ int unicode_utf8_mkname(utf8_t * op, size_t outlen, const utf8_t * ip)
size_t size = 1; size_t size = 1;
bool isp = false; bool isp = false;
do { do {
ucs4_t ucs = *ip; ucs4_t ucs = *ip;
if (ucs & 0x80) { if (ucs & 0x80) {
ret = unicode_utf8_to_ucs4(&ucs, ip, &size); ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
if (ret !=0) { if (ret !=0) {
return ret; return ret;
} }
isp = iswprint(ucs); isp = iswprint(ucs);
iss &= !!iswspace(ucs); iss &= !!iswspace(ucs);
} else { } else {
isp = isprint(ucs); isp = isprint(ucs);
iss &= !!isspace(ucs); iss &= !!isspace(ucs);
} }
if (iss) { if (iss) {
ip += size; ip += size;
} }
} while (iss); } while (iss);
if (size > outlen) { if (size > outlen) {
return ENOMEM; return ENOMEM;
} }

View File

@ -43,6 +43,7 @@ extern "C" {
const char *in, size_t * inlen); const char *in, size_t * inlen);
int unicode_utf8_tolower(utf8_t *op, size_t outlen, const utf8_t *ip); int unicode_utf8_tolower(utf8_t *op, size_t outlen, const utf8_t *ip);
int unicode_utf8_mkname(utf8_t *op, size_t outlen, const utf8_t *ip); int unicode_utf8_mkname(utf8_t *op, size_t outlen, const utf8_t *ip);
int unicode_utf8_trim(utf8_t *ip);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -5,6 +5,27 @@
#include <string.h> #include <string.h>
#include <errno.h> #include <errno.h>
/*
 * Exercises unicode_utf8_trim(): each case checks the returned number of
 * removed characters and the cleaned content left in the buffer.
 */
static void test_unicode_trim(CuTest * tc)
{
    char buffer[32];

    /* nothing to remove */
    strcpy(buffer, "Hello Word");
    CuAssertIntEquals(tc, 0, unicode_utf8_trim(buffer));
    CuAssertStrEquals(tc, "Hello Word", buffer);

    /* a trailing newline is non-printable and gets dropped */
    strcpy(buffer, "Hello Word\n");
    CuAssertIntEquals(tc, 1, unicode_utf8_trim(buffer));
    CuAssertStrEquals(tc, "Hello Word", buffer);

    /* two leading spaces plus trailing tab and newline: four removals */
    strcpy(buffer, "  Hello Word\t\n");
    CuAssertIntEquals(tc, 4, unicode_utf8_trim(buffer));
    CuAssertStrEquals(tc, "Hello Word", buffer);

    /* leading space, tab, space: three removals */
    strcpy(buffer, " \t Hello Word");
    CuAssertIntEquals(tc, 3, unicode_utf8_trim(buffer));
    CuAssertStrEquals(tc, "Hello Word", buffer);
}
static void test_unicode_mkname(CuTest * tc) static void test_unicode_mkname(CuTest * tc)
{ {
char buffer[32]; char buffer[32];
@ -82,6 +103,7 @@ CuSuite *get_unicode_suite(void)
CuSuite *suite = CuSuiteNew(); CuSuite *suite = CuSuiteNew();
SUITE_ADD_TEST(suite, test_unicode_tolower); SUITE_ADD_TEST(suite, test_unicode_tolower);
SUITE_ADD_TEST(suite, test_unicode_mkname); SUITE_ADD_TEST(suite, test_unicode_mkname);
SUITE_ADD_TEST(suite, test_unicode_trim);
SUITE_ADD_TEST(suite, test_unicode_utf8_to_other); SUITE_ADD_TEST(suite, test_unicode_utf8_to_other);
return suite; return suite;
} }