From 7e3c531436c9820b6d4fea70c735f53b0388dc6d Mon Sep 17 00:00:00 2001 From: Enno Rehling Date: Sun, 16 Sep 2007 15:34:49 +0000 Subject: [PATCH] unicode function to convert latin1->utf8 --- src/common/gamecode/xmlreport.c | 13 +++++++------ src/common/kernel/border.c | 31 +++++++++++++++---------------- src/common/kernel/save.c | 23 +++++++++++++++++------ src/common/modules/autoseed.c | 7 ++++--- src/common/util/filereader.c | 11 ++++++++--- src/common/util/goodies.c | 9 ++++----- src/common/util/unicode.c | 29 +++++++++++++++++++++++++++++ src/common/util/unicode.h | 1 + 8 files changed, 85 insertions(+), 39 deletions(-) diff --git a/src/common/gamecode/xmlreport.c b/src/common/gamecode/xmlreport.c index cc28b72a9..e8d925eb4 100644 --- a/src/common/gamecode/xmlreport.c +++ b/src/common/gamecode/xmlreport.c @@ -61,9 +61,10 @@ /* util includes */ #include -#include -#include -#include +#include +#include +#include +#include /* libxml2 includes */ #include @@ -87,10 +88,10 @@ xml_s(const char * str) static xmlChar buffer[1024]; const char * inbuf = str; unsigned char * outbuf = buffer; - int inbytes = (int)strlen(str)+1; - int outbytes = (int)sizeof(buffer); + size_t inbytes = strlen(str)+1; + size_t outbytes = sizeof(buffer); - isolat1ToUTF8(outbuf, &outbytes, (const xmlChar *)inbuf, &inbytes); + unicode_latin1_to_utf8(outbuf, &outbytes, (const xmlChar *)inbuf, &inbytes); return buffer; } diff --git a/src/common/kernel/border.c b/src/common/kernel/border.c index 28ab079f5..aa37b2778 100644 --- a/src/common/kernel/border.c +++ b/src/common/kernel/border.c @@ -533,32 +533,28 @@ read_borders(FILE * f) fscanf(f, "%u %hd %hd %hd %hd", &bid, &fx, &fy, &tx, &ty); from = findregion(fx, fy); - if (from==NULL) { - if (!incomplete_data) { - log_error(("border for unknown region %d,%d\n", fx, fy)); - } - from = new_region(fx, fy); + if (!incomplete_data && from==NULL) { + log_error(("border for unknown region %d,%d\n", fx, fy)); } to = findregion(tx, ty); - if (to==NULL) { - 
if (!incomplete_data) { - log_error(("border for unknown region %d,%d\n", tx, ty)); - } - to = new_region(tx, ty); + if (!incomplete_data && to==NULL) { + log_error(("border for unknown region %d,%d\n", tx, ty)); } type = find_bordertype(zText); - if (type==NULL) { - log_error(("[read_borders] unknown border type %s in %s\n", zText, - regionname(from, NULL))); - assert(type || !"border type not registered"); + if (from) { + if (type==NULL) { + log_error(("[read_borders] unknown border type %s in %s\n", zText, + regionname(from, NULL))); + assert(type || !"border type not registered"); + } } - if (to==from) { + if (to==from && type && from) { direction_t dir = (direction_t) (rng_int() % MAXDIRECTIONS); region * r = rconnect(from, dir); log_error(("[read_borders] invalid %s in %s\n", type->__name, - regionname(from, NULL))); + regionname(from, NULL))); if (r!=NULL) to = r; } b = new_border(type, from, to); @@ -567,5 +563,8 @@ read_borders(FILE * f) assert(bid<=nextborder); if (type->read) type->read(b, f); a_read(f, &b->attribs); + if (!to || !from) { + erase_border(b); + } } } diff --git a/src/common/kernel/save.c b/src/common/kernel/save.c index 1494ab640..0668cabc8 100644 --- a/src/common/kernel/save.c +++ b/src/common/kernel/save.c @@ -67,6 +67,7 @@ #include #include #include +#include #include @@ -176,10 +177,15 @@ freadstr(FILE * F, int encoding, char * start, size_t size) if ((size_t)(str-start+1)0) str+=ret; + else { + log_error(("input data was not iso-8859-1! assuming utf-8\n")); + encoding = XML_CHAR_ENCODING_ERROR; + *str++ = (char)c; + } } else { *str++ = (char)c; } @@ -190,10 +196,15 @@ freadstr(FILE * F, int encoding, char * start, size_t size) if ((size_t)(str-start+1)0) str+=ret; + else { + log_error(("input data was not iso-8859-1! 
assuming utf-8\n")); + encoding = XML_CHAR_ENCODING_ERROR; + *str++ = (char)c; + } } else { *str++ = (char)c; } diff --git a/src/common/modules/autoseed.c b/src/common/modules/autoseed.c index 0fe604eb5..f374eb7d1 100644 --- a/src/common/modules/autoseed.c +++ b/src/common/modules/autoseed.c @@ -32,6 +32,7 @@ #include #include #include +#include #include @@ -243,9 +244,9 @@ read_newfactions(const char * filename) nf->race = findrace(race, default_locale); if (nf->race==NULL) { char buffer[32]; - int outbytes = sizeof(buffer); - int inbytes = (int)strlen(race); - isolat1ToUTF8((unsigned char *)buffer, &outbytes, (const unsigned char *)race, &inbytes); + size_t outbytes = sizeof(buffer); + size_t inbytes = strlen(race); + unicode_latin1_to_utf8((unsigned char *)buffer, &outbytes, (const unsigned char *)race, &inbytes); nf->race = findrace(buffer, default_locale); if (nf->race==NULL) { log_error(("new faction has unknown race '%s'.\n", race)); diff --git a/src/common/util/filereader.c b/src/common/util/filereader.c index eeb19cb3d..9660a02a2 100644 --- a/src/common/util/filereader.c +++ b/src/common/util/filereader.c @@ -147,10 +147,15 @@ getbuf_latin1(FILE * F) } } else { char inbuf = (char)c; - int inbytes = 1; - int outbytes = (int)(MAXLINE-(cp-fbuf)); - int ret = isolat1ToUTF8((xmlChar *)cp, &outbytes, (const xmlChar *)&inbuf, &inbytes); + size_t inbytes = 1; + size_t outbytes = MAXLINE-(cp-fbuf); + int ret = unicode_latin1_to_utf8((xmlChar *)cp, &outbytes, (const xmlChar *)&inbuf, &inbytes); if (ret>0) cp+=ret; + else { + log_error(("input data was not iso-8859-1! 
assuming utf-8\n")); + return NULL; + } + ++bp; continue; } diff --git a/src/common/util/goodies.c b/src/common/util/goodies.c index 6adf9bfc9..53d305ccf 100644 --- a/src/common/util/goodies.c +++ b/src/common/util/goodies.c @@ -22,8 +22,7 @@ #include #include "goodies.h" -/* libxml2 includes */ -#include +#include "unicode.h" /* libc includes */ #include @@ -81,9 +80,9 @@ locale_check(void) int i, errorlevel = 0; const unsigned char * umlaute = (const unsigned char*)"äöüÄÖÜß"; unsigned char result[32]; - int inbytes = (int)strlen((const char *)umlaute); - int outbytes = (int)sizeof(result); - int ret = isolat1ToUTF8(result, &outbytes, umlaute, &inbytes); + size_t inbytes = strlen((const char *)umlaute); + size_t outbytes = sizeof(result); + int ret = unicode_latin1_to_utf8(result, &outbytes, umlaute, &inbytes); if (ret<=0) { ++errorlevel; } diff --git a/src/common/util/unicode.c b/src/common/util/unicode.c index 351ba8917..866f87030 100644 --- a/src/common/util/unicode.c +++ b/src/common/util/unicode.c @@ -14,6 +14,35 @@ #include #include +int +unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *in, size_t *inlen) +{ + int is = (int)*inlen; + int os = (int)*outlen; + const unsigned char * ip = in; + unsigned char * op = out; + + while (ip-in<is) { + unsigned char c = *ip; + if (c > 0xBF) { + if (op-out>=os-1) break; + *op++ = 0xC3; + *op++ = c-64; + } else if (c>0x7F) { + if (op-out>=os-1) break; + *op++ = 0xC2; + *op++ = c; + } else { + if (op-out>=os) break; + *op++ = c; + } + ++ip; + } + *outlen = op-out; + *inlen = ip-in; + return (int)*outlen; +} + int unicode_utf8_strcasecmp(const char * a, const char * b) { diff --git a/src/common/util/unicode.h b/src/common/util/unicode.h index aca580bcd..074327ed9 100644 --- a/src/common/util/unicode.h +++ b/src/common/util/unicode.h @@ -23,6 +23,7 @@ extern "C" { #define USE_UNICODE extern int unicode_utf8_to_ucs4(wint_t *ucs4_character, const char *utf8_string, size_t *length); extern int unicode_utf8_strcasecmp(const char * a, 
const char * b); + extern int unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *in, size_t *inlen); #ifdef __cplusplus }