unicode function to convert latin1->utf8

This commit is contained in:
Enno Rehling 2007-09-16 15:34:49 +00:00
parent 05a8bd5b39
commit 7e3c531436
8 changed files with 85 additions and 39 deletions

View File

@ -61,9 +61,10 @@
/* util includes */
#include <util/message.h>
#include <goodies.h>
#include <base36.h>
#include <language.h>
#include <util/goodies.h>
#include <util/base36.h>
#include <util/language.h>
#include <util/unicode.h>
/* libxml2 includes */
#include <libxml/tree.h>
@ -87,10 +88,10 @@ xml_s(const char * str)
static xmlChar buffer[1024];
const char * inbuf = str;
unsigned char * outbuf = buffer;
int inbytes = (int)strlen(str)+1;
int outbytes = (int)sizeof(buffer);
size_t inbytes = strlen(str)+1;
size_t outbytes = sizeof(buffer);
isolat1ToUTF8(outbuf, &outbytes, (const xmlChar *)inbuf, &inbytes);
unicode_latin1_to_utf8(outbuf, &outbytes, (const xmlChar *)inbuf, &inbytes);
return buffer;
}

View File

@ -533,28 +533,24 @@ read_borders(FILE * f)
fscanf(f, "%u %hd %hd %hd %hd", &bid, &fx, &fy, &tx, &ty);
from = findregion(fx, fy);
if (from==NULL) {
if (!incomplete_data) {
if (!incomplete_data && from==NULL) {
log_error(("border for unknown region %d,%d\n", fx, fy));
}
from = new_region(fx, fy);
}
to = findregion(tx, ty);
if (to==NULL) {
if (!incomplete_data) {
if (!incomplete_data && to==NULL) {
log_error(("border for unknown region %d,%d\n", tx, ty));
}
to = new_region(tx, ty);
}
type = find_bordertype(zText);
if (from) {
if (type==NULL) {
log_error(("[read_borders] unknown border type %s in %s\n", zText,
regionname(from, NULL)));
assert(type || !"border type not registered");
}
}
if (to==from) {
if (to==from && type && from) {
direction_t dir = (direction_t) (rng_int() % MAXDIRECTIONS);
region * r = rconnect(from, dir);
log_error(("[read_borders] invalid %s in %s\n", type->__name,
@ -567,5 +563,8 @@ read_borders(FILE * f)
assert(bid<=nextborder);
if (type->read) type->read(b, f);
a_read(f, &b->attribs);
if (!to || !from) {
erase_border(b);
}
}
}

View File

@ -67,6 +67,7 @@
#include <util/rand.h>
#include <util/rng.h>
#include <util/umlaut.h>
#include <util/unicode.h>
#include <libxml/encoding.h>
@ -176,10 +177,15 @@ freadstr(FILE * F, int encoding, char * start, size_t size)
if ((size_t)(str-start+1)<size) {
if (encoding == XML_CHAR_ENCODING_8859_1 && c&0x80) {
char inbuf = (char)c;
int inbytes = 1;
int outbytes = (int)(size-(str-start));
int ret = isolat1ToUTF8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes);
size_t inbytes = 1;
size_t outbytes = size-(str-start);
int ret = unicode_latin1_to_utf8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes);
if (ret>0) str+=ret;
else {
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
encoding = XML_CHAR_ENCODING_ERROR;
*str++ = (char)c;
}
} else {
*str++ = (char)c;
}
@ -190,10 +196,15 @@ freadstr(FILE * F, int encoding, char * start, size_t size)
if ((size_t)(str-start+1)<size) {
if (encoding == XML_CHAR_ENCODING_8859_1 && c&0x80) {
char inbuf = (char)c;
int inbytes = 1;
int outbytes = (int)(size-(str-start));
int ret = isolat1ToUTF8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes);
size_t inbytes = 1;
size_t outbytes = size-(str-start);
int ret = unicode_latin1_to_utf8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes);
if (ret>0) str+=ret;
else {
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
encoding = XML_CHAR_ENCODING_ERROR;
*str++ = (char)c;
}
} else {
*str++ = (char)c;
}

View File

@ -32,6 +32,7 @@
#include <util/log.h>
#include <util/rng.h>
#include <util/sql.h>
#include <util/unicode.h>
#include <libxml/encoding.h>
@ -243,9 +244,9 @@ read_newfactions(const char * filename)
nf->race = findrace(race, default_locale);
if (nf->race==NULL) {
char buffer[32];
int outbytes = sizeof(buffer);
int inbytes = (int)strlen(race);
isolat1ToUTF8((unsigned char *)buffer, &outbytes, (const unsigned char *)race, &inbytes);
size_t outbytes = sizeof(buffer);
size_t inbytes = strlen(race);
unicode_latin1_to_utf8((unsigned char *)buffer, &outbytes, (const unsigned char *)race, &inbytes);
nf->race = findrace(buffer, default_locale);
if (nf->race==NULL) {
log_error(("new faction has unknown race '%s'.\n", race));

View File

@ -147,10 +147,15 @@ getbuf_latin1(FILE * F)
}
} else {
char inbuf = (char)c;
int inbytes = 1;
int outbytes = (int)(MAXLINE-(cp-fbuf));
int ret = isolat1ToUTF8((xmlChar *)cp, &outbytes, (const xmlChar *)&inbuf, &inbytes);
size_t inbytes = 1;
size_t outbytes = MAXLINE-(cp-fbuf);
int ret = unicode_latin1_to_utf8((xmlChar *)cp, &outbytes, (const xmlChar *)&inbuf, &inbytes);
if (ret>0) cp+=ret;
else {
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
return NULL;
}
++bp;
continue;
}

View File

@ -22,8 +22,7 @@
#include <config.h>
#include "goodies.h"
/* libxml2 includes */
#include <libxml/encoding.h>
#include "unicode.h"
/* libc includes */
#include <wctype.h>
@ -81,9 +80,9 @@ locale_check(void)
int i, errorlevel = 0;
const unsigned char * umlaute = (const unsigned char*)"äöüÄÖÜß";
unsigned char result[32];
int inbytes = (int)strlen((const char *)umlaute);
int outbytes = (int)sizeof(result);
int ret = isolat1ToUTF8(result, &outbytes, umlaute, &inbytes);
size_t inbytes = strlen((const char *)umlaute);
size_t outbytes = sizeof(result);
int ret = unicode_latin1_to_utf8(result, &outbytes, umlaute, &inbytes);
if (ret<=0) {
++errorlevel;
}

View File

@ -14,6 +14,35 @@
#include <errno.h>
#include <wctype.h>
/**
 * Convert a buffer of ISO-8859-1 (Latin-1) bytes to UTF-8.
 *
 * Bytes 0x00..0x7F are copied unchanged. Bytes 0x80..0xBF become the
 * two-byte sequence C2 xx, and bytes 0xC0..0xFF become C3 (xx - 0x40).
 *
 * Conversion stops when the input is exhausted or when the next character
 * would not fit completely into the output buffer; a two-byte sequence is
 * never written partially. No terminating NUL is appended unless one is
 * part of the input.
 *
 * @param out    destination buffer for the UTF-8 bytes
 * @param outlen in: capacity of out in bytes; out: number of bytes written
 * @param in     source buffer of Latin-1 bytes
 * @param inlen  in: number of bytes available in in; out: number consumed
 * @return the number of bytes written to out (same value as *outlen on
 *         exit, narrowed to int)
 */
int
unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *in, size_t *inlen)
{
  /* Keep all bookkeeping in size_t: narrowing the sizes to int would turn
   * capacities above INT_MAX into negative values and abort the conversion
   * before the first byte. */
  const size_t insize = *inlen;
  const size_t outsize = *outlen;
  size_t ipos = 0;
  size_t opos = 0;

  while (ipos < insize) {
    const unsigned char c = in[ipos];

    if (c > 0x7F) {
      /* opos never exceeds outsize, so the subtraction cannot wrap */
      if (outsize - opos < 2) {
        break;
      }
      if (c > 0xBF) {
        out[opos++] = 0xC3;
        out[opos++] = (unsigned char)(c - 64);
      } else {
        out[opos++] = 0xC2;
        out[opos++] = c;
      }
    } else {
      if (opos >= outsize) {
        break;
      }
      out[opos++] = c;
    }
    ++ipos;
  }

  *outlen = opos;
  *inlen = ipos;
  return (int)opos;
}
int
unicode_utf8_strcasecmp(const char * a, const char * b)
{

View File

@ -23,6 +23,7 @@ extern "C" {
#define USE_UNICODE
extern int unicode_utf8_to_ucs4(wint_t *ucs4_character, const char *utf8_string, size_t *length);
extern int unicode_utf8_strcasecmp(const char * a, const char * b);
extern int unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *in, size_t *inlen);
#ifdef __cplusplus
}