forked from github/server
unicode function to convert latin1->utf8
This commit is contained in:
parent
05a8bd5b39
commit
7e3c531436
8 changed files with 85 additions and 39 deletions
|
@ -61,9 +61,10 @@
|
||||||
|
|
||||||
/* util includes */
|
/* util includes */
|
||||||
#include <util/message.h>
|
#include <util/message.h>
|
||||||
#include <goodies.h>
|
#include <util/goodies.h>
|
||||||
#include <base36.h>
|
#include <util/base36.h>
|
||||||
#include <language.h>
|
#include <util/language.h>
|
||||||
|
#include <util/unicode.h>
|
||||||
|
|
||||||
/* libxml2 includes */
|
/* libxml2 includes */
|
||||||
#include <libxml/tree.h>
|
#include <libxml/tree.h>
|
||||||
|
@ -87,10 +88,10 @@ xml_s(const char * str)
|
||||||
static xmlChar buffer[1024];
|
static xmlChar buffer[1024];
|
||||||
const char * inbuf = str;
|
const char * inbuf = str;
|
||||||
unsigned char * outbuf = buffer;
|
unsigned char * outbuf = buffer;
|
||||||
int inbytes = (int)strlen(str)+1;
|
size_t inbytes = strlen(str)+1;
|
||||||
int outbytes = (int)sizeof(buffer);
|
size_t outbytes = sizeof(buffer);
|
||||||
|
|
||||||
isolat1ToUTF8(outbuf, &outbytes, (const xmlChar *)inbuf, &inbytes);
|
unicode_latin1_to_utf8(outbuf, &outbytes, (const xmlChar *)inbuf, &inbytes);
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -533,32 +533,28 @@ read_borders(FILE * f)
|
||||||
fscanf(f, "%u %hd %hd %hd %hd", &bid, &fx, &fy, &tx, &ty);
|
fscanf(f, "%u %hd %hd %hd %hd", &bid, &fx, &fy, &tx, &ty);
|
||||||
|
|
||||||
from = findregion(fx, fy);
|
from = findregion(fx, fy);
|
||||||
if (from==NULL) {
|
if (!incomplete_data && from==NULL) {
|
||||||
if (!incomplete_data) {
|
log_error(("border for unknown region %d,%d\n", fx, fy));
|
||||||
log_error(("border for unknown region %d,%d\n", fx, fy));
|
|
||||||
}
|
|
||||||
from = new_region(fx, fy);
|
|
||||||
}
|
}
|
||||||
to = findregion(tx, ty);
|
to = findregion(tx, ty);
|
||||||
if (to==NULL) {
|
if (!incomplete_data && to==NULL) {
|
||||||
if (!incomplete_data) {
|
log_error(("border for unknown region %d,%d\n", tx, ty));
|
||||||
log_error(("border for unknown region %d,%d\n", tx, ty));
|
|
||||||
}
|
|
||||||
to = new_region(tx, ty);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type = find_bordertype(zText);
|
type = find_bordertype(zText);
|
||||||
if (type==NULL) {
|
if (from) {
|
||||||
log_error(("[read_borders] unknown border type %s in %s\n", zText,
|
if (type==NULL) {
|
||||||
regionname(from, NULL)));
|
log_error(("[read_borders] unknown border type %s in %s\n", zText,
|
||||||
assert(type || !"border type not registered");
|
regionname(from, NULL)));
|
||||||
|
assert(type || !"border type not registered");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (to==from) {
|
if (to==from && type && from) {
|
||||||
direction_t dir = (direction_t) (rng_int() % MAXDIRECTIONS);
|
direction_t dir = (direction_t) (rng_int() % MAXDIRECTIONS);
|
||||||
region * r = rconnect(from, dir);
|
region * r = rconnect(from, dir);
|
||||||
log_error(("[read_borders] invalid %s in %s\n", type->__name,
|
log_error(("[read_borders] invalid %s in %s\n", type->__name,
|
||||||
regionname(from, NULL)));
|
regionname(from, NULL)));
|
||||||
if (r!=NULL) to = r;
|
if (r!=NULL) to = r;
|
||||||
}
|
}
|
||||||
b = new_border(type, from, to);
|
b = new_border(type, from, to);
|
||||||
|
@ -567,5 +563,8 @@ read_borders(FILE * f)
|
||||||
assert(bid<=nextborder);
|
assert(bid<=nextborder);
|
||||||
if (type->read) type->read(b, f);
|
if (type->read) type->read(b, f);
|
||||||
a_read(f, &b->attribs);
|
a_read(f, &b->attribs);
|
||||||
|
if (!to || !from) {
|
||||||
|
erase_border(b);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,6 +67,7 @@
|
||||||
#include <util/rand.h>
|
#include <util/rand.h>
|
||||||
#include <util/rng.h>
|
#include <util/rng.h>
|
||||||
#include <util/umlaut.h>
|
#include <util/umlaut.h>
|
||||||
|
#include <util/unicode.h>
|
||||||
|
|
||||||
#include <libxml/encoding.h>
|
#include <libxml/encoding.h>
|
||||||
|
|
||||||
|
@ -176,10 +177,15 @@ freadstr(FILE * F, int encoding, char * start, size_t size)
|
||||||
if ((size_t)(str-start+1)<size) {
|
if ((size_t)(str-start+1)<size) {
|
||||||
if (encoding == XML_CHAR_ENCODING_8859_1 && c&0x80) {
|
if (encoding == XML_CHAR_ENCODING_8859_1 && c&0x80) {
|
||||||
char inbuf = (char)c;
|
char inbuf = (char)c;
|
||||||
int inbytes = 1;
|
size_t inbytes = 1;
|
||||||
int outbytes = (int)(size-(str-start));
|
size_t outbytes = size-(str-start);
|
||||||
int ret = isolat1ToUTF8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
int ret = unicode_latin1_to_utf8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
||||||
if (ret>0) str+=ret;
|
if (ret>0) str+=ret;
|
||||||
|
else {
|
||||||
|
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
|
||||||
|
encoding = XML_CHAR_ENCODING_ERROR;
|
||||||
|
*str++ = (char)c;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
*str++ = (char)c;
|
*str++ = (char)c;
|
||||||
}
|
}
|
||||||
|
@ -190,10 +196,15 @@ freadstr(FILE * F, int encoding, char * start, size_t size)
|
||||||
if ((size_t)(str-start+1)<size) {
|
if ((size_t)(str-start+1)<size) {
|
||||||
if (encoding == XML_CHAR_ENCODING_8859_1 && c&0x80) {
|
if (encoding == XML_CHAR_ENCODING_8859_1 && c&0x80) {
|
||||||
char inbuf = (char)c;
|
char inbuf = (char)c;
|
||||||
int inbytes = 1;
|
size_t inbytes = 1;
|
||||||
int outbytes = (int)(size-(str-start));
|
size_t outbytes = size-(str-start);
|
||||||
int ret = isolat1ToUTF8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
int ret = unicode_latin1_to_utf8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
||||||
if (ret>0) str+=ret;
|
if (ret>0) str+=ret;
|
||||||
|
else {
|
||||||
|
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
|
||||||
|
encoding = XML_CHAR_ENCODING_ERROR;
|
||||||
|
*str++ = (char)c;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
*str++ = (char)c;
|
*str++ = (char)c;
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,7 @@
|
||||||
#include <util/log.h>
|
#include <util/log.h>
|
||||||
#include <util/rng.h>
|
#include <util/rng.h>
|
||||||
#include <util/sql.h>
|
#include <util/sql.h>
|
||||||
|
#include <util/unicode.h>
|
||||||
|
|
||||||
#include <libxml/encoding.h>
|
#include <libxml/encoding.h>
|
||||||
|
|
||||||
|
@ -243,9 +244,9 @@ read_newfactions(const char * filename)
|
||||||
nf->race = findrace(race, default_locale);
|
nf->race = findrace(race, default_locale);
|
||||||
if (nf->race==NULL) {
|
if (nf->race==NULL) {
|
||||||
char buffer[32];
|
char buffer[32];
|
||||||
int outbytes = sizeof(buffer);
|
size_t outbytes = sizeof(buffer);
|
||||||
int inbytes = (int)strlen(race);
|
size_t inbytes = strlen(race);
|
||||||
isolat1ToUTF8((unsigned char *)buffer, &outbytes, (const unsigned char *)race, &inbytes);
|
unicode_latin1_to_utf8((unsigned char *)buffer, &outbytes, (const unsigned char *)race, &inbytes);
|
||||||
nf->race = findrace(buffer, default_locale);
|
nf->race = findrace(buffer, default_locale);
|
||||||
if (nf->race==NULL) {
|
if (nf->race==NULL) {
|
||||||
log_error(("new faction has unknown race '%s'.\n", race));
|
log_error(("new faction has unknown race '%s'.\n", race));
|
||||||
|
|
|
@ -147,10 +147,15 @@ getbuf_latin1(FILE * F)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
char inbuf = (char)c;
|
char inbuf = (char)c;
|
||||||
int inbytes = 1;
|
size_t inbytes = 1;
|
||||||
int outbytes = (int)(MAXLINE-(cp-fbuf));
|
size_t outbytes = MAXLINE-(cp-fbuf);
|
||||||
int ret = isolat1ToUTF8((xmlChar *)cp, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
int ret = unicode_latin1_to_utf8((xmlChar *)cp, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
||||||
if (ret>0) cp+=ret;
|
if (ret>0) cp+=ret;
|
||||||
|
else {
|
||||||
|
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
++bp;
|
++bp;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,8 +22,7 @@
|
||||||
#include <config.h>
|
#include <config.h>
|
||||||
#include "goodies.h"
|
#include "goodies.h"
|
||||||
|
|
||||||
/* libxml2 includes */
|
#include "unicode.h"
|
||||||
#include <libxml/encoding.h>
|
|
||||||
|
|
||||||
/* libc includes */
|
/* libc includes */
|
||||||
#include <wctype.h>
|
#include <wctype.h>
|
||||||
|
@ -81,9 +80,9 @@ locale_check(void)
|
||||||
int i, errorlevel = 0;
|
int i, errorlevel = 0;
|
||||||
const unsigned char * umlaute = (const unsigned char*)"äöüÄÖÜß";
|
const unsigned char * umlaute = (const unsigned char*)"äöüÄÖÜß";
|
||||||
unsigned char result[32];
|
unsigned char result[32];
|
||||||
int inbytes = (int)strlen((const char *)umlaute);
|
size_t inbytes = strlen((const char *)umlaute);
|
||||||
int outbytes = (int)sizeof(result);
|
size_t outbytes = sizeof(result);
|
||||||
int ret = isolat1ToUTF8(result, &outbytes, umlaute, &inbytes);
|
int ret = unicode_latin1_to_utf8(result, &outbytes, umlaute, &inbytes);
|
||||||
if (ret<=0) {
|
if (ret<=0) {
|
||||||
++errorlevel;
|
++errorlevel;
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,35 @@
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <wctype.h>
|
#include <wctype.h>
|
||||||
|
|
||||||
|
/*
 * Convert a run of ISO-8859-1 (Latin-1) bytes to UTF-8.
 *
 *   out     destination buffer
 *   outlen  in:  capacity of `out` in bytes
 *           out: number of bytes actually written
 *   in      source bytes
 *   inlen   in:  number of source bytes to convert
 *           out: number of source bytes actually consumed
 *
 * Returns the number of bytes written (the final *outlen, cast to int).
 * For outputs larger than an int can hold, rely on *outlen instead.
 *
 * Conversion stops quietly at the first character that no longer fits
 * into `out`; a two-byte sequence is never split. No terminating NUL is
 * appended -- include it in *inlen if the result must be a C string.
 * A return value of 0 therefore means "nothing was converted".
 */
int
unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *in, size_t *inlen)
{
  /* Keep all bookkeeping in size_t: narrowing the lengths to int would
   * truncate (or turn negative) for buffers larger than INT_MAX. */
  const size_t in_size = *inlen;
  const size_t out_size = *outlen;
  size_t consumed = 0;
  size_t written = 0;

  while (consumed < in_size) {
    const unsigned char c = in[consumed];

    if (c > 0x7F) {
      /* U+0080..U+00FF need two bytes. `written <= out_size` always
       * holds, so this subtraction cannot wrap around. */
      if (out_size - written < 2) break;
      if (c > 0xBF) {
        /* U+00C0..U+00FF: lead byte 0xC3, trail byte 0x80..0xBF */
        out[written++] = 0xC3;
        out[written++] = (unsigned char)(c - 0x40);
      } else {
        /* U+0080..U+00BF: lead byte 0xC2, trail byte is the input byte */
        out[written++] = 0xC2;
        out[written++] = c;
      }
    } else {
      /* plain ASCII is copied through unchanged */
      if (written >= out_size) break;
      out[written++] = c;
    }
    ++consumed;
  }

  *outlen = written;
  *inlen = consumed;
  return (int)written;
}
|
||||||
|
|
||||||
int
|
int
|
||||||
unicode_utf8_strcasecmp(const char * a, const char * b)
|
unicode_utf8_strcasecmp(const char * a, const char * b)
|
||||||
{
|
{
|
||||||
|
|
|
@ -23,6 +23,7 @@ extern "C" {
|
||||||
#define USE_UNICODE
|
#define USE_UNICODE
|
||||||
extern int unicode_utf8_to_ucs4(wint_t *ucs4_character, const char *utf8_string, size_t *length);
|
extern int unicode_utf8_to_ucs4(wint_t *ucs4_character, const char *utf8_string, size_t *length);
|
||||||
extern int unicode_utf8_strcasecmp(const char * a, const char * b);
|
extern int unicode_utf8_strcasecmp(const char * a, const char * b);
|
||||||
|
extern int unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *in, size_t *inlen);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue