forked from github/server
unicode function to convert latin1->utf8
This commit is contained in:
parent
05a8bd5b39
commit
7e3c531436
8 changed files with 85 additions and 39 deletions
|
@ -61,9 +61,10 @@
|
|||
|
||||
/* util includes */
|
||||
#include <util/message.h>
|
||||
#include <goodies.h>
|
||||
#include <base36.h>
|
||||
#include <language.h>
|
||||
#include <util/goodies.h>
|
||||
#include <util/base36.h>
|
||||
#include <util/language.h>
|
||||
#include <util/unicode.h>
|
||||
|
||||
/* libxml2 includes */
|
||||
#include <libxml/tree.h>
|
||||
|
@ -87,10 +88,10 @@ xml_s(const char * str)
|
|||
static xmlChar buffer[1024];
|
||||
const char * inbuf = str;
|
||||
unsigned char * outbuf = buffer;
|
||||
int inbytes = (int)strlen(str)+1;
|
||||
int outbytes = (int)sizeof(buffer);
|
||||
size_t inbytes = strlen(str)+1;
|
||||
size_t outbytes = sizeof(buffer);
|
||||
|
||||
isolat1ToUTF8(outbuf, &outbytes, (const xmlChar *)inbuf, &inbytes);
|
||||
unicode_latin1_to_utf8(outbuf, &outbytes, (const xmlChar *)inbuf, &inbytes);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
|
|
|
@ -533,32 +533,28 @@ read_borders(FILE * f)
|
|||
fscanf(f, "%u %hd %hd %hd %hd", &bid, &fx, &fy, &tx, &ty);
|
||||
|
||||
from = findregion(fx, fy);
|
||||
if (from==NULL) {
|
||||
if (!incomplete_data) {
|
||||
log_error(("border for unknown region %d,%d\n", fx, fy));
|
||||
}
|
||||
from = new_region(fx, fy);
|
||||
if (!incomplete_data && from==NULL) {
|
||||
log_error(("border for unknown region %d,%d\n", fx, fy));
|
||||
}
|
||||
to = findregion(tx, ty);
|
||||
if (to==NULL) {
|
||||
if (!incomplete_data) {
|
||||
log_error(("border for unknown region %d,%d\n", tx, ty));
|
||||
}
|
||||
to = new_region(tx, ty);
|
||||
if (!incomplete_data && to==NULL) {
|
||||
log_error(("border for unknown region %d,%d\n", tx, ty));
|
||||
}
|
||||
|
||||
type = find_bordertype(zText);
|
||||
if (type==NULL) {
|
||||
log_error(("[read_borders] unknown border type %s in %s\n", zText,
|
||||
regionname(from, NULL)));
|
||||
assert(type || !"border type not registered");
|
||||
if (from) {
|
||||
if (type==NULL) {
|
||||
log_error(("[read_borders] unknown border type %s in %s\n", zText,
|
||||
regionname(from, NULL)));
|
||||
assert(type || !"border type not registered");
|
||||
}
|
||||
}
|
||||
|
||||
if (to==from) {
|
||||
if (to==from && type && from) {
|
||||
direction_t dir = (direction_t) (rng_int() % MAXDIRECTIONS);
|
||||
region * r = rconnect(from, dir);
|
||||
log_error(("[read_borders] invalid %s in %s\n", type->__name,
|
||||
regionname(from, NULL)));
|
||||
regionname(from, NULL)));
|
||||
if (r!=NULL) to = r;
|
||||
}
|
||||
b = new_border(type, from, to);
|
||||
|
@ -567,5 +563,8 @@ read_borders(FILE * f)
|
|||
assert(bid<=nextborder);
|
||||
if (type->read) type->read(b, f);
|
||||
a_read(f, &b->attribs);
|
||||
if (!to || !from) {
|
||||
erase_border(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -67,6 +67,7 @@
|
|||
#include <util/rand.h>
|
||||
#include <util/rng.h>
|
||||
#include <util/umlaut.h>
|
||||
#include <util/unicode.h>
|
||||
|
||||
#include <libxml/encoding.h>
|
||||
|
||||
|
@ -176,10 +177,15 @@ freadstr(FILE * F, int encoding, char * start, size_t size)
|
|||
if ((size_t)(str-start+1)<size) {
|
||||
if (encoding == XML_CHAR_ENCODING_8859_1 && c&0x80) {
|
||||
char inbuf = (char)c;
|
||||
int inbytes = 1;
|
||||
int outbytes = (int)(size-(str-start));
|
||||
int ret = isolat1ToUTF8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
||||
size_t inbytes = 1;
|
||||
size_t outbytes = size-(str-start);
|
||||
int ret = unicode_latin1_to_utf8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
||||
if (ret>0) str+=ret;
|
||||
else {
|
||||
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
|
||||
encoding = XML_CHAR_ENCODING_ERROR;
|
||||
*str++ = (char)c;
|
||||
}
|
||||
} else {
|
||||
*str++ = (char)c;
|
||||
}
|
||||
|
@ -190,10 +196,15 @@ freadstr(FILE * F, int encoding, char * start, size_t size)
|
|||
if ((size_t)(str-start+1)<size) {
|
||||
if (encoding == XML_CHAR_ENCODING_8859_1 && c&0x80) {
|
||||
char inbuf = (char)c;
|
||||
int inbytes = 1;
|
||||
int outbytes = (int)(size-(str-start));
|
||||
int ret = isolat1ToUTF8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
||||
size_t inbytes = 1;
|
||||
size_t outbytes = size-(str-start);
|
||||
int ret = unicode_latin1_to_utf8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
||||
if (ret>0) str+=ret;
|
||||
else {
|
||||
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
|
||||
encoding = XML_CHAR_ENCODING_ERROR;
|
||||
*str++ = (char)c;
|
||||
}
|
||||
} else {
|
||||
*str++ = (char)c;
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#include <util/log.h>
|
||||
#include <util/rng.h>
|
||||
#include <util/sql.h>
|
||||
#include <util/unicode.h>
|
||||
|
||||
#include <libxml/encoding.h>
|
||||
|
||||
|
@ -243,9 +244,9 @@ read_newfactions(const char * filename)
|
|||
nf->race = findrace(race, default_locale);
|
||||
if (nf->race==NULL) {
|
||||
char buffer[32];
|
||||
int outbytes = sizeof(buffer);
|
||||
int inbytes = (int)strlen(race);
|
||||
isolat1ToUTF8((unsigned char *)buffer, &outbytes, (const unsigned char *)race, &inbytes);
|
||||
size_t outbytes = sizeof(buffer);
|
||||
size_t inbytes = strlen(race);
|
||||
unicode_latin1_to_utf8((unsigned char *)buffer, &outbytes, (const unsigned char *)race, &inbytes);
|
||||
nf->race = findrace(buffer, default_locale);
|
||||
if (nf->race==NULL) {
|
||||
log_error(("new faction has unknown race '%s'.\n", race));
|
||||
|
|
|
@ -147,10 +147,15 @@ getbuf_latin1(FILE * F)
|
|||
}
|
||||
} else {
|
||||
char inbuf = (char)c;
|
||||
int inbytes = 1;
|
||||
int outbytes = (int)(MAXLINE-(cp-fbuf));
|
||||
int ret = isolat1ToUTF8((xmlChar *)cp, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
||||
size_t inbytes = 1;
|
||||
size_t outbytes = MAXLINE-(cp-fbuf);
|
||||
int ret = unicode_latin1_to_utf8((xmlChar *)cp, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
||||
if (ret>0) cp+=ret;
|
||||
else {
|
||||
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
++bp;
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -22,8 +22,7 @@
|
|||
#include <config.h>
|
||||
#include "goodies.h"
|
||||
|
||||
/* libxml2 includes */
|
||||
#include <libxml/encoding.h>
|
||||
#include "unicode.h"
|
||||
|
||||
/* libc includes */
|
||||
#include <wctype.h>
|
||||
|
@ -81,9 +80,9 @@ locale_check(void)
|
|||
int i, errorlevel = 0;
|
||||
const unsigned char * umlaute = (const unsigned char*)"äöüÄÖÜß";
|
||||
unsigned char result[32];
|
||||
int inbytes = (int)strlen((const char *)umlaute);
|
||||
int outbytes = (int)sizeof(result);
|
||||
int ret = isolat1ToUTF8(result, &outbytes, umlaute, &inbytes);
|
||||
size_t inbytes = strlen((const char *)umlaute);
|
||||
size_t outbytes = sizeof(result);
|
||||
int ret = unicode_latin1_to_utf8(result, &outbytes, umlaute, &inbytes);
|
||||
if (ret<=0) {
|
||||
++errorlevel;
|
||||
}
|
||||
|
|
|
@ -14,6 +14,35 @@
|
|||
#include <errno.h>
|
||||
#include <wctype.h>
|
||||
|
||||
/**
 * Convert a buffer of ISO-8859-1 (Latin-1) bytes to UTF-8.
 *
 * Bytes 0x00..0x7F are copied unchanged; bytes 0x80..0xBF become the
 * two-byte sequence 0xC2 <byte>; bytes 0xC0..0xFF become 0xC3 <byte-0x40>.
 * No terminator is appended: a NUL is only written if it is part of the
 * input range (i.e. the caller included it in *inlen).
 *
 * Conversion stops early, without writing a partial sequence, as soon as
 * the next character would not fit into the remaining output space.
 *
 * @param out     destination buffer
 * @param outlen  in: capacity of out in bytes; out: number of bytes written
 * @param in      source buffer
 * @param inlen   in: number of source bytes; out: number of bytes consumed
 * @return the number of bytes written to out (same value as *outlen on
 *         return), so 0 means nothing could be converted.
 */
int
unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *in, size_t *inlen)
{
  /* All bookkeeping is done in size_t so that large lengths are not
   * truncated (or turned negative) by a conversion to int. */
  const size_t in_size = *inlen;
  const size_t out_size = *outlen;
  size_t in_pos = 0;
  size_t out_pos = 0;

  while (in_pos < in_size) {
    const unsigned char c = in[in_pos];

    if (c > 0x7F) {
      /* Needs two output bytes; out_pos never exceeds out_size, so the
       * subtraction cannot wrap. */
      if (out_size - out_pos < 2) break;
      if (c > 0xBF) {
        out[out_pos++] = 0xC3;
        out[out_pos++] = (unsigned char)(c - 0x40);
      } else {
        out[out_pos++] = 0xC2;
        out[out_pos++] = c;
      }
    } else {
      /* Plain ASCII: one byte in, one byte out. */
      if (out_pos >= out_size) break;
      out[out_pos++] = c;
    }
    ++in_pos;
  }

  *outlen = out_pos;
  *inlen = in_pos;
  return (int)out_pos;
}
|
||||
|
||||
int
|
||||
unicode_utf8_strcasecmp(const char * a, const char * b)
|
||||
{
|
||||
|
|
|
@ -23,6 +23,7 @@ extern "C" {
|
|||
#define USE_UNICODE
|
||||
extern int unicode_utf8_to_ucs4(wint_t *ucs4_character, const char *utf8_string, size_t *length);
|
||||
extern int unicode_utf8_strcasecmp(const char * a, const char * b);
|
||||
/* Converts *inlen bytes of ISO-8859-1 text at 'in' to UTF-8 at 'out'.
 * On entry *outlen is the capacity of 'out'; on return *outlen holds the
 * number of bytes written and *inlen the number of bytes consumed.
 * Returns the number of bytes written. */
extern int unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *in, size_t *inlen);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue