forked from github/server
- added a testsuite
- testcase skill-parser - findskill uses patricia
This commit is contained in:
parent
a2abcfe177
commit
1dd05538ad
21 changed files with 634 additions and 91 deletions
|
@ -3775,7 +3775,7 @@ static void reset_rng(void) {
|
||||||
|
|
||||||
static void reset_rng_region(region * r)
|
static void reset_rng_region(region * r)
|
||||||
{
|
{
|
||||||
rng_init(r->index);
|
rng_init(r->index+turn);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** warn about passwords that are not US ASCII.
|
/** warn about passwords that are not US ASCII.
|
||||||
|
|
|
@ -87,11 +87,11 @@ xml_s(const char * str)
|
||||||
{
|
{
|
||||||
static xmlChar buffer[1024];
|
static xmlChar buffer[1024];
|
||||||
const char * inbuf = str;
|
const char * inbuf = str;
|
||||||
unsigned char * outbuf = buffer;
|
char * outbuf = (char *)buffer;
|
||||||
size_t inbytes = strlen(str)+1;
|
size_t inbytes = strlen(str)+1;
|
||||||
size_t outbytes = sizeof(buffer) - 1;
|
size_t outbytes = sizeof(buffer) - 1;
|
||||||
|
|
||||||
unicode_latin1_to_utf8(outbuf, &outbytes, (const xmlChar *)inbuf, &inbytes);
|
unicode_latin1_to_utf8(outbuf, &outbytes, inbuf, &inbytes);
|
||||||
buffer[outbytes] = 0;
|
buffer[outbytes] = 0;
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,6 +68,7 @@
|
||||||
#include <util/umlaut.h>
|
#include <util/umlaut.h>
|
||||||
#include <util/xml.h>
|
#include <util/xml.h>
|
||||||
#include <util/bsdstring.h>
|
#include <util/bsdstring.h>
|
||||||
|
#include <util/unicode.h>
|
||||||
|
|
||||||
/* libxml includes */
|
/* libxml includes */
|
||||||
#include <libxml/tree.h>
|
#include <libxml/tree.h>
|
||||||
|
@ -83,6 +84,11 @@
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
|
#define PTRIES 1
|
||||||
|
#if PTRIES
|
||||||
|
#include <util/patricia.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
/* exported variables */
|
/* exported variables */
|
||||||
region *regions;
|
region *regions;
|
||||||
faction *factions;
|
faction *factions;
|
||||||
|
@ -1332,14 +1338,126 @@ findoption(const char *s, const struct locale * lang)
|
||||||
return NODIRECTION;
|
return NODIRECTION;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if PTRIES
|
||||||
|
static struct trie_node * ptries[UT_MAX][4];
|
||||||
|
|
||||||
|
static struct trie_node **
|
||||||
|
get_ptrie(const struct locale * lang, int type)
|
||||||
|
{
|
||||||
|
int index = (strcmp(locale_name(lang), "de")==0);
|
||||||
|
return &(ptries[type][index]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
umlaut_substitution(const char * ip, char * op, size_t outlen)
|
||||||
|
{
|
||||||
|
#define UMAX 7
|
||||||
|
static struct replace {
|
||||||
|
ucs4_t ucs;
|
||||||
|
const char str[3];
|
||||||
|
} replace[UMAX] = {
|
||||||
|
/* match lower-case (!) umlauts and others to transcriptions */
|
||||||
|
{ 223, "ss"}, /* szlig */
|
||||||
|
{ 228, "ae"}, /* auml */
|
||||||
|
{ 229, "aa"}, /* norsk */
|
||||||
|
{ 230, "ae"}, /* norsk */
|
||||||
|
{ 246, "oe"}, /* ouml */
|
||||||
|
{ 248, "oe"}, /* norsk */
|
||||||
|
{ 252, "ue"}, /* uuml */
|
||||||
|
};
|
||||||
|
int subs = 0;
|
||||||
|
while (*ip) {
|
||||||
|
ucs4_t ucs = *ip;
|
||||||
|
size_t size = 1;
|
||||||
|
size_t cpsize = 1;
|
||||||
|
|
||||||
|
if (ucs & 0x80) {
|
||||||
|
int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
|
||||||
|
if (ret!=0) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
cpsize = size;
|
||||||
|
if (ucs >= replace[0].ucs && ucs <= replace[UMAX-1].ucs) {
|
||||||
|
int i;
|
||||||
|
for (i=0;i!=UMAX;++i) {
|
||||||
|
if (replace[i].ucs==ucs) {
|
||||||
|
cpsize = 0;
|
||||||
|
memcpy(op, replace[i].str, 2);
|
||||||
|
op+=2;
|
||||||
|
++subs;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (cpsize) {
|
||||||
|
if (cpsize>outlen) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
memcpy(op, ip, cpsize);
|
||||||
|
}
|
||||||
|
|
||||||
|
ip += size;
|
||||||
|
op += cpsize;
|
||||||
|
outlen -= cpsize;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (outlen<=0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
*op = 0;
|
||||||
|
return subs;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
ptrie_find(struct trie_node *ptrie, const char * key, void * data, size_t size)
|
||||||
|
{
|
||||||
|
trie_node * node = trie_find_prefix(ptrie, key);
|
||||||
|
if (node) {
|
||||||
|
void * result = trie_getdata(node);
|
||||||
|
memcpy(data, result, size);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Stores data under the lower-cased form of name and, when the name contains
 * characters that umlaut_substitution() transcribes, additionally under the
 * transcribed spelling.
 * Returns the result of unicode_utf8_tolower(); nothing is inserted when
 * that conversion fails. */
static int
ptrie_insert(struct trie_node **ptrie, const char * name, void * data, size_t size)
{
  char lowered[256];
  char transcribed[256];
  int err = unicode_utf8_tolower(lowered, 256, name);

  if (err!=0) {
    return err;
  }
  if (umlaut_substitution(lowered, transcribed, sizeof(transcribed))>0) {
    trie_insert(ptrie, transcribed, data, size);
  }
  trie_insert(ptrie, lowered, data, size);
  return 0;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
skill_t
|
skill_t
|
||||||
findskill(const char *s, const struct locale * lang)
|
findskill(const char *s, const struct locale * lang)
|
||||||
{
|
{
|
||||||
|
#if PTRIES
|
||||||
|
char lowercase[256];
|
||||||
|
int res = unicode_utf8_tolower(lowercase, sizeof(lowercase), s);
|
||||||
|
if (res==0) {
|
||||||
|
trie_node ** ptrie = get_ptrie(lang, UT_SKILLS);
|
||||||
|
skill_t sk;
|
||||||
|
int result = ptrie_find(*ptrie, lowercase, &sk, sizeof(sk));
|
||||||
|
if (result==0) return sk;
|
||||||
|
}
|
||||||
|
return NOSKILL;
|
||||||
|
#else
|
||||||
struct tnode * tokens = get_translations(lang, UT_SKILLS);
|
struct tnode * tokens = get_translations(lang, UT_SKILLS);
|
||||||
variant token;
|
variant token;
|
||||||
|
|
||||||
if (findtoken(tokens, s, &token)==E_TOK_NOMATCH) return NOSKILL;
|
if (findtoken(tokens, s, &token)==E_TOK_NOMATCH) return NOSKILL;
|
||||||
return (skill_t)token.i;
|
return (skill_t)token.i;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
keyword_t
|
keyword_t
|
||||||
|
@ -1954,6 +2072,9 @@ init_locale(const struct locale * lang)
|
||||||
const struct race * rc;
|
const struct race * rc;
|
||||||
struct tnode * tokens;
|
struct tnode * tokens;
|
||||||
const terrain_type * terrain;
|
const terrain_type * terrain;
|
||||||
|
#if PTRIES
|
||||||
|
trie_node ** ptrie;
|
||||||
|
#endif
|
||||||
|
|
||||||
tokens = get_translations(lang, UT_MAGIC);
|
tokens = get_translations(lang, UT_MAGIC);
|
||||||
for (i=0;i!=MAXMAGIETYP;++i) {
|
for (i=0;i!=MAXMAGIETYP;++i) {
|
||||||
|
@ -1976,7 +2097,18 @@ init_locale(const struct locale * lang)
|
||||||
var.i = i;
|
var.i = i;
|
||||||
addtoken(tokens, LOC(lang, parameters[i]), var);
|
addtoken(tokens, LOC(lang, parameters[i]), var);
|
||||||
}
|
}
|
||||||
|
#if PTRIES
|
||||||
|
ptrie = get_ptrie(lang, UT_SKILLS);
|
||||||
|
for (i=0;i!=MAXSKILLS;++i) {
|
||||||
|
if (i!=SK_TRADE || !TradeDisabled()) {
|
||||||
|
skill_t sk = (skill_t)i;
|
||||||
|
const char * skname = skillname(sk, lang);
|
||||||
|
if (skname!=NULL) {
|
||||||
|
ptrie_insert(ptrie, skname, &sk, sizeof(sk));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
tokens = get_translations(lang, UT_SKILLS);
|
tokens = get_translations(lang, UT_SKILLS);
|
||||||
for (i=0;i!=MAXSKILLS;++i) {
|
for (i=0;i!=MAXSKILLS;++i) {
|
||||||
if (i!=SK_TRADE || !TradeDisabled()) {
|
if (i!=SK_TRADE || !TradeDisabled()) {
|
||||||
|
@ -1987,6 +2119,7 @@ init_locale(const struct locale * lang)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
tokens = get_translations(lang, UT_KEYWORDS);
|
tokens = get_translations(lang, UT_KEYWORDS);
|
||||||
for (i=0;i!=MAXKEYWORDS;++i) {
|
for (i=0;i!=MAXKEYWORDS;++i) {
|
||||||
|
|
|
@ -384,7 +384,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars)
|
||||||
char * bufp;
|
char * bufp;
|
||||||
unsigned int c = 0;
|
unsigned int c = 0;
|
||||||
size_t bpt, i;
|
size_t bpt, i;
|
||||||
wint_t ucs;
|
ucs4_t ucs;
|
||||||
size_t size;
|
size_t size;
|
||||||
int result;
|
int result;
|
||||||
|
|
||||||
|
@ -401,7 +401,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars)
|
||||||
assert(result==0 || "damnit, we're not handling invalid input here!");
|
assert(result==0 || "damnit, we're not handling invalid input here!");
|
||||||
|
|
||||||
/* Leerzeichen überspringen */
|
/* Leerzeichen überspringen */
|
||||||
while (*p != 0 && !iswalnum(ucs)) {
|
while (*p != 0 && !iswalnum((wint_t)ucs)) {
|
||||||
p += size;
|
p += size;
|
||||||
result = unicode_utf8_to_ucs4(&ucs, p, &size);
|
result = unicode_utf8_to_ucs4(&ucs, p, &size);
|
||||||
assert(result==0 || "damnit, we're not handling invalid input here!");
|
assert(result==0 || "damnit, we're not handling invalid input here!");
|
||||||
|
@ -411,7 +411,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars)
|
||||||
if (*p != 0) ++c;
|
if (*p != 0) ++c;
|
||||||
|
|
||||||
/* alnums überspringen */
|
/* alnums überspringen */
|
||||||
while (*p != 0 && iswalnum(ucs)) {
|
while (*p != 0 && iswalnum((wint_t)ucs)) {
|
||||||
p+=size;
|
p+=size;
|
||||||
result = unicode_utf8_to_ucs4(&ucs, p, &size);
|
result = unicode_utf8_to_ucs4(&ucs, p, &size);
|
||||||
assert(result==0 || "damnit, we're not handling invalid input here!");
|
assert(result==0 || "damnit, we're not handling invalid input here!");
|
||||||
|
@ -434,7 +434,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars)
|
||||||
while (*p != 0 && c < maxchars) {
|
while (*p != 0 && c < maxchars) {
|
||||||
/* Leerzeichen überspringen */
|
/* Leerzeichen überspringen */
|
||||||
|
|
||||||
while (*p != 0 && !iswalnum(ucs)) {
|
while (*p != 0 && !iswalnum((wint_t)ucs)) {
|
||||||
p+=size;
|
p+=size;
|
||||||
result = unicode_utf8_to_ucs4(&ucs, p, &size);
|
result = unicode_utf8_to_ucs4(&ucs, p, &size);
|
||||||
assert(result==0 || "damnit, we're not handling invalid input here!");
|
assert(result==0 || "damnit, we're not handling invalid input here!");
|
||||||
|
@ -442,7 +442,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars)
|
||||||
|
|
||||||
/* alnums übertragen */
|
/* alnums übertragen */
|
||||||
|
|
||||||
for (i = 0; i < bpt && *p != 0 && iswalnum(ucs); ++i) {
|
for (i = 0; i < bpt && *p != 0 && iswalnum((wint_t)ucs); ++i) {
|
||||||
memcpy(bufp, p, size);
|
memcpy(bufp, p, size);
|
||||||
p += size;
|
p += size;
|
||||||
bufp += size;
|
bufp += size;
|
||||||
|
@ -454,7 +454,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars)
|
||||||
|
|
||||||
/* Bis zum nächsten Leerzeichen */
|
/* Bis zum nächsten Leerzeichen */
|
||||||
|
|
||||||
while (c < maxchars && *p != 0 && iswalnum(ucs)) {
|
while (c < maxchars && *p != 0 && iswalnum((wint_t)ucs)) {
|
||||||
p+=size;
|
p+=size;
|
||||||
result = unicode_utf8_to_ucs4(&ucs, p, &size);
|
result = unicode_utf8_to_ucs4(&ucs, p, &size);
|
||||||
assert(result==0 || "damnit, we're not handling invalid input here!");
|
assert(result==0 || "damnit, we're not handling invalid input here!");
|
||||||
|
|
|
@ -178,7 +178,7 @@ freadstr(FILE * F, int encoding, char * start, size_t size)
|
||||||
char inbuf = (char)c;
|
char inbuf = (char)c;
|
||||||
size_t inbytes = 1;
|
size_t inbytes = 1;
|
||||||
size_t outbytes = size-(str-start);
|
size_t outbytes = size-(str-start);
|
||||||
int ret = unicode_latin1_to_utf8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
int ret = unicode_latin1_to_utf8(str, &outbytes, &inbuf, &inbytes);
|
||||||
if (ret>0) str+=ret;
|
if (ret>0) str+=ret;
|
||||||
else {
|
else {
|
||||||
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
|
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
|
||||||
|
@ -197,7 +197,7 @@ freadstr(FILE * F, int encoding, char * start, size_t size)
|
||||||
char inbuf = (char)c;
|
char inbuf = (char)c;
|
||||||
size_t inbytes = 1;
|
size_t inbytes = 1;
|
||||||
size_t outbytes = size-(str-start);
|
size_t outbytes = size-(str-start);
|
||||||
int ret = unicode_latin1_to_utf8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
int ret = unicode_latin1_to_utf8(str, &outbytes, &inbuf, &inbytes);
|
||||||
if (ret>0) str+=ret;
|
if (ret>0) str+=ret;
|
||||||
else {
|
else {
|
||||||
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
|
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
|
||||||
|
|
|
@ -246,7 +246,7 @@ read_newfactions(const char * filename)
|
||||||
char buffer[32];
|
char buffer[32];
|
||||||
size_t outbytes = sizeof(buffer) - 1;
|
size_t outbytes = sizeof(buffer) - 1;
|
||||||
size_t inbytes = strlen(race);
|
size_t inbytes = strlen(race);
|
||||||
unicode_latin1_to_utf8((unsigned char *)buffer, &outbytes, (const unsigned char *)race, &inbytes);
|
unicode_latin1_to_utf8(buffer, &outbytes, race, &inbytes);
|
||||||
buffer[outbytes] = 0;
|
buffer[outbytes] = 0;
|
||||||
nf->race = findrace(buffer, default_locale);
|
nf->race = findrace(buffer, default_locale);
|
||||||
if (nf->race==NULL) {
|
if (nf->race==NULL) {
|
||||||
|
|
|
@ -29,11 +29,11 @@ eatwhite(const char * ptr, size_t * total_size)
|
||||||
*total_size = 0;
|
*total_size = 0;
|
||||||
|
|
||||||
while (*ptr) {
|
while (*ptr) {
|
||||||
wint_t ucs;
|
ucs4_t ucs;
|
||||||
size_t size = 0;
|
size_t size = 0;
|
||||||
ret = unicode_utf8_to_ucs4(&ucs, ptr, &size);
|
ret = unicode_utf8_to_ucs4(&ucs, ptr, &size);
|
||||||
if (ret!=0) break;
|
if (ret!=0) break;
|
||||||
if (!iswspace(ucs)) break;
|
if (!iswspace((wint_t)ucs)) break;
|
||||||
*total_size += size;
|
*total_size += size;
|
||||||
ptr += size;
|
ptr += size;
|
||||||
}
|
}
|
||||||
|
@ -149,7 +149,7 @@ getbuf_latin1(FILE * F)
|
||||||
char inbuf = (char)c;
|
char inbuf = (char)c;
|
||||||
size_t inbytes = 1;
|
size_t inbytes = 1;
|
||||||
size_t outbytes = MAXLINE-(cp-fbuf);
|
size_t outbytes = MAXLINE-(cp-fbuf);
|
||||||
int ret = unicode_latin1_to_utf8((xmlChar *)cp, &outbytes, (const xmlChar *)&inbuf, &inbytes);
|
int ret = unicode_latin1_to_utf8(cp, &outbytes, &inbuf, &inbytes);
|
||||||
if (ret>0) cp+=ret;
|
if (ret>0) cp+=ret;
|
||||||
else {
|
else {
|
||||||
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
|
log_error(("input data was not iso-8859-1! assuming utf-8\n"));
|
||||||
|
@ -213,7 +213,7 @@ getbuf_utf8(FILE * F)
|
||||||
}
|
}
|
||||||
cont = false;
|
cont = false;
|
||||||
while (*bp && cp<fbuf+MAXLINE) {
|
while (*bp && cp<fbuf+MAXLINE) {
|
||||||
wint_t ucs;
|
ucs4_t ucs;
|
||||||
size_t size;
|
size_t size;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
@ -244,7 +244,7 @@ getbuf_utf8(FILE * F)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (iswspace(ucs)) {
|
if (iswspace((wint_t)ucs)) {
|
||||||
if (!quote) {
|
if (!quote) {
|
||||||
bp += size;
|
bp += size;
|
||||||
ret = eatwhite(bp, &size);
|
ret = eatwhite(bp, &size);
|
||||||
|
@ -264,7 +264,7 @@ getbuf_utf8(FILE * F)
|
||||||
} else {
|
} else {
|
||||||
bp+=size;
|
bp+=size;
|
||||||
}
|
}
|
||||||
} else if (iswcntrl(ucs)) {
|
} else if (iswcntrl((wint_t)ucs)) {
|
||||||
if (!comment && cp<fbuf+MAXLINE) {
|
if (!comment && cp<fbuf+MAXLINE) {
|
||||||
*cp++ = '?';
|
*cp++ = '?';
|
||||||
}
|
}
|
||||||
|
|
|
@ -74,32 +74,6 @@ set_string (char **s, const char *neu)
|
||||||
return *s;
|
return *s;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean
|
|
||||||
locale_check(void)
|
|
||||||
{
|
|
||||||
int i, errorlevel = 0;
|
|
||||||
const unsigned char * umlaute = (const unsigned char*)"äöüÄÖÜß";
|
|
||||||
unsigned char result[32];
|
|
||||||
size_t inbytes = strlen((const char *)umlaute);
|
|
||||||
size_t outbytes = sizeof(result);
|
|
||||||
int ret = unicode_latin1_to_utf8(result, &outbytes, umlaute, &inbytes);
|
|
||||||
if (ret<=0) {
|
|
||||||
++errorlevel;
|
|
||||||
}
|
|
||||||
/* E: das testet, ob umlaute funktionieren. Wenn äöü nicht mit isalpha() true sind, kriegen wir ärger. */
|
|
||||||
for (i=0;i!=3;++i) {
|
|
||||||
if (towupper(umlaute[i])!=(int)umlaute[i+3]) {
|
|
||||||
++errorlevel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (i=0;umlaute[i]!=0;++i) {
|
|
||||||
if (!iswalpha(umlaute[i]) || iswspace(umlaute[i]) || iswcntrl(umlaute[i])) {
|
|
||||||
++errorlevel;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (errorlevel) return false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
spc_email_isvalid(const char *address)
|
spc_email_isvalid(const char *address)
|
||||||
|
|
|
@ -18,7 +18,6 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern boolean locale_check(void);
|
|
||||||
extern char * set_string(char **s, const char *neu);
|
extern char * set_string(char **s, const char *neu);
|
||||||
extern int set_email(char** pemail, const char *newmail);
|
extern int set_email(char** pemail, const char *newmail);
|
||||||
|
|
||||||
|
|
|
@ -23,7 +23,7 @@ static int
|
||||||
eatwhitespace_c(const char ** str)
|
eatwhitespace_c(const char ** str)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
wint_t ucs;
|
ucs4_t ucs;
|
||||||
size_t len;
|
size_t len;
|
||||||
|
|
||||||
/* skip over potential whitespace */
|
/* skip over potential whitespace */
|
||||||
|
@ -38,7 +38,7 @@ eatwhitespace_c(const char ** str)
|
||||||
log_warning(("illegal character sequence in UTF8 string: %s\n", *str));
|
log_warning(("illegal character sequence in UTF8 string: %s\n", *str));
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
if (!iswspace(ucs)) break;
|
if (!iswspace((wint_t)ucs)) break;
|
||||||
*str+=len;
|
*str+=len;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -89,7 +89,7 @@ skip_token(void)
|
||||||
eatwhitespace_c(&state->current_token);
|
eatwhitespace_c(&state->current_token);
|
||||||
|
|
||||||
while (*state->current_token) {
|
while (*state->current_token) {
|
||||||
wint_t ucs;
|
ucs4_t ucs;
|
||||||
size_t len;
|
size_t len;
|
||||||
|
|
||||||
unsigned char utf8_character = (unsigned char)state->current_token[0];
|
unsigned char utf8_character = (unsigned char)state->current_token[0];
|
||||||
|
@ -104,7 +104,7 @@ skip_token(void)
|
||||||
log_warning(("illegal character sequence in UTF8 string: %s\n", state->current_token));
|
log_warning(("illegal character sequence in UTF8 string: %s\n", state->current_token));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (iswspace(ucs) && quotechar==0) {
|
if (iswspace((wint_t)ucs) && quotechar==0) {
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
switch(utf8_character) {
|
switch(utf8_character) {
|
||||||
|
@ -134,7 +134,7 @@ parse_token(const char ** str)
|
||||||
|
|
||||||
eatwhitespace_c(&ctoken);
|
eatwhitespace_c(&ctoken);
|
||||||
while (*ctoken && cursor-lbuf < MAXTOKENSIZE-1) {
|
while (*ctoken && cursor-lbuf < MAXTOKENSIZE-1) {
|
||||||
wint_t ucs;
|
ucs4_t ucs;
|
||||||
size_t len;
|
size_t len;
|
||||||
boolean copy = false;
|
boolean copy = false;
|
||||||
|
|
||||||
|
@ -152,7 +152,7 @@ parse_token(const char ** str)
|
||||||
if (escape) {
|
if (escape) {
|
||||||
copy = true;
|
copy = true;
|
||||||
escape = false;
|
escape = false;
|
||||||
} else if (iswspace(ucs)) {
|
} else if (iswspace((wint_t)ucs)) {
|
||||||
if (quotechar==0) break;
|
if (quotechar==0) break;
|
||||||
copy = true;
|
copy = true;
|
||||||
} else if (utf8_character=='"' || utf8_character=='\'') {
|
} else if (utf8_character=='"' || utf8_character=='\'') {
|
||||||
|
|
236
src/common/util/patricia.c
Normal file
236
src/common/util/patricia.c
Normal file
|
@ -0,0 +1,236 @@
|
||||||
|
#include <config.h>
|
||||||
|
|
||||||
|
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
||||||
|
|
||||||
|
#include "patricia.h"
|
||||||
|
|
||||||
|
#define MAXKEYLEN 128
|
||||||
|
|
||||||
|
/* TODO: custom memory management to optimize cache layout, or use arrays. */
|
||||||
|
|
||||||
|
/* NOTE: The structure saves an extra 0 delimiter for the key. Technically
|
||||||
|
 * this wouldn't be necessary (because we know its length from data[0]),
|
||||||
|
* but it makes it possible for trie_getkey to return a key without making
|
||||||
|
* a copy or have a cumbersome (const char**, size_t*) interface.
|
||||||
|
* +-----------+-------------+------+------------+
|
||||||
|
* data: | keylen(1) | key(keylen) | 0(1) | data(size) |
|
||||||
|
* +-----------+-------------+------+------------+
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* One node of the patricia trie. */
struct trie_node {
  struct trie_node *l;  /* link followed when the tested key bit is 0 */
  struct trie_node *r;  /* link followed when the tested key bit is 1 */
  char * data;          /* length byte, key bytes, NUL, then the payload */
  unsigned int bitpos;  /* index of the key bit this node tests */
};
|
||||||
|
|
||||||
|
/* get_bit(c, s, p): value (0 or 1) of bit p of the s-byte key c. Bits are
 * counted from the least significant bit of c[0]. Every position at or
 * beyond the end of the key reads as 0, so c is never accessed past its
 * first s bytes (the macro and the function variant now agree on this).
 * NB: the macro form evaluates p more than once. */
#if 1
#define get_bit(c, s, p) (unsigned int)((((p)>>3)>=(unsigned int)(s))?0:(((c)[(p)>>3]>>((p)&7))&1))
#else
unsigned int get_bit(const char * c, size_t s, unsigned int p)
{
  if ((p>>3)>=(unsigned int)s) return 0;
  return ((c)[p>>3]>>(p&7)&1);
}
#endif
/* node_bit(n, p): bit p of the key stored in node n; the key length is kept
 * in data[0] and the key bytes start at data[1]. */
#define node_bit(n, p) get_bit((n)->data+1, (n)->data[0], (p))
|
||||||
|
|
||||||
|
/* Inserts key into the trie rooted at *root_p.
 *
 *   root_p  address of the root pointer; updated when the new node becomes
 *           the root
 *   key     NUL-terminated key, shorter than MAXKEYLEN bytes
 *   data    payload to copy into the node, or NULL to only reserve space
 *   size    payload size in bytes (0 when the trie is used as a set)
 *
 * Returns the newly created node. If the key is already stored, the existing
 * node is returned and its payload is left untouched. Returns NULL, leaving
 * the trie unchanged, when the key is too long or memory is exhausted.
 */
trie_node * trie_insert(trie_node **root_p, const char * key, const void * data, size_t size)
{
  trie_node * new_node;
  size_t keylen = strlen(key);
  trie_node ** insert_p = root_p, *node = *insert_p;
  unsigned int p, bit=0;

  assert(keylen<MAXKEYLEN);
  if (keylen>=MAXKEYLEN) {
    /* the length is stored in a single char; with assertions compiled out
     * a longer key would silently corrupt the node */
    return NULL;
  }

  for (p=0;p!=keylen*8+1;++p) {
    bit = get_bit(key, keylen, p);

    /* NULL-pointers lead to someplace we haven't got a prefix yet. */
    if (node==NULL) {
      break;
    }

    /* if we have the full prefix that the current node represents, move on */
    if (p==node->bitpos) {
      insert_p = bit?&node->r:&node->l;
      node = *insert_p;
      if (node==NULL) {
        continue;
      }
    }

    /* if we are looking at a back-node, we need to add our node before it. */
    if (p>=node->bitpos) {
      /* find the point p where both differ. */
      if (keylen==(unsigned int)node->data[0] && strncmp(key, node->data+1, keylen)==0) {
        /* we are trying to insert the same key again */
        return node;
      }
      do {
        ++p;
        bit = get_bit(key, keylen, p);
      } while (node_bit(node, p)==bit);
      break;
    }

    /* if instead we differ before reaching the end of the current prefix, we must split.
     * we insert our node before the current one and re-attach it. */
    if (node_bit(node, p)!=bit) {
      break;
    }
  }

  /* allocate everything before touching the trie, so a failed allocation
   * leaves the structure exactly as it was */
  new_node = malloc(sizeof *new_node);
  if (new_node==NULL) {
    return NULL;
  }
  new_node->data = malloc(keylen+2+size);
  if (new_node->data==NULL) {
    free(new_node);
    return NULL;
  }

  new_node->bitpos = p;
  new_node->data[0] = (char)keylen;
  memcpy(new_node->data+1, key, keylen+1); /* includes the terminating NUL */
  if (data!=NULL && size>0) {
    /* if data is NULL then the user only wanted some space that they're going to write to later */
    /* if size is 0 then the user is using the trie as a set, not a map */
    memcpy(new_node->data+2+keylen, data, size);
  }
  if (bit) {
    new_node->l = node;
    new_node->r = new_node; /* loop the 1-bit to ourselves, search will end */
  } else {
    new_node->l = new_node; /* loop the 0-bit to ourselves, search will end */
    new_node->r = node;
  }
  *insert_p = new_node;
  return new_node;
}
|
||||||
|
|
||||||
|
/* Removes the node pos from the trie rooted at *root_p and frees it together
 * with its key/payload buffer.
 *
 *   root_p  address of the root pointer; updated when the root changes
 *   pos     node to remove (as returned by trie_insert/trie_find), or NULL
 *
 * Nothing happens when pos is NULL or cannot be reached from *root_p.
 */
void trie_remove(trie_node **root_p, trie_node *pos)
{
  if (pos!=NULL) {
    const char * key = trie_getkey(pos);
    size_t keylen = pos->data[0];
    trie_node ** node_p = root_p;
    trie_node * node = *root_p;

    while (node) {
      int bit;
      trie_node ** next_p;
      trie_node * next;

      if (node == pos) {
        /* pos points back at itself: splice its other child into its place */
        if (node->l==node) {
          *node_p = node->r;
          break;
        } else if (node->r==node) {
          *node_p = node->l;
          break;
        }
      }

      bit = get_bit(key, keylen, node->bitpos);
      next_p = bit?&node->r:&node->l;
      next = *next_p;
      if (next == pos && next->bitpos<=node->bitpos) {
        /* the element that has a back-pointer to pos gets swapped with pos */
        char * data = pos->data;
        pos->data = node->data;
        node->data = data;

        /* finally, find the back-pointer to node and set it to pos */
        next_p = bit?&node->l:&node->r; /* NB: this is the OTHER child of node */
        next = *next_p;
        key = trie_getkey(node);
        keylen = (unsigned int)node->data[0];
        while (next) {
          int new_bit;
          if (next==node) {
            *next_p = pos;
            break;
          }
          new_bit = get_bit(key, keylen, next->bitpos);
          next_p = new_bit?&next->r:&next->l;
          next = *next_p;
        }
        *node_p = bit?node->l:node->r;
        break;
      }
      if (next!=NULL && next->bitpos<=node->bitpos) {
        /* The search for pos's key ended at an upward link to some other
         * node, so pos is not part of this trie. Following the link would
         * revisit the same nodes forever. */
        node = NULL;
        break;
      }
      node = *next_p;
      node_p = next_p;
    }

    /* node is NULL when pos was not found; there is nothing to release then */
    if (node!=NULL) {
      free(node->data);
      free(node);
    }
  }
}
|
||||||
|
|
||||||
|
/* Prints one line per node to stdout: the node's key, its bit position and
 * the keys of its left and right links ("?" for a NULL link). Only links to
 * nodes with a larger bit position are followed recursively.
 * A NULL root prints nothing. */
void trie_debug(trie_node * root)
{
  const char * l;
  const char * r;

  if (root==NULL) {
    return;
  }
  l = root->l?trie_getkey(root->l):"?";
  r = root->r?trie_getkey(root->r):"?";
  /* bitpos is unsigned, so it is printed with %u */
  printf("%s %u | %s | %s\n", trie_getkey(root), root->bitpos, l, r);
  if (root->l && root->l->bitpos > root->bitpos) trie_debug(root->l);
  if (root->r && root->r->bitpos > root->bitpos) trie_debug(root->r);
}
|
||||||
|
|
||||||
|
/* Searches the trie for a node whose key is exactly equal to key.
 * Returns that node, or NULL when the key is not stored. */
trie_node * trie_find(trie_node *root, const char *key)
{
  const size_t len = strlen(key);
  trie_node * cur;

  for (cur = root; cur != NULL; ) {
    trie_node * child = get_bit(key, len, cur->bitpos) ? cur->r : cur->l;

    if (child == NULL) {
      break;
    }
    if (cur->bitpos >= child->bitpos) {
      /* a link that does not lead to a larger bit position ends the
       * descent; the node it points to is the only candidate */
      if (len==(unsigned int)child->data[0] && strncmp(key, child->data+1, len)==0) {
        return child;
      }
      break;
    }
    cur = child;
  }
  return NULL;
}
|
||||||
|
|
||||||
|
/* Searches the trie for a node whose stored key starts with key.
 * Returns the node the descent ends at if its key has that prefix,
 * otherwise NULL. */
trie_node * trie_find_prefix(trie_node *root, const char *key)
{
  const size_t len = strlen(key);
  trie_node * cur;

  for (cur = root; cur != NULL; ) {
    trie_node * child = get_bit(key, len, cur->bitpos) ? cur->r : cur->l;

    if (child == NULL) {
      break;
    }
    if (cur->bitpos >= child->bitpos) {
      /* a link that does not lead to a larger bit position ends the
       * descent; accept the target if its key is at least as long as the
       * search key and agrees with it on the first len bytes */
      if (len<=(unsigned int)child->data[0] && strncmp(key, child->data+1, len)==0) {
        return child;
      }
      break;
    }
    cur = child;
  }
  return NULL;
}
|
||||||
|
|
||||||
|
/* Returns a pointer to the payload stored in node. The payload sits behind
 * the length byte, the key bytes and the key's terminating NUL. */
void * trie_getdata(trie_node * node)
{
  char * record = node->data;

  return record + 2 + record[0];
}
|
||||||
|
|
||||||
|
/* Returns the NUL-terminated key stored in node; it starts right after the
 * leading length byte of the node's buffer. */
const char * trie_getkey(trie_node * node)
{
  const char * record = node->data;

  return &record[1];
}
|
||||||
|
|
||||||
|
/* Frees every node reachable from root through links that lead to a larger
 * bit position, including each node's key/payload buffer.
 * A NULL root is allowed and does nothing. */
void trie_free(trie_node * root)
{
  if (root) {
    /* links to an equal or smaller bit position point back up the trie and
     * must not be followed, or nodes would be freed twice */
    if (root->l && root->l->bitpos>root->bitpos) trie_free(root->l);
    if (root->r && root->r->bitpos>root->bitpos) trie_free(root->r);
    free(root->data); /* allocated separately by trie_insert */
    free(root);
  }
}
|
21
src/common/util/patricia.h
Normal file
21
src/common/util/patricia.h
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
#ifndef H_PATRICIA
#define H_PATRICIA

#include <stddef.h> /* size_t */

#ifdef __cplusplus
extern "C" {
#endif

/* Opaque node of a patricia trie that maps NUL-terminated keys to
 * fixed-size payloads. An empty trie is a NULL root pointer. */
typedef struct trie_node trie_node;

/* Inserts key with a copy of size bytes from data (data may be NULL to only
 * reserve the space). Returns the new node, or the already existing node
 * for that key, whose payload is then left unchanged. */
trie_node * trie_insert(trie_node **root, const char *key, const void *data, size_t size);

/* Returns the node whose key equals key exactly, or NULL. */
trie_node * trie_find(trie_node *root, const char *key);

/* Returns a pointer to the payload stored in node. */
void * trie_getdata(trie_node *node);

/* Returns the NUL-terminated key stored in node. */
const char * trie_getkey(trie_node *node);

/* Releases the nodes reachable from root; root may be NULL. */
void trie_free(trie_node * root);

/* Removes the node pos from the trie rooted at *root_p and frees it. */
void trie_remove(trie_node **root_p, trie_node *pos);

/* Prints the nodes below root to stdout, one line per node. */
void trie_debug(trie_node * root);

/* Returns a node whose key starts with key, or NULL. */
trie_node * trie_find_prefix(trie_node *root, const char *key);

#ifdef __cplusplus
}
#endif
#endif
|
|
@ -32,7 +32,7 @@
|
||||||
|
|
||||||
typedef struct tref {
|
typedef struct tref {
|
||||||
struct tref * nexthash;
|
struct tref * nexthash;
|
||||||
wint_t ucs;
|
ucs4_t ucs;
|
||||||
struct tnode * node;
|
struct tnode * node;
|
||||||
} tref;
|
} tref;
|
||||||
|
|
||||||
|
@ -43,7 +43,7 @@ void
|
||||||
addtoken(tnode * root, const char * str, variant id)
|
addtoken(tnode * root, const char * str, variant id)
|
||||||
{
|
{
|
||||||
static struct replace {
|
static struct replace {
|
||||||
wint_t ucs;
|
ucs4_t ucs;
|
||||||
const char str[3];
|
const char str[3];
|
||||||
} replace[] = {
|
} replace[] = {
|
||||||
/* match lower-case (!) umlauts and others to transcriptions */
|
/* match lower-case (!) umlauts and others to transcriptions */
|
||||||
|
@ -63,7 +63,7 @@ addtoken(tnode * root, const char * str, variant id)
|
||||||
} else {
|
} else {
|
||||||
tref * next;
|
tref * next;
|
||||||
int ret, index, i = 0;
|
int ret, index, i = 0;
|
||||||
wint_t ucs, lcs;
|
ucs4_t ucs, lcs;
|
||||||
size_t len;
|
size_t len;
|
||||||
|
|
||||||
ret = unicode_utf8_to_ucs4(&ucs, str, &len);
|
ret = unicode_utf8_to_ucs4(&ucs, str, &len);
|
||||||
|
@ -84,10 +84,10 @@ addtoken(tnode * root, const char * str, variant id)
|
||||||
tnode * node = calloc(1, sizeof(tnode));
|
tnode * node = calloc(1, sizeof(tnode));
|
||||||
|
|
||||||
if (ucs<'a' || ucs>'z') {
|
if (ucs<'a' || ucs>'z') {
|
||||||
lcs = towlower(ucs);
|
lcs = towlower((wint_t)ucs);
|
||||||
}
|
}
|
||||||
if (ucs==lcs) {
|
if (ucs==lcs) {
|
||||||
ucs = towupper(ucs);
|
ucs = towupper((wint_t)ucs);
|
||||||
}
|
}
|
||||||
|
|
||||||
ref = malloc(sizeof(tref));
|
ref = malloc(sizeof(tref));
|
||||||
|
@ -136,7 +136,7 @@ findtoken(const tnode * tk, const char * str, variant* result)
|
||||||
do {
|
do {
|
||||||
int index;
|
int index;
|
||||||
const tref * ref;
|
const tref * ref;
|
||||||
wint_t ucs;
|
ucs4_t ucs;
|
||||||
size_t len;
|
size_t len;
|
||||||
int ret = unicode_utf8_to_ucs4(&ucs, str, &len);
|
int ret = unicode_utf8_to_ucs4(&ucs, str, &len);
|
||||||
|
|
||||||
|
|
|
@ -14,13 +14,67 @@
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <wctype.h>
|
#include <wctype.h>
|
||||||
|
|
||||||
|
#define B00000000 0x00
|
||||||
|
#define B10000000 0x80
|
||||||
|
#define B11000000 0xC0
|
||||||
|
#define B11100000 0xE0
|
||||||
|
#define B11110000 0xF0
|
||||||
|
#define B11111000 0xF8
|
||||||
|
#define B11111100 0xFC
|
||||||
|
#define B11111110 0xFE
|
||||||
|
|
||||||
|
#define B00111111 0x3F
|
||||||
|
#define B00011111 0x1F
|
||||||
|
#define B00001111 0x0F
|
||||||
|
#define B00000111 0x07
|
||||||
|
#define B00000011 0x03
|
||||||
|
#define B00000001 0x01
|
||||||
|
|
||||||
int
|
int
|
||||||
unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *in, size_t *inlen)
|
unicode_utf8_tolower(utf8_t *op, size_t outlen, const utf8_t *ip)
{
  /* Writes a lower-case copy of the NUL-terminated UTF-8 string ip to op.
   *
   *   op      output buffer
   *   outlen  capacity of op in bytes, including the terminating NUL
   *   ip      input string
   *
   * Returns 0 on success, ENOMEM when op is too small, or the non-zero
   * result of unicode_utf8_to_ucs4() when the input cannot be decoded.
   * After a failure op is not NUL-terminated.
   */
  while (*ip) {
    ucs4_t ucs = (unsigned char)*ip;
    ucs4_t low;
    size_t size = 1; /* bytes the current character occupies in ip */

    if (ucs & 0x80) {
      int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
      if (ret!=0) {
        return ret;
      }
    }
    low = towlower((wint_t)ucs);
    if (low==ucs) {
      /* unchanged: copy the original bytes, keeping one byte for the NUL */
      if (size>=outlen) {
        return ENOMEM;
      }
      memcpy(op, ip, size);
      ip += size;
      op += size;
      outlen -= size;
    } else {
      /* The lower-case form may need a different number of bytes than the
       * original, so it is encoded into scratch space and its real length
       * is bounds-checked before anything is written to op.
       * NOTE(review): the scratch size assumes unicode_ucs4_to_utf8() never
       * emits more than 6 bytes per character - confirm against its
       * implementation. The size argument is primed with the input length
       * exactly as the call was made before. */
      utf8_t lower[8];
      size_t lowsize = size;

      unicode_ucs4_to_utf8(lower, &lowsize, low);
      if (lowsize>=outlen) {
        return ENOMEM;
      }
      memcpy(op, lower, lowsize);
      ip += size;
      op += lowsize;
      outlen -= lowsize;
    }
  }

  if (outlen==0) {
    return ENOMEM;
  }
  *op = 0;
  return 0;
}
|
||||||
|
|
||||||
|
int
|
||||||
|
unicode_latin1_to_utf8(utf8_t *out, size_t *outlen, const char *in, size_t *inlen)
|
||||||
{
|
{
|
||||||
int is = (int)*inlen;
|
int is = (int)*inlen;
|
||||||
int os = (int)*outlen;
|
int os = (int)*outlen;
|
||||||
const unsigned char * ip = in;
|
const char * ip = in;
|
||||||
unsigned char * op = out;
|
utf8_t * op = out;
|
||||||
|
|
||||||
while (ip-in<is) {
|
while (ip-in<is) {
|
||||||
unsigned char c = *ip;
|
unsigned char c = *ip;
|
||||||
|
@ -44,12 +98,12 @@ unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
unicode_utf8_strcasecmp(const char * a, const char * b)
|
unicode_utf8_strcasecmp(const utf8_t * a, const char * b)
|
||||||
{
|
{
|
||||||
while (*a && *b) {
|
while (*a && *b) {
|
||||||
int ret;
|
int ret;
|
||||||
size_t size;
|
size_t size;
|
||||||
wint_t ucsa = *a, ucsb = *b;
|
ucs4_t ucsa = *a, ucsb = *b;
|
||||||
|
|
||||||
if (ucsa & 0x80) {
|
if (ucsa & 0x80) {
|
||||||
ret = unicode_utf8_to_ucs4(&ucsa, a, &size);
|
ret = unicode_utf8_to_ucs4(&ucsa, a, &size);
|
||||||
|
@ -63,8 +117,8 @@ unicode_utf8_strcasecmp(const char * a, const char * b)
|
||||||
} else ++b;
|
} else ++b;
|
||||||
|
|
||||||
if (ucsb!=ucsa) {
|
if (ucsb!=ucsa) {
|
||||||
ucsb = towlower(ucsb);
|
ucsb = towlower((wint_t)ucsb);
|
||||||
ucsa = towlower(ucsa);
|
ucsa = towlower((wint_t)ucsa);
|
||||||
if (ucsb<ucsa) return 1;
|
if (ucsb<ucsa) return 1;
|
||||||
if (ucsb>ucsa) return -1;
|
if (ucsb>ucsa) return -1;
|
||||||
}
|
}
|
||||||
|
@ -74,12 +128,73 @@ unicode_utf8_strcasecmp(const char * a, const char * b)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Convert a UCS-4 character to UTF-8. */
|
||||||
|
int
|
||||||
|
unicode_ucs4_to_utf8 (utf8_t *utf8_character, size_t *size, ucs4_t ucs4_character)
|
||||||
|
{
|
||||||
|
int utf8_bytes;
|
||||||
|
|
||||||
|
if (ucs4_character <= 0x0000007F) {
|
||||||
|
/* 0xxxxxxx */
|
||||||
|
utf8_bytes = 1;
|
||||||
|
utf8_character[0] = (char) ucs4_character;
|
||||||
|
}
|
||||||
|
else if (ucs4_character <= 0x000007FF) {
|
||||||
|
/* 110xxxxx 10xxxxxx */
|
||||||
|
utf8_bytes = 2;
|
||||||
|
utf8_character[0] = (char) ((ucs4_character >> 6) | B11000000);
|
||||||
|
utf8_character[1] = (char) ((ucs4_character & B00111111) | B10000000);
|
||||||
|
}
|
||||||
|
else if (ucs4_character <= 0x0000FFFF) {
|
||||||
|
/* 1110xxxx 10xxxxxx 10xxxxxx */
|
||||||
|
utf8_bytes = 3;
|
||||||
|
utf8_character[0] = (char) ((ucs4_character >> 12) | B11100000);
|
||||||
|
utf8_character[1] = (char) (((ucs4_character >> 6) & B00111111) | B10000000);
|
||||||
|
utf8_character[2] = (char) ((ucs4_character & B00111111) | B10000000);
|
||||||
|
}
|
||||||
|
else if (ucs4_character <= 0x001FFFFF) {
|
||||||
|
/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
||||||
|
utf8_bytes = 4;
|
||||||
|
utf8_character[0] = (char) ((ucs4_character >> 18) | B11110000);
|
||||||
|
utf8_character[1] = (char) (((ucs4_character >> 12) & B00111111) | B10000000);
|
||||||
|
utf8_character[2] = (char) (((ucs4_character >> 6) & B00111111) | B10000000);
|
||||||
|
utf8_character[3] = (char) ((ucs4_character & B00111111) | B10000000);
|
||||||
|
}
|
||||||
|
else if (ucs4_character <= 0x03FFFFFF) {
|
||||||
|
/* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
||||||
|
utf8_bytes = 5;
|
||||||
|
utf8_character[0] = (char) ((ucs4_character >> 24) | B11111000);
|
||||||
|
utf8_character[1] = (char) (((ucs4_character >> 18) & B00111111) | B10000000);
|
||||||
|
utf8_character[2] = (char) (((ucs4_character >> 12) & B00111111) | B10000000);
|
||||||
|
utf8_character[3] = (char) (((ucs4_character >> 6) & B00111111) | B10000000);
|
||||||
|
utf8_character[4] = (char) ((ucs4_character & B00111111) | B10000000);
|
||||||
|
}
|
||||||
|
else if (ucs4_character <= 0x7FFFFFFF) {
|
||||||
|
/* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
||||||
|
utf8_bytes = 6;
|
||||||
|
utf8_character[0] = (char) ((ucs4_character >> 30) | B11111100);
|
||||||
|
utf8_character[1] = (char) (((ucs4_character >> 24) & B00111111) | B10000000);
|
||||||
|
utf8_character[2] = (char) (((ucs4_character >> 18) & B00111111) | B10000000);
|
||||||
|
utf8_character[3] = (char) (((ucs4_character >> 12) & B00111111) | B10000000);
|
||||||
|
utf8_character[4] = (char) (((ucs4_character >> 6) & B00111111) | B10000000);
|
||||||
|
utf8_character[5] = (char) ((ucs4_character & B00111111) | B10000000);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return EILSEQ;
|
||||||
|
}
|
||||||
|
|
||||||
|
*size = utf8_bytes;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Convert a UTF-8 encoded character to UCS-4. */
|
/* Convert a UTF-8 encoded character to UCS-4. */
|
||||||
int
|
int
|
||||||
unicode_utf8_to_ucs4(wint_t *ucs4_character, const char *utf8_string,
|
unicode_utf8_to_ucs4(ucs4_t *ucs4_character, const utf8_t *utf8_string,
|
||||||
size_t *length)
|
size_t *length)
|
||||||
{
|
{
|
||||||
unsigned char utf8_character = (unsigned char)utf8_string[0];
|
utf8_t utf8_character = utf8_string[0];
|
||||||
|
|
||||||
/* Is the character in the ASCII range? If so, just copy it to the
|
/* Is the character in the ASCII range? If so, just copy it to the
|
||||||
output. */
|
output. */
|
||||||
|
@ -202,10 +317,10 @@ unicode_utf8_to_ucs4(wint_t *ucs4_character, const char *utf8_string,
|
||||||
|
|
||||||
/** Convert a UTF-8 encoded character to CP437. */
|
/** Convert a UTF-8 encoded character to CP437. */
|
||||||
int
|
int
|
||||||
unicode_utf8_to_cp437(char *cp_character, const char *utf8_string,
|
unicode_utf8_to_cp437(char *cp_character, const utf8_t *utf8_string,
|
||||||
size_t *length)
|
size_t *length)
|
||||||
{
|
{
|
||||||
wint_t ucs4_character;
|
ucs4_t ucs4_character;
|
||||||
int result;
|
int result;
|
||||||
|
|
||||||
result = unicode_utf8_to_ucs4(&ucs4_character, utf8_string, length);
|
result = unicode_utf8_to_ucs4(&ucs4_character, utf8_string, length);
|
||||||
|
@ -217,7 +332,7 @@ unicode_utf8_to_cp437(char *cp_character, const char *utf8_string,
|
||||||
if (ucs4_character<0x7F) {
|
if (ucs4_character<0x7F) {
|
||||||
*cp_character = (char)ucs4_character;
|
*cp_character = (char)ucs4_character;
|
||||||
} else {
|
} else {
|
||||||
struct { wint_t ucs4; unsigned char cp437; } xref[160] = {
|
struct { ucs4_t ucs4; unsigned char cp437; } xref[160] = {
|
||||||
{0x00A0, 255}, {0x00A1, 173}, {0x00A2, 155}, {0x00A3, 156},
|
{0x00A0, 255}, {0x00A1, 173}, {0x00A2, 155}, {0x00A3, 156},
|
||||||
{0x00A5, 157}, {0x00A7, 21}, {0x00AA, 166}, {0x00AB, 174},
|
{0x00A5, 157}, {0x00A7, 21}, {0x00AA, 166}, {0x00AB, 174},
|
||||||
{0x00AC, 170}, {0x00B0, 248}, {0x00B1, 241}, {0x00B2, 253},
|
{0x00AC, 170}, {0x00B0, 248}, {0x00B1, 241}, {0x00B2, 253},
|
||||||
|
@ -278,10 +393,10 @@ unicode_utf8_to_cp437(char *cp_character, const char *utf8_string,
|
||||||
|
|
||||||
/** Convert a UTF-8 encoded character to CP1252. */
|
/** Convert a UTF-8 encoded character to CP1252. */
|
||||||
int
|
int
|
||||||
unicode_utf8_to_cp1252(char *cp_character, const char *utf8_string,
|
unicode_utf8_to_cp1252(char *cp_character, const utf8_t *utf8_string,
|
||||||
size_t *length)
|
size_t *length)
|
||||||
{
|
{
|
||||||
wint_t ucs4_character;
|
ucs4_t ucs4_character;
|
||||||
int result;
|
int result;
|
||||||
|
|
||||||
result = unicode_utf8_to_ucs4(&ucs4_character, utf8_string, length);
|
result = unicode_utf8_to_ucs4(&ucs4_character, utf8_string, length);
|
||||||
|
@ -293,7 +408,7 @@ unicode_utf8_to_cp1252(char *cp_character, const char *utf8_string,
|
||||||
if (ucs4_character<=0x7F || ucs4_character>=0xA0) {
|
if (ucs4_character<=0x7F || ucs4_character>=0xA0) {
|
||||||
*cp_character = (char)ucs4_character;
|
*cp_character = (char)ucs4_character;
|
||||||
} else {
|
} else {
|
||||||
struct { wint_t ucs4; unsigned char cp; } xref[] = {
|
struct { ucs4_t ucs4; unsigned char cp; } xref[] = {
|
||||||
{0x20ac, 0x80}, {0x0081, 0x81}, {0x201a, 0x82}, {0x0192, 0x83},
|
{0x20ac, 0x80}, {0x0081, 0x81}, {0x201a, 0x82}, {0x0192, 0x83},
|
||||||
{0x201e, 0x84}, {0x2026, 0x85}, {0x2020, 0x86}, {0x2021, 0x87},
|
{0x201e, 0x84}, {0x2026, 0x85}, {0x2020, 0x86}, {0x2021, 0x87},
|
||||||
{0x02c6, 0x88}, {0x2030, 0x89}, {0x0160, 0x8a}, {0x2039, 0x8b},
|
{0x02c6, 0x88}, {0x2030, 0x89}, {0x0160, 0x8a}, {0x2039, 0x8b},
|
||||||
|
|
|
@ -21,11 +21,16 @@ extern "C" {
|
||||||
|
|
||||||
#include <wchar.h>
|
#include <wchar.h>
|
||||||
#define USE_UNICODE
|
#define USE_UNICODE
|
||||||
extern int unicode_utf8_to_cp437(char *ucs4_character, const char *utf8_string, size_t *length);
|
typedef unsigned long ucs4_t;
|
||||||
extern int unicode_utf8_to_cp1252(char *ucs4_character, const char *utf8_string, size_t *length);
|
typedef char utf8_t;
|
||||||
extern int unicode_utf8_to_ucs4(wint_t *ucs4_character, const char *utf8_string, size_t *length);
|
|
||||||
extern int unicode_utf8_strcasecmp(const char * a, const char * b);
|
extern int unicode_utf8_to_cp437(char *result, const utf8_t *utf8_string, size_t *length);
|
||||||
extern int unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *in, size_t *inlen);
|
extern int unicode_utf8_to_cp1252(char *result, const utf8_t *utf8_string, size_t *length);
|
||||||
|
extern int unicode_utf8_to_ucs4(ucs4_t *result, const utf8_t *utf8_string, size_t *length);
|
||||||
|
extern int unicode_ucs4_to_utf8 (utf8_t *result, size_t *size, ucs4_t ucs4_character);
|
||||||
|
extern int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b);
|
||||||
|
extern int unicode_latin1_to_utf8(utf8_t *out, size_t *outlen, const char *in, size_t *inlen);
|
||||||
|
extern int unicode_utf8_tolower(utf8_t *out, size_t outlen, const utf8_t *in);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,6 +34,7 @@ SHARED_BINDINGS =
|
||||||
<lua>spell.cpp
|
<lua>spell.cpp
|
||||||
<lua>unit.cpp
|
<lua>unit.cpp
|
||||||
<lua>item.cpp
|
<lua>item.cpp
|
||||||
|
<lua>test.cpp
|
||||||
;
|
;
|
||||||
|
|
||||||
Library luabindings : $(SHARED_BINDINGS) ;
|
Library luabindings : $(SHARED_BINDINGS) ;
|
||||||
|
|
|
@ -16,6 +16,9 @@ extern void bind_event(struct lua_State * L);
|
||||||
extern void bind_message(struct lua_State * L);
|
extern void bind_message(struct lua_State * L);
|
||||||
extern void bind_objects(struct lua_State * L);
|
extern void bind_objects(struct lua_State * L);
|
||||||
|
|
||||||
|
/* test routines */
|
||||||
|
extern void bind_test(struct lua_State * L);
|
||||||
|
|
||||||
/* server only */
|
/* server only */
|
||||||
extern void bind_script(struct lua_State * L);
|
extern void bind_script(struct lua_State * L);
|
||||||
extern void bind_gamecode(struct lua_State * L);
|
extern void bind_gamecode(struct lua_State * L);
|
||||||
|
|
|
@ -116,9 +116,12 @@ lua_setstring(const char * lname, const char * key, const char * str)
|
||||||
static const char *
|
static const char *
|
||||||
lua_getstring(const char * lname, const char * key)
|
lua_getstring(const char * lname, const char * key)
|
||||||
{
|
{
|
||||||
|
if (key) {
|
||||||
struct locale * lang = find_locale(lname);
|
struct locale * lang = find_locale(lname);
|
||||||
return (const char*)locale_getstring(lang, key);
|
return (const char*)locale_getstring(lang, key);
|
||||||
}
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
#define ISLANDSIZE 20
|
#define ISLANDSIZE 20
|
||||||
#define TURNS_PER_ISLAND 4
|
#define TURNS_PER_ISLAND 4
|
||||||
|
|
49
src/eressea/lua/test.cpp
Normal file
49
src/eressea/lua/test.cpp
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
#include <config.h>
|
||||||
|
#include <kernel/eressea.h>
|
||||||
|
|
||||||
|
#include "bindings.h"
|
||||||
|
#include "list.h"
|
||||||
|
|
||||||
|
// Lua includes
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning (push)
|
||||||
|
#pragma warning (disable: 4127)
|
||||||
|
#endif
|
||||||
|
#include <lua.hpp>
|
||||||
|
#include <luabind/luabind.hpp>
|
||||||
|
#include <luabind/iterator_policy.hpp>
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning (pop)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using namespace luabind;
|
||||||
|
|
||||||
|
#include <util/language.h>
|
||||||
|
#include <kernel/skill.h>
|
||||||
|
|
||||||
|
static const char *
|
||||||
|
loc_getskill(const char * loc, const char * locstring)
|
||||||
|
{
|
||||||
|
struct locale * lang = find_locale(loc);
|
||||||
|
skill_t result = findskill(locstring, lang);
|
||||||
|
if (result==NOSKILL) return 0;
|
||||||
|
return skillnames[result];
|
||||||
|
}
|
||||||
|
|
||||||
|
static const char *
|
||||||
|
loc_getkeyword(const char * loc, const char * locstring)
|
||||||
|
{
|
||||||
|
struct locale * lang = find_locale(loc);
|
||||||
|
keyword_t result = findkeyword(locstring, lang);
|
||||||
|
if (result==NOKEYWORD) return 0;
|
||||||
|
return keywords[result];
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
bind_test(lua_State * L)
|
||||||
|
{
|
||||||
|
module(L, "test")[
|
||||||
|
def("loc_skill", &loc_getskill),
|
||||||
|
def("loc_keyword", &loc_getkeyword)
|
||||||
|
];
|
||||||
|
}
|
|
@ -19,11 +19,6 @@
|
||||||
* permission from the authors.
|
* permission from the authors.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define LOCALE_CHECK
|
|
||||||
#ifdef __LCC__
|
|
||||||
#undef LOCALE_CHECK
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* config includes */
|
/* config includes */
|
||||||
#include <config.h>
|
#include <config.h>
|
||||||
#include <kernel/eressea.h>
|
#include <kernel/eressea.h>
|
||||||
|
@ -312,7 +307,9 @@ lua_init(void)
|
||||||
bind_event(L);
|
bind_event(L);
|
||||||
bind_message(L);
|
bind_message(L);
|
||||||
bind_gamecode(L);
|
bind_gamecode(L);
|
||||||
|
|
||||||
bind_gmtool(L);
|
bind_gmtool(L);
|
||||||
|
bind_test(L);
|
||||||
return L;
|
return L;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -662,12 +659,6 @@ main(int argc, char *argv[])
|
||||||
lc_numeric = setlocale(LC_NUMERIC, "C");
|
lc_numeric = setlocale(LC_NUMERIC, "C");
|
||||||
if (lc_ctype) lc_ctype = strdup(lc_ctype);
|
if (lc_ctype) lc_ctype = strdup(lc_ctype);
|
||||||
if (lc_numeric) lc_numeric = strdup(lc_numeric);
|
if (lc_numeric) lc_numeric = strdup(lc_numeric);
|
||||||
#ifdef LOCALE_CHECK
|
|
||||||
if (!locale_check()) {
|
|
||||||
log_error(("The current locale is not suitable for international Eressea.\n"));
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
lua_State * luaState = lua_init();
|
lua_State * luaState = lua_init();
|
||||||
global.vm_state = luaState;
|
global.vm_state = luaState;
|
||||||
|
|
13
src/scripts/run-tests.lua
Normal file
13
src/scripts/run-tests.lua
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
-- -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
-- Feed a fixed list of inputs to test.loc_skill for the "de" locale and
-- print one line per input: the input, the value loc_skill returned, and
-- the result of get_string("de", "skill::" .. <that value>).
-- Always returns 0.
function test_locales()
  local skills = { "", "herb", "kraut", "Kräute", "Kraeut", "k", "kra", "MAGIE" }
  -- ipairs walks the list in index order, so the output order is stable
  for _, input in ipairs(skills) do
    -- keep the result local so it does not leak into the global table
    local str = test.loc_skill("de", input)
    io.stdout:write(input, "\t", tostring(str), " ", tostring(get_string("de", "skill::" .. tostring(str))), "\n")
  end
  return 0
end
|
||||||
|
|
||||||
|
-- run the locale checks as soon as the script is loaded
test_locales()

-- block until one line has been read from stdin
-- (presumably so the output stays on screen; confirm)
io.stdin:read("*line")
|
Loading…
Reference in a new issue