- added a testsuite

- testcase skill-parser
- findskill uses patricia
This commit is contained in:
Enno Rehling 2008-04-25 14:31:38 +00:00
parent a2abcfe177
commit 1dd05538ad
21 changed files with 634 additions and 91 deletions

View file

@ -3775,7 +3775,7 @@ static void reset_rng(void) {
static void reset_rng_region(region * r) static void reset_rng_region(region * r)
{ {
rng_init(r->index); rng_init(r->index+turn);
} }
/** warn about passwords that are not US ASCII. /** warn about passwords that are not US ASCII.

View file

@ -87,11 +87,11 @@ xml_s(const char * str)
{ {
static xmlChar buffer[1024]; static xmlChar buffer[1024];
const char * inbuf = str; const char * inbuf = str;
unsigned char * outbuf = buffer; char * outbuf = (char *)buffer;
size_t inbytes = strlen(str)+1; size_t inbytes = strlen(str)+1;
size_t outbytes = sizeof(buffer) - 1; size_t outbytes = sizeof(buffer) - 1;
unicode_latin1_to_utf8(outbuf, &outbytes, (const xmlChar *)inbuf, &inbytes); unicode_latin1_to_utf8(outbuf, &outbytes, inbuf, &inbytes);
buffer[outbytes] = 0; buffer[outbytes] = 0;
return buffer; return buffer;
} }

View file

@ -68,6 +68,7 @@
#include <util/umlaut.h> #include <util/umlaut.h>
#include <util/xml.h> #include <util/xml.h>
#include <util/bsdstring.h> #include <util/bsdstring.h>
#include <util/unicode.h>
/* libxml includes */ /* libxml includes */
#include <libxml/tree.h> #include <libxml/tree.h>
@ -83,6 +84,11 @@
#include <time.h> #include <time.h>
#include <errno.h> #include <errno.h>
#define PTRIES 1
#if PTRIES
#include <util/patricia.h>
#endif
/* exported variables */ /* exported variables */
region *regions; region *regions;
faction *factions; faction *factions;
@ -1332,14 +1338,126 @@ findoption(const char *s, const struct locale * lang)
return NODIRECTION; return NODIRECTION;
} }
#if PTRIES
/* Trie roots, indexed by token type and then by a per-locale slot.
 * get_ptrie() only ever hands out slot 1 (locale "de") or slot 0 (any other
 * locale). */
static struct trie_node * ptries[UT_MAX][4];

/** Returns the address of the trie root used for tokens of the given type
 * in the given locale. */
static struct trie_node **
get_ptrie(const struct locale * lang, int type)
{
  const char * name = locale_name(lang);
  int slot = 0;

  if (strcmp(name, "de")==0) {
    slot = 1;
  }
  return &ptries[type][slot];
}
/**
 * Copies the UTF-8 string ip to op, replacing lower-case umlauts and a few
 * Scandinavian letters by their two-letter ASCII transcription.
 *
 * @param ip     zero-terminated UTF-8 input. Only lower-case code points are
 *               in the table, so the caller should lower-case the text first.
 * @param op     output buffer.
 * @param outlen size of op in bytes, including room for the terminating 0.
 * @return the number of substitutions made (>= 0), or -1 if the input could
 *         not be decoded or the result does not fit into op. Errors are
 *         always negative so that they can never be mistaken for a count.
 */
static int
umlaut_substitution(const char * ip, char * op, size_t outlen)
{
#define UMAX 7
  static struct replace {
    ucs4_t ucs;
    const char str[3];
  } replace[UMAX] = {
    /* match lower-case (!) umlauts and others to transcriptions */
    { 223, "ss"}, /* szlig */
    { 228, "ae"}, /* auml */
    { 229, "aa"}, /* norsk */
    { 230, "ae"}, /* norsk */
    { 246, "oe"}, /* ouml */
    { 248, "oe"}, /* norsk */
    { 252, "ue"}, /* uuml */
  };
  int subs = 0;

  while (*ip) {
    ucs4_t ucs = (unsigned char)*ip;
    size_t size = 1;        /* bytes consumed from the input */
    size_t cpsize = 1;      /* bytes produced in the output */
    const char * src = ip;  /* where the produced bytes come from */

    if (ucs & 0x80) {
      /* multi-byte sequence: decode it to find its length and code point */
      int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
      if (ret!=0) {
        return -1;
      }
      cpsize = size;
      /* the table is sorted, so a cheap range test skips most lookups */
      if (ucs >= replace[0].ucs && ucs <= replace[UMAX-1].ucs) {
        int i;
        for (i=0;i!=UMAX;++i) {
          if (replace[i].ucs==ucs) {
            src = replace[i].str;
            cpsize = 2;
            ++subs;
            break;
          }
        }
      }
    }

    /* the same bounds check covers plain copies and substitutions */
    if (cpsize>outlen) {
      return -1;
    }
    memcpy(op, src, cpsize);
    ip += size;
    op += cpsize;
    outlen -= cpsize;
  }

  /* one more byte is needed for the terminator */
  if (outlen==0) {
    return -1;
  }
  *op = 0;
  return subs;
}
/**
 * Looks up key with trie_find_prefix() and copies the payload of the node
 * that was found into data.
 *
 * @param ptrie root of the trie to search.
 * @param key   zero-terminated search key.
 * @param data  receives size bytes of payload on success.
 * @param size  number of payload bytes to copy.
 * @return 0 if a node was found, -1 otherwise (data is left untouched).
 */
static int
ptrie_find(struct trie_node *ptrie, const char * key, void * data, size_t size)
{
  trie_node * hit = trie_find_prefix(ptrie, key);

  if (hit==NULL) {
    return -1;
  }
  memcpy(data, trie_getdata(hit), size);
  return 0;
}
/**
 * Inserts name into the trie in lower-case form and, if it contains any
 * characters that umlaut_substitution() transcribes, additionally under the
 * transcribed spelling. Both entries carry the same payload.
 *
 * @return the result of unicode_utf8_tolower(); nothing is inserted unless
 *         that result is 0.
 */
static int
ptrie_insert(struct trie_node **ptrie, const char * name, void * data, size_t size)
{
  char lower[256];
  char ascii[256];
  int err = unicode_utf8_tolower(lower, 256, name);

  if (err!=0) {
    return err;
  }
  /* the transcribed variant goes in first, then the lower-case original */
  if (umlaut_substitution(lower, ascii, sizeof(ascii))>0) {
    trie_insert(ptrie, ascii, data, size);
  }
  trie_insert(ptrie, lower, data, size);
  return err;
}
#endif
skill_t skill_t
findskill(const char *s, const struct locale * lang) findskill(const char *s, const struct locale * lang)
{ {
#if PTRIES
char lowercase[256];
int res = unicode_utf8_tolower(lowercase, sizeof(lowercase), s);
if (res==0) {
trie_node ** ptrie = get_ptrie(lang, UT_SKILLS);
skill_t sk;
int result = ptrie_find(*ptrie, lowercase, &sk, sizeof(sk));
if (result==0) return sk;
}
return NOSKILL;
#else
struct tnode * tokens = get_translations(lang, UT_SKILLS); struct tnode * tokens = get_translations(lang, UT_SKILLS);
variant token; variant token;
if (findtoken(tokens, s, &token)==E_TOK_NOMATCH) return NOSKILL; if (findtoken(tokens, s, &token)==E_TOK_NOMATCH) return NOSKILL;
return (skill_t)token.i; return (skill_t)token.i;
#endif
} }
keyword_t keyword_t
@ -1954,6 +2072,9 @@ init_locale(const struct locale * lang)
const struct race * rc; const struct race * rc;
struct tnode * tokens; struct tnode * tokens;
const terrain_type * terrain; const terrain_type * terrain;
#if PTRIES
trie_node ** ptrie;
#endif
tokens = get_translations(lang, UT_MAGIC); tokens = get_translations(lang, UT_MAGIC);
for (i=0;i!=MAXMAGIETYP;++i) { for (i=0;i!=MAXMAGIETYP;++i) {
@ -1976,7 +2097,18 @@ init_locale(const struct locale * lang)
var.i = i; var.i = i;
addtoken(tokens, LOC(lang, parameters[i]), var); addtoken(tokens, LOC(lang, parameters[i]), var);
} }
#if PTRIES
ptrie = get_ptrie(lang, UT_SKILLS);
for (i=0;i!=MAXSKILLS;++i) {
if (i!=SK_TRADE || !TradeDisabled()) {
skill_t sk = (skill_t)i;
const char * skname = skillname(sk, lang);
if (skname!=NULL) {
ptrie_insert(ptrie, skname, &sk, sizeof(sk));
}
}
}
#else
tokens = get_translations(lang, UT_SKILLS); tokens = get_translations(lang, UT_SKILLS);
for (i=0;i!=MAXSKILLS;++i) { for (i=0;i!=MAXSKILLS;++i) {
if (i!=SK_TRADE || !TradeDisabled()) { if (i!=SK_TRADE || !TradeDisabled()) {
@ -1987,6 +2119,7 @@ init_locale(const struct locale * lang)
} }
} }
} }
#endif
tokens = get_translations(lang, UT_KEYWORDS); tokens = get_translations(lang, UT_KEYWORDS);
for (i=0;i!=MAXKEYWORDS;++i) { for (i=0;i!=MAXKEYWORDS;++i) {

View file

@ -384,7 +384,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars)
char * bufp; char * bufp;
unsigned int c = 0; unsigned int c = 0;
size_t bpt, i; size_t bpt, i;
wint_t ucs; ucs4_t ucs;
size_t size; size_t size;
int result; int result;
@ -401,7 +401,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars)
assert(result==0 || "damnit, we're not handling invalid input here!"); assert(result==0 || "damnit, we're not handling invalid input here!");
/* Leerzeichen überspringen */ /* Leerzeichen überspringen */
while (*p != 0 && !iswalnum(ucs)) { while (*p != 0 && !iswalnum((wint_t)ucs)) {
p += size; p += size;
result = unicode_utf8_to_ucs4(&ucs, p, &size); result = unicode_utf8_to_ucs4(&ucs, p, &size);
assert(result==0 || "damnit, we're not handling invalid input here!"); assert(result==0 || "damnit, we're not handling invalid input here!");
@ -411,7 +411,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars)
if (*p != 0) ++c; if (*p != 0) ++c;
/* alnums überspringen */ /* alnums überspringen */
while (*p != 0 && iswalnum(ucs)) { while (*p != 0 && iswalnum((wint_t)ucs)) {
p+=size; p+=size;
result = unicode_utf8_to_ucs4(&ucs, p, &size); result = unicode_utf8_to_ucs4(&ucs, p, &size);
assert(result==0 || "damnit, we're not handling invalid input here!"); assert(result==0 || "damnit, we're not handling invalid input here!");
@ -434,7 +434,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars)
while (*p != 0 && c < maxchars) { while (*p != 0 && c < maxchars) {
/* Leerzeichen überspringen */ /* Leerzeichen überspringen */
while (*p != 0 && !iswalnum(ucs)) { while (*p != 0 && !iswalnum((wint_t)ucs)) {
p+=size; p+=size;
result = unicode_utf8_to_ucs4(&ucs, p, &size); result = unicode_utf8_to_ucs4(&ucs, p, &size);
assert(result==0 || "damnit, we're not handling invalid input here!"); assert(result==0 || "damnit, we're not handling invalid input here!");
@ -442,7 +442,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars)
/* alnums übertragen */ /* alnums übertragen */
for (i = 0; i < bpt && *p != 0 && iswalnum(ucs); ++i) { for (i = 0; i < bpt && *p != 0 && iswalnum((wint_t)ucs); ++i) {
memcpy(bufp, p, size); memcpy(bufp, p, size);
p += size; p += size;
bufp += size; bufp += size;
@ -454,7 +454,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars)
/* Bis zum nächsten Leerzeichen */ /* Bis zum nächsten Leerzeichen */
while (c < maxchars && *p != 0 && iswalnum(ucs)) { while (c < maxchars && *p != 0 && iswalnum((wint_t)ucs)) {
p+=size; p+=size;
result = unicode_utf8_to_ucs4(&ucs, p, &size); result = unicode_utf8_to_ucs4(&ucs, p, &size);
assert(result==0 || "damnit, we're not handling invalid input here!"); assert(result==0 || "damnit, we're not handling invalid input here!");

View file

@ -178,7 +178,7 @@ freadstr(FILE * F, int encoding, char * start, size_t size)
char inbuf = (char)c; char inbuf = (char)c;
size_t inbytes = 1; size_t inbytes = 1;
size_t outbytes = size-(str-start); size_t outbytes = size-(str-start);
int ret = unicode_latin1_to_utf8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes); int ret = unicode_latin1_to_utf8(str, &outbytes, &inbuf, &inbytes);
if (ret>0) str+=ret; if (ret>0) str+=ret;
else { else {
log_error(("input data was not iso-8859-1! assuming utf-8\n")); log_error(("input data was not iso-8859-1! assuming utf-8\n"));
@ -197,7 +197,7 @@ freadstr(FILE * F, int encoding, char * start, size_t size)
char inbuf = (char)c; char inbuf = (char)c;
size_t inbytes = 1; size_t inbytes = 1;
size_t outbytes = size-(str-start); size_t outbytes = size-(str-start);
int ret = unicode_latin1_to_utf8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes); int ret = unicode_latin1_to_utf8(str, &outbytes, &inbuf, &inbytes);
if (ret>0) str+=ret; if (ret>0) str+=ret;
else { else {
log_error(("input data was not iso-8859-1! assuming utf-8\n")); log_error(("input data was not iso-8859-1! assuming utf-8\n"));

View file

@ -246,7 +246,7 @@ read_newfactions(const char * filename)
char buffer[32]; char buffer[32];
size_t outbytes = sizeof(buffer) - 1; size_t outbytes = sizeof(buffer) - 1;
size_t inbytes = strlen(race); size_t inbytes = strlen(race);
unicode_latin1_to_utf8((unsigned char *)buffer, &outbytes, (const unsigned char *)race, &inbytes); unicode_latin1_to_utf8(buffer, &outbytes, race, &inbytes);
buffer[outbytes] = 0; buffer[outbytes] = 0;
nf->race = findrace(buffer, default_locale); nf->race = findrace(buffer, default_locale);
if (nf->race==NULL) { if (nf->race==NULL) {

View file

@ -29,11 +29,11 @@ eatwhite(const char * ptr, size_t * total_size)
*total_size = 0; *total_size = 0;
while (*ptr) { while (*ptr) {
wint_t ucs; ucs4_t ucs;
size_t size = 0; size_t size = 0;
ret = unicode_utf8_to_ucs4(&ucs, ptr, &size); ret = unicode_utf8_to_ucs4(&ucs, ptr, &size);
if (ret!=0) break; if (ret!=0) break;
if (!iswspace(ucs)) break; if (!iswspace((wint_t)ucs)) break;
*total_size += size; *total_size += size;
ptr += size; ptr += size;
} }
@ -149,7 +149,7 @@ getbuf_latin1(FILE * F)
char inbuf = (char)c; char inbuf = (char)c;
size_t inbytes = 1; size_t inbytes = 1;
size_t outbytes = MAXLINE-(cp-fbuf); size_t outbytes = MAXLINE-(cp-fbuf);
int ret = unicode_latin1_to_utf8((xmlChar *)cp, &outbytes, (const xmlChar *)&inbuf, &inbytes); int ret = unicode_latin1_to_utf8(cp, &outbytes, &inbuf, &inbytes);
if (ret>0) cp+=ret; if (ret>0) cp+=ret;
else { else {
log_error(("input data was not iso-8859-1! assuming utf-8\n")); log_error(("input data was not iso-8859-1! assuming utf-8\n"));
@ -213,7 +213,7 @@ getbuf_utf8(FILE * F)
} }
cont = false; cont = false;
while (*bp && cp<fbuf+MAXLINE) { while (*bp && cp<fbuf+MAXLINE) {
wint_t ucs; ucs4_t ucs;
size_t size; size_t size;
int ret; int ret;
@ -244,7 +244,7 @@ getbuf_utf8(FILE * F)
break; break;
} }
if (iswspace(ucs)) { if (iswspace((wint_t)ucs)) {
if (!quote) { if (!quote) {
bp += size; bp += size;
ret = eatwhite(bp, &size); ret = eatwhite(bp, &size);
@ -264,7 +264,7 @@ getbuf_utf8(FILE * F)
} else { } else {
bp+=size; bp+=size;
} }
} else if (iswcntrl(ucs)) { } else if (iswcntrl((wint_t)ucs)) {
if (!comment && cp<fbuf+MAXLINE) { if (!comment && cp<fbuf+MAXLINE) {
*cp++ = '?'; *cp++ = '?';
} }

View file

@ -74,32 +74,6 @@ set_string (char **s, const char *neu)
return *s; return *s;
} }
/* Sanity check for the handling of German umlauts.
 * Returns false if any of the checks below failed, true otherwise. */
boolean
locale_check(void)
{
int i, errorlevel = 0;
/* three lower-case umlauts, the three matching upper-case ones, sharp s.
 * NOTE(review): the byte-wise indexing below (i and i+3) only lines up if
 * this literal is stored in a single-byte encoding such as ISO-8859-1 -
 * confirm against the source file's encoding. */
const unsigned char * umlaute = (const unsigned char*)"äöüÄÖÜß";
unsigned char result[32];
size_t inbytes = strlen((const char *)umlaute);
size_t outbytes = sizeof(result);
/* a return value <= 0 from the conversion counts as a failure */
int ret = unicode_latin1_to_utf8(result, &outbytes, umlaute, &inbytes);
if (ret<=0) {
++errorlevel;
}
/* E: this tests whether umlauts work. If the lower-case umlauts are not true for isalpha(), we are in trouble. */
/* each of the first three bytes must upper-case to the byte three positions later */
for (i=0;i!=3;++i) {
if (towupper(umlaute[i])!=(int)umlaute[i+3]) {
++errorlevel;
}
}
/* every byte must classify as alphabetic and as neither whitespace nor control */
for (i=0;umlaute[i]!=0;++i) {
if (!iswalpha(umlaute[i]) || iswspace(umlaute[i]) || iswcntrl(umlaute[i])) {
++errorlevel;
}
}
if (errorlevel) return false;
return true;
}
static int static int
spc_email_isvalid(const char *address) spc_email_isvalid(const char *address)

View file

@ -18,7 +18,6 @@
extern "C" { extern "C" {
#endif #endif
extern boolean locale_check(void);
extern char * set_string(char **s, const char *neu); extern char * set_string(char **s, const char *neu);
extern int set_email(char** pemail, const char *newmail); extern int set_email(char** pemail, const char *newmail);

View file

@ -23,7 +23,7 @@ static int
eatwhitespace_c(const char ** str) eatwhitespace_c(const char ** str)
{ {
int ret; int ret;
wint_t ucs; ucs4_t ucs;
size_t len; size_t len;
/* skip over potential whitespace */ /* skip over potential whitespace */
@ -38,7 +38,7 @@ eatwhitespace_c(const char ** str)
log_warning(("illegal character sequence in UTF8 string: %s\n", *str)); log_warning(("illegal character sequence in UTF8 string: %s\n", *str));
return ret; return ret;
} }
if (!iswspace(ucs)) break; if (!iswspace((wint_t)ucs)) break;
*str+=len; *str+=len;
} }
} }
@ -89,7 +89,7 @@ skip_token(void)
eatwhitespace_c(&state->current_token); eatwhitespace_c(&state->current_token);
while (*state->current_token) { while (*state->current_token) {
wint_t ucs; ucs4_t ucs;
size_t len; size_t len;
unsigned char utf8_character = (unsigned char)state->current_token[0]; unsigned char utf8_character = (unsigned char)state->current_token[0];
@ -104,7 +104,7 @@ skip_token(void)
log_warning(("illegal character sequence in UTF8 string: %s\n", state->current_token)); log_warning(("illegal character sequence in UTF8 string: %s\n", state->current_token));
} }
} }
if (iswspace(ucs) && quotechar==0) { if (iswspace((wint_t)ucs) && quotechar==0) {
return; return;
} else { } else {
switch(utf8_character) { switch(utf8_character) {
@ -134,7 +134,7 @@ parse_token(const char ** str)
eatwhitespace_c(&ctoken); eatwhitespace_c(&ctoken);
while (*ctoken && cursor-lbuf < MAXTOKENSIZE-1) { while (*ctoken && cursor-lbuf < MAXTOKENSIZE-1) {
wint_t ucs; ucs4_t ucs;
size_t len; size_t len;
boolean copy = false; boolean copy = false;
@ -152,7 +152,7 @@ parse_token(const char ** str)
if (escape) { if (escape) {
copy = true; copy = true;
escape = false; escape = false;
} else if (iswspace(ucs)) { } else if (iswspace((wint_t)ucs)) {
if (quotechar==0) break; if (quotechar==0) break;
copy = true; copy = true;
} else if (utf8_character=='"' || utf8_character=='\'') { } else if (utf8_character=='"' || utf8_character=='\'') {

236
src/common/util/patricia.c Normal file
View file

@ -0,0 +1,236 @@
#include <config.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "patricia.h"
#define MAXKEYLEN 128
/* TODO: custom memory management to optimize cache layout, or use arrays. */
/* NOTE: The structure saves an extra 0 delimiter for the key. Technically
 * this wouldn't be necessary (because we know its length from data[0]),
* but it makes it possible for trie_getkey to return a key without making
* a copy or have a cumbersome (const char**, size_t*) interface.
* +-----------+-------------+------+------------+
* data: | keylen(1) | key(keylen) | 0(1) | data(size) |
* +-----------+-------------+------+------------+
*/
/* One node of the patricia trie. */
struct trie_node {
/* child followed when the key bit at bitpos is 0 (l) or 1 (r); trie_insert
 * points one of them back at the node itself to terminate searches */
struct trie_node *l, *r;
/* packed buffer: key length byte, key bytes, a 0 byte, then the payload */
char * data;
/* index of the key bit that this node tests during a search */
unsigned int bitpos;
};
/* get_bit(c, s, p): value (0 or 1) of bit number p of the key c, where s is
 * the key length in bytes. Byte p/8 is selected first, then bit p%8 counted
 * from the least significant bit. Bit positions in bytes beyond index s
 * yield 0. The macro still reads the byte AT index s, which for the callers
 * in this file is the terminating 0 byte of the key. Note that the
 * arguments are evaluated more than once. */
#if 1
#define get_bit(c, s, p) (unsigned int)((((p)>>3)>(unsigned int)(s))?0:((c)[(p)>>3]>>((p)&7)&1))
#else
/* Disabled function form of get_bit. Unlike the macro it treats byte index
 * s itself as out of range (>= instead of >). */
unsigned int get_bit(const char * c, size_t s, unsigned int p)
{
if ((p>>3)>=(unsigned int)s) return 0;
return ((c)[p>>3]>>(p&7)&1);
}
#endif
/* node_bit(n, p): bit number p of the key stored in node n. The key starts
 * at data+1 and its length is kept in data[0]. */
#define node_bit(n, p) get_bit((n)->data+1, (n)->data[0], (p))
/**
 * Inserts key into the trie and attaches a copy of size bytes of data to it.
 *
 * @param root_p address of the root pointer; updated if the new node becomes
 *               the root.
 * @param key    zero-terminated key, shorter than MAXKEYLEN bytes.
 * @param data   payload to copy, or NULL to only reserve size bytes that the
 *               caller fills in later through trie_getdata().
 * @param size   payload size in bytes; 0 when the trie is used as a set.
 * @return the node holding key (the existing node if the key was already
 *         present, in which case its payload is left unchanged), or NULL if
 *         memory for a new node could not be allocated (the trie is left
 *         unmodified in that case).
 */
trie_node * trie_insert(trie_node **root_p, const char * key, const void * data, size_t size)
{
  trie_node * new_node;
  size_t keylen = strlen(key);
  trie_node ** insert_p = root_p, *node = *insert_p;
  unsigned int p, bit=0;

  /* the key length has to fit into the single length byte of the buffer */
  assert(keylen<MAXKEYLEN);

  for (p=0;p!=keylen*8+1;++p) {
    bit = get_bit(key, keylen, p);

    /* NULL-pointers lead to someplace we haven't got a prefix yet. */
    if (node==NULL) {
      break;
    }

    /* if we have the full prefix that the current node represents, move on */
    if (p==node->bitpos) {
      insert_p = bit?&node->r:&node->l;
      node = *insert_p;
      if (node==NULL) {
        continue;
      }
    }

    /* if we are looking at a back-node, we need to add our node before it. */
    if (p>=node->bitpos) {
      /* find the point p where both differ. */
      if (keylen==(unsigned int)node->data[0] && strncmp(key, node->data+1, keylen)==0) {
        /* we are trying to insert the same key again */
        return node;
      }
      do {
        ++p;
        bit = get_bit(key, keylen, p);
      } while (node_bit(node, p)==bit);
      break;
    }

    /* if instead we differ before reaching the end of the current prefix, we must split.
     * we insert our node before the current one and re-attach it. */
    if (node_bit(node, p)!=bit) {
      break;
    }
  }

  /* allocate everything before touching the trie, so that a failed
   * allocation leaves the structure exactly as it was */
  new_node = malloc(sizeof *new_node);
  if (new_node==NULL) {
    return NULL;
  }
  new_node->data = malloc(keylen+2+size);
  if (new_node->data==NULL) {
    free(new_node);
    return NULL;
  }

  new_node->bitpos = p;
  new_node->data[0] = (char)keylen;
  memcpy(new_node->data+1, key, keylen+1); /* key including its terminator */
  if (data!=NULL && size>0) {
    /* if data is NULL then the user only wanted some space that they're going to write to later */
    /* if size is 0 then the user is using the trie as a set, not a map */
    memcpy(new_node->data+2+keylen, data, size);
  }
  if (bit) {
    new_node->l = node;
    new_node->r = new_node; /* loop the 1-bit to ourselves, search will end */
  } else {
    new_node->l = new_node; /* loop the 0-bit to ourselves, search will end */
    new_node->r = node;
  }
  *insert_p = new_node;
  return new_node;
}
/**
 * Removes the node pos from the trie and frees it together with its data.
 *
 * @param root_p address of the root pointer; updated if the root changes.
 * @param pos    node to remove, as returned by trie_insert() or one of the
 *               find functions. NULL is ignored. If pos cannot be reached
 *               from the root, nothing is removed or freed.
 */
void trie_remove(trie_node **root_p, trie_node *pos)
{
  if (pos!=NULL) {
    const char * key = trie_getkey(pos);
    size_t keylen = pos->data[0];
    trie_node ** node_p = root_p;
    trie_node * node = *root_p;

    while (node) {
      int bit;
      trie_node ** next_p;
      trie_node * next;

      if (node == pos) {
        /* pos points back at itself: unlink it by promoting its other child */
        if (node->l==node) {
          *node_p = node->r;
          break;
        } else if (node->r==node) {
          *node_p = node->l;
          break;
        }
      }

      bit = get_bit(key, keylen, node->bitpos);
      next_p = bit?&node->r:&node->l;
      next = *next_p;
      if (next == pos && next->bitpos<=node->bitpos) {
        /* the element that has a back-pointer to pos gets swapped with pos */
        char * data = pos->data;
        pos->data = node->data;
        node->data = data;

        /* finally, find the back-pointer to node and set it to pos */
        next_p = bit?&node->l:&node->r; /* NB: this is the OTHER child of node */
        next = *next_p;
        key = trie_getkey(node);
        keylen = (unsigned int)node->data[0];
        while (next) {
          int new_bit;
          if (next==node) {
            *next_p = pos;
            break;
          }
          new_bit = get_bit(key, keylen, next->bitpos);
          next_p = new_bit?&next->r:&next->l;
          next = *next_p;
        }
        *node_p = bit?node->l:node->r;
        break;
      }
      node = *next_p;
      node_p = next_p;
    }

    /* node is NULL when the walk ran off the trie without meeting pos;
     * there is nothing to free in that case */
    if (node!=NULL) {
      free(node->data);
      free(node);
    }
  }
}
/**
 * Prints the subtree below root to stdout, one line per node:
 * "key bitpos | key of l | key of r" ("?" for a missing child).
 * Only children with a larger bitpos are descended into, so self-loops and
 * back-pointers are printed but not followed. A NULL root prints nothing.
 */
void trie_debug(trie_node * root)
{
  const char * l;
  const char * r;

  if (root==NULL) {
    return;
  }
  l = root->l?trie_getkey(root->l):"?";
  r = root->r?trie_getkey(root->r):"?";
  /* bitpos is unsigned, so it is printed with %u */
  printf("%s %u | %s | %s\n", trie_getkey(root), root->bitpos, l, r);
  if (root->l && root->l->bitpos > root->bitpos) trie_debug(root->l);
  if (root->r && root->r->bitpos > root->bitpos) trie_debug(root->r);
}
/**
 * Searches the trie for a node whose key is exactly key.
 *
 * @return the matching node, or NULL if there is none.
 */
trie_node * trie_find(trie_node *root, const char *key)
{
  const size_t keylen = strlen(key);
  trie_node * cur = root;

  while (cur!=NULL) {
    trie_node * child;

    if (get_bit(key, keylen, cur->bitpos)) {
      child = cur->r;
    } else {
      child = cur->l;
    }
    if (child==NULL) {
      break;
    }
    if (cur->bitpos>=child->bitpos) {
      /* the link does not lead further down: child is the only candidate */
      if (keylen==(unsigned int)child->data[0] && strncmp(key, child->data+1, keylen)==0) {
        return child;
      }
      break;
    }
    cur = child;
  }
  return NULL;
}
/**
 * Searches the trie for a node whose key begins with key.
 *
 * @return a node whose stored key has key as a prefix (or equals it), or
 *         NULL if the search does not end at such a node.
 */
trie_node * trie_find_prefix(trie_node *root, const char *key)
{
  const size_t keylen = strlen(key);
  trie_node * walk;
  trie_node * candidate;

  for (walk=root; walk!=NULL; walk=candidate) {
    const unsigned int bit = get_bit(key, keylen, walk->bitpos);

    candidate = bit ? walk->r : walk->l;
    if (candidate==NULL || walk->bitpos<candidate->bitpos) {
      /* either a dead end (loop terminates) or a link further down */
      continue;
    }
    /* the link does not lead further down, so the search ends here */
    if (keylen>(unsigned int)candidate->data[0]) {
      return NULL;
    }
    if (strncmp(key, candidate->data+1, keylen)!=0) {
      return NULL;
    }
    return candidate;
  }
  return NULL;
}
/** Returns a pointer to the payload stored with node: it follows the length
 * byte, the key and the key's terminating 0 byte in the data buffer. */
void * trie_getdata(trie_node * node)
{
  const int keylen = node->data[0];
  char * payload = node->data + 2 + keylen;

  return payload;
}
/** Returns the zero-terminated key stored in node; it starts right after
 * the length byte of the data buffer and is not copied. */
const char * trie_getkey(trie_node * node)
{
  const char * key = &node->data[1];

  return key;
}
/**
 * Frees every node reachable from root, including each node's data buffer.
 * Only children with a larger bitpos are descended into, so self-loops and
 * back-pointers are never followed and no node is freed twice.
 * A NULL root is ignored.
 */
void trie_free(trie_node * root)
{
  if (root) {
    if (root->l && root->l->bitpos>root->bitpos) trie_free(root->l);
    if (root->r && root->r->bitpos>root->bitpos) trie_free(root->r);
    /* the key/payload buffer is a separate allocation owned by the node */
    free(root->data);
    free(root);
  }
}

View file

@ -0,0 +1,21 @@
#ifndef H_PATRICIA
#define H_PATRICIA

/* size_t is used in the prototypes below; include it here so that this
 * header does not depend on what the including file pulled in before. */
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Opaque node of a patricia trie that maps zero-terminated keys to
 * fixed-size payloads. An empty trie is a NULL root pointer. */
typedef struct trie_node trie_node;

/* Inserts key with a copy of size bytes of data (data may be NULL to only
 * reserve the space). Returns the node holding key; if the key was already
 * present, the existing node is returned unchanged. */
trie_node * trie_insert(trie_node **root, const char *key, const void *data, size_t size);
/* Returns the node whose key equals key, or NULL. */
trie_node * trie_find(trie_node *root, const char *key);
/* Returns a pointer to the payload stored with node. */
void * trie_getdata(trie_node *node);
/* Returns the zero-terminated key stored in node (not a copy). */
const char * trie_getkey(trie_node *node);
/* Frees the trie rooted at root. */
void trie_free(trie_node * root);
/* Removes pos from the trie and frees it; a NULL pos is ignored. */
void trie_remove(trie_node **root_p, trie_node *pos);
/* Prints the trie rooted at root to stdout for debugging. */
void trie_debug(trie_node * root);
/* Returns a node whose key starts with key, or NULL. */
trie_node * trie_find_prefix(trie_node *root, const char *key);

#ifdef __cplusplus
}
#endif
#endif

View file

@ -32,7 +32,7 @@
typedef struct tref { typedef struct tref {
struct tref * nexthash; struct tref * nexthash;
wint_t ucs; ucs4_t ucs;
struct tnode * node; struct tnode * node;
} tref; } tref;
@ -43,7 +43,7 @@ void
addtoken(tnode * root, const char * str, variant id) addtoken(tnode * root, const char * str, variant id)
{ {
static struct replace { static struct replace {
wint_t ucs; ucs4_t ucs;
const char str[3]; const char str[3];
} replace[] = { } replace[] = {
/* match lower-case (!) umlauts and others to transcriptions */ /* match lower-case (!) umlauts and others to transcriptions */
@ -63,7 +63,7 @@ addtoken(tnode * root, const char * str, variant id)
} else { } else {
tref * next; tref * next;
int ret, index, i = 0; int ret, index, i = 0;
wint_t ucs, lcs; ucs4_t ucs, lcs;
size_t len; size_t len;
ret = unicode_utf8_to_ucs4(&ucs, str, &len); ret = unicode_utf8_to_ucs4(&ucs, str, &len);
@ -84,10 +84,10 @@ addtoken(tnode * root, const char * str, variant id)
tnode * node = calloc(1, sizeof(tnode)); tnode * node = calloc(1, sizeof(tnode));
if (ucs<'a' || ucs>'z') { if (ucs<'a' || ucs>'z') {
lcs = towlower(ucs); lcs = towlower((wint_t)ucs);
} }
if (ucs==lcs) { if (ucs==lcs) {
ucs = towupper(ucs); ucs = towupper((wint_t)ucs);
} }
ref = malloc(sizeof(tref)); ref = malloc(sizeof(tref));
@ -136,7 +136,7 @@ findtoken(const tnode * tk, const char * str, variant* result)
do { do {
int index; int index;
const tref * ref; const tref * ref;
wint_t ucs; ucs4_t ucs;
size_t len; size_t len;
int ret = unicode_utf8_to_ucs4(&ucs, str, &len); int ret = unicode_utf8_to_ucs4(&ucs, str, &len);

View file

@ -14,13 +14,67 @@
#include <errno.h> #include <errno.h>
#include <wctype.h> #include <wctype.h>
/* Byte constants named after their bit pattern (Bxxxxxxxx is the byte whose
 * binary digits are xxxxxxxx). unicode_ucs4_to_utf8() uses them as the UTF-8
 * lead-byte and continuation-byte markers and masks. */
#define B00000000 0x00
#define B10000000 0x80
#define B11000000 0xC0
#define B11100000 0xE0
#define B11110000 0xF0
#define B11111000 0xF8
#define B11111100 0xFC
#define B11111110 0xFE
#define B00111111 0x3F
#define B00011111 0x1F
#define B00001111 0x0F
#define B00000111 0x07
#define B00000011 0x03
#define B00000001 0x01
int int
unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *in, size_t *inlen) unicode_utf8_tolower(utf8_t *op, size_t outlen, const utf8_t *ip)
{
while (*ip) {
ucs4_t ucs = *ip;
ucs4_t low;
size_t size = 1;
if (ucs & 0x80) {
int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
if (ret!=0) {
return ret;
}
}
if (size>outlen) {
return ENOMEM;
}
low = towlower((wint_t)ucs);
if (low==ucs) {
memcpy(op, ip, size);
ip += size;
op += size;
outlen -=size;
} else {
ip += size;
unicode_ucs4_to_utf8(op, &size, low);
op += size;
outlen -=size;
}
}
if (outlen<=0) {
return ENOMEM;
}
*op = 0;
return 0;
}
int
unicode_latin1_to_utf8(utf8_t *out, size_t *outlen, const char *in, size_t *inlen)
{ {
int is = (int)*inlen; int is = (int)*inlen;
int os = (int)*outlen; int os = (int)*outlen;
const unsigned char * ip = in; const char * ip = in;
unsigned char * op = out; utf8_t * op = out;
while (ip-in<is) { while (ip-in<is) {
unsigned char c = *ip; unsigned char c = *ip;
@ -44,12 +98,12 @@ unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *
} }
int int
unicode_utf8_strcasecmp(const char * a, const char * b) unicode_utf8_strcasecmp(const utf8_t * a, const char * b)
{ {
while (*a && *b) { while (*a && *b) {
int ret; int ret;
size_t size; size_t size;
wint_t ucsa = *a, ucsb = *b; ucs4_t ucsa = *a, ucsb = *b;
if (ucsa & 0x80) { if (ucsa & 0x80) {
ret = unicode_utf8_to_ucs4(&ucsa, a, &size); ret = unicode_utf8_to_ucs4(&ucsa, a, &size);
@ -63,8 +117,8 @@ unicode_utf8_strcasecmp(const char * a, const char * b)
} else ++b; } else ++b;
if (ucsb!=ucsa) { if (ucsb!=ucsa) {
ucsb = towlower(ucsb); ucsb = towlower((wint_t)ucsb);
ucsa = towlower(ucsa); ucsa = towlower((wint_t)ucsa);
if (ucsb<ucsa) return 1; if (ucsb<ucsa) return 1;
if (ucsb>ucsa) return -1; if (ucsb>ucsa) return -1;
} }
@ -74,12 +128,73 @@ unicode_utf8_strcasecmp(const char * a, const char * b)
return 0; return 0;
} }
/* Encode one UCS-4 code point as UTF-8.
 *
 * utf8_character receives the encoded bytes (1 to 6 of them, no terminator),
 * *size receives how many were written. Returns 0 on success, or EILSEQ for
 * code points above 0x7FFFFFFF, in which case nothing is written.
 */
int
unicode_ucs4_to_utf8 (utf8_t *utf8_character, size_t *size, ucs4_t ucs4_character)
{
  ucs4_t bits = ucs4_character;
  ucs4_t lead;     /* marker bits of the first byte */
  size_t nbytes;   /* total length of the encoding */
  size_t i;

  if (ucs4_character <= 0x0000007F) {
    nbytes = 1; lead = B00000000;   /* 0xxxxxxx */
  } else if (ucs4_character <= 0x000007FF) {
    nbytes = 2; lead = B11000000;   /* 110xxxxx + 1 continuation byte */
  } else if (ucs4_character <= 0x0000FFFF) {
    nbytes = 3; lead = B11100000;   /* 1110xxxx + 2 continuation bytes */
  } else if (ucs4_character <= 0x001FFFFF) {
    nbytes = 4; lead = B11110000;   /* 11110xxx + 3 continuation bytes */
  } else if (ucs4_character <= 0x03FFFFFF) {
    nbytes = 5; lead = B11111000;   /* 111110xx + 4 continuation bytes */
  } else if (ucs4_character <= 0x7FFFFFFF) {
    nbytes = 6; lead = B11111100;   /* 1111110x + 5 continuation bytes */
  } else {
    return EILSEQ;
  }

  /* continuation bytes (10xxxxxx) carry six bits each; fill them back to
   * front so that the bits left over at the end belong to the lead byte */
  for (i = nbytes - 1; i > 0; --i) {
    utf8_character[i] = (char) ((bits & B00111111) | B10000000);
    bits >>= 6;
  }
  utf8_character[0] = (char) (bits | lead);

  *size = nbytes;
  return 0;
}
/* Convert a UTF-8 encoded character to UCS-4. */ /* Convert a UTF-8 encoded character to UCS-4. */
int int
unicode_utf8_to_ucs4(wint_t *ucs4_character, const char *utf8_string, unicode_utf8_to_ucs4(ucs4_t *ucs4_character, const utf8_t *utf8_string,
size_t *length) size_t *length)
{ {
unsigned char utf8_character = (unsigned char)utf8_string[0]; utf8_t utf8_character = utf8_string[0];
/* Is the character in the ASCII range? If so, just copy it to the /* Is the character in the ASCII range? If so, just copy it to the
output. */ output. */
@ -202,10 +317,10 @@ unicode_utf8_to_ucs4(wint_t *ucs4_character, const char *utf8_string,
/** Convert a UTF-8 encoded character to CP437. */ /** Convert a UTF-8 encoded character to CP437. */
int int
unicode_utf8_to_cp437(char *cp_character, const char *utf8_string, unicode_utf8_to_cp437(char *cp_character, const utf8_t *utf8_string,
size_t *length) size_t *length)
{ {
wint_t ucs4_character; ucs4_t ucs4_character;
int result; int result;
result = unicode_utf8_to_ucs4(&ucs4_character, utf8_string, length); result = unicode_utf8_to_ucs4(&ucs4_character, utf8_string, length);
@ -217,7 +332,7 @@ unicode_utf8_to_cp437(char *cp_character, const char *utf8_string,
if (ucs4_character<0x7F) { if (ucs4_character<0x7F) {
*cp_character = (char)ucs4_character; *cp_character = (char)ucs4_character;
} else { } else {
struct { wint_t ucs4; unsigned char cp437; } xref[160] = { struct { ucs4_t ucs4; unsigned char cp437; } xref[160] = {
{0x00A0, 255}, {0x00A1, 173}, {0x00A2, 155}, {0x00A3, 156}, {0x00A0, 255}, {0x00A1, 173}, {0x00A2, 155}, {0x00A3, 156},
{0x00A5, 157}, {0x00A7, 21}, {0x00AA, 166}, {0x00AB, 174}, {0x00A5, 157}, {0x00A7, 21}, {0x00AA, 166}, {0x00AB, 174},
{0x00AC, 170}, {0x00B0, 248}, {0x00B1, 241}, {0x00B2, 253}, {0x00AC, 170}, {0x00B0, 248}, {0x00B1, 241}, {0x00B2, 253},
@ -278,10 +393,10 @@ unicode_utf8_to_cp437(char *cp_character, const char *utf8_string,
/** Convert a UTF-8 encoded character to CP1252. */ /** Convert a UTF-8 encoded character to CP1252. */
int int
unicode_utf8_to_cp1252(char *cp_character, const char *utf8_string, unicode_utf8_to_cp1252(char *cp_character, const utf8_t *utf8_string,
size_t *length) size_t *length)
{ {
wint_t ucs4_character; ucs4_t ucs4_character;
int result; int result;
result = unicode_utf8_to_ucs4(&ucs4_character, utf8_string, length); result = unicode_utf8_to_ucs4(&ucs4_character, utf8_string, length);
@ -293,7 +408,7 @@ unicode_utf8_to_cp1252(char *cp_character, const char *utf8_string,
if (ucs4_character<=0x7F || ucs4_character>=0xA0) { if (ucs4_character<=0x7F || ucs4_character>=0xA0) {
*cp_character = (char)ucs4_character; *cp_character = (char)ucs4_character;
} else { } else {
struct { wint_t ucs4; unsigned char cp; } xref[] = { struct { ucs4_t ucs4; unsigned char cp; } xref[] = {
{0x20ac, 0x80}, {0x0081, 0x81}, {0x201a, 0x82}, {0x0192, 0x83}, {0x20ac, 0x80}, {0x0081, 0x81}, {0x201a, 0x82}, {0x0192, 0x83},
{0x201e, 0x84}, {0x2026, 0x85}, {0x2020, 0x86}, {0x2021, 0x87}, {0x201e, 0x84}, {0x2026, 0x85}, {0x2020, 0x86}, {0x2021, 0x87},
{0x02c6, 0x88}, {0x2030, 0x89}, {0x0160, 0x8a}, {0x2039, 0x8b}, {0x02c6, 0x88}, {0x2030, 0x89}, {0x0160, 0x8a}, {0x2039, 0x8b},

View file

@ -21,11 +21,16 @@ extern "C" {
#include <wchar.h> #include <wchar.h>
#define USE_UNICODE #define USE_UNICODE
extern int unicode_utf8_to_cp437(char *ucs4_character, const char *utf8_string, size_t *length); typedef unsigned long ucs4_t;
extern int unicode_utf8_to_cp1252(char *ucs4_character, const char *utf8_string, size_t *length); typedef char utf8_t;
extern int unicode_utf8_to_ucs4(wint_t *ucs4_character, const char *utf8_string, size_t *length);
extern int unicode_utf8_strcasecmp(const char * a, const char * b); extern int unicode_utf8_to_cp437(char *result, const utf8_t *utf8_string, size_t *length);
extern int unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *in, size_t *inlen); extern int unicode_utf8_to_cp1252(char *result, const utf8_t *utf8_string, size_t *length);
extern int unicode_utf8_to_ucs4(ucs4_t *result, const utf8_t *utf8_string, size_t *length);
extern int unicode_ucs4_to_utf8 (utf8_t *result, size_t *size, ucs4_t ucs4_character);
extern int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b);
extern int unicode_latin1_to_utf8(utf8_t *out, size_t *outlen, const char *in, size_t *inlen);
extern int unicode_utf8_tolower(utf8_t *out, size_t outlen, const utf8_t *in);
#ifdef __cplusplus #ifdef __cplusplus
} }

View file

@ -34,6 +34,7 @@ SHARED_BINDINGS =
<lua>spell.cpp <lua>spell.cpp
<lua>unit.cpp <lua>unit.cpp
<lua>item.cpp <lua>item.cpp
<lua>test.cpp
; ;
Library luabindings : $(SHARED_BINDINGS) ; Library luabindings : $(SHARED_BINDINGS) ;

View file

@ -16,6 +16,9 @@ extern void bind_event(struct lua_State * L);
extern void bind_message(struct lua_State * L); extern void bind_message(struct lua_State * L);
extern void bind_objects(struct lua_State * L); extern void bind_objects(struct lua_State * L);
/* test routines */
extern void bind_test(struct lua_State * L);
/* server only */ /* server only */
extern void bind_script(struct lua_State * L); extern void bind_script(struct lua_State * L);
extern void bind_gamecode(struct lua_State * L); extern void bind_gamecode(struct lua_State * L);

View file

@ -116,9 +116,12 @@ lua_setstring(const char * lname, const char * key, const char * str)
static const char * static const char *
lua_getstring(const char * lname, const char * key) lua_getstring(const char * lname, const char * key)
{ {
if (key) {
struct locale * lang = find_locale(lname); struct locale * lang = find_locale(lname);
return (const char*)locale_getstring(lang, key); return (const char*)locale_getstring(lang, key);
} }
return NULL;
}
#define ISLANDSIZE 20 #define ISLANDSIZE 20
#define TURNS_PER_ISLAND 4 #define TURNS_PER_ISLAND 4

49
src/eressea/lua/test.cpp Normal file
View file

@ -0,0 +1,49 @@
#include <config.h>
#include <kernel/eressea.h>
#include "bindings.h"
#include "list.h"
// Lua includes
#ifdef _MSC_VER
#pragma warning (push)
#pragma warning (disable: 4127)
#endif
#include <lua.hpp>
#include <luabind/luabind.hpp>
#include <luabind/iterator_policy.hpp>
#ifdef _MSC_VER
#pragma warning (pop)
#endif
using namespace luabind;
#include <util/language.h>
#include <kernel/skill.h>
/** Resolves a skill name given in some locale to its internal name.
 *
 * @param loc       name of the locale to search, e.g. "de"
 * @param locstring skill name in that locale, passed on to findskill()
 * @return the skillnames[] entry for the skill that findskill() reports,
 *         or 0 if an argument is missing, the locale cannot be found,
 *         or findskill() reports NOSKILL.
 */
static const char *
loc_getskill(const char * loc, const char * locstring)
{
  /* a null argument must never reach the lookup functions */
  if (loc==0 || locstring==0) return 0;

  struct locale * lang = find_locale(loc);
  /* NOTE(review): find_locale presumably yields 0 for an unknown locale
   * name - confirm against util/language.h. findskill is not known to
   * tolerate a null locale, so bail out here instead. */
  if (lang==0) return 0;

  skill_t result = findskill(locstring, lang);
  if (result==NOSKILL) return 0;
  return skillnames[result];
}
/** Resolves a keyword given in some locale to its internal name.
 *
 * @param loc       name of the locale to search, e.g. "de"
 * @param locstring keyword in that locale, passed on to findkeyword()
 * @return the keywords[] entry for the keyword that findkeyword() reports,
 *         or 0 if an argument is missing, the locale cannot be found,
 *         or findkeyword() reports NOKEYWORD.
 */
static const char *
loc_getkeyword(const char * loc, const char * locstring)
{
  /* a null argument must never reach the lookup functions */
  if (loc==0 || locstring==0) return 0;

  struct locale * lang = find_locale(loc);
  /* NOTE(review): find_locale presumably yields 0 for an unknown locale
   * name - confirm against util/language.h. findkeyword is not known to
   * tolerate a null locale, so bail out here instead. */
  if (lang==0) return 0;

  keyword_t result = findkeyword(locstring, lang);
  if (result==NOKEYWORD) return 0;
  return keywords[result];
}
/** Registers the Lua module "test".
 *
 * Exposes the two locale lookup helpers of this file to scripts as
 * test.loc_skill and test.loc_keyword.
 *
 * @param L the Lua state that receives the module
 */
void
bind_test(lua_State * L)
{
  luabind::module(L, "test") [
    luabind::def("loc_skill", &loc_getskill),
    luabind::def("loc_keyword", &loc_getkeyword)
  ];
}

View file

@ -19,11 +19,6 @@
* permission from the authors. * permission from the authors.
*/ */
#define LOCALE_CHECK
#ifdef __LCC__
#undef LOCALE_CHECK
#endif
/* config includes */ /* config includes */
#include <config.h> #include <config.h>
#include <kernel/eressea.h> #include <kernel/eressea.h>
@ -312,7 +307,9 @@ lua_init(void)
bind_event(L); bind_event(L);
bind_message(L); bind_message(L);
bind_gamecode(L); bind_gamecode(L);
bind_gmtool(L); bind_gmtool(L);
bind_test(L);
return L; return L;
} }
@ -662,12 +659,6 @@ main(int argc, char *argv[])
lc_numeric = setlocale(LC_NUMERIC, "C"); lc_numeric = setlocale(LC_NUMERIC, "C");
if (lc_ctype) lc_ctype = strdup(lc_ctype); if (lc_ctype) lc_ctype = strdup(lc_ctype);
if (lc_numeric) lc_numeric = strdup(lc_numeric); if (lc_numeric) lc_numeric = strdup(lc_numeric);
#ifdef LOCALE_CHECK
if (!locale_check()) {
log_error(("The current locale is not suitable for international Eressea.\n"));
return -1;
}
#endif
lua_State * luaState = lua_init(); lua_State * luaState = lua_init();
global.vm_state = luaState; global.vm_state = luaState;

13
src/scripts/run-tests.lua Normal file
View file

@ -0,0 +1,13 @@
-- -*- coding: utf-8 -*-
-- Prints one line per sample input: the input, the skill that
-- test.loc_skill resolves it to in the "de" locale, and the string that
-- get_string returns for the key "skill::<result>".
-- Always returns 0.
function test_locales()
  local skills = { "", "herb", "kraut", "Kräute", "Kraeut", "k", "kra", "MAGIE" }
  -- ipairs walks the list in index order, so the output order is stable
  for _, input in ipairs(skills) do
    -- declared local so the result does not leak into the global environment
    local skill = test.loc_skill("de", input)
    local name = get_string("de", "skill::" .. tostring(skill))
    io.stdout:write(input, "\t", tostring(skill), " ", tostring(name), "\n")
  end
  return 0
end
-- run the locale test as soon as this script is loaded
test_locales()
-- block until one line arrives on stdin; presumably this keeps the console
-- output visible when the script is run interactively - TODO confirm
io.stdin:read("*line")