diff --git a/src/common/gamecode/laws.c b/src/common/gamecode/laws.c index 855108559..f2152ac91 100644 --- a/src/common/gamecode/laws.c +++ b/src/common/gamecode/laws.c @@ -3775,7 +3775,7 @@ static void reset_rng(void) { static void reset_rng_region(region * r) { - rng_init(r->index); + rng_init(r->index+turn); } /** warn about passwords that are not US ASCII. diff --git a/src/common/gamecode/xmlreport.c b/src/common/gamecode/xmlreport.c index 66d7892bd..5af4a64cb 100644 --- a/src/common/gamecode/xmlreport.c +++ b/src/common/gamecode/xmlreport.c @@ -87,11 +87,11 @@ xml_s(const char * str) { static xmlChar buffer[1024]; const char * inbuf = str; - unsigned char * outbuf = buffer; + char * outbuf = (char *)buffer; size_t inbytes = strlen(str)+1; size_t outbytes = sizeof(buffer) - 1; - unicode_latin1_to_utf8(outbuf, &outbytes, (const xmlChar *)inbuf, &inbytes); + unicode_latin1_to_utf8(outbuf, &outbytes, inbuf, &inbytes); buffer[outbytes] = 0; return buffer; } diff --git a/src/common/kernel/eressea.c b/src/common/kernel/eressea.c index 0e20b54ed..029d0bd7f 100644 --- a/src/common/kernel/eressea.c +++ b/src/common/kernel/eressea.c @@ -68,6 +68,7 @@ #include #include #include +#include /* libxml includes */ #include @@ -83,6 +84,11 @@ #include #include +#define PTRIES 1 +#if PTRIES +#include +#endif + /* exported variables */ region *regions; faction *factions; @@ -1332,14 +1338,126 @@ findoption(const char *s, const struct locale * lang) return NODIRECTION; } +#if PTRIES +static struct trie_node * ptries[UT_MAX][4]; + +static struct trie_node ** +get_ptrie(const struct locale * lang, int type) +{ + int index = (strcmp(locale_name(lang), "de")==0); + return &(ptries[type][index]); +} + +static int +umlaut_substitution(const char * ip, char * op, size_t outlen) +{ +#define UMAX 7 + static struct replace { + ucs4_t ucs; + const char str[3]; + } replace[UMAX] = { + /* match lower-case (!) 
umlauts and others to transcriptions */ + { 223, "ss"}, /* szlig */ + { 228, "ae"}, /* auml */ + { 229, "aa"}, /* norsk */ + { 230, "ae"}, /* norsk */ + { 246, "oe"}, /* ouml */ + { 248, "oe"}, /* norsk */ + { 252, "ue"}, /* uuml */ + }; + int subs = 0; + while (*ip) { + ucs4_t ucs = *ip; + size_t size = 1; /* bytes consumed from ip */ + size_t cpsize = 1; /* bytes written to op */ + const char * src = ip; /* what gets copied: the input bytes, or a transcription */ + + if (ucs & 0x80) { + int ret = unicode_utf8_to_ucs4(&ucs, ip, &size); + if (ret!=0) { + return -1; /* callers test subs>0, so a decoder code must never look like a count */ + } + cpsize = size; + if (ucs >= replace[0].ucs && ucs <= replace[UMAX-1].ucs) { + int i; + for (i=0;i!=UMAX;++i) { + if (replace[i].ucs==ucs) { + src = replace[i].str; + cpsize = 2; /* every transcription in the table is two bytes; bounds-checked below */ + ++subs; + break; + } + } + } + } + if (cpsize) { + if (cpsize>outlen) { + return -1; + } + memcpy(op, src, cpsize); + } + + ip += size; + op += cpsize; + outlen -= cpsize; + } + + if (outlen==0) { + return -1; + } + *op = 0; + return subs; +} + +static int +ptrie_find(struct trie_node *ptrie, const char * key, void * data, size_t size) +{ + trie_node * node = trie_find_prefix(ptrie, key); + if (node) { + void * result = trie_getdata(node); + memcpy(data, result, size); + return 0; + } + return -1; +} + +static int +ptrie_insert(struct trie_node **ptrie, const char * name, void * data, size_t size) +{ + char converted[256]; + char simple[256]; + int ret = unicode_utf8_tolower(converted, 256, name); + if (ret==0) { + int subs = umlaut_substitution(converted, simple, sizeof(simple)); + if (subs>0) { + trie_insert(ptrie, simple, data, size); + } + trie_insert(ptrie, converted, data, size); + } + return ret; +} +#endif + skill_t findskill(const char *s, const struct locale * lang) { +#if PTRIES + char lowercase[256]; + int res = unicode_utf8_tolower(lowercase, sizeof(lowercase), s); + if (res==0) { + trie_node ** ptrie = get_ptrie(lang, UT_SKILLS); + skill_t sk; + int result = ptrie_find(*ptrie, lowercase, &sk, sizeof(sk)); + if (result==0) return sk; + } + return NOSKILL; +#else struct tnode * tokens = get_translations(lang, UT_SKILLS); variant 
token; if (findtoken(tokens, s, &token)==E_TOK_NOMATCH) return NOSKILL; return (skill_t)token.i; +#endif } keyword_t @@ -1954,6 +2072,9 @@ init_locale(const struct locale * lang) const struct race * rc; struct tnode * tokens; const terrain_type * terrain; +#if PTRIES + trie_node ** ptrie; +#endif tokens = get_translations(lang, UT_MAGIC); for (i=0;i!=MAXMAGIETYP;++i) { @@ -1976,7 +2097,18 @@ init_locale(const struct locale * lang) var.i = i; addtoken(tokens, LOC(lang, parameters[i]), var); } - +#if PTRIES + ptrie = get_ptrie(lang, UT_SKILLS); + for (i=0;i!=MAXSKILLS;++i) { + if (i!=SK_TRADE || !TradeDisabled()) { + skill_t sk = (skill_t)i; + const char * skname = skillname(sk, lang); + if (skname!=NULL) { + ptrie_insert(ptrie, skname, &sk, sizeof(sk)); + } + } + } +#else tokens = get_translations(lang, UT_SKILLS); for (i=0;i!=MAXSKILLS;++i) { if (i!=SK_TRADE || !TradeDisabled()) { @@ -1987,6 +2119,7 @@ init_locale(const struct locale * lang) } } } +#endif tokens = get_translations(lang, UT_KEYWORDS); for (i=0;i!=MAXKEYWORDS;++i) { diff --git a/src/common/kernel/names.c b/src/common/kernel/names.c index a9b57f795..56c958ecb 100644 --- a/src/common/kernel/names.c +++ b/src/common/kernel/names.c @@ -384,7 +384,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars) char * bufp; unsigned int c = 0; size_t bpt, i; - wint_t ucs; + ucs4_t ucs; size_t size; int result; @@ -401,7 +401,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars) assert(result==0 || "damnit, we're not handling invalid input here!"); /* Leerzeichen überspringen */ - while (*p != 0 && !iswalnum(ucs)) { + while (*p != 0 && !iswalnum((wint_t)ucs)) { p += size; result = unicode_utf8_to_ucs4(&ucs, p, &size); assert(result==0 || "damnit, we're not handling invalid input here!"); @@ -411,7 +411,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars) if (*p != 0) ++c; /* alnums überspringen */ - while (*p != 0 && iswalnum(ucs)) { + while (*p != 0 && 
iswalnum((wint_t)ucs)) { p+=size; result = unicode_utf8_to_ucs4(&ucs, p, &size); assert(result==0 || "damnit, we're not handling invalid input here!"); @@ -434,7 +434,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars) while (*p != 0 && c < maxchars) { /* Leerzeichen überspringen */ - while (*p != 0 && !iswalnum(ucs)) { + while (*p != 0 && !iswalnum((wint_t)ucs)) { p+=size; result = unicode_utf8_to_ucs4(&ucs, p, &size); assert(result==0 || "damnit, we're not handling invalid input here!"); @@ -442,7 +442,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars) /* alnums übertragen */ - for (i = 0; i < bpt && *p != 0 && iswalnum(ucs); ++i) { + for (i = 0; i < bpt && *p != 0 && iswalnum((wint_t)ucs); ++i) { memcpy(bufp, p, size); p += size; bufp += size; @@ -454,7 +454,7 @@ abkz(const char *s, char * buf, size_t buflen, size_t maxchars) /* Bis zum nächsten Leerzeichen */ - while (c < maxchars && *p != 0 && iswalnum(ucs)) { + while (c < maxchars && *p != 0 && iswalnum((wint_t)ucs)) { p+=size; result = unicode_utf8_to_ucs4(&ucs, p, &size); assert(result==0 || "damnit, we're not handling invalid input here!"); diff --git a/src/common/kernel/save.c b/src/common/kernel/save.c index 1fb75a7b2..abbd79b03 100644 --- a/src/common/kernel/save.c +++ b/src/common/kernel/save.c @@ -178,7 +178,7 @@ freadstr(FILE * F, int encoding, char * start, size_t size) char inbuf = (char)c; size_t inbytes = 1; size_t outbytes = size-(str-start); - int ret = unicode_latin1_to_utf8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes); + int ret = unicode_latin1_to_utf8(str, &outbytes, &inbuf, &inbytes); if (ret>0) str+=ret; else { log_error(("input data was not iso-8859-1! 
assuming utf-8\n")); @@ -197,7 +197,7 @@ freadstr(FILE * F, int encoding, char * start, size_t size) char inbuf = (char)c; size_t inbytes = 1; size_t outbytes = size-(str-start); - int ret = unicode_latin1_to_utf8((xmlChar *)str, &outbytes, (const xmlChar *)&inbuf, &inbytes); + int ret = unicode_latin1_to_utf8(str, &outbytes, &inbuf, &inbytes); if (ret>0) str+=ret; else { log_error(("input data was not iso-8859-1! assuming utf-8\n")); diff --git a/src/common/modules/autoseed.c b/src/common/modules/autoseed.c index 88b853f0e..2561f9e5f 100644 --- a/src/common/modules/autoseed.c +++ b/src/common/modules/autoseed.c @@ -246,7 +246,7 @@ read_newfactions(const char * filename) char buffer[32]; size_t outbytes = sizeof(buffer) - 1; size_t inbytes = strlen(race); - unicode_latin1_to_utf8((unsigned char *)buffer, &outbytes, (const unsigned char *)race, &inbytes); + unicode_latin1_to_utf8(buffer, &outbytes, race, &inbytes); buffer[outbytes] = 0; nf->race = findrace(buffer, default_locale); if (nf->race==NULL) { diff --git a/src/common/util/filereader.c b/src/common/util/filereader.c index dbfb73e77..95e2436da 100644 --- a/src/common/util/filereader.c +++ b/src/common/util/filereader.c @@ -29,11 +29,11 @@ eatwhite(const char * ptr, size_t * total_size) *total_size = 0; while (*ptr) { - wint_t ucs; + ucs4_t ucs; size_t size = 0; ret = unicode_utf8_to_ucs4(&ucs, ptr, &size); if (ret!=0) break; - if (!iswspace(ucs)) break; + if (!iswspace((wint_t)ucs)) break; *total_size += size; ptr += size; } @@ -149,7 +149,7 @@ getbuf_latin1(FILE * F) char inbuf = (char)c; size_t inbytes = 1; size_t outbytes = MAXLINE-(cp-fbuf); - int ret = unicode_latin1_to_utf8((xmlChar *)cp, &outbytes, (const xmlChar *)&inbuf, &inbytes); + int ret = unicode_latin1_to_utf8(cp, &outbytes, &inbuf, &inbytes); if (ret>0) cp+=ret; else { log_error(("input data was not iso-8859-1! 
assuming utf-8\n")); @@ -213,7 +213,7 @@ getbuf_utf8(FILE * F) } cont = false; while (*bp && cpcurrent_token); while (*state->current_token) { - wint_t ucs; + ucs4_t ucs; size_t len; unsigned char utf8_character = (unsigned char)state->current_token[0]; @@ -104,7 +104,7 @@ skip_token(void) log_warning(("illegal character sequence in UTF8 string: %s\n", state->current_token)); } } - if (iswspace(ucs) && quotechar==0) { + if (iswspace((wint_t)ucs) && quotechar==0) { return; } else { switch(utf8_character) { @@ -134,7 +134,7 @@ parse_token(const char ** str) eatwhitespace_c(&ctoken); while (*ctoken && cursor-lbuf < MAXTOKENSIZE-1) { - wint_t ucs; + ucs4_t ucs; size_t len; boolean copy = false; @@ -152,7 +152,7 @@ parse_token(const char ** str) if (escape) { copy = true; escape = false; - } else if (iswspace(ucs)) { + } else if (iswspace((wint_t)ucs)) { if (quotechar==0) break; copy = true; } else if (utf8_character=='"' || utf8_character=='\'') { diff --git a/src/common/util/patricia.c b/src/common/util/patricia.c new file mode 100644 index 000000000..6208c34d1 --- /dev/null +++ b/src/common/util/patricia.c @@ -0,0 +1,236 @@ +#include + +#include +#include +#include + +#include "patricia.h" + +#define MAXKEYLEN 128 + +/* TODO: custom memory management to optimize cache layout, or use arrays. */ + +/* NOTE: The structure saves an extra 0 delimiter for the key. Technically + * this wouldn't be necessary (because we know its' length from data[0]), + * but it makes it possible for trie_getkey to return a key without making + * a copy or have a cumbersome (const char**, size_t*) interface. 
+ * +-----------+-------------+------+------------+ + * data: | keylen(1) | key(keylen) | 0(1) | data(size) | + * +-----------+-------------+------+------------+ + */ + +struct trie_node { + struct trie_node *l, *r; + char * data; + unsigned int bitpos; +}; + +#if 1 +#define get_bit(c, s, p) (unsigned int)((((p)>>3)>(unsigned int)(s))?0:((c)[(p)>>3]>>((p)&7)&1)) +#else +unsigned int get_bit(const char * c, size_t s, unsigned int p) +{ + if ((p>>3)>=(unsigned int)s) return 0; + return ((c)[p>>3]>>(p&7)&1); +} +#endif +#define node_bit(n, p) get_bit((n)->data+1, (n)->data[0], (p)) + +trie_node * trie_insert(trie_node **root_p, const char * key, const void * data, size_t size) +{ + trie_node * new_node; + size_t keylen = strlen(key); + trie_node ** insert_p = root_p, *node = *insert_p; + unsigned int p, bit=0; + + assert(keylenbitpos) { + insert_p = bit?&node->r:&node->l; + node = *insert_p; + if (node==NULL) { + continue; + } + } + + /* if we are looking at a back-node, we need to add our node before it. */ + if (p>=node->bitpos) { + /* find the point p where both differ. */ + if (keylen==(unsigned int)node->data[0] && strncmp(key, node->data+1, keylen)==0) { + /* we are trying to insert the same key again */ + + return node; + } + do { + ++p; + bit = get_bit(key, keylen, p); + } while (node_bit(node, p)==bit); + break; + } + + /* if instead we differ before reaching the end of the current prefix, we must split. + * we insert our node before the current one and re-attach it. 
*/ + if (node_bit(node, p)!=bit) { + break; + } + } + + new_node = (trie_node *)malloc(sizeof(trie_node)); + new_node->bitpos = p; + new_node->data = malloc(keylen+2+size); + new_node->data[0] = (char)keylen; + memcpy(new_node->data+1, key, keylen+1); + if (data!=NULL && size>0) { + /* if data is NULL then the user only wanted some space that they're going to write to later */ + /* if size is 0 then the user is using the trie as a set, not a map */ + memcpy(new_node->data+2+keylen, data, size); + } + if (bit) { + new_node->l = node; + new_node->r = new_node; /* loop the 1-bit to ourselves, search will end */ + } else { + new_node->l = new_node; /* loop the 0-bit to ourselves, search will end */ + new_node->r = node; + } + *insert_p = new_node; + return new_node; +} + +void trie_remove(trie_node **root_p, trie_node *pos) +{ + if (pos!=NULL) { + const char * key = trie_getkey(pos); + size_t keylen = pos->data[0]; + trie_node ** node_p = root_p; + trie_node * node = *root_p; + + while (node) { + int bit; + trie_node ** next_p; + trie_node * next; + + if (node == pos) { + if (node->l==node) { + *node_p = node->r; + break; + } else if (node->r==node) { + *node_p = node->l; + break; + } + } + + bit = get_bit(key, keylen, node->bitpos); + next_p = bit?&node->r:&node->l; + next = *next_p; + if (next == pos && next->bitpos<=node->bitpos) { + /* the element that has a back-pointer to pos gets swapped with pos */ + char * data = pos->data; + pos->data = node->data; + node->data = data; + + /* finally, find the back-pointer to node and set it to pos */ + next_p = bit?&node->l:&node->r; /* NB: this is the OTHER child of node */ + next = *next_p; + key = trie_getkey(node); + keylen = (unsigned int)node->data[0]; + while (next) { + int new_bit; + if (next==node) { + *next_p = pos; + break; + } + new_bit = get_bit(key, keylen, next->bitpos); + next_p = new_bit?&next->r:&next->l; + next = *next_p; + } + *node_p = bit?node->l:node->r; + break; + } + node = *next_p; + node_p = 
next_p; + } + if (node!=NULL) free(node->data); /* node is NULL when the walk ended without reaching pos */ + free(node); + } +} + +void trie_debug(trie_node * root) +{ + const char * l = root->l?trie_getkey(root->l):"?"; + const char * r = root->r?trie_getkey(root->r):"?"; + printf("%s %u | %s | %s\n", trie_getkey(root), root->bitpos, l, r); + if (root->l && root->l->bitpos > root->bitpos) trie_debug(root->l); + if (root->r && root->r->bitpos > root->bitpos) trie_debug(root->r); +} + +trie_node * trie_find(trie_node *root, const char *key) +{ + trie_node * node = root; + size_t keylen = strlen(key); + + while (node) { + int bit = get_bit(key, keylen, node->bitpos); + trie_node * next = bit?node->r:node->l; + + if (next!=NULL) { + if (node->bitpos>=next->bitpos) { /* followed a back-link: next is the only candidate */ + if (keylen==(unsigned int)next->data[0] && strncmp(key, next->data+1, keylen)==0) { + return next; + } + next = NULL; + } + } + node = next; + } + return NULL; +} + +trie_node * trie_find_prefix(trie_node *root, const char *key) +{ + trie_node * node = root; + size_t keylen = strlen(key); + + while (node) { + int bit = get_bit(key, keylen, node->bitpos); + trie_node * next = bit?node->r:node->l; + + if (next!=NULL) { + if (node->bitpos>=next->bitpos) { /* back-link: accept when key is a prefix of the stored key */ + if (keylen<=(unsigned int)next->data[0] && strncmp(key, next->data+1, keylen)==0) { + return next; + } + next = NULL; + } + } + node = next; + } + return NULL; +} + +void * trie_getdata(trie_node * node) +{ + return node->data+2+node->data[0]; /* skip the length byte, the key and its 0 delimiter */ +} + +const char * trie_getkey(trie_node * node) +{ + return node->data+1; +} + +void trie_free(trie_node * root) +{ + if (root) { + if (root->l && root->l->bitpos>root->bitpos) trie_free(root->l); + if (root->r && root->r->bitpos>root->bitpos) trie_free(root->r); + free(root->data); free(root); /* the key/data buffer is a separate allocation made by trie_insert */ + } +} diff --git a/src/common/util/patricia.h b/src/common/util/patricia.h new file mode 100644 index 000000000..f01be96be --- /dev/null +++ b/src/common/util/patricia.h @@ -0,0 +1,21 @@ +#ifndef H_PATRICIA +#define H_PATRICIA +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct trie_node trie_node; + 
+trie_node * trie_insert(trie_node **root, const char *key, const void *data, size_t size); +trie_node * trie_find(trie_node *root, const char *key); +void * trie_getdata(trie_node *node); +const char * trie_getkey(trie_node *node); +void trie_free(trie_node * root); +void trie_remove(trie_node **root_p, trie_node *pos); +void trie_debug(trie_node * root); +trie_node * trie_find_prefix(trie_node *root, const char *key); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/common/util/umlaut.c b/src/common/util/umlaut.c index ffc2abcc5..cdd61290b 100644 --- a/src/common/util/umlaut.c +++ b/src/common/util/umlaut.c @@ -32,7 +32,7 @@ typedef struct tref { struct tref * nexthash; - wint_t ucs; + ucs4_t ucs; struct tnode * node; } tref; @@ -43,7 +43,7 @@ void addtoken(tnode * root, const char * str, variant id) { static struct replace { - wint_t ucs; + ucs4_t ucs; const char str[3]; } replace[] = { /* match lower-case (!) umlauts and others to transcriptions */ @@ -63,7 +63,7 @@ addtoken(tnode * root, const char * str, variant id) } else { tref * next; int ret, index, i = 0; - wint_t ucs, lcs; + ucs4_t ucs, lcs; size_t len; ret = unicode_utf8_to_ucs4(&ucs, str, &len); @@ -84,10 +84,10 @@ addtoken(tnode * root, const char * str, variant id) tnode * node = calloc(1, sizeof(tnode)); if (ucs<'a' || ucs>'z') { - lcs = towlower(ucs); + lcs = towlower((wint_t)ucs); } if (ucs==lcs) { - ucs = towupper(ucs); + ucs = towupper((wint_t)ucs); } ref = malloc(sizeof(tref)); @@ -136,7 +136,7 @@ findtoken(const tnode * tk, const char * str, variant* result) do { int index; const tref * ref; - wint_t ucs; + ucs4_t ucs; size_t len; int ret = unicode_utf8_to_ucs4(&ucs, str, &len); diff --git a/src/common/util/unicode.c b/src/common/util/unicode.c index 799640fc1..09a91dfc9 100644 --- a/src/common/util/unicode.c +++ b/src/common/util/unicode.c @@ -14,13 +14,67 @@ #include #include +#define B00000000 0x00 +#define B10000000 0x80 +#define B11000000 0xC0 +#define B11100000 0xE0 +#define 
B11110000 0xF0 +#define B11111000 0xF8 +#define B11111100 0xFC +#define B11111110 0xFE + +#define B00111111 0x3F +#define B00011111 0x1F +#define B00001111 0x0F +#define B00000111 0x07 +#define B00000011 0x03 +#define B00000001 0x01 + int -unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *in, size_t *inlen) +unicode_utf8_tolower(utf8_t *op, size_t outlen, const utf8_t *ip) +{ + while (*ip) { + ucs4_t ucs = *ip; + ucs4_t low; + utf8_t lower[6]; /* longest sequence unicode_ucs4_to_utf8 emits */ + const utf8_t * src = ip; /* bytes to copy: the input as-is, or its re-encoded lower-case form */ + size_t size = 1, outsize; /* bytes consumed from ip / bytes produced into op */ + if (ucs & 0x80) { + int ret = unicode_utf8_to_ucs4(&ucs, ip, &size); + if (ret!=0) { + return ret; + } + } + low = towlower((wint_t)ucs); + outsize = size; + if (low!=ucs) { + int ret = unicode_ucs4_to_utf8(lower, &outsize, low); + if (ret!=0) return ret; + src = lower; + } + if (outsize>outlen) { /* check what is actually written; it may be longer than the input sequence */ + return ENOMEM; + } + memcpy(op, src, outsize); + ip += size; + op += outsize; + outlen -= outsize; + } + + if (outlen==0) { + return ENOMEM; + } + *op = 0; + return 0; +} + +int +unicode_latin1_to_utf8(utf8_t *out, size_t *outlen, const char *in, size_t *inlen) { int is = (int)*inlen; int os = (int)*outlen; - const unsigned char * ip = in; - unsigned char * op = out; + const char * ip = in; + utf8_t * op = out; while (ip-inucsa) return -1; } @@ -74,12 +128,73 @@ unicode_utf8_strcasecmp(const char * a, const char * b) return 0; } +/* Convert a UCS-4 character to UTF-8. 
*/ +int +unicode_ucs4_to_utf8 (utf8_t *utf8_character, size_t *size, ucs4_t ucs4_character) +{ + int utf8_bytes; + + if (ucs4_character <= 0x0000007F) { + /* 0xxxxxxx */ + utf8_bytes = 1; + utf8_character[0] = (char) ucs4_character; + } + else if (ucs4_character <= 0x000007FF) { + /* 110xxxxx 10xxxxxx */ + utf8_bytes = 2; + utf8_character[0] = (char) ((ucs4_character >> 6) | B11000000); + utf8_character[1] = (char) ((ucs4_character & B00111111) | B10000000); + } + else if (ucs4_character <= 0x0000FFFF) { + /* 1110xxxx 10xxxxxx 10xxxxxx */ + utf8_bytes = 3; + utf8_character[0] = (char) ((ucs4_character >> 12) | B11100000); + utf8_character[1] = (char) (((ucs4_character >> 6) & B00111111) | B10000000); + utf8_character[2] = (char) ((ucs4_character & B00111111) | B10000000); + } + else if (ucs4_character <= 0x001FFFFF) { + /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ + utf8_bytes = 4; + utf8_character[0] = (char) ((ucs4_character >> 18) | B11110000); + utf8_character[1] = (char) (((ucs4_character >> 12) & B00111111) | B10000000); + utf8_character[2] = (char) (((ucs4_character >> 6) & B00111111) | B10000000); + utf8_character[3] = (char) ((ucs4_character & B00111111) | B10000000); + } + else if (ucs4_character <= 0x03FFFFFF) { + /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ + utf8_bytes = 5; + utf8_character[0] = (char) ((ucs4_character >> 24) | B11111000); + utf8_character[1] = (char) (((ucs4_character >> 18) & B00111111) | B10000000); + utf8_character[2] = (char) (((ucs4_character >> 12) & B00111111) | B10000000); + utf8_character[3] = (char) (((ucs4_character >> 6) & B00111111) | B10000000); + utf8_character[4] = (char) ((ucs4_character & B00111111) | B10000000); + } + else if (ucs4_character <= 0x7FFFFFFF) { + /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ + utf8_bytes = 6; + utf8_character[0] = (char) ((ucs4_character >> 30) | B11111100); + utf8_character[1] = (char) (((ucs4_character >> 24) & B00111111) | B10000000); + utf8_character[2] = (char) 
(((ucs4_character >> 18) & B00111111) | B10000000); + utf8_character[3] = (char) (((ucs4_character >> 12) & B00111111) | B10000000); + utf8_character[4] = (char) (((ucs4_character >> 6) & B00111111) | B10000000); + utf8_character[5] = (char) ((ucs4_character & B00111111) | B10000000); + } + else { + return EILSEQ; + } + + *size = utf8_bytes; + + return 0; +} + + /* Convert a UTF-8 encoded character to UCS-4. */ int -unicode_utf8_to_ucs4(wint_t *ucs4_character, const char *utf8_string, +unicode_utf8_to_ucs4(ucs4_t *ucs4_character, const utf8_t *utf8_string, size_t *length) { - unsigned char utf8_character = (unsigned char)utf8_string[0]; + utf8_t utf8_character = utf8_string[0]; /* Is the character in the ASCII range? If so, just copy it to the output. */ @@ -202,10 +317,10 @@ unicode_utf8_to_ucs4(wint_t *ucs4_character, const char *utf8_string, /** Convert a UTF-8 encoded character to CP437. */ int -unicode_utf8_to_cp437(char *cp_character, const char *utf8_string, +unicode_utf8_to_cp437(char *cp_character, const utf8_t *utf8_string, size_t *length) { - wint_t ucs4_character; + ucs4_t ucs4_character; int result; result = unicode_utf8_to_ucs4(&ucs4_character, utf8_string, length); @@ -217,7 +332,7 @@ unicode_utf8_to_cp437(char *cp_character, const char *utf8_string, if (ucs4_character<0x7F) { *cp_character = (char)ucs4_character; } else { - struct { wint_t ucs4; unsigned char cp437; } xref[160] = { + struct { ucs4_t ucs4; unsigned char cp437; } xref[160] = { {0x00A0, 255}, {0x00A1, 173}, {0x00A2, 155}, {0x00A3, 156}, {0x00A5, 157}, {0x00A7, 21}, {0x00AA, 166}, {0x00AB, 174}, {0x00AC, 170}, {0x00B0, 248}, {0x00B1, 241}, {0x00B2, 253}, @@ -278,10 +393,10 @@ unicode_utf8_to_cp437(char *cp_character, const char *utf8_string, /** Convert a UTF-8 encoded character to CP1252. 
*/ int -unicode_utf8_to_cp1252(char *cp_character, const char *utf8_string, +unicode_utf8_to_cp1252(char *cp_character, const utf8_t *utf8_string, size_t *length) { - wint_t ucs4_character; + ucs4_t ucs4_character; int result; result = unicode_utf8_to_ucs4(&ucs4_character, utf8_string, length); @@ -293,7 +408,7 @@ unicode_utf8_to_cp1252(char *cp_character, const char *utf8_string, if (ucs4_character<=0x7F || ucs4_character>=0xA0) { *cp_character = (char)ucs4_character; } else { - struct { wint_t ucs4; unsigned char cp; } xref[] = { + struct { ucs4_t ucs4; unsigned char cp; } xref[] = { {0x20ac, 0x80}, {0x0081, 0x81}, {0x201a, 0x82}, {0x0192, 0x83}, {0x201e, 0x84}, {0x2026, 0x85}, {0x2020, 0x86}, {0x2021, 0x87}, {0x02c6, 0x88}, {0x2030, 0x89}, {0x0160, 0x8a}, {0x2039, 0x8b}, diff --git a/src/common/util/unicode.h b/src/common/util/unicode.h index fde7467b1..5238c00cb 100644 --- a/src/common/util/unicode.h +++ b/src/common/util/unicode.h @@ -21,11 +21,16 @@ extern "C" { #include #define USE_UNICODE - extern int unicode_utf8_to_cp437(char *ucs4_character, const char *utf8_string, size_t *length); - extern int unicode_utf8_to_cp1252(char *ucs4_character, const char *utf8_string, size_t *length); - extern int unicode_utf8_to_ucs4(wint_t *ucs4_character, const char *utf8_string, size_t *length); - extern int unicode_utf8_strcasecmp(const char * a, const char * b); - extern int unicode_latin1_to_utf8(unsigned char *out, size_t *outlen, const unsigned char *in, size_t *inlen); + typedef unsigned long ucs4_t; + typedef char utf8_t; + + extern int unicode_utf8_to_cp437(char *result, const utf8_t *utf8_string, size_t *length); + extern int unicode_utf8_to_cp1252(char *result, const utf8_t *utf8_string, size_t *length); + extern int unicode_utf8_to_ucs4(ucs4_t *result, const utf8_t *utf8_string, size_t *length); + extern int unicode_ucs4_to_utf8 (utf8_t *result, size_t *size, ucs4_t ucs4_character); + extern int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b); + 
extern int unicode_latin1_to_utf8(utf8_t *out, size_t *outlen, const char *in, size_t *inlen); + extern int unicode_utf8_tolower(utf8_t *out, size_t outlen, const utf8_t *in); #ifdef __cplusplus } diff --git a/src/eressea/Jamfile b/src/eressea/Jamfile index 08fead386..d0b557f80 100644 --- a/src/eressea/Jamfile +++ b/src/eressea/Jamfile @@ -34,6 +34,7 @@ SHARED_BINDINGS = spell.cpp unit.cpp item.cpp + test.cpp ; Library luabindings : $(SHARED_BINDINGS) ; diff --git a/src/eressea/lua/bindings.h b/src/eressea/lua/bindings.h index 6c461af64..2dd5dc863 100644 --- a/src/eressea/lua/bindings.h +++ b/src/eressea/lua/bindings.h @@ -16,6 +16,9 @@ extern void bind_event(struct lua_State * L); extern void bind_message(struct lua_State * L); extern void bind_objects(struct lua_State * L); +/* test routines */ +extern void bind_test(struct lua_State * L); + /* server only */ extern void bind_script(struct lua_State * L); extern void bind_gamecode(struct lua_State * L); diff --git a/src/eressea/lua/eressea.cpp b/src/eressea/lua/eressea.cpp index d9a76fbbd..7232844c1 100644 --- a/src/eressea/lua/eressea.cpp +++ b/src/eressea/lua/eressea.cpp @@ -116,8 +116,11 @@ lua_setstring(const char * lname, const char * key, const char * str) static const char * lua_getstring(const char * lname, const char * key) { - struct locale * lang = find_locale(lname); - return (const char*)locale_getstring(lang, key); + if (key) { + struct locale * lang = find_locale(lname); + return (const char*)locale_getstring(lang, key); + } + return NULL; } #define ISLANDSIZE 20 diff --git a/src/eressea/lua/test.cpp b/src/eressea/lua/test.cpp new file mode 100644 index 000000000..cd4af295b --- /dev/null +++ b/src/eressea/lua/test.cpp @@ -0,0 +1,49 @@ +#include +#include + +#include "bindings.h" +#include "list.h" + +// Lua includes +#ifdef _MSC_VER +#pragma warning (push) +#pragma warning (disable: 4127) +#endif +#include +#include +#include +#ifdef _MSC_VER +#pragma warning (pop) +#endif + +using namespace 
luabind; + +#include +#include + +static const char * +loc_getskill(const char * loc, const char * locstring) +{ + struct locale * lang = find_locale(loc); + skill_t result = findskill(locstring, lang); + if (result==NOSKILL) return 0; + return skillnames[result]; +} + +static const char * +loc_getkeyword(const char * loc, const char * locstring) +{ + struct locale * lang = find_locale(loc); + keyword_t result = findkeyword(locstring, lang); + if (result==NOKEYWORD) return 0; + return keywords[result]; +} + +void +bind_test(lua_State * L) +{ + module(L, "test")[ + def("loc_skill", &loc_getskill), + def("loc_keyword", &loc_getkeyword) + ]; +} diff --git a/src/eressea/server.cpp b/src/eressea/server.cpp index 41de21ca8..002b270e9 100644 --- a/src/eressea/server.cpp +++ b/src/eressea/server.cpp @@ -19,11 +19,6 @@ * permission from the authors. */ -#define LOCALE_CHECK -#ifdef __LCC__ -#undef LOCALE_CHECK -#endif - /* config includes */ #include #include @@ -312,7 +307,9 @@ lua_init(void) bind_event(L); bind_message(L); bind_gamecode(L); + bind_gmtool(L); + bind_test(L); return L; } @@ -662,12 +659,6 @@ main(int argc, char *argv[]) lc_numeric = setlocale(LC_NUMERIC, "C"); if (lc_ctype) lc_ctype = strdup(lc_ctype); if (lc_numeric) lc_numeric = strdup(lc_numeric); -#ifdef LOCALE_CHECK - if (!locale_check()) { - log_error(("The current locale is not suitable for international Eressea.\n")); - return -1; - } -#endif lua_State * luaState = lua_init(); global.vm_state = luaState; diff --git a/src/scripts/run-tests.lua b/src/scripts/run-tests.lua new file mode 100644 index 000000000..f3d15e5f1 --- /dev/null +++ b/src/scripts/run-tests.lua @@ -0,0 +1,13 @@ +-- -*- coding: utf-8 -*- + +function test_locales() + local skills = { "", "herb", "kraut", "Kräute", "Kraeut", "k", "kra", "MAGIE" } + for k,v in pairs(skills) do + str = test.loc_skill("de", v) + io.stdout:write(v, "\t", tostring(str), " ", tostring(get_string("de", "skill::" .. 
tostring(str))), "\n") + end + return 0 +end + +test_locales() +io.stdin:read("*line")