server/src/common/util/umlaut.c

163 lines
3.4 KiB
C
Raw Normal View History

2001-01-25 10:37:55 +01:00
/* vi: set ts=2:
*
*
* Eressea PB(E)M host Copyright (C) 1998-2003
2001-01-25 10:37:55 +01:00
* Christian Schlittchen (corwin@amber.kn-bremen.de)
* Katja Zedel (katze@felidae.kn-bremen.de)
* Henning Peters (faroul@beyond.kn-bremen.de)
* Enno Rehling (enno@eressea-pbem.de)
* Ingo Wilken (Ingo.Wilken@informatik.uni-oldenburg.de)
*
* based on:
*
* Atlantis v1.0 13 September 1993 Copyright 1993 by Russell Wallace
* Atlantis v1.7 Copyright 1996 by Alex Schröder
*
* This program may not be used, modified or distributed without
* prior permission by the authors of Eressea.
* This program may not be sold or used commercially without prior written
* permission from the authors.
*/
#include <config.h>
#include "umlaut.h"
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/*
 * One entry in a node's hash table. Entries that fall into the same
 * bucket are chained through `nexthash`; several entries (e.g. the lower-
 * and upper-case form of a letter) may point at the same child node.
 */
typedef struct tref {
  struct tref *nexthash; /* next entry in the same hash bucket, or NULL */
  char c;                /* character this entry is keyed on */
  struct tnode *node;    /* child node reached by following `c` */
} tref;
/* Bit flags kept in a node's `flags` field. */
/* LEAF: a complete token ends at this node. addtoken() sets it when the
 * remaining string is empty, and never overwrites the id of a LEAF node
 * while inserting longer tokens through it. */
#define LEAF 1
/* SHARED: addtoken() found this child node already present while inserting
 * a token, i.e. at least two insertions passed through it. */
#define SHARED 2
2001-01-25 10:37:55 +01:00
void
addtoken(tnode * root, const char* str, variant id)
2001-01-25 10:37:55 +01:00
{
static struct replace {
char c;
const char * str;
} replace[] = {
{'<EFBFBD>', "ae"},
{'<EFBFBD>', "ae"},
{'<EFBFBD>', "oe"},
{'<EFBFBD>', "oe"},
{'<EFBFBD>', "ue"},
{'<EFBFBD>', "ue"},
{'<EFBFBD>', "ss"},
{ 0, 0 }
};
if (!*str) {
root->id = id;
root->flags |= LEAF;
2001-01-25 10:37:55 +01:00
} else {
tref * next;
2001-02-13 03:58:51 +01:00
int index, i = 0;
register char c = *str;
2001-02-13 03:58:51 +01:00
if (c<'a' || c>'z') c = (char)tolower((unsigned char)c);
#if NODEHASHSIZE == 8
index = c & 7;
#else
index = ((unsigned char)c) % NODEHASHSIZE;
#endif
next = root->next[index];
if (!(root->flags & LEAF)) root->id = id;
while (next && next->c != c) next = next->nexthash;
if (!next) {
tref * ref;
char u = (char)toupper((unsigned char)c);
tnode * node = calloc(1, sizeof(tnode));
ref = malloc(sizeof(tref));
ref->c = c;
ref->node = node;
ref->nexthash=root->next[index];
root->next[index] = ref;
if (u!=c) {
#if NODEHASHSIZE == 8
index = u & 7;
#else
index = ((unsigned char)u) % NODEHASHSIZE;
#endif
ref = malloc(sizeof(tref));
ref->c = u;
ref->node = node;
ref->nexthash = root->next[index];
root->next[index] = ref;
}
next=ref;
} else {
next->node->flags |= SHARED;
if ((next->node->flags & LEAF) == 0) next->node->id.v = NULL; /* why?*/
2001-01-25 10:37:55 +01:00
}
addtoken(next->node, str+1, id);
2001-01-25 10:37:55 +01:00
while (replace[i].str) {
if (*str==replace[i].c) {
char zText[1024];
2001-02-10 11:40:12 +01:00
strcat(strcpy(zText, replace[i].str), str+1);
addtoken(root, zText, id);
2001-01-25 10:37:55 +01:00
break;
}
++i;
}
}
}
/**
 * Look up `str` in the token tree rooted at `tk`.
 *
 * The string is followed character by character through the hash chains of
 * each node. Characters are compared exactly as given; no case conversion
 * is done here. The node flags are not consulted, so a string that ends on
 * an inner node (a prefix of a stored token) is reported as a match with
 * whatever id that node currently holds.
 *
 * @param tk     root node of the tree; NULL yields E_TOK_NOMATCH
 * @param str    NUL-terminated string to look up; NULL or "" yields
 *               E_TOK_NOMATCH
 * @param result receives the id of the node reached on success; may be
 *               NULL if the caller only needs the return code
 * @return E_TOK_SUCCESS if every character of `str` could be followed,
 *         E_TOK_NOMATCH otherwise (`*result` is left untouched then)
 */
int
findtoken(const tnode * tk, const char * str, variant* result)
{
  /* check the root before the loop dereferences it */
  if (!tk || !str || *str==0) return E_TOK_NOMATCH;

  do {
    int index;
    const tref * ref;
    char c = *str;

#if NODEHASHSIZE == 8
    index = c & 7;
#else
    index = ((unsigned char)c) % NODEHASHSIZE;
#endif
    ref = tk->next[index];
    while (ref && ref->c!=c) ref = ref->nexthash;

    /* no entry for this character, or an entry without a child node */
    if (!ref || !ref->node) return E_TOK_NOMATCH;
    ++str;
    tk = ref->node;
  } while (*str);

  if (result) *result = tk->id;
  return E_TOK_SUCCESS;
}
#ifdef TEST_UMLAUT
#include <stdint.h>
#include <stdio.h>
tnode root;

/*
 * Interactive smoke test, built only with TEST_UMLAUT defined.
 * Reads one token per line from stdin until end of input. A line that
 * findtoken() resolves is echoed with the number stored for it; any other
 * line is added to the tree under the next running number.
 */
int
main(int argc, char ** argv)
{
  char buf[1024];
  int i = 0;

  (void)argc;
  (void)argv;

  /* stop at EOF or read error instead of re-processing a stale buffer */
  while (fgets(buf, sizeof(buf), stdin) != NULL) {
    variant id;

    /* cut at the newline if there is one; safe for empty/unterminated input */
    buf[strcspn(buf, "\n")] = 0;

    if (findtoken(&root, buf, &id)==E_TOK_SUCCESS) {
      printf("%s returned %d\n", buf, (int)(intptr_t)id.v);
    } else {
      /* the running number is carried in the pointer member of the variant */
      id.v = (void *)(intptr_t)++i;
      addtoken(&root, buf, id);
      printf("added %s=%d\n", buf, i);
    }
  }
  return 0;
}
#endif