/* vi: set ts=2: * * * Eressea PB(E)M host Copyright (C) 1998-2003 * Christian Schlittchen (corwin@amber.kn-bremen.de) * Katja Zedel (katze@felidae.kn-bremen.de) * Henning Peters (faroul@beyond.kn-bremen.de) * Enno Rehling (enno@eressea-pbem.de) * Ingo Wilken (Ingo.Wilken@informatik.uni-oldenburg.de) * * based on: * * Atlantis v1.0 13 September 1993 Copyright 1993 by Russell Wallace * Atlantis v1.7 Copyright 1996 by Alex Schröder * * This program may not be used, modified or distributed without * prior permission by the authors of Eressea. * This program may not be sold or used commercially without prior written * permission from the authors. */ #include #include "umlaut.h" #include #include #include #include typedef struct tref { struct tref * nexthash; char c; struct tnode * node; } tref; #define LEAF 1 /* leaf node for a word. always matches */ #define SHARED 2 /* at least two words share the node */ void addtoken(tnode * root, const char* str, void * id) { static struct replace { char c; const char * str; } replace[] = { {'ä', "ae"}, {'Ä', "ae"}, {'ö', "oe"}, {'Ö', "oe"}, {'ü', "ue"}, {'Ü', "ue"}, {'ß', "ss"}, { 0, 0 } }; if (!*str) { root->id = id; root->flags |= LEAF; } else { tref * next; int index, i = 0; char c = *str; if (c<'a' || c>'z') c = (char)tolower((unsigned char)c); index = ((unsigned char)c) % NODEHASHSIZE; next = root->next[index]; if (!(root->flags & LEAF)) root->id = id; while (next && next->c != c) next = next->nexthash; if (!next) { tref * ref; char u = (char)toupper((unsigned char)c); tnode * node = calloc(1, sizeof(tnode)); ref = malloc(sizeof(tref)); ref->c = c; ref->node = node; ref->nexthash=root->next[index]; root->next[index] = ref; if (u!=c) { index = ((unsigned char)u) % NODEHASHSIZE; ref = malloc(sizeof(tref)); ref->c = u; ref->node = node; ref->nexthash = root->next[index]; root->next[index] = ref; } next=ref; } else { next->node->flags |= SHARED; if ((next->node->flags & LEAF) == 0) next->node->id = NULL; } addtoken(next->node, str+1, id); while (replace[i].str) { if (*str==replace[i].c) { char zText[1024]; strcat(strcpy(zText, replace[i].str), str+1); addtoken(root, zText, id); break; } ++i; } } } int findtoken(const tnode * tk, const char * str, void **result) { if (!str) return E_TOK_NOMATCH; if (*str == 0) return E_TOK_NOMATCH; while (*str) { int index; const tref * ref; char c = *str; /* if (c<'a' || c>'z') c = (char)tolower((unsigned char)c); */ index = ((unsigned char)c) % NODEHASHSIZE; ref = tk->next[index]; while (ref && ref->c!=c) ref = ref->nexthash; ++str; if (!ref) return E_TOK_NOMATCH; tk = ref->node; } if (tk) { *result = tk->id; return E_TOK_SUCCESS; } return E_TOK_NOMATCH; } #ifdef TEST_UMLAUT #include tnode root; int main(int argc, char ** argv) { char buf[1024]; int i = 0; for (;;) { int k; fgets(buf, sizeof(buf), stdin); buf[strlen(buf)-1]=0; if (findtoken(&root, buf, (void**)&k)==0) { printf("%s returned %d\n", buf, k); } else { addtoken(&root, buf, (void*)++i); printf("added %s=%d\n", buf, i); } } return 0; } #endif