From 6c6c1174d85fca90e42bcdf5a099d9976356841b Mon Sep 17 00:00:00 2001
From: Enno Rehling <enno@eressea.de>
Date: Wed, 25 Jan 2006 20:40:19 +0000
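Subject: [PATCH] Speeding up the hashing in findtoken even more.

Raise NODEHASHSIZE from 7 to 8 so that the bucket index can be computed
with a bit mask (c & 7) instead of the divc7 lookup table, which is
removed. The same change is applied to addtoken, and the main loop of
findtoken becomes a do/while because the empty string is rejected
before the loop is entered.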

---
 src/common/util/umlaut.c | 40 +++++++++++++---------------------------
 src/common/util/umlaut.h |  2 +-
 2 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/src/common/util/umlaut.c b/src/common/util/umlaut.c
index f41a8105e..d35219eff 100644
--- a/src/common/util/umlaut.c
+++ b/src/common/util/umlaut.c
@@ -36,21 +36,6 @@ typedef struct tref {
 #define LEAF 1 /* leaf node for a word. always matches */
 #define SHARED 2 /* at least two words share the node */
 
-#if NODEHASHSIZE == 7
-/* lookup table, making c % 7 faster for chars. is this sick or what? */
-static int divc7[256] = { 
-  0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,
-  0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,
-  0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,
-  0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,
-  0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,
-  0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,
-  0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,
-  0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,
-  0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,
-  0,1,2,3 };
-#endif
-
 void
 addtoken(tnode * root, const char* str, variant id)
 {
@@ -73,10 +58,10 @@ addtoken(tnode * root, const char* str, variant id)
 	} else {
 		tref * next;
 		int index, i = 0;
-		char c = *str;
+		register char c = *str;
 		if (c<'a' || c>'z') c = (char)tolower((unsigned char)c);
-#if NODEHASHSIZE == 7
-		index = divc7[(unsigned char)c];
+#if NODEHASHSIZE == 8
+		index = c & 7;
 #else
     index = ((unsigned char)c) % NODEHASHSIZE;
 #endif
@@ -95,7 +80,11 @@ addtoken(tnode * root, const char* str, variant id)
 			root->next[index] = ref;
 			
 			if (u!=c) {
-				index = ((unsigned char)u) % NODEHASHSIZE;
+#if NODEHASHSIZE == 8
+        index = u & 7;
+#else
+        index = ((unsigned char)u) % NODEHASHSIZE;
+#endif
 				ref = malloc(sizeof(tref));
 				ref->c = u;
 				ref->node = node;
@@ -123,18 +112,15 @@ addtoken(tnode * root, const char* str, variant id)
 int
 findtoken(const tnode * tk, const char * str, variant* result)
 {
-	if (!str) return E_TOK_NOMATCH;
-	if (*str == 0) return E_TOK_NOMATCH;
+	if (!str || *str==0) return E_TOK_NOMATCH;
 
-	while (*str) {
+	do {
 		int index;
 		const tref * ref;
 		char c = *str;
 
-/*		if (c<'a' || c>'z') c = (char)tolower((unsigned char)c); */
-
-#if NODEHASHSIZE == 7
-    index = divc7[(unsigned char)c];
+#if NODEHASHSIZE == 8
+    index = c & 7;
 #else
     index = ((unsigned char)c) % NODEHASHSIZE;
 #endif
@@ -143,7 +129,7 @@ findtoken(const tnode * tk, const char * str, variant* result)
 		++str;
 		if (!ref) return E_TOK_NOMATCH;
 		tk = ref->node;
-	}
+	} while (*str);
 	if (tk) {
 		*result = tk->id;
 		return E_TOK_SUCCESS;
diff --git a/src/common/util/umlaut.h b/src/common/util/umlaut.h
index b753081c8..35fc121e9 100644
--- a/src/common/util/umlaut.h
+++ b/src/common/util/umlaut.h
@@ -23,7 +23,7 @@ extern "C" {
 
 #define E_TOK_NOMATCH (-1)
 #define E_TOK_SUCCESS 0
-#define NODEHASHSIZE 7
+#define NODEHASHSIZE 8
 struct tref;
 
 typedef struct tnode {