diff --git a/src/summary.c b/src/summary.c
index 06dabba90..1e3ddbedf 100644
--- a/src/summary.c
+++ b/src/summary.c
@@ -177,11 +177,11 @@ static int count_umlaut(const char *s)
     int result = 0;
     const char *cp;
     for (cp = s; *cp; ++cp) {
-        ucs4_t ucs = *cp;
-        if (ucs & 0x80) {
+        wint_t wc = *cp;
+        if (wc & 0x80) {
             size_t size;
             int err;
-            err = unicode_utf8_to_ucs4(&ucs, cp, &size);
+            err = unicode_utf8_decode(&wc, cp, &size);
             if (err != 0) {
                 log_error("illegal utf8 encoding %s at %s", s, cp);
                 return result;
diff --git a/src/util/filereader.c b/src/util/filereader.c
index c4a3b56cc..5973c8f1c 100644
--- a/src/util/filereader.c
+++ b/src/util/filereader.c
@@ -26,12 +26,12 @@ static int eatwhite(const char *ptr, size_t * total_size)
     *total_size = 0;
 
     while (*ptr) {
-        ucs4_t ucs;
+        wint_t wc;
         size_t size = 0;
-        ret = unicode_utf8_to_ucs4(&ucs, ptr, &size);
+        ret = unicode_utf8_decode(&wc, ptr, &size);
         if (ret != 0)
             break;
-        if (!iswspace((wint_t)ucs))
+        if (!iswspace(wc))
             break;
         *total_size += size;
         ptr += size;
@@ -86,7 +86,7 @@ static const char *getbuf_utf8(FILE * F)
         }
         cont = false;
         while (*bp && cp < fbuf + MAXLINE) {
-            ucs4_t ucs;
+            wint_t wc;
             size_t size;
             int ret;
 
@@ -119,14 +119,14 @@ static const char *getbuf_utf8(FILE * F)
                 }
             }
 
-            ret = unicode_utf8_to_ucs4(&ucs, bp, &size);
+            ret = unicode_utf8_decode(&wc, bp, &size);
 
             if (ret != 0) {
                 unicode_warning(bp);
                 break;
             }
 
-            if (iswspace((wint_t)ucs)) {
+            if (iswspace(wc)) {
                 if (!quote) {
                     bp += size;
                     ret = eatwhite(bp, &size);
@@ -151,7 +151,7 @@ static const char *getbuf_utf8(FILE * F)
                     bp += size;
                 }
             }
-            else if (iswcntrl((wint_t)ucs)) {
+            else if (iswcntrl(wc)) {
                 if (!comment && cp < fbuf + MAXLINE) {
                     *cp++ = '?';
                 }
diff --git a/src/util/parser.c b/src/util/parser.c
index bd187b5f8..e66e2b724 100644
--- a/src/util/parser.c
+++ b/src/util/parser.c
@@ -27,7 +27,7 @@ static parse_state *states;
 static int eatwhitespace_c(const char **str_p)
 {
     int ret = 0;
-    ucs4_t ucs;
+    wint_t wc;
     size_t len;
     const char *str = *str_p;
 
@@ -40,12 +40,12 @@ static int eatwhitespace_c(const char **str_p)
             ++str;
         }
         else {
-            ret = unicode_utf8_to_ucs4(&ucs, str, &len);
+            ret = unicode_utf8_decode(&wc, str, &len);
             if (ret != 0) {
                 log_warning("illegal character sequence in UTF8 string: %s\n", str);
                 break;
             }
-            if (!iswspace((wint_t)ucs))
+            if (!iswspace(wc))
                 break;
             str += len;
         }
@@ -106,16 +106,16 @@ void skip_token(void)
     eatwhitespace_c(&states->current_token);
 
     while (*states->current_token) {
-        ucs4_t ucs;
+        wint_t wc;
         size_t len;
 
         unsigned char utf8_character = (unsigned char)states->current_token[0];
         if (~utf8_character & 0x80) {
-            ucs = utf8_character;
+            wc = utf8_character;
             ++states->current_token;
         }
         else {
-            int ret = unicode_utf8_to_ucs4(&ucs, states->current_token, &len);
+            int ret = unicode_utf8_decode(&wc, states->current_token, &len);
             if (ret == 0) {
                 states->current_token += len;
             }
@@ -123,7 +123,7 @@ void skip_token(void)
                 log_warning("illegal character sequence in UTF8 string: %s\n", states->current_token);
             }
         }
-        if (iswspace((wint_t)ucs) && quotechar == 0) {
+        if (iswspace(wc) && quotechar == 0) {
             return;
         }
         else {
@@ -160,17 +160,17 @@ char *parse_token(const char **str, char *lbuf, size_t buflen)
         return 0;
     }
     while (*ctoken) {
-        ucs4_t ucs;
+        wint_t wc;
         size_t len;
         bool copy = false;
 
         unsigned char utf8_character = *(unsigned char *)ctoken;
         if (~utf8_character & 0x80) {
-            ucs = utf8_character;
+            wc = utf8_character;
             len = 1;
         }
         else {
-            int ret = unicode_utf8_to_ucs4(&ucs, ctoken, &len);
+            int ret = unicode_utf8_decode(&wc, ctoken, &len);
             if (ret != 0) {
                 log_warning("illegal character sequence in UTF8 string: %s\n", ctoken);
                 break;
@@ -180,7 +180,7 @@ char *parse_token(const char **str, char *lbuf, size_t buflen)
             copy = true;
             escape = false;
         }
-        else if (iswspace((wint_t)ucs)) {
+        else if (iswspace(wc)) {
             if (quotechar == 0)
                 break;
             copy = true;
diff --git a/src/util/umlaut.c b/src/util/umlaut.c
index 5f135df69..33facc0aa 100644
--- a/src/util/umlaut.c
+++ b/src/util/umlaut.c
@@ -32,7 +32,7 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 typedef struct tref {
     struct tref *nexthash;
-    ucs4_t ucs;
+    wint_t wc;
     struct tnode *node;
 } tref;
 
@@ -99,8 +99,8 @@ char * transliterate(char * out, size_t size, const char * in)
                 size -= advance;
             }
             else {
-                ucs4_t ucs;
-                int ret = unicode_utf8_to_ucs4(&ucs, src, &len);
+                wint_t wc;
+                int ret = unicode_utf8_decode(&wc, src, &len);
                 if (ret != 0) {
                     /* encoding is broken. yikes */
                     log_error("transliterate | encoding error in '%s'\n", src);
@@ -127,7 +127,7 @@ void addtoken(tnode ** root, const char *str, variant id)
 {
     tnode * tk;
     static const struct replace {
-        ucs4_t ucs;
+        wint_t wc;
         const char str[3];
     } replace[] = {
         /* match lower-case (!) umlauts and others to transcriptions */
@@ -150,10 +150,10 @@ void addtoken(tnode ** root, const char *str, variant id)
     else {
         tref *next;
         int ret, index, i = 0;
-        ucs4_t ucs, lcs;
+        wint_t ucs, lcs;
         size_t len;
 
-        ret = unicode_utf8_to_ucs4(&ucs, str, &len);
+        ret = unicode_utf8_decode(&ucs, str, &len);
         assert(ret == 0 || !"invalid utf8 string");
         lcs = ucs;
 
@@ -166,7 +166,7 @@ void addtoken(tnode ** root, const char *str, variant id)
         next = tk->next[index];
         if (!(tk->flags & LEAF))
             tk->id = id;
-        while (next && next->ucs != ucs)
+        while (next && next->wc != ucs)
             next = next->nexthash;
         if (!next) {
             tref *ref;
@@ -181,7 +181,7 @@ void addtoken(tnode ** root, const char *str, variant id)
 
             ref = (tref *)malloc(sizeof(tref));
             if (!ref) abort();
-            ref->ucs = ucs;
+            ref->wc = ucs;
             ref->node = node;
             ref->nexthash = tk->next[index];
             tk->next[index] = ref;
@@ -195,7 +195,7 @@ void addtoken(tnode ** root, const char *str, variant id)
 #endif
                 ref = (tref *)malloc(sizeof(tref));
                 assert_alloc(ref);
-                ref->ucs = lcs;
+                ref->wc = lcs;
                 ref->node = node;
                 ++node->refcount;
                 ref->nexthash = tk->next[index];
@@ -211,7 +211,7 @@ void addtoken(tnode ** root, const char *str, variant id)
         }
         addtoken(&next->node, str + len, id);
         while (replace[i].str[0]) {
-            if (lcs == replace[i].ucs) {
+            if (lcs == replace[i].wc) {
                 char zText[1024];
                 memcpy(zText, replace[i].str, 3);
                 str_strlcpy(zText + 2, (const char *)str + len, sizeof(zText)-2);
@@ -255,9 +255,9 @@ int findtoken(const void * root, const char *key, variant * result)
     do {
         int index;
         const tref *ref;
-        ucs4_t ucs;
+        wint_t wc;
         size_t len;
-        int ret = unicode_utf8_to_ucs4(&ucs, str, &len);
+        int ret = unicode_utf8_decode(&wc, str, &len);
 
         if (ret != 0) {
             /* encoding is broken. youch */
@@ -265,12 +265,12 @@ int findtoken(const void * root, const char *key, variant * result)
             return E_TOK_NOMATCH;
         }
 #if NODEHASHSIZE == 8
-        index = ucs & 7;
+        index = wc & 7;
 #else
-        index = ucs % NODEHASHSIZE;
+        index = wc % NODEHASHSIZE;
 #endif
         ref = tk->next[index];
-        while (ref && ref->ucs != ucs)
+        while (ref && ref->wc != wc)
             ref = ref->nexthash;
         str += len;
         if (!ref) {
diff --git a/src/util/unicode.c b/src/util/unicode.c
index b4bb803dd..e4e7b0d61 100644
--- a/src/util/unicode.c
+++ b/src/util/unicode.c
@@ -33,18 +33,18 @@
 #define B00000011 0x03
 #define B00000001 0x01
 
-int unicode_utf8_trim(utf8_t *buf)
+size_t unicode_utf8_trim(char *buf)
 {
     int result = 0, ts = 0;
-    utf8_t *op = buf, *ip = buf, *lc = buf;
+    char *op = buf, *ip = buf, *lc = buf;
     assert(buf);
     while (*ip) {
         size_t size = 1;
         wint_t wc = *ip;
         if (wc & 0x80) {
-            ucs4_t ucs = 0;
+            wint_t ucs = 0;
             if (ip[1]) {
-                int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
+                int ret = unicode_utf8_decode(&ucs, ip, &size);
                 if (ret != 0) {
                     return ret;
                 }
@@ -56,22 +56,24 @@ int unicode_utf8_trim(utf8_t *buf)
                 ++result;
             }
         }
-        if (op == buf && iswspace(wc)) {
-            ++result;
+        if (op == buf && (iswcntrl(wc) || iswspace(wc))) {
+            result += size;
         }
         else if (wc>255 || !iscntrl(wc)) {
             if (op != ip) {
                 memmove(op, ip, size);
             }
             op += size;
-            if (iswspace(wc)) ++ts;
+            if (iswcntrl(wc) || iswspace(wc)) {
+                ts += size;
+            }
             else {
                 lc = op;
                 ts = 0;
             }
         }
         else {
-            ++result;
+            result += size;
         }
         ip += size;
     }
@@ -79,15 +81,15 @@ int unicode_utf8_trim(utf8_t *buf)
     return result + ts;
 }
 
-int unicode_utf8_tolower(utf8_t * op, size_t outlen, const utf8_t * ip)
+int unicode_utf8_tolower(char * op, size_t outlen, const char * ip)
 {
     while (*ip) {
-        ucs4_t ucs = *ip;
-        ucs4_t low;
+        wint_t ucs = *ip;
+        wint_t low;
         size_t size = 1;
 
         if (ucs & 0x80) {
-            int ret = unicode_utf8_to_ucs4(&ucs, ip, &size);
+            int ret = unicode_utf8_decode(&ucs, ip, &size);
             if (ret != 0) {
                 return ret;
             }
@@ -104,7 +106,7 @@ int unicode_utf8_tolower(utf8_t * op, size_t outlen, const utf8_t * ip)
         }
         else {
             ip += size;
-            unicode_ucs4_to_utf8(op, &size, low);
+            unicode_utf8_encode(op, &size, low);
             op += size;
             outlen -= size;
         }
@@ -114,7 +116,7 @@ int unicode_utf8_tolower(utf8_t * op, size_t outlen, const utf8_t * ip)
 }
 
 int
-unicode_latin1_to_utf8(utf8_t * dst, size_t * outlen, const char *in,
+unicode_latin1_to_utf8(char * dst, size_t * outlen, const char *in,
     size_t * inlen)
 {
     int is = (int)*inlen;
@@ -148,15 +150,15 @@ unicode_latin1_to_utf8(utf8_t * dst, size_t * outlen, const char *in,
     return (int)*outlen;
 }
 
-int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t *b)
+int unicode_utf8_strcasecmp(const char * a, const char *b)
 {
     while (*a && *b) {
         int ret;
         size_t size;
-        ucs4_t ucsa = *a, ucsb = *b;
+        wint_t ucsa = *a, ucsb = *b;
 
         if (ucsa & 0x80) {
-            ret = unicode_utf8_to_ucs4(&ucsa, a, &size);
+            ret = unicode_utf8_decode(&ucsa, a, &size);
             if (ret != 0)
                 return -1;
             a += size;
@@ -164,7 +166,7 @@ int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t *b)
         else
             ++a;
         if (ucsb & 0x80) {
-            ret = unicode_utf8_to_ucs4(&ucsb, b, &size);
+            ret = unicode_utf8_decode(&ucsb, b, &size);
             if (ret != 0)
                 return -1;
             b += size;
@@ -188,10 +190,10 @@ int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t *b)
     return 0;
 }
 
-/* Convert a UCS-4 character to UTF-8. */
+/* Convert a wide character to UTF-8. */
 int
-unicode_ucs4_to_utf8(utf8_t * utf8_character, size_t * size,
-    ucs4_t ucs4_character)
+unicode_utf8_encode(char * utf8_character, size_t * size,
+    wint_t ucs4_character)
 {
     int utf8_bytes;
 
@@ -213,6 +215,7 @@ unicode_ucs4_to_utf8(utf8_t * utf8_character, size_t * size,
         utf8_character[1] = (char)(((ucs4_character >> 6) & B00111111) | B10000000);
         utf8_character[2] = (char)((ucs4_character & B00111111) | B10000000);
     }
+#if 0
     else if (ucs4_character <= 0x001FFFFF) {
         /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
         utf8_bytes = 4;
@@ -246,6 +249,7 @@ unicode_ucs4_to_utf8(utf8_t * utf8_character, size_t * size,
         utf8_character[4] = (char)(((ucs4_character >> 6) & B00111111) | B10000000);
         utf8_character[5] = (char)((ucs4_character & B00111111) | B10000000);
     }
+#endif
     else {
         return EILSEQ;
     }
@@ -257,10 +261,10 @@ unicode_ucs4_to_utf8(utf8_t * utf8_character, size_t * size,
 
-/* Convert a UTF-8 encoded character to UCS-4. */
+/* Convert a UTF-8 encoded character to a wide character. */
 int
-unicode_utf8_to_ucs4(ucs4_t * ucs4_character, const utf8_t * utf8_string,
+unicode_utf8_decode(wint_t * ucs4_character, const char * utf8_string,
     size_t * length)
 {
-    utf8_t utf8_character = utf8_string[0];
+    char utf8_character = utf8_string[0];
 
     /* Is the character in the ASCII range? If so, just copy it to the
        output. */
@@ -361,13 +365,13 @@ unicode_utf8_to_ucs4(ucs4_t * ucs4_character, const utf8_t * utf8_string,
 
 /** Convert a UTF-8 encoded character to CP437. */
 int
-unicode_utf8_to_cp437(unsigned char *cp_character, const utf8_t * utf8_string,
+unicode_utf8_to_cp437(unsigned char *cp_character, const char * utf8_string,
     size_t * length)
 {
-    ucs4_t ucs4_character;
+    wint_t ucs4_character;
     int result;
 
-    result = unicode_utf8_to_ucs4(&ucs4_character, utf8_string, length);
+    result = unicode_utf8_decode(&ucs4_character, utf8_string, length);
     if (result != 0) {
         /* pass decoding characters upstream */
         return result;
@@ -378,7 +382,7 @@ unicode_utf8_to_cp437(unsigned char *cp_character, const utf8_t * utf8_string,
     }
     else {
         struct {
-            ucs4_t ucs4;
+            wint_t ucs4;
             unsigned char cp437;
         } xref[160] = {
             { 0x00A0, 255 },
@@ -566,7 +570,7 @@ unicode_utf8_to_cp437(unsigned char *cp_character, const utf8_t * utf8_string,
 }
 
 /** Convert a UTF-8 encoded character to ASCII, with '?' replacements. */
-int unicode_utf8_to_ascii(unsigned char *cp_character, const utf8_t * utf8_string,
+int unicode_utf8_to_ascii(unsigned char *cp_character, const char * utf8_string,
     size_t *length)
 {
     int result = unicode_utf8_to_cp437(cp_character, utf8_string, length);
@@ -579,13 +583,13 @@ int unicode_utf8_to_ascii(unsigned char *cp_character, const utf8_t * utf8_strin
 }
 
 /** Convert a UTF-8 encoded character to CP1252. */
-int unicode_utf8_to_cp1252(unsigned char *cp_character, const utf8_t * utf8_string,
+int unicode_utf8_to_cp1252(unsigned char *cp_character, const char * utf8_string,
     size_t * length)
 {
-    ucs4_t ucs4_character;
+    wint_t ucs4_character;
     int result;
 
-    result = unicode_utf8_to_ucs4(&ucs4_character, utf8_string, length);
+    result = unicode_utf8_decode(&ucs4_character, utf8_string, length);
     if (result != 0) {
         /* pass decoding characters upstream */
         return result;
@@ -596,7 +600,7 @@ int unicode_utf8_to_cp1252(unsigned char *cp_character, const utf8_t * utf8_stri
     }
     else {
         struct {
-            ucs4_t ucs4;
+            wint_t ucs4;
             unsigned char cp;
         } xref[] = {
             { 0x0081, 0x81 },
diff --git a/src/util/unicode.h b/src/util/unicode.h
index 4fd860e45..6ebcc4794 100644
--- a/src/util/unicode.h
+++ b/src/util/unicode.h
@@ -19,30 +19,30 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 #ifndef _UNICODE_H
 #define _UNICODE_H
 
+#include <stdint.h>
+#include <stddef.h>
+#include <wchar.h> /* wint_t, used by the decode/encode prototypes */
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <wchar.h>
 #define USE_UNICODE
-    typedef long ucs4_t;
-    typedef char utf8_t;
-
-    int unicode_utf8_to_cp437(unsigned char *result, const utf8_t * utf8_string,
+    int unicode_utf8_to_cp437(unsigned char *result, const char * utf8_string,
         size_t * length);
-    int unicode_utf8_to_cp1252(unsigned char *result, const utf8_t * utf8_string,
+    int unicode_utf8_to_cp1252(unsigned char *result, const char * utf8_string,
         size_t * length);
-    int unicode_utf8_to_ucs4(ucs4_t * result, const utf8_t * utf8_string,
+    int unicode_utf8_decode(wint_t * result, const char * utf8_string,
         size_t * length);
-    int unicode_ucs4_to_utf8(utf8_t * result, size_t * size,
-        ucs4_t ucs4_character);
-    int unicode_utf8_to_ascii(unsigned char *cp_character, const utf8_t * utf8_string,
+    int unicode_utf8_encode(char * result, size_t * size,
+        wint_t ucs4_character);
+    int unicode_utf8_to_ascii(unsigned char *cp_character, const char * utf8_string,
         size_t *length);
-    int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b);
-    int unicode_latin1_to_utf8(utf8_t * out, size_t * outlen,
+    int unicode_utf8_strcasecmp(const char * a, const char * b);
+    int unicode_latin1_to_utf8(char * out, size_t * outlen,
         const char *in, size_t * inlen);
-    int unicode_utf8_tolower(utf8_t *op, size_t outlen, const utf8_t *ip);
-	int unicode_utf8_trim(utf8_t *ip);
+    int unicode_utf8_tolower(char *op, size_t outlen, const char *ip);
+    size_t unicode_utf8_trim(char *ip);
 
 #ifdef __cplusplus
 }
diff --git a/src/util/unicode.test.c b/src/util/unicode.test.c
index 7cada9da6..17c453296 100644
--- a/src/util/unicode.test.c
+++ b/src/util/unicode.test.c
@@ -1,6 +1,12 @@
+#ifdef _MSC_VER
 #include <platform.h>
-#include <CuTest.h>
+#endif
+
 #include "unicode.h"
+
+#include <CuTest.h>
+
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
@@ -9,9 +15,33 @@ static void test_unicode_trim(CuTest * tc)
 {
     char buffer[32];
 
-    strcpy(buffer, "Hello Word");
+    strcpy(buffer, "Hello World");
     CuAssertIntEquals(tc, 0, unicode_utf8_trim(buffer));
-    CuAssertStrEquals(tc, "Hello Word", buffer);
+    CuAssertStrEquals(tc, "Hello World", buffer);
+
+    strcpy(buffer, "  Hello World");
+    CuAssertIntEquals(tc, 2, unicode_utf8_trim(buffer));
+    CuAssertStrEquals(tc, "Hello World", buffer);
+
+    strcpy(buffer, "Hello World  ");
+    CuAssertIntEquals(tc, 2, unicode_utf8_trim(buffer));
+    CuAssertStrEquals(tc, "Hello World", buffer);
+
+    strcpy(buffer, " Hello World ");
+    CuAssertIntEquals(tc, 2, unicode_utf8_trim(buffer));
+    CuAssertStrEquals(tc, "Hello World", buffer);
+
+    strcpy(buffer, "Hello\t\r\nWorld");
+    CuAssertIntEquals(tc, 3, unicode_utf8_trim(buffer));
+    CuAssertStrEquals(tc, "HelloWorld", buffer);
+
+    strcpy(buffer, "LTR");
+    buffer[3] = -30;
+    buffer[4] = -128;
+    buffer[5] = -114;
+    buffer[6] = 0;
+    CuAssertIntEquals(tc, 3, unicode_utf8_trim(buffer));
+    CuAssertStrEquals(tc, "LTR", buffer);
 
     strcpy(buffer, "  Hello Word  ");
     CuAssertIntEquals(tc, 4, unicode_utf8_trim(buffer));
@@ -48,7 +78,7 @@ static void test_unicode_tolower(CuTest * tc)
 static void test_unicode_utf8_to_other(CuTest *tc)
 {
     const unsigned char uchar_str[] = { 0xc3, 0x98, 0xc5, 0xb8, 0xc2, 0x9d, 'l', 0 }; /* &Oslash;&Yuml;&#157;l */
-    utf8_t *utf8_str = (utf8_t *)uchar_str;
+    char *utf8_str = (char *)uchar_str;
     unsigned char ch;
     size_t sz;
     CuAssertIntEquals(tc, 0, unicode_utf8_to_cp437(&ch, utf8_str, &sz));
@@ -92,27 +122,27 @@ static void test_unicode_utf8_to_other(CuTest *tc)
 }
 
 static void test_unicode_utf8_to_ucs(CuTest *tc) {
-    ucs4_t ucs;
+    wint_t wc;
     size_t sz;
 
-    CuAssertIntEquals(tc, 0, unicode_utf8_to_ucs4(&ucs, "a", &sz));
-    CuAssertIntEquals(tc, 'a', ucs);
+    CuAssertIntEquals(tc, 0, unicode_utf8_decode(&wc, "a", &sz));
+    CuAssertIntEquals(tc, 'a', wc);
     CuAssertIntEquals(tc, 1, sz);
 }
 
 static void test_unicode_bug2262(CuTest *tc) {
     char name[7];
-    ucs4_t ucs;
+    wint_t wc;
     size_t sz;
 
     strcpy(name, "utende");
-    CuAssertIntEquals(tc, 0, unicode_utf8_to_ucs4(&ucs, name, &sz));
+    CuAssertIntEquals(tc, 0, unicode_utf8_decode(&wc, name, &sz));
     CuAssertIntEquals(tc, 1, sz);
-    CuAssertIntEquals(tc, 'u', ucs);
+    CuAssertIntEquals(tc, 'u', wc);
     CuAssertIntEquals(tc, 0, unicode_utf8_trim(name));
 
     name[0] = -4; /* latin1: &uuml; should fail to decode */
-    CuAssertIntEquals(tc, EILSEQ, unicode_utf8_to_ucs4(&ucs, name, &sz));
+    CuAssertIntEquals(tc, EILSEQ, unicode_utf8_decode(&wc, name, &sz));
     CuAssertIntEquals(tc, EILSEQ, unicode_utf8_trim(name));
 }
 
@@ -123,26 +153,36 @@ static void test_unicode_compare(CuTest *tc)
     CuAssertIntEquals(tc, 1, unicode_utf8_strcasecmp("bacdefg123", "ABCDEFG123"));
 }
 
-static void test_unicode_farsi_nzwj(CuTest *tc) {
-    const char str[] = { 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xaf,
-        0xdb, 0x8c, 0xd9, 0x86, 0x20, 0xd9, 0x85, 0xd8, 0xad, 0xd9, 0x85, 0xd8,
-        0xaf, 0x20, 0xd8, 0xb1, 0xd9, 0x88, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80,
-        0x8e, 0xe2, 0x80, 0x8e, 0x00 };
+static void test_unicode_trim_zwnj(CuTest *tc) {
+    const char zwnj[] = "\xe2\x80\x8c"; /* U+200C ZERO WIDTH NON-JOINER in UTF-8 */
     char name[64];
-    strcpy(name, str);
-    CuAssertIntEquals(tc, 0, unicode_utf8_trim(name));
-    CuAssertStrEquals(tc, str, name);
+    char expect[64];
+    snprintf(name, sizeof(name), "%sA%sB%s  ", zwnj, zwnj, zwnj);
+    snprintf(expect, sizeof(expect), "A%sB", zwnj);
+    CuAssertIntEquals(tc, 8, unicode_utf8_trim(name)); /* 3 + 3 bytes of outer ZWNJ, 2 spaces */
+    CuAssertStrEquals(tc, expect, name);
+}
+
+static void test_unicode_trim_ltrm(CuTest *tc) {
+    const char ltrm[] = "\xe2\x80\x8e"; /* U+200E LEFT-TO-RIGHT MARK in UTF-8 */
+    char name[64];
+    char expect[64];
+    snprintf(name, sizeof(name), "%sBrot%szeit%s  ", ltrm, ltrm, ltrm);
+    snprintf(expect, sizeof(expect), "Brot%szeit", ltrm);
+    CuAssertIntEquals(tc, 8, unicode_utf8_trim(name)); /* 3 + 3 bytes of outer LRM, 2 spaces */
+    CuAssertStrEquals(tc, expect, name);
 }
 
 CuSuite *get_unicode_suite(void)
 {
     CuSuite *suite = CuSuiteNew();
-    SUITE_ADD_TEST(suite, test_unicode_bug2262);
-    SUITE_ADD_TEST(suite, test_unicode_tolower);
     SUITE_ADD_TEST(suite, test_unicode_trim);
+    SUITE_ADD_TEST(suite, test_unicode_trim_zwnj);
+    SUITE_ADD_TEST(suite, test_unicode_trim_ltrm);
     SUITE_ADD_TEST(suite, test_unicode_utf8_to_other);
     SUITE_ADD_TEST(suite, test_unicode_utf8_to_ucs);
     SUITE_ADD_TEST(suite, test_unicode_compare);
-    SUITE_ADD_TEST(suite, test_unicode_farsi_nzwj);
+    SUITE_ADD_TEST(suite, test_unicode_bug2262);
+    SUITE_ADD_TEST(suite, test_unicode_tolower);
     return suite;
 }