From 69c01946287c59bf29e8304c6292fd8ba512c703 Mon Sep 17 00:00:00 2001 From: Enno Rehling Date: Sun, 7 Aug 2016 18:25:28 +0200 Subject: [PATCH 1/4] curses output doesn't deal well with non-ascii characters. "Fix" UTF8 characters by replacing them with ? --- src/gmtool.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/src/gmtool.c b/src/gmtool.c index 3af7814a2..a14c8cb29 100644 --- a/src/gmtool.c +++ b/src/gmtool.c @@ -51,6 +51,7 @@ #include #include +#include #include #include #include @@ -77,6 +78,40 @@ state *current_state = NULL; static WINDOW *hstatus; +static void simplify(const char *rp, char *wp) { + while (*rp) { + if (*rp & 0x80) { + while (*rp & 0x80) ++rp; + *wp++ = '?'; + } + else { + *wp++ = *rp++; + } + } + *wp = 0; +} + +int umvwprintw(WINDOW *win, int y, int x, const char *format, ...) { + char buffer[128]; + int result; + va_list args; + + va_start(args, format); + memset(buffer, 0, sizeof(buffer)); + result = vsnprintf(buffer, sizeof(buffer)-1, format, args); + va_end(args); + + simplify(buffer, buffer); + + return mvwaddstr(win, y, x, buffer); +} + +int umvwaddnstr(WINDOW *w, int y, int x, const char * str, int len) { + char buffer[128]; + simplify(str, buffer); + return mvwaddnstr(w, y, x, buffer, len); +} + static void init_curses(void) { int fg, bg; @@ -354,7 +389,7 @@ static void paint_status(window * wnd, const state * st) terrain = mr->r->terrain->_name; } cnormalize(&st->cursor, &nx, &ny); - mvwprintw(win, 0, 0, "%4d %4d | %.4s | %.20s (%d)", nx, ny, terrain, name, + umvwprintw(win, 0, 0, "%4d %4d | %.4s | %.20s (%d)", nx, ny, terrain, name, uid); wclrtoeol(win); } @@ -377,13 +412,13 @@ static void paint_info_region(window * wnd, const state * st) if (mr && mr->r) { const region *r = mr->r; if (r->land) { - mvwaddnstr(win, line++, 1, (char *)r->land->name, size); + umvwaddnstr(win, line++, 1, (char *)r->land->name, size); } else { - mvwaddnstr(win, line++, 1, r->terrain->_name, size); + umvwaddnstr(win, line++, 1, r->terrain->_name, size); } line++; - mvwprintw(win, line++, 1, "%s, age %d", r->terrain->_name, r->age); + umvwprintw(win, line++, 1, "%s, age %d", r->terrain->_name, r->age); if (r->land) { mvwprintw(win, line++, 1, "$:%6d P:%5d", rmoney(r), rpeasants(r)); mvwprintw(win, line++, 1, "H:%6d %s:%5d", rhorses(r), @@ -398,7 +433,7 @@ static void paint_info_region(window * wnd, const state * st) wattroff(win, A_BOLD | COLOR_PAIR(COLOR_YELLOW)); for (sh = r->ships; sh && line < maxline; sh = sh->next) { mvwprintw(win, line, 1, "%.4s ", itoa36(sh->no)); - mvwaddnstr(win, line++, 6, (char *)sh->type->_name, size - 5); + umvwaddnstr(win, line++, 6, (char *)sh->type->_name, size - 5); } } if (r->units && (st->info_flags & IFL_FACTIONS)) { @@ -409,7 +444,7 @@ static void paint_info_region(window * wnd, const state * st) for (u = r->units; u && line < maxline; u = u->next) { if (!fval(u->faction, FFL_MARK)) { mvwprintw(win, line, 1, "%.4s ", itoa36(u->faction->no)); - mvwaddnstr(win, line++, 6, (char *)u->faction->name, size - 5); + umvwaddnstr(win, line++, 6, (char *)u->faction->name, size - 5); fset(u->faction, FFL_MARK); } } @@ -424,7 +459,7 @@ static void paint_info_region(window * wnd, const state * st) wattroff(win, A_BOLD | COLOR_PAIR(COLOR_YELLOW)); for (u = r->units; u && line < maxline; u = u->next) { mvwprintw(win, line, 1, "%.4s ", itoa36(u->no)); - mvwaddnstr(win, line++, 6, unit_getname(u), size - 5); + umvwaddnstr(win, line++, 6, unit_getname(u), size - 5); } } } From 400051332a42d15c454149a84d603920cee7486e Mon Sep 17 00:00:00 2001 From: Enno Rehling Date: Sun, 7 Aug 2016 18:55:03 +0200 Subject: [PATCH 2/4] Trying out some different codepages for pdcurses/WIN32 and ncurses/Linux. --- src/gmtool.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/src/gmtool.c b/src/gmtool.c index a14c8cb29..bb6cb5b32 100644 --- a/src/gmtool.c +++ b/src/gmtool.c @@ -78,11 +78,32 @@ state *current_state = NULL; static WINDOW *hstatus; -static void simplify(const char *rp, char *wp) { +static int unicode_utf8_to_ascii(char *result, const utf8_t * utf8_string, + size_t *length) +{ + int retval = unicode_utf8_to_cp437(result, utf8_string, length); + if (*length > 1) { + *result = '?'; + } + return retval; +} + +#ifdef WIN32 +#define CODEPAGE_TRANS unicode_utf8_to_cp1252 +#elif defined(NCURSES_VERSION) +#define CODEPAGE_TRANS unicode_utf8_to_cp437 +#else +#define CODEPAGE_TRANS unicode_utf8_to_ascii +#endif + +static void unicode_remove_diacritics(const char *rp, char *wp) { while (*rp) { if (*rp & 0x80) { - while (*rp & 0x80) ++rp; - *wp++ = '?'; + size_t sz = 0; + char ch; + CODEPAGE_TRANS(&ch, rp, &sz); + rp += sz; + *wp++ = ch; } else { *wp++ = *rp++; @@ -91,6 +112,10 @@ static void simplify(const char *rp, char *wp) { *wp = 0; } +static void simplify(const char *rp, char *wp) { + unicode_remove_diacritics(rp, wp); +} + int umvwprintw(WINDOW *win, int y, int x, const char *format, ...) { char buffer[128]; int result; From 731238dfb190aec62ab07f52fc9e5be2818e55b8 Mon Sep 17 00:00:00 2001 From: Enno Rehling Date: Sun, 7 Aug 2016 19:19:35 +0200 Subject: [PATCH 3/4] support different editor codepages --- src/gmtool.c | 34 ++++++++++++++++------------------ src/gmtool.h | 1 + src/main.c | 1 + src/util/unicode.c | 18 +++++++++++++++--- src/util/unicode.h | 16 +++++++++------- 5 files changed, 42 insertions(+), 28 deletions(-) diff --git a/src/gmtool.c b/src/gmtool.c index bb6cb5b32..1f683f44b 100644 --- a/src/gmtool.c +++ b/src/gmtool.c @@ -78,30 +78,28 @@ state *current_state = NULL; static WINDOW *hstatus; -static int unicode_utf8_to_ascii(char *result, const utf8_t * utf8_string, - size_t *length) -{ - int retval = unicode_utf8_to_cp437(result, utf8_string, length); - if (*length > 1) { - *result = '?'; - } - return retval; -} - -#ifdef WIN32 -#define CODEPAGE_TRANS unicode_utf8_to_cp1252 -#elif defined(NCURSES_VERSION) -#define CODEPAGE_TRANS unicode_utf8_to_cp437 -#else -#define CODEPAGE_TRANS unicode_utf8_to_ascii +#ifdef STDIO_CP +int gm_codepage = STDIO_CP; +#else +int gm_codepage = -1; #endif static void unicode_remove_diacritics(const char *rp, char *wp) { while (*rp) { - if (*rp & 0x80) { + if (gm_codepage >=0 && *rp & 0x80) { size_t sz = 0; char ch; - CODEPAGE_TRANS(&ch, rp, &sz); + switch (gm_codepage) { + case 1252: + unicode_utf8_to_cp1252(&ch, rp, &sz); + break; + case 437: + unicode_utf8_to_cp437(&ch, rp, &sz); + break; + default: + unicode_utf8_to_ascii(&ch, rp, &sz); + break; + } rp += sz; *wp++ = ch; } diff --git a/src/gmtool.h b/src/gmtool.h index d5396a615..3a682f453 100644 --- a/src/gmtool.h +++ b/src/gmtool.h @@ -31,6 +31,7 @@ extern "C" { void run_mapper(void); extern int force_color; + extern int gm_codepage; struct state *state_open(void); void state_close(struct state *); diff --git a/src/main.c b/src/main.c index ba1241564..d011a473b 100644 --- a/src/main.c +++ b/src/main.c @@ -96,6 +96,7 @@ static void parse_config(const char *filename) #ifdef USE_CURSES /* only one value in the [editor] section */ force_color = iniparser_getint(d, "editor:color", force_color); + gm_codepage = iniparser_getint(d, "editor:codepage", gm_codepage); #endif } } diff --git a/src/util/unicode.c b/src/util/unicode.c index 4be39a551..94b6e3d5a 100644 --- a/src/util/unicode.c +++ b/src/util/unicode.c @@ -518,10 +518,22 @@ size_t * length) return 0; } +/** Convert a UTF-8 encoded character to ASCII, with '?' replacements. */ +int unicode_utf8_to_ascii(char *cp_character, const utf8_t * utf8_string, + size_t *length) +{ + int result = unicode_utf8_to_cp437(cp_character, utf8_string, length); + if (result == 0) { + if (*length > 1) { + *cp_character = '?'; + } + } + return result; +} + /** Convert a UTF-8 encoded character to CP1252. */ -int -unicode_utf8_to_cp1252(char *cp_character, const utf8_t * utf8_string, -size_t * length) +int unicode_utf8_to_cp1252(char *cp_character, const utf8_t * utf8_string, + size_t * length) { ucs4_t ucs4_character; int result; diff --git a/src/util/unicode.h b/src/util/unicode.h index 3408ef948..b061cd6fb 100644 --- a/src/util/unicode.h +++ b/src/util/unicode.h @@ -28,18 +28,20 @@ extern "C" { typedef unsigned long ucs4_t; typedef char utf8_t; - extern int unicode_utf8_to_cp437(char *result, const utf8_t * utf8_string, + int unicode_utf8_to_cp437(char *result, const utf8_t * utf8_string, size_t * length); - extern int unicode_utf8_to_cp1252(char *result, const utf8_t * utf8_string, + int unicode_utf8_to_cp1252(char *result, const utf8_t * utf8_string, size_t * length); - extern int unicode_utf8_to_ucs4(ucs4_t * result, const utf8_t * utf8_string, + int unicode_utf8_to_ucs4(ucs4_t * result, const utf8_t * utf8_string, size_t * length); - extern int unicode_ucs4_to_utf8(utf8_t * result, size_t * size, + int unicode_ucs4_to_utf8(utf8_t * result, size_t * size, ucs4_t ucs4_character); - extern int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b); - extern int unicode_latin1_to_utf8(utf8_t * out, size_t * outlen, + int unicode_utf8_to_ascii(char *cp_character, const utf8_t * utf8_string, + size_t *length); + int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b); + int unicode_latin1_to_utf8(utf8_t * out, size_t * outlen, const char *in, size_t * inlen); - extern int unicode_utf8_tolower(utf8_t * out, size_t outlen, + int unicode_utf8_tolower(utf8_t * out, size_t outlen, const utf8_t * in); #ifdef __cplusplus From b4135e49fb790be1512c1c1b44181be88e3602f2 Mon Sep 17 00:00:00 2001 From: Enno Rehling Date: Sun, 7 Aug 2016 19:20:41 +0200 Subject: [PATCH 4/4] unused variable --- src/gmtool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gmtool.c b/src/gmtool.c index 1f683f44b..7a87e4ae6 100644 --- a/src/gmtool.c +++ b/src/gmtool.c @@ -116,12 +116,11 @@ static void simplify(const char *rp, char *wp) { int umvwprintw(WINDOW *win, int y, int x, const char *format, ...) { char buffer[128]; - int result; va_list args; va_start(args, format); memset(buffer, 0, sizeof(buffer)); - result = vsnprintf(buffer, sizeof(buffer)-1, format, args); + vsnprintf(buffer, sizeof(buffer)-1, format, args); va_end(args); simplify(buffer, buffer);