diff --git a/src/gmtool.c b/src/gmtool.c index bb6cb5b32..1f683f44b 100644 --- a/src/gmtool.c +++ b/src/gmtool.c @@ -78,30 +78,28 @@ state *current_state = NULL; static WINDOW *hstatus; -static int unicode_utf8_to_ascii(char *result, const utf8_t * utf8_string, - size_t *length) -{ - int retval = unicode_utf8_to_cp437(result, utf8_string, length); - if (*length > 1) { - *result = '?'; - } - return retval; -} - -#ifdef WIN32 -#define CODEPAGE_TRANS unicode_utf8_to_cp1252 -#elif defined(NCURSES_VERSION) -#define CODEPAGE_TRANS unicode_utf8_to_cp437 -#else -#define CODEPAGE_TRANS unicode_utf8_to_ascii +#ifdef STDIO_CP +int gm_codepage = STDIO_CP; +#else +int gm_codepage = -1; #endif static void unicode_remove_diacritics(const char *rp, char *wp) { while (*rp) { - if (*rp & 0x80) { + if (gm_codepage >=0 && *rp & 0x80) { size_t sz = 0; char ch; - CODEPAGE_TRANS(&ch, rp, &sz); + switch (gm_codepage) { + case 1252: + unicode_utf8_to_cp1252(&ch, rp, &sz); + break; + case 437: + unicode_utf8_to_cp437(&ch, rp, &sz); + break; + default: + unicode_utf8_to_ascii(&ch, rp, &sz); + break; + } rp += sz; *wp++ = ch; } diff --git a/src/gmtool.h b/src/gmtool.h index d5396a615..3a682f453 100644 --- a/src/gmtool.h +++ b/src/gmtool.h @@ -31,6 +31,7 @@ extern "C" { void run_mapper(void); extern int force_color; + extern int gm_codepage; struct state *state_open(void); void state_close(struct state *); diff --git a/src/main.c b/src/main.c index ba1241564..d011a473b 100644 --- a/src/main.c +++ b/src/main.c @@ -96,6 +96,7 @@ static void parse_config(const char *filename) #ifdef USE_CURSES /* only one value in the [editor] section */ force_color = iniparser_getint(d, "editor:color", force_color); + gm_codepage = iniparser_getint(d, "editor:codepage", gm_codepage); #endif } } diff --git a/src/util/unicode.c b/src/util/unicode.c index 4be39a551..94b6e3d5a 100644 --- a/src/util/unicode.c +++ b/src/util/unicode.c @@ -518,10 +518,22 @@ size_t * length) return 0; } +/** Convert a UTF-8 encoded character to ASCII, with '?' replacements. */ +int unicode_utf8_to_ascii(char *cp_character, const utf8_t * utf8_string, + size_t *length) +{ + int result = unicode_utf8_to_cp437(cp_character, utf8_string, length); + if (result == 0) { + if (*length > 1) { + *cp_character = '?'; + } + } + return result; +} + /** Convert a UTF-8 encoded character to CP1252. */ -int -unicode_utf8_to_cp1252(char *cp_character, const utf8_t * utf8_string, -size_t * length) +int unicode_utf8_to_cp1252(char *cp_character, const utf8_t * utf8_string, + size_t * length) { ucs4_t ucs4_character; int result; diff --git a/src/util/unicode.h b/src/util/unicode.h index 3408ef948..b061cd6fb 100644 --- a/src/util/unicode.h +++ b/src/util/unicode.h @@ -28,18 +28,20 @@ extern "C" { typedef unsigned long ucs4_t; typedef char utf8_t; - extern int unicode_utf8_to_cp437(char *result, const utf8_t * utf8_string, + int unicode_utf8_to_cp437(char *result, const utf8_t * utf8_string, size_t * length); - extern int unicode_utf8_to_cp1252(char *result, const utf8_t * utf8_string, + int unicode_utf8_to_cp1252(char *result, const utf8_t * utf8_string, size_t * length); - extern int unicode_utf8_to_ucs4(ucs4_t * result, const utf8_t * utf8_string, + int unicode_utf8_to_ucs4(ucs4_t * result, const utf8_t * utf8_string, size_t * length); - extern int unicode_ucs4_to_utf8(utf8_t * result, size_t * size, + int unicode_ucs4_to_utf8(utf8_t * result, size_t * size, ucs4_t ucs4_character); - extern int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b); - extern int unicode_latin1_to_utf8(utf8_t * out, size_t * outlen, + int unicode_utf8_to_ascii(char *cp_character, const utf8_t * utf8_string, + size_t *length); + int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b); + int unicode_latin1_to_utf8(utf8_t * out, size_t * outlen, const char *in, size_t * inlen); - extern int unicode_utf8_tolower(utf8_t * out, size_t outlen, + int unicode_utf8_tolower(utf8_t * out, size_t outlen, const utf8_t * in); #ifdef __cplusplus