support different editor codepages

This commit is contained in:
Enno Rehling 2016-08-07 19:19:35 +02:00
parent 400051332a
commit 731238dfb1
5 changed files with 42 additions and 28 deletions

View file

@ -78,30 +78,28 @@ state *current_state = NULL;
static WINDOW *hstatus; static WINDOW *hstatus;
static int unicode_utf8_to_ascii(char *result, const utf8_t * utf8_string, #ifdef STDIO_CP
size_t *length) int gm_codepage = STDIO_CP;
{
int retval = unicode_utf8_to_cp437(result, utf8_string, length);
if (*length > 1) {
*result = '?';
}
return retval;
}
#ifdef WIN32
#define CODEPAGE_TRANS unicode_utf8_to_cp1252
#elif defined(NCURSES_VERSION)
#define CODEPAGE_TRANS unicode_utf8_to_cp437
#else #else
#define CODEPAGE_TRANS unicode_utf8_to_ascii int gm_codepage = -1;
#endif #endif
static void unicode_remove_diacritics(const char *rp, char *wp) { static void unicode_remove_diacritics(const char *rp, char *wp) {
while (*rp) { while (*rp) {
if (*rp & 0x80) { if (gm_codepage >=0 && *rp & 0x80) {
size_t sz = 0; size_t sz = 0;
char ch; char ch;
CODEPAGE_TRANS(&ch, rp, &sz); switch (gm_codepage) {
case 1252:
unicode_utf8_to_cp1252(&ch, rp, &sz);
break;
case 437:
unicode_utf8_to_cp437(&ch, rp, &sz);
break;
default:
unicode_utf8_to_ascii(&ch, rp, &sz);
break;
}
rp += sz; rp += sz;
*wp++ = ch; *wp++ = ch;
} }

View file

@ -31,6 +31,7 @@ extern "C" {
void run_mapper(void); void run_mapper(void);
extern int force_color; extern int force_color;
extern int gm_codepage;
struct state *state_open(void); struct state *state_open(void);
void state_close(struct state *); void state_close(struct state *);

View file

@ -96,6 +96,7 @@ static void parse_config(const char *filename)
#ifdef USE_CURSES #ifdef USE_CURSES
/* only one value in the [editor] section */ /* only one value in the [editor] section */
force_color = iniparser_getint(d, "editor:color", force_color); force_color = iniparser_getint(d, "editor:color", force_color);
gm_codepage = iniparser_getint(d, "editor:codepage", gm_codepage);
#endif #endif
} }
} }

View file

@ -518,9 +518,21 @@ size_t * length)
return 0; return 0;
} }
/**
 * Convert a UTF-8 encoded character to ASCII, with '?' replacements.
 *
 * The actual decoding is delegated to unicode_utf8_to_cp437(). When that
 * call succeeds (returns 0) and reports a *length greater than 1, the
 * converted character is discarded and '?' is stored in its place.
 *
 * @param cp_character  receives the converted character.
 * @param utf8_string   UTF-8 input, passed through to unicode_utf8_to_cp437().
 * @param length        filled in by unicode_utf8_to_cp437(); presumably the
 *                      number of input bytes consumed - confirm against
 *                      that function.
 * @return the return value of unicode_utf8_to_cp437(), unchanged.
 */
int unicode_utf8_to_ascii(char *cp_character, const utf8_t * utf8_string,
    size_t *length)
{
    const int status = unicode_utf8_to_cp437(cp_character, utf8_string, length);

    /* On failure, hand the error code back without touching the outputs. */
    if (status != 0) {
        return status;
    }

    /* A *length above 1 is treated as "not plain ASCII" and replaced. */
    if (*length > 1) {
        *cp_character = '?';
    }
    return 0;
}
/** Convert a UTF-8 encoded character to CP1252. */ /** Convert a UTF-8 encoded character to CP1252. */
int int unicode_utf8_to_cp1252(char *cp_character, const utf8_t * utf8_string,
unicode_utf8_to_cp1252(char *cp_character, const utf8_t * utf8_string,
size_t * length) size_t * length)
{ {
ucs4_t ucs4_character; ucs4_t ucs4_character;

View file

@ -28,18 +28,20 @@ extern "C" {
typedef unsigned long ucs4_t; typedef unsigned long ucs4_t;
typedef char utf8_t; typedef char utf8_t;
extern int unicode_utf8_to_cp437(char *result, const utf8_t * utf8_string, int unicode_utf8_to_cp437(char *result, const utf8_t * utf8_string,
size_t * length); size_t * length);
extern int unicode_utf8_to_cp1252(char *result, const utf8_t * utf8_string, int unicode_utf8_to_cp1252(char *result, const utf8_t * utf8_string,
size_t * length); size_t * length);
extern int unicode_utf8_to_ucs4(ucs4_t * result, const utf8_t * utf8_string, int unicode_utf8_to_ucs4(ucs4_t * result, const utf8_t * utf8_string,
size_t * length); size_t * length);
extern int unicode_ucs4_to_utf8(utf8_t * result, size_t * size, int unicode_ucs4_to_utf8(utf8_t * result, size_t * size,
ucs4_t ucs4_character); ucs4_t ucs4_character);
extern int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b); int unicode_utf8_to_ascii(char *cp_character, const utf8_t * utf8_string,
extern int unicode_latin1_to_utf8(utf8_t * out, size_t * outlen, size_t *length);
int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b);
int unicode_latin1_to_utf8(utf8_t * out, size_t * outlen,
const char *in, size_t * inlen); const char *in, size_t * inlen);
extern int unicode_utf8_tolower(utf8_t * out, size_t outlen, int unicode_utf8_tolower(utf8_t * out, size_t outlen,
const utf8_t * in); const utf8_t * in);
#ifdef __cplusplus #ifdef __cplusplus