support different editor codepages

This commit is contained in:
Enno Rehling 2016-08-07 19:19:35 +02:00
parent 400051332a
commit 731238dfb1
5 changed files with 42 additions and 28 deletions

View file

@ -78,30 +78,28 @@ state *current_state = NULL;
static WINDOW *hstatus;
/**
 * Convert the UTF-8 character at the start of utf8_string to one byte,
 * substituting '?' for any character that is longer than one byte.
 *
 * Decoding is delegated to unicode_utf8_to_cp437(); its status is passed
 * through to the caller unchanged.
 *
 * @param result       receives the converted byte
 * @param utf8_string  UTF-8 encoded input
 * @param length       receives the length reported by
 *                     unicode_utf8_to_cp437() (callers advance their
 *                     read pointer by this amount)
 * @return the return value of unicode_utf8_to_cp437(); a non-zero value
 *         is treated as failure
 */
static int unicode_utf8_to_ascii(char *result, const utf8_t * utf8_string,
    size_t *length)
{
    int retval = unicode_utf8_to_cp437(result, utf8_string, length);

    /* Only inspect *length and overwrite *result after a successful
     * conversion: on failure the callee may not have written either, and
     * a bogus '?' would mask the error for the caller. */
    if (retval == 0 && *length > 1) {
        *result = '?';
    }
    return retval;
}
#ifdef WIN32
#define CODEPAGE_TRANS unicode_utf8_to_cp1252
#elif defined(NCURSES_VERSION)
#define CODEPAGE_TRANS unicode_utf8_to_cp437
#else
#define CODEPAGE_TRANS unicode_utf8_to_ascii
/*
 * Codepage used when converting UTF-8 text for the editor.
 * Starts out as STDIO_CP when that macro is defined at build time and as
 * -1 otherwise. The codepage conversion is only attempted for values
 * >= 0; 1252 and 437 select the matching converters, any other
 * non-negative value selects the ASCII fallback.
 */
#ifdef STDIO_CP
int gm_codepage = STDIO_CP;
#else
int gm_codepage = -1;
#endif
static void unicode_remove_diacritics(const char *rp, char *wp) {
while (*rp) {
if (*rp & 0x80) {
if (gm_codepage >=0 && *rp & 0x80) {
size_t sz = 0;
char ch;
CODEPAGE_TRANS(&ch, rp, &sz);
switch (gm_codepage) {
case 1252:
unicode_utf8_to_cp1252(&ch, rp, &sz);
break;
case 437:
unicode_utf8_to_cp437(&ch, rp, &sz);
break;
default:
unicode_utf8_to_ascii(&ch, rp, &sz);
break;
}
rp += sz;
*wp++ = ch;
}

View file

@ -31,6 +31,7 @@ extern "C" {
void run_mapper(void);
/* Editor setting; the config parser replaces it with the integer value
 * of the "editor:color" key, keeping the current value as the default. */
extern int force_color;
/* Editor codepage; the config parser replaces it with the integer value
 * of the "editor:codepage" key, keeping the current value as the default. */
extern int gm_codepage;
struct state *state_open(void);
void state_close(struct state *);

View file

@ -96,6 +96,7 @@ static void parse_config(const char *filename)
#ifdef USE_CURSES
/* only one value in the [editor] section */
force_color = iniparser_getint(d, "editor:color", force_color);
gm_codepage = iniparser_getint(d, "editor:codepage", gm_codepage);
#endif
}
}

View file

@ -518,10 +518,22 @@ size_t * length)
return 0;
}
/**
 * Reduce one UTF-8 encoded character to a single byte, emitting '?' for
 * every character that occupies more than one byte in the input.
 *
 * Decoding is delegated to unicode_utf8_to_cp437(). When it reports
 * success (0) and *length is greater than 1, the converted byte is
 * replaced by '?'. On failure nothing further is touched.
 *
 * @param cp_character  receives the resulting byte
 * @param utf8_string   UTF-8 encoded input
 * @param length        receives the length reported by
 *                      unicode_utf8_to_cp437()
 * @return the status returned by unicode_utf8_to_cp437(), unchanged
 */
int unicode_utf8_to_ascii(char *cp_character, const utf8_t * utf8_string,
    size_t *length)
{
    const int status =
        unicode_utf8_to_cp437(cp_character, utf8_string, length);

    if (status != 0) {
        /* Conversion failed: hand the error straight back. */
        return status;
    }
    if (*length > 1) {
        /* Multi-byte input has no ASCII equivalent. */
        *cp_character = '?';
    }
    return status;
}
/** Convert a UTF-8 encoded character to CP1252. */
int
unicode_utf8_to_cp1252(char *cp_character, const utf8_t * utf8_string,
size_t * length)
int unicode_utf8_to_cp1252(char *cp_character, const utf8_t * utf8_string,
size_t * length)
{
ucs4_t ucs4_character;
int result;

View file

@ -28,18 +28,20 @@ extern "C" {
typedef unsigned long ucs4_t;
typedef char utf8_t;
extern int unicode_utf8_to_cp437(char *result, const utf8_t * utf8_string,
int unicode_utf8_to_cp437(char *result, const utf8_t * utf8_string,
size_t * length);
extern int unicode_utf8_to_cp1252(char *result, const utf8_t * utf8_string,
int unicode_utf8_to_cp1252(char *result, const utf8_t * utf8_string,
size_t * length);
extern int unicode_utf8_to_ucs4(ucs4_t * result, const utf8_t * utf8_string,
int unicode_utf8_to_ucs4(ucs4_t * result, const utf8_t * utf8_string,
size_t * length);
extern int unicode_ucs4_to_utf8(utf8_t * result, size_t * size,
int unicode_ucs4_to_utf8(utf8_t * result, size_t * size,
ucs4_t ucs4_character);
extern int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b);
extern int unicode_latin1_to_utf8(utf8_t * out, size_t * outlen,
int unicode_utf8_to_ascii(char *cp_character, const utf8_t * utf8_string,
size_t *length);
int unicode_utf8_strcasecmp(const utf8_t * a, const utf8_t * b);
int unicode_latin1_to_utf8(utf8_t * out, size_t * outlen,
const char *in, size_t * inlen);
extern int unicode_utf8_tolower(utf8_t * out, size_t outlen,
int unicode_utf8_tolower(utf8_t * out, size_t outlen,
const utf8_t * in);
#ifdef __cplusplus