forked from github/server
support for non-utf8 encodings has rotted, kill it.
This commit is contained in:
parent
0c6ec47598
commit
fb8507e0d2
4 changed files with 4 additions and 156 deletions
|
@ -101,9 +101,6 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|||
/* exported symbols */
|
||||
int firstx = 0, firsty = 0;
|
||||
|
||||
/* TODO: is this still important? */
|
||||
int enc_gamedata = ENCODING_UTF8;
|
||||
|
||||
static void read_alliances(gamedata *data)
|
||||
{
|
||||
storage *store = data->store;
|
||||
|
|
|
@ -229,7 +229,7 @@ int read_orders(input *in)
|
|||
static const char * file_getbuf(void *data)
|
||||
{
|
||||
FILE *F = (FILE *)data;
|
||||
return getbuf(F, ENCODING_UTF8);
|
||||
return getbuf(F);
|
||||
}
|
||||
|
||||
typedef struct parser_state {
|
||||
|
|
|
@ -39,150 +39,6 @@ static int eatwhite(const char *ptr, size_t * total_size)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static const char *getbuf_latin1(FILE * F)
|
||||
{
|
||||
bool cont = false;
|
||||
char quote = 0;
|
||||
bool comment = false;
|
||||
char *cp = fbuf;
|
||||
char *tail = lbuf + MAXLINE - 2;
|
||||
|
||||
tail[1] = '@'; /* if this gets overwritten by fgets then the line was very long. */
|
||||
do {
|
||||
const char *bp = fgets(lbuf, MAXLINE, F);
|
||||
|
||||
if (bp == NULL)
|
||||
return NULL;
|
||||
while (*bp && isspace(*(unsigned char *)bp))
|
||||
++bp; /* eatwhite */
|
||||
|
||||
comment = (bool)(comment && cont);
|
||||
quote = (bool)(quote && cont);
|
||||
|
||||
if (tail[1] == 0) {
|
||||
/* we read the maximum number of bytes! */
|
||||
if (tail[0] != '\n') {
|
||||
/* there wasn't enough space to finish the line, eat the rest */
|
||||
for (;;) {
|
||||
tail[1] = '@';
|
||||
bp = fgets(lbuf, MAXLINE, F);
|
||||
if (bp == NULL)
|
||||
return NULL;
|
||||
if (tail[1]) {
|
||||
/* read enough this time to end the line */
|
||||
break;
|
||||
}
|
||||
}
|
||||
comment = false;
|
||||
cont = false;
|
||||
bp = NULL;
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
tail[1] = '@';
|
||||
}
|
||||
}
|
||||
cont = false;
|
||||
while (*bp && cp < fbuf + MAXLINE) {
|
||||
int c = *(unsigned char *)bp;
|
||||
|
||||
if (c == '\n' || c == '\r') {
|
||||
/* line breaks, shmine breaks */
|
||||
break;
|
||||
}
|
||||
if (c == COMMENT_CHAR && !quote) {
|
||||
/* comment begins. we need to keep going, to look for CONTINUE_CHAR */
|
||||
comment = true;
|
||||
++bp;
|
||||
continue;
|
||||
}
|
||||
if (!comment && (c == '"' || c == '\'')) {
|
||||
if (quote == c) {
|
||||
quote = 0;
|
||||
if (cp < fbuf + MAXLINE)
|
||||
*cp++ = *bp;
|
||||
++bp;
|
||||
continue;
|
||||
}
|
||||
else if (!quote) {
|
||||
quote = *bp++;
|
||||
if (cp < fbuf + MAXLINE)
|
||||
*cp++ = quote;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (iscntrl(c)) {
|
||||
if (!comment && cp < fbuf + MAXLINE) {
|
||||
*cp++ = isspace(c) ? ' ' : '?';
|
||||
}
|
||||
++bp;
|
||||
continue;
|
||||
}
|
||||
else if (isspace(c)) {
|
||||
if (!quote) {
|
||||
++bp;
|
||||
while (*bp && isspace(*(unsigned char *)bp))
|
||||
++bp; /* eatwhite */
|
||||
if (!comment && *bp && *bp != COMMENT_CHAR && cp < fbuf + MAXLINE)
|
||||
*(cp++) = ' ';
|
||||
}
|
||||
else if (!comment && cp + 1 <= fbuf + MAXLINE) {
|
||||
*(cp++) = *(bp++);
|
||||
}
|
||||
else {
|
||||
++bp;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else if (c == CONTINUE_CHAR) {
|
||||
const char *handle_end = ++bp;
|
||||
while (*handle_end && isspace(*(unsigned char *)handle_end))
|
||||
++handle_end; /* eatwhite */
|
||||
if (*handle_end == '\0') {
|
||||
bp = handle_end;
|
||||
cont = true;
|
||||
continue;
|
||||
}
|
||||
if (comment) {
|
||||
++bp;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (comment) {
|
||||
++bp;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c < 0x80) {
|
||||
if (cp + 1 <= fbuf + MAXLINE) {
|
||||
*(cp++) = *(bp++);
|
||||
}
|
||||
}
|
||||
else {
|
||||
char inbuf = (char)c;
|
||||
size_t inbytes = 1;
|
||||
size_t outbytes = MAXLINE - (cp - fbuf);
|
||||
int ret = unicode_latin1_to_utf8(cp, &outbytes, &inbuf, &inbytes);
|
||||
if (ret > 0)
|
||||
cp += ret;
|
||||
else {
|
||||
log_error("input data was not iso-8859-1! assuming utf-8\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
++bp;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (cp == fbuf + MAXLINE) {
|
||||
--cp;
|
||||
}
|
||||
*cp = 0;
|
||||
} while (cont || cp == fbuf);
|
||||
return fbuf;
|
||||
}
|
||||
|
||||
static const char *getbuf_utf8(FILE * F)
|
||||
{
|
||||
bool cont = false;
|
||||
|
@ -336,9 +192,7 @@ static const char *getbuf_utf8(FILE * F)
|
|||
return fbuf;
|
||||
}
|
||||
|
||||
const char *getbuf(FILE * F, int encoding)
|
||||
const char *getbuf(FILE * F)
|
||||
{
|
||||
if (encoding == ENCODING_UTF8)
|
||||
return getbuf_utf8(F);
|
||||
return getbuf_latin1(F);
|
||||
return getbuf_utf8(F);
|
||||
}
|
||||
|
|
|
@ -15,10 +15,7 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ENCODING_UTF8 0
|
||||
#define ENCODING_LATIN1 1
|
||||
|
||||
const char *getbuf(FILE *, int encoding);
|
||||
const char *getbuf(FILE *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue