/* NOTE(review): the operands of the bare #include directives below are
 * missing in this copy of the file; restore the header names from the
 * upstream source before building. */
#include
#include "filereader.h"
#include
#include
#include
#include
#include

/* Special characters referenced by name; their handling is not visible in
 * the intact parts of this chunk. */
#define COMMENT_CHAR ';'
#define CONTINUE_CHAR '\\'
/* Size in bytes of both static buffers below.
 * NOTE(review): the expansion is not parenthesized; every visible use is in
 * an additive context (lbuf+MAXLINE-2, fbuf+MAXLINE), where that is harmless. */
#define MAXLINE 4096*16

/* lbuf receives raw input from fgets(); fbuf is the buffer the getbuf_*
 * functions return a pointer to. Both are file-scope statics, so a returned
 * pointer refers to storage that the next call reuses. */
static char lbuf[MAXLINE];
static char fbuf[MAXLINE];

/* Logs a warning through log_warning() that quotes the string bp. */
static void unicode_warning(const char * bp)
{
  log_warning(("invalid sequence in UTF-8 string: %s\n", bp));
}

/*
 * Measures the run of leading whitespace at the start of ptr.
 *
 * Decodes one code point at a time with unicode_utf8_to_ucs4() and stops at
 * the terminating NUL, at the first code point for which iswxspace() is
 * false, or when the decoder returns a non-zero value.
 *
 * ptr        - NUL-terminated input; it is not modified.
 * total_size - out: sum of the sizes reported by the decoder for the
 *              whitespace code points that were skipped (0 if none).
 *
 * Returns the last value returned by unicode_utf8_to_ucs4(), or 0 if the
 * string was empty. Non-zero is treated here as "stop scanning";
 * presumably it signals a decoding error - confirm against the decoder.
 */
INLINE_FUNCTION int eatwhite(const char * ptr, size_t * total_size)
{
  int ret = 0;

  *total_size = 0;

  while (*ptr) {
    ucs4_t ucs;
    size_t size = 0;
    ret = unicode_utf8_to_ucs4(&ucs, ptr, &size);
    if (ret!=0) break;
    if (!iswxspace((wint_t)ucs)) break;
    *total_size += size;
    ptr += size;
  }
  return ret;
}

/*
 * Reads input from F line by line through lbuf and assembles the result in
 * fbuf.
 *
 * Returns fbuf on success, or NULL when fgets() returns NULL (end of file or
 * read error) and on the error path that logs "input data was not
 * iso-8859-1".
 *
 * NOTE(review): the body of the inner copy loop is missing from this copy of
 * the file (see the marker below), so the handling of comments, quotes and
 * continuation characters cannot be described from here.
 */
static const char * getbuf_latin1(FILE * F)
{
  boolean cont = false;
  char quote = 0;
  boolean comment = false;
  char * cp = fbuf;
  /* tail[0] is the last character slot of lbuf that fgets() can fill with
   * data and tail[1] is the final byte of lbuf. fgets(lbuf, MAXLINE, F)
   * stores at most MAXLINE-1 characters plus a NUL, so the NUL only lands on
   * tail[1] when the buffer was filled completely. */
  char * tail = lbuf+MAXLINE-2;

  tail[1] = '@'; /* if this gets overwritten by fgets then the line was very long. */
  do {
    const char * bp = fgets(lbuf, MAXLINE, F);

    if (bp==NULL) return NULL;
    while (*bp && isxspace(*(unsigned char*)bp)) ++bp; /* eatwhite */

    /* comment and quote state survive into this line only if the previous
     * one set cont */
    comment = (boolean)(comment && cont);
    quote = (boolean)(quote && cont);

    if (tail[1]==0) {
      /* we read the maximum number of bytes! */
      if (tail[0]!='\n') {
        /* it wasn't enough space to finish the line, eat the rest */
        for (;;) {
          /* re-arm the sentinel before every read */
          tail[1] = '@';
          bp = fgets(lbuf, MAXLINE, F);
          if (bp==NULL) return NULL;
          if (tail[1]) {
            /* read enough this time to end the line */
            break;
          }
        }
        comment = false;
        cont = false;
        bp = NULL;
        /* in a do/while, continue jumps to the loop condition at the bottom */
        continue;
      } else {
        /* the line filled the buffer exactly; re-arm the sentinel */
        tail[1] = '@';
      }
    }
    cont = false;
    /* NOTE(review): source text is missing between "cp" and "0)" on the next
     * line, so the loop condition and most of the loop body are gone and the
     * braces below no longer balance. Restore from upstream. */
    while (*bp && cp0) cp+=ret;
        else {
          log_error(("input data was not iso-8859-1! assuming utf-8\n"));
          return NULL;
        }

        ++bp;
        continue;
      }
    }
    /* if the output reached the end of fbuf, step back so the terminator
     * still fits inside the buffer */
    if (cp==fbuf+MAXLINE) {
      --cp;
    }
    *cp=0;
  } while (cont || cp==fbuf); /* keep reading while cont is set or nothing has been stored yet */
  return fbuf;
}

/* NOTE(review): the definition below continues beyond this chunk and is
 * reproduced unchanged. */
static const char * getbuf_utf8(FILE * F)
{
  boolean cont = false;
  char quote = 0;
  boolean comment = false;
  char * cp = fbuf;
  char * tail = lbuf+MAXLINE-2;

  tail[1] = '@'; /* if this gets overwritten by fgets then the line was very long.
*/ do { const char * bp = fgets(lbuf, MAXLINE, F); size_t white; if (bp==NULL) { return NULL; } eatwhite(bp, &white); /* decoding errors will get caught later on, don't have to check */ bp += white; comment = (boolean)(comment && cont); quote = (boolean)(quote && cont); if (tail[1]==0) { /* we read the maximum number of bytes! */ if (tail[0]!='\n') { /* it wasn't enough space to finish the line, eat the rest */ for (;;) { tail[1] = '@'; bp = fgets(lbuf, MAXLINE, F); if (bp==NULL) return NULL; if (tail[1]) { /* read enough this time to end the line */ break; } } comment = false; cont = false; bp = NULL; continue; } else { tail[1] = '@'; } } cont = false; while (*bp && cp