From 9b60cad3dc13970bb0b4562cf4f9b38f3edc46db Mon Sep 17 00:00:00 2001 From: Not Zed Date: Mon, 13 Jan 2003 05:46:35 +0000 Subject: Read the characters as utf8, rather than as 8 bit bytes. Remove the 2003-01-13 Not Zed * camel-mime-filter-tohtml.c (writeln): Read the characters as utf8, rather than as 8 bit bytes. Remove the PRESERVE_8BIT as it has no meaning. Also change the default logic slightly so that 8 bit or greater characters are properly converted to entities. * camel-utf8.c (camel_utf8_getc_limit): new function, gets a utf8 char, bounded by an end pointer. svn path=/trunk/; revision=19421 --- camel/ChangeLog | 10 +++++++ camel/camel-mime-filter-tohtml.c | 38 +++++++++++++++------------ camel/camel-utf8.c | 56 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 17 deletions(-) (limited to 'camel') diff --git a/camel/ChangeLog b/camel/ChangeLog index f9c2ce1c3f..916e4b6f70 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,3 +1,13 @@ +2003-01-13 Not Zed + + * camel-mime-filter-tohtml.c (writeln): Read the characters as + utf8, rather than as 8 bit bytes. Remove the PRESERVE_8BIT as it + has no meaning. Also change the default logic slightly so that 8 + bit or greater characters are properly converted to entities. + + * camel-utf8.c (camel_utf8_getc_limit): new function, gets a utf8 + char, bounded by an end pointer. 
+ 2003-01-07 Dan Winship * camel-provider.h (CamelProvider): add a "translation_domain" diff --git a/camel/camel-mime-filter-tohtml.c b/camel/camel-mime-filter-tohtml.c index 4f9d972625..370d9c6c4e 100644 --- a/camel/camel-mime-filter-tohtml.c +++ b/camel/camel-mime-filter-tohtml.c @@ -28,6 +28,7 @@ #include #include +#include "camel-utf8.h" #include "camel-url-scanner.h" #include "camel-mime-filter-tohtml.h" @@ -147,14 +148,18 @@ static char * writeln (CamelMimeFilter *filter, const char *in, const char *inend, char *outptr, char **outend) { CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter; - register const char *inptr = in; - + const char *inptr = in; + while (inptr < inend) { - unsigned char u; - - outptr = check_size (filter, outptr, outend, 9); - - switch ((u = (unsigned char) *inptr++)) { + guint32 u; + + outptr = check_size (filter, outptr, outend, 16); + + u = camel_utf8_getc_limit(&inptr, inend); + switch (u) { + case 0xffff: + g_warning("Truncated utf8 buffer"); + return outptr; case '<': outptr = g_stpcpy (outptr, "<"); html->column++; @@ -182,22 +187,21 @@ writeln (CamelMimeFilter *filter, const char *in, const char *inend, char *outpt } /* otherwise, FALL THROUGH */ case ' ': - if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES) { - if (inptr == (in + 1) || *inptr == ' ' || *inptr == '\t') { - outptr = g_stpcpy (outptr, " "); - html->column++; - break; - } + if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES + && ((inptr == (in + 1) || *inptr == ' ' || *inptr == '\t'))) { + outptr = g_stpcpy (outptr, " "); + html->column++; + break; } /* otherwise, FALL THROUGH */ default: - if (!(u >= 0x20 && u < 0x80) && !(html->flags & CAMEL_MIME_FILTER_TOHTML_PRESERVE_8BIT)) { + if (u >= 20 && u <0x80) + *outptr++ = u; + else { if (html->flags & CAMEL_MIME_FILTER_TOHTML_ESCAPE_8BIT) *outptr++ = '?'; else - outptr += g_snprintf (outptr, 9, "&#%d;", (int) u); - } else { - *outptr++ = (char) u; + outptr += sprintf(outptr, "&#%u;", u); } 
html->column++; break; diff --git a/camel/camel-utf8.c b/camel/camel-utf8.c index 5ed5a476d0..3c7af65b4d 100644 --- a/camel/camel-utf8.c +++ b/camel/camel-utf8.c @@ -83,6 +83,62 @@ loop: return v; } +/** + * camel_utf8_getc_limit: + * @ptr: in/out; address of the current read position. It is written back only when a character is returned. + * @end: must not be NULL. Reading stops when the position reaches @end. + * + * Get the next utf8 char at @ptr, and return it, advancing @ptr to + * the next character. If @end is reached before a full utf8 + * character can be read, then the invalid Unicode char 0xffff is + * returned as a sentinel (Unicode 3.1, section 2.7), and @ptr is not + * advanced. Bytes of 0xf8 and above are skipped while looking for the start of a character. + * + * Return value: The next utf8 char, or 0xffff. + **/ +guint32 +camel_utf8_getc_limit(const unsigned char **ptr, const unsigned char *end) +{ + register unsigned char *p = (unsigned char *)*ptr; /* cast drops const; p is only read through below */ + register unsigned char c, r; + register guint32 v = 0xffff, m; + +again: + while (p < end) { + r = *p++; +loop: + if (r < 0x80) { /* single-byte (ASCII) character */ + *ptr = p; + return r; + } else if (r < 0xf8) { /* valid start char? (max 4 octets); NOTE(review): 0x80..0xbf also reach this branch - confirm that is intended */ + v = r; + m = 0x7f80; /* used to mask out the length bits */ + do { + if (p >= end) /* ran out of input mid-character; the caller pointer is left untouched */ + return 0xffff; + + c = *p++; + if ((c & 0xc0) != 0x80) { /* not a 10xxxxxx continuation byte: restart decoding with c as the first byte */ + r = c; + goto loop; + } + v = (v<<6) | (c & 0x3f); /* append the 6 payload bits of c */ + r<<=1; + m<<=5; + } while (r & 0x40); /* r shifts left once per continuation byte; loop while bit 6 is still set */ + + *ptr = p; + + v &= ~m; + return v; + } else { /* r >= 0xf8: skip this byte and look at the next one */ + goto again; + } + } + + return 0xffff; +} + void g_string_append_u(GString *out, guint32 c) { -- cgit v1.2.3