aboutsummaryrefslogtreecommitdiffstats
path: root/camel/camel-utf8.c
diff options
context:
space:
mode:
authorNot Zed <NotZed@Ximian.com>2003-01-13 13:46:35 +0800
committerMichael Zucci <zucchi@src.gnome.org>2003-01-13 13:46:35 +0800
commit9b60cad3dc13970bb0b4562cf4f9b38f3edc46db (patch)
treeb5f767bf7c76558d6728f2db3a5c9c3869ace389 /camel/camel-utf8.c
parent969b2c6b650ed31a1accd44eda629c17c1b5ce8b (diff)
downloadgsoc2013-evolution-9b60cad3dc13970bb0b4562cf4f9b38f3edc46db.tar
gsoc2013-evolution-9b60cad3dc13970bb0b4562cf4f9b38f3edc46db.tar.gz
gsoc2013-evolution-9b60cad3dc13970bb0b4562cf4f9b38f3edc46db.tar.bz2
gsoc2013-evolution-9b60cad3dc13970bb0b4562cf4f9b38f3edc46db.tar.lz
gsoc2013-evolution-9b60cad3dc13970bb0b4562cf4f9b38f3edc46db.tar.xz
gsoc2013-evolution-9b60cad3dc13970bb0b4562cf4f9b38f3edc46db.tar.zst
gsoc2013-evolution-9b60cad3dc13970bb0b4562cf4f9b38f3edc46db.zip
Read the characters as utf8, rather than as 8 bit bytes. Remove the PRESERVE_8BIT as it has no meaning.
2003-01-13 Not Zed <NotZed@Ximian.com> * camel-mime-filter-tohtml.c (writeln): Read the characters as utf8, rather than as 8 bit bytes. Remove the PRESERVE_8BIT as it has no meaning. Also change the default logic slightly so that 8 bit or greater characters are properly converted to entities. * camel-utf8.c (camel_utf8_getc_limit): new function, gets a utf8 char, bounded by an end pointer. svn path=/trunk/; revision=19421
Diffstat (limited to 'camel/camel-utf8.c')
-rw-r--r--camel/camel-utf8.c56
1 files changed, 56 insertions, 0 deletions
diff --git a/camel/camel-utf8.c b/camel/camel-utf8.c
index 5ed5a476d0..3c7af65b4d 100644
--- a/camel/camel-utf8.c
+++ b/camel/camel-utf8.c
@@ -83,6 +83,62 @@ loop:
return v;
}
+/**
+ * camel_utf8_getc_limit:
+ * @ptr: in/out: address of the caller's read pointer; moved past the bytes consumed whenever a character is returned.
+ * @end: one past the last byte that may be read; must not be NULL.
+ *
+ * Get the next utf8 char at @ptr, and return it, advancing @ptr past the bytes
+ * consumed. Bytes >= 0xf8 are skipped, and a sequence interrupted by a
+ * non-continuation byte is abandoned and decoding restarts at that byte. If @end
+ * is reached before a full utf8 character can be read, then the invalid Unicode char
+ * 0xffff is returned as a sentinel (Unicode 3.1, section 2.7), and @ptr is not advanced.
+ *
+ * Return value: The next utf8 char, or 0xffff if @end was reached first.
+ **/
+guint32
+camel_utf8_getc_limit(const unsigned char **ptr, const unsigned char *end)
+{
+ register unsigned char *p = (unsigned char *)*ptr; /* working cursor (cast drops const, but p is only read through); *ptr is written back only when a character is returned */
+ register unsigned char c, r; /* c: current continuation octet; r: first octet, shifted left as octets are consumed */
+ register guint32 v = 0xffff, m; /* v: value being assembled (this initialiser is overwritten before v is read); m: mask for the length bits */
+
+again: /* target of the final else branch: skip a byte >= 0xf8 and read the next one */
+ while (p < end) {
+ r = *p++;
+loop: /* re-entered with r set to the byte that interrupted a multi-octet sequence */
+ if (r < 0x80) { /* single octet: the byte is the character */
+ *ptr = p;
+ return r;
+ } else if (r < 0xf8) { /* valid start char? (max 4 octets); NOTE(review): 0x80..0xbf also pass this test and are handled as start chars */
+ v = r;
+ m = 0x7f80; /* used to mask out the length bits */
+ do {
+ if (p >= end) /* input ends mid-sequence: report the sentinel, *ptr untouched */
+ return 0xffff;
+
+ c = *p++;
+ if ((c & 0xc0) != 0x80) { /* not a 10xxxxxx continuation octet: drop the partial value and treat c as a new first octet */
+ r = c;
+ goto loop;
+ }
+ v = (v<<6) | (c & 0x3f); /* append the 6 payload bits of the continuation octet */
+ r<<=1; /* bring the next length bit of the first octet into position 0x40 */
+ m<<=5; /* follow the first octet's length bits to where they now sit in v */
+ } while (r & 0x40); /* bit 0x40 still set: the first octet announces another continuation octet */
+
+ *ptr = p; /* commit: the caller resumes after this sequence */
+
+ v &= ~m; /* clear the length bits, leaving the decoded value */
+ return v;
+ } else { /* r >= 0xf8: not accepted as a start char, skip it */
+ goto again;
+ }
+ }
+
+ return 0xffff; /* ran out of input without returning a character; *ptr untouched */
+}
+
void
g_string_append_u(GString *out, guint32 c)
{