From 76d4c1a98efaab7d5d06edcde2ede189aa9b39e6 Mon Sep 17 00:00:00 2001 From: Not Zed Date: Wed, 28 Aug 2002 07:45:17 +0000 Subject: fixes a crash on systems that dont have utf7 in iconv. 2002-08-28 Not Zed * providers/imap/camel-imap-utils.c (imap_mailbox_encode): Chagned to use camel_utf8_utf7 code. (imap_mailbox_decode): As above, using camel_utf8_utf7. 'UTF-7' isn't a widely support iconv() codeset, and besides the new code is simpler. * camel-utf8.[ch]: robust utilities for working with utf8 and utf7. svn path=/trunk/; revision=17886 --- camel/camel-utf8.c | 257 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 camel/camel-utf8.c (limited to 'camel/camel-utf8.c') diff --git a/camel/camel-utf8.c b/camel/camel-utf8.c new file mode 100644 index 0000000000..5ed5a476d0 --- /dev/null +++ b/camel/camel-utf8.c @@ -0,0 +1,257 @@ + +#include +#include "camel-utf8.h" + +/** + * camel_utf8_putc: + * @ptr: + * @c: + * + * Output a 32 bit unicode character as utf8 octets. At most 4 octets will + * be written to @ptr. @ptr will be advanced to the next character position. + **/ +void +camel_utf8_putc(unsigned char **ptr, guint32 c) +{ + register unsigned char *p = *ptr; + + if (c <= 0x7f) + *p++ = c; + else if (c <= 0x7ff) { + *p++ = 0xc0 | c >> 6; + *p++ = 0x80 | (c & 0x3f); + } else if (c <= 0xffff) { + *p++ = 0xe0 | c >> 12; + *p++ = 0x80 | ((c >> 6) & 0x3f); + *p++ = 0x80 | (c & 0x3f); + } else { + /* see unicode standard 3.0, S 3.8, max 4 octets */ + *p++ = 0xf0 | c >> 18; + *p++ = 0x80 | ((c >> 12) & 0x3f); + *p++ = 0x80 | ((c >> 6) & 0x3f); + *p++ = 0x80 | (c & 0x3f); + } + + *ptr = p; +} + +/** + * camel_utf8_getc: + * @ptr: + * + * Get a Unicode character from a utf8 stream. @ptr will be advanced + * to the next character position. Invalid utf8 characters will be + * silently skipped. @ptr should point to a NUL terminated array. + * + * Return value: The next Unicode character. @ptr will be advanced to + * the next character always. + **/ +guint32 +camel_utf8_getc(const unsigned char **ptr) +{ + register unsigned char *p = (unsigned char *)*ptr; + register unsigned char c, r; + register guint32 v, m; + +again: + r = *p++; +loop: + if (r < 0x80) { + *ptr = p; + v = r; + } else if (r < 0xf8) { /* valid start char? (max 4 octets) */ + v = r; + m = 0x7f80; /* used to mask out the length bits */ + do { + c = *p++; + if ((c & 0xc0) != 0x80) { + r = c; + goto loop; + } + v = (v<<6) | (c & 0x3f); + r<<=1; + m<<=5; + } while (r & 0x40); + + *ptr = p; + + v &= ~m; + } else { + goto again; + } + + return v; +} + +void +g_string_append_u(GString *out, guint32 c) +{ + unsigned char buffer[8]; + unsigned char *p = buffer; + + camel_utf8_putc(&p, c); + *p = 0; + g_string_append(out, buffer); +} + +static char *utf7_alphabet = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; + +static unsigned char utf7_rank[256] = { + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x3e,0x3f,0xff,0xff,0xff, + 0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e, + 0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xff,0xff,0xff,0xff,0xff, + 0xff,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28, + 0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, +}; + +/** + * camel_utf7_utf8: + * @ptr: + * + * Convert a modified utf7 string to utf8. If the utf7 string + * contains 8 bit characters, they are treated as iso-8859-1. + * + * The IMAP rules [rfc2060] are used in the utf7 encoding. + * + * Return value: The converted string. + **/ +char * +camel_utf7_utf8(const char *ptr) +{ + const unsigned char *p = (unsigned char *)ptr; + unsigned int c; + guint32 v=0, x; + GString *out; + int i=0; + int state = 0; + char *ret; + + out = g_string_new(""); + do { + c = *p++; + switch(state) { + case 0: + if (c == '&') + state = 1; + else + g_string_append_u(out, c); + break; + case 1: + if (c == '-') { + g_string_append_c(out, '&'); + state = 0; + } else if (utf7_rank[c] != 0xff) { + v = utf7_rank[c]; + i = 6; + state = 2; + } else { + /* invalid */ + g_string_append(out, "&-"); + state = 0; + } + break; + case 2: + if (c == '-') { + state = 0; + } else if (utf7_rank[c] != 0xff) { + v = (v<<6) | utf7_rank[c]; + i+=6; + if (i >= 16) { + x = (v >> (i-16)) & 0xffff; + g_string_append_u(out, x); + i-=16; + } + } else { + g_string_append_u(out, c); + state = 0; + } + break; + } + } while (c); + + ret = g_strdup(out->str); + g_string_free(out, TRUE); + + return ret; +} + +static void utf7_closeb64(GString *out, guint32 v, guint32 i) +{ + guint32 x; + + if (i>0) { + x = (v << (6-i)) & 0x3f; + g_string_append_c(out, utf7_alphabet[x]); + } + g_string_append_c(out, '-'); +} + +/** + * camel_utf8_utf7: + * @ptr: + * + * Convert a utf8 string to a modified utf7 format. + * + * The IMAP rules [rfc2060] are used in the utf7 encoding. + * + * Return value: + **/ +char * +camel_utf8_utf7(const char *ptr) +{ + const unsigned char *p = (unsigned char *)ptr; + unsigned int c; + guint32 x, v = 0; + int state = 0; + GString *out; + int i = 0; + char *ret; + + out = g_string_new(""); + + while ( (c = camel_utf8_getc(&p)) ) { + if (c >= 0x20 && c <= 0x7e) { + if (state == 1) { + utf7_closeb64(out, v, i); + state = 0; + i = 0; + } + if (c == '&') + g_string_append(out, "&-"); + else + g_string_append_c(out, c); + } else { + if (state == 0) { + g_string_append_c(out, '&'); + state = 1; + } + v = (v << 16) | c; + i += 16; + while (i >= 6) { + x = (v >> (i-6)) & 0x3f; + g_string_append_c(out, utf7_alphabet[x]); + i -= 6; + } + } + } + + if (state == 1) + utf7_closeb64(out, v, i); + + ret = g_strdup(out->str); + g_string_free(out, TRUE); + + return ret; +} -- cgit v1.2.3