aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--camel/ChangeLog10
-rw-r--r--camel/Makefile.am2
-rw-r--r--camel/camel-utf8.c257
-rw-r--r--camel/camel-utf8.h16
-rw-r--r--camel/providers/imap/camel-imap-utils.c266
5 files changed, 300 insertions, 251 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog
index f1c3f6cc72..a8277e579b 100644
--- a/camel/ChangeLog
+++ b/camel/ChangeLog
@@ -1,3 +1,13 @@
+2002-08-28 Not Zed <NotZed@Ximian.com>
+
+ * providers/imap/camel-imap-utils.c (imap_mailbox_encode): Changed
+ to use camel_utf8_utf7 code.
+ (imap_mailbox_decode): As above, using camel_utf7_utf8. 'UTF-7'
+ isn't a widely supported iconv() codeset, and besides the new code
+ is simpler.
+
+ * camel-utf8.[ch]: robust utilities for working with utf8 and utf7.
+
2002-08-27 Jeffrey Stedfast <fejj@ximian.com>
* camel-folder-thread.c (camel_folder_thread_messages_new): Now
diff --git a/camel/Makefile.am b/camel/Makefile.am
index 642641e12f..4bb0ddeba6 100644
--- a/camel/Makefile.am
+++ b/camel/Makefile.am
@@ -109,6 +109,7 @@ libcamel_la_SOURCES = \
camel-transport.c \
camel-uid-cache.c \
camel-url.c \
+ camel-utf8.c \
camel-vee-folder.c \
camel-vee-store.c \
camel-vtrash-folder.c \
@@ -208,6 +209,7 @@ libcamelinclude_HEADERS = \
camel-types.h \
camel-uid-cache.h \
camel-url.h \
+ camel-utf8.h \
camel-vee-folder.h \
camel-vee-store.h \
camel-vtrash-folder.h \
diff --git a/camel/camel-utf8.c b/camel/camel-utf8.c
new file mode 100644
index 0000000000..5ed5a476d0
--- /dev/null
+++ b/camel/camel-utf8.c
@@ -0,0 +1,257 @@
+
+#include <glib.h>
+#include "camel-utf8.h"
+
+/**
+ * camel_utf8_putc:
+ * @ptr: in/out cursor into the output buffer; on return it points just
+ * past the last octet written.
+ * @c: the character value to encode.
+ *
+ * Encode @c as utf8 octets at *@ptr and step the cursor forward.  Values
+ * up to 0x7f take one octet, up to 0x7ff two, up to 0xffff three, and
+ * anything larger four, so the caller must have at least 4 writable
+ * octets at *@ptr.  No terminating NUL is written.
+ **/
+void
+camel_utf8_putc(unsigned char **ptr, guint32 c)
+{
+	unsigned char *out = *ptr;
+
+	if (c > 0xffff) {
+		/* see unicode standard 3.0, S 3.8, max 4 octets */
+		out[0] = 0xf0 | (c >> 18);
+		out[1] = 0x80 | ((c >> 12) & 0x3f);
+		out[2] = 0x80 | ((c >> 6) & 0x3f);
+		out[3] = 0x80 | (c & 0x3f);
+		out += 4;
+	} else if (c > 0x7ff) {
+		out[0] = 0xe0 | (c >> 12);
+		out[1] = 0x80 | ((c >> 6) & 0x3f);
+		out[2] = 0x80 | (c & 0x3f);
+		out += 3;
+	} else if (c > 0x7f) {
+		out[0] = 0xc0 | (c >> 6);
+		out[1] = 0x80 | (c & 0x3f);
+		out += 2;
+	} else {
+		/* plain 7 bit value, stored as is */
+		*out++ = c;
+	}
+
+	*ptr = out;
+}
+
+/**
+ * camel_utf8_getc:
+ * @ptr: in/out cursor into a NUL terminated octet string.
+ *
+ * Get a Unicode character from a utf8 stream and advance @ptr past it.
+ * Octets that cannot start a sequence (stray continuation octets
+ * 0x80-0xbf, and 0xf8-0xff) are silently skipped.  A sequence that is
+ * cut short by a non-continuation octet is dropped and decoding restarts
+ * at that octet.  Overlong forms are not rejected.
+ *
+ * Return value: The next Unicode character, or 0 once the terminating
+ * NUL has been read (@ptr is then left one past the NUL).
+ **/
+guint32
+camel_utf8_getc(const unsigned char **ptr)
+{
+	register const unsigned char *p = *ptr;
+	register unsigned char c, r;
+	register guint32 v, m;
+
+again:
+	r = *p++;
+loop:
+	if (r < 0x80) {
+		*ptr = p;
+		v = r;
+	} else if (r >= 0xc0 && r < 0xf8) { /* valid start char? (max 4 octets) */
+		/* r >= 0xc0: a continuation octet (10xxxxxx) must not be taken
+		 * as a lead octet, it falls through to the skip branch below */
+		v = r;
+		m = 0x7f80;	/* used to mask out the length bits */
+		do {
+			c = *p++;
+			if ((c & 0xc0) != 0x80) {
+				/* truncated sequence: restart with the octet that broke it */
+				r = c;
+				goto loop;
+			}
+			v = (v<<6) | (c & 0x3f);
+			/* each remaining 1 bit below the top of the lead octet
+			 * announces one more continuation octet */
+			r<<=1;
+			m<<=5;
+		} while (r & 0x40);
+
+		*ptr = p;
+
+		v &= ~m;
+	} else {
+		/* not a valid lead octet, skip it */
+		goto again;
+	}
+
+	return v;
+}
+
+/**
+ * g_string_append_u:
+ * @out: string to append to.
+ * @c: character value to append.
+ *
+ * Append @c to @out encoded as utf8, using camel_utf8_putc().  A @c of 0
+ * appends nothing, because the encoded octets are handed to
+ * g_string_append() as a NUL terminated string.
+ **/
+void
+g_string_append_u(GString *out, guint32 c)
+{
+	unsigned char buffer[8];
+	unsigned char *p = buffer;
+
+	camel_utf8_putc(&p, c);
+	*p = 0;
+	/* g_string_append() takes a plain char string, not unsigned char */
+	g_string_append(out, (const char *) buffer);
+}
+
+static char *utf7_alphabet = /* the 64 base64 digits used for encoding, indexed by 6 bit value; the last two are '+' and ',' */
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+
+static unsigned char utf7_rank[256] = { /* inverse of utf7_alphabet: octet -> 6 bit value, 0xff for octets that are not a digit */
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x3e,0x3f,0xff,0xff,0xff,
+ 0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,
+ 0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,
+ 0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+};
+
+/**
+ * camel_utf7_utf8:
+ * @ptr: NUL terminated modified utf7 string.
+ *
+ * Convert a modified utf7 string to utf8.  If the utf7 string
+ * contains 8 bit characters, they are treated as iso-8859-1.
+ *
+ * The IMAP rules [rfc2060] are used in the utf7 encoding.  The base64
+ * sections carry 16 bit units; a high surrogate (0xd800-0xdbff)
+ * followed by a low surrogate (0xdc00-0xdfff) is combined into a single
+ * character.  Unpaired surrogates are passed through unchanged rather
+ * than rejected.
+ *
+ * Return value: The converted string, newly allocated with g_strdup().
+ **/
+char *
+camel_utf7_utf8(const char *ptr)
+{
+	const unsigned char *p = (unsigned char *)ptr;
+	unsigned int c;
+	guint32 v=0, x;
+	guint32 hi = 0;	/* pending high surrogate, 0 when there is none */
+	GString *out;
+	int i=0;	/* number of not yet consumed bits at the low end of v */
+	int state = 0;	/* 0: plain text, 1: just seen '&', 2: inside base64 */
+	char *ret;
+
+	out = g_string_new("");
+	do {
+		c = *p++;
+		switch(state) {
+		case 0:
+			if (c == '&')
+				state = 1;
+			else
+				g_string_append_u(out, c);
+			break;
+		case 1:
+			if (c == '-') {
+				/* "&-" stands for a literal '&' */
+				g_string_append_c(out, '&');
+				state = 0;
+			} else if (utf7_rank[c] != 0xff) {
+				v = utf7_rank[c];
+				i = 6;
+				state = 2;
+			} else {
+				/* invalid */
+				/* NOTE(review): c itself is dropped here, and this branch
+				 * also runs for the terminating NUL of a string that ends
+				 * in '&' -- confirm "&-" is the intended output. */
+				g_string_append(out, "&-");
+				state = 0;
+			}
+			break;
+		case 2:
+			if (utf7_rank[c] != 0xff) {
+				v = (v<<6) | utf7_rank[c];
+				i+=6;
+				if (i >= 16) {
+					x = (v >> (i-16)) & 0xffff;
+					i-=16;
+					if (hi != 0 && x >= 0xdc00 && x <= 0xdfff) {
+						/* complete surrogate pair: one character above 0xffff */
+						g_string_append_u(out, 0x10000 + ((hi - 0xd800) << 10) + (x - 0xdc00));
+						hi = 0;
+					} else {
+						if (hi != 0) {
+							/* high surrogate without its partner */
+							g_string_append_u(out, hi);
+							hi = 0;
+						}
+						if (x >= 0xd800 && x <= 0xdbff)
+							hi = x;	/* wait for the low half */
+						else
+							g_string_append_u(out, x);
+					}
+				}
+			} else {
+				/* '-' or any other non-base64 octet ends the section */
+				if (hi != 0) {
+					g_string_append_u(out, hi);
+					hi = 0;
+				}
+				/* the closing '-' is swallowed, anything else is copied */
+				if (c != '-')
+					g_string_append_u(out, c);
+				state = 0;
+			}
+			break;
+		}
+	} while (c);
+
+	ret = g_strdup(out->str);
+	g_string_free(out, TRUE);
+
+	return ret;
+}
+
+/* Finish an open base64 section: if i bits are still pending at the low
+ * end of v, emit them as one last digit (shifted up to fill the 6 bit
+ * digit, low bits zero), then emit the closing '-'. */
+static void utf7_closeb64(GString *out, guint32 v, guint32 i)
+{
+	if (i != 0) {
+		guint32 last = (v << (6-i)) & 0x3f;
+
+		g_string_append_c(out, utf7_alphabet[last]);
+	}
+
+	g_string_append_c(out, '-');
+}
+
+/**
+ * camel_utf8_utf7:
+ * @ptr: NUL terminated utf8 string.
+ *
+ * Convert a utf8 string to a modified utf7 format.  Characters in the
+ * range 0x20-0x7e are copied as is, except '&' which is written as "&-".
+ * Everything else is written as 16 bit units inside a "&...-" base64
+ * section; characters above 0xffff are written as a surrogate pair.
+ *
+ * The IMAP rules [rfc2060] are used in the utf7 encoding.
+ *
+ * Return value: The converted string, newly allocated with g_strdup().
+ **/
+char *
+camel_utf8_utf7(const char *ptr)
+{
+	const unsigned char *p = (unsigned char *)ptr;
+	unsigned int c;
+	guint32 x, v = 0;
+	guint32 units[2];	/* the 16 bit unit(s) that make up c */
+	int nunits, k;
+	int state = 0;	/* 1 while a base64 section is open */
+	GString *out;
+	int i = 0;	/* number of not yet emitted bits at the low end of v */
+	char *ret;
+
+	out = g_string_new("");
+
+	while ( (c = camel_utf8_getc(&p)) ) {
+		if (c >= 0x20 && c <= 0x7e) {
+			if (state == 1) {
+				utf7_closeb64(out, v, i);
+				state = 0;
+				i = 0;
+			}
+			if (c == '&')
+				g_string_append(out, "&-");
+			else
+				g_string_append_c(out, c);
+		} else {
+			if (state == 0) {
+				g_string_append_c(out, '&');
+				state = 1;
+			}
+			if (c > 0xffff) {
+				/* does not fit one 16 bit unit: split into a surrogate
+				 * pair so it cannot clobber the bits pending in v */
+				c -= 0x10000;
+				units[0] = 0xd800 | ((c >> 10) & 0x3ff);
+				units[1] = 0xdc00 | (c & 0x3ff);
+				nunits = 2;
+			} else {
+				units[0] = c;
+				nunits = 1;
+			}
+			for (k = 0; k < nunits; k++) {
+				v = (v << 16) | units[k];
+				i += 16;
+				/* emit every complete 6 bit digit, keep the rest in v */
+				while (i >= 6) {
+					x = (v >> (i-6)) & 0x3f;
+					g_string_append_c(out, utf7_alphabet[x]);
+					i -= 6;
+				}
+			}
+		}
+	}
+
+	if (state == 1)
+		utf7_closeb64(out, v, i);
+
+	ret = g_strdup(out->str);
+	g_string_free(out, TRUE);
+
+	return ret;
+}
diff --git a/camel/camel-utf8.h b/camel/camel-utf8.h
new file mode 100644
index 0000000000..7d6fac5410
--- /dev/null
+++ b/camel/camel-utf8.h
@@ -0,0 +1,16 @@
+
+#ifndef _CAMEL_UTF8_H
+#define _CAMEL_UTF8_H
+
+#include <glib.h>	/* guint32, GString: keeps this header self-contained */
+
+/* write c at *ptr as utf8 octets (at most 4) and advance *ptr past them */
+void camel_utf8_putc(unsigned char **ptr, guint32 c);
+/* read the next character from the NUL terminated utf8 at *ptr and advance *ptr */
+guint32 camel_utf8_getc(const unsigned char **ptr);
+
+/* utility func for utf8 gstrings */
+void g_string_append_u(GString *out, guint32 c);
+
+/* convert utf7 to/from utf8, actually this is modified IMAP utf7;
+ * both take a NUL terminated string and return a newly allocated one */
+char *camel_utf7_utf8(const char *ptr);
+char *camel_utf8_utf7(const char *ptr);
+
+
+#endif /* ! _CAMEL_UTF8_H */
diff --git a/camel/providers/imap/camel-imap-utils.c b/camel/providers/imap/camel-imap-utils.c
index 4f0f9f143b..15bf2540b3 100644
--- a/camel/providers/imap/camel-imap-utils.c
+++ b/camel/providers/imap/camel-imap-utils.c
@@ -30,6 +30,7 @@
#include "camel-imap-summary.h"
#include "camel-imap-store.h"
#include "camel-folder.h"
+#include "camel-utf8.h"
#define d(x) x
@@ -1119,263 +1120,26 @@ imap_concat (CamelImapStore *imap_store, const char *prefix, const char *suffix)
return g_strdup_printf ("%s%c%s", prefix, imap_store->dir_sep, suffix);
}
-#define UTF8_TO_UTF7_LEN(len) ((len * 3) + 8)
-#define UTF7_TO_UTF8_LEN(len) (len)
-
-enum {
- MODE_USASCII,
- MODE_AMPERSAND,
- MODE_MODUTF7
-};
-
-#define is_usascii(c) (((c) >= 0x20 && (c) <= 0x25) || ((c) >= 0x27 && (c) <= 0x7e))
-#define encode_mode(c) (is_usascii (c) ? MODE_USASCII : (c) == '&' ? MODE_AMPERSAND : MODE_MODUTF7)
-
char *
imap_mailbox_encode (const unsigned char *in, size_t inlen)
{
- const unsigned char *start, *inptr, *inend;
- unsigned char *mailbox, *m, *mend;
- size_t inleft, outleft, conv;
- char *inbuf, *outbuf;
- iconv_t cd;
- int mode;
-
- cd = (iconv_t) -1;
- m = mailbox = g_malloc (UTF8_TO_UTF7_LEN (inlen) + 1);
- mend = mailbox + UTF8_TO_UTF7_LEN (inlen);
-
- start = inptr = in;
- inend = in + inlen;
- mode = MODE_USASCII;
-
- while (inptr < inend) {
- int new_mode;
-
- new_mode = encode_mode (*inptr);
-
- if (new_mode != mode) {
- switch (mode) {
- case MODE_USASCII:
- memcpy (m, start, inptr - start);
- m += (inptr - start);
- break;
- case MODE_AMPERSAND:
- while (start < inptr) {
- *m++ = '&';
- *m++ = '-';
- start++;
- }
- break;
- case MODE_MODUTF7:
- inbuf = (char *) start;
- inleft = inptr - start;
- outbuf = (char *) m;
- outleft = mend - m;
-
- if (cd == (iconv_t) -1)
- cd = iconv_open ("UTF-7", "UTF-8");
-
- conv = iconv (cd, &inbuf, &inleft, &outbuf, &outleft);
- if (conv == (size_t) -1) {
- g_warning ("error converting mailbox to UTF-7!");
- }
- iconv (cd, NULL, NULL, &outbuf, &outleft);
-
- /* shift into modified UTF-7 mode (overwrite UTF-7's '+' shift)... */
- *m++ = '&';
-
- while (m < (unsigned char *) outbuf) {
- /* replace '/' with ',' */
- if (*m == '/')
- *m = ',';
-
- m++;
- }
-
- break;
- }
-
- mode = new_mode;
- start = inptr;
- }
-
- inptr++;
- }
-
- switch (mode) {
- case MODE_USASCII:
- memcpy (m, start, inptr - start);
- m += (inptr - start);
- break;
- case MODE_AMPERSAND:
- while (start < inptr) {
- *m++ = '&';
- *m++ = '-';
- start++;
- }
- break;
- case MODE_MODUTF7:
- inbuf = (char *) start;
- inleft = inptr - start;
- outbuf = (char *) m;
- outleft = mend - m;
-
- if (cd == (iconv_t) -1)
- cd = iconv_open ("UTF-7", "UTF-8");
-
- conv = iconv (cd, &inbuf, &inleft, &outbuf, &outleft);
- if (conv == (size_t) -1) {
- g_warning ("error converting mailbox to UTF-7!");
- }
- iconv (cd, NULL, NULL, &outbuf, &outleft);
-
- /* shift into modified UTF-7 mode (overwrite UTF-7's '+' shift)... */
- *m++ = '&';
-
- while (m < (unsigned char *) outbuf) {
- /* replace '/' with ',' */
- if (*m == '/')
- *m = ',';
-
- m++;
- }
-
- break;
- }
-
- *m = '\0';
-
- if (cd != (iconv_t) -1)
- iconv_close (cd);
-
- return mailbox;
-}
+	char *buf, *ret;
+
+	/* camel_utf8_utf7() reads up to a NUL, but in/inlen is a counted
+	 * buffer, so convert a terminated copy.  The copy lives on the heap
+	 * because inlen is not bounded here. */
+	buf = g_malloc(inlen+1);
+	memcpy(buf, in, inlen);
+	buf[inlen] = 0;
+
+	ret = camel_utf8_utf7(buf);
+	g_free(buf);
+
+	return ret;
+}
char *
imap_mailbox_decode (const unsigned char *in, size_t inlen)
{
- const unsigned char *start, *inptr, *inend;
- unsigned char *mailbox, *m, *mend;
- unsigned char mode_switch;
- iconv_t cd;
-
- cd = (iconv_t) -1;
- m = mailbox = g_malloc (UTF7_TO_UTF8_LEN (inlen) + 1);
- mend = mailbox + UTF7_TO_UTF8_LEN (inlen);
-
- start = inptr = in;
- inend = in + inlen;
- mode_switch = '&';
-
- while (inptr < inend) {
- if (*inptr == mode_switch) {
- if (mode_switch == '&') {
- /* mode switch from US-ASCII to UTF-7 */
- mode_switch = '-';
- memcpy (m, start, inptr - start);
- m += (inptr - start);
- start = inptr;
- } else if (mode_switch == '-') {
- /* mode switch from UTF-7 to US-ASCII or an ampersand (&) */
- mode_switch = '&';
- start++;
- if (start == inptr) {
- /* we had the sequence "&-" which becomes "&" when decoded */
- *m++ = '&';
- } else {
- char *buffer, *inbuf, *outbuf;
- size_t buflen, outleft, conv;
-
- buflen = (inptr - start) + 2;
- inbuf = buffer = alloca (buflen);
- *inbuf++ = '+';
- while (start < inptr) {
- *inbuf++ = *start == ',' ? '/' : *start;
- start++;
- }
- *inbuf = '-';
-
- inbuf = buffer;
- outbuf = (char *) m;
- outleft = mend - m;
-
- if (cd == (iconv_t) -1)
- cd = iconv_open ("UTF-8", "UTF-7");
-
- conv = iconv (cd, &inbuf, &buflen, &outbuf, &outleft);
- if (conv == (size_t) -1) {
- g_warning ("error decoding mailbox: %.*s", inlen, in);
- }
- iconv (cd, NULL, NULL, NULL, NULL);
-
- m = (unsigned char *) outbuf;
- }
-
- /* point to the char after the '-' */
- start = inptr + 1;
- }
- }
-
- inptr++;
- }
-
- if (*inptr == mode_switch) {
- if (mode_switch == '&') {
- /* the remaining text is US-ASCII */
- memcpy (m, start, inptr - start);
- m += (inptr - start);
- start = inptr;
- } else if (mode_switch == '-') {
- /* We've got encoded UTF-7 or else an ampersand */
- start++;
- if (start == inptr) {
- /* we had the sequence "&-" which becomes "&" when decoded */
- *m++ = '&';
- } else {
- char *buffer, *inbuf, *outbuf;
- size_t buflen, outleft, conv;
-
- buflen = (inptr - start) + 2;
- inbuf = buffer = alloca (buflen);
- *inbuf++ = '+';
- while (start < inptr) {
- *inbuf++ = *start == ',' ? '/' : *start;
- start++;
- }
- *inbuf = '-';
-
- inbuf = buffer;
- outbuf = (char *) m;
- outleft = mend - m;
-
- if (cd == (iconv_t) -1)
- cd = iconv_open ("UTF-8", "UTF-7");
-
- conv = iconv (cd, &inbuf, &buflen, &outbuf, &outleft);
- if (conv == (size_t) -1) {
- g_warning ("error decoding mailbox: %.*s", inlen, in);
- }
- iconv (cd, NULL, NULL, NULL, NULL);
-
- m = (unsigned char *) outbuf;
- }
- }
- } else {
- if (mode_switch == '-') {
- /* illegal encoded mailbox... */
- g_warning ("illegal mailbox name encountered: %.*s", inlen, in);
- }
-
- memcpy (m, start, inptr - start);
- m += (inptr - start);
- }
-
- *m = '\0';
-
- if (cd != (iconv_t) -1)
- iconv_close (cd);
-
- return mailbox;
+	char *buf, *ret;
+
+	/* camel_utf7_utf8() reads up to a NUL, but in/inlen is a counted
+	 * buffer, so convert a terminated copy.  The copy lives on the heap
+	 * because inlen is not bounded here. */
+	buf = g_malloc(inlen+1);
+	memcpy(buf, in, inlen);
+	buf[inlen] = 0;
+
+	ret = camel_utf7_utf8(buf);
+	g_free(buf);
+
+	return ret;
}