diff options
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | camel/gmime-rfc2047.c | 179 | ||||
-rw-r--r-- | tests/.cvsignore | 5 | ||||
-rw-r--r-- | tests/test5.c | 60 |
4 files changed, 168 insertions, 82 deletions
@@ -1,3 +1,9 @@ +1998-08-06 Robert Brady <rwb197@ecs.soton.ac.uk> + + * tests/test5.c: test for RFC2047 decoder. + + * camel/gmime-rfc2047.c: Improved RFC2047 decoder. + 1999-08-06 bertrand <Bertrand.Guiheneuf@aful.org> * camel/providers/MH/camel-mh-folder.c (_exists): add debug information diff --git a/camel/gmime-rfc2047.c b/camel/gmime-rfc2047.c index 7266cb1159..2a44a20dd6 100644 --- a/camel/gmime-rfc2047.c +++ b/camel/gmime-rfc2047.c @@ -49,10 +49,11 @@ hexval (gchar c) { return c - 'a' + 10; } -static void -decode_quoted (const gchar *text, gchar *to) -{ - while (*text) { +static gchar * +decode_quoted(const gchar *text, const gchar *end) { + gchar *to = malloc(end - text + 1), *to_2 = to; + if (!to) return NULL; + while (*text && text < end) { if (*text == '=') { gchar a = hexval (text[1]); gchar b = hexval (text[2]); @@ -70,20 +71,23 @@ decode_quoted (const gchar *text, gchar *to) text++; } } - *to = 0; + return to_2; } -static void -decode_base64 (const gchar *what, gchar *where) -{ +static gchar * +decode_base64(const gchar *data, const gchar *end) { unsigned short pattern = 0; int bits = 0; int delimiter = '='; gchar x; - gchar *t = where; + gchar *buffer = g_malloc((end - data) * 3); + gchar *t = buffer; int Q = 0; - while (*what != delimiter) { - x = base64_rank[(unsigned char)(*what++)]; + + if (!buffer) return NULL; + + while (*data != delimiter) { + x = base64_rank[(unsigned char)(*data++)]; if (x == NOT_RANKED) continue; pattern <<= 6; @@ -97,6 +101,7 @@ decode_base64 (const gchar *what, gchar *where) } } *t = 0; + return buffer; } static void @@ -113,86 +118,96 @@ build_base64_rank_table (void) } } -gchar -*gmime_rfc2047_decode (const gchar *data, const gchar *into_what) + +gchar* +rfc2047_decode_word (const gchar *data, const gchar *into_what) { - gchar buffer[4096] /* FIXME : constant sized buffer */, *b = buffer; + const char *charset = strstr(data, "=?"), *encoding, *text, *end; + + char *buffer, *b, *cooked_data; + buffer = g_malloc(strlen(data) * 2); + b = buffer; + + if (!charset) return strdup(data); + charset+=2; + + encoding = strchr(charset, '?'); + if (!encoding) return strdup(data); + encoding++; + + text = strchr(encoding, '?'); + if (!text) return strdup(data); + text++; + + end = strstr(text, "?="); + if (!end) return strdup(data); + + b[0] = 0; + + if (toupper(*encoding)=='Q') + cooked_data = decode_quoted(text, end); + else if (toupper(*encoding)=='B') + cooked_data = decode_base64(text, end); + else + return g_strdup(data); + + { + char *c = strchr(charset, '?'); + char *q = g_malloc(c - charset + 1); + char *cook_2 = cooked_data; + int cook_len = strlen(cook_2); + int b_len = 4096; + iconv_t i; + strncpy(q, charset, c - charset); + i = unicode_iconv_open(into_what, q); + if (!i) { + g_free(q); + return g_strdup(buffer); + } + unicode_iconv(i, &cook_2, &cook_len, &b, &b_len); + unicode_iconv_close(i); + } + + return g_strdup(buffer); +} + +gchar * +gmime_rfc2047_decode (const gchar *data, const gchar *into_what) +{ + char *buffer = malloc(strlen(data) * 4), *b = buffer; + + int was_encoded_word = 0; + build_base64_rank_table (); - - while (*data) { - - /* If we encounter an error we just break out of the loop and copy the rest - * of the text as-is */ - - if (*data=='=') { - data++; - if (*data=='?') { - gchar *charset, *encoding, *text, *end; - gchar dc[4096]; - charset = data+1; - encoding = strchr (charset, '?'); - - if (!encoding) break; - encoding++; - text = strchr (encoding, '?'); - if (!text) break; - text++; - end = strstr (text, "?="); - if (!end) break; - end++; - - *(encoding-1)=0; - *(text-1)=0; - *(end-1)=0; - - if (strcasecmp (encoding, "q") == 0) { - decode_quoted(text, dc); - } else if (strcasecmp (encoding, "b") == 0) { - decode_base64 (text, dc); - } else { - /* What to do here? */ - break; - } - - { - int f; - iconv_t i; - const gchar *d2 = dc; - int l = strlen (d2), l2 = 4000; - - i = unicode_iconv_open (into_what, charset); - if (!i) - break; - - unicode_iconv (i, &d2, &l, &b, &l2); - - unicode_iconv_close (i); - data = end; - } + + while (data && *data) { + char *word_start = strstr(data, "=?"), *decoded; + if (!word_start) { + strcpy(b, data); + return buffer; + } + if (word_start != data) { + + if (strspn(data, " \t\n\r") != (word_start - data)) { + strncpy(b, data, word_start - data); + b += word_start - data; } - } else { - *b = *data; - b++; } - - data++; - - } - - while (*data) { - *b = *data; - b++; - data++; + decoded = rfc2047_decode_word(word_start, into_what); + strcpy(b, decoded); + b += strlen(decoded); + g_free(decoded); + + data = strstr(data, "?=") + 2; } - + *b = 0; - - return g_strdup (buffer); + return buffer; } gchar -*rfc2047_encode (const gchar *string, const gchar *charset) +*gmime_rfc2047_encode (const gchar *string, const gchar *charset) { gchar buffer[4096] /* FIXME : constant sized buffer */; gchar *b = buffer; @@ -213,7 +228,7 @@ gchar while (*s) { if (*s == ' ') b += sprintf (b, "_"); else if (*s < 0x20 || *s >= 0x7f || *s == '=' || *s == '?' || *s == '_') { - b += sprintf (b, "=%2x", *s); + b += sprintf (b, "=%2x", (unsigned char)*s); } else { b += sprintf (b, "%c", *s); } diff --git a/tests/.cvsignore b/tests/.cvsignore index 8cb5c0556b..7232b242dc 100644 --- a/tests/.cvsignore +++ b/tests/.cvsignore @@ -1,3 +1,8 @@ +test1 +test2 +test3 +test4 +test5 Makefile.in Makefile .deps diff --git a/tests/test5.c b/tests/test5.c new file mode 100644 index 0000000000..2f47fda418 --- /dev/null +++ b/tests/test5.c @@ -0,0 +1,60 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ + +/* test for the RFC 2047 decoder */ + +#include <string.h> +#include <unicode.h> + +#include "gmime-utils.h" +#include "stdio.h" +#include "camel-log.h" +#include "camel-mime-message.h" +#include "camel-mime-part.h" +#include "camel-stream.h" +#include "camel-stream-fs.h" +#include "camel.h" +#include "gmime-rfc2047.h" + +#define TERMINAL_CHARSET "UTF-8" + +/* + * Info on many unicode issues, including, utf-8 xterms from : + * + * http://www.cl.cam.ac.uk/~mgk/unicode.html + * + */ + +const char *tests[] = +{ +/* these strings come from RFC 2047. Ought to add a few torture cases here. */ + "=?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>", + "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>", + "=?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>", + "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=", + "=?ISO-8859-1?Q?Olle_J=E4rnefors?= <ojarnef@admin.kth.se>", + "=?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?= <paf@nada.kth.se>", + "Nathaniel Borenstein <nsb@thumper.bellcore.com> (=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=)", + "", + "(=?ISO-8859-1?Q?a?=)", /* should be displayed as (a) */ + "(=?ISO-8859-1?Q?a?= b)", /* (a b) */ + "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)", /* (ab) */ + "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)", /* (ab) */ + "(=?ISO-8859-1?Q?a?= \n=?ISO-8859-1?Q?b?=)", /* (ab) */ + "(=?ISO-8859-1?Q?a_b?=)", /* (a b) */ + "(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)", /* (ab) */ + NULL +}; + + +int +main (int argc, char**argv) +{ + const char **b = tests; + while (*b) { + printf("%s\n", gmime_rfc2047_decode(*b, TERMINAL_CHARSET)); + b++; + } + + return 0; + +} |