aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--camel/gmime-rfc2047.c179
-rw-r--r--tests/.cvsignore5
-rw-r--r--tests/test5.c60
4 files changed, 168 insertions, 82 deletions
diff --git a/ChangeLog b/ChangeLog
index e067b02f92..f65242e705 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+1999-08-06 Robert Brady <rwb197@ecs.soton.ac.uk>
+
+ * tests/test5.c: test for RFC2047 decoder.
+
+ * camel/gmime-rfc2047.c: Improved RFC2047 decoder.
+
1999-08-06 bertrand <Bertrand.Guiheneuf@aful.org>
* camel/providers/MH/camel-mh-folder.c (_exists): add debug information
diff --git a/camel/gmime-rfc2047.c b/camel/gmime-rfc2047.c
index 7266cb1159..2a44a20dd6 100644
--- a/camel/gmime-rfc2047.c
+++ b/camel/gmime-rfc2047.c
@@ -49,10 +49,11 @@ hexval (gchar c) {
return c - 'a' + 10;
}
-static void
-decode_quoted (const gchar *text, gchar *to)
-{
- while (*text) {
+static gchar *
+decode_quoted(const gchar *text, const gchar *end) {
+ gchar *to = malloc(end - text + 1), *to_2 = to;
+ if (!to) return NULL;
+ while (*text && text < end) {
if (*text == '=') {
gchar a = hexval (text[1]);
gchar b = hexval (text[2]);
@@ -70,20 +71,23 @@ decode_quoted (const gchar *text, gchar *to)
text++;
}
}
- *to = 0;
+ return to_2;
}
-static void
-decode_base64 (const gchar *what, gchar *where)
-{
+static gchar *
+decode_base64(const gchar *data, const gchar *end) {
unsigned short pattern = 0;
int bits = 0;
int delimiter = '=';
gchar x;
- gchar *t = where;
+ gchar *buffer = g_malloc((end - data) * 3);
+ gchar *t = buffer;
int Q = 0;
- while (*what != delimiter) {
- x = base64_rank[(unsigned char)(*what++)];
+
+ if (!buffer) return NULL;
+
+ while (*data != delimiter) {
+ x = base64_rank[(unsigned char)(*data++)];
if (x == NOT_RANKED)
continue;
pattern <<= 6;
@@ -97,6 +101,7 @@ decode_base64 (const gchar *what, gchar *where)
}
}
*t = 0;
+ return buffer;
}
static void
@@ -113,86 +118,96 @@ build_base64_rank_table (void)
}
}
-gchar
-*gmime_rfc2047_decode (const gchar *data, const gchar *into_what)
+
+gchar*
+rfc2047_decode_word (const gchar *data, const gchar *into_what)
{
- gchar buffer[4096] /* FIXME : constant sized buffer */, *b = buffer;
+ const char *charset = strstr(data, "=?"), *encoding, *text, *end;
+
+ char *buffer, *b, *cooked_data;
+ buffer = g_malloc(strlen(data) * 2);
+ b = buffer;
+
+ if (!charset) return strdup(data);
+ charset+=2;
+
+ encoding = strchr(charset, '?');
+ if (!encoding) return strdup(data);
+ encoding++;
+
+ text = strchr(encoding, '?');
+ if (!text) return strdup(data);
+ text++;
+
+ end = strstr(text, "?=");
+ if (!end) return strdup(data);
+
+ b[0] = 0;
+
+ if (toupper(*encoding)=='Q')
+ cooked_data = decode_quoted(text, end);
+ else if (toupper(*encoding)=='B')
+ cooked_data = decode_base64(text, end);
+ else
+ return g_strdup(data);
+
+ {
+ char *c = strchr(charset, '?');
+ char *q = g_malloc(c - charset + 1);
+ char *cook_2 = cooked_data;
+ int cook_len = strlen(cook_2);
+ int b_len = 4096;
+ iconv_t i;
+ strncpy(q, charset, c - charset);
+ i = unicode_iconv_open(into_what, q);
+ if (!i) {
+ g_free(q);
+ return g_strdup(buffer);
+ }
+ unicode_iconv(i, &cook_2, &cook_len, &b, &b_len);
+ unicode_iconv_close(i);
+ }
+
+ return g_strdup(buffer);
+}
+
+gchar *
+gmime_rfc2047_decode (const gchar *data, const gchar *into_what)
+{
+ char *buffer = malloc(strlen(data) * 4), *b = buffer;
+
+ int was_encoded_word = 0;
+
build_base64_rank_table ();
-
- while (*data) {
-
- /* If we encounter an error we just break out of the loop and copy the rest
- * of the text as-is */
-
- if (*data=='=') {
- data++;
- if (*data=='?') {
- gchar *charset, *encoding, *text, *end;
- gchar dc[4096];
- charset = data+1;
- encoding = strchr (charset, '?');
-
- if (!encoding) break;
- encoding++;
- text = strchr (encoding, '?');
- if (!text) break;
- text++;
- end = strstr (text, "?=");
- if (!end) break;
- end++;
-
- *(encoding-1)=0;
- *(text-1)=0;
- *(end-1)=0;
-
- if (strcasecmp (encoding, "q") == 0) {
- decode_quoted(text, dc);
- } else if (strcasecmp (encoding, "b") == 0) {
- decode_base64 (text, dc);
- } else {
- /* What to do here? */
- break;
- }
-
- {
- int f;
- iconv_t i;
- const gchar *d2 = dc;
- int l = strlen (d2), l2 = 4000;
-
- i = unicode_iconv_open (into_what, charset);
- if (!i)
- break;
-
- unicode_iconv (i, &d2, &l, &b, &l2);
-
- unicode_iconv_close (i);
- data = end;
- }
+
+ while (data && *data) {
+ char *word_start = strstr(data, "=?"), *decoded;
+ if (!word_start) {
+ strcpy(b, data);
+ return buffer;
+ }
+ if (word_start != data) {
+
+ if (strspn(data, " \t\n\r") != (word_start - data)) {
+ strncpy(b, data, word_start - data);
+ b += word_start - data;
}
- } else {
- *b = *data;
- b++;
}
-
- data++;
-
- }
-
- while (*data) {
- *b = *data;
- b++;
- data++;
+ decoded = rfc2047_decode_word(word_start, into_what);
+ strcpy(b, decoded);
+ b += strlen(decoded);
+ g_free(decoded);
+
+ data = strstr(data, "?=") + 2;
}
-
+
*b = 0;
-
- return g_strdup (buffer);
+ return buffer;
}
gchar
-*rfc2047_encode (const gchar *string, const gchar *charset)
+*gmime_rfc2047_encode (const gchar *string, const gchar *charset)
{
gchar buffer[4096] /* FIXME : constant sized buffer */;
gchar *b = buffer;
@@ -213,7 +228,7 @@ gchar
while (*s) {
if (*s == ' ') b += sprintf (b, "_");
else if (*s < 0x20 || *s >= 0x7f || *s == '=' || *s == '?' || *s == '_') {
- b += sprintf (b, "=%2x", *s);
+ b += sprintf (b, "=%2x", (unsigned char)*s);
} else {
b += sprintf (b, "%c", *s);
}
diff --git a/tests/.cvsignore b/tests/.cvsignore
index 8cb5c0556b..7232b242dc 100644
--- a/tests/.cvsignore
+++ b/tests/.cvsignore
@@ -1,3 +1,8 @@
+test1
+test2
+test3
+test4
+test5
Makefile.in
Makefile
.deps
diff --git a/tests/test5.c b/tests/test5.c
new file mode 100644
index 0000000000..2f47fda418
--- /dev/null
+++ b/tests/test5.c
@@ -0,0 +1,60 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+
+/* test for the RFC 2047 decoder */
+
+#include <string.h>
+#include <unicode.h>
+
+#include "gmime-utils.h"
+#include "stdio.h"
+#include "camel-log.h"
+#include "camel-mime-message.h"
+#include "camel-mime-part.h"
+#include "camel-stream.h"
+#include "camel-stream-fs.h"
+#include "camel.h"
+#include "gmime-rfc2047.h"
+
+#define TERMINAL_CHARSET "UTF-8"
+
+/*
+ * Info on many Unicode issues, including UTF-8 xterms, from:
+ *
+ * http://www.cl.cam.ac.uk/~mgk/unicode.html
+ *
+ */
+
+const char *tests[] =
+{
+/* these strings come from RFC 2047. Ought to add a few torture cases here. */
+ "=?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>",
+ "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>",
+ "=?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>",
+ "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",
+ "=?ISO-8859-1?Q?Olle_J=E4rnefors?= <ojarnef@admin.kth.se>",
+ "=?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?= <paf@nada.kth.se>",
+ "Nathaniel Borenstein <nsb@thumper.bellcore.com> (=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=)",
+ "",
+ "(=?ISO-8859-1?Q?a?=)", /* should be displayed as (a) */
+ "(=?ISO-8859-1?Q?a?= b)", /* (a b) */
+ "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)", /* (ab) */
+ "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)", /* (ab) */
+ "(=?ISO-8859-1?Q?a?= \n=?ISO-8859-1?Q?b?=)", /* (ab) */
+ "(=?ISO-8859-1?Q?a_b?=)", /* (a b) */
+ "(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)", /* (a b) */
+ NULL
+};
+
+
+int
+main (int argc, char**argv)
+{
+ const char **b = tests;
+ while (*b) {
+ printf("%s\n", gmime_rfc2047_decode(*b, TERMINAL_CHARSET));
+ b++;
+ }
+
+ return 0;
+
+}