aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--camel/ChangeLog21
-rw-r--r--camel/camel-mime-utils.c544
-rw-r--r--camel/camel-mime-utils.h2
-rw-r--r--camel/tests/misc/Makefile.am5
-rw-r--r--camel/tests/misc/README1
-rw-r--r--camel/tests/misc/test2.c121
6 files changed, 351 insertions, 343 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog
index 8b3df0c46d..465c17777f 100644
--- a/camel/ChangeLog
+++ b/camel/ChangeLog
@@ -1,3 +1,24 @@
+2004-06-01 Not Zed <NotZed@Ximian.com>
+
+ ** A few fixes for better rfc compliance, and cleaner code.
+
+ * camel-mime-utils.c (header_encode_param): a bunch of logic
+ cleanups with new util functions.
+ (header_decode_init): setup a new type ATTR_CHAR, for
+ attribute-char.
+
+ * tests/misc/test2.c (main): new test for rfc2184 stuff.
+
+ * camel-mime-utils.c (header_convert): helper to convert between
+ charsets.
+ (rfc2184_decode): fix a bunch of logic problems and use the helper
+ above to simplify code.
+ (decode_param_token): removed, not needed.
+ (header_decode_rfc2184_param): removed, not needed.
+ (header_decode_param): removed, not needed. ugh.
+ (header_decode_param_list): completely rewritten, hence lack of
+ need of above.
+
2004-05-31 Jeffrey Stedfast <fejj@ximian.com>
* camel-mime-filter-gzip.c (camel_mime_filter_gzip_finalize):
diff --git a/camel/camel-mime-utils.c b/camel/camel-mime-utils.c
index 35766e0b48..4e5ec39ee3 100644
--- a/camel/camel-mime-utils.c
+++ b/camel/camel-mime-utils.c
@@ -51,6 +51,7 @@
#include "camel-mime-utils.h"
#include "camel-charset-map.h"
#include "camel-service.h" /* for camel_gethostbyname() */
+#include "camel-utf8.h"
#ifndef CLEAN_DATE
#include "broken-date-parser.h"
@@ -96,6 +97,7 @@ static unsigned char camel_mime_base64_rank[256];
encoded word in text specials: rfc 2047 5(1)*/
#define CHARS_PSPECIAL "!*+-/" /* list of additional characters that can be left unencoded.
encoded word in phrase specials: rfc 2047 5(3) */
+#define CHARS_ATTRCHAR "*\'% " /* extra non-included attribute-chars */
static void
header_remove_bits(unsigned short bit, unsigned char *vals)
@@ -144,20 +146,22 @@ header_decode_init(void)
for (i=0;i<256;i++) {
camel_mime_special_table[i] = 0;
- if (i<32)
+ if (i<32 || i==127)
camel_mime_special_table[i] |= CAMEL_MIME_IS_CTRL;
+ else if (i < 127)
+ camel_mime_special_table[i] |= CAMEL_MIME_IS_ATTRCHAR;
if ((i>=32 && i<=60) || (i>=62 && i<=126) || i==9)
camel_mime_special_table[i] |= (CAMEL_MIME_IS_QPSAFE|CAMEL_MIME_IS_ESAFE);
if ((i>='0' && i<='9') || (i>='a' && i<='z') || (i>='A' && i<= 'Z'))
camel_mime_special_table[i] |= CAMEL_MIME_IS_PSAFE;
}
- camel_mime_special_table[127] |= CAMEL_MIME_IS_CTRL;
camel_mime_special_table[' '] |= CAMEL_MIME_IS_SPACE;
header_init_bits(CAMEL_MIME_IS_LWSP, 0, 0, CHARS_LWSP);
header_init_bits(CAMEL_MIME_IS_TSPECIAL, CAMEL_MIME_IS_CTRL, 0, CHARS_TSPECIAL);
header_init_bits(CAMEL_MIME_IS_SPECIAL, 0, 0, CHARS_SPECIAL);
header_init_bits(CAMEL_MIME_IS_DSPECIAL, 0, FALSE, CHARS_DSPECIAL);
header_remove_bits(CAMEL_MIME_IS_ESAFE, CHARS_ESPECIAL);
+ header_remove_bits(CAMEL_MIME_IS_ATTRCHAR, CHARS_TSPECIAL CHARS_ATTRCHAR);
header_init_bits(CAMEL_MIME_IS_PSAFE, 0, 0, CHARS_PSPECIAL);
}
@@ -1826,6 +1830,33 @@ hex_decode (const char *in, size_t len)
return outbuf;
}
+/* Tries to convert @in @from charset @to charset. Any failure, we get no data out rather than partial conversion */
+static char *
+header_convert(const char *to, const char *from, const char *in, size_t inlen)
+{
+ iconv_t ic;
+ size_t outlen, ret;
+ char *outbuf, *outbase, *result = NULL;
+
+ ic = e_iconv_open(to, from);
+ if (ic == (iconv_t) -1)
+ return NULL;
+
+ outlen = inlen * 6 + 16;
+ outbuf = outbase = g_malloc(outlen);
+
+ ret = e_iconv(ic, &in, &inlen, &outbuf, &outlen);
+ if (ret != (size_t) -1) {
+ e_iconv(ic, NULL, 0, &outbuf, &outlen);
+ *outbuf = '\0';
+ result = g_strdup(outbase);
+ }
+ e_iconv_close(ic);
+ g_free(outbase);
+
+ return result;
+}
+
/* an rfc2184 encoded string looks something like:
* us-ascii'en'This%20is%20even%20more%20
*/
@@ -1836,221 +1867,29 @@ rfc2184_decode (const char *in, size_t len)
const char *inptr = in;
const char *inend = in + len;
const char *charset;
- char *decoded = NULL;
- char *encoding;
+ char *decoded, *decword, *encoding;
inptr = memchr (inptr, '\'', len);
if (!inptr)
return NULL;
-
- encoding = g_strndup (in, inptr - in);
+
+ encoding = g_alloca(inptr-in+1);
+ memcpy(encoding, in, inptr-in);
+ encoding[inptr-in] = 0;
charset = e_iconv_charset_name (encoding);
- g_free (encoding);
inptr = memchr (inptr + 1, '\'', inend - inptr - 1);
if (!inptr)
return NULL;
-
inptr++;
- if (inptr < inend) {
- char *decword, *outbase, *outbuf;
- const char *inbuf;
- size_t inlen, outlen;
- iconv_t ic;
-
- inbuf = decword = hex_decode (inptr, inend - inptr);
- inlen = strlen (inbuf);
-
- ic = e_iconv_open ("UTF-8", charset);
- if (ic != (iconv_t) -1) {
- size_t ret;
-
- outlen = inlen * 6 + 16;
- outbuf = outbase = g_malloc (outlen);
-
- ret = e_iconv (ic, &inbuf, &inlen, &outbuf, &outlen);
- if (ret != (size_t) -1) {
- e_iconv (ic, NULL, 0, &outbuf, &outlen);
- *outbuf = '\0';
- g_free (decoded);
- decoded = outbase;
- }
-
- e_iconv_close (ic);
- } else {
- decoded = decword;
- }
- }
-
- return decoded;
-}
-
-/* This function is basically the same as decode_token()
- * except that it will not accept *'s which have a special
- * meaning for rfc2184 params */
-static char *
-decode_param_token (const char **in)
-{
- const char *inptr = *in;
- const char *start;
-
- header_decode_lwsp (&inptr);
- start = inptr;
- while (camel_mime_is_ttoken (*inptr) && *inptr != '*')
- inptr++;
- if (inptr > start) {
- *in = inptr;
- return g_strndup (start, inptr - start);
- } else {
+ if (inptr >= inend)
return NULL;
- }
-}
-static gboolean
-header_decode_rfc2184_param (const char **in, char **paramp, gboolean *is_encoded, int *part)
-{
- gboolean is_rfc2184 = FALSE;
- const char *inptr = *in;
- char *param;
-
- *is_encoded = FALSE;
- *part = -1;
-
- param = decode_param_token (&inptr);
- header_decode_lwsp (&inptr);
-
- if (*inptr == '*') {
- is_rfc2184 = TRUE;
- inptr++;
- header_decode_lwsp (&inptr);
- if (*inptr == '=') {
- /* form := param*=value */
- if (is_encoded)
- *is_encoded = TRUE;
- } else {
- /* form := param*#=value or param*#*=value */
- *part = camel_header_decode_int (&inptr);
- header_decode_lwsp (&inptr);
- if (*inptr == '*') {
- /* form := param*#*=value */
- if (is_encoded)
- *is_encoded = TRUE;
- inptr++;
- header_decode_lwsp (&inptr);
- }
- }
- }
-
- if (paramp)
- *paramp = param;
-
- if (param)
- *in = inptr;
-
- return is_rfc2184;
-}
+ decword = hex_decode (inptr, inend - inptr);
+ decoded = header_convert("UTF-8", charset, decword, strlen(decword));
+ g_free(decword);
-static int
-header_decode_param (const char **in, char **paramp, char **valuep, int *is_rfc2184_param, int *rfc2184_part)
-{
- gboolean is_rfc2184_encoded = FALSE;
- gboolean is_rfc2184 = FALSE;
- const char *inptr = *in;
- char *param = NULL;
- char *value = NULL;
-
- *is_rfc2184_param = FALSE;
- *rfc2184_part = -1;
-
- is_rfc2184 = header_decode_rfc2184_param (&inptr, &param, &is_rfc2184_encoded, rfc2184_part);
-
- if (*inptr == '=') {
- inptr++;
- value = header_decode_value (&inptr);
-
- if (value && is_rfc2184) {
- /* We have ourselves an rfc2184 parameter */
-
- if (*rfc2184_part == -1) {
- /* rfc2184 allows the value to be broken into
- * multiple parts - this isn't one of them so
- * it is safe to decode it.
- */
- char *val;
-
- val = rfc2184_decode (value, strlen (value));
- if (val) {
- g_free (value);
- value = val;
- }
- } else {
- /* Since we are expecting to find the rest of
- * this paramter value later, let our caller know.
- */
- *is_rfc2184_param = TRUE;
- }
- } else if (value && !strncmp (value, "=?", 2)) {
- /* We have a broken param value that is rfc2047 encoded.
- * Since both Outlook and Netscape/Mozilla do this, we
- * should handle this case.
- */
- char *val;
-
- if ((val = header_decode_text (value, strlen (value), NULL))) {
- g_free (value);
- value = val;
- }
- }
- }
-
- if (value && !g_utf8_validate (value, -1, NULL)) {
- /* The (broken) mailer sent us an unencoded 8bit value
- * attempt to save it by assuming it's in the user's
- * locale and converting to utf8 */
- char *outbase, *outbuf, *p;
- const char *charset, *inbuf;
- size_t inlen, outlen;
- iconv_t ic;
-
- inbuf = value;
- inlen = strlen (inbuf);
-
- charset = e_iconv_locale_charset ();
- ic = e_iconv_open ("UTF-8", charset ? charset : "ISO-8859-1");
- if (ic != (iconv_t) -1) {
- size_t ret;
-
- outlen = inlen * 6 + 16;
- outbuf = outbase = g_malloc (outlen);
-
- ret = e_iconv (ic, &inbuf, &inlen, &outbuf, &outlen);
- if (ret != (size_t) -1) {
- e_iconv (ic, NULL, 0, &outbuf, &outlen);
- *outbuf = '\0';
- }
-
- e_iconv_close (ic);
-
- g_free (value);
- value = outbase;
- } else {
- /* Okay, so now what? I guess we convert invalid chars to _'s? */
- for (p = value; *p; p++)
- if (!isascii ((unsigned) *p))
- *p = '_';
- }
- }
-
- if (param && value) {
- *paramp = param;
- *valuep = value;
- *in = inptr;
- return 0;
- } else {
- g_free (param);
- g_free (value);
- return 1;
- }
+ return decoded;
}
char *
@@ -2953,87 +2792,158 @@ camel_header_mime_decode(const char *in, int *maj, int *min)
d(printf("major = %d, minor = %d\n", major, minor));
}
+struct _rfc2184_param {
+ struct _camel_header_param param;
+ int index;
+};
+
+static int
+rfc2184_param_cmp(const void *ap, const void *bp)
+{
+ const struct _rfc2184_param *a = *(void **)ap;
+ const struct _rfc2184_param *b = *(void **)bp;
+ int res;
+
+ res = strcmp(a->param.name, b->param.name);
+ if (res == 0) {
+ if (a->index > b->index)
+ res = 1;
+ else if (a->index < b->index)
+ res = -1;
+ }
+
+ return res;
+}
+
+/* NB: Steals name and value */
+static struct _camel_header_param *
+header_append_param(struct _camel_header_param *last, char *name, char *value)
+{
+ struct _camel_header_param *node;
+
+ /* This handles -
+ 8 bit data in parameters, illegal, tries to convert using locale, or just safens it up.
+ rfc2047 ecoded parameters, illegal, decodes them anyway. Some Outlook & Mozilla do this?
+ */
+ node = g_malloc(sizeof(*node));
+ last->next = node;
+ node->next = NULL;
+ node->name = name;
+ if (strncmp(value, "=?", 2) == 0
+ && (node->value = header_decode_text(value, strlen(value), NULL))) {
+ g_free(value);
+ } else if (!g_utf8_validate(value, -1, NULL)) {
+ const char * charset = e_iconv_locale_charset();
+
+ if ((node->value = header_convert("UTF-8", charset?charset:"ISO-8859-1", value, strlen(value)))) {
+ g_free(value);
+ } else {
+ node->value = value;
+ for (;*value;value++)
+ if (!isascii((unsigned char)*value))
+ *value = '_';
+ }
+ } else
+ node->value = value;
+
+ return node;
+}
+
static struct _camel_header_param *
header_decode_param_list (const char **in)
{
+ struct _camel_header_param *head = NULL, *last = (struct _camel_header_param *)&head;
+ GPtrArray *split = NULL;
const char *inptr = *in;
- struct _camel_header_param *head = NULL, *tail = NULL;
- gboolean last_was_rfc2184 = FALSE;
- gboolean is_rfc2184 = FALSE;
-
- header_decode_lwsp (&inptr);
-
+ struct _rfc2184_param *work;
+ char *tmp;
+
+ /* Dump parameters into the output list, in the order found. RFC 2184 split parameters are kept in an array */
+ header_decode_lwsp(&inptr);
while (*inptr == ';') {
- struct _camel_header_param *param;
- char *name, *value;
- int rfc2184_part;
-
+ char *name;
+ char *value = NULL;
+
inptr++;
- /* invalid format? */
- if (header_decode_param (&inptr, &name, &value, &is_rfc2184, &rfc2184_part) != 0)
- break;
-
- if (is_rfc2184 && tail && !strcasecmp (name, tail->name)) {
- /* rfc2184 allows a parameter to be broken into multiple parts
- * and it looks like we've found one. Append this value to the
- * last value.
- */
- /* FIXME: we should be ordering these based on rfc2184_part id */
- GString *gvalue;
-
- gvalue = g_string_new (tail->value);
- g_string_append (gvalue, value);
- g_free (tail->value);
- g_free (value);
- g_free (name);
-
- tail->value = gvalue->str;
- g_string_free (gvalue, FALSE);
- } else {
- if (last_was_rfc2184) {
- /* We've finished gathering the values for the last param
- * so it is now safe to decode it.
- */
- char *val;
-
- val = rfc2184_decode (tail->value, strlen (tail->value));
- if (val) {
- g_free (tail->value);
- tail->value = val;
+ name = decode_token(&inptr);
+ header_decode_lwsp(&inptr);
+ if (*inptr == '=') {
+ inptr++;
+ value = header_decode_value(&inptr);
+ }
+
+ if (name && value) {
+ char *index = strchr(name, '*');
+
+ if (index) {
+ if (index[1] == 0) {
+ /* VAL*="foo", decode immediately and append */
+ *index = 0;
+ tmp = rfc2184_decode(value, strlen(value));
+ if (tmp) {
+ g_free(value);
+ value = tmp;
+ }
+ last = header_append_param(last, name, value);
+ } else {
+ /* VAL*1="foo", save for later */
+ *index++ = 0;
+ work = g_malloc(sizeof(*work));
+ work->param.name = name;
+ work->param.value = value;
+ work->index = atoi(index);
+ if (split == NULL)
+ split = g_ptr_array_new();
+ g_ptr_array_add(split, work);
}
+ } else {
+ last = header_append_param(last, name, value);
}
-
- param = g_malloc (sizeof (struct _camel_header_param));
- param->name = name;
- param->value = value;
- param->next = NULL;
- if (head == NULL)
- head = param;
- if (tail)
- tail->next = param;
- tail = param;
+ } else {
+ g_free(name);
+ g_free(value);
}
-
- last_was_rfc2184 = is_rfc2184;
-
- header_decode_lwsp (&inptr);
+
+ header_decode_lwsp(&inptr);
}
-
- if (last_was_rfc2184) {
- /* We've finished gathering the values for the last param
- * so it is now safe to decode it.
- */
- char *val;
-
- val = rfc2184_decode (tail->value, strlen (tail->value));
- if (val) {
- g_free (tail->value);
- tail->value = val;
+
+ /* Rejoin any RFC 2184 split parameters in the proper order */
+ /* Parameters with the same index will be concatenated in undefined order */
+ if (split) {
+ GString *value = g_string_new("");
+ struct _rfc2184_param *first;
+ int i;
+
+ qsort(split->pdata, split->len, sizeof(split->pdata[0]), rfc2184_param_cmp);
+ first = split->pdata[0];
+ for (i=0;i<split->len;i++) {
+ work = split->pdata[i];
+ if (split->len-1 == i)
+ g_string_append(value, work->param.value);
+ if (split->len-1 == i || strcmp(work->param.name, first->param.name) != 0) {
+ tmp = rfc2184_decode(value->str, value->len);
+ if (tmp == NULL)
+ tmp = g_strdup(value->str);
+
+ last = header_append_param(last, g_strdup(first->param.name), tmp);
+ g_string_truncate(value, 0);
+ first = work;
+ }
+ if (split->len-1 != i)
+ g_string_append(value, work->param.value);
}
+ g_string_free(value, TRUE);
+ for (i=0;i<split->len;i++) {
+ work = split->pdata[i];
+ g_free(work->param.name);
+ g_free(work->param.value);
+ g_free(work);
+ }
+ g_ptr_array_free(split, TRUE);
}
-
+
*in = inptr;
-
+
return head;
}
@@ -3046,23 +2956,19 @@ camel_header_param_list_decode(const char *in)
return header_decode_param_list(&in);
}
-
static char *
header_encode_param (const unsigned char *in, gboolean *encoded)
{
- register const unsigned char *inptr = in;
+ const unsigned char *inptr = in;
unsigned char *outbuf = NULL;
- const unsigned char *inend;
- iconv_t cd = (iconv_t) -1;
const char *charset;
- char *outstr;
int encoding;
GString *out;
-
+ guint32 c;
+
*encoded = FALSE;
g_return_val_if_fail (in != NULL, NULL);
- g_return_val_if_fail (g_utf8_validate (in, -1, NULL), NULL);
/* do a quick us-ascii check (the common case?) */
while (*inptr) {
@@ -3076,87 +2982,43 @@ header_encode_param (const unsigned char *in, gboolean *encoded)
inptr = in;
encoding = 0;
- while (inptr && *inptr) {
- const char *newinptr;
- gunichar c;
-
- newinptr = g_utf8_next_char (inptr);
- c = g_utf8_get_char (inptr);
- if (newinptr == NULL || !g_unichar_validate (c)) {
- w(g_warning ("Invalid UTF-8 sequence encountered (pos %d, char '%c'): %s",
- (inptr-in), inptr[0], in));
- inptr++;
- continue;
- }
-
- if (c > 127 && c < 256) {
+ while ( encoding !=2 && (c = camel_utf8_getc(&inptr)) ) {
+ if (c > 127 && c < 256)
encoding = MAX (encoding, 1);
- } else if (c >= 256) {
+ else if (c >= 256)
encoding = MAX (encoding, 2);
- }
-
- inptr = newinptr;
}
-
+
if (encoding == 2)
- charset = camel_charset_best (in, inptr - in);
+ charset = camel_charset_best(in, strlen(in));
else
charset = "iso-8859-1";
- if (strcasecmp (charset, "UTF-8") != 0)
- cd = e_iconv_open (charset, "UTF-8");
-
- if (cd == (iconv_t) -1) {
+ if (g_ascii_strcasecmp(charset, "UTF-8") != 0
+ && (outbuf = header_convert(charset, "UTF-8", in, strlen(in)))) {
+ inptr = outbuf;
+ } else {
charset = "UTF-8";
inptr = in;
- inend = inptr + strlen (in);
- } else {
- size_t inleft, outleft;
- const char *inbuf;
- char *outptr;
-
- inleft = (inptr - in);
- outleft = inleft * 6 + 20;
- outptr = outbuf = g_malloc (outleft);
- inbuf = in;
-
- if (e_iconv (cd, &inbuf, &inleft, &outptr, &outleft) == (size_t) -1) {
- w(g_warning ("Conversion problem: conversion truncated: %s", strerror (errno)));
- } else {
- e_iconv (cd, NULL, 0, &outptr, &outleft);
- }
-
- e_iconv_close (cd);
-
- inptr = outbuf;
- inend = outptr;
}
/* FIXME: set the 'language' as well, assuming we can get that info...? */
- out = g_string_new ("");
- g_string_append_printf (out, "%s''", charset);
-
- while (inptr < inend) {
- unsigned char c = *inptr++;
-
- /* FIXME: make sure that '\'', '*', and ';' are also encoded */
-
- if (c > 127) {
- g_string_append_printf (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
- } else if (camel_mime_is_lwsp (c) || !(camel_mime_special_table[c] & CAMEL_MIME_IS_ESAFE)) {
- g_string_append_printf (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
- } else {
+ out = g_string_new (charset);
+ g_string_append(out, "''");
+
+ while ( (c = *inptr++) ) {
+ if (camel_mime_is_attrchar(c))
g_string_append_c (out, c);
- }
+ else
+ g_string_append_printf (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
}
-
g_free (outbuf);
- outstr = out->str;
+ outbuf = out->str;
g_string_free (out, FALSE);
*encoded = TRUE;
- return outstr;
+ return outbuf;
}
void
diff --git a/camel/camel-mime-utils.h b/camel/camel-mime-utils.h
index d4e3012d95..5fb23be36f 100644
--- a/camel/camel-mime-utils.h
+++ b/camel/camel-mime-utils.h
@@ -263,6 +263,7 @@ enum {
CAMEL_MIME_IS_QPSAFE = 1<<6,
CAMEL_MIME_IS_ESAFE = 1<<7, /* encoded word safe */
CAMEL_MIME_IS_PSAFE = 1<<8, /* encoded word in phrase safe */
+ CAMEL_MIME_IS_ATTRCHAR = 1<<9, /* attribute-char safe (rfc2184) */
};
extern unsigned short camel_mime_special_table[256];
@@ -278,6 +279,7 @@ extern unsigned short camel_mime_special_table[256];
#define camel_mime_is_qpsafe(x) ((camel_mime_special_table[(unsigned char)(x)] & CAMEL_MIME_IS_QPSAFE) != 0)
#define camel_mime_is_especial(x) ((camel_mime_special_table[(unsigned char)(x)] & CAMEL_MIME_IS_ESPECIAL) != 0)
#define camel_mime_is_psafe(x) ((camel_mime_special_table[(unsigned char)(x)] & CAMEL_MIME_IS_PSAFE) != 0)
+#define camel_mime_is_attrchar(x) ((camel_mime_special_table[(unsigned char)(x)] & CAMEL_MIME_IS_ATTRCHAR) != 0)
#ifdef __cplusplus
}
diff --git a/camel/tests/misc/Makefile.am b/camel/tests/misc/Makefile.am
index cc119e9390..4cf6500c06 100644
--- a/camel/tests/misc/Makefile.am
+++ b/camel/tests/misc/Makefile.am
@@ -20,9 +20,10 @@ check_PROGRAMS = \
url \
url-scan \
utf7 \
- split
+ split \
+ test2
-TESTS = url utf7 split url-scan
+TESTS = url utf7 split url-scan test2
diff --git a/camel/tests/misc/README b/camel/tests/misc/README
index e92f579cf6..6b8f4a22f2 100644
--- a/camel/tests/misc/README
+++ b/camel/tests/misc/README
@@ -1,4 +1,5 @@
+test2 rfc2184 multipart/i18n parameters
url URL parsing
utf7 UTF7 and UTF8 processing
split word splitting for searching
diff --git a/camel/tests/misc/test2.c b/camel/tests/misc/test2.c
new file mode 100644
index 0000000000..a802392e91
--- /dev/null
+++ b/camel/tests/misc/test2.c
@@ -0,0 +1,121 @@
+
+
+#include <config.h>
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <camel/camel-object.h>
+#include <camel/camel-mime-utils.h>
+
+#include "camel-test.h"
+
+/* NB: We know which order the params will be decoded in, plain in the order they come,
+ and rfc2184 encoded following those, sorted lexigraphically */
+struct {
+ char *list;
+ int count;
+ char *params[8];
+} test1[] = {
+ { "; charset=\"iso-8859-1\"",
+ 1,
+ { "charset", "iso-8859-1" }, },
+ { "; charset=iso-8859-1",
+ 1,
+ { "charset", "iso-8859-1" }, },
+ { "; charset=\"iso-8859-1\"; boundary=\"foo\"",
+ 2,
+ { "charset", "iso-8859-1",
+ "boundary", "foo" }, },
+ { "; charset*1 = 8859; charset*0=\"iso-8859-1'en'iso-\";charset*2=\"-1\" ",
+ 1,
+ { "charset", "iso-8859-1" }, },
+ { "; charset*1 = 8859; boundary=foo; charset*0=\"iso-8859-1'en'iso-\";charset*2=\"-1\" ",
+ 2,
+ { "boundary", "foo",
+ "charset", "iso-8859-1", }, },
+ { "; charset*1 = 8859; boundary*0=f; charset*0=\"iso-8859-1'en'iso-\"; boundary*2=\"o\" ; charset*2=\"-1\"; boundary*1=o ",
+ 2,
+ { "boundary", "foo",
+ "charset", "iso-8859-1", }, },
+ { "; charset*1 = 8859; boundary*0=\"iso-8859-1'en'f\"; charset*0=\"iso-8859-1'en'iso-\"; boundary*2=\"o\" ; charset*2=\"-1\"; boundary*1=o ",
+ 2,
+ { "boundary", "foo",
+ "charset", "iso-8859-1", }, },
+};
+
+struct {
+ int count;
+ char *params[8];
+ char *list;
+} test2[] = {
+ { 1,
+ { "name", "Doul\xC3\xADk01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123457890123456789123456789" },
+ ";\n"
+ "\tname*0*=iso-8859-1''Doul%EDk012345678901234567890123456789012345678901234;\n"
+ "\tname*1*=56789012345678901234567890123456789012345678901234567890123457890;\n"
+ "\tname*2*=123456789123456789" },
+ { 1,
+ { "name", "\"%$#@ special chars?;; !" },
+ "; name=\"\\\"%$#@ special chars?;; !\"" },
+ { 1,
+ { "name", "\"%$#@ special chars?;; !\xC3\xAD" },
+ "; name*=iso-8859-1''%22%25$#%40%20special%20chars%3F%3B%3B%20!%ED" },
+};
+
+int
+main (int argc, char **argv)
+{
+ int i, j;
+
+ camel_test_init(argc, argv);
+
+ camel_test_start("Param list decoding");
+
+ for (i=0;i<sizeof(test1)/sizeof(test1[0]);i++) {
+ struct _camel_header_param *head, *node;
+
+ camel_test_push("param decoding[%d] '%s'", i, test1[i].list);
+ head = camel_header_param_list_decode(test1[i].list);
+ check(head != NULL);
+ node = head;
+ for (j=0;j<test1[i].count;j++) {
+ check_msg(node != NULL, "didn't find all params");
+ check(strcmp(node->name, test1[i].params[j*2]) == 0);
+ check(strcmp(node->value, test1[i].params[j*2+1]) == 0);
+ node = node->next;
+ }
+ check_msg(node == NULL, "found more params than should have");
+ camel_header_param_list_free(head);
+ camel_test_pull();
+ }
+
+ camel_test_end();
+
+ camel_test_start("Param list encoding");
+
+ for (i=0;i<sizeof(test2)/sizeof(test2[0]);i++) {
+ struct _camel_header_param *head = NULL, *scan;
+ char *text;
+
+ camel_test_push("param encoding[%d]", i);
+
+ for (j=0;j<test2[i].count;j++)
+ camel_header_set_param(&head, test2[i].params[j*2], test2[i].params[j*2+1]);
+ scan = head;
+ for (j=0;scan;j++)
+ scan = scan->next;
+ check(j == test2[i].count);
+
+ text = camel_header_param_list_format(head);
+ check(strcmp(text, test2[i].list) == 0);
+ camel_header_param_list_free(head);
+
+ camel_test_pull();
+ }
+
+ camel_test_end();
+
+ return 0;
+}