aboutsummaryrefslogtreecommitdiffstats
path: root/camel/camel-mime-utils.c
diff options
context:
space:
mode:
Diffstat (limited to 'camel/camel-mime-utils.c')
-rw-r--r--camel/camel-mime-utils.c228
1 files changed, 178 insertions, 50 deletions
diff --git a/camel/camel-mime-utils.c b/camel/camel-mime-utils.c
index ccdd03634e..425c77d35d 100644
--- a/camel/camel-mime-utils.c
+++ b/camel/camel-mime-utils.c
@@ -877,7 +877,7 @@ rfc2047_decode_word(const char *in, int len)
int inlen, outlen;
iconv_t ic;
- d(printf("decoding '%.*s'\n", len, in));
+ d(printf("rfc2047: decoding '%.*s'\n", len, in));
/* just make sure we're not passed shit */
if (len<7
@@ -916,7 +916,7 @@ rfc2047_decode_word(const char *in, int len)
inbuf = decword;
- outlen = inlen*6;
+ outlen = inlen*6+16;
outbase = alloca(outlen);
outbuf = outbase;
@@ -924,11 +924,12 @@ rfc2047_decode_word(const char *in, int len)
ic = iconv_open("UTF-8", encname);
if (ic != (iconv_t)-1) {
ret = iconv(ic, (const char **)&inbuf, &inlen, &outbuf, &outlen);
- iconv_close(ic);
if (ret>=0) {
+ iconv(ic, NULL, 0, &outbuf, &outlen);
*outbuf = 0;
decoded = g_strdup(outbase);
}
+ iconv_close(ic);
} else {
w(g_warning("Cannot decode charset, header display may be corrupt: %s: %s", encname, strerror(errno)));
/* TODO: Should this do this, or just leave the encoded strings? */
@@ -1095,46 +1096,109 @@ header_decode_string(const char *in)
return header_decode_text(in, strlen(in));
}
+/* a run of pre-encoded words must be shorter than this for us to attempt to
+ fit it into a single properly folded word. Only a guide. */
+#define CAMEL_FOLD_PREENCODED (24)
+
/* FIXME: needs a way to cache iconv opens for different charsets? */
static void
rfc2047_encode_word(GString *outstring, const char *in, int len, const char *type, unsigned short safemask)
{
- iconv_t ic;
+ iconv_t ic = (iconv_t *)-1;
char *buffer, *out, *ascii;
- size_t inlen, outlen, enclen;
+ size_t inlen, outlen, enclen, bufflen;
+ const char *inptr, *p;
+ int first = 1;
- d(printf("Converting '%.*s' to %s\n", len, in, type));
+ d(printf("Converting [%d] '%.*s' to %s\n", len, len, in, type));
/* convert utf8->encoding */
- outlen = len*6;
- buffer = alloca(outlen);
+ bufflen = len*6+16;
+ buffer = alloca(bufflen);
inlen = len;
- out = buffer;
-
- /* if we can't convert from utf-8, just encode as utf-8 */
- if (!strcasecmp(type, "UTF-8")
- || (ic = iconv_open(type, "UTF-8")) == (iconv_t)-1) {
- memcpy(buffer, in, len);
- out = buffer+len;
- type = "UTF-8";
- } else {
- if (iconv(ic, &in, &inlen, &out, &outlen) == -1) {
- w(g_warning("Conversion problem: conversion truncated: %s", strerror(errno)));
+ inptr = in;
+
+ ascii = alloca(bufflen);
+
+ if (strcasecmp(type, "UTF-8") != 0)
+ ic = iconv_open(type, "UTF-8");
+
+ while (inlen) {
+ int convlen, i, proclen;
+
+ /* break up words into smaller bits, what we really want is encoded + overhead < 75,
+ but we'll just guess what that means in terms of input chars, and assume it's good enough */
+
+ out = buffer;
+ outlen = bufflen;
+
+ if (ic == (iconv_t) -1) {
+ /* native encoding case, the easy one (?) */
+ /* we work out how much we can convert, and still be in length */
+ /* proclen will be the result of input characters that we can convert, to the nearest
+ (approximated) valid utf8 char */
+ convlen = 0;
+ proclen = 0;
+ p = inptr;
+ i = 0;
+ while (p < (in+len) && convlen < (75 - strlen("=?utf-8?q??="))) {
+ unsigned char c = *p++;
+
+ if (c >= 0xc0)
+ proclen = i;
+ i++;
+ if (c < 0x80)
+ proclen = i;
+ if (camel_mime_special_table[c] & safemask)
+ convlen += 1;
+ else
+ convlen += 3;
+ }
+ /* well, we probably have broken utf8, just copy it anyway what the heck */
+ if (proclen == 0) {
+ w(g_warning("Appear to have truncated utf8 sequence"));
+ proclen = inlen;
+ }
+ memcpy(out, inptr, proclen);
+ inptr += proclen;
+ inlen -= proclen;
+ out += proclen;
+ } else {
+ /* well we could do similar, but we can't (without undue effort), we'll just break it up into
+ hopefully-small-enough chunks, and leave it at that */
+ convlen = MIN(inlen, CAMEL_FOLD_PREENCODED);
+ p = inptr;
+ if (iconv(ic, &inptr, &convlen, &out, &outlen) == -1) {
+ w(g_warning("Conversion problem: conversion truncated: %s", strerror(errno)));
+ /* blah, we include it anyway, better than infinite loop ... */
+ inptr = p + convlen;
+ } else {
+ /* make sure we flush out any shift state */
+ iconv(ic, NULL, 0, &out, &outlen);
+ }
+ inlen -= (inptr - p);
}
- iconv_close(ic);
- }
- enclen = out-buffer;
- /* now create qp version */
- ascii = alloca(enclen*3 + strlen(type) + 8);
- out = ascii;
- /* should determine which encoding is smaller, and use that? */
- out += sprintf(out, "=?%s?Q?", type);
- out += quoted_encode(buffer, enclen, out, safemask);
- sprintf(out, "?=");
+ enclen = out-buffer;
+
+ /* create token */
+ out = ascii;
+ if (first)
+ first = 0;
+ else
+ *out++ = ' ';
+ out += sprintf(out, "=?%s?Q?", type);
+ out += quoted_encode(buffer, enclen, out, safemask);
+ sprintf(out, "?=");
+
+ d(printf("converted part = %s\n", ascii));
- d(printf("converted = %s\n", ascii));
- g_string_append(outstring, ascii);
+ g_string_append(outstring, ascii);
+ }
+
+ if (ic == (iconv_t) -1) {
+ iconv_close(ic);
+ }
}
@@ -1162,7 +1226,6 @@ header_encode_string(const unsigned char *in)
/* This gets each word out of the input, and checks to see what charset
can be used to encode it. */
/* TODO: Work out when to merge subsequent words, or across word-parts */
- /* FIXME: Make sure a converted word is less than the encoding size */
out = g_string_new("");
inptr = in;
encoding = 0;
@@ -1275,6 +1338,20 @@ header_encode_phrase(const unsigned char *in)
out = g_string_new("");
+#if 0
+ {
+ int i;
+
+ printf("encoding phrase: %s\n", in);
+ for (i=0;in[i];i++) {
+ printf(" %02x", in[i]);
+ if (((i) & 15) == 15)
+ printf("\n");
+ }
+ printf("\n");
+ }
+#endif
+
/* break the input into words */
type = WORD_ATOM;
count = 0;
@@ -1338,12 +1415,18 @@ header_encode_phrase(const unsigned char *in)
nextl = g_list_next(wordl);
while (nextl) {
next = nextl->data;
- /* merge nodes of the same (or lower?) type*/
- if (word->type == next->type || (next->type < word->type && word->type < WORD_2047) ) {
- word->end = next->end;
- words = g_list_remove_link(words, nextl);
- g_free(next);
- nextl = g_list_next(wordl);
+ /* merge nodes of the same type AND we are not creating too long a string */
+ if (word->type == next->type) {
+ if (next->end - word->start < CAMEL_FOLD_PREENCODED) {
+ word->end = next->end;
+ words = g_list_remove_link(words, nextl);
+ g_free(next);
+ nextl = g_list_next(wordl);
+ } else {
+ /* if it is going to be too long, make sure we include the separating whitespace */
+ word->end = next->start;
+ break;
+ }
} else {
break;
}
@@ -1377,7 +1460,12 @@ header_encode_phrase(const unsigned char *in)
if (nextl) {
int i;
next = nextl->data;
- for (i=next->start-word->end;i>0;i--)
+ /* if they are adjacent, it means we already had the spaces encoded internally,
+ so now we just need to output 1 space */
+ i=next->start-word->end;
+ if (i==0)
+ i=1;
+ for (;i>0;i--)
out = g_string_append_c(out, ' ');
}
@@ -1822,17 +1910,27 @@ header_decode_mailbox(const char **in)
/* ',' and '\0' required incase it is a simple address, no @ domain part (buggy writer) */
name = g_string_new("");
while (pre) {
- char *text;
+ char *text, *last;
- /* perform internationalised decoding, and appent */
+ /* perform internationalised decoding, and append */
text = header_decode_string(pre);
name = g_string_append(name, text);
- g_free(pre);
+ last = pre;
g_free(text);
pre = header_decode_word(&inptr);
- if (pre)
- name = g_string_append_c(name, ' ');
+ if (pre) {
+ int l = strlen(last);
+ int p = strlen(pre);
+ /* don't append ' ' between successive encoded words */
+ if ((l>6 && last[l-2] == '?' && last[l-1] == '=')
+ && (p>6 && pre[0] == '=' && pre[1] == '?')) {
+ /* don't append ' ' */
+ } else {
+ name = g_string_append_c(name, ' ');
+ }
+ }
+ g_free(last);
}
header_decode_lwsp(&inptr);
if (*inptr == '<') {
@@ -2999,21 +3097,45 @@ header_address_list_format(struct _header_address *a)
}
/* simple header folding */
-/* note: assumes the input has not already been folded */
+/* will work even if the header is already folded */
char *
-header_fold(const char *in)
+header_fold(const char *in, int headerlen)
{
int len, outlen, i;
- const char *inptr = in, *space;
+ const char *inptr = in, *space, *p, *n;
GString *out;
char *ret;
+ int needunfold = FALSE;
+
+ if (in == NULL)
+ return NULL;
- len = strlen(in);
- if (len <= CAMEL_FOLD_SIZE)
+ /* first, check to see if we even need to fold */
+ len = headerlen + 2;
+ p = in;
+ while (*p) {
+ n = strchr(p, '\n');
+ if (n == NULL) {
+ n = p+strlen(p);
+ } else {
+ needunfold = TRUE;
+ }
+ len += n-p;
+
+ if (len >= CAMEL_FOLD_SIZE)
+ break;
+ len = 0;
+ p = n;
+ }
+ if (len < CAMEL_FOLD_SIZE)
return g_strdup(in);
+ /* we need to fold, so first unfold (if we need to), then process */
+ if (needunfold)
+ inptr = in = header_unfold(in);
+
out = g_string_new("");
- outlen = 0;
+ outlen = headerlen+2;
while (*inptr) {
space = strchr(inptr, ' ');
if (space) {
@@ -3021,7 +3143,9 @@ header_fold(const char *in)
} else {
len = strlen(inptr);
}
+ printf("next word '%.*s'\n", len, inptr);
if (outlen + len > CAMEL_FOLD_SIZE) {
+ printf("outlen = %d wordlen = %d\n", outlen, len);
g_string_append(out, "\n\t");
outlen = 1;
/* check for very long words, just cut them up */
@@ -3042,6 +3166,10 @@ header_fold(const char *in)
}
ret = out->str;
g_string_free(out, FALSE);
+
+ if (needunfold)
+ g_free((char *)in);
+
return ret;
}