diff options
Diffstat (limited to 'camel/camel-mime-utils.c')
-rw-r--r-- | camel/camel-mime-utils.c | 228 |
1 files changed, 178 insertions, 50 deletions
diff --git a/camel/camel-mime-utils.c b/camel/camel-mime-utils.c index ccdd03634e..425c77d35d 100644 --- a/camel/camel-mime-utils.c +++ b/camel/camel-mime-utils.c @@ -877,7 +877,7 @@ rfc2047_decode_word(const char *in, int len) int inlen, outlen; iconv_t ic; - d(printf("decoding '%.*s'\n", len, in)); + d(printf("rfc2047: decoding '%.*s'\n", len, in)); /* just make sure we're not passed shit */ if (len<7 @@ -916,7 +916,7 @@ rfc2047_decode_word(const char *in, int len) inbuf = decword; - outlen = inlen*6; + outlen = inlen*6+16; outbase = alloca(outlen); outbuf = outbase; @@ -924,11 +924,12 @@ rfc2047_decode_word(const char *in, int len) ic = iconv_open("UTF-8", encname); if (ic != (iconv_t)-1) { ret = iconv(ic, (const char **)&inbuf, &inlen, &outbuf, &outlen); - iconv_close(ic); if (ret>=0) { + iconv(ic, NULL, 0, &outbuf, &outlen); *outbuf = 0; decoded = g_strdup(outbase); } + iconv_close(ic); } else { w(g_warning("Cannot decode charset, header display may be corrupt: %s: %s", encname, strerror(errno))); /* TODO: Should this do this, or just leave the encoded strings? */ @@ -1095,46 +1096,109 @@ header_decode_string(const char *in) return header_decode_text(in, strlen(in)); } +/* how long a sequence of pre-encoded words should be less than, to attempt to + fit into a properly folded word. Only a guide. */ +#define CAMEL_FOLD_PREENCODED (24) + /* FIXME: needs a way to cache iconv opens for different charsets? */ static void rfc2047_encode_word(GString *outstring, const char *in, int len, const char *type, unsigned short safemask) { - iconv_t ic; + iconv_t ic = (iconv_t *)-1; char *buffer, *out, *ascii; - size_t inlen, outlen, enclen; + size_t inlen, outlen, enclen, bufflen; + const char *inptr, *p; + int first = 1; - d(printf("Converting '%.*s' to %s\n", len, in, type)); + d(printf("Converting [%d] '%.*s' to %s\n", len, len, in, type)); /* convert utf8->encoding */ - outlen = len*6; - buffer = alloca(outlen); + bufflen = len*6+16; + buffer = alloca(bufflen); inlen = len; - out = buffer; - - /* if we can't convert from utf-8, just encode as utf-8 */ - if (!strcasecmp(type, "UTF-8") - || (ic = iconv_open(type, "UTF-8")) == (iconv_t)-1) { - memcpy(buffer, in, len); - out = buffer+len; - type = "UTF-8"; - } else { - if (iconv(ic, &in, &inlen, &out, &outlen) == -1) { - w(g_warning("Conversion problem: conversion truncated: %s", strerror(errno))); + inptr = in; + + ascii = alloca(bufflen); + + if (strcasecmp(type, "UTF-8") != 0) + ic = iconv_open(type, "UTF-8"); + + while (inlen) { + int convlen, i, proclen; + + /* break up words into smaller bits, what we really want is encoded + overhead < 75, + but we'll just guess what that means in terms of input chars, and assume its good enough */ + + out = buffer; + outlen = bufflen; + + if (ic == (iconv_t) -1) { + /* native encoding case, the easy one (?) */ + /* we work out how much we can convert, and still be in length */ + /* proclen will be the result of input characters that we can convert, to the nearest + (approximated) valid utf8 char */ + convlen = 0; + proclen = 0; + p = inptr; + i = 0; + while (p < (in+len) && convlen < (75 - strlen("=?utf-8?q??="))) { + unsigned char c = *p++; + + if (c >= 0xc0) + proclen = i; + i++; + if (c < 0x80) + proclen = i; + if (camel_mime_special_table[c] & safemask) + convlen += 1; + else + convlen += 3; + } + /* well, we probably have broken utf8, just copy it anyway what the heck */ + if (proclen == 0) { + w(g_warning("Appear to have truncated utf8 sequence")); + proclen = inlen; + } + memcpy(out, inptr, proclen); + inptr += proclen; + inlen -= proclen; + out += proclen; + } else { + /* well we could do similar, but we can't (without undue effort), we'll just break it up into + hopefully-small-enough chunks, and leave it at that */ + convlen = MIN(inlen, CAMEL_FOLD_PREENCODED); + p = inptr; + if (iconv(ic, &inptr, &convlen, &out, &outlen) == -1) { + w(g_warning("Conversion problem: conversion truncated: %s", strerror(errno))); + /* blah, we include it anyway, better than infinite loop ... */ + inptr = p + convlen; + } else { + /* make sure we flush out any shift state */ + iconv(ic, NULL, 0, &out, &outlen); + } + inlen -= (inptr - p); } - iconv_close(ic); - } - enclen = out-buffer; - /* now create qp version */ - ascii = alloca(enclen*3 + strlen(type) + 8); - out = ascii; - /* should determine which encoding is smaller, and use that? */ - out += sprintf(out, "=?%s?Q?", type); - out += quoted_encode(buffer, enclen, out, safemask); - sprintf(out, "?="); + enclen = out-buffer; + + /* create token */ + out = ascii; + if (first) + first = 0; + else + *out++ = ' '; + out += sprintf(out, "=?%s?Q?", type); + out += quoted_encode(buffer, enclen, out, safemask); + sprintf(out, "?="); + + d(printf("converted part = %s\n", ascii)); - d(printf("converted = %s\n", ascii)); - g_string_append(outstring, ascii); + g_string_append(outstring, ascii); + } + + if (ic == (iconv_t) -1) { + iconv_close(ic); + } } @@ -1162,7 +1226,6 @@ header_encode_string(const unsigned char *in) /* This gets each word out of the input, and checks to see what charset can be used to encode it. */ /* TODO: Work out when to merge subsequent words, or across word-parts */ - /* FIXME: Make sure a converted word is less than the encoding size */ out = g_string_new(""); inptr = in; encoding = 0; @@ -1275,6 +1338,20 @@ header_encode_phrase(const unsigned char *in) out = g_string_new(""); +#if 0 + { + int i; + + printf("encoding phrase: %s\n", in); + for (i=0;in[i];i++) { + printf(" %02x", in[i]); + if (((i) & 15) == 15) + printf("\n"); + } + printf("\n"); + } +#endif + /* break the input into words */ type = WORD_ATOM; count = 0; @@ -1338,12 +1415,18 @@ header_encode_phrase(const unsigned char *in) nextl = g_list_next(wordl); while (nextl) { next = nextl->data; - /* merge nodes of the same (or lower?) type*/ - if (word->type == next->type || (next->type < word->type && word->type < WORD_2047) ) { - word->end = next->end; - words = g_list_remove_link(words, nextl); - g_free(next); - nextl = g_list_next(wordl); + /* merge nodes of the same type AND we are not creating too long a string */ + if (word->type == next->type) { + if (next->end - word->start < CAMEL_FOLD_PREENCODED) { + word->end = next->end; + words = g_list_remove_link(words, nextl); + g_free(next); + nextl = g_list_next(wordl); + } else { + /* if it is going to be too long, make sure we include the separating whitespace */ + word->end = next->start; + break; + } } else { break; } @@ -1377,7 +1460,12 @@ header_encode_phrase(const unsigned char *in) if (nextl) { int i; next = nextl->data; - for (i=next->start-word->end;i>0;i--) + /* if they are adjacent, it means we already had the spaces encoded internally, + so now we just need to output 1 space */ + i=next->start-word->end; + if (i==0) + i=1; + for (;i>0;i--) out = g_string_append_c(out, ' '); } @@ -1822,17 +1910,27 @@ header_decode_mailbox(const char **in) /* ',' and '\0' required incase it is a simple address, no @ domain part (buggy writer) */ name = g_string_new(""); while (pre) { - char *text; + char *text, *last; - /* perform internationalised decoding, and appent */ + /* perform internationalised decoding, and append */ text = header_decode_string(pre); name = g_string_append(name, text); - g_free(pre); + last = pre; g_free(text); pre = header_decode_word(&inptr); - if (pre) - name = g_string_append_c(name, ' '); + if (pre) { + int l = strlen(last); + int p = strlen(pre); + /* dont append ' ' between sucsessive encoded words */ + if ((l>6 && last[l-2] == '?' && last[l-1] == '=') + && (p>6 && pre[0] == '=' && pre[1] == '?')) { + /* dont append ' ' */ + } else { + name = g_string_append_c(name, ' '); + } + } + g_free(last); } header_decode_lwsp(&inptr); if (*inptr == '<') { @@ -2999,21 +3097,45 @@ header_address_list_format(struct _header_address *a) } /* simple header folding */ -/* note: assumes the input has not already been folded */ +/* will work even if the header is already folded */ char * -header_fold(const char *in) +header_fold(const char *in, int headerlen) { int len, outlen, i; - const char *inptr = in, *space; + const char *inptr = in, *space, *p, *n; GString *out; char *ret; + int needunfold = FALSE; + + if (in == NULL) + return NULL; - len = strlen(in); - if (len <= CAMEL_FOLD_SIZE) + /* first, check to see if we even need to fold */ + len = headerlen + 2; + p = in; + while (*p) { + n = strchr(p, '\n'); + if (n == NULL) { + n = p+strlen(p); + } else { + needunfold = TRUE; + } + len += n-p; + + if (len >= CAMEL_FOLD_SIZE) + break; + len = 0; + p = n; + } + if (len < CAMEL_FOLD_SIZE) return g_strdup(in); + /* we need to fold, so first unfold (if we need to), then process */ + if (needunfold) + inptr = in = header_unfold(in); + out = g_string_new(""); - outlen = 0; + outlen = headerlen+2; while (*inptr) { space = strchr(inptr, ' '); if (space) { @@ -3021,7 +3143,9 @@ header_fold(const char *in) } else { len = strlen(inptr); } + printf("next word '%.*s'\n", len, inptr); if (outlen + len > CAMEL_FOLD_SIZE) { + printf("outlen = %d wordlen = %d\n", outlen, len); g_string_append(out, "\n\t"); outlen = 1; /* check for very long words, just cut them up */ @@ -3042,6 +3166,10 @@ header_fold(const char *in) } ret = out->str; g_string_free(out, FALSE); + + if (needunfold) + g_free((char *)in); + return ret; } |