diff options
author | Michael Zucci <zucchi@src.gnome.org> | 2000-12-11 19:40:15 +0800 |
---|---|---|
committer | Michael Zucci <zucchi@src.gnome.org> | 2000-12-11 19:40:15 +0800 |
commit | 1c95a1e9859e02781267975b821b9f62467b79d0 (patch) | |
tree | 9273fed0890c9a444ea9c7ff7044cff91a2a4c6b | |
parent | c08e99018cacc660a1995507b8d505f45f41cc95 (diff) | |
download | gsoc2013-evolution-1c95a1e9859e02781267975b821b9f62467b79d0.tar gsoc2013-evolution-1c95a1e9859e02781267975b821b9f62467b79d0.tar.gz gsoc2013-evolution-1c95a1e9859e02781267975b821b9f62467b79d0.tar.bz2 gsoc2013-evolution-1c95a1e9859e02781267975b821b9f62467b79d0.tar.lz gsoc2013-evolution-1c95a1e9859e02781267975b821b9f62467b79d0.tar.xz gsoc2013-evolution-1c95a1e9859e02781267975b821b9f62467b79d0.tar.zst gsoc2013-evolution-1c95a1e9859e02781267975b821b9f62467b79d0.zip |
Remove use of linewrap filter. Headers are now wrapped. encode_8bit
* providers/smtp/camel-smtp-transport.c (smtp_data): Remove use of
linewrap filter. Headers are now wrapped. encode_8bit already
enforces a 998 octet line limit.
(smtp_data): Also fixed a memleak, we always have to unref our own
copy of the filters. We also don't need to remove them manually,
so don't bother. The type's an int too ...
* camel-internet-address.c (internet_unformat): When scanning past
quotes, remove them also.
(camel_internet_address_format_address): If the name contains "'s,
or ','s then strip any quotes and wrap the whole lot in one set of
quotes.
* Makefile.am (noinst_HEADERS): We don't want to install
camel-charset-map-private.h, ever. There are probably other
similar files ..?
* camel-mime-part.c (write_to_stream): Fold header lines
appropriately as we're writing them out.
* camel-mime-utils.c (header_fold): Add a new argument, headerlen,
tells it how long the associated header token is.
(header_fold): Also, check to see if we need to fold first, using
a better algorithm, and also accept already-folded lines, and
re-process accordingly.
(rfc2047_decode_word): Add a little buffer space to iconv output
for shifting overheads?
(rfc2047_decode_word): finish the iconv with a null call, to flush
shift state, etc.
(rfc2047_encode_word): Attempt to break up long words into
appropriately sized, independent, chunks. See rfc2047, section 2.
(header_decode_mailbox): Don't add in extra spaces into the output
if we are decoding adjacent encoded words. We can only guess this
case, as some broken mailers put encoded words inside quoted
words.
(header_encode_phrase): Don't merge words if they are going to end
up too long. Also change back to only merge consecutive words of
the same type. e.g. 'foo. blah fum.' -> "foo." blah "fum." or
'iam an. idiot' -> iam "an." idiot
svn path=/trunk/; revision=6902
-rw-r--r-- | camel/ChangeLog | 40 | ||||
-rw-r--r-- | camel/Makefile.am | 5 | ||||
-rw-r--r-- | camel/camel-internet-address.c | 42 | ||||
-rw-r--r-- | camel/camel-mime-part.c | 8 | ||||
-rw-r--r-- | camel/camel-mime-utils.c | 228 | ||||
-rw-r--r-- | camel/camel-mime-utils.h | 5 | ||||
-rw-r--r-- | camel/providers/smtp/camel-smtp-transport.c | 15 |
7 files changed, 266 insertions, 77 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog index c9f3e2bf80..2867f7e9b6 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,5 +1,45 @@ 2000-12-11 Not Zed <NotZed@HelixCode.com> + * providers/smtp/camel-smtp-transport.c (smtp_data): Remove use of + linewrap filter. Headers are now wrapped. encode_8bit already + enforces a 998 octet line limit. + (smtp_data): Also fixed a memleak, we always have to unref our own + copy of the filters. We also dont need to remove them manually, + so dont bother. The type's an int too ... + + * camel-internet-address.c (internet_unformat): When scanning past + quotes, remove them also. + (camel_internet_address_format_address): If the name contains "'s, + or ','s then strip and quotes and wrap the whole lot in one set of + quotes. + + * Makefile.am (noinst_HEADERS): We dont want to install + camel-charset-map-private.h, ever. There are probably other + similar files ..? + + * camel-mime-part.c (write_to_stream): Fold header lines + appropriately as we're writing them out. + + * camel-mime-utils.c (header_fold): Add a new argument, headerlen, + tells it how long the associated header token is. + (header_fold): Also,k check to see if we need to fold first, using + a better algorithm, and also accept already-folded lines, and + re-process accordingly. + (rfc2047_decode_word): Add a little buffer space to iconv output + for shifting overheads? + (rfc2047_decode_word): finish the iconv with a null call, to flush + shift state, etc. + (rfc2047_encode_word): Attempt to break up long words into + appropriately sized, independent, chunks. See rfc2047, section 2. + (header_decode_mailbox): Dont add in extra spaces into the output + if we are decoding adjacent encoded words. We can only guess this + case, as some broken mailers put encoded words inside quoted + words. + (header_encode_phrase): Dont merge words if they are going to end + up too long. Also change back ot only merge consecutive words of + the same type. e.g. 'foo. 
blah fum.' -> "foo." blah "fum." or + 'iam an. idiot' -> iam "an." idiot + * camel-medium.c (camel_medium_set_header): Hrm, we actually want to call set_header, not add_header here, probably explains some duplicate X-Evolution headers i was trying to track down. Also diff --git a/camel/Makefile.am b/camel/Makefile.am index cb2924e467..bff90e3d89 100644 --- a/camel/Makefile.am +++ b/camel/Makefile.am @@ -71,7 +71,6 @@ libcamelinclude_HEADERS = \ broken-date-parser.h \ camel-address.h \ camel-charset-map.h \ - camel-charset-map-private.h \ camel-data-wrapper.h \ camel-exception-list.def \ camel-exception.h \ @@ -126,6 +125,10 @@ libcamel_la_LDFLAGS = -version-info 0:0:0 -rpath $(libdir) libcamel_la_LIBADD = $(top_builddir)/e-util/libeutil.la $(UNICODE_LIBS) + +noinst_HEADERS = \ + camel-charset-map-private.h + EXTRA_DIST = \ README diff --git a/camel/camel-internet-address.c b/camel/camel-internet-address.c index 1bcd532625..32e383c093 100644 --- a/camel/camel-internet-address.c +++ b/camel/camel-internet-address.c @@ -162,10 +162,16 @@ internet_unformat(CamelAddress *a, const char *raw) do { c = (unsigned char)*p++; switch (c) { - /* HMMM. Not sure we need this, we dont quote the names anyway ... */ + /* removes quotes, they should only be around the total name anyway */ case '"': - while (*p && *p != '"') - p++; + p[-1] = ' '; + while (*p) + if (*p == '"') { + *p++ = ' '; + break; + } else { + p++; + } break; case '<': if (name == NULL) @@ -186,7 +192,7 @@ internet_unformat(CamelAddress *a, const char *raw) name = g_strstrip(name); addr = g_strstrip(addr); if (addr[0]) { - d(printf("found address: %s <%s>\n", name, addr)); + d(printf("found address: '%s' <%s>\n", name, addr)); camel_internet_address_add((CamelInternetAddress *)a, name, addr); } name = NULL; @@ -419,8 +425,8 @@ camel_internet_address_encode_address(const char *real, const char *addr) /** * camel_internet_address_format_address: - * @name: - * @addr: + * @name: A name, quotes may be stripped from it. 
+ * @addr: Assumes a valid rfc822 email address. * * Function to format a single address, suitable for display. * @@ -433,11 +439,27 @@ camel_internet_address_format_address(const char *name, const char *addr) g_assert(addr); -#warning "If name contains a quote, then we're thrown for six ... " - if (name && name[0]) + if (name && name[0]) { + const char *p = name; + char *o, c; + + while ((c = *p++)) { + if (c == '\"' || c == ',') { + o = ret = g_malloc(strlen(name)+3+strlen(addr)+3 + 1); + p = name; + *o++ = '\"'; + while ((c = *p++)) + if (c != '\"') + *o++ = c; + *o++ = '\"'; + sprintf(o, " <%s>", addr); + d(printf("encoded '%s' => '%s'\n", name, ret)); + return ret; + } + } ret = g_strdup_printf("%s <%s>", name, addr); - else - ret = g_strdup_printf("%s", addr); + } else + ret = g_strdup(addr); return ret; } diff --git a/camel/camel-mime-part.c b/camel/camel-mime-part.c index 86ac9c0b13..e553e60257 100644 --- a/camel/camel-mime-part.c +++ b/camel/camel-mime-part.c @@ -494,13 +494,17 @@ write_to_stream(CamelDataWrapper *data_wrapper, CamelStream *stream) if (mp->headers) { struct _header_raw *h = mp->headers; + char *val; while (h) { - if (h->value == NULL){ + val = h->value; + if (val == NULL) { g_warning("h->value is NULL here for %s", h->name); count = 0; } else { - count = camel_stream_printf(stream, "%s%s%s\n", h->name, isspace(h->value[0]) ? ":" : ": ", h->value); + val = header_fold(val, strlen(h->name)); + count = camel_stream_printf(stream, "%s%s%s\n", h->name, isspace(val[0]) ? 
":" : ": ", val); + g_free(val); } if (count == -1) return -1; diff --git a/camel/camel-mime-utils.c b/camel/camel-mime-utils.c index ccdd03634e..425c77d35d 100644 --- a/camel/camel-mime-utils.c +++ b/camel/camel-mime-utils.c @@ -877,7 +877,7 @@ rfc2047_decode_word(const char *in, int len) int inlen, outlen; iconv_t ic; - d(printf("decoding '%.*s'\n", len, in)); + d(printf("rfc2047: decoding '%.*s'\n", len, in)); /* just make sure we're not passed shit */ if (len<7 @@ -916,7 +916,7 @@ rfc2047_decode_word(const char *in, int len) inbuf = decword; - outlen = inlen*6; + outlen = inlen*6+16; outbase = alloca(outlen); outbuf = outbase; @@ -924,11 +924,12 @@ rfc2047_decode_word(const char *in, int len) ic = iconv_open("UTF-8", encname); if (ic != (iconv_t)-1) { ret = iconv(ic, (const char **)&inbuf, &inlen, &outbuf, &outlen); - iconv_close(ic); if (ret>=0) { + iconv(ic, NULL, 0, &outbuf, &outlen); *outbuf = 0; decoded = g_strdup(outbase); } + iconv_close(ic); } else { w(g_warning("Cannot decode charset, header display may be corrupt: %s: %s", encname, strerror(errno))); /* TODO: Should this do this, or just leave the encoded strings? */ @@ -1095,46 +1096,109 @@ header_decode_string(const char *in) return header_decode_text(in, strlen(in)); } +/* how long a sequence of pre-encoded words should be less than, to attempt to + fit into a properly folded word. Only a guide. */ +#define CAMEL_FOLD_PREENCODED (24) + /* FIXME: needs a way to cache iconv opens for different charsets? 
*/ static void rfc2047_encode_word(GString *outstring, const char *in, int len, const char *type, unsigned short safemask) { - iconv_t ic; + iconv_t ic = (iconv_t *)-1; char *buffer, *out, *ascii; - size_t inlen, outlen, enclen; + size_t inlen, outlen, enclen, bufflen; + const char *inptr, *p; + int first = 1; - d(printf("Converting '%.*s' to %s\n", len, in, type)); + d(printf("Converting [%d] '%.*s' to %s\n", len, len, in, type)); /* convert utf8->encoding */ - outlen = len*6; - buffer = alloca(outlen); + bufflen = len*6+16; + buffer = alloca(bufflen); inlen = len; - out = buffer; - - /* if we can't convert from utf-8, just encode as utf-8 */ - if (!strcasecmp(type, "UTF-8") - || (ic = iconv_open(type, "UTF-8")) == (iconv_t)-1) { - memcpy(buffer, in, len); - out = buffer+len; - type = "UTF-8"; - } else { - if (iconv(ic, &in, &inlen, &out, &outlen) == -1) { - w(g_warning("Conversion problem: conversion truncated: %s", strerror(errno))); + inptr = in; + + ascii = alloca(bufflen); + + if (strcasecmp(type, "UTF-8") != 0) + ic = iconv_open(type, "UTF-8"); + + while (inlen) { + int convlen, i, proclen; + + /* break up words into smaller bits, what we really want is encoded + overhead < 75, + but we'll just guess what that means in terms of input chars, and assume its good enough */ + + out = buffer; + outlen = bufflen; + + if (ic == (iconv_t) -1) { + /* native encoding case, the easy one (?) 
*/ + /* we work out how much we can convert, and still be in length */ + /* proclen will be the result of input characters that we can convert, to the nearest + (approximated) valid utf8 char */ + convlen = 0; + proclen = 0; + p = inptr; + i = 0; + while (p < (in+len) && convlen < (75 - strlen("=?utf-8?q??="))) { + unsigned char c = *p++; + + if (c >= 0xc0) + proclen = i; + i++; + if (c < 0x80) + proclen = i; + if (camel_mime_special_table[c] & safemask) + convlen += 1; + else + convlen += 3; + } + /* well, we probably have broken utf8, just copy it anyway what the heck */ + if (proclen == 0) { + w(g_warning("Appear to have truncated utf8 sequence")); + proclen = inlen; + } + memcpy(out, inptr, proclen); + inptr += proclen; + inlen -= proclen; + out += proclen; + } else { + /* well we could do similar, but we can't (without undue effort), we'll just break it up into + hopefully-small-enough chunks, and leave it at that */ + convlen = MIN(inlen, CAMEL_FOLD_PREENCODED); + p = inptr; + if (iconv(ic, &inptr, &convlen, &out, &outlen) == -1) { + w(g_warning("Conversion problem: conversion truncated: %s", strerror(errno))); + /* blah, we include it anyway, better than infinite loop ... */ + inptr = p + convlen; + } else { + /* make sure we flush out any shift state */ + iconv(ic, NULL, 0, &out, &outlen); + } + inlen -= (inptr - p); } - iconv_close(ic); - } - enclen = out-buffer; - /* now create qp version */ - ascii = alloca(enclen*3 + strlen(type) + 8); - out = ascii; - /* should determine which encoding is smaller, and use that? 
*/ - out += sprintf(out, "=?%s?Q?", type); - out += quoted_encode(buffer, enclen, out, safemask); - sprintf(out, "?="); + enclen = out-buffer; + + /* create token */ + out = ascii; + if (first) + first = 0; + else + *out++ = ' '; + out += sprintf(out, "=?%s?Q?", type); + out += quoted_encode(buffer, enclen, out, safemask); + sprintf(out, "?="); + + d(printf("converted part = %s\n", ascii)); - d(printf("converted = %s\n", ascii)); - g_string_append(outstring, ascii); + g_string_append(outstring, ascii); + } + + if (ic == (iconv_t) -1) { + iconv_close(ic); + } } @@ -1162,7 +1226,6 @@ header_encode_string(const unsigned char *in) /* This gets each word out of the input, and checks to see what charset can be used to encode it. */ /* TODO: Work out when to merge subsequent words, or across word-parts */ - /* FIXME: Make sure a converted word is less than the encoding size */ out = g_string_new(""); inptr = in; encoding = 0; @@ -1275,6 +1338,20 @@ header_encode_phrase(const unsigned char *in) out = g_string_new(""); +#if 0 + { + int i; + + printf("encoding phrase: %s\n", in); + for (i=0;in[i];i++) { + printf(" %02x", in[i]); + if (((i) & 15) == 15) + printf("\n"); + } + printf("\n"); + } +#endif + /* break the input into words */ type = WORD_ATOM; count = 0; @@ -1338,12 +1415,18 @@ header_encode_phrase(const unsigned char *in) nextl = g_list_next(wordl); while (nextl) { next = nextl->data; - /* merge nodes of the same (or lower?) 
type*/ - if (word->type == next->type || (next->type < word->type && word->type < WORD_2047) ) { - word->end = next->end; - words = g_list_remove_link(words, nextl); - g_free(next); - nextl = g_list_next(wordl); + /* merge nodes of the same type AND we are not creating too long a string */ + if (word->type == next->type) { + if (next->end - word->start < CAMEL_FOLD_PREENCODED) { + word->end = next->end; + words = g_list_remove_link(words, nextl); + g_free(next); + nextl = g_list_next(wordl); + } else { + /* if it is going to be too long, make sure we include the separating whitespace */ + word->end = next->start; + break; + } } else { break; } @@ -1377,7 +1460,12 @@ header_encode_phrase(const unsigned char *in) if (nextl) { int i; next = nextl->data; - for (i=next->start-word->end;i>0;i--) + /* if they are adjacent, it means we already had the spaces encoded internally, + so now we just need to output 1 space */ + i=next->start-word->end; + if (i==0) + i=1; + for (;i>0;i--) out = g_string_append_c(out, ' '); } @@ -1822,17 +1910,27 @@ header_decode_mailbox(const char **in) /* ',' and '\0' required incase it is a simple address, no @ domain part (buggy writer) */ name = g_string_new(""); while (pre) { - char *text; + char *text, *last; - /* perform internationalised decoding, and appent */ + /* perform internationalised decoding, and append */ text = header_decode_string(pre); name = g_string_append(name, text); - g_free(pre); + last = pre; g_free(text); pre = header_decode_word(&inptr); - if (pre) - name = g_string_append_c(name, ' '); + if (pre) { + int l = strlen(last); + int p = strlen(pre); + /* dont append ' ' between sucsessive encoded words */ + if ((l>6 && last[l-2] == '?' 
&& last[l-1] == '=') + && (p>6 && pre[0] == '=' && pre[1] == '?')) { + /* dont append ' ' */ + } else { + name = g_string_append_c(name, ' '); + } + } + g_free(last); } header_decode_lwsp(&inptr); if (*inptr == '<') { @@ -2999,21 +3097,45 @@ header_address_list_format(struct _header_address *a) } /* simple header folding */ -/* note: assumes the input has not already been folded */ +/* will work even if the header is already folded */ char * -header_fold(const char *in) +header_fold(const char *in, int headerlen) { int len, outlen, i; - const char *inptr = in, *space; + const char *inptr = in, *space, *p, *n; GString *out; char *ret; + int needunfold = FALSE; + + if (in == NULL) + return NULL; - len = strlen(in); - if (len <= CAMEL_FOLD_SIZE) + /* first, check to see if we even need to fold */ + len = headerlen + 2; + p = in; + while (*p) { + n = strchr(p, '\n'); + if (n == NULL) { + n = p+strlen(p); + } else { + needunfold = TRUE; + } + len += n-p; + + if (len >= CAMEL_FOLD_SIZE) + break; + len = 0; + p = n; + } + if (len < CAMEL_FOLD_SIZE) return g_strdup(in); + /* we need to fold, so first unfold (if we need to), then process */ + if (needunfold) + inptr = in = header_unfold(in); + out = g_string_new(""); - outlen = 0; + outlen = headerlen+2; while (*inptr) { space = strchr(inptr, ' '); if (space) { @@ -3021,7 +3143,9 @@ header_fold(const char *in) } else { len = strlen(inptr); } + printf("next word '%.*s'\n", len, inptr); if (outlen + len > CAMEL_FOLD_SIZE) { + printf("outlen = %d wordlen = %d\n", outlen, len); g_string_append(out, "\n\t"); outlen = 1; /* check for very long words, just cut them up */ @@ -3042,6 +3166,10 @@ header_fold(const char *in) } ret = out->str; g_string_free(out, FALSE); + + if (needunfold) + g_free((char *)in); + return ret; } diff --git a/camel/camel-mime-utils.h b/camel/camel-mime-utils.h index 9bcdb063ac..119eda9626 100644 --- a/camel/camel-mime-utils.h +++ b/camel/camel-mime-utils.h @@ -26,7 +26,7 @@ #include <time.h> /* maximum 
size of a line from header_fold() */ -#define CAMEL_FOLD_SIZE (72) +#define CAMEL_FOLD_SIZE (77) /* a list of references for this message */ struct _header_references { @@ -138,10 +138,11 @@ const char *header_raw_find(struct _header_raw **list, const char *name, int *of const char *header_raw_find_next(struct _header_raw **list, const char *name, int *ofset, const char *last); void header_raw_replace(struct _header_raw **list, const char *name, const char *value, int offset); void header_raw_remove(struct _header_raw **list, const char *name); +void header_raw_fold(struct _header_raw **list); void header_raw_clear(struct _header_raw **list); /* fold a header */ -char *header_fold(const char *in); +char *header_fold(const char *in, int headerlen); char *header_unfold(const char *in); /* decode a header which is a simple token */ diff --git a/camel/providers/smtp/camel-smtp-transport.c b/camel/providers/smtp/camel-smtp-transport.c index a4115c60e8..2d20cadce8 100644 --- a/camel/providers/smtp/camel-smtp-transport.c +++ b/camel/providers/smtp/camel-smtp-transport.c @@ -599,13 +599,11 @@ smtp_data (CamelSmtpTransport *transport, CamelMedium *message, gboolean has_8bi /* now we can actually send what's important :p */ gchar *cmdbuf, *respbuf = NULL; CamelStreamFilter *filtered_stream; - CamelMimeFilter *crlffilter, *lwfilter; - gint crlfid, lwid; - + CamelMimeFilter *crlffilter; /* if the message contains 8bit mime parts and the server doesn't support it, encode 8bit parts to the best - encoding. */ + encoding. 
This will also enforce an encoding to keep the lines in limit */ if (has_8bit_parts && !CAMEL_TRANSPORT (transport)->supports_8bit) camel_mime_message_encode_8bit_parts (CAMEL_MIME_MESSAGE (message)); @@ -641,12 +639,9 @@ smtp_data (CamelSmtpTransport *transport, CamelMedium *message, gboolean has_8bi respbuf = NULL; /* setup stream filtering */ - lwfilter = camel_mime_filter_linewrap_new (998, 998, '\t'); crlffilter = camel_mime_filter_crlf_new (CAMEL_MIME_FILTER_CRLF_ENCODE, CAMEL_MIME_FILTER_CRLF_MODE_CRLF_DOTS); - filtered_stream = camel_stream_filter_new_with_stream (transport->ostream); - lwid = camel_stream_filter_add (filtered_stream, CAMEL_MIME_FILTER (lwfilter)); - crlfid = camel_stream_filter_add (filtered_stream, CAMEL_MIME_FILTER (crlffilter)); + camel_stream_filter_add (filtered_stream, CAMEL_MIME_FILTER (crlffilter)); if (camel_data_wrapper_write_to_stream (CAMEL_DATA_WRAPPER (message), CAMEL_STREAM (filtered_stream)) == -1) { camel_exception_setv (ex, CAMEL_EXCEPTION_SYSTEM, @@ -654,15 +649,11 @@ smtp_data (CamelSmtpTransport *transport, CamelMedium *message, gboolean has_8bi "%s: mail not sent"), g_strerror (errno)); - camel_stream_filter_remove (filtered_stream, lwid); - camel_stream_filter_remove (filtered_stream, crlfid); camel_object_unref (CAMEL_OBJECT (filtered_stream)); return FALSE; } - camel_stream_filter_remove (filtered_stream, lwid); - camel_stream_filter_remove (filtered_stream, crlfid); camel_stream_flush (CAMEL_STREAM (filtered_stream)); camel_object_unref (CAMEL_OBJECT (filtered_stream)); |