diff options
-rw-r--r-- | camel/ChangeLog | 17 | ||||
-rw-r--r-- | camel/camel-data-wrapper.c | 1 | ||||
-rw-r--r-- | camel/camel-data-wrapper.h | 4 | ||||
-rw-r--r-- | camel/camel-mime-part-utils.c | 255 | ||||
-rw-r--r-- | camel/camel-mime-part.c | 8 |
5 files changed, 135 insertions, 150 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog index 598ae85f18..e10d1fb943 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,3 +1,20 @@ +2001-08-27 Not Zed <NotZed@Ximian.com> + + * camel-mime-part-utils.c (convert_buffer): re-read the iconv man + page, and treat the return value properly. It returns the number + of non-reversible conversions performed, not the number of output + characters, sigh. + (check_html_charset): Changed to just take a buffer of data, and + not the mime parser. + (simple_data_wrapper_construct_from_parser): Since we dont need + the charset till we have all the data, search for the charset + after we've read the data, if we have html data with no charset in + the header. + (simple_data_wrapper_construct_from_parser): Remove the + seekable_source stuff. + + * Re-apply patches from before. + 2001-08-25 Not Zed <NotZed@Ximian.com> ["Summarising" and "Synchronising" are spelt with a "s" in diff --git a/camel/camel-data-wrapper.c b/camel/camel-data-wrapper.c index dd2f475b1e..3a618ead6f 100644 --- a/camel/camel-data-wrapper.c +++ b/camel/camel-data-wrapper.c @@ -71,6 +71,7 @@ camel_data_wrapper_init (gpointer object, gpointer klass) camel_data_wrapper->mime_type = header_content_type_new ("application", "octet-stream"); camel_data_wrapper->offline = FALSE; + camel_data_wrapper->rawtext = FALSE; } static void diff --git a/camel/camel-data-wrapper.h b/camel/camel-data-wrapper.h index d256c5e842..9d7b62dd0c 100644 --- a/camel/camel-data-wrapper.h +++ b/camel/camel-data-wrapper.h @@ -48,7 +48,9 @@ struct _CamelDataWrapper CamelContentType *mime_type; CamelStream *stream; - gboolean offline; + + unsigned int offline:1; + unsigned int rawtext:1; }; typedef struct { diff --git a/camel/camel-mime-part-utils.c b/camel/camel-mime-part-utils.c index d1c41377d1..a1024ae2b2 100644 --- a/camel/camel-mime-part-utils.c +++ b/camel/camel-mime-part-utils.c @@ -29,6 +29,7 @@ #include <stdio.h> #include <string.h> #include <unistd.h> +#include <errno.h> #include "string-utils.h" #include "camel-mime-part-utils.h" @@ -49,40 +50,16 @@ /* example: <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> */ static const char * -check_html_charset (CamelMimeParser *mp, CamelMimeFilterBasicType enctype) +check_html_charset(char *buffer, int length) { - const char *buf; - off_t offset; - int length; CamelHTMLParser *hp; const char *charset = NULL; camel_html_parser_t state; struct _header_content_type *ct; - CamelMimeFilterBasic *fdec = NULL; - - /* if we can't find the charset within the first 2k, we ain't gonna find it */ - offset = camel_mime_parser_tell(mp); - length = camel_mime_parser_read(mp, &buf, 2048); - - d(printf("Checking html for meta content-type: '%.*s'", len, buf)); - - if (length == 0) { - camel_mime_parser_seek(mp, offset, SEEK_SET); - return NULL; - } /* if we need to first base64/qp decode, do this here, sigh */ hp = camel_html_parser_new(); - if (enctype != 0) { - int dummy, len; - char *buffer; - - fdec = camel_mime_filter_basic_new_type(enctype); - camel_mime_filter_filter((CamelMimeFilter *)fdec, (char *)buf, length, 0, &buffer, &len, &dummy); - camel_html_parser_set_data(hp, buffer, len, TRUE); - } else { - camel_html_parser_set_data(hp, buf, length, TRUE); - } + camel_html_parser_set_data(hp, buffer, length, TRUE); do { const char *data; @@ -96,7 +73,7 @@ check_html_charset (CamelMimeParser *mp, CamelMimeFilterBasicType enctype) switch(state) { case CAMEL_HTML_PARSER_ELEMENT: val = camel_html_parser_tag(hp); - d(printf("Got tag: %s\n", tag)); + d(printf("Got tag: %s\n", val)); if (g_strcasecmp(val, "meta") == 0 && (val = camel_html_parser_attr(hp, "http-equiv")) && g_strcasecmp(val, "content-type") == 0 @@ -115,47 +92,85 @@ check_html_charset (CamelMimeParser *mp, CamelMimeFilterBasicType enctype) } while (charset == NULL && state != CAMEL_HTML_PARSER_EOF); camel_object_unref((CamelObject *)hp); - if (fdec) - camel_object_unref((CamelObject *)fdec); - - camel_mime_parser_seek(mp, offset, SEEK_SET); return charset; } +static GByteArray *convert_buffer(GByteArray *in, const char *to, const char *from) +{ + iconv_t ic; + int inlen, outlen, i=2; + char *inbuf, *outbuf; + char *buffer; + GByteArray *out = NULL; + + d(printf("converting buffer from %s to %s: '%.*s'\n", from, to, (int)in->len, in->data)); + + ic = iconv_open(to, from); + if (ic == (iconv_t) -1) { + g_warning("Cannot convert from '%s' to '%s': %s", from, to, strerror(errno)); + return NULL; + } + + do { + /* make plenty of space? */ + outlen = in->len * i + 16; + buffer = g_malloc(outlen); + + inbuf = in->data; + inlen = in->len; + outbuf = buffer; + + if (iconv(ic, (const char **)&inbuf, &inlen, &outbuf, &outlen) == -1) { + g_free(buffer); + g_warning("conversion failed: %s", strerror(errno)); + /* we didn't have enough space */ + if (errno == E2BIG && i<6) { + i++; + continue; + } + break; + } + + out = g_byte_array_new(); + g_byte_array_append(out, buffer, (in->len*i+16) - outlen); + + /* close off the conversion */ + outbuf = buffer; + outlen = in->len * i + 16; + if (iconv(ic, NULL, 0, &outbuf, &outlen) != -1) + g_byte_array_append(out, buffer, (in->len*i+16) - outlen); + g_free(buffer); + + d(printf("converted: '%.*s'\n", (int)out->len, out->data)); + + break; + } while (1); + + iconv_close(ic); + + return out; +} + /* simple data wrapper */ static void simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser *mp) { - CamelMimeFilter *fdec = NULL, *fcrlf = NULL, *fch = NULL; - int len, decid = -1, crlfid = -1, chrid = -1; + CamelMimeFilter *fdec = NULL, *fcrlf = NULL; + int len, decid = -1, crlfid = -1; struct _header_content_type *ct; - CamelSeekableStream *seekable_source = NULL; - CamelStream *source; GByteArray *buffer; - off_t start = 0, end; char *encoding, *buf; + const char *charset = NULL; CamelMimeFilterBasicType enctype = 0; - + CamelStream *mem; + d(printf("constructing data-wrapper\n")); - /* Ok, try and be smart. If we're storing a small message (typical) convert it, - and store it in memory as we parse it ... if not, throw away the conversion - and scan till the end ... */ - - /* if we can't seek, dont have a stream/etc, then we must cache it */ - source = camel_mime_parser_stream (mp); - if (source) { - camel_object_ref ((CamelObject *)source); - if (CAMEL_IS_SEEKABLE_STREAM (source)) { - seekable_source = CAMEL_SEEKABLE_STREAM (source); - } - } - /* first, work out conversion, if any, required, we dont care about what we dont know about */ - encoding = header_content_encoding_decode (camel_mime_parser_header (mp, "content-transfer-encoding", NULL)); + encoding = header_content_encoding_decode(camel_mime_parser_header(mp, "content-transfer-encoding", NULL)); if (encoding) { - if (!strcasecmp (encoding, "base64")) { + if (!strcasecmp(encoding, "base64")) { d(printf("Adding base64 decoder ...\n")); enctype = CAMEL_MIME_FILTER_BASIC_BASE64_DEC; } else if (!strcasecmp(encoding, "quoted-printable")) { @@ -171,106 +186,61 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser } /* If we're doing text, we also need to do CRLF->LF and may have to convert it to UTF8 as well. */ - ct = camel_mime_parser_content_type (mp); - if (header_content_type_is (ct, "text", "*")) { - const char *charset = header_content_type_param (ct, "charset"); + ct = camel_mime_parser_content_type(mp); + if (header_content_type_is(ct, "text", "*")) { + charset = header_content_type_param(ct, "charset"); if (fdec) { d(printf("Adding CRLF conversion filter\n")); - fcrlf = (CamelMimeFilter *)camel_mime_filter_crlf_new (CAMEL_MIME_FILTER_CRLF_DECODE, - CAMEL_MIME_FILTER_CRLF_MODE_CRLF_ONLY); - crlfid = camel_mime_parser_filter_add (mp, fcrlf); - } - - /* Possible Lame Mailer Alert... check the META tags for a charset */ - if (!charset && header_content_type_is (ct, "text", "html")) - charset = check_html_charset (mp, enctype); - - /* if the charset is not us-ascii or utf-8, then we need to convert to utf-8 */ - if (charset && !(g_strcasecmp (charset, "us-ascii") == 0 || g_strcasecmp (charset, "utf-8") == 0)) { - d(printf("Adding conversion filter from %s to UTF-8\n", charset)); - fch = (CamelMimeFilter *)camel_mime_filter_charset_new_convert (charset, "UTF-8"); - if (fch) { - chrid = camel_mime_parser_filter_add (mp, (CamelMimeFilter *)fch); - } else { - g_warning ("Cannot convert '%s' to 'UTF-8', message display may be corrupt", charset); - } + fcrlf = (CamelMimeFilter *)camel_mime_filter_crlf_new(CAMEL_MIME_FILTER_CRLF_DECODE, + CAMEL_MIME_FILTER_CRLF_MODE_CRLF_ONLY); + crlfid = camel_mime_parser_filter_add(mp, fcrlf); } } - buffer = g_byte_array_new (); - - if (seekable_source /* !cache */) { - start = camel_mime_parser_tell (mp) + seekable_source->bound_start; - } - - while (camel_mime_parser_step (mp, &buf, &len) != HSCAN_BODY_END) { + /* read in the entire content */ + buffer = g_byte_array_new(); + while (camel_mime_parser_step(mp, &buf, &len) != HSCAN_BODY_END) { d(printf("appending o/p data: %d: %.*s\n", len, len, buf)); - if (buffer) { - if (buffer->len > 20480 && seekable_source) { - /* is this a 'big' message? Yes? We dont want to convert it all then. */ - camel_mime_parser_filter_remove (mp, decid); - camel_mime_parser_filter_remove (mp, chrid); - decid = -1; - chrid = -1; - g_byte_array_free (buffer, TRUE); - buffer = NULL; - } else { - g_byte_array_append (buffer, buf, len); - } - } + g_byte_array_append(buffer, buf, len); } - - if (buffer) { - CamelStream *mem; - - d(printf("Small message part, kept in memory!\n")); - - mem = camel_stream_mem_new_with_byte_array (buffer); - camel_data_wrapper_construct_from_stream (dw, mem); - camel_object_unref ((CamelObject *)mem); - } else { - CamelStream *sub; - CamelStreamFilter *filter; - - d(printf("Big message part, left on disk ...\n")); - - end = camel_mime_parser_tell (mp) + seekable_source->bound_start; - sub = camel_seekable_substream_new_with_seekable_stream_and_bounds (seekable_source, start, end); - if (fdec || fch) { - filter = camel_stream_filter_new_with_stream (sub); - if (fdec) { - camel_mime_filter_reset (fdec); - camel_stream_filter_add (filter, fdec); - } - if (fcrlf) { - camel_mime_filter_reset (fcrlf); - camel_stream_filter_add (filter, fcrlf); - } - if (fch) { - camel_mime_filter_reset (fch); - camel_stream_filter_add (filter, fch); - } - camel_data_wrapper_construct_from_stream (dw, (CamelStream *)filter); - camel_object_unref ((CamelObject *)filter); + + /* Possible Lame Mailer Alert... check the META tags for a charset */ + if (!charset && header_content_type_is (ct, "text", "html")) + charset = check_html_charset(buffer->data, buffer->len); + + /* if we need to do charset conversion, see if we can/it works/etc */ + if (charset && !(strcasecmp(charset, "us-ascii") == 0 + || strcasecmp(charset, "utf-8") == 0 + || strncasecmp(charset, "x-", 2) == 0)) { + GByteArray *out; + + out = convert_buffer(buffer, "UTF-8", charset); + if (out) { + /* converted ok, use this data instead */ + g_byte_array_free(buffer, TRUE); + buffer = out; } else { - camel_data_wrapper_construct_from_stream (dw, sub); + g_warning("Storing text as raw, unknown charset '%s' or invalid format", charset); + /* else failed to convert, leave as raw? */ + dw->rawtext = TRUE; + /* should we change the content-type header? */ } - camel_object_unref ((CamelObject *)sub); } - - camel_mime_parser_filter_remove (mp, decid); - camel_mime_parser_filter_remove (mp, crlfid); - camel_mime_parser_filter_remove (mp, chrid); + + d(printf("message part kept in memory!\n")); + + mem = camel_stream_mem_new_with_byte_array(buffer); + camel_data_wrapper_construct_from_stream(dw, mem); + camel_object_unref((CamelObject *)mem); + + camel_mime_parser_filter_remove(mp, decid); + camel_mime_parser_filter_remove(mp, crlfid); if (fdec) - camel_object_unref ((CamelObject *)fdec); + camel_object_unref((CamelObject *)fdec); if (fcrlf) - camel_object_unref ((CamelObject *)fcrlf); - if (fch) - camel_object_unref ((CamelObject *)fch); - if (source) - camel_object_unref ((CamelObject *)source); + camel_object_unref((CamelObject *)fcrlf); } /* This replaces the data wrapper repository ... and/or could be replaced by it? */ @@ -294,10 +264,8 @@ camel_mime_part_construct_content_from_parser (CamelMimePart *dw, CamelMimeParse break; case HSCAN_MULTIPART: { CamelDataWrapper *bodypart; - -#ifndef NO_WARNINGS -#warning This should use a camel-mime-multipart -#endif + + /* FIXME: we should use a came-mime-mutlipart, not jsut a camel-multipart, but who cares */ d(printf("Creating multi-part\n")); content = (CamelDataWrapper *)camel_multipart_new (); @@ -321,9 +289,6 @@ camel_mime_part_construct_content_from_parser (CamelMimePart *dw, CamelMimeParse g_warning("Invalid state encountered???: %d", camel_mime_parser_state (mp)); } if (content) { -#ifndef NO_WARNINGS -#warning there just has got to be a better way ... to transfer the mime-type to the datawrapper -#endif /* would you believe you have to set this BEFORE you set the content object??? oh my god !!!! */ camel_data_wrapper_set_mime_type_field (content, camel_mime_part_get_content_type ((CamelMimePart *)dw)); diff --git a/camel/camel-mime-part.c b/camel/camel-mime-part.c index a6ab3e58d5..a4d9a2eb0d 100644 --- a/camel/camel-mime-part.c +++ b/camel/camel-mime-part.c @@ -606,10 +606,10 @@ write_to_stream(CamelDataWrapper *data_wrapper, CamelStream *stream) break; } - if (header_content_type_is (mp->content_type, "text", "*")) { - charset = header_content_type_param (mp->content_type, "charset"); - if (charset && !(!g_strcasecmp (charset, "us-ascii") || !g_strcasecmp (charset, "utf-8"))) { - charenc = (CamelMimeFilter *)camel_mime_filter_charset_new_convert ("UTF-8", charset); + if (!data_wrapper->rawtext && header_content_type_is(mp->content_type, "text", "*")) { + charset = header_content_type_param(mp->content_type, "charset"); + if (charset && !(!strcasecmp(charset, "us-ascii") || !strcasecmp(charset, "utf-8"))) { + charenc = (CamelMimeFilter *)camel_mime_filter_charset_new_convert("UTF-8", charset); } } |