aboutsummaryrefslogtreecommitdiffstats
path: root/camel/camel-mime-part-utils.c
diff options
context:
space:
mode:
authorJeffrey Stedfast <fejj@ximian.com>2001-12-18 09:28:27 +0800
committerJeffrey Stedfast <fejj@src.gnome.org>2001-12-18 09:28:27 +0800
commitf6408daa103092f18789a719a4123224b259f71f (patch)
tree838b491516e1b3669428136d73019aa9afe5f2c3 /camel/camel-mime-part-utils.c
parent13299ab7e073cf4d412cec019e4240a7634c1cf5 (diff)
downloadgsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.tar
gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.tar.gz
gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.tar.bz2
gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.tar.lz
gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.tar.xz
gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.tar.zst
gsoc2013-evolution-f6408daa103092f18789a719a4123224b259f71f.zip
New function to map ISO charsets to the Windows charsets.
2001-12-17 Jeffrey Stedfast <fejj@ximian.com> * camel-charset-map.c (camel_charset_iso_to_windows): New function to map ISO charsets to the Windows charsets. * camel-mime-part-utils.c (broken_windows_charset): Detect Windows charsets. (simple_data_wrapper_construct_from_parser): Simplify a tad and also check for iso-8859-* charsets that are really Windows charsets. Fixes bug #12631. svn path=/trunk/; revision=15144
Diffstat (limited to 'camel/camel-mime-part-utils.c')
-rw-r--r--camel/camel-mime-part-utils.c75
1 files changed, 49 insertions, 26 deletions
diff --git a/camel/camel-mime-part-utils.c b/camel/camel-mime-part-utils.c
index 65c99c6dc8..08787df2cd 100644
--- a/camel/camel-mime-part-utils.c
+++ b/camel/camel-mime-part-utils.c
@@ -155,6 +155,28 @@ convert_buffer (GByteArray *in, const char *to, const char *from)
return out;
}
+/* We don't really use the charset argument except for debugging... */
+static gboolean
+broken_windows_charset (GByteArray *buffer, const char *charset)
+{
+ register unsigned char *inptr;
+ unsigned char *inend;
+
+ inptr = buffer->data;
+ inend = inptr + buffer->len;
+
+ while (inptr < inend) {
+ register unsigned char c = *inptr++;
+
+ if (c >= 128 && c <= 159) {
+ g_warning ("Encountered Windows charset parading as %s", charset);
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
static gboolean
is_7bit (GByteArray *buffer)
{
@@ -172,33 +194,24 @@ static void
simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser *mp)
{
CamelMimeFilter *fdec = NULL, *fcrlf = NULL;
+ CamelMimeFilterBasicType enctype;
int len, decid = -1, crlfid = -1;
struct _header_content_type *ct;
+ const char *charset = NULL;
GByteArray *buffer;
char *encoding, *buf;
- const char *charset = NULL;
- CamelMimeFilterBasicType enctype = 0;
CamelStream *mem;
-
- d(printf("constructing data-wrapper\n"));
+
+ d(printf ("simple_data_wrapper_construct_from_parser()\n"));
/* first, work out conversion, if any, required, we dont care about what we dont know about */
- encoding = header_content_encoding_decode(camel_mime_parser_header(mp, "content-transfer-encoding", NULL));
+ encoding = header_content_encoding_decode (camel_mime_parser_header (mp, "Content-Transfer-Encoding", NULL));
if (encoding) {
- if (!strcasecmp(encoding, "base64")) {
- d(printf("Adding base64 decoder ...\n"));
- enctype = CAMEL_MIME_FILTER_BASIC_BASE64_DEC;
- } else if (!strcasecmp(encoding, "quoted-printable")) {
- d(printf("Adding quoted-printable decoder ...\n"));
- enctype = CAMEL_MIME_FILTER_BASIC_QP_DEC;
- } else if (!strcasecmp (encoding, "x-uuencode")) {
- d(printf("Adding uudecoder ...\n"));
- enctype = CAMEL_MIME_FILTER_BASIC_UU_DEC;
- }
+ enctype = camel_mime_part_encoding_from_string (encoding);
g_free (encoding);
- if (enctype != 0) {
- fdec = (CamelMimeFilter *)camel_mime_filter_basic_new_type(enctype);
+ if (enctype != CAMEL_MIME_PART_ENCODING_DEFAULT) {
+ fdec = (CamelMimeFilter *) camel_mime_filter_basic_new_type (enctype);
decid = camel_mime_parser_filter_add (mp, fdec);
}
}
@@ -229,21 +242,32 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser
charset = check_html_charset(buffer->data, buffer->len);
/* if we need to do charset conversion, see if we can/it works/etc */
- if (charset && !(strcasecmp(charset, "us-ascii") == 0
- || strcasecmp(charset, "utf-8") == 0
- || strncasecmp(charset, "x-", 2) == 0)) {
+ if (charset && !(strcasecmp (charset, "us-ascii") == 0
+ || strcasecmp (charset, "utf-8") == 0
+ || strncasecmp (charset, "x-", 2) == 0)) {
GByteArray *out;
- out = convert_buffer(buffer, "UTF-8", charset);
+ /* You often see Microsoft Windows users announcing their texts
+ * as being in ISO-8859-1 even when in fact they contain funny
+ * characters from the Windows-CP1252 superset.
+ */
+ if (!strncasecmp (charset, "iso-8859", 8)) {
+ /* check for Windows-specific chars... */
+ if (broken_windows_charset (buffer, charset)) {
+ charset = camel_charset_iso_to_windows (charset);
+ charset = e_iconv_charset_name (charset);
+ }
+ }
+
+ out = convert_buffer (buffer, "UTF-8", charset);
if (out) {
/* converted ok, use this data instead */
g_byte_array_free(buffer, TRUE);
buffer = out;
} else {
- g_warning("Storing text as raw, unknown charset '%s' or invalid format", charset);
/* else failed to convert, leave as raw? */
+ g_warning("Storing text as raw, unknown charset '%s' or invalid format", charset);
dw->rawtext = TRUE;
- /* should we change the content-type header? */
}
} else if (header_content_type_is (ct, "text", "*")) {
if (charset == NULL) {
@@ -258,10 +282,9 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser
dw->rawtext = !g_utf8_validate (buffer->data, buffer->len, NULL);
}
}
-
-
+
d(printf("message part kept in memory!\n"));
-
+
mem = camel_stream_mem_new_with_byte_array(buffer);
camel_data_wrapper_construct_from_stream(dw, mem);
camel_object_unref((CamelObject *)mem);