diff options
-rw-r--r-- | camel/ChangeLog | 31 | ||||
-rw-r--r-- | camel/camel-charset-map.c | 5 | ||||
-rw-r--r-- | camel/camel-folder-summary.c | 31 | ||||
-rw-r--r-- | camel/camel-mime-filter-charset.c | 17 | ||||
-rw-r--r-- | camel/camel-mime-message.c | 8 | ||||
-rw-r--r-- | camel/camel-mime-part-utils.c | 6 | ||||
-rw-r--r-- | camel/camel-mime-part.c | 41 | ||||
-rw-r--r-- | camel/camel-mime-utils.c | 122 | ||||
-rw-r--r-- | camel/camel-mime-utils.h | 20 |
9 files changed, 165 insertions, 116 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog index 9119e3439c..f59951c452 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,5 +1,36 @@ 2001-07-19 Jeffrey Stedfast <fejj@ximian.com> + * camel-mime-filter-charset.c + (camel_mime_filter_charset_new_convert): Convert to the + iconv-friendly charset names. + + * providers/imap/camel-imap-store.c (create_folder): Fixed a + compiler warning about returning without a value in a non-void + function. Blah. + + * camel-mime-part.c (process_header): Pass the locale charset as + the default_charset to header_decode_string(). + + * camel-folder-summary.c (camel_folder_summary_format_string): + Pass the locale charset as the default_charset to + header_decode_string(). + (content_info_new): Same. + + * camel-mime-message.c (process_header): Pass the locale charset + as the default_charset to header_decode_string(). + + * camel-mime-utils.c (append_8bit): New helper function who's + purpose is similar to append_latin1() but for 8bit text that we + are assuming is not latin1. + (header_decode_text): Now takes a default_charset parameter and + calls append_8bit when appropriate. + (header_decode_string): Also takes a default_charset parameter + now. + (header_decode_mailbox): Pass NULL as the default_charset to + header_decode_string(). + +2001-07-19 Jeffrey Stedfast <fejj@ximian.com> + * camel-pgp-context.c (pgp_verify): Modified to treat the return value from camel_charset_locale_name() as a const char*. diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c index d03da27c61..c007658553 100644 --- a/camel/camel-charset-map.c +++ b/camel/camel-charset-map.c @@ -405,17 +405,18 @@ camel_charset_get_iconv_friendly_name (const char *name) new_charset[3] = '-'; memcpy (new_charset + 4, name + 3, len - 3); new_charset[len + 1] = '\0'; - g_hash_table_insert (iconv_charsets, g_strdup (name), new_charset); } else { /* *shrug* - add it to the hash table just the way it is? */ new_charset = g_strdup (name); - g_hash_table_insert (iconv_charsets, g_strdup (name), new_charset); } + g_hash_table_insert (iconv_charsets, g_strdup (name), new_charset); charset = new_charset; } ICONV_CHARSETS_UNLOCK (); + g_warning ("camel_charset_get_iconv_friendly_name (\"%s\") => \"%s\"", name, charset); + return charset; } diff --git a/camel/camel-folder-summary.c b/camel/camel-folder-summary.c index 120d927766..ac525dd619 100644 --- a/camel/camel-folder-summary.c +++ b/camel/camel-folder-summary.c @@ -1375,15 +1375,16 @@ camel_folder_summary_format_address(struct _header_raw *h, const char *name) } char * -camel_folder_summary_format_string(struct _header_raw *h, const char *name) +camel_folder_summary_format_string (struct _header_raw *h, const char *name) { - const char *text; - - text = header_raw_find(&h, name, NULL); + const char *charset, *text; + + text = header_raw_find (&h, name, NULL); if (text) { - while (isspace(*text)) + while (isspace ((unsigned) *text)) text++; - return header_decode_string(text); + charset = camel_charset_locale_name (); + return header_decode_string (text, charset); } else { return NULL; } @@ -1695,16 +1696,18 @@ message_info_free(CamelFolderSummary *s, CamelMessageInfo *mi) } static CamelMessageContentInfo * -content_info_new(CamelFolderSummary *s, struct _header_raw *h) +content_info_new (CamelFolderSummary *s, struct _header_raw *h) { CamelMessageContentInfo *ci; - - ci = camel_folder_summary_content_info_new(s); - - ci->id = header_msgid_decode(header_raw_find(&h, "content-id", NULL)); - ci->description = header_decode_string(header_raw_find(&h, "content-description", NULL)); - ci->encoding = header_content_encoding_decode(header_raw_find(&h, "content-transfer-encoding", NULL)); - + const char *charset; + + ci = camel_folder_summary_content_info_new (s); + + charset = camel_charset_locale_name (); + ci->id = header_msgid_decode (header_raw_find (&h, "content-id", NULL)); + ci->description = header_decode_string (header_raw_find (&h, "content-description", NULL), NULL); + ci->encoding = header_content_encoding_decode (header_raw_find (&h, "content-transfer-encoding", NULL)); + return ci; } diff --git a/camel/camel-mime-filter-charset.c b/camel/camel-mime-filter-charset.c index 808e4064a7..d3fb126970 100644 --- a/camel/camel-mime-filter-charset.c +++ b/camel/camel-mime-filter-charset.c @@ -25,6 +25,7 @@ #include <errno.h> #include "camel-mime-filter-charset.h" +#include "camel-charset-map.h" #define d(x) @@ -226,18 +227,22 @@ camel_mime_filter_charset_new (void) } CamelMimeFilterCharset * -camel_mime_filter_charset_new_convert(const char *from_charset, const char *to_charset) +camel_mime_filter_charset_new_convert (const char *from_charset, const char *to_charset) { CamelMimeFilterCharset *new = CAMEL_MIME_FILTER_CHARSET (camel_object_new (camel_mime_filter_charset_get_type ())); - - new->ic = iconv_open(to_charset, from_charset); + + from_charset = camel_charset_get_iconv_friendly_name (from_charset); + to_charset = camel_charset_get_iconv_friendly_name (to_charset); + + new->ic = iconv_open (to_charset, from_charset); if (new->ic == (iconv_t) -1) { g_warning("Cannot create charset conversion from %s to %s: %s", from_charset, to_charset, strerror(errno)); - camel_object_unref((CamelObject *)new); + camel_object_unref ((CamelObject *)new); new = NULL; } else { - new->from = g_strdup(from_charset); - new->to = g_strdup(to_charset); + new->from = g_strdup (from_charset); + new->to = g_strdup (to_charset); } + return new; } diff --git a/camel/camel-mime-message.c b/camel/camel-mime-message.c index 8b42765656..cbae7fb3f1 100644 --- a/camel/camel-mime-message.c +++ b/camel/camel-mime-message.c @@ -513,6 +513,7 @@ process_header (CamelMedium *medium, const char *header_name, const char *header CamelHeaderType header_type; CamelMimeMessage *message = CAMEL_MIME_MESSAGE (medium); CamelInternetAddress *addr; + const char *charset; header_type = (CamelHeaderType)g_hash_table_lookup (header_name_table, header_name); switch (header_type) { @@ -529,8 +530,9 @@ process_header (CamelMedium *medium, const char *header_name, const char *header camel_address_decode (CAMEL_ADDRESS (message->reply_to), header_value); break; case HEADER_SUBJECT: - g_free(message->subject); - message->subject = g_strstrip (header_decode_string (header_value)); + g_free (message->subject); + charset = camel_charset_locale_name (); + message->subject = g_strstrip (header_decode_string (header_value, charset)); break; case HEADER_TO: case HEADER_CC: @@ -719,7 +721,7 @@ find_best_encoding (CamelMimePart *part, CamelBestencRequired required, CamelBes if (istext) charsetin = camel_mime_filter_bestenc_get_best_charset (bestenc); - d(printf("charsetin = %s\n", charsetin)); + d(printf("charsetin = %s\n", charsetin ? charsetin : "(null)")); /* if we have US-ASCII, or we're not doing text, we dont need to bother with the rest */ if (charsetin != NULL && (required & CAMEL_BESTENC_GET_CHARSET) != 0) { diff --git a/camel/camel-mime-part-utils.c b/camel/camel-mime-part-utils.c index 9467efc003..1b674cf653 100644 --- a/camel/camel-mime-part-utils.c +++ b/camel/camel-mime-part-utils.c @@ -88,9 +88,9 @@ check_html_charset (CamelMimeParser *mp, CamelMimeFilterBasicType enctype) const char *data; int len; const char *val; - + state = camel_html_parser_step(hp, &data, &len); - + /* example: <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> */ switch(state) { @@ -184,7 +184,7 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser /* Possible Lame Mailer Alert... check the META tags for a charset */ if (!charset && header_content_type_is (ct, "text", "html")) - charset = check_html_charset(mp, enctype); + charset = check_html_charset (mp, enctype); /* if the charset is not us-ascii or utf-8, then we need to convert to utf-8 */ if (charset && !(g_strcasecmp (charset, "us-ascii") == 0 || g_strcasecmp (charset, "utf-8") == 0)) { diff --git a/camel/camel-mime-part.c b/camel/camel-mime-part.c index 733d08c719..a6ab3e58d5 100644 --- a/camel/camel-mime-part.c +++ b/camel/camel-mime-part.c @@ -40,6 +40,7 @@ #include "camel-mime-part.h" #include "camel-mime-part-utils.h" #include "camel-exception.h" +#include "camel-charset-map.h" #include "string-utils.h" #define d(x) /*(printf("%s(%d): ", __FILE__, __LINE__),(x))*/ @@ -200,6 +201,7 @@ process_header(CamelMedium *medium, const char *header_name, const char *header_ { CamelMimePart *mime_part = CAMEL_MIME_PART (medium); CamelHeaderType header_type; + const char *charset; char *text; /* Try to parse the header pair. If it corresponds to something */ @@ -209,30 +211,29 @@ process_header(CamelMedium *medium, const char *header_name, const char *header_ header_type = (CamelHeaderType) g_hash_table_lookup (header_name_table, header_name); switch (header_type) { case HEADER_DESCRIPTION: /* raw header->utf8 conversion */ - text = header_decode_string(header_value); - g_free(mime_part->description); - mime_part->description = g_strstrip (text); + g_free (mime_part->description); + charset = camel_charset_locale_name (); + mime_part->description = g_strstrip (header_decode_string (header_value, charset)); break; case HEADER_DISPOSITION: - set_disposition(mime_part, header_value); + set_disposition (mime_part, header_value); break; case HEADER_CONTENT_ID: - text = header_msgid_decode(header_value); - g_free(mime_part->content_id); - mime_part->content_id = text; + g_free (mime_part->content_id); + mime_part->content_id = header_msgid_decode (header_value); break; case HEADER_ENCODING: - text = header_token_decode(header_value); + text = header_token_decode (header_value); mime_part->encoding = camel_mime_part_encoding_from_string (text); - g_free(text); + g_free (text); break; case HEADER_CONTENT_MD5: - g_free(mime_part->content_MD5); - mime_part->content_MD5 = g_strdup(header_value); + g_free (mime_part->content_MD5); + mime_part->content_MD5 = g_strdup (header_value); break; case HEADER_CONTENT_LOCATION: - g_free(mime_part->content_location); - mime_part->content_location = header_location_decode(header_value); + g_free (mime_part->content_location); + mime_part->content_location = header_location_decode (header_value); break; case HEADER_CONTENT_TYPE: if (mime_part->content_type) @@ -298,7 +299,7 @@ get_headers (CamelMedium *medium) headers = g_array_new (FALSE, FALSE, sizeof (CamelMediumHeader)); for (h = part->headers; h; h = h->next) { header.name = h->name; - header.value = header_decode_string (h->value); + header.value = header_decode_string (h->value, NULL); g_array_append_val (headers, header); } @@ -604,14 +605,14 @@ write_to_stream(CamelDataWrapper *data_wrapper, CamelStream *stream) default: break; } - - if (header_content_type_is(mp->content_type, "text", "*")) { - charset = header_content_type_param(mp->content_type, "charset"); - if (!(charset == NULL || !strcasecmp(charset, "us-ascii") || !strcasecmp(charset, "utf-8"))) { - charenc = (CamelMimeFilter *)camel_mime_filter_charset_new_convert("utf-8", charset); + + if (header_content_type_is (mp->content_type, "text", "*")) { + charset = header_content_type_param (mp->content_type, "charset"); + if (charset && !(!g_strcasecmp (charset, "us-ascii") || !g_strcasecmp (charset, "utf-8"))) { + charenc = (CamelMimeFilter *)camel_mime_filter_charset_new_convert ("UTF-8", charset); } } - + if (filter || charenc) { filter_stream = camel_stream_filter_new_with_stream(stream); diff --git a/camel/camel-mime-utils.c b/camel/camel-mime-utils.c index ba704b19dc..594c1de05e 100644 --- a/camel/camel-mime-utils.c +++ b/camel/camel-mime-utils.c @@ -1004,32 +1004,63 @@ g_string_append_len(GString *st, const char *s, int l) according to the rfc's. Anyway, since the conversion to utf-8 is trivial, just do it here without iconv */ static GString * -append_latin1(GString *out, const char *in, int len) +append_latin1 (GString *out, const char *in, int len) { unsigned int c; - + while (len) { c = (unsigned int)*in++; len--; if (c & 0x80) { - out = g_string_append_c(out, 0xc0 | ((c>>6) & 0x3)); /* 110000xx */ - out = g_string_append_c(out, 0x80 | (c&0x3f)); /* 10xxxxxx */ + out = g_string_append_c (out, 0xc0 | ((c >> 6) & 0x3)); /* 110000xx */ + out = g_string_append_c (out, 0x80 | (c & 0x3f)); /* 10xxxxxx */ } else { - out = g_string_append_c(out, c); + out = g_string_append_c (out, c); } } return out; } +static void +append_8bit (GString *out, const char *inbuf, int inlen, const char *default_charset) +{ + char *outbase, *outbuf; + int outlen; + iconv_t ic; + + ic = iconv_open ("UTF-8", default_charset); + if (ic != (iconv_t) -1) { + int ret; + + outlen = inlen * 6 + 16; + outbuf = outbase = g_malloc (outlen); + + ret = iconv (ic, &inbuf, &inlen, &outbuf, &outlen); + if (ret >= 0) { + iconv (ic, NULL, 0, &outbuf, &outlen); + *outbuf = '\0'; + } + + iconv_close (ic); + + /* FIXME: is outlen == strlen (outbuf) ?? */ + g_string_append_len (out, outbase, strlen (outbase)); + } else { + /* bah, completely broken...just append as raw text */ + g_string_append_len (out, inbuf, inlen); + } +} + /* decodes a simple text, rfc822 */ static char * -header_decode_text (const char *in, int inlen) +header_decode_text (const char *in, int inlen, const char *default_charset) { GString *out; char *inptr, *inend, *start, *word_start; char *decoded; gboolean wasdword = FALSE; gboolean wasspace = FALSE; + gboolean islatin1 = FALSE; out = g_string_new (""); start = inptr = (char *) in; @@ -1056,8 +1087,12 @@ header_decode_text (const char *in, int inlen) g_string_append (out, dword); g_free (dword); wasdword = TRUE; + } else if (islatin1 || !default_charset) { + /* append_latin1 is safe for 7bit ascii too */ + append_latin1 (out, start, inptr - start - 1); + wasdword = FALSE; } else { - out = append_latin1 (out, start, inptr - start - 1); + append_8bit (out, start, inptr - start - 1, default_charset); wasdword = FALSE; } @@ -1068,6 +1103,11 @@ header_decode_text (const char *in, int inlen) wasspace = FALSE; if (!word_start) word_start = inptr - 1; + + if (c & 0x80 || c <= 127) + islatin1 = TRUE; + else + islatin1 = FALSE; } } @@ -1087,8 +1127,11 @@ header_decode_text (const char *in, int inlen) g_string_append (out, dword); g_free (dword); + } else if (islatin1 || !default_charset) { + /* append_latin1 is safe for 7bit ascii too */ + append_latin1 (out, start, inptr - start); } else { - out = append_latin1 (out, start, inptr - start); + append_8bit (out, start, inptr - start, default_charset); } } @@ -1098,49 +1141,12 @@ header_decode_text (const char *in, int inlen) return decoded; } -#if 0 /* This is broken */ - -/* so in what way is it broken? */ - -/* decodes a simple text, rfc822 */ -static char * -header_decode_text(const char *in, int inlen) -{ - GString *out; - const char *inptr = in; - const char *inend = in+inlen; - char *encstart, *encend; - char *decword; - - out = g_string_new(""); - while ( (encstart = strstr(inptr, "=?")) - && (encend = strstr(encstart+2, "?=")) ) { - - decword = rfc2047_decode_word(encstart, encend-encstart+2); - if (decword) { - out = g_string_append_len(out, inptr, encstart-inptr); - out = g_string_append_len(out, decword, strlen(decword)); - g_free (decword); - } else { - out = append_latin1(out, inptr, encend-inptr+2); - } - inptr = encend+2; - } - out = append_latin1(out, inptr, inend-inptr); - - encstart = out->str; - g_string_free(out, FALSE); - - return encstart; -} -#endif - char * -header_decode_string(const char *in) +header_decode_string (const char *in, const char *default_charset) { if (in == NULL) return NULL; - return header_decode_text(in, strlen(in)); + return header_decode_text (in, strlen (in), default_charset); } /* how long a sequence of pre-encoded words should be less than, to attempt to @@ -2248,13 +2254,13 @@ header_decode_mailbox(const char **in) header_decode_lwsp(&inptr); if (!(*inptr == '.' || *inptr == '@' || *inptr==',' || *inptr=='\0')) { /* ',' and '\0' required incase it is a simple address, no @ domain part (buggy writer) */ - name = g_string_new(""); + name = g_string_new (""); while (pre) { char *text, *last; /* perform internationalised decoding, and append */ - text = header_decode_string(pre); - name = g_string_append(name, text); + text = header_decode_string (pre, NULL); + g_string_append (name, text); last = pre; g_free(text); @@ -2362,19 +2368,19 @@ header_decode_mailbox(const char **in) if (comend > comstart) { d(printf(" looking at subset '%.*s'\n", comend-comstart, comstart)); - tmp = g_strndup(comstart, comend-comstart); - text = header_decode_string(tmp); - name = g_string_new(text); - g_free(tmp); - g_free(text); + tmp = g_strndup (comstart, comend-comstart); + text = header_decode_string (tmp, NULL); + name = g_string_new (text); + g_free (tmp); + g_free (text); } } } - + *in = inptr; - + if (addr->len > 0) { - address = header_address_new_name(name?name->str:"", addr->str); + address = header_address_new_name(name ? name->str : "", addr->str); } g_string_free(addr, TRUE); diff --git a/camel/camel-mime-utils.h b/camel/camel-mime-utils.h index c32485d000..899fffbae5 100644 --- a/camel/camel-mime-utils.h +++ b/camel/camel-mime-utils.h @@ -145,26 +145,26 @@ void header_raw_clear(struct _header_raw **list); char *header_raw_check_mailing_list(struct _header_raw **list); /* fold a header */ -char *header_address_fold(const char *in, int headerlen); -char *header_fold(const char *in, int headerlen); -char *header_unfold(const char *in); +char *header_address_fold (const char *in, int headerlen); +char *header_fold (const char *in, int headerlen); +char *header_unfold (const char *in); /* decode a header which is a simple token */ -char *header_token_decode(const char *in); +char *header_token_decode (const char *in); /* decode/encode a string type, like a subject line */ -char *header_decode_string(const char *in); -char *header_encode_string(const unsigned char *in); +char *header_decode_string (const char *in, const char *default_charset); +char *header_encode_string (const unsigned char *in); /* encode a phrase, like the real name of an address */ -char *header_encode_phrase(const unsigned char *in); +char *header_encode_phrase (const unsigned char *in); /* decode an email date field into a GMT time, + optional offset */ -time_t header_decode_date(const char *in, int *saveoffset); -char *header_format_date(time_t time, int offset); +time_t header_decode_date (const char *in, int *saveoffset); +char *header_format_date (time_t time, int offset); /* decode a message id */ -char *header_msgid_decode(const char *in); +char *header_msgid_decode (const char *in); /* generate msg id */ char *header_msgid_generate (void); |