From abada7e2cd02933caa7a2643c0771b3ee7a63cfe Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Thu, 20 Feb 2003 21:04:19 +0000 Subject: Call camel_iconv_init(). (camel_shutdown): Call camel_iconv_shutdown(). 2003-02-20 Jeffrey Stedfast * camel.c (camel_init): Call camel_iconv_init(). (camel_shutdown): Call camel_iconv_shutdown(). * camel-sasl-digest-md5.c (digest_response): Updated to use camel-iconv and the new camel-charset-map functions. * camel-mime-utils.c: Updated to use camel-iconv and the new camel-charset-map functions. * camel-mime-part-utils.c (check_html_charset): Use camel_charset_canonical_name() instead of e_iconv_charset_name() which is no longer available. (convert_buffer): Use camel-iconv. (simple_data_wrapper_construct_from_parser): Since camel_charset_iso_to_windows() returns the charset in its canonical format, no need to re-canonicalise it. * camel-mime-part.c (process_header): Use camel_charset_canonical_name() instead of e_iconv_charset_name() which is no longer available. * camel-mime-message.c (process_header): Use camel_charset_canonical_name() instead of e_iconv_charset_name() which is no longer available. * camel-mime-filter-charset.c: Use camel-iconv. * camel-folder-summary.c (message_info_new): Use camel_charset_canonical_name() instead of e_iconv_charset_name() which is no longer available. (content_info_new): Use camel_charset_locale_name(). (camel_message_info_new_from_header): Same as message_info_new(). * camel-search-private.c: Use g_alloca() instead of alloca(). * camel-filter-search.c (check_header): Use camel_charset_canonical_name() instead of e_iconv_charset_name() which is no longer available. * camel-charset-map.c (camel_charset_locale_name): New function, replaces e_iconv_locale_charset(). (camel_charset_canonical_name): New function, similar to e_iconv_charset_name() but instead of returning the iconv-friendly name, it returns the canonical name. (g_iconv will do the iconv-friendly name conversions for us). 
svn path=/trunk/; revision=19977 --- camel/camel-charset-map.c | 189 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 181 insertions(+), 8 deletions(-) (limited to 'camel/camel-charset-map.c') diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c index be57d882e8..5fcd490dde 100644 --- a/camel/camel-charset-map.c +++ b/camel/camel-charset-map.c @@ -3,9 +3,10 @@ /* * Authors: * Michael Zucchi + * Jeffrey Stedfast * Dan Winship * - * Copyright 2000, 2001 Ximian, Inc. (www.ximian.com) + * Copyright 2000, 2003 Ximian, Inc. (www.ximian.com) * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -22,12 +23,15 @@ * USA */ + #ifdef HAVE_CONFIG_H #include #endif -#include #include +#include +#include +#include /* if you want to build the charset map, compile this with something like: @@ -200,16 +204,16 @@ int main (void) #include "camel-charset-map.h" #include "camel-charset-map-private.h" #include "string-utils.h" + +#include #include #include -#include #include -#include #ifdef ENABLE_THREADS #include #endif -#ifdef HAVE_ALLOCA_H -#include +#ifdef HAVE_CODESET +#include #endif void
@@ -295,9 +299,179 @@ camel_charset_best (const char *in, int len)
 }
+#ifdef G_THREADS_ENABLED
+static GStaticMutex lock = G_STATIC_MUTEX_INIT;
+#define LOCK() g_static_mutex_lock(&lock)
+#define UNLOCK() g_static_mutex_unlock(&lock)
+#else
+#define LOCK()
+#define UNLOCK()
+#endif
+
+/* Lower-cased charset of the current locale; NULL for "C"/"POSIX" or unknown. */
+static char *locale_charset = NULL;
+/* Cache mapping lower-cased charset names to their canonical names. */
+static GHashTable *canon_charsets = NULL;
+
+/*
+ * canon_charsets_init:
+ * @keep: if non-zero, return with the lock still held so the caller can
+ *        keep using canon_charsets; the caller must then call UNLOCK().
+ *
+ * Lazily creates the canon_charsets cache and works out locale_charset.
+ * May be called repeatedly: once canon_charsets exists, the function
+ * only takes the lock (and releases it again unless @keep is set).
+ */
+static void
+canon_charsets_init (int keep)
+{
+	char *locale;
+
+	LOCK ();
+
+	if (canon_charsets != NULL) {
+		/* already initialised */
+		if (!keep)
+			UNLOCK ();
+		return;
+	}
+
+	canon_charsets = g_hash_table_new (g_str_hash, g_str_equal);
+
+	locale = setlocale (LC_ALL, NULL);
+
+	if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
+		/* The locale "C" or "POSIX" is a portable locale; its
+		 * LC_CTYPE part corresponds to the 7-bit ASCII character
+		 * set.
+		 */
+
+		locale_charset = NULL;
+	} else {
+#ifdef HAVE_CODESET
+		/* g_ascii_strdown() returns a newly allocated lower-cased
+		 * copy and leaves its argument untouched, so the result
+		 * itself is what must be stored.
+		 */
+		locale_charset = g_ascii_strdown (nl_langinfo (CODESET), -1);
+#else
+		/* A locale name is typically of the form language[_terri-
+		 * tory][.codeset][@modifier], where language is an ISO 639
+		 * language code, territory is an ISO 3166 country code, and
+		 * codeset is a character set or encoding identifier like
+		 * ISO-8859-1 or UTF-8.
+		 */
+		char *codeset, *p;
+
+		codeset = strchr (locale, '.');
+		if (codeset) {
+			codeset++;
+
+			/* ; is a hack for debian systems and / is a hack for Solaris systems */
+			for (p = codeset; *p && !strchr ("@;/", *p); p++)
+				;
+
+			/* lower-cased copy of just the codeset part */
+			locale_charset = g_ascii_strdown (codeset, p - codeset);
+		} else {
+			/* charset unknown */
+			locale_charset = NULL;
+		}
+#endif
+	}
+
+	if (!keep)
+		UNLOCK ();
+}
+
+
+/**
+ * camel_charset_locale_name:
+ *
+ * Returns the lower-cased name of the system's locale charset, or
+ * %NULL if the locale is "C" or "POSIX" or the charset could not be
+ * determined. The string belongs to this module and must not be
+ * freed by the caller.
+ **/
+const char *
+camel_charset_locale_name (void)
+{
+	canon_charsets_init (FALSE);
+
+	return locale_charset;
+}
+
+
+/**
+ * camel_charset_canonical_name:
+ * @charset: charset to canonicalise
+ *
+ * Returns the charset in its canonical format. This is currently only
+ * needed for iso charsets but also handles canonicalisation of
+ * windows charsets. May need to expand this to handle canonicalisation
+ * of more charsets in the future?
+ *
+ * Matching is done on a lower-cased copy of @charset. Results are
+ * cached, so the returned string is owned by the cache and must not
+ * be modified or freed by the caller. Returns %NULL if @charset is
+ * %NULL.
+ **/
+const char *
+camel_charset_canonical_name (const char *charset)
+{
+	char *name, *canon, *tmp, *p;
+
+	if (charset == NULL)
+		return NULL;
+
+	/* + 1 for the terminating NUL that strcpy() writes */
+	name = g_alloca (strlen (charset) + 1);
+	strcpy (name, charset);
+
+	/* Lower-case in place: g_ascii_strdown() would hand back a new
+	 * copy instead of modifying @name.
+	 */
+	for (p = name; *p; p++)
+		*p = g_ascii_tolower (*p);
+
+	/* Returns with the lock held; every exit path below must UNLOCK(). */
+	canon_charsets_init (TRUE);
+	canon = g_hash_table_lookup (canon_charsets, name);
+	if (canon != NULL) {
+		UNLOCK ();
+		return canon;
+	}
+
+	/* Unknown, try canonicalise some basic charset types to something that should work */
+	if (strncmp (name, "iso", 3) == 0) {
+		/* Convert iso-nnnn-n or isonnnn-n or iso_nnnn-n to iso-nnnn-n */
+		int iso, codepage;
+
+		tmp = name + 3;
+		if (*tmp == '-' || *tmp == '_')
+			tmp++;
+
+		iso = strtoul (tmp, &p, 10);
+
+		if (p > tmp) {
+			if (iso == 10646) {
+				/* they all become iso-10646 */
+				canon = g_strdup ("iso-10646");
+			} else {
+				/* iso-8859-# */
+				tmp = p;
+				if (*tmp == '-' || *tmp == '_')
+					tmp++;
+
+				codepage = strtoul (tmp, &p, 10);
+
+				if (p > tmp) {
+					/* codepage is numeric */
+					canon = g_strdup_printf ("iso-%d-%d", iso, codepage);
+				} else {
+					/* codepage is a string - probably iso-2022-jp or something */
+					canon = g_strdup_printf ("iso-%d-%s", iso, p);
+				}
+			}
+		}
+		/* else: no standard number after "iso" (e.g. "isolatin1");
+		 * canon stays NULL and the name is kept as-is below rather
+		 * than being turned into "iso-0-...".
+		 */
+	} else if (strncmp (name, "windows-", 8) == 0) {
+		/* Convert windows-#### and windows-cp#### to windows-cp#### */
+		tmp = name + 8;
+		if (!strncmp (tmp, "cp", 2))
+			tmp += 2;
+		canon = g_strdup_printf ("windows-cp%s", tmp);
+	} else if (strncmp (name, "microsoft-", 10) == 0) {
+		/* Convert microsoft-#### or microsoft-cp#### to windows-cp#### */
+		tmp = name + 10;
+		if (!strncmp (tmp, "cp", 2))
+			tmp += 2;
+		canon = g_strdup_printf ("windows-cp%s", tmp);
+	} else if (strncmp (name, "cp125", 5) == 0) {
+		/* Convert cp125# to windows-cp125# */
+		canon = g_strdup_printf ("windows-%s", name);
+	}
+
+	if (canon == NULL) {
+		/* Just assume its ok enough as is, case and all */
+		canon = g_strdup (charset);
+	}
+
+	/* The cache takes ownership of both the key copy and canon. */
+	g_hash_table_insert (canon_charsets, g_strdup (name), canon);
+	UNLOCK ();
+
+	return canon;
+}
+
+
 /**
  * 
camel_charset_iso_to_windows: - * @isocharset: an ISO charset + * @isocharset: a canonicalised ISO charset * * Returns the equivalent Windows charset. **/ @@ -351,4 +525,3 @@ camel_charset_iso_to_windows (const char *isocharset) } #endif /* !BUILD_MAP */ - -- cgit v1.2.3