aboutsummaryrefslogtreecommitdiffstats
path: root/camel/camel-charset-map.c
diff options
context:
space:
mode:
Diffstat (limited to 'camel/camel-charset-map.c')
-rw-r--r--camel/camel-charset-map.c189
1 files changed, 181 insertions, 8 deletions
diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c
index be57d882e8..5fcd490dde 100644
--- a/camel/camel-charset-map.c
+++ b/camel/camel-charset-map.c
@@ -3,9 +3,10 @@
/*
* Authors:
* Michael Zucchi <notzed@ximian.com>
+ * Jeffrey Stedfast <fejj@ximian.com>
* Dan Winship <danw@ximian.com>
*
- * Copyright 2000, 2001 Ximian, Inc. (www.ximian.com)
+ * Copyright 2000, 2003 Ximian, Inc. (www.ximian.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -22,12 +23,15 @@
* USA
*/
+
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
-#include <errno.h>
#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
/*
if you want to build the charset map, compile this with something like:
@@ -200,16 +204,16 @@ int main (void)
#include "camel-charset-map.h"
#include "camel-charset-map-private.h"
#include "string-utils.h"
+
+#include <glib.h>
#include <glib/gunicode.h>
#include <locale.h>
-#include <string.h>
#include <ctype.h>
-#include <glib.h>
#ifdef ENABLE_THREADS
#include <pthread.h>
#endif
-#ifdef HAVE_ALLOCA_H
-#include <alloca.h>
+#ifdef HAVE_CODESET
+#include <langinfo.h>
#endif
void
@@ -295,9 +299,179 @@ camel_charset_best (const char *in, int len)
}
+/* Module-wide lock. It is taken by canon_charsets_init() and, when that
+ * function is called with keep != 0, stays held until the caller invokes
+ * UNLOCK() itself (see camel_charset_canonical_name()). */
+#ifdef G_THREADS_ENABLED
+static GStaticMutex lock = G_STATIC_MUTEX_INIT;
+#define LOCK() g_static_mutex_lock(&lock)
+#define UNLOCK() g_static_mutex_unlock(&lock)
+#else
+/* Without GLib thread support both macros expand to nothing. */
+#define LOCK()
+#define UNLOCK()
+#endif
+
+/* Codeset name of the current locale as detected by canon_charsets_init();
+ * NULL when the locale is "C"/"POSIX" or no codeset could be determined. */
+static char *locale_charset = NULL;
+/* Cache of canonical charset names keyed by charset name; created by
+ * canon_charsets_init() and filled by camel_charset_canonical_name().
+ * A NULL value means initialisation has not happened yet. */
+static GHashTable *canon_charsets = NULL;
+
+/*
+ * canon_charsets_init:
+ * @keep: if non-zero, return with the module lock still held so the
+ *        caller can go on using canon_charsets; the caller must then
+ *        call UNLOCK() itself. If zero, the lock is released on return.
+ *
+ * Lazy one-time initialisation: creates the canonical-name cache and
+ * detects the charset of the current locale, storing its lowercased
+ * name in locale_charset (NULL if the locale is "C"/"POSIX" or the
+ * charset cannot be determined). Subsequent calls only take (and,
+ * depending on @keep, release) the lock.
+ */
+static void
+canon_charsets_init (int keep)
+{
+	const char *locale;
+
+	LOCK ();
+
+	if (canon_charsets != NULL) {
+		/* already initialised */
+		if (!keep) {
+			UNLOCK ();
+		}
+		return;
+	}
+
+	canon_charsets = g_hash_table_new (g_str_hash, g_str_equal);
+
+	/* query only; passing NULL does not change the locale */
+	locale = setlocale (LC_ALL, NULL);
+
+	if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
+		/* The locale "C" or "POSIX" is a portable locale; its
+		 * LC_CTYPE part corresponds to the 7-bit ASCII character
+		 * set.
+		 */
+
+		locale_charset = NULL;
+	} else {
+#ifdef HAVE_CODESET
+		/* g_ascii_strdown() does not work in place: it returns a
+		 * newly allocated lowercased copy, which is what we keep. */
+		locale_charset = g_ascii_strdown (nl_langinfo (CODESET), -1);
+#else
+		/* A locale name is typically of the form language[_terri-
+		 * tory][.codeset][@modifier], where language is an ISO 639
+		 * language code, territory is an ISO 3166 country code, and
+		 * codeset is a character set or encoding identifier like
+		 * ISO-8859-1 or UTF-8.
+		 */
+		const char *codeset, *p;
+
+		codeset = strchr (locale, '.');
+		if (codeset) {
+			codeset++;
+
+			/* ; is a hack for debian systems and / is a hack for Solaris systems */
+			for (p = codeset; *p && !strchr ("@;/", *p); p++)
+				;
+
+			/* lowercased copy of just the codeset part */
+			locale_charset = g_ascii_strdown (codeset, p - codeset);
+		} else {
+			/* charset unknown */
+			locale_charset = NULL;
+		}
+#endif
+	}
+
+	if (!keep) {
+		UNLOCK ();
+	}
+}
+
+
+/**
+ * camel_charset_locale_name:
+ *
+ * Returns the name of the system's locale charset, as detected the
+ * first time the charset tables are initialised. The result is %NULL
+ * when the locale is "C" or "POSIX" or when no charset could be
+ * determined from the locale. The string is held in a file-level
+ * static and must not be freed by the caller.
+ **/
+const char *
+camel_charset_locale_name (void)
+{
+ /* FALSE: do not keep the module lock held after initialisation */
+ canon_charsets_init (FALSE);
+
+ return locale_charset;
+}
+
+
+/**
+ * camel_charset_canonical_name:
+ * @charset: charset name to canonicalise, or %NULL
+ *
+ * Returns the charset in its canonical format. This is currently only
+ * needed for iso charsets but also handles canonicalisation of
+ * windows charsets. May need to expand this to handle canonicalisation
+ * of more charsets in the future?
+ *
+ * Matching is case-insensitive (ASCII). Results are cached, so the
+ * returned string is owned by the cache and must not be modified or
+ * freed by the caller. Returns %NULL if @charset is %NULL.
+ **/
+const char *
+camel_charset_canonical_name (const char *charset)
+{
+	char *name, *canon, *tmp;
+
+	if (charset == NULL)
+		return NULL;
+
+	/* Lowercased heap copy. g_ascii_strdown() returns a new string
+	 * rather than working in place, and a heap copy avoids an
+	 * unbounded alloca on a caller-supplied name. On a cache miss the
+	 * copy becomes the hash key; on a hit it is freed. */
+	name = g_ascii_strdown (charset, -1);
+
+	/* TRUE: the lock stays held until we UNLOCK() below */
+	canon_charsets_init (TRUE);
+	canon = g_hash_table_lookup (canon_charsets, name);
+	if (canon != NULL) {
+		UNLOCK ();
+		g_free (name);
+		return canon;
+	}
+
+	/* Unknown, try canonicalise some basic charset types to something that should work */
+	if (strncmp (name, "iso", 3) == 0) {
+		/* Convert iso-nnnn-n or isonnnn-n or iso_nnnn-n to iso-nnnn-n */
+		unsigned long iso, codepage;
+		char *p;
+
+		tmp = name + 3;
+		if (*tmp == '-' || *tmp == '_')
+			tmp++;
+
+		iso = strtoul (tmp, &p, 10);
+
+		if (p == tmp) {
+			/* no standard number follows "iso"; leave the name as given */
+			canon = g_strdup (charset);
+		} else if (iso == 10646) {
+			/* they all become iso-10646 */
+			canon = g_strdup ("iso-10646");
+		} else {
+			/* iso-8859-# */
+			tmp = p;
+			if (*tmp == '-' || *tmp == '_')
+				tmp++;
+
+			codepage = strtoul (tmp, &p, 10);
+
+			if (p > tmp) {
+				/* codepage is numeric */
+				canon = g_strdup_printf ("iso-%lu-%lu", iso, codepage);
+			} else {
+				/* codepage is a string - probably iso-2022-jp or something */
+				canon = g_strdup_printf ("iso-%lu-%s", iso, p);
+			}
+		}
+	} else if (strncmp (name, "windows-", 8) == 0) {
+		/* Convert windows-#### and windows-cp#### to windows-cp#### */
+		tmp = name + 8;
+		if (!strncmp (tmp, "cp", 2))
+			tmp += 2;
+		canon = g_strdup_printf ("windows-cp%s", tmp);
+	} else if (strncmp (name, "microsoft-", 10) == 0) {
+		/* Convert microsoft-#### or microsoft-cp#### to windows-cp#### */
+		tmp = name + 10;
+		if (!strncmp (tmp, "cp", 2))
+			tmp += 2;
+		canon = g_strdup_printf ("windows-cp%s", tmp);
+	} else if (strncmp (name, "cp125", 5) == 0) {
+		/* Convert cp125# to windows-cp125# */
+		canon = g_strdup_printf ("windows-%s", name);
+	} else {
+		/* Just assume its ok enough as is, case and all */
+		canon = g_strdup (charset);
+	}
+
+	/* the table takes ownership of both name (key) and canon (value) */
+	g_hash_table_insert (canon_charsets, name, canon);
+	UNLOCK ();
+
+	return canon;
+}
+
+
/**
* camel_charset_iso_to_windows:
- * @isocharset: an ISO charset
+ * @isocharset: a canonicalised ISO charset
*
* Returns the equivalent Windows charset.
**/
@@ -351,4 +525,3 @@ camel_charset_iso_to_windows (const char *isocharset)
}
#endif /* !BUILD_MAP */
-