about summary refs log tree commit diff stats
path: root/camel/camel-iconv.c
diff options
context:
space:
mode:
Diffstat (limited to 'camel/camel-iconv.c')
-rw-r--r-- camel/camel-iconv.c 536
1 files changed, 285 insertions, 251 deletions
diff --git a/camel/camel-iconv.c b/camel/camel-iconv.c
index 9bf614d148..3f1708c18b 100644
--- a/camel/camel-iconv.c
+++ b/camel/camel-iconv.c
@@ -1,6 +1,7 @@
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
* Authors: Jeffrey Stedfast <fejj@ximian.com>
+ * Michael Zucchi <notzed@ximian.com>
*
* Copyright 2003 Ximian, Inc. (www.ximian.com)
*
@@ -31,25 +32,58 @@
#include <string.h>
#include <errno.h>
+#include <locale.h>
+
+#ifdef HAVE_CODESET
+#include <langinfo.h>
+#endif
+
#include "e-util/e-memory.h"
-#include "camel/camel-charset-map.h"
+#include "camel-charset-map.h"
+#include "string-utils.h"
#include "camel-iconv.h"
+#define d(x) /* debug hook: expands to nothing, so every d(...) statement in this file is compiled out */
-#define ICONV_CACHE_SIZE (16)
+#ifdef G_THREADS_ENABLED
+static GStaticMutex lock = G_STATIC_MUTEX_INIT; /* the one mutex behind every LOCK()/UNLOCK() in this file */
+#define LOCK() g_static_mutex_lock (&lock)
+#define UNLOCK() g_static_mutex_unlock (&lock)
+#else
+#define LOCK() /* LOCK()/UNLOCK() are no-ops when G_THREADS_ENABLED is not defined */
+#define UNLOCK()
+#endif
+
+
+struct _iconv_cache_node { /* one iconv descriptor kept on the open list of a struct _iconv_cache */
+ struct _iconv_cache_node *next; /* list links: nodes are cast to EDListNode * when put on parent->open */
+ struct _iconv_cache_node *prev;
+
+ struct _iconv_cache *parent; /* the cache entry this node was created for */
-struct _iconv_cache_bucket {
- struct _iconv_cache_bucket *next;
- struct _iconv_cache_bucket *prev;
- guint32 refcount:31;
- guint32 used:1;
+ int busy; /* set TRUE when camel_iconv_open() hands the descriptor out, FALSE again in camel_iconv_close() */
iconv_t cd;
- char *key;
};
+struct _iconv_cache { /* one entry per conversion key, holding every descriptor opened for it */
+ struct _iconv_cache *next; /* list links: entries are cast to EDListNode * when put on iconv_cache_list */
+ struct _iconv_cache *prev;
+
+ char *conv; /* key this entry is stored under in the iconv_cache hash table; freed by flush_entry() */
+
+ EDList open; /* stores iconv_cache_nodes, busy ones up front */
+};
+
+#define ICONV_CACHE_SIZE (16) /* camel_iconv_open() flushes idle entries while iconv_cache_size exceeds this */
-/* a useful website on charset alaises:
- * http://www.li18nux.org/subgroups/sa/locnameguide/v1.1draft/CodesetAliasTable-V11.html */
+static EDList iconv_cache_list; /* every struct _iconv_cache; camel_iconv_open() puts the entry it uses at the head */
+static GHashTable *iconv_cache; /* conv key -> struct _iconv_cache */
+static GHashTable *iconv_cache_open; /* iconv_t -> struct _iconv_cache_node, only for descriptors that opened successfully */
+static unsigned int iconv_cache_size = 0; /* incremented when an entry is created, decremented when one is flushed */
+
+static GHashTable *iconv_charsets = NULL; /* lowercased charset name -> iconv name; non-NULL once camel_iconv_init() has run */
+static char *locale_charset = NULL; /* lowercased codeset of the locale; NULL for "C"/"POSIX" or when it cannot be determined */
+static char *locale_lang = NULL; /* NULL for "C"/"POSIX"; presumably filled in by locale_parse_lang() - confirm */
struct {
char *charset;
@@ -113,190 +147,82 @@ struct {
};
-static GHashTable *iconv_charsets;
-
-static EMemChunk *cache_chunk;
-static struct _iconv_cache_bucket *iconv_cache_buckets;
-static GHashTable *iconv_cache;
-static GHashTable *iconv_open_hash;
-static unsigned int iconv_cache_size = 0;
-
-#ifdef G_THREADS_ENABLED
-static GStaticMutex iconv_cache_lock = G_STATIC_MUTEX_INIT;
-static GStaticMutex iconv_charset_lock = G_STATIC_MUTEX_INIT;
-#define ICONV_CACHE_LOCK() g_static_mutex_lock (&iconv_cache_lock)
-#define ICONV_CACHE_UNLOCK() g_static_mutex_unlock (&iconv_cache_lock)
-#define ICONV_CHARSET_LOCK() g_static_mutex_lock (&iconv_charset_lock)
-#define ICONV_CHARSET_UNLOCK() g_static_mutex_unlock (&iconv_charset_lock)
-#else
-#define ICONV_CACHE_LOCK()
-#define ICONV_CACHE_UNLOCK()
-#define ICONV_CHARSET_LOCK()
-#define ICONV_CHARSET_UNLOCK()
-#endif /* G_THREADS_ENABLED */
-
-
-/* caller *must* hold the iconv_cache_lock to call any of the following functions */
-
-
-/**
- * iconv_cache_bucket_new:
- * @key: cache key
- * @cd: iconv descriptor
- *
- * Creates a new cache bucket, inserts it into the cache and
- * increments the cache size.
- *
- * Returns a pointer to the newly allocated cache bucket.
- **/
-static struct _iconv_cache_bucket *
-iconv_cache_bucket_new (const char *key, iconv_t cd)
-{
- struct _iconv_cache_bucket *bucket;
-
- bucket = e_memchunk_alloc (cache_chunk);
- bucket->next = NULL;
- bucket->prev = NULL;
- bucket->key = g_strdup (key);
- bucket->refcount = 1;
- bucket->used = TRUE;
- bucket->cd = cd;
-
- g_hash_table_insert (iconv_cache, bucket->key, bucket);
-
- /* FIXME: Since iconv_cache_expire_unused() traverses the list
- from head to tail, perhaps it might be better to append new
- nodes rather than prepending? This way older cache buckets
- expire first? */
- bucket->next = iconv_cache_buckets;
- iconv_cache_buckets = bucket;
-
- iconv_cache_size++;
-
- return bucket;
-}
-
-
-/**
- * iconv_cache_bucket_expire:
- * @bucket: cache bucket
- *
- * Expires a single cache bucket @bucket. This should only ever be
- * called on a bucket that currently has no used iconv descriptors
- * open.
- **/
-static void
-iconv_cache_bucket_expire (struct _iconv_cache_bucket *bucket)
-{
- g_hash_table_remove (iconv_cache, bucket->key);
-
- if (bucket->prev) {
- bucket->prev->next = bucket->next;
- if (bucket->next)
- bucket->next->prev = bucket->prev;
- } else {
- iconv_cache_buckets = bucket->next;
- if (bucket->next)
- bucket->next->prev = NULL;
- }
-
- g_free (bucket->key);
- g_iconv_close (bucket->cd);
- e_memchunk_free (cache_chunk, bucket);
-
- iconv_cache_size--;
-}
-
-
-/**
- * iconv_cache_expire_unused:
- *
- * Expires as many unused cache buckets as it needs to in order to get
- * the total number of buckets < ICONV_CACHE_SIZE.
- **/
-static void
-iconv_cache_expire_unused (void)
-{
- struct _iconv_cache_bucket *bucket, *next;
-
- bucket = iconv_cache_buckets;
- while (bucket && iconv_cache_size >= ICONV_CACHE_SIZE) {
- next = bucket->next;
-
- if (bucket->refcount == 0)
- iconv_cache_bucket_expire (bucket);
-
- bucket = next;
- }
-}
-
-
-static void
-iconv_charset_free (char *name, char *iname, gpointer user_data)
-{
- g_free (name);
- g_free (iname);
-}
-
-void
-camel_iconv_shutdown (void)
-{
- struct _iconv_cache_bucket *bucket, *next;
-
- g_hash_table_foreach (iconv_charsets, (GHFunc) iconv_charset_free, NULL);
- g_hash_table_destroy (iconv_charsets);
-
- bucket = iconv_cache_buckets;
- while (bucket) {
- next = bucket->next;
-
- g_free (bucket->key);
- g_iconv_close (bucket->cd);
- e_memchunk_free (cache_chunk, bucket);
-
- bucket = next;
- }
-
- g_hash_table_destroy (iconv_cache);
- g_hash_table_destroy (iconv_open_hash);
-
- e_memchunk_destroy (cache_chunk);
-}
-
-
/**
* camel_iconv_init:
+ * @keep: if non-zero, return with the module lock still held; the
+ * caller is then responsible for calling UNLOCK()
*
- * Initialize Camel's iconv cache. This *MUST* be called before any
- * camel-iconv interfaces will work correctly.
+ * Lazily initializes the charset alias table, the iconv caches and the
+ * cached locale charset. Only the first call does any work; later calls
+ * just take the lock and, unless @keep is set, release it again.
**/
-void
-camel_iconv_init (void)
+static void
+camel_iconv_init (int keep)
{
- static int initialized = FALSE;
char *from, *to;
+ char *locale;
int i;
- if (initialized)
+ LOCK ();
+
+ /* iconv_charsets doubles as the "already initialized" flag */
+ if (iconv_charsets != NULL) {
+ if (!keep)
+ UNLOCK();
return;
+ }
iconv_charsets = g_hash_table_new (g_str_hash, g_str_equal);
for (i = 0; known_iconv_charsets[i].charset != NULL; i++) {
from = g_strdup (known_iconv_charsets[i].charset);
to = g_strdup (known_iconv_charsets[i].iconv_name);
- g_ascii_strdown (from, -1);
-
+ /* keys are lowercased with the same helper camel_iconv_charset_name() uses for lookups */
+ camel_strdown (from);
g_hash_table_insert (iconv_charsets, from, to);
}
- iconv_cache_buckets = NULL;
+ e_dlist_init (&iconv_cache_list);
iconv_cache = g_hash_table_new (g_str_hash, g_str_equal);
- iconv_open_hash = g_hash_table_new (g_direct_hash, g_direct_equal);
+ iconv_cache_open = g_hash_table_new (NULL, NULL);
+
+ locale = setlocale (LC_ALL, NULL);
- cache_chunk = e_memchunk_new (ICONV_CACHE_SIZE, sizeof (struct _iconv_cache_bucket));
+ if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
+ /* The locale "C" or "POSIX" is a portable locale; its
+ * LC_CTYPE part corresponds to the 7-bit ASCII character
+ * set.
+ */
+
+ locale_charset = NULL;
+ locale_lang = NULL;
+ } else {
+#ifdef HAVE_CODESET
+ locale_charset = g_strdup (nl_langinfo (CODESET));
+ camel_strdown (locale_charset);
+#else
+ /* A locale name is typically of the form language[_terri-
+ * tory][.codeset][@modifier], where language is an ISO 639
+ * language code, territory is an ISO 3166 country code, and
+ * codeset is a character set or encoding identifier like
+ * ISO-8859-1 or UTF-8.
+ */
+ char *codeset, *p;
+
+ codeset = strchr (locale, '.');
+ if (codeset) {
+ codeset++;
+
+ /* ; is a hack for debian systems and / is a hack for Solaris systems */
+ for (p = codeset; *p && !strchr ("@;/", *p); p++);
+ locale_charset = g_strndup (codeset, p - codeset);
+ camel_strdown (locale_charset);
+ } else {
+ /* charset unknown */
+ locale_charset = NULL;
+ }
+#endif
+
+ /* parse the locale lang */
+ /* NOTE(review): locale_parse_lang() is not defined in the visible part of this file - confirm it exists */
+ locale_parse_lang (locale);
+ }
- initialized = TRUE;
+ if (!keep)
+ UNLOCK ();
}
@@ -319,19 +245,43 @@ camel_iconv_charset_name (const char *charset)
name = g_alloca (strlen (charset) + 1);
strcpy (name, charset);
- g_ascii_strdown (name, -1);
+ camel_strdown (name);
- ICONV_CHARSET_LOCK ();
+ camel_iconv_init (TRUE);
if ((iname = g_hash_table_lookup (iconv_charsets, name)) != NULL) {
- ICONV_CHARSET_UNLOCK ();
+ UNLOCK ();
return iname;
}
/* Unknown, try to convert some basic charset types to something that should work */
if (!strncmp (name, "iso", 3)) {
- /* camel_charset_canonical_name() can handle this case */
- ICONV_CHARSET_UNLOCK ();
- return camel_charset_canonical_name (charset);
+ /* Convert iso-####-# or iso####-# or iso_####-# into the canonical form: iso-####-# */
+ int iso, codepage;
+ char *p;
+
+ tmp = name + 3;
+ if (*tmp == '-' || *tmp == '_')
+ tmp++;
+
+ iso = strtoul (tmp, &p, 10);
+ if (iso == 10646) {
+ /* they all become iso-10646 */
+ ret = g_strdup ("iso-10646");
+ } else {
+ tmp = p;
+ if (*tmp == '-' || *tmp == '_')
+ tmp++;
+
+ codepage = strtoul (tmp, &p, 10);
+
+ if (p > tmp) {
+ /* codepage is numeric */
+ ret = g_strdup_printf ("iso-%d-%d", iso, codepage);
+ } else {
+ /* codepage is a string - probably iso-2022-jp or something */
+ ret = g_strdup_printf ("iso-%d-%s", iso, p);
+ }
+ }
} else if (strncmp (name, "windows-", 8) == 0) {
/* Convert windows-#### or windows-cp#### to cp#### */
tmp = name + 8;
@@ -350,11 +300,31 @@ camel_iconv_charset_name (const char *charset)
}
g_hash_table_insert (iconv_charsets, g_strdup (name), iname);
- ICONV_CHARSET_UNLOCK ();
+ UNLOCK ();
return iname;
}
+/*
+ * flush_entry:
+ * @ic: cache entry to destroy
+ *
+ * Frees every node on @ic's open list, closing each descriptor that is
+ * valid and dropping it from iconv_cache_open, then frees the key
+ * string and @ic itself. The walk stops at the first node whose next
+ * pointer is NULL (presumably the EDList tail sentinel - confirm).
+ * @ic is not unlinked from iconv_cache_list or removed from
+ * iconv_cache here; camel_iconv_open() does both before calling this.
+ */
+static void
+flush_entry (struct _iconv_cache *ic)
+{
+ struct _iconv_cache_node *node, *following;
+
+ for (node = (struct _iconv_cache_node *) ic->open.head; (following = node->next) != NULL; node = following) {
+ if (node->cd != (iconv_t) -1) {
+ g_hash_table_remove (iconv_cache_open, node->cd);
+ g_iconv_close (node->cd);
+ }
+
+ g_free (node);
+ }
+
+ g_free (ic->conv);
+ g_free (ic);
+}
+
/**
* camel_iconv_open:
@@ -373,7 +343,8 @@ camel_iconv_charset_name (const char *charset)
iconv_t
camel_iconv_open (const char *to, const char *from)
{
- struct _iconv_cache_bucket *bucket;
+ struct _iconv_cache_node *in;
+ struct _iconv_cache *ic;
iconv_t cd;
char *key;
@@ -383,7 +354,7 @@ camel_iconv_open (const char *to, const char *from)
}
if (!strcasecmp (from, "x-unknown"))
- from = camel_charset_locale_name ();
+ from = camel_iconv_locale_charset ();
/* Even tho g_iconv_open will find the appropriate charset
* format(s) for the to/from charset strings (hahaha, yea
@@ -394,55 +365,78 @@ camel_iconv_open (const char *to, const char *from)
key = g_alloca (strlen (from) + strlen (to) + 2);
sprintf (key, "%s:%s", from, to);
- ICONV_CACHE_LOCK ();
+ LOCK ();
- bucket = g_hash_table_lookup (iconv_cache, key);
- if (bucket) {
- if (bucket->used) {
- cd = g_iconv_open (to, from);
- if (cd == (iconv_t) -1)
- goto exception;
- } else {
- /* Apparently iconv on Solaris <= 7 segfaults if you pass in
- * NULL for anything but inbuf; work around that. (NULL outbuf
- * or NULL *outbuf is allowed by Unix98.)
- */
- size_t inleft = 0, outleft = 0;
- char *outbuf = NULL;
-
- cd = bucket->cd;
- bucket->used = TRUE;
-
- /* reset the descriptor */
- g_iconv (cd, NULL, &inleft, &outbuf, &outleft);
+ /* one cache entry exists per "from:to" key */
+ ic = g_hash_table_lookup (iconv_cache, key);
+ if (ic) {
+ /* unlink it; it is put back at the head of the list below */
+ e_dlist_remove ((EDListNode *) ic);
+ } else {
+ struct _iconv_cache *last = (struct _iconv_cache *)iconv_cache_list.tailpred;
+ struct _iconv_cache *prev;
+
+ /* walk from the tail of iconv_cache_list while the cache is over
+ * ICONV_CACHE_SIZE, flushing entries whose first node is not busy */
+ prev = last->prev;
+ while (prev && iconv_cache_size > ICONV_CACHE_SIZE) {
+ in = (struct _iconv_cache_node *) last->open.head;
+ if (in->next && !in->busy) {
+ d(printf ("Flushing iconv converter '%s'\n", last->conv));
+ e_dlist_remove ((EDListNode *)last);
+ g_hash_table_remove (iconv_cache, last->conv);
+ flush_entry (last);
+ iconv_cache_size--;
+ }
+ last = prev;
+ prev = last->prev;
}
- bucket->refcount++;
- } else {
- cd = g_iconv_open (to, from);
- if (cd == (iconv_t) -1)
- goto exception;
+ iconv_cache_size++;
- iconv_cache_expire_unused ();
+ ic = g_new (struct _iconv_cache, 1);
+ e_dlist_init (&ic->open);
+ /* key lives in g_alloca() storage, so the entry keeps its own copy */
+ ic->conv = g_strdup (key);
+ g_hash_table_insert (iconv_cache, ic->conv, ic);
- bucket = iconv_cache_bucket_new (key, cd);
+ d(printf ("Creating iconv converter '%s'\n", ic->conv));
}
- g_hash_table_insert (iconv_open_hash, cd, bucket->key);
-
- ICONV_CACHE_UNLOCK ();
-
- return cd;
-
- exception:
+ e_dlist_addhead (&iconv_cache_list, (EDListNode *) ic);
- ICONV_CACHE_UNLOCK ();
+ /* If we have a free iconv, use it */
+ in = (struct _iconv_cache_node *) ic->open.tailpred;
+ if (in->prev && !in->busy) {
+ d(printf ("using existing iconv converter '%s'\n", ic->conv));
+ cd = in->cd;
+ if (cd != (iconv_t) -1) {
+ /* work around some broken iconv implementations
+ * that die if the length arguments are NULL
+ */
+ size_t buggy_iconv_len = 0;
+ char *buggy_iconv_buf = NULL;
+
+ /* resets the converter */
+ g_iconv (cd, &buggy_iconv_buf, &buggy_iconv_len, &buggy_iconv_buf, &buggy_iconv_len);
+ in->busy = TRUE;
+ e_dlist_remove ((EDListNode *) in);
+ e_dlist_addhead (&ic->open, (EDListNode *) in);
+ }
+ } else {
+ d(printf ("creating new iconv converter '%s'\n", ic->conv));
+ cd = g_iconv_open (to, from);
+ in = g_new (struct _iconv_cache_node, 1);
+ in->cd = cd;
+ in->parent = ic;
+ e_dlist_addhead (&ic->open, (EDListNode *) in);
+ if (cd != (iconv_t) -1) {
+ g_hash_table_insert (iconv_cache_open, cd, in);
+ in->busy = TRUE;
+ } else {
+ /* save errno so g_warning() cannot clobber the g_iconv_open() error seen by the caller */
+ int errnosav = errno;
+ g_warning ("Could not open converter for '%s' to '%s' charset", from, to);
+ in->busy = FALSE;
+ errno = errnosav;
+ }
+ }
- if (errno == EINVAL)
- g_warning ("Conversion from '%s' to '%s' is not supported", from, to);
- else
- g_warning ("Could not open converter from '%s' to '%s': %s",
- from, to, g_strerror (errno));
+ UNLOCK();
return cd;
}
@@ -477,41 +471,81 @@ camel_iconv (iconv_t cd, const char **inbuf, size_t *inleft, char **outbuf, size
int
camel_iconv_close (iconv_t cd)
{
- struct _iconv_cache_bucket *bucket;
- const char *key;
+ struct _iconv_cache_node *in;
+ int ret = 0;
- if (cd == (iconv_t) -1)
- return 0;
-
- ICONV_CACHE_LOCK ();
+ /* closing an invalid descriptor is a successful no-op */
+ if (cd == (iconv_t)-1)
+ return 0;
- key = g_hash_table_lookup (iconv_open_hash, cd);
- if (key) {
- g_hash_table_remove (iconv_open_hash, cd);
-
- bucket = g_hash_table_lookup (iconv_cache, key);
- g_assert (bucket);
-
- bucket->refcount--;
-
- if (cd == bucket->cd)
- bucket->used = FALSE;
- else
- g_iconv_close (cd);
-
- if (!bucket->refcount && iconv_cache_size > ICONV_CACHE_SIZE) {
- /* expire this cache bucket */
- iconv_cache_bucket_expire (bucket);
- }
+ LOCK ();
+ in = g_hash_table_lookup (iconv_cache_open, cd);
+ if (in) {
+ /* keep the descriptor open for reuse: mark it free and move it to
+ * the tail, where camel_iconv_open() looks for a free converter */
+ d(printf ("closing iconv converter '%s'\n", in->parent->conv));
+ e_dlist_remove ((EDListNode *) in);
+ in->busy = FALSE;
+ e_dlist_addtail (&in->parent->open, (EDListNode *) in);
} else {
- ICONV_CACHE_UNLOCK ();
-
- g_warning ("This iconv context wasn't opened using camel_iconv_open()");
-
- return g_iconv_close (cd);
+ /* not one of ours: really close it and report its result */
+ g_warning ("trying to close iconv i dont know about: %p", cd);
+ ret = g_iconv_close (cd);
}
+ UNLOCK ();
+
+ return ret;
+}
+
+/**
+ * camel_iconv_locale_charset:
+ *
+ * Runs camel_iconv_init() (a no-op after the first call) and then
+ * reports the charset it recorded for the current locale.
+ *
+ * Returns the file-level locale_charset string, which is %NULL for the
+ * "C"/"POSIX" locales or when no codeset could be determined.
+ **/
+const char *
+camel_iconv_locale_charset (void)
+{
+ const char *charset;
+
+ camel_iconv_init (FALSE);
+ charset = locale_charset;
+
+ return charset;
+}
+
+
+/**
+ * camel_iconv_locale_language:
+ *
+ * Runs camel_iconv_init() (a no-op after the first call) and then
+ * reports the language it recorded for the current locale.
+ *
+ * Returns the file-level locale_lang string, which is %NULL for the
+ * "C"/"POSIX" locales.
+ **/
+const char *
+camel_iconv_locale_language (void)
+{
+ const char *lang;
+
+ camel_iconv_init (FALSE);
+ lang = locale_lang;
+
+ return lang;
+}
+
+/* Map CJKR charsets to their language code (the table below covers zh,
+ * ja, ko, ru and uk).
+ *
+ * NOTE: only charset names that will be returned by
+ * camel_iconv_charset_name() are supported, so that we don't have to
+ * keep track of all the aliases too. camel_iconv_charset_language()
+ * compares against these names case-insensitively. */
+static const struct {
+ const char *charset;
+ const char *lang;
+} cjkr_lang_map[] = {
+ { "Big5", "zh" },
+ { "BIG5HKCS", "zh" },
+ { "gb2312", "zh" },
+ { "gb18030", "zh" },
+ { "gbk", "zh" },
+ { "euc-tw", "zh" },
+ { "iso-2022-jp", "ja" },
+ { "sjis", "ja" },
+ { "ujis", "ja" },
+ { "eucJP", "ja" },
+ { "euc-jp", "ja" },
+ { "euc-kr", "ko" },
+ { "koi8-r", "ru" },
+ { "koi8-u", "uk" }
+};
+
+/* number of entries in cjkr_lang_map */
+#define NUM_CJKR_LANGS (sizeof (cjkr_lang_map) / sizeof (cjkr_lang_map[0]))
+
+/**
+ * camel_iconv_charset_language:
+ * @charset: charset name, may be %NULL
+ *
+ * Looks up the language code for @charset in cjkr_lang_map. The name is
+ * first passed through camel_iconv_charset_name() and then compared
+ * case-insensitively against each table entry.
+ *
+ * Returns the language code string from the table, or %NULL if
+ * @charset is %NULL or has no entry.
+ **/
+const char *
+camel_iconv_charset_language (const char *charset)
+{
+ size_t i; /* unsigned, to match the sizeof-based NUM_CJKR_LANGS */
- ICONV_CACHE_UNLOCK ();
+ if (!charset)
+ return NULL;
+
+ charset = camel_iconv_charset_name (charset);
+ for (i = 0; i < NUM_CJKR_LANGS; i++) {
+ if (!strcasecmp (cjkr_lang_map[i].charset, charset))
+ return cjkr_lang_map[i].lang;
+ }
- return 0;
+ return NULL;
}