aboutsummaryrefslogtreecommitdiffstats
path: root/e-util
diff options
context:
space:
mode:
Diffstat (limited to 'e-util')
-rw-r--r--e-util/e-iconv.c416
-rw-r--r--e-util/e-iconv.h42
2 files changed, 458 insertions, 0 deletions
diff --git a/e-util/e-iconv.c b/e-util/e-iconv.c
new file mode 100644
index 0000000000..b968eb168a
--- /dev/null
+++ b/e-util/e-iconv.c
@@ -0,0 +1,416 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
+ *
+ * Copyright (C) 2001 Ximian Inc.
+ * Authors: Michael Zucchi <notzed@ximian.com>
+ * Jeffery Steadfast <fejj@ximian.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <errno.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <glib.h>
+#include "e-iconv.h"
+
+#ifdef HAVE_ALLOCA_H
+#include <alloca.h>
+#endif
+
+#ifdef HAVE_CODESET
+#include <langinfo.h>
+#endif
+
+/* FIXME: Use feature test */
+#if 0
+#ifndef __sun__
+#define ICONV_ISO_NEEDS_DASH (1)
+#endif
+#endif
+
+#define cd(x) /* debug trace hook: expands to nothing, so the cd(printf(...)) calls in this file compile away */
+
+#ifdef G_THREADS_ENABLED
+static GStaticMutex lock = G_STATIC_MUTEX_INIT; /* the one mutex taken around the charset table and converter cache in this file */
+#define LOCK() g_static_mutex_lock(&lock)
+#define UNLOCK() g_static_mutex_unlock(&lock)
+#else
+#define LOCK() /* GLib built without thread support: LOCK()/UNLOCK() expand to nothing */
+#define UNLOCK()
+#endif
+
+typedef struct _EDListNode { /* Generic list link. The cache structs in this file begin with the
+ * same next/prev pair so that they can be cast to EDListNode and
+ * linked with the e_dlist_* helpers. */
+ struct _EDListNode *next; /* following node, or the owning list's tail sentinel */
+ struct _EDListNode *prev; /* preceding node, or the owning list's head sentinel */
+} EDListNode;
+
+typedef struct _EDList { /* List header. e_dlist_init() points head at &tail and tailpred at
+ * &head, so (head, tail) and (tail, tailpred) overlap as two
+ * sentinel nodes and linking into an empty list needs no special case. */
+ struct _EDListNode *head; /* first node, or (EDListNode *)&tail when the list is empty */
+ struct _EDListNode *tail; /* set to 0 by e_dlist_init(); serves as the NULL link of both sentinels */
+ struct _EDListNode *tailpred; /* last node, or (EDListNode *)&head when the list is empty */
+} EDList;
+
+/* Static initialiser producing the same empty state as e_dlist_init():
+   head -> &tail, tail = NULL, tailpred -> &head.  The argument is
+   parenthesised so any lvalue expression can be passed, not just a name. */
+#define E_DLIST_INITIALISER(l) { (EDListNode *)&(l).tail, NULL, (EDListNode *)&(l).head }
+
+struct _iconv_cache_node { /* One iconv descriptor opened for a particular conversion pair. */
+ struct _iconv_cache_node *next; /* next/prev must stay first: nodes are cast to EDListNode */
+ struct _iconv_cache_node *prev;
+
+ struct _iconv_cache *parent; /* cache entry whose 'open' list this node is linked on */
+
+ int busy; /* TRUE while handed out by e_iconv_open(), FALSE once e_iconv_close() returns it */
+ iconv_t ip; /* result of iconv_open(); stored even when it is (iconv_t)-1 */
+};
+
+struct _iconv_cache { /* Cache entry for one "to%from" conversion pair. */
+ struct _iconv_cache *next; /* next/prev must stay first: entries are cast to EDListNode */
+ struct _iconv_cache *prev;
+
+ char *conv; /* g_strdup'ed "to%from" string; also used as the key in the iconv_cache hash */
+
+ EDList open; /* stores iconv_cache_nodes, busy ones up front */
+};
+
+#define E_ICONV_CACHE_SIZE (16) /* entry count above which e_iconv_open() starts flushing idle entries */
+
+static EDList iconv_cache_list; /* every cache entry; e_iconv_open() moves the entry it uses to the head */
+static GHashTable *iconv_cache; /* "to%from" string -> struct _iconv_cache */
+static GHashTable *iconv_cache_open; /* iconv_t -> struct _iconv_cache_node, looked up by e_iconv_close() */
+static unsigned int iconv_cache_size = 0; /* number of entries created and not yet flushed */
+
+static GHashTable *iconv_charsets = NULL; /* lowercased charset name -> iconv name; NULL until e_iconv_init() has run */
+static const char *locale_charset = NULL; /* lowercased locale charset set by e_iconv_init(), or NULL if none was determined */
+
+/* Seed table of charset aliases: maps names seen in the wild to the
+   name that should be handed to iconv_open().  e_iconv_init() copies
+   every pair (lowercased) into the iconv_charsets hash table.  The
+   table is terminated by a { NULL, NULL } entry. */
+struct {
+	char *charset;
+	char *iconv_name;
+} known_iconv_charsets[] = {
+#if 0
+	/* charset name, iconv-friendly charset name */
+	{ "iso-8859-1",      "iso-8859-1" },
+	{ "iso8859-1",       "iso-8859-1" },
+	/* the above mostly serves as an example for iso-style charsets,
+	   but we have code that will populate the iso-*'s if/when they
+	   show up in e_iconv_charset_name() so I'm
+	   not going to bother putting them all in here... */
+	{ "windows-cp1251",  "cp1251" },
+	{ "windows-1251",    "cp1251" },
+	{ "cp1251",          "cp1251" },
+	/* the above mostly serves as an example for windows-style
+	   charsets, but we have code that will parse and convert them
+	   to their cp#### equivalents if/when they show up in
+	   e_iconv_charset_name() so I'm not going to bother
+	   putting them all in here either... */
+#endif
+	{ "ks_c_5601-1987",  "euc-kr" },
+
+	/* FIXME: Japanese/Korean/Chinese stuff needs checking */
+	{ "euckr-0",         "euc-kr" },
+	{ "big5-0",          "big5" },
+	{ "big5.eten-0",     "big5" },
+	/* was "big5hkcs", which is not an encoding name iconv knows */
+	{ "big5hkscs-0",     "big5hkscs" },
+	{ "gb2312-0",        "gb2312" },
+	{ "gb2312.1980-0",   "gb2312" },
+	{ "gb18030-0",       "gb18030" },
+	{ "gbk-0",           "gbk" },
+
+	{ "eucjp-0",         "euc-jp" },
+	{ "ujis-0",          "ujis" },
+	{ "jisx0208.1983-0", "shift-jis" },
+	{ "jisx0212.1990-0", "shift-jis" },
+	{ NULL,              NULL }
+};
+
+
+
+/* Private copy of a trivial sentinel-based doubly-linked list.
+   It lives here because these helpers may be called very often and
+   must be cheap, which is awkward to get with GList.
+
+   e_dlist_init() puts @v into the empty state: head points at the
+   tail sentinel, tailpred points at the head sentinel, and the shared
+   middle link is NULL. */
+static void e_dlist_init(EDList *v)
+{
+	v->tail = NULL;
+	v->tailpred = (EDListNode *)&v->head;
+	v->head = (EDListNode *)&v->tail;
+}
+
+/* Link @n in as the first node of @l and return @n.  When the list
+   is empty the old "first" is the tail sentinel, whose prev slot is
+   l->tailpred, so the same four stores cover that case too. */
+static EDListNode *e_dlist_addhead(EDList *l, EDListNode *n)
+{
+	EDListNode *first = l->head;
+
+	n->prev = (EDListNode *)&l->head;
+	n->next = first;
+	first->prev = n;
+	l->head = n;
+
+	return n;
+}
+
+/* Link @n in as the last node of @l and return @n.  When the list is
+   empty the old "last" is the head sentinel, whose next slot is
+   l->head, so no special case is required. */
+static EDListNode *e_dlist_addtail(EDList *l, EDListNode *n)
+{
+	EDListNode *last = l->tailpred;
+
+	n->prev = last;
+	n->next = (EDListNode *)&l->tail;
+	last->next = n;
+	l->tailpred = n;
+
+	return n;
+}
+
+/* Unlink @n from whatever list it is on and return it.  The node's
+   own next/prev fields are left untouched; only its neighbours (or
+   the list sentinels) are rewired around it. */
+static EDListNode *e_dlist_remove(EDListNode *n)
+{
+	EDListNode *after = n->next;
+	EDListNode *before = n->prev;
+
+	after->prev = before;
+	before->next = after;
+
+	return n;
+}
+
+/*
+ * e_iconv_init:
+ * @keep: when TRUE the module lock is still held on return and the
+ *        caller must UNLOCK(); when FALSE it is released before returning.
+ *
+ * Lazily sets up the module: fills iconv_charsets from
+ * known_iconv_charsets, creates the (empty) converter cache and
+ * records the locale's charset in locale_charset.  Only the first call
+ * does any work; later calls take the lock, see that iconv_charsets is
+ * already set and return.
+ *
+ * NOTE: Owns the lock on return if keep is TRUE !
+ */
+static void
+e_iconv_init(int keep)
+{
+	char *from, *to, *locale, *codeset;
+	int i;
+
+	LOCK();
+
+	if (iconv_charsets != NULL) {
+		if (!keep)
+			UNLOCK();
+		return;
+	}
+
+	iconv_charsets = g_hash_table_new(g_str_hash, g_str_equal);
+
+	/* Keys and values are private lowercased copies owned by the table */
+	for (i = 0; known_iconv_charsets[i].charset != NULL; i++) {
+		from = g_strdup(known_iconv_charsets[i].charset);
+		to = g_strdup(known_iconv_charsets[i].iconv_name);
+		g_strdown(from);
+		g_strdown(to);
+		g_hash_table_insert(iconv_charsets, from, to);
+	}
+
+	e_dlist_init(&iconv_cache_list);
+	iconv_cache = g_hash_table_new(g_str_hash, g_str_equal);
+	/* NULL hash/compare functions: keyed directly on the iconv_t value */
+	iconv_cache_open = g_hash_table_new(NULL, NULL);
+
+	locale = setlocale (LC_ALL, NULL);
+
+	if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) {
+		/* The locale "C" or "POSIX" is a portable locale; its
+		 * LC_CTYPE part corresponds to the 7-bit ASCII character
+		 * set.
+		 */
+
+		locale_charset = NULL;
+	} else {
+#ifdef HAVE_CODESET
+		/* Lowercase through a writable pointer before publishing it
+		 * as the const locale_charset. */
+		codeset = g_strdup(nl_langinfo(CODESET));
+		g_strdown(codeset);
+		locale_charset = codeset;
+#else
+		/* A locale name is typically of the form language[_terri-
+		 * tory][.codeset][@modifier], where language is an ISO 639
+		 * language code, territory is an ISO 3166 country code, and
+		 * codeset is a character set or encoding identifier like
+		 * ISO-8859-1 or UTF-8.
+		 */
+		const char *dot;
+		size_t len;
+
+		dot = strchr (locale, '.');
+		if (dot != NULL) {
+			/* The codeset runs from just after the '.' up to the
+			 * '@' of a modifier, or to a '/' (hack for Solaris
+			 * systems), or to the end of the string.  Measuring
+			 * from the '.' keeps the length from going negative
+			 * when a terminator precedes it. */
+			len = strcspn (dot + 1, "@/");
+			if (len > 0) {
+				codeset = g_strndup (dot + 1, len);
+				g_strdown (codeset);
+				locale_charset = codeset;
+			}
+		}
+#endif
+	}
+
+	if (!keep)
+		UNLOCK();
+}
+
+/*
+ * e_iconv_charset_name:
+ * @charset: a charset name in any letter case, or NULL
+ *
+ * Maps @charset to the name that should be given to iconv_open().
+ * The lowercased name is looked up in iconv_charsets; if it is not
+ * there, a name is derived from it ("iso..." and "windows-..." forms
+ * are canonicalised, anything else is used as is) and remembered in
+ * the table for next time.
+ *
+ * Returns: NULL if @charset is NULL, otherwise a string stored in
+ * iconv_charsets, which the caller must not free.
+ */
+const char *e_iconv_charset_name(const char *charset)
+{
+	char *name, *ret, *tmp;
+
+	if (charset == NULL)
+		return NULL;
+
+	name = alloca(strlen(charset)+1);
+	strcpy(name, charset);
+	g_strdown(name);
+
+	/* TRUE: we come back holding the lock, released on each exit below */
+	e_iconv_init(TRUE);
+	ret = g_hash_table_lookup(iconv_charsets, name);
+	if (ret != NULL) {
+		UNLOCK();
+		return ret;
+	}
+
+	/* Unknown, try canonicalise some basic charset types to something that should work */
+	if (strncmp(name, "iso", 3) == 0) {
+		/* Convert iso-nnnn-n or isonnnn-n or iso_nnnn-n to iso-nnnn-n or isonnnn-n */
+		tmp = name+3;
+		if (tmp[0] == '_' || tmp[0] == '-')
+			tmp++;
+#ifdef ICONV_ISO_NEEDS_DASH
+		ret = g_strdup_printf("iso-%s", tmp);
+#else
+		ret = g_strdup_printf("iso%s", tmp);
+#endif
+	} else if (strncmp(name, "windows-", 8) == 0) {
+		/* Convert windows-nnnnn or windows-cpnnnnn to cpnnnn */
+		tmp = name+8;
+		/* Skip an existing "cp" only when it is actually there, so
+		   that "windows-1251" and "windows-cp1251" both give "cp1251" */
+		if (strncmp(tmp, "cp", 2) == 0)
+			tmp += 2;
+		ret = g_strdup_printf("cp%s", tmp);
+	} else {
+		/* Just assume its ok enough as is */
+		ret = g_strdup(name);
+	}
+
+	/* name lives on the stack (alloca), so the table gets its own copy */
+	g_hash_table_insert(iconv_charsets, g_strdup(name), ret);
+	UNLOCK();
+
+	return ret;
+}
+
+/*
+ * flush_entry:
+ * @ic: cache entry to destroy
+ *
+ * Closes every iconv descriptor on @ic's open list, frees the nodes,
+ * and then frees @ic together with its key string.  Because ic->conv
+ * is also the key under which @ic sits in iconv_cache, the caller must
+ * remove it from that hash table (and from iconv_cache_list) before
+ * calling this.
+ */
+static void
+flush_entry(struct _iconv_cache *ic)
+{
+	struct _iconv_cache_node *in, *nn;
+
+	/* Walk until the tail sentinel, whose next link is NULL; nn is
+	   fetched before the current node is freed. */
+	in = (struct _iconv_cache_node *)ic->open.head;
+	nn = in->next;
+	while (nn) {
+		if (in->ip != (iconv_t)-1) {
+			g_hash_table_remove(iconv_cache_open, in->ip);
+			iconv_close(in->ip);
+		}
+		g_free(in);
+		in = nn;
+		nn = in->next;
+	}
+	/* conv was g_strdup'ed when the entry was created; without this
+	   every flushed entry leaked its key string */
+	g_free(ic->conv);
+	g_free(ic);
+}
+
+/*
+ * e_iconv_open:
+ * @oto: charset to convert to
+ * @ofrom: charset to convert from
+ *
+ * Caching front end to iconv_open().  Both names are canonicalised
+ * with e_iconv_charset_name() and the pair is looked up in the cache:
+ * an idle descriptor for that pair is reset and reused if there is
+ * one, otherwise a new one is opened and added to the cache.
+ *
+ * Returns: a descriptor that should be released with e_iconv_close(),
+ * or (iconv_t)-1 on failure.  If @oto or @ofrom is NULL, errno is set
+ * to EINVAL.
+ */
+/* This should run pretty quick, its called a lot */
+iconv_t e_iconv_open(const char *oto, const char *ofrom)
+{
+	const char *to, *from;
+	char *tofrom;
+	struct _iconv_cache *ic;
+	struct _iconv_cache_node *in;
+	iconv_t ip;
+
+	/* e_iconv_charset_name() returns NULL for a NULL name, which
+	   would crash the strlen() calls below */
+	if (oto == NULL || ofrom == NULL) {
+		errno = EINVAL;
+		return (iconv_t)-1;
+	}
+
+	to = e_iconv_charset_name(oto);
+	from = e_iconv_charset_name(ofrom);
+	/* "to%from" key: both names, the '%' separator and the NUL */
+	tofrom = alloca(strlen(to) +strlen(from) + 2);
+	sprintf(tofrom, "%s%%%s", to, from);
+
+	LOCK();
+
+	ic = g_hash_table_lookup(iconv_cache, tofrom);
+	if (ic) {
+		/* Unlinked here and re-added at the head below */
+		e_dlist_remove((EDListNode *)ic);
+	} else {
+		struct _iconv_cache *last = (struct _iconv_cache *)iconv_cache_list.tailpred;
+		struct _iconv_cache *prev;
+
+		/* While over the size limit, walk from the tail of the
+		   entry list towards the head and flush every entry whose
+		   first open node is idle (busy nodes are kept up front,
+		   so an idle first node means none are busy).  prev is
+		   NULL once last is the list's head sentinel. */
+		prev = last->prev;
+		while (prev && iconv_cache_size > E_ICONV_CACHE_SIZE) {
+			in = (struct _iconv_cache_node *)last->open.head;
+			if (in->next && !in->busy) {
+				cd(printf("Flushing iconv converter '%s'\n", last->conv));
+				e_dlist_remove((EDListNode *)last);
+				g_hash_table_remove(iconv_cache, last->conv);
+				flush_entry(last);
+				iconv_cache_size--;
+			}
+			last = prev;
+			prev = last->prev;
+		}
+
+		iconv_cache_size++;
+
+		ic = g_malloc(sizeof(*ic));
+		e_dlist_init(&ic->open);
+		ic->conv = g_strdup(tofrom);
+		g_hash_table_insert(iconv_cache, ic->conv, ic);
+
+		cd(printf("Creating iconv converter '%s'\n", ic->conv));
+	}
+	e_dlist_addhead(&iconv_cache_list, (EDListNode *)ic);
+
+	/* If we have a free iconv, use it */
+	/* (e_iconv_close() puts released nodes at the tail; in->prev is
+	   NULL when tailpred is just the head sentinel of an empty list) */
+	in = (struct _iconv_cache_node *)ic->open.tailpred;
+	if (in->prev && !in->busy) {
+		cd(printf("using existing iconv converter '%s'\n", ic->conv));
+		ip = in->ip;
+		if (ip != (iconv_t)-1) {
+			/* resets the converter */
+			iconv(ip, NULL, NULL, NULL, NULL);
+			in->busy = TRUE;
+			e_dlist_remove((EDListNode *)in);
+			e_dlist_addhead(&ic->open, (EDListNode *)in);
+		}
+	} else {
+		ip = iconv_open(to, from);
+		cd(printf("creating new iconv converter '%s'\n", ic->conv));
+		/* The node is recorded even when iconv_open() failed; it is
+		   only entered in iconv_cache_open when ip is valid */
+		in = g_malloc(sizeof(*in));
+		in->ip = ip;
+		in->busy = TRUE;
+		in->parent = ic;
+		e_dlist_addhead(&ic->open, (EDListNode *)in);
+		if (ip != (iconv_t)-1)
+			g_hash_table_insert(iconv_cache_open, ip, in);
+	}
+
+	UNLOCK();
+
+	return ip;
+}
+
+/*
+ * e_iconv_close:
+ * @ip: descriptor obtained from e_iconv_open()
+ *
+ * Hands @ip back to the cache: its node is marked idle and moved to
+ * the tail of its entry's open list.  (iconv_t)-1 is ignored.  A
+ * descriptor the cache has no record of is reported with a warning
+ * and closed with iconv_close().
+ */
+void
+e_iconv_close(iconv_t ip)
+{
+	struct _iconv_cache_node *node;
+
+	if (ip != (iconv_t)-1) {
+		LOCK();
+
+		node = g_hash_table_lookup(iconv_cache_open, ip);
+		if (node == NULL) {
+			g_warning("trying to close iconv i dont know about: %p", ip);
+			iconv_close(ip);
+		} else {
+			cd(printf("closing iconv converter '%s'\n", node->parent->conv));
+			node->busy = FALSE;
+			e_dlist_remove((EDListNode *)node);
+			e_dlist_addtail(&node->parent->open, (EDListNode *)node);
+		}
+
+		UNLOCK();
+	}
+}
+
+/*
+ * e_iconv_locale_charset:
+ *
+ * Makes sure the module has been initialised, then returns the value
+ * of locale_charset recorded by e_iconv_init(); this is NULL when no
+ * locale charset was recorded (e.g. the "C" or "POSIX" locale).
+ */
+const char *e_iconv_locale_charset(void)
+{
+	const char *charset;
+
+	e_iconv_init(FALSE);
+	charset = locale_charset;
+
+	return charset;
+}
diff --git a/e-util/e-iconv.h b/e-util/e-iconv.h
new file mode 100644
index 0000000000..dee9ce3573
--- /dev/null
+++ b/e-util/e-iconv.h
@@ -0,0 +1,42 @@
+
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
+ *
+ * Copyright (C) 2001 Ximian Inc.
+ * Author: Michael Zucchi <notzed@ximian.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef _E_ICONV_H_
+#define _E_ICONV_H_
+
+#include <iconv.h>
+
+#ifdef __cplusplus
+extern "C" { /* NOTE(review): the "#pragma }" on the next line presumably only balances this brace for editor indentation - confirm */
+#pragma }
+#endif /* __cplusplus */
+
+const char *e_iconv_charset_name(const char *charset); /* Map a charset name (any case) to the name to give iconv_open().
+ * Returns NULL for a NULL argument; otherwise the result is kept in
+ * an internal table and must not be freed by the caller. */
+iconv_t e_iconv_open(const char *oto, const char *ofrom); /* Caching replacement for iconv_open(oto, ofrom): both names go
+ * through e_iconv_charset_name() and an idle cached descriptor is
+ * reused when there is one.  Returns (iconv_t)-1 if the open failed. */
+void e_iconv_close(iconv_t ip); /* Give a descriptor from e_iconv_open() back to the cache.
+ * (iconv_t)-1 is ignored; a descriptor the cache does not know is
+ * closed with iconv_close() after a warning. */
+const char *e_iconv_locale_charset(void); /* Lowercased charset of the current locale, or NULL when none was
+ * recorded (e.g. the "C" or "POSIX" locale). */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* !_E_ICONV_H_ */