diff options
-rw-r--r-- | e-util/e-iconv.c | 416 | ||||
-rw-r--r-- | e-util/e-iconv.h | 42 | ||||
-rw-r--r-- | widgets/misc/e-unicode.c | 33 |
3 files changed, 483 insertions, 8 deletions
diff --git a/e-util/e-iconv.c b/e-util/e-iconv.c new file mode 100644 index 0000000000..b968eb168a --- /dev/null +++ b/e-util/e-iconv.c @@ -0,0 +1,416 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- + * + * Copyright (C) 2001 Ximian Inc. + * Authors: Michael Zucchi <notzed@ximian.com> + * Jeffery Steadfast <fejj@ximian.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <string.h> +#include <stdio.h> + +#include <glib.h> +#include "e-iconv.h" + +#ifdef HAVE_ALLOCA_H +#include <alloca.h> +#endif + +#ifdef HAVE_CODESET +#include <langinfo.h> +#endif + +/* FIXME: Use feature test */ +#if 0 +#ifndef __sun__ +#define ICONV_ISO_NEEDS_DASH (1) +#endif +#endif + +#define cd(x) + +#ifdef G_THREADS_ENABLED +static GStaticMutex lock = G_STATIC_MUTEX_INIT; +#define LOCK() g_static_mutex_lock(&lock) +#define UNLOCK() g_static_mutex_unlock(&lock) +#else +#define LOCK() +#define UNLOCK() +#endif + +typedef struct _EDListNode { + struct _EDListNode *next; + struct _EDListNode *prev; +} EDListNode; + +typedef struct _EDList { + struct _EDListNode *head; + struct _EDListNode *tail; + struct _EDListNode *tailpred; +} EDList; + +#define E_DLIST_INITIALISER(l) { (EDListNode *)&l.tail, 0, (EDListNode *)&l.head } + +struct _iconv_cache_node { + struct _iconv_cache_node *next; + struct _iconv_cache_node *prev; + + struct _iconv_cache *parent; + + int busy; + iconv_t ip; +}; + +struct _iconv_cache { + struct _iconv_cache *next; + struct _iconv_cache *prev; + + char *conv; + + EDList open; /* stores iconv_cache_nodes, busy ones up front */ +}; + +#define E_ICONV_CACHE_SIZE (16) + +static EDList iconv_cache_list; +static GHashTable *iconv_cache; +static GHashTable *iconv_cache_open; +static unsigned int iconv_cache_size = 0; + +static GHashTable *iconv_charsets = NULL; +static const char *locale_charset = NULL; + +struct { + char *charset; + char *iconv_name; +} known_iconv_charsets[] = { +#if 0 + /* charset name, iconv-friendly charset name */ + { "iso-8859-1", "iso-8859-1" }, + { "iso8859-1", "iso-8859-1" }, + /* the above mostly serves as an example for iso-style charsets, + but we have code that will populate the iso-*'s if/when they + show up in e_iconv_charset_name() so I'm + not going to bother putting them all in here... */ + { "windows-cp1251", "cp1251" }, + { "windows-1251", "cp1251" }, + { "cp1251", "cp1251" }, + /* the above mostly serves as an example for windows-style + charsets, but we have code that will parse and convert them + to their cp#### equivalents if/when they show up in + e_iconv_charset_name() so I'm not going to bother + putting them all in here either... */ +#endif + { "ks_c_5601-1987", "euc-kr" }, + + /* FIXME: Japanese/Korean/Chinese stuff needs checking */ + { "euckr-0", "euc-kr" }, + { "big5-0", "big5" }, + { "big5.eten-0", "big5" }, + { "big5hkscs-0", "big5hkcs" }, + { "gb2312-0", "gb2312" }, + { "gb2312.1980-0", "gb2312" }, + { "gb18030-0", "gb18030" }, + { "gbk-0", "gbk" }, + + { "eucjp-0", "euc-jp" }, + { "ujis-0", "ujis" }, + { "jisx0208.1983-0","shift-jis" }, + { "jisx0212.1990-0","shift-jis" }, + { NULL, NULL } +}; + + + +/* Another copy of this trivial list implementation + Why? This stuff gets called a lot (potentially), should run fast, + and g_list's are f@@#$ed up to make this a hassle */ +static void e_dlist_init(EDList *v) +{ + v->head = (EDListNode *)&v->tail; + v->tail = 0; + v->tailpred = (EDListNode *)&v->head; +} + +static EDListNode *e_dlist_addhead(EDList *l, EDListNode *n) +{ + n->next = l->head; + n->prev = (EDListNode *)&l->head; + l->head->prev = n; + l->head = n; + return n; +} + +static EDListNode *e_dlist_addtail(EDList *l, EDListNode *n) +{ + n->next = (EDListNode *)&l->tail; + n->prev = l->tailpred; + l->tailpred->next = n; + l->tailpred = n; + return n; +} + +static EDListNode *e_dlist_remove(EDListNode *n) +{ + n->next->prev = n->prev; + n->prev->next = n->next; + return n; +} + +/* NOTE: Owns the lock on return if keep is TRUE ! */ +static void +e_iconv_init(int keep) +{ + char *from, *to, *locale; + int i; + + LOCK(); + + if (iconv_charsets != NULL) { + if (!keep) + UNLOCK(); + return; + } + + iconv_charsets = g_hash_table_new(g_str_hash, g_str_equal); + + for (i = 0; known_iconv_charsets[i].charset != NULL; i++) { + from = g_strdup(known_iconv_charsets[i].charset); + to = g_strdup(known_iconv_charsets[i].iconv_name); + g_strdown(from); + g_strdown(to); + g_hash_table_insert(iconv_charsets, from, to); + } + + e_dlist_init(&iconv_cache_list); + iconv_cache = g_hash_table_new(g_str_hash, g_str_equal); + iconv_cache_open = g_hash_table_new(NULL, NULL); + + locale = setlocale (LC_ALL, NULL); + + if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) { + /* The locale "C" or "POSIX" is a portable locale; its + * LC_CTYPE part corresponds to the 7-bit ASCII character + * set. + */ + + locale_charset = NULL; + } else { +#ifdef HAVE_CODESET + locale_charset = g_strdup(nl_langinfo(CODESET)); + g_strdown((char *)locale_charset); +#else + /* A locale name is typically of the form language[_terri- + * tory][.codeset][@modifier], where language is an ISO 639 + * language code, territory is an ISO 3166 country code, and + * codeset is a character set or encoding identifier like + * ISO-8859-1 or UTF-8. + */ + char *p; + int len; + + p = strchr (locale, '@'); + if (p == NULL) + p = strchr (locale, '/'); /* This is a hack for Solaris systems */ + + len = p ? (p - locale) : strlen (locale); + if ((p = strchr (locale, '.'))) { + locale_charset = g_strndup (p + 1, len - (p - locale) + 1); + g_strdown (locale_charset); + } +#endif + } + + if (!keep) + UNLOCK(); +} + +const char *e_iconv_charset_name(const char *charset) +{ + char *name, *ret, *tmp; + + if (charset == NULL) + return NULL; + + name = alloca(strlen(charset)+1); + strcpy(name, charset); + g_strdown(name); + + e_iconv_init(TRUE); + ret = g_hash_table_lookup(iconv_charsets, name); + if (ret != NULL) { + UNLOCK(); + return ret; + } + + /* Unknown, try canonicalise some basic charset types to something that should work */ + if (strncmp(name, "iso", 3) == 0) { + /* Convert iso-nnnn-n or isonnnn-n or iso_nnnn-n to iso-nnnn-n or isonnnn-n */ + tmp = name+3; + if (tmp[0] == '_' || tmp[0] == '-') + tmp++; +#ifdef ICONV_ISO_NEEDS_DASH + ret = g_strdup_printf("iso-%s", tmp); +#else + ret = g_strdup_printf("iso%s", tmp); +#endif + } else if (strncmp(name, "windows-", 8) == 0) { + /* Convert windows-nnnnn or windows-cpnnnnn to cpnnnn */ + tmp = name+8; + if (strncmp(tmp, "cp", 2)) + tmp+=2; + ret = g_strdup_printf("cp%s", tmp); + } else { + /* Just assume its ok enough as is */ + ret = g_strdup(name); + } + + g_hash_table_insert(iconv_charsets, g_strdup(name), ret); + UNLOCK(); + + return ret; +} + +static void +flush_entry(struct _iconv_cache *ic) +{ + struct _iconv_cache_node *in, *nn; + + in = (struct _iconv_cache_node *)ic->open.head; + nn = in->next; + while (nn) { + if (in->ip != (iconv_t)-1) { + g_hash_table_remove(iconv_cache_open, in->ip); + iconv_close(in->ip); + } + g_free(in); + in = nn; + nn = in->next; + } + g_free(ic); +} + +/* This should run pretty quick, its called a lot */ +iconv_t e_iconv_open(const char *oto, const char *ofrom) +{ + const char *to, *from; + char *tofrom; + struct _iconv_cache *ic; + struct _iconv_cache_node *in; + iconv_t ip; + + to = e_iconv_charset_name(oto); + from = e_iconv_charset_name(ofrom); + tofrom = alloca(strlen(to) +strlen(from) + 2); + sprintf(tofrom, "%s%%%s", to, from); + + LOCK(); + + ic = g_hash_table_lookup(iconv_cache, tofrom); + if (ic) { + e_dlist_remove((EDListNode *)ic); + } else { + struct _iconv_cache *last = (struct _iconv_cache *)iconv_cache_list.tailpred; + struct _iconv_cache *prev; + + prev = last->prev; + while (prev && iconv_cache_size > E_ICONV_CACHE_SIZE) { + in = (struct _iconv_cache_node *)last->open.head; + if (in->next && !in->busy) { + cd(printf("Flushing iconv converter '%s'\n", last->conv)); + e_dlist_remove((EDListNode *)last); + g_hash_table_remove(iconv_cache, last->conv); + flush_entry(last); + iconv_cache_size--; + } + last = prev; + prev = last->prev; + } + + iconv_cache_size++; + + ic = g_malloc(sizeof(*ic)); + e_dlist_init(&ic->open); + ic->conv = g_strdup(tofrom); + g_hash_table_insert(iconv_cache, ic->conv, ic); + + cd(printf("Creating iconv converter '%s'\n", ic->conv)); + } + e_dlist_addhead(&iconv_cache_list, (EDListNode *)ic); + + /* If we have a free iconv, use it */ + in = (struct _iconv_cache_node *)ic->open.tailpred; + if (in->prev && !in->busy) { + cd(printf("using existing iconv converter '%s'\n", ic->conv)); + ip = in->ip; + if (ip != (iconv_t)-1) { + /* resets the converter */ + iconv(ip, NULL, NULL, NULL, NULL); + in->busy = TRUE; + e_dlist_remove((EDListNode *)in); + e_dlist_addhead(&ic->open, (EDListNode *)in); + } + } else { + ip = iconv_open(to, from); + cd(printf("creating new iconv converter '%s'\n", ic->conv)); + in = g_malloc(sizeof(*in)); + in->ip = ip; + in->busy = TRUE; + in->parent = ic; + e_dlist_addhead(&ic->open, (EDListNode *)in); + if (ip != (iconv_t)-1) + g_hash_table_insert(iconv_cache_open, ip, in); + } + + UNLOCK(); + + return ip; +} + +void +e_iconv_close(iconv_t ip) +{ + struct _iconv_cache_node *in; + + if (ip == (iconv_t)-1) + return; + + LOCK(); + in = g_hash_table_lookup(iconv_cache_open, ip); + if (in) { + cd(printf("closing iconv converter '%s'\n", in->parent->conv)); + e_dlist_remove((EDListNode *)in); + in->busy = FALSE; + e_dlist_addtail(&in->parent->open, (EDListNode *)in); + } else { + g_warning("trying to close iconv i dont know about: %p", ip); + iconv_close(ip); + } + UNLOCK(); + +} + +const char *e_iconv_locale_charset(void) +{ + e_iconv_init(FALSE); + + return locale_charset; +} diff --git a/e-util/e-iconv.h b/e-util/e-iconv.h new file mode 100644 index 0000000000..dee9ce3573 --- /dev/null +++ b/e-util/e-iconv.h @@ -0,0 +1,42 @@ + +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- + * + * Copyright (C) 2001 Ximian Inc. + * Author: Michael Zucchi <notzed@ximian.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#ifndef _E_ICONV_H_ +#define _E_ICONV_H_ + +#include <iconv.h> + +#ifdef __cplusplus +extern "C" { +#pragma } +#endif /* __cplusplus */ + +const char *e_iconv_charset_name(const char *charset); +iconv_t e_iconv_open(const char *oto, const char *ofrom); +void e_iconv_close(iconv_t ip); +const char *e_iconv_locale_charset(void); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* !_E_ICONV_H_ */ diff --git a/widgets/misc/e-unicode.c b/widgets/misc/e-unicode.c index 3e3344d37c..9fb780cf63 100644 --- a/widgets/misc/e-unicode.c +++ b/widgets/misc/e-unicode.c @@ -24,6 +24,7 @@ #include "e-font.h" #include <gnome-xml/xmlmemory.h> #include <stdlib.h> +#include "gal/util/e-iconv.h" #define d(x) x @@ -336,12 +337,15 @@ gchar * e_utf8_from_charset_string_sized (const gchar *charset, const gchar *string, gint bytes) { iconv_t ic; + char *ret; if (!string) return NULL; - ic = e_iconv_from_charset (charset); + ic = e_iconv_open("utf-8", charset); + ret = e_utf8_from_iconv_string_sized (ic, string, bytes); + e_iconv_close(ic); - return e_utf8_from_iconv_string_sized (ic, string, bytes); + return ret; } gchar * @@ -355,12 +359,15 @@ gchar * e_utf8_to_charset_string_sized (const gchar *charset, const gchar *string, gint bytes) { iconv_t ic; + char *ret; if (!string) return NULL; - ic = e_iconv_to_charset (charset); + ic = e_iconv_open(charset, "utf-8"); + ret = e_utf8_to_iconv_string_sized (ic, string, bytes); + e_iconv_close(ic); - return e_utf8_to_iconv_string_sized (ic, string, bytes); + return ret; } gchar * @@ -442,6 +449,8 @@ e_utf8_from_gtk_string_sized (GtkWidget *widget, const gchar *string, gint bytes *ob = '\0'; + e_iconv_close(ic); + return new; } @@ -518,6 +527,8 @@ e_utf8_to_gtk_string_sized (GtkWidget *widget, const gchar *string, gint bytes) *ob = '\0'; + e_iconv_close(ic); + return new; } @@ -532,12 +543,15 @@ gchar * e_utf8_from_locale_string_sized (const gchar *string, gint bytes) { iconv_t ic; + char *ret; if (!string) return NULL; - ic = e_iconv_from_locale (); + ic = e_iconv_open("utf-8", e_iconv_locale_charset()); + ret = e_utf8_from_iconv_string_sized (ic, string, bytes); + e_iconv_close(ic); - return e_utf8_from_iconv_string_sized (ic, string, bytes); + return ret; } gchar * @@ -551,12 +565,15 @@ gchar * e_utf8_to_locale_string_sized (const gchar *string, gint bytes) { iconv_t ic; + char *ret; if (!string) return NULL; - ic = e_iconv_to_locale (); + ic = e_iconv_open(e_iconv_locale_charset(), "utf-8"); + ret = e_utf8_to_iconv_string_sized (ic, string, bytes); + e_iconv_close(ic); - return e_utf8_to_iconv_string_sized (ic, string, bytes); + return ret; } gchar * |