aboutsummaryrefslogtreecommitdiffstats
path: root/camel/camel-iconv.c
diff options
context:
space:
mode:
Diffstat (limited to 'camel/camel-iconv.c')
-rw-r--r--camel/camel-iconv.c268
1 files changed, 180 insertions, 88 deletions
diff --git a/camel/camel-iconv.c b/camel/camel-iconv.c
index 3f1708c18b..fee061a2b9 100644
--- a/camel/camel-iconv.c
+++ b/camel/camel-iconv.c
@@ -55,12 +55,25 @@ static GStaticMutex lock = G_STATIC_MUTEX_INIT;
#endif
+typedef struct _EDListNode {
+ struct _EDListNode *next;
+ struct _EDListNode *prev;
+} EDListNode;
+
+typedef struct _EDList {
+ struct _EDListNode *head;
+ struct _EDListNode *tail;
+ struct _EDListNode *tailpred;
+} EDList;
+
+#define E_DLIST_INITIALISER(l) { (EDListNode *)&l.tail, 0, (EDListNode *)&l.head }
+
struct _iconv_cache_node {
struct _iconv_cache_node *next;
struct _iconv_cache_node *prev;
-
+
struct _iconv_cache *parent;
-
+
int busy;
iconv_t cd;
};
@@ -68,13 +81,13 @@ struct _iconv_cache_node {
struct _iconv_cache {
struct _iconv_cache *next;
struct _iconv_cache *prev;
-
+
char *conv;
-
+
EDList open; /* stores iconv_cache_nodes, busy ones up front */
};
-#define ICONV_CACHE_SIZE (16)
+#define CAMEL_ICONV_CACHE_SIZE (16)
static EDList iconv_cache_list;
static GHashTable *iconv_cache;
@@ -91,62 +104,136 @@ struct {
} known_iconv_charsets[] = {
#if 0
/* charset name, iconv-friendly charset name */
- { "iso-8859-1", "iso-8859-1" },
- { "iso8859-1", "iso-8859-1" },
+ { "iso-8859-1", "iso-8859-1" },
+ { "iso8859-1", "iso-8859-1" },
/* the above mostly serves as an example for iso-style charsets,
but we have code that will populate the iso-*'s if/when they
- show up in camel_iconv_charset_name() so I'm
+ show up in e_iconv_charset_name() so I'm
not going to bother putting them all in here... */
- { "windows-cp1251", "cp1251" },
- { "windows-1251", "cp1251" },
- { "cp1251", "cp1251" },
+ { "windows-cp1251", "cp1251" },
+ { "windows-1251", "cp1251" },
+ { "cp1251", "cp1251" },
/* the above mostly serves as an example for windows-style
charsets, but we have code that will parse and convert them
to their cp#### equivalents if/when they show up in
- camel_iconv_charset_name() so I'm not going to bother
+ e_iconv_charset_name() so I'm not going to bother
putting them all in here either... */
#endif
/* charset name (lowercase!), iconv-friendly name (sometimes case sensitive) */
- { "utf-8", "UTF-8" },
- { "utf8", "UTF-8" },
+ { "utf-8", "UTF-8" },
/* 10646 is a special case, its usually UCS-2 big endian */
/* This might need some checking but should be ok for solaris/linux */
- { "iso-10646-1", "UCS-2BE" },
- { "iso_10646-1", "UCS-2BE" },
- { "iso10646-1", "UCS-2BE" },
- { "iso-10646", "UCS-2BE" },
- { "iso_10646", "UCS-2BE" },
- { "iso10646", "UCS-2BE" },
-
- /* "ks_c_5601-1987" seems to be the most common of this lot */
- { "ks_c_5601-1987", "EUC-KR" },
- { "5601", "EUC-KR" },
- { "ksc-5601", "EUC-KR" },
- { "ksc-5601-1987", "EUC-KR" },
- { "ksc-5601_1987", "EUC-KR" },
+ { "iso-10646-1", "UCS-2BE" },
+ { "iso_10646-1", "UCS-2BE" },
+ { "iso10646-1", "UCS-2BE" },
+ { "iso-10646", "UCS-2BE" },
+ { "iso_10646", "UCS-2BE" },
+ { "iso10646", "UCS-2BE" },
+
+ { "ks_c_5601-1987", "EUC-KR" },
/* FIXME: Japanese/Korean/Chinese stuff needs checking */
- { "euckr-0", "EUC-KR" },
- { "5601", "EUC-KR" },
- { "big5-0", "BIG5" },
- { "big5.eten-0", "BIG5" },
- { "big5hkscs-0", "BIG5HKCS" },
- { "gb2312-0", "gb2312" },
- { "gb2312.1980-0", "gb2312" },
- { "euc-cn", "gb2312" },
- { "gb18030-0", "gb18030" },
- { "gbk-0", "GBK" },
-
- { "eucjp-0", "eucJP" }, /* should this map to "EUC-JP" instead? */
- { "ujis-0", "ujis" }, /* we might want to map this to EUC-JP */
- { "jisx0208.1983-0", "SJIS" },
- { "jisx0212.1990-0", "SJIS" },
- { "pck", "SJIS" },
- { NULL, NULL }
+ { "euckr-0", "EUC-KR" },
+ { "5601", "EUC-KR" },
+ { "big5-0", "BIG5" },
+ { "big5.eten-0", "BIG5" },
+ { "big5hkscs-0", "BIG5HKSCS" },
+ { "gb2312-0", "gb2312" },
+ { "gb2312.1980-0", "gb2312" },
+ { "gb-2312", "gb2312" },
+ { "gb18030-0", "gb18030" },
+ { "gbk-0", "GBK" },
+
+ { "eucjp-0", "eucJP" },
+ { "ujis-0", "ujis" },
+ { "jisx0208.1983-0","SJIS" },
+ { "jisx0212.1990-0","SJIS" },
+ { "pck", "SJIS" },
+ { NULL, NULL }
};
+
+/* Another copy of this trivial list implementation
+ Why? This stuff gets called a lot (potentially), should run fast,
+ and g_list's are f@@#$ed up to make this a hassle */
+static void
+e_dlist_init (EDList *v)
+{
+ v->head = (EDListNode *) &v->tail;
+ v->tail = 0;
+ v->tailpred = (EDListNode *) &v->head;
+}
+
+static EDListNode *
+e_dlist_addhead (EDList *l, EDListNode *n)
+{
+ n->next = l->head;
+ n->prev = (EDListNode *) &l->head;
+ l->head->prev = n;
+ l->head = n;
+ return n;
+}
+
+static EDListNode *
+e_dlist_addtail (EDList *l, EDListNode *n)
+{
+ n->next = (EDListNode *) &l->tail;
+ n->prev = l->tailpred;
+ l->tailpred->next = n;
+ l->tailpred = n;
+ return n;
+}
+
+static EDListNode *
+e_dlist_remove (EDListNode *n)
+{
+ n->next->prev = n->prev;
+ n->prev->next = n->next;
+ return n;
+}
+
+
+static void
+locale_parse_lang (const char *locale)
+{
+ char *codeset, *lang;
+
+ if ((codeset = strchr (locale, '.')))
+ lang = g_strndup (locale, codeset - locale);
+ else
+ lang = g_strdup (locale);
+
+ /* validate the language */
+ if (strlen (lang) >= 2) {
+ if (lang[2] == '-' || lang[2] == '_') {
+ /* canonicalise the lang */
+ camel_strdown (lang);
+
+ /* validate the country code */
+ if (strlen (lang + 3) > 2) {
+ /* invalid country code */
+ lang[2] = '\0';
+ } else {
+ lang[2] = '-';
+ e_strup (lang + 3);
+ }
+ } else if (lang[2] != '\0') {
+ /* invalid language */
+ g_free (lang);
+ lang = NULL;
+ }
+
+ locale_lang = lang;
+ } else {
+ /* invalid language */
+ locale_lang = NULL;
+ g_free (lang);
+ }
+}
+
+
/**
* camel_iconv_init:
*
@@ -156,14 +243,14 @@ struct {
static void
camel_iconv_init (int keep)
{
- char *from, *to;
+ char *from, *to, *locale;
int i;
LOCK ();
if (iconv_charsets != NULL) {
if (!keep)
- UNLOCK();
+ UNLOCK ();
return;
}
@@ -172,7 +259,7 @@ camel_iconv_init (int keep)
for (i = 0; known_iconv_charsets[i].charset != NULL; i++) {
from = g_strdup (known_iconv_charsets[i].charset);
to = g_strdup (known_iconv_charsets[i].iconv_name);
- e_strdown (from);
+ camel_strdown (from);
g_hash_table_insert (iconv_charsets, from, to);
}
@@ -230,7 +317,7 @@ camel_iconv_init (int keep)
* camel_iconv_charset_name:
* @charset: charset name
*
- * Maps charset names to the names that glib's g_iconv_open() is more
+ * Maps charset names to the names that iconv_open() is more
* likely able to handle.
*
* Returns an iconv-friendly name for @charset.
@@ -253,9 +340,9 @@ camel_iconv_charset_name (const char *charset)
return iname;
}
- /* Unknown, try to convert some basic charset types to something that should work */
- if (!strncmp (name, "iso", 3)) {
- /* Convert iso-####-# or iso####-# or iso_####-# into the canonical form: iso-####-# */
+ /* Unknown, try canonicalise some basic charset types to something that should work */
+ if (strncmp (name, "iso", 3) == 0) {
+ /* Convert iso-####-# or iso####-# or iso_####-# to iso-####-# or iso####-# */
int iso, codepage;
char *p;
@@ -264,9 +351,10 @@ camel_iconv_charset_name (const char *charset)
tmp++;
iso = strtoul (tmp, &p, 10);
+
if (iso == 10646) {
- /* they all become iso-10646 */
- ret = g_strdup ("iso-10646");
+ /* they all become ICONV_10646 */
+ iname = g_strdup (ICONV_10646);
} else {
tmp = p;
if (*tmp == '-' || *tmp == '_')
@@ -276,10 +364,15 @@ camel_iconv_charset_name (const char *charset)
if (p > tmp) {
/* codepage is numeric */
- ret = g_strdup_printf ("iso-%d-%d", iso, codepage);
+#ifdef __aix__
+ if (codepage == 13)
+ iname = g_strdup ("IBM-921");
+ else
+#endif /* __aix__ */
+ iname = g_strdup_printf (ICONV_ISO_D_FORMAT, iso, codepage);
} else {
/* codepage is a string - probably iso-2022-jp or something */
- ret = g_strdup_printf ("iso-%d-%s", iso, p);
+ iname = g_strdup_printf (ICONV_ISO_S_FORMAT, iso, p);
}
}
} else if (strncmp (name, "windows-", 8) == 0) {
@@ -295,7 +388,7 @@ camel_iconv_charset_name (const char *charset)
tmp += 2;
iname = g_strdup_printf ("CP%s", tmp);
} else {
- /* Just assume its ok enough as is, case and all - let g_iconv_open() handle this */
+ /* Just assume its ok enough as is, case and all */
iname = g_strdup (charset);
}
@@ -309,23 +402,26 @@ static void
flush_entry (struct _iconv_cache *ic)
{
struct _iconv_cache_node *in, *nn;
-
+
in = (struct _iconv_cache_node *) ic->open.head;
nn = in->next;
while (nn) {
if (in->cd != (iconv_t) -1) {
g_hash_table_remove (iconv_cache_open, in->cd);
- g_iconv_close (in->cd);
+ iconv_close (in->cd);
}
+
g_free (in);
in = nn;
nn = in->next;
}
+
g_free (ic->conv);
g_free (ic);
}
+/* This should run pretty quick, its called a lot */
/**
* camel_iconv_open:
* @to: charset to convert to
@@ -343,27 +439,21 @@ flush_entry (struct _iconv_cache *ic)
iconv_t
camel_iconv_open (const char *to, const char *from)
{
- struct _iconv_cache_node *in;
struct _iconv_cache *ic;
+ struct _iconv_cache_node *in;
+ int errnosav;
iconv_t cd;
char *key;
- if (from == NULL || to == NULL) {
+ if (to == NULL || from == NULL) {
errno = EINVAL;
return (iconv_t) -1;
}
- if (!strcasecmp (from, "x-unknown"))
- from = camel_iconv_locale_charset ();
-
- /* Even tho g_iconv_open will find the appropriate charset
- * format(s) for the to/from charset strings (hahaha, yea
- * right), we still convert them to their canonical format
- * here so that our key is in a standard format */
- from = camel_iconv_charset_name (from);
to = camel_iconv_charset_name (to);
- key = g_alloca (strlen (from) + strlen (to) + 2);
- sprintf (key, "%s:%s", from, to);
+ from = camel_iconv_charset_name (from);
+ key = g_alloca (strlen (to) + strlen (from) + 2);
+ sprintf (key, "%s:%s", to, from);
LOCK ();
@@ -371,15 +461,15 @@ camel_iconv_open (const char *to, const char *from)
if (ic) {
e_dlist_remove ((EDListNode *) ic);
} else {
- struct _iconv_cache *last = (struct _iconv_cache *)iconv_cache_list.tailpred;
+ struct _iconv_cache *last = (struct _iconv_cache *) iconv_cache_list.tailpred;
struct _iconv_cache *prev;
prev = last->prev;
- while (prev && iconv_cache_size > ICONV_CACHE_SIZE) {
+ while (prev && iconv_cache_size > CAMEL_ICONV_CACHE_SIZE) {
in = (struct _iconv_cache_node *) last->open.head;
if (in->next && !in->busy) {
d(printf ("Flushing iconv converter '%s'\n", last->conv));
- e_dlist_remove ((EDListNode *)last);
+ e_dlist_remove ((EDListNode *) last);
g_hash_table_remove (iconv_cache, last->conv);
flush_entry (last);
iconv_cache_size--;
@@ -390,20 +480,19 @@ camel_iconv_open (const char *to, const char *from)
iconv_cache_size++;
- ic = g_new (struct _iconv_cache, 1);
+ ic = g_new (struct _iconv_cache);
e_dlist_init (&ic->open);
- ic->conv = g_strdup (tofrom);
+ ic->conv = g_strdup (key);
g_hash_table_insert (iconv_cache, ic->conv, ic);
- cd(printf ("Creating iconv converter '%s'\n", ic->conv));
+ d(printf ("Creating iconv converter '%s'\n", ic->conv));
}
-
e_dlist_addhead (&iconv_cache_list, (EDListNode *) ic);
/* If we have a free iconv, use it */
in = (struct _iconv_cache_node *) ic->open.tailpred;
if (in->prev && !in->busy) {
- cd(printf ("using existing iconv converter '%s'\n", ic->conv));
+ d(printf ("using existing iconv converter '%s'\n", ic->conv));
cd = in->cd;
if (cd != (iconv_t) -1) {
/* work around some broken iconv implementations
@@ -413,15 +502,15 @@ camel_iconv_open (const char *to, const char *from)
char *buggy_iconv_buf = NULL;
/* resets the converter */
- g_iconv (cd, &buggy_iconv_buf, &buggy_iconv_len, &buggy_iconv_buf, &buggy_iconv_len);
+ iconv (cd, &buggy_iconv_buf, &buggy_iconv_len, &buggy_iconv_buf, &buggy_iconv_len);
in->busy = TRUE;
e_dlist_remove ((EDListNode *) in);
e_dlist_addhead (&ic->open, (EDListNode *) in);
}
} else {
- d(printf ("creating new iconv converter '%s'\n", ic->conv));
- cd = g_iconv_open (to, from);
- in = g_new (struct _iconv_cache_node, 1);
+ d(printf("creating new iconv converter '%s'\n", ic->conv));
+ cd = iconv_open (to, from);
+ in = g_new (struct _iconv_cache_node);
in->cd = cd;
in->parent = ic;
e_dlist_addhead (&ic->open, (EDListNode *) in);
@@ -436,7 +525,7 @@ camel_iconv_open (const char *to, const char *from)
}
}
- UNLOCK();
+ UNLOCK ();
return cd;
}
@@ -455,7 +544,7 @@ camel_iconv_open (const char *to, const char *from)
size_t
camel_iconv (iconv_t cd, const char **inbuf, size_t *inleft, char **outbuf, size_t *outleft)
{
- return g_iconv (cd, (char **) inbuf, inleft, outbuf, outleft);
+ return iconv (cd, (ICONV_CONST char **) inbuf, inleft, outbuf, outleft);
}
@@ -468,15 +557,16 @@ camel_iconv (iconv_t cd, const char **inbuf, size_t *inleft, char **outbuf, size
* Returns 0 on success or -1 on fail as well as setting an
* appropriate errno value.
**/
-int
+void
camel_iconv_close (iconv_t cd)
{
struct _iconv_cache_node *in;
- if (cd == (iconv_t)-1)
+ if (cd == (iconv_t) -1)
return;
LOCK ();
+
in = g_hash_table_lookup (iconv_cache_open, cd);
if (in) {
d(printf ("closing iconv converter '%s'\n", in->parent->conv));
@@ -485,11 +575,13 @@ camel_iconv_close (iconv_t cd)
e_dlist_addtail (&in->parent->open, (EDListNode *) in);
} else {
g_warning ("trying to close iconv i dont know about: %p", cd);
- g_iconv_close (cd);
+ iconv_close (cd);
}
+
UNLOCK ();
}
+
const char *
camel_iconv_locale_charset (void)
{
@@ -516,7 +608,7 @@ static struct {
char *lang;
} cjkr_lang_map[] = {
{ "Big5", "zh" },
- { "BIG5HKCS", "zh" },
+ { "BIG5HKSCS", "zh" },
{ "gb2312", "zh" },
{ "gb18030", "zh" },
{ "gbk", "zh" },