From 11b8f83aadaf2c49a7a29721fde80ac2015b2484 Mon Sep 17 00:00:00 2001
From: Jeffrey Stedfast <fejj@ximian.com>
Date: Tue, 8 Jul 2003 18:36:40 +0000
Subject: Updated (new copy/paste from e-iconv).

2003-07-08  Jeffrey Stedfast  <fejj@ximian.com>

	* camel-iconv.c: Updated (new copy/paste from e-iconv).

	* camel-block-file.c (camel_block_file_get_block): Use
	camel_read() rather than libc read.

	* camel-tcp-stream-raw.c (stream_read): Use camel_read().
	(stream_write): Use camel_write().

	* camel-stream-fs.c (stream_read): Use camel_read().
	(stream_write): Use camel_write().

svn path=/trunk/; revision=21759
---
 camel/ChangeLog          |   5 +
 camel/camel-block-file.c |   6 +-
 camel/camel-iconv.c      | 268 +++++++++++++++++++++++++++++++----------------
 camel/camel-iconv.h      |   2 +-
 4 files changed, 189 insertions(+), 92 deletions(-)

(limited to 'camel')

diff --git a/camel/ChangeLog b/camel/ChangeLog
index 456ac014ad..b8d6bfa1f6 100644
--- a/camel/ChangeLog
+++ b/camel/ChangeLog
@@ -1,5 +1,10 @@
 2003-07-08  Jeffrey Stedfast  <fejj@ximian.com>
 
+	* camel-iconv.c: Updated (new copy/paste from e-iconv).
+
+	* camel-block-file.c (camel_block_file_get_block): Use
+	camel_read() rather than libc read.
+
 	* camel-tcp-stream-raw.c (stream_read): Use camel_read().
 	(stream_write): Use camel_write().
 
diff --git a/camel/camel-block-file.c b/camel/camel-block-file.c
index 885ebc34f5..de7d757367 100644
--- a/camel/camel-block-file.c
+++ b/camel/camel-block-file.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2001 Ximian Inc.
+ * Copyright (C) 2001-2003 Ximian Inc.
  *
  * Authors: Michael Zucchi <notzed@ximian.com>
  *
@@ -573,8 +573,8 @@ CamelBlock *camel_block_file_get_block(CamelBlockFile *bs, camel_block_t id)
 
 		bl = g_malloc0(sizeof(*bl));
 		bl->id = id;
-		if (lseek(bs->fd, id, SEEK_SET) == -1
-		    || read(bs->fd, bl->data, CAMEL_BLOCK_SIZE) == -1) {
+		if (lseek(bs->fd, id, SEEK_SET) == -1 ||
+		    camel_read (bs->fd, bl->data, CAMEL_BLOCK_SIZE) == -1) {
 			block_file_unuse(bs);
 			CAMEL_BLOCK_FILE_UNLOCK(bs, cache_lock);
 			g_free(bl);
diff --git a/camel/camel-iconv.c b/camel/camel-iconv.c
index 3f1708c18b..fee061a2b9 100644
--- a/camel/camel-iconv.c
+++ b/camel/camel-iconv.c
@@ -55,12 +55,25 @@ static GStaticMutex lock = G_STATIC_MUTEX_INIT;
 #endif
 
 
+typedef struct _EDListNode {
+	struct _EDListNode *next;
+	struct _EDListNode *prev;
+} EDListNode;
+
+typedef struct _EDList {
+	struct _EDListNode *head;
+	struct _EDListNode *tail;
+	struct _EDListNode *tailpred;
+} EDList;
+
+#define E_DLIST_INITIALISER(l) { (EDListNode *)&l.tail, 0, (EDListNode *)&l.head }
+
 struct _iconv_cache_node {
 	struct _iconv_cache_node *next;
 	struct _iconv_cache_node *prev;
-
+	
 	struct _iconv_cache *parent;
-
+	
 	int busy;
 	iconv_t cd;
 };
@@ -68,13 +81,13 @@ struct _iconv_cache_node {
 struct _iconv_cache {
 	struct _iconv_cache *next;
 	struct _iconv_cache *prev;
-
+	
 	char *conv;
-
+	
 	EDList open;		/* stores iconv_cache_nodes, busy ones up front */
 };
 
-#define ICONV_CACHE_SIZE   (16)
+#define CAMEL_ICONV_CACHE_SIZE (16)
 
 static EDList iconv_cache_list;
 static GHashTable *iconv_cache;
@@ -91,62 +104,136 @@ struct {
 } known_iconv_charsets[] = {
 #if 0
 	/* charset name, iconv-friendly charset name */
-	{ "iso-8859-1",      "iso-8859-1" },
-	{ "iso8859-1",       "iso-8859-1" },
+	{ "iso-8859-1",     "iso-8859-1" },
+	{ "iso8859-1",      "iso-8859-1" },
 	/* the above mostly serves as an example for iso-style charsets,
 	   but we have code that will populate the iso-*'s if/when they
-	   show up in camel_iconv_charset_name() so I'm
+	   show up in e_iconv_charset_name() so I'm
 	   not going to bother putting them all in here... */
-	{ "windows-cp1251",  "cp1251"     },
-	{ "windows-1251",    "cp1251"     },
-	{ "cp1251",          "cp1251"     },
+	{ "windows-cp1251", "cp1251"     },
+	{ "windows-1251",   "cp1251"     },
+	{ "cp1251",         "cp1251"     },
 	/* the above mostly serves as an example for windows-style
 	   charsets, but we have code that will parse and convert them
 	   to their cp#### equivalents if/when they show up in
-	   camel_iconv_charset_name() so I'm not going to bother
+	   e_iconv_charset_name() so I'm not going to bother
 	   putting them all in here either... */
 #endif
 	/* charset name (lowercase!), iconv-friendly name (sometimes case sensitive) */
-	{ "utf-8",           "UTF-8"      },
-	{ "utf8",            "UTF-8"      },
+	{ "utf-8",          "UTF-8"      },
 	
 	/* 10646 is a special case, its usually UCS-2 big endian */
 	/* This might need some checking but should be ok for solaris/linux */
-	{ "iso-10646-1",     "UCS-2BE"    },
-	{ "iso_10646-1",     "UCS-2BE"    },
-	{ "iso10646-1",      "UCS-2BE"    },
-	{ "iso-10646",       "UCS-2BE"    },
-	{ "iso_10646",       "UCS-2BE"    },
-	{ "iso10646",        "UCS-2BE"    },
-	
-	/* "ks_c_5601-1987" seems to be the most common of this lot */
-	{ "ks_c_5601-1987",  "EUC-KR"     },
-	{ "5601",            "EUC-KR"     },
-	{ "ksc-5601",        "EUC-KR"     },
-	{ "ksc-5601-1987",   "EUC-KR"     },
-	{ "ksc-5601_1987",   "EUC-KR"     },
+	{ "iso-10646-1",    "UCS-2BE"    },
+	{ "iso_10646-1",    "UCS-2BE"    },
+	{ "iso10646-1",     "UCS-2BE"    },
+	{ "iso-10646",      "UCS-2BE"    },
+	{ "iso_10646",      "UCS-2BE"    },
+	{ "iso10646",       "UCS-2BE"    },
+	
+	{ "ks_c_5601-1987", "EUC-KR"     },
 	
 	/* FIXME: Japanese/Korean/Chinese stuff needs checking */
-	{ "euckr-0",         "EUC-KR"     },
-	{ "5601",            "EUC-KR"     },
-	{ "big5-0",          "BIG5"       },
-	{ "big5.eten-0",     "BIG5"       },
-	{ "big5hkscs-0",     "BIG5HKCS"   },
-	{ "gb2312-0",        "gb2312"     },
-	{ "gb2312.1980-0",   "gb2312"     },
-	{ "euc-cn",          "gb2312"     },
-	{ "gb18030-0",       "gb18030"    },
-	{ "gbk-0",           "GBK"        },
-	
-	{ "eucjp-0",         "eucJP"  	  },  /* should this map to "EUC-JP" instead? */
-	{ "ujis-0",          "ujis"  	  },  /* we might want to map this to EUC-JP */
-	{ "jisx0208.1983-0", "SJIS"       },
-	{ "jisx0212.1990-0", "SJIS"       },
-	{ "pck",	     "SJIS"       },
-	{ NULL,              NULL         }
+	{ "euckr-0",        "EUC-KR"     },
+	{ "5601",           "EUC-KR"     },
+	{ "big5-0",         "BIG5"       },
+	{ "big5.eten-0",    "BIG5"       },
+	{ "big5hkscs-0",    "BIG5HKSCS"  },
+	{ "gb2312-0",       "gb2312"     },
+	{ "gb2312.1980-0",  "gb2312"     },
+	{ "gb-2312",        "gb2312"     },
+	{ "gb18030-0",      "gb18030"    },
+	{ "gbk-0",          "GBK"        },
+	
+	{ "eucjp-0",        "eucJP"  	 },
+	{ "ujis-0",         "ujis"  	 },
+	{ "jisx0208.1983-0","SJIS"       },
+	{ "jisx0212.1990-0","SJIS"       },
+	{ "pck",	    "SJIS"       },
+	{ NULL,             NULL         }
 };
 
 
+
+/* Another copy of this trivial list implementation
+   Why?  This stuff gets called a lot (potentially), should run fast,
+   and g_list's are f@@#$ed up to make this a hassle */
+static void
+e_dlist_init (EDList *v)
+{
+        v->head = (EDListNode *) &v->tail;
+        v->tail = 0;
+        v->tailpred = (EDListNode *) &v->head;
+}
+
+static EDListNode *
+e_dlist_addhead (EDList *l, EDListNode *n)
+{
+        n->next = l->head;
+        n->prev = (EDListNode *) &l->head;
+        l->head->prev = n;
+        l->head = n;
+        return n;
+}
+
+static EDListNode *
+e_dlist_addtail (EDList *l, EDListNode *n)
+{
+        n->next = (EDListNode *) &l->tail;
+        n->prev = l->tailpred;
+        l->tailpred->next = n;
+        l->tailpred = n;
+        return n;
+}
+
+static EDListNode *
+e_dlist_remove (EDListNode *n)
+{
+        n->next->prev = n->prev;
+        n->prev->next = n->next;
+        return n;
+}
+
+
+static void
+locale_parse_lang (const char *locale)
+{
+	char *codeset, *lang;
+	
+	if ((codeset = strchr (locale, '.')))
+		lang = g_strndup (locale, codeset - locale);
+	else
+		lang = g_strdup (locale);
+	
+	/* validate the language */
+	if (strlen (lang) >= 2) {
+		if (lang[2] == '-' || lang[2] == '_') {
+			/* canonicalise the lang */
+			camel_strdown (lang);
+			
+			/* validate the country code */
+			if (strlen (lang + 3) > 2) {
+				/* invalid country code */
+				lang[2] = '\0';
+			} else {
+				lang[2] = '-';
+				e_strup (lang + 3);
+			}
+		} else if (lang[2] != '\0') {
+			/* invalid language */
+			g_free (lang);
+			lang = NULL;
+		}
+		
+		locale_lang = lang;
+	} else {
+		/* invalid language */
+		locale_lang = NULL;
+		g_free (lang);
+	}
+}
+
+
 /**
  * camel_iconv_init:
  *
@@ -156,14 +243,14 @@ struct {
 static void
 camel_iconv_init (int keep)
 {
-	char *from, *to;
+	char *from, *to, *locale;
 	int i;
 	
 	LOCK ();
 	
 	if (iconv_charsets != NULL) {
 		if (!keep)
-			UNLOCK();
+			UNLOCK ();
 		return;
 	}
 	
@@ -172,7 +259,7 @@ camel_iconv_init (int keep)
 	for (i = 0; known_iconv_charsets[i].charset != NULL; i++) {
 		from = g_strdup (known_iconv_charsets[i].charset);
 		to = g_strdup (known_iconv_charsets[i].iconv_name);
-		e_strdown (from);
+		camel_strdown (from);
 		g_hash_table_insert (iconv_charsets, from, to);
 	}
 	
@@ -230,7 +317,7 @@ camel_iconv_init (int keep)
  * camel_iconv_charset_name:
  * @charset: charset name
  *
- * Maps charset names to the names that glib's g_iconv_open() is more
+ * Maps charset names to the names that iconv_open() is more
  * likely able to handle.
  *
  * Returns an iconv-friendly name for @charset.
@@ -253,9 +340,9 @@ camel_iconv_charset_name (const char *charset)
 		return iname;
 	}
 	
-	/* Unknown, try to convert some basic charset types to something that should work */
-	if (!strncmp (name, "iso", 3)) {
-		/* Convert iso-####-# or iso####-# or iso_####-# into the canonical form: iso-####-# */
+	/* Unknown, try canonicalise some basic charset types to something that should work */
+	if (strncmp (name, "iso", 3) == 0) {
+		/* Convert iso-####-# or iso####-# or iso_####-# to iso-####-# or iso####-# */
 		int iso, codepage;
 		char *p;
 		
@@ -264,9 +351,10 @@ camel_iconv_charset_name (const char *charset)
 			tmp++;
 		
 		iso = strtoul (tmp, &p, 10);
+		
 		if (iso == 10646) {
-			/* they all become iso-10646 */
-			ret = g_strdup ("iso-10646");
+			/* they all become ICONV_10646 */
+			iname = g_strdup (ICONV_10646);
 		} else {
 			tmp = p;
 			if (*tmp == '-' || *tmp == '_')
@@ -276,10 +364,15 @@ camel_iconv_charset_name (const char *charset)
 			
 			if (p > tmp) {
 				/* codepage is numeric */
-				ret = g_strdup_printf ("iso-%d-%d", iso, codepage);
+#ifdef __aix__
+				if (codepage == 13)
+					iname = g_strdup ("IBM-921");
+				else
+#endif /* __aix__ */
+					iname = g_strdup_printf (ICONV_ISO_D_FORMAT, iso, codepage);
 			} else {
 				/* codepage is a string - probably iso-2022-jp or something */
-				ret = g_strdup_printf ("iso-%d-%s", iso, p);
+				iname = g_strdup_printf (ICONV_ISO_S_FORMAT, iso, p);
 			}
 		}
 	} else if (strncmp (name, "windows-", 8) == 0) {
@@ -295,7 +388,7 @@ camel_iconv_charset_name (const char *charset)
 			tmp += 2;
 		iname = g_strdup_printf ("CP%s", tmp);	
 	} else {
-		/* Just assume its ok enough as is, case and all - let g_iconv_open() handle this */
+		/* Just assume its ok enough as is, case and all */
 		iname = g_strdup (charset);
 	}
 	
@@ -309,23 +402,26 @@ static void
 flush_entry (struct _iconv_cache *ic)
 {
 	struct _iconv_cache_node *in, *nn;
-
+	
 	in = (struct _iconv_cache_node *) ic->open.head;
 	nn = in->next;
 	while (nn) {
 		if (in->cd != (iconv_t) -1) {
 			g_hash_table_remove (iconv_cache_open, in->cd);
-			g_iconv_close (in->cd);
+			iconv_close (in->cd);
 		}
+		
 		g_free (in);
 		in = nn;
 		nn = in->next;
 	}
+	
 	g_free (ic->conv);
 	g_free (ic);
 }
 
 
+/* This should run pretty quick, its called a lot */
 /**
  * camel_iconv_open:
  * @to: charset to convert to
@@ -343,27 +439,21 @@ flush_entry (struct _iconv_cache *ic)
 iconv_t
 camel_iconv_open (const char *to, const char *from)
 {
-	struct _iconv_cache_node *in;
 	struct _iconv_cache *ic;
+	struct _iconv_cache_node *in;
+	int errnosav;
 	iconv_t cd;
 	char *key;
 	
-	if (from == NULL || to == NULL) {
+	if (to == NULL || from == NULL) {
 		errno = EINVAL;
 		return (iconv_t) -1;
 	}
 	
-	if (!strcasecmp (from, "x-unknown"))
-		from = camel_iconv_locale_charset ();
-	
-	/* Even tho g_iconv_open will find the appropriate charset
-	 * format(s) for the to/from charset strings (hahaha, yea
-	 * right), we still convert them to their canonical format
-	 * here so that our key is in a standard format */
-	from = camel_iconv_charset_name (from);
 	to = camel_iconv_charset_name (to);
-	key = g_alloca (strlen (from) + strlen (to) + 2);
-	sprintf (key, "%s:%s", from, to);
+	from = camel_iconv_charset_name (from);
+	key = g_alloca (strlen (to) + strlen (from) + 2);
+	sprintf (key, "%s:%s", to, from);
 	
 	LOCK ();
 	
@@ -371,15 +461,15 @@ camel_iconv_open (const char *to, const char *from)
 	if (ic) {
 		e_dlist_remove ((EDListNode *) ic);
 	} else {
-		struct _iconv_cache *last = (struct _iconv_cache *)iconv_cache_list.tailpred;
+		struct _iconv_cache *last = (struct _iconv_cache *) iconv_cache_list.tailpred;
 		struct _iconv_cache *prev;
 		
 		prev = last->prev;
-		while (prev && iconv_cache_size > ICONV_CACHE_SIZE) {
+		while (prev && iconv_cache_size > CAMEL_ICONV_CACHE_SIZE) {
 			in = (struct _iconv_cache_node *) last->open.head;
 			if (in->next && !in->busy) {
 				d(printf ("Flushing iconv converter '%s'\n", last->conv));
-				e_dlist_remove ((EDListNode *)last);
+				e_dlist_remove ((EDListNode *) last);
 				g_hash_table_remove (iconv_cache, last->conv);
 				flush_entry (last);
 				iconv_cache_size--;
@@ -390,20 +480,19 @@ camel_iconv_open (const char *to, const char *from)
 		
 		iconv_cache_size++;
 		
-		ic = g_new (struct _iconv_cache, 1);
+		ic = g_new (struct _iconv_cache);
 		e_dlist_init (&ic->open);
-		ic->conv = g_strdup (tofrom);
+		ic->conv = g_strdup (key);
 		g_hash_table_insert (iconv_cache, ic->conv, ic);
 		
-		cd(printf ("Creating iconv converter '%s'\n", ic->conv));
+		d(printf ("Creating iconv converter '%s'\n", ic->conv));
 	}
-	
 	e_dlist_addhead (&iconv_cache_list, (EDListNode *) ic);
 	
 	/* If we have a free iconv, use it */
 	in = (struct _iconv_cache_node *) ic->open.tailpred;
 	if (in->prev && !in->busy) {
-		cd(printf ("using existing iconv converter '%s'\n", ic->conv));
+		d(printf ("using existing iconv converter '%s'\n", ic->conv));
 		cd = in->cd;
 		if (cd != (iconv_t) -1) {
 			/* work around some broken iconv implementations 
@@ -413,15 +502,15 @@ camel_iconv_open (const char *to, const char *from)
 			char *buggy_iconv_buf = NULL;
 			
 			/* resets the converter */
-			g_iconv (cd, &buggy_iconv_buf, &buggy_iconv_len, &buggy_iconv_buf, &buggy_iconv_len);
+			iconv (cd, &buggy_iconv_buf, &buggy_iconv_len, &buggy_iconv_buf, &buggy_iconv_len);
 			in->busy = TRUE;
 			e_dlist_remove ((EDListNode *) in);
 			e_dlist_addhead (&ic->open, (EDListNode *) in);
 		}
 	} else {
-		d(printf ("creating new iconv converter '%s'\n", ic->conv));
-		cd = g_iconv_open (to, from);
-		in = g_new (struct _iconv_cache_node, 1);
+		d(printf("creating new iconv converter '%s'\n", ic->conv));
+		cd = iconv_open (to, from);
+		in = g_new (struct _iconv_cache_node);
 		in->cd = cd;
 		in->parent = ic;
 		e_dlist_addhead (&ic->open, (EDListNode *) in);
@@ -436,7 +525,7 @@ camel_iconv_open (const char *to, const char *from)
 		}
 	}
 	
-	UNLOCK();
+	UNLOCK ();
 	
 	return cd;
 }
@@ -455,7 +544,7 @@ camel_iconv_open (const char *to, const char *from)
 size_t
 camel_iconv (iconv_t cd, const char **inbuf, size_t *inleft, char **outbuf, size_t *outleft)
 {
-	return g_iconv (cd, (char **) inbuf, inleft, outbuf, outleft);
+	return iconv (cd, (ICONV_CONST char **) inbuf, inleft, outbuf, outleft);
 }
 
 
@@ -468,15 +557,16 @@ camel_iconv (iconv_t cd, const char **inbuf, size_t *inleft, char **outbuf, size
  * Returns 0 on success or -1 on fail as well as setting an
  * appropriate errno value.
  **/
-int
+void
 camel_iconv_close (iconv_t cd)
 {
 	struct _iconv_cache_node *in;
 	
-	if (cd == (iconv_t)-1)
+	if (cd == (iconv_t) -1)
 		return;
 	
 	LOCK ();
+	
 	in = g_hash_table_lookup (iconv_cache_open, cd);
 	if (in) {
 		d(printf ("closing iconv converter '%s'\n", in->parent->conv));
@@ -485,11 +575,13 @@ camel_iconv_close (iconv_t cd)
 		e_dlist_addtail (&in->parent->open, (EDListNode *) in);
 	} else {
 		g_warning ("trying to close iconv i dont know about: %p", cd);
-		g_iconv_close (cd);
+		iconv_close (cd);
 	}
+	
 	UNLOCK ();
 }
 
+
 const char *
 camel_iconv_locale_charset (void)
 {
@@ -516,7 +608,7 @@ static struct {
 	char *lang;
 } cjkr_lang_map[] = {
 	{ "Big5",        "zh" },
-	{ "BIG5HKCS",    "zh" },
+	{ "BIG5HKSCS",   "zh" },
 	{ "gb2312",      "zh" },
 	{ "gb18030",     "zh" },
 	{ "gbk",         "zh" },
diff --git a/camel/camel-iconv.h b/camel/camel-iconv.h
index 09f0a5cc8c..f57346816a 100644
--- a/camel/camel-iconv.h
+++ b/camel/camel-iconv.h
@@ -41,7 +41,7 @@ const char *camel_iconv_charset_language (const char *charset);
 
 iconv_t camel_iconv_open (const char *to, const char *from);
 size_t camel_iconv (iconv_t cd, const char **inbuf, size_t *inleft, char **outbuf, size_t *outleft);
-int camel_iconv_close (iconv_t cd);
+void camel_iconv_close (iconv_t cd);
 
 #ifdef __cplusplus
 }
-- 
cgit v1.2.3