New function to fold headers.

2000-09-28 Not Zed <NotZed@HelixCode.com> * camel-mime-utils.c (header_fold): New function to fold headers. 2000-09-27 Not Zed <NotZed@HelixCode.com> * camel-mime-parser.c (folder_scan_header): If we had an empty header, then it must be end of the headers too. (folder_scan_init): No we dont need to init the outbuf with a nul terminator. * camel-folder-summary.c (camel_folder_summary_set_uid): New function to reset the uid to a higher value. * providers/mbox/camel-mbox-summary.c (camel_mbox_summary_sync): "something failed (yo!)" what sort of crap is this? Fixed all the indenting again, what wanker keeps running stuff through indent? (message_info_new): Check the uid we loaded off the disk, if it existed already, assign a new one. If it didn't then make sure the nextuid is higher. * camel-charset-map.c: New file, used to build a large unicode decoding mapping table, and use it to determine what is the lowest charset a given word can be encoded with. Uses tables from libunicode's source. * camel-internet-address.c (internet_encode): Use header_phrase_encode to properly encode the fullname, as required. refixed indenting. Who keeps doing that? (camel_internet_address_find_address): Changed fatal return/warnings into assertions. * camel-mime-utils.c (header_raw_append_parse): Check : explicitly (removed from is_fieldname() macro). (camel_mime_special_table): Changed to short, so we can represent more bit types. (quoted_encode): Take a mask of the safe chars for this encoding. (header_address_decode): Removed a #warning that makes no sense anymore. (header_decode_date): Fixed the 'broken date' parser code, if it ever decoded it it just threw away the result. (header_encode_string): Use better charset matching for encoding strings as well. 2000-08-31 Not Zed <NotZed@HelixCode.com> * providers/mh/camel-mh-summary.c (camel_mh_summary_sync): Save the index if we do a sync. (camel_mh_summary_check): Save the index here too. Probably. svn path=/trunk/; revision=5615
author: Not Zed <NotZed@HelixCode.com> 2000-09-28 19:31:29 +0800
committer: Michael Zucci <zucchi@src.gnome.org> 2000-09-28 19:31:29 +0800
commit: e71de4c02186efe577412709346154df96e35054 (patch)
tree: 125ad7e9d02cf42bf8acebefed08c39b8df675e5 /camel/camel-charset-map.c
parent: 18579aa7e01b4d313d2cf05f355aa4698b99cf7d (diff)
download: gsoc2013-evolution-e71de4c02186efe577412709346154df96e35054.tar
gsoc2013-evolution-e71de4c02186efe577412709346154df96e35054.tar.gz
gsoc2013-evolution-e71de4c02186efe577412709346154df96e35054.tar.bz2
gsoc2013-evolution-e71de4c02186efe577412709346154df96e35054.tar.lz
gsoc2013-evolution-e71de4c02186efe577412709346154df96e35054.tar.xz
gsoc2013-evolution-e71de4c02186efe577412709346154df96e35054.tar.zst
gsoc2013-evolution-e71de4c02186efe577412709346154df96e35054.zip
1 files changed, 257 insertions, 0 deletions
diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c
new file mode 100644
index 0000000000..b6ad0a5f37
--- /dev/null
+++ b/camel/camel-charset-map.c
@@ -0,0 +1,257 @@
+
+#include <stdio.h>
+
+/*
+  If you want to build the charset map, add the root directory of
+  libunicode to the include path and define BUILD_MAP,
+  then run the resulting program as
+    ./a.out > camel-charset-map-private.h
+
+  The generated tables work like this:
+
+   There is an indirect array with one entry for each page (256
+   characters) of Unicode.  Each array element holds one pointer per
+   byte of the generated bitmask; a pointer is either 0 or refers to a
+   256-entry table giving that byte of the bitmask for every character
+   in the page.
+*/
+
+#ifdef BUILD_MAP
+#include "iso/iso8859-2.h"
+#include "iso/iso8859-3.h"
+#include "iso/iso8859-4.h"
+#include "iso/iso8859-5.h"
+#include "iso/iso8859-6.h"
+#include "iso/iso8859-7.h"
+#include "iso/iso8859-8.h"
+#include "iso/iso8859-9.h"
+#include "iso/iso8859-10.h"
+#include "iso/iso8859-14.h"
+#include "iso/iso8859-15.h"
+#include "iso/koi8-r.h"
+#include "iso/koi8-u.h"
+#include "msft/cp932.h"
+#include "jis/shiftjis.h"
+
+/*
+ * The charsets folded into the generated map, in priority order: each
+ * entry is assigned the next free bit, and camel_charinfo[] is emitted
+ * in this same order.  The list ends at the entry whose table is NULL.
+ */
+static struct {
+	unsigned short *table;	/* conversion table (from the included charset headers) */
+	char *name;		/* charset name written into camel_charinfo[] */
+	int type;		/* 0: flat table of 128 entries, 1: sparse table of 256 pages */
+	unsigned int bit;	/* assigned bit, filled in by main() */
+} tables[] = {
+	{ iso8859_2_table,  "iso-8859-2",  0, 0 },
+	{ iso8859_3_table,  "iso-8859-3",  0, 0 },
+	{ iso8859_4_table,  "iso-8859-4",  0, 0 },
+	{ iso8859_5_table,  "iso-8859-5",  0, 0 },
+	/* apparently -6 has special digits? */
+	{ iso8859_6_table,  "iso-8859-6",  0, 0 },
+	{ iso8859_7_table,  "iso-8859-7",  0, 0 },
+	{ iso8859_8_table,  "iso-8859-8",  0, 0 },
+	{ iso8859_9_table,  "iso-8859-9",  0, 0 },
+	{ iso8859_10_table, "iso-8859-10", 0, 0 },
+	{ iso8859_14_table, "iso-8859-14", 0, 0 },
+	{ iso8859_15_table, "iso-8859-15", 0, 0 },
+	{ koi8_r_table,     "koi8-r",      0, 0 },
+	{ koi8_u_table,     "koi8-u",      0, 0 },
+	{ cp932_table,      "CP932",       1, 0 },
+	{ sjis_table,       "Shift-JIS",   1, 0 },
+	{ NULL, NULL, 0, 0 }	/* terminator */
+};
+
+/*
+ * One bitmask per 16-bit character code: a bit is set when the charset
+ * that was assigned that bit (see tables[].bit) contains the character.
+ */
+unsigned int encoding_map[256 * 256];
+
+/*
+ * Record every character found in a sparse, two-level table.
+ *
+ * table: array of 256 page pointers; a null pointer means the page is
+ *        absent, otherwise it refers to 256 character codes
+ * bit:   bit to set in encoding_map[] for each non-zero character code
+ */
+static void
+add_bigmap(unsigned short **table, int bit)
+{
+	int page;
+
+	for (page = 0; page < 256; page++) {
+		unsigned short *row = table[page];
+		int n;
+
+		if (!row)
+			continue;
+
+		for (n = 0; n < 256; n++) {
+			unsigned short ch = row[n];
+
+			/* a zero entry is treated as "no mapping" and skipped */
+			if (ch != 0)
+				encoding_map[ch] |= bit;
+		}
+	}
+}
+
+/*
+ * Return non-zero when at least one character of page `page' has a bit
+ * set in byte `byte' of its encoding_map[] bitmask, i.e. when a table
+ * must be emitted for that page/byte combination.
+ */
+static int
+page_uses_byte(int page, int byte)
+{
+	/* unsigned constant, so selecting the top byte cannot overflow an int */
+	unsigned int mask = 0xffu << (byte * 8);
+	int j;
+
+	for (j = 0; j < 256; j++) {
+		if ((encoding_map[page * 256 + j] & mask) != 0)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Table generator, only compiled when BUILD_MAP is defined.
+ *
+ * Assigns one bit to each entry of tables[], fills encoding_map[] from
+ * the charset tables, and writes to stdout:
+ *   - one "mPPB[256]" byte table for every page PP / bitmask byte B
+ *     that has any bit set,
+ *   - camel_charmap[256], pointing at those tables (or 0),
+ *   - camel_charinfo[], the charset names and their bits,
+ *   - the charset_mask(x) lookup macro.
+ *
+ * Returns 0.
+ */
+int
+main(void)
+{
+	int i, j, k;
+	unsigned short *tab;
+	unsigned int bit = 0x01;	/* unsigned: shifted left once per table */
+	int bytes;
+
+#if 0
+	/* iso-latin-1 (not needed-detected in code) */
+	for (i=0;i<256;i++) {
+		encoding_map[i] |= bit;
+	}
+	bit <<= 1;
+#endif
+
+	/* bytes of bitmask needed for all tables; the -1 leaves out the terminator */
+	bytes = (int) ((sizeof(tables) / sizeof(tables[0]) - 1 + 7) / 8);
+
+	/* the other latin charsets */
+	for (j = 0; tables[j].table; j++) {
+		switch (tables[j].type) {
+		case 0:		/* table from 128-256 */
+			tab = tables[j].table;
+			for (i = 0; i < 128; i++) {
+				/* 0-127 is the common */
+				encoding_map[i] |= bit;
+				encoding_map[tab[i]] |= bit;
+			}
+			break;
+		case 1:		/* sparse table */
+			/* the struct stores every table as unsigned short *;
+			   sparse tables are really arrays of page pointers,
+			   which is what add_bigmap() walks */
+			add_bigmap((unsigned short **) tables[j].table, (int) bit);
+			break;
+		}
+		tables[j].bit = bit;
+		bit <<= 1;
+	}
+
+	printf("/* This file is automatically generated: DO NOT EDIT */\n\n");
+
+	/* dump a byte table for every page/byte combination that is in use */
+	for (i = 0; i < 256; i++) {
+		for (k = 0; k < bytes; k++) {
+			if (!page_uses_byte(i, k))
+				continue;
+
+			printf("static unsigned char m%02x%x[256] = {\n\t", i, k);
+			for (j = 0; j < 256; j++) {
+				printf("0x%02x, ", (encoding_map[i*256+j] >> (k*8)) & 0xff );
+				if (((j+1)&7) == 0 && j<255)
+					printf("\n\t");
+			}
+			printf("\n};\n\n");
+		}
+	}
+
+	/* the per-page index: one pointer per bitmask byte, 0 if unused */
+	printf("struct {\n");
+	for (k = 0; k < bytes; k++) {
+		printf("\tunsigned char *bits%d;\n", k);
+	}
+	printf("} camel_charmap[256] = {\n\t");
+	for (i = 0; i < 256; i++) {
+		printf("{ ");
+		for (k = 0; k < bytes; k++) {
+			if (page_uses_byte(i, k)) {
+				printf("m%02x%x, ", i, k);
+			} else {
+				printf("0, ");
+			}
+		}
+		printf("}, ");
+		if (((i+1)&7) == 0 && i<255)
+			printf("\n\t");
+	}
+	printf("\n};\n\n");
+
+	/* charset names with their assigned bits, in tables[] order */
+	printf("struct {\n\tconst char *name;\n\tunsigned int bit;\n} camel_charinfo[] = {\n");
+	for (j = 0; tables[j].table; j++) {
+		printf("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit);
+	}
+	printf("};\n\n");
+
+	/* NOTE(review): the emitted macro body is an unparenthesised chain
+	   of `|' terms; users should wrap the expansion in parentheses */
+	printf("#define charset_mask(x) \\\n");
+	for (k = 0; k < bytes; k++) {
+		if (k!=0)
+			printf("\t| ");
+		else
+			printf("\t");
+		printf("(camel_charmap[(x)>>8].bits%d?camel_charmap[(x)>>8].bits%d[(x)&0xff]<<%d:0)", k, k, k*8);
+		if (k<bytes-1)
+			printf("\t\\\n");
+	}
+	printf("\n\n");
+
+	return 0;
+}
+
+#else
+
+#include "camel-charset-map.h"
+#include "camel-charset-map-private.h"
+#include <unicode.h>
+#include <glib.h>
+
+/*
+ * Look up the charset bitmask of a single character.
+ *
+ * c: character code
+ *
+ * Returns the value of the generated charset_mask() lookup for c, or 0
+ * when c is above 0xffff (the generated tables are indexed by a 16-bit
+ * code only).
+ */
+unsigned int
+camel_charset_mask(unsigned int c)
+{
+	/* the macro expansion is wrapped in parentheses because the
+	   generated body is a bare chain of `|' terms */
+	return (c <= 0xffff) ? (unsigned int) (charset_mask(c)) : 0;
+}
+
+/*
+ * Pick a charset name for a mask of usable charsets.
+ *
+ * mask: bitmask of charset bits, as returned by camel_charset_mask()
+ *
+ * Returns the name of the first camel_charinfo[] entry whose bit is set
+ * in mask, or "UTF-8" when no entry matches.
+ */
+const char *
+camel_charset_best_mask(unsigned int mask)
+{
+	const int count = sizeof(camel_charinfo) / sizeof(camel_charinfo[0]);
+	int idx = 0;
+
+	while (idx < count) {
+		if ((camel_charinfo[idx].bit & mask) != 0)
+			return camel_charinfo[idx].name;
+		idx++;
+	}
+
+	return "UTF-8";
+}
+
+/*
+ * Find the minimum charset that can hold a UTF-8 string.
+ *
+ * in:  start of the text
+ * len: number of bytes to examine
+ *
+ * Returns NULL when every decoded character is below 128 (US-ASCII),
+ * "ISO-8859-1" when the highest character is below 256, and otherwise
+ * the result of camel_charset_best_mask() for the charsets that contain
+ * every character of the string.  Positions for which
+ * unicode_get_utf8() returns NULL are skipped one byte at a time.
+ */
+const char *
+camel_charset_best(const char *in, int len)
+{
+	unsigned int mask = ~0u;	/* start with every charset possible */
+	int level = 0;
+	const char *inptr = in, *inend = in+len;
+
+	/* check what charset a given string will fit in */
+	while (inptr < inend) {
+		unicode_char_t c;
+		const char *newinptr;
+
+		newinptr = unicode_get_utf8(inptr, &c);
+		if (newinptr == NULL) {
+			inptr++;
+			continue;
+		}
+		inptr = newinptr;
+
+		if (c<=0xffff) {
+			/* keep only the charsets that also contain this
+			   character; OR-ing here would leave the mask at ~0
+			   and always select the first charset in the table */
+			mask &= camel_charset_mask(c);
+
+			if (c>=128 && c<256)
+				level = MAX(level, 1);
+			else if (c>=256)
+				level = MAX(level, 2);
+		} else {
+			/* outside the range covered by the tables */
+			mask = 0;
+			level = MAX(level, 2);
+		}
+	}
+
+	if (level == 0)
+		return NULL;
+	if (level == 1)
+		return "ISO-8859-1";
+
+	/* final return keeps every path of this non-void function defined */
+	return camel_charset_best_mask(mask);
+}
+
+
+#endif /* !BUILD_MAP */
+
author	Not Zed <NotZed@HelixCode.com>	2000-09-28 19:31:29 +0800
committer	Michael Zucci <zucchi@src.gnome.org>	2000-09-28 19:31:29 +0800
commit	e71de4c02186efe577412709346154df96e35054 (patch)
tree	125ad7e9d02cf42bf8acebefed08c39b8df675e5 /camel/camel-charset-map.c
parent	18579aa7e01b4d313d2cf05f355aa4698b99cf7d (diff)
download	gsoc2013-evolution-e71de4c02186efe577412709346154df96e35054.tar gsoc2013-evolution-e71de4c02186efe577412709346154df96e35054.tar.gz gsoc2013-evolution-e71de4c02186efe577412709346154df96e35054.tar.bz2 gsoc2013-evolution-e71de4c02186efe577412709346154df96e35054.tar.lz gsoc2013-evolution-e71de4c02186efe577412709346154df96e35054.tar.xz gsoc2013-evolution-e71de4c02186efe577412709346154df96e35054.tar.zst gsoc2013-evolution-e71de4c02186efe577412709346154df96e35054.zip