diff options
Diffstat (limited to 'camel/camel-charset-map.c')
-rw-r--r-- | camel/camel-charset-map.c | 322 |
1 files changed, 0 insertions, 322 deletions
diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c deleted file mode 100644 index d5d7665dac..0000000000 --- a/camel/camel-charset-map.c +++ /dev/null @@ -1,322 +0,0 @@ - -#include <stdio.h> - -/* - if you want to build the charset map, add the root directory of - libunicode to the include path and define BUILD_MAP, - then run it as - ./a.out > camel-charset-map-private.h - - The tables genereated work like this: - - An indirect array for each page of unicode character - Each array element has an indirect pointer to one of the bytes of - the generated bitmask. -*/ - -#ifdef BUILD_MAP -#include "iso/iso8859-2.h" -#include "iso/iso8859-3.h" -#include "iso/iso8859-4.h" -#include "iso/iso8859-5.h" -#include "iso/iso8859-6.h" -#include "iso/iso8859-7.h" -#include "iso/iso8859-8.h" -#include "iso/iso8859-9.h" -#include "iso/iso8859-10.h" -#include "iso/iso8859-13.h" -#include "iso/iso8859-14.h" -#include "iso/iso8859-15.h" -#include "iso/windows-1250.h" -#include "iso/windows-1252.h" -#include "iso/windows-1257.h" -#include "iso/koi8-r.h" -#include "iso/koi8-u.h" -#include "iso/tis620.2533-1.h" -#include "iso/armscii-8.h" -#include "iso/georgian-academy.h" -#include "iso/georgian-ps.h" -#include "msft/cp932.h" -#include "jis/shiftjis.h" - -static struct { - unsigned short *table; - char *name; - int type; /* type of table */ - unsigned int bit; /* assigned bit */ -} tables[] = { - { iso8859_2_table, "iso-8859-2", 0, 0} , - { iso8859_3_table, "iso-8859-3", 0, 0} , - { iso8859_4_table, "iso-8859-4", 0, 0}, - { iso8859_5_table, "iso-8859-5", 0, 0}, -/* apparently -6 has special digits? */ - { iso8859_6_table, "iso-8859-6", 0, 0}, - { iso8859_7_table, "iso-8859-7", 0, 0}, - { iso8859_8_table, "iso-8859-8", 0, 0}, - { iso8859_9_table, "iso-8859-9", 0, 0}, - { iso8859_10_table, "iso-8859-10", 0, 0}, - { iso8859_13_table, "iso-8859-13", 0, 0}, - { iso8859_14_table, "iso-8859-14", 0, 0}, - { iso8859_15_table, "iso-8859-15", 0, 0}, - { windows_1250_table, "windows-1250", 0, 0}, - { windows_1252_table, "windows-1252", 0, 0}, - { windows_1257_table, "windows-1257", 0, 0}, - { koi8_r_table, "koi8-r", 0, 0}, - { koi8_u_table, "koi8-u", 0, 0}, - { tis_620_table, "tis620.2533-1", 0, 0}, - { armscii_8_table, "armscii-8", 0, 0}, - { georgian_academy_table, "georgian-academy", 0, 0}, - { georgian_ps_table, "georgian-ps", 0, 0}, - { cp932_table, "CP932", 1, 0}, - { sjis_table, "Shift-JIS", 1, 0}, - { 0, 0} -}; - -unsigned int encoding_map[256 * 256]; - -static void -add_bigmap(unsigned short **table, int bit) -{ - int i; - int j; - - for (i=0;i<256;i++) { - unsigned short *tab = table[i]; - if (tab) { - for (j=0;j<256;j++) { - if (tab[j]) - encoding_map[tab[j]] |= bit; - } - } - } -} - -void main(void) -{ - int i, j; - unsigned short *tab; - int max, min; - int bit = 0x01; - int k; - int bytes; - -#if 0 - /* iso-latin-1 (not needed-detected in code) */ - for (i=0;i<256;i++) { - encoding_map[i] |= bit; - } - bit <<= 1; -#endif - - /* dont count the terminator */ - bytes = ((sizeof(tables)/sizeof(tables[0]))+7-1)/8; - - /* the other latin charsets */ - for (j=0;tables[j].table;j++) { - switch (tables[j].type) { - case 0: /* table from 128-256 */ - tab = tables[j].table; - for (i=0;i<128;i++) { - /* 0-127 is the common */ - encoding_map[i] |= bit; - encoding_map[tab[i]] |= bit; - } - break; - case 1: /* sparse table */ - add_bigmap(tables[j].table, bit); - break; - } - tables[j].bit = bit; - bit <<= 1; - } - - printf("/* This file is automatically generated: DO NOT EDIT */\n\n"); - - for (i=0;i<256;i++) { - /* first, do we need this block? */ - for (k=0;k<bytes;k++) { - for (j=0;j<256;j++) { - if ((encoding_map[i*256 + j] & (0xff << (k*8))) != 0) - break; - } - if (j < 256) { - /* yes, dump it */ - printf("static unsigned char m%02x%x[256] = {\n\t", i, k); - for (j=0;j<256;j++) { - printf("0x%02x, ", (encoding_map[i*256+j] >> (k*8)) & 0xff ); - if (((j+1)&7) == 0 && j<255) - printf("\n\t"); - } - printf("\n};\n\n"); - } - } - } - - printf("struct {\n"); - for (k=0;k<bytes;k++) { - printf("\tunsigned char *bits%d;\n", k); - } - printf("} camel_charmap[256] = {\n\t"); - for (i=0;i<256;i++) { - /* first, do we need this block? */ - printf("{ "); - for (k=0;k<bytes;k++) { - for (j=0;j<256;j++) { - if ((encoding_map[i*256 + j] & (0xff << (k*8))) != 0) - break; - } - if (j < 256) { - printf("m%02x%x, ", i, k); - } else { - printf("0, "); - } - } - printf("}, "); - if (((i+1)&7) == 0 && i<255) - printf("\n\t"); - } - printf("\n};\n\n"); - - printf("struct {\n\tconst char *name;\n\tunsigned int bit;\n} camel_charinfo[] = {\n"); - for (j=0;tables[j].table;j++) { - printf("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit); - } - printf("};\n\n"); - - printf("#define charset_mask(x) \\\n"); - for (k=0;k<bytes;k++) { - if (k!=0) - printf("\t| "); - else - printf("\t"); - printf("(camel_charmap[(x)>>8].bits%d?camel_charmap[(x)>>8].bits%d[(x)&0xff]<<%d:0)", k, k, k*8); - if (k<bytes-1) - printf("\t\\\n"); - } - printf("\n\n"); - -} - -#else - -#include "camel-charset-map.h" -#include "camel-charset-map-private.h" -#include <unicode.h> -#include <locale.h> -#include <glib.h> - -void camel_charset_init(CamelCharset *c) -{ - c->mask = ~0; - c->level = 0; -} - -void -camel_charset_step(CamelCharset *c, const char *in, int len) -{ - register unsigned int mask; - register int level; - const char *inptr = in, *inend = in+len; - - mask = c->mask; - level = c->level; - - /* check what charset a given string will fit in */ - while (inptr < inend) { - unicode_char_t c; - const char *newinptr; - newinptr = unicode_get_utf8(inptr, &c); - if (newinptr == NULL) { - inptr++; - continue; - } - inptr = newinptr; - if (c<=0xffff) { - mask &= charset_mask(c); - - if (c>=128 && c<256) - level = MAX(level, 1); - else if (c>=256) - level = MAX(level, 2); - } else { - mask = 0; - level = MAX(level, 2); - } - } - - c->mask = mask; - c->level = level; -} - -/* gets the best charset from the mask of chars in it */ -static const char * -camel_charset_best_mask(unsigned int mask) -{ - int i; - - for (i=0;i<sizeof(camel_charinfo)/sizeof(camel_charinfo[0]);i++) { - if (camel_charinfo[i].bit & mask) - return camel_charinfo[i].name; - } - return "UTF-8"; -} - -const char *camel_charset_best_name(CamelCharset *charset) -{ - if (charset->level == 1) - return "ISO-8859-1"; - else if (charset->level == 2) - return camel_charset_best_mask(charset->mask); - else - return NULL; - -} - -/* finds the minimum charset for this string NULL means US-ASCII */ -const char * -camel_charset_best(const char *in, int len) -{ - CamelCharset charset; - - camel_charset_init(&charset); - camel_charset_step(&charset, in, len); - return camel_charset_best_name(&charset); -} - -char * -camel_charset_locale_name (void) -{ - char *locale, *charset; - - locale = setlocale (LC_ALL, NULL); - - if (!locale || !strcmp (locale, "C") || !strcmp (locale, "POSIX")) { - /* The locale "C" or "POSIX" is a portable locale; its - * LC_CTYPE part corresponds to the 7-bit ASCII character - * set. - */ - - return NULL; - } else { - /* A locale name is typically of the form language[_terri- - * tory][.codeset][@modifier], where language is an ISO 639 - * language code, territory is an ISO 3166 country code, and - * codeset is a character set or encoding identifier like - * ISO-8859-1 or UTF-8. - */ - char *p; - int len; - - p = strchr (locale, '@'); - len = p ? (p - locale) : strlen (locale); - if ((p = strchr (locale, '.'))) { - charset = g_strndup (p + 1, len - (p - locale) + 1); - g_strdown (charset); - } - } - - return charset; -} - -#endif /* !BUILD_MAP */ - |