From ab7d6ef097bf18a3ebb97c4d73947fe8d2f059c9 Mon Sep 17 00:00:00 2001 From: nobody Date: Sat, 28 Dec 2002 04:49:29 +0000 Subject: This commit was manufactured by cvs2svn to create tag 'GHEX_2_0_0'. svn path=/tags/GHEX_2_0_0/; revision=19188 --- camel/camel-charset-map.c | 354 ---------------------------------------------- 1 file changed, 354 deletions(-) delete mode 100644 camel/camel-charset-map.c (limited to 'camel/camel-charset-map.c') diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c deleted file mode 100644 index be57d882e8..0000000000 --- a/camel/camel-charset-map.c +++ /dev/null @@ -1,354 +0,0 @@ -/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; -*- */ - -/* - * Authors: - * Michael Zucchi - * Dan Winship - * - * Copyright 2000, 2001 Ximian, Inc. (www.ximian.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include - -/* - if you want to build the charset map, compile this with something like: - gcc -DBUILD_MAP camel-charset-map.c `glib-config --cflags` - (plus any -I/-L/-l flags you need for iconv), then run it as - ./a.out > camel-charset-map-private.h - - Note that the big-endian variant isn't tested... - - The tables genereated work like this: - - An indirect array for each page of unicode character - Each array element has an indirect pointer to one of the bytes of - the generated bitmask. -*/ - -#ifdef BUILD_MAP -#include -#include - -static struct { - char *name; - unsigned int bit; /* assigned bit */ -} tables[] = { - /* These are the 8bit character sets (other than iso-8859-1, - * which is special-cased) which are supported by both other - * mailers and the GNOME environment. Note that the order - * they're listed in is the order they'll be tried in, so put - * the more-popular ones first. - */ - { "iso-8859-2", 0 }, /* Central/Eastern European */ - { "iso-8859-4", 0 }, /* Baltic */ - { "koi8-r", 0 }, /* Russian */ - { "koi8-u", 0 }, /* Ukranian */ - { "iso-8859-5", 0 }, /* Least-popular Russian encoding */ - { "iso-8859-7", 0 }, /* Greek */ - { "iso-8859-8", 0 }, /* Hebrew; Visual */ - { "iso-8859-9", 0 }, /* Turkish */ - { "iso-8859-13", 0 }, /* Baltic again */ - { "iso-8859-15", 0 }, /* New-and-improved iso-8859-1, but most - * programs that support this support UTF8 - */ - { "windows-1251", 0 }, /* Russian */ - { 0, 0 } -}; - -unsigned int encoding_map[256 * 256]; - -#if G_BYTE_ORDER == G_BIG_ENDIAN -#define UCS "UCS-4BE" -#else -#define UCS "UCS-4LE" -#endif - -int main (void) -{ - int i, j; - int max, min; - int bit = 0x01; - int k; - int bytes; - iconv_t cd; - char in[128]; - guint32 out[128]; - char *inptr, *outptr; - size_t inlen, outlen; - - /* dont count the terminator */ - bytes = ((sizeof(tables)/sizeof(tables[0]))+7-1)/8; - - for (i = 0; i < 128; i++) - in[i] = i + 128; - - for (j = 0; tables[j].name; j++) { - cd = iconv_open (UCS, tables[j].name); - inptr = in; - outptr = (char *)(out); - inlen = sizeof (in); - outlen = sizeof (out); - while (iconv (cd, &inptr, &inlen, &outptr, &outlen) == -1) { - if (errno == EILSEQ) { - inptr++; - inlen--; - } else { - printf ("%s\n", strerror (errno)); - exit (1); - } - } - iconv_close (cd); - - for (i = 0; i < 128 - outlen / 4; i++) { - encoding_map[i] |= bit; - encoding_map[out[i]] |= bit; - } - - tables[j].bit = bit; - bit <<= 1; - } - - printf("/* This file is automatically generated: DO NOT EDIT */\n\n"); - - for (i=0;i<256;i++) { - /* first, do we need this block? */ - for (k=0;k> (k*8)) & 0xff ); - if (((j+1)&7) == 0 && j<255) - printf("\n\t"); - } - printf("\n};\n\n"); - } - } - } - - printf("struct {\n"); - for (k=0;k>8].bits%d?camel_charmap[(x)>>8].bits%d[(x)&0xff]<<%d:0)", k, k, k*8); - if (k -#include -#include -#include -#include -#ifdef ENABLE_THREADS -#include -#endif -#ifdef HAVE_ALLOCA_H -#include -#endif - -void -camel_charset_init (CamelCharset *c) -{ - c->mask = ~0; - c->level = 0; -} - -void -camel_charset_step (CamelCharset *c, const char *in, int len) -{ - register unsigned int mask; - register int level; - const char *inptr = in, *inend = in+len; - - mask = c->mask; - level = c->level; - - /* check what charset a given string will fit in */ - while (inptr < inend) { - gunichar c; - const char *newinptr; - newinptr = g_utf8_next_char(inptr); - c = g_utf8_get_char(inptr); - if (newinptr == NULL || !g_unichar_validate (c)) { - inptr++; - continue; - } - - inptr = newinptr; - if (c<=0xffff) { - mask &= charset_mask(c); - - if (c>=128 && c<256) - level = MAX(level, 1); - else if (c>=256) - level = MAX(level, 2); - } else { - mask = 0; - level = MAX(level, 2); - } - } - - c->mask = mask; - c->level = level; -} - -/* gets the best charset from the mask of chars in it */ -static const char * -camel_charset_best_mask(unsigned int mask) -{ - int i; - - for (i=0;ilevel == 1) - return "ISO-8859-1"; - else if (charset->level == 2) - return camel_charset_best_mask (charset->mask); - else - return NULL; - -} - -/* finds the minimum charset for this string NULL means US-ASCII */ -const char * -camel_charset_best (const char *in, int len) -{ - CamelCharset charset; - - camel_charset_init (&charset); - camel_charset_step (&charset, in, len); - return camel_charset_best_name (&charset); -} - - -/** - * camel_charset_iso_to_windows: - * @isocharset: an ISO charset - * - * Returns the equivalent Windows charset. - **/ -const char * -camel_charset_iso_to_windows (const char *isocharset) -{ - /* According to http://czyborra.com/charsets/codepages.html, - * the charset mapping is as follows: - * - * us-ascii maps to windows-cp1252 - * iso-8859-1 maps to windows-cp1252 - * iso-8859-2 maps to windows-cp1250 - * iso-8859-3 maps to windows-cp???? - * iso-8859-4 maps to windows-cp???? - * iso-8859-5 maps to windows-cp1251 - * iso-8859-6 maps to windows-cp1256 - * iso-8859-7 maps to windows-cp1253 - * iso-8859-8 maps to windows-cp1255 - * iso-8859-9 maps to windows-cp1254 - * iso-8859-10 maps to windows-cp???? - * iso-8859-11 maps to windows-cp???? - * iso-8859-12 maps to windows-cp???? - * iso-8859-13 maps to windows-cp1257 - * - * Assumptions: - * - I'm going to assume that since iso-8859-4 and - * iso-8859-13 are Baltic that it also maps to - * windows-cp1257. - */ - - if (!strcasecmp (isocharset, "iso-8859-1") || !strcasecmp (isocharset, "us-ascii")) - return "windows-cp1252"; - else if (!strcasecmp (isocharset, "iso-8859-2")) - return "windows-cp1250"; - else if (!strcasecmp (isocharset, "iso-8859-4")) - return "windows-cp1257"; - else if (!strcasecmp (isocharset, "iso-8859-5")) - return "windows-cp1251"; - else if (!strcasecmp (isocharset, "iso-8859-6")) - return "windows-cp1256"; - else if (!strcasecmp (isocharset, "iso-8859-7")) - return "windows-cp1253"; - else if (!strcasecmp (isocharset, "iso-8859-8")) - return "windows-cp1255"; - else if (!strcasecmp (isocharset, "iso-8859-9")) - return "windows-cp1254"; - else if (!strcasecmp (isocharset, "iso-8859-13")) - return "windows-cp1257"; - - return isocharset; -} - -#endif /* !BUILD_MAP */ - -- cgit v1.2.3