aboutsummaryrefslogtreecommitdiffstats
path: root/camel/camel-utf8.c
diff options
context:
space:
mode:
author nobody <nobody@localhost> 2003-05-03 19:02:31 +0800
committer nobody <nobody@localhost> 2003-05-03 19:02:31 +0800
commit d34577c91655ef19466b05f9270e2bc5c4ab83fa (patch)
tree 1445967f5c0ef4c749bc1b4030295ea1a12e3c00 /camel/camel-utf8.c
parent 19f2626e65d1700ff9c631a70ecb917f98dfcb38 (diff)
download gsoc2013-evolution-R3_1.tar
gsoc2013-evolution-R3_1.tar.gz
gsoc2013-evolution-R3_1.tar.bz2
gsoc2013-evolution-R3_1.tar.lz
gsoc2013-evolution-R3_1.tar.xz
gsoc2013-evolution-R3_1.tar.zst
gsoc2013-evolution-R3_1.zip
This commit was manufactured by cvs2svn to create tag 'R3_1'.R3_1
svn path=/tags/R3_1/; revision=21091
Diffstat (limited to 'camel/camel-utf8.c')
-rw-r--r-- camel/camel-utf8.c 313
1 files changed, 0 insertions, 313 deletions
diff --git a/camel/camel-utf8.c b/camel/camel-utf8.c
deleted file mode 100644
index 3c7af65b4d..0000000000
--- a/camel/camel-utf8.c
+++ /dev/null
@@ -1,313 +0,0 @@
-
-#include <glib.h>
-#include "camel-utf8.h"
-
-/**
- * camel_utf8_putc:
- * @ptr: in/out cursor into the output buffer; up to 4 octets are
- *       written starting at *@ptr.
- * @c: the 32 bit Unicode character to encode.
- *
- * Output a 32 bit unicode character as utf8 octets.  At most 4 octets will
- * be written to @ptr.  @ptr will be advanced to the next character position.
- * No terminating NUL is written.
- *
- * A value above 0x1fffff cannot be represented in 4 octets; U+FFFD
- * (REPLACEMENT CHARACTER) is written in its place rather than a
- * corrupt lead octet.
- **/
-void
-camel_utf8_putc(unsigned char **ptr, guint32 c)
-{
-	unsigned char *p = *ptr;
-
-	/* 4 octets carry 3 + 6 + 6 + 6 = 21 payload bits */
-	if (c > 0x1fffff)
-		c = 0xfffd;
-
-	if (c <= 0x7f) {
-		*p++ = (unsigned char)c;
-	} else if (c <= 0x7ff) {
-		*p++ = (unsigned char)(0xc0 | (c >> 6));
-		*p++ = (unsigned char)(0x80 | (c & 0x3f));
-	} else if (c <= 0xffff) {
-		*p++ = (unsigned char)(0xe0 | (c >> 12));
-		*p++ = (unsigned char)(0x80 | ((c >> 6) & 0x3f));
-		*p++ = (unsigned char)(0x80 | (c & 0x3f));
-	} else {
-		/* see unicode standard 3.0, S 3.8, max 4 octets */
-		*p++ = (unsigned char)(0xf0 | (c >> 18));
-		*p++ = (unsigned char)(0x80 | ((c >> 12) & 0x3f));
-		*p++ = (unsigned char)(0x80 | ((c >> 6) & 0x3f));
-		*p++ = (unsigned char)(0x80 | (c & 0x3f));
-	}
-
-	*ptr = p;
-}
-
-/**
- * camel_utf8_getc:
- * @ptr: in/out cursor into a NUL terminated array of utf8 octets.
- *
- * Get a Unicode character from a utf8 stream.  @ptr will be advanced
- * to the next character position.  Invalid utf8 characters will be
- * silently skipped: this covers octets that cannot start a sequence
- * (stray continuation octets 0x80-0xbf and the values 0xf8-0xff) as
- * well as sequences cut short by a non-continuation octet, in which
- * case decoding restarts at the octet that broke the sequence.
- *
- * Return value: The next Unicode character, or 0 when the terminating
- * NUL is read.  @ptr will be advanced to the next character always
- * (past the NUL when 0 is returned).
- **/
-guint32
-camel_utf8_getc(const unsigned char **ptr)
-{
-	const unsigned char *p = *ptr;
-	unsigned char c, r;
-	guint32 v, m;
-	int broken;
-
-	r = *p++;
-	for (;;) {
-		/* r is the candidate first octet of the next character */
-		if (r < 0x80) {
-			*ptr = p;
-			return r;
-		}
-
-		if (r < 0xc0 || r >= 0xf8) {
-			/* not a valid start char (max 4 octets): skip it */
-			r = *p++;
-			continue;
-		}
-
-		v = r;
-		m = 0x7f80;	/* used to mask out the length bits */
-		broken = 0;
-		do {
-			c = *p++;
-			if ((c & 0xc0) != 0x80) {
-				broken = 1;
-				break;
-			}
-			v = (v << 6) | (c & 0x3f);
-			/* each remaining 1 bit below the top of the lead
-			   octet stands for one more continuation octet */
-			r <<= 1;
-			m <<= 5;
-		} while (r & 0x40);
-
-		if (!broken) {
-			*ptr = p;
-			return v & ~m;
-		}
-
-		/* drop the partial sequence, re-examine the offending octet */
-		r = c;
-	}
-}
-
-/**
- * camel_utf8_getc_limit:
- * @ptr: in/out cursor into an array of utf8 octets.
- * @end: must not be NULL.  One past the last readable octet.
- *
- * Get the next utf8 char at @ptr, and return it, advancing @ptr to
- * the next character.  If @end is reached before a full utf8
- * character can be read, then the invalid Unicode char 0xffff is
- * returned as a sentinel (Unicode 3.1, section 2.7), and @ptr is not
- * advanced.
- *
- * Octets that cannot start a sequence (stray continuation octets
- * 0x80-0xbf and the values 0xf8-0xff) are skipped, and a sequence cut
- * short by a non-continuation octet is dropped, with decoding
- * restarting at the octet that broke it.
- *
- * Return value: The next utf8 char, or 0xffff.
- **/
-guint32
-camel_utf8_getc_limit(const unsigned char **ptr, const unsigned char *end)
-{
-	const unsigned char *p = *ptr;
-	unsigned char c, r;
-	guint32 v, m;
-	int broken;
-
-	while (p < end) {
-		r = *p++;
-
-		for (;;) {
-			/* r is the candidate first octet of the next character */
-			if (r < 0x80) {
-				*ptr = p;
-				return r;
-			}
-
-			/* not a valid start char (max 4 octets): fetch another */
-			if (r < 0xc0 || r >= 0xf8)
-				break;
-
-			v = r;
-			m = 0x7f80;	/* used to mask out the length bits */
-			broken = 0;
-			do {
-				/* ran out of input mid-character: *ptr is
-				   deliberately left untouched */
-				if (p >= end)
-					return 0xffff;
-
-				c = *p++;
-				if ((c & 0xc0) != 0x80) {
-					broken = 1;
-					break;
-				}
-				v = (v << 6) | (c & 0x3f);
-				r <<= 1;
-				m <<= 5;
-			} while (r & 0x40);
-
-			if (!broken) {
-				*ptr = p;
-				return v & ~m;
-			}
-
-			/* drop the partial sequence, re-examine the offending octet */
-			r = c;
-		}
-	}
-
-	return 0xffff;
-}
-
-/**
- * g_string_append_u:
- * @out: string to append to.
- * @c: Unicode character to append.
- *
- * Append @c to @out as utf8 octets, encoded with camel_utf8_putc().
- * The octets are appended as a NUL terminated string, so a @c of 0
- * appends nothing.
- **/
-void
-g_string_append_u(GString *out, guint32 c)
-{
-	/* camel_utf8_putc() writes at most 4 octets, plus our NUL */
-	unsigned char buffer[8];
-	unsigned char *p = buffer;
-
-	camel_utf8_putc(&p, c);
-	*p = 0;
-	/* g_string_append() takes a plain char string; convert explicitly
-	   instead of relying on an incompatible pointer conversion */
-	g_string_append(out, (const char *)buffer);
-}
-
-/* The 64 digits of the modified base64 used by IMAP utf7, indexed by
-   their 6 bit value.  Note the final "+," pair. */
-static const char utf7_alphabet[] =
-	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
-
-/* Reverse of utf7_alphabet: maps an octet to its 6 bit value, or to
-   0xff when the octet is not a modified base64 digit. */
-static const unsigned char utf7_rank[256] = {
-	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
-	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
-	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x3e,0x3f,0xff,0xff,0xff,
-	0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0xff,0xff,0xff,0xff,0xff,0xff,
-	0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,
-	0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xff,0xff,0xff,0xff,0xff,
-	0xff,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,
-	0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0xff,0xff,0xff,0xff,0xff,
-	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
-	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
-	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
-	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
-	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
-	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
-	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
-	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
-};
-
-/**
- * camel_utf7_utf8:
- * @ptr: NUL terminated modified utf7 string.
- *
- * Convert a modified utf7 string to utf8.  If the utf7 string
- * contains 8 bit characters, they are treated as iso-8859-1.
- *
- * The IMAP rules [rfc2060] are used in the utf7 encoding.
- *
- * The base64 runs carry UTF-16: a high surrogate directly followed by
- * a low surrogate is combined into a single character.  An unpaired
- * surrogate is passed through unchanged.
- *
- * Return value: The converted string, newly allocated; free it with
- * g_free().
- **/
-char *
-camel_utf7_utf8(const char *ptr)
-{
-	const unsigned char *p = (const unsigned char *)ptr;
-	unsigned int c;
-	guint32 v = 0, x;
-	guint32 hi = 0;		/* pending high surrogate, 0 if none */
-	GString *out;
-	int i = 0;		/* number of undecoded bits held in v */
-	int state = 0;		/* 0: literal, 1: just saw '&', 2: in base64 */
-
-	out = g_string_new("");
-	do {
-		/* the terminating NUL goes through the state machine too, so
-		   that an unfinished '&' sequence is closed off */
-		c = *p++;
-		switch (state) {
-		case 0:
-			if (c == '&')
-				state = 1;
-			else
-				g_string_append_u(out, c);
-			break;
-		case 1:
-			if (c == '-') {
-				/* "&-" is an escaped '&' */
-				g_string_append_c(out, '&');
-				state = 0;
-			} else if (utf7_rank[c] != 0xff) {
-				v = utf7_rank[c];
-				i = 6;
-				state = 2;
-			} else {
-				/* invalid */
-				g_string_append(out, "&-");
-				state = 0;
-			}
-			break;
-		case 2:
-			if (utf7_rank[c] != 0xff) {
-				v = (v << 6) | utf7_rank[c];
-				i += 6;
-				if (i >= 16) {
-					/* a whole UTF-16 code unit is available */
-					i -= 16;
-					x = (v >> i) & 0xffff;
-					if (hi != 0 && x >= 0xdc00 && x <= 0xdfff) {
-						g_string_append_u(out, 0x10000 + ((hi - 0xd800) << 10) + (x - 0xdc00));
-						hi = 0;
-					} else {
-						if (hi != 0) {
-							/* unpaired high surrogate */
-							g_string_append_u(out, hi);
-							hi = 0;
-						}
-						if (x >= 0xd800 && x <= 0xdbff)
-							hi = x;
-						else
-							g_string_append_u(out, x);
-					}
-				}
-			} else {
-				/* end of the base64 run */
-				if (hi != 0) {
-					g_string_append_u(out, hi);
-					hi = 0;
-				}
-				/* '-' only closes the run, anything else is literal */
-				if (c != '-')
-					g_string_append_u(out, c);
-				state = 0;
-			}
-			break;
-		default:
-			break;
-		}
-	} while (c);
-
-	/* hand the buffer over to the caller rather than copying it */
-	return g_string_free(out, FALSE);
-}
-
-/* Finish a base64 run: when @i bits are still pending in the low bits
-   of @v, write them out as one final digit (left aligned, zero
-   padded), then write the closing '-'. */
-static void utf7_closeb64(GString *out, guint32 v, guint32 i)
-{
-	if (i != 0) {
-		guint32 pad = 6 - i;
-		guint32 digit = (v << pad) & 0x3f;
-
-		g_string_append_c(out, utf7_alphabet[digit]);
-	}
-
-	g_string_append_c(out, '-');
-}
-
-/* Feed one UTF-16 code unit into the base64 bit stream.  *@v holds the
-   pending bits (only its low *@i bits are meaningful); every complete
-   group of 6 bits is written to @out as a modified base64 digit. */
-static void
-utf7_put_utf16(GString *out, guint32 *v, int *i, guint32 unit)
-{
-	*v = (*v << 16) | (unit & 0xffff);
-	*i += 16;
-	while (*i >= 6) {
-		*i -= 6;
-		g_string_append_c(out, utf7_alphabet[(*v >> *i) & 0x3f]);
-	}
-}
-
-/**
- * camel_utf8_utf7:
- * @ptr: NUL terminated utf8 string.
- *
- * Convert a utf8 string to a modified utf7 format.
- *
- * The IMAP rules [rfc2060] are used in the utf7 encoding.
- *
- * Characters 0x20-0x7e are copied as is, except '&' which becomes
- * "&-".  Everything else is written as UTF-16 in "&...-" base64 runs;
- * characters above 0xffff are written as a surrogate pair, and values
- * above 0x10ffff, which UTF-16 cannot represent, are replaced by
- * U+FFFD.
- *
- * Return value: The converted string, newly allocated; free it with
- * g_free().
- **/
-char *
-camel_utf8_utf7(const char *ptr)
-{
-	const unsigned char *p = (const unsigned char *)ptr;
-	guint32 c, v = 0;
-	int state = 0;		/* 1 while inside a base64 run */
-	GString *out;
-	int i = 0;		/* number of pending bits in v */
-
-	out = g_string_new("");
-
-	while ((c = camel_utf8_getc(&p)) != 0) {
-		if (c >= 0x20 && c <= 0x7e) {
-			if (state == 1) {
-				utf7_closeb64(out, v, i);
-				state = 0;
-				i = 0;
-			}
-			if (c == '&')
-				g_string_append(out, "&-");
-			else
-				g_string_append_c(out, c);
-		} else {
-			if (state == 0) {
-				g_string_append_c(out, '&');
-				state = 1;
-			}
-			if (c > 0x10ffff)
-				c = 0xfffd;
-			if (c > 0xffff) {
-				/* outside the BMP: high then low surrogate */
-				c -= 0x10000;
-				utf7_put_utf16(out, &v, &i, 0xd800 | (c >> 10));
-				utf7_put_utf16(out, &v, &i, 0xdc00 | (c & 0x3ff));
-			} else {
-				utf7_put_utf16(out, &v, &i, c);
-			}
-		}
-	}
-
-	if (state == 1)
-		utf7_closeb64(out, v, i);
-
-	/* hand the buffer over to the caller rather than copying it */
-	return g_string_free(out, FALSE);
-}