diff options
Diffstat (limited to 'camel/gmime-rfc2047.c')
-rw-r--r-- | camel/gmime-rfc2047.c | 491 |
1 files changed, 0 insertions, 491 deletions
diff --git a/camel/gmime-rfc2047.c b/camel/gmime-rfc2047.c deleted file mode 100644 index 339a54d521..0000000000 --- a/camel/gmime-rfc2047.c +++ /dev/null @@ -1,491 +0,0 @@ -/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ -/* gmime-rfc2047.c: implemention of RFC2047 */ - -/* - * Author : - * Bertrand Guiheneuf <bertrand@helixcode.com> - * - * Copyright 1999, 2000 Helix Code, Inc. (http://www.helixcode.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - */ - -/* - * Authors: Robert Brady <rwb197@ecs.soton.ac.uk> - */ - -#include <stdio.h> -#include <ctype.h> -#include <unicode.h> -#include <string.h> - -#include "gmime-rfc2047.h" - -#define NOT_RANKED -1 - -/* This should be changed ASAP to use the base64 code Miguel comitted */ - -const char *base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - -static unsigned char base64_rank[256]; -static int base64_rank_table_built; -static void build_base64_rank_table (void); - -static int -hexval (gchar c) { - if (isdigit (c)) return c-'0'; - c = tolower (c); - return c - 'a' + 10; -} - -static gchar * -decode_quoted (const gchar *text, const gchar *end) -{ - gchar *to = malloc(end - text + 1), *to_2 = to; - - if (!to) return NULL; - while (*text && text < end) { - if (*text == '=') { - gchar a = hexval (text[1]); - gchar b = hexval (text[2]); - int c = (a << 4) + b; - *to = c; - to++; - text+=3; - } else if (*text == '_') { - *to = ' '; - to++; - text++; - } else { - *to = *text; - to++; - text++; - } - } - return to_2; -} - -static gchar * -decode_base64 (const gchar *data, const gchar *end) -{ - unsigned short pattern = 0; - int bits = 0; - int delimiter = '='; - gchar x; - gchar *buffer = g_malloc((end - data) * 3); - gchar *t = buffer; - int Q = 0; - - if (!buffer) return NULL; - - while (*data != delimiter) { - x = base64_rank[(unsigned char)(*data++)]; - if (x == NOT_RANKED) - continue; - pattern <<= 6; - pattern |= x; - bits += 6; - if (bits >= 8) { - x = (pattern >> (bits - 8)) & 0xff; - *t++ = x; - Q++; - bits -= 8; - } - } - *t = 0; - return buffer; -} - -static void -build_base64_rank_table (void) -{ - int i; - - if (!base64_rank_table_built) { - for (i = 0; i < 256; i++) - base64_rank[i] = NOT_RANKED; - for (i = 0; i < 64; i++) - base64_rank[(int) base64_alphabet[i]] = i; - base64_rank_table_built = 1; - } -} - - -static gchar * -rfc2047_decode_word (const gchar *data, const gchar *into_what) -{ - const char *charset = strstr (data, "=?"), *encoding, *text, *end; - - char *buffer, *b, *cooked_data; - - buffer = g_malloc (strlen(data) * 2); - b = buffer; - - if (!charset) return strdup (data); - charset+=2; - - encoding = strchr (charset, '?'); - if (!encoding) return strdup (data); - encoding++; - - text = strchr(encoding, '?'); - if (!text) return strdup (data); - text++; - - end = strstr(text, "?="); - if (!end) return strdup (data); - - b[0] = 0; - - if (toupper(*encoding)=='Q') - cooked_data = decode_quoted (text, end); - else if (toupper (*encoding)=='B') - cooked_data = decode_base64 (text, end); - else - return g_strdup(data); - - { - char *c = strchr (charset, '?'); - char *q = g_malloc (c - charset + 1); - char *cook_2 = cooked_data; - int cook_len = strlen (cook_2); - int b_len = 4096; - unicode_iconv_t i; - strncpy (q, charset, c - charset); - q[c - charset] = 0; - i = unicode_iconv_open (into_what, q); - if (!i) { - g_free (q); - return g_strdup (buffer); - } - if (unicode_iconv (i, &cook_2, &cook_len, &b, &b_len)==-1) - /* FIXME : use approximation code if we can't convert it properly. */ - ; - unicode_iconv_close (i); - *b = 0; - } - - return g_strdup (buffer); -} - -static const gchar * -find_end_of_encoded_word (const gchar *data) -{ - /* We can't just search for ?=, - because of the case : - "=?charset?q?=ff?=" :( */ - if (!data) return NULL; - data = strstr (data, "=?"); - if (!data) return NULL; - data = strchr(data+2, '?'); - if (!data) return NULL; - data = strchr (data+1, '?'); - if (!data) return NULL; - data = strstr (data+1, "?="); - if (!data) return NULL; - return data + 2; -} - -gchar * -gmime_rfc2047_decode (const gchar *data, const gchar *into_what) -{ - char *buffer = malloc (strlen(data) * 4), *b = buffer; - - int was_encoded_word = 0; - - build_base64_rank_table (); - - while (data && *data) { - char *word_start = strstr (data, "=?"), *decoded; - if (!word_start) { - strcpy (b, data); - b[strlen (data)] = 0; - return buffer; - } - if (word_start != data) { - - if (strspn (data, " \t\n\r") != (word_start - data)) { - strncpy (b, data, word_start - data); - b += word_start - data; - *b = 0; - } - } - decoded = rfc2047_decode_word (word_start, into_what); - strcpy (b, decoded); - b += strlen (decoded); - *b = 0; - g_free (decoded); - - data = find_end_of_encoded_word (data); - } - - *b = 0; - return buffer; -} - -#define isnt_ascii(a) ((a) <= 0x1f || (a) >= 0x7f) - -static int -rfc2047_clean (const gchar *string, const gchar *max) -{ - /* if (strstr (string, "?=")) return 1; */ - while (string < max) { - if (isnt_ascii ((unsigned char)*string)) - return 0; - string++; - } - return 1; -} - -static gchar * -encode_word (const gchar *string, int length, const gchar *said_charset) -{ - const gchar *max = string + length; - if (rfc2047_clean(string, max)) { - /* don't bother encoding it if it has no odd characters in it */ - return g_strndup (string, length); - } - { - char *temp = malloc (length * 4 + 1), *t = temp; - t += sprintf (t, "=?%s?q?", said_charset); - while (string < max) { - if (*string == ' ') - *(t++) = '_'; - else if ((*string <= 0x1f) || (*string >= 0x7f) || (*string == '=') || (*string == '?')) - t += sprintf (t, "=%2x", (unsigned char)*string); - else - *(t++) = *string; - - string++; - } - t += sprintf (t, "?="); - *t = 0; - return temp; - } -} - -static int -words_in(char *a) -{ - int words = 1; - while (*a) { - if (*(a++)==' ') - words++; - } - return words; -} - -struct word_data { - const char *word; - int word_length; - const char *to_encode_in; - char *encoded; - enum { - wt_None, - wt_Address, - } type; -}; - -static int string_can_fit_in(const char *a, int length, const char *charset) -{ - while (length--) { - if (*a < 0x1f || *a >= 0x7f) return 0; - a++; - } - return 1; -} - -static void -show_entry(struct word_data *a) -{ - a->type = wt_None; - - if (string_can_fit_in(a->word, a->word_length, "US-ASCII")) - a->to_encode_in = "US-ASCII"; - - if (a->word[0]=='<' && a->word[a->word_length-1]=='>') { - a->type = wt_Address; - } -} - -static void -break_into_words(const char *string, struct word_data *a, int words) -{ - int i; - for (i=0;i<words;i++) { - - char *next_space = strchr(string, ' '); - - if (!next_space) { - a[i].word = string; - a[i].word_length = strlen(string); - a[i].to_encode_in = NULL; /* i.e. the default */ - - show_entry(a+i); - - return; - } - - a[i].word = string; - a[i].word_length = next_space - string; - a[i].to_encode_in = NULL; - - show_entry(a+i); - - string = next_space + 1; - - } -} - -static void -join_words(struct word_data *a, int words) -{ - int i; - for (i=(words-1);i>0;i--) { - if (a[i].to_encode_in == a[i-1].to_encode_in) { - a[i-1].word_length += 1 + a[i].word_length; - a[i].word = 0; - a[i].word_length = 0; - } - - } -} - -static void show_words(struct word_data *words, int count) -{ - int i; - for (i=0;i<count;i++) - if (words[i].word) - show_entry(words+i); -} - -gchar * -gmime_rfc2047_encode (const gchar *string, const gchar *charset) -{ - int temp_len = strlen (string)*4 + 1, word_count; - char *temp = g_malloc (temp_len), *temp_2 = temp; - int string_length = strlen (string); - char *encoded = NULL, *p; - struct word_data *words; - - /* first, let us convert to UTF-8 */ - unicode_iconv_t i = unicode_iconv_open ("UTF-8", charset); - unicode_iconv (i, &string, &string_length, &temp_2, &temp_len); - unicode_iconv_close (i); - - /* null terminate it */ - *temp_2 = 0; - - /* now encode it as if it were a single word */ - - word_count = words_in ( temp ); - - words = g_malloc(sizeof (struct word_data) * word_count); - break_into_words(temp, words, word_count); - - join_words(words, word_count); - - show_words(words, word_count); - - { - size_t len = 0; - int c = 0; - for (c = 0;c<word_count;c++) { - if (words[c].word) - { - words[c].encoded = encode_word(words[c].word, words[c].word_length, - words[c].to_encode_in ? words[c].to_encode_in : - "UTF-8"); - len += strlen(words[c].encoded) + 1; - } - } - - { - encoded = g_malloc(len+1); - p = encoded; - for (c = 0; c < word_count;c++) if (words[c].word) { - strcpy(p, words[c].encoded); - p += strlen(p); - strcpy(p, " "); - p++; - } - *p = 0; - } - } - - - /* - - real algorithm : - - we need to - - split it into words - - identify portions that have NOT to be encoded (i.e. <> and the comment starter/ender ) - - identify the best character set for each word - - merge words which share a character set, allow jumping and merging with words which - would be ok to encode in non-US-ASCII. - - if we have to use 2 character sets, try and collapse them into one. - - (e.g. if one word contains letters in latin-1, and another letters in latin-2, use - latin-2 for the first word as well if possible). - - finally : - - if utf-8 will still be used, use it for everything. - - and then, at last, generate the encoded text, using base64/quoted-printable for - each word depending upon which is more efficient. - - TODO : - create a priority list of encodings - - i.e. - - US-ASCII, ISO-8859-1, ISO-8859-2, ISO-8859-3, KOI8, - - Should survey for most popular charsets : - what do people usually use for the following scripts? - - * Chinese/Japanese/Korean - * Greek - * Cyrillic - - (any other scripts commonly used in mail/news?) - - This algorithm is probably far from optimal, but should be - reasonably efficient for simple cases. (and almost free if - the text is just in US-ASCII : like 99% of the text that will - pass through it) - - - - current status : - - Algorithm now partially implemented. - - */ - - g_free(words); - g_free(temp); - - return encoded; -} |