#include <glib.h>
#include "camel-utf8.h"

/**
 * camel_utf8_putc:
 * @ptr: 
 * @c: 
 * 
 * Output a 32 bit unicode character as utf8 octets.  At most 4 octets will
 * be written to @ptr.  @ptr will be advanced to the next character position.
 **/
void
camel_utf8_putc(unsigned char **ptr, guint32 c)
{
	register unsigned char *p = *ptr;

	if (c <= 0x7f)
		*p++ = c;
	else if (c <= 0x7ff) {
		*p++ = 0xc0 | c >> 6;
		*p++ = 0x80 | (c & 0x3f);
	} else if (c <= 0xffff) {
		*p++ = 0xe0 | c >> 12;
		*p++ = 0x80 | ((c >> 6) & 0x3f);
		*p++ = 0x80 | (c & 0x3f);
	} else {
		/* see unicode standard 3.0, S 3.8, max 4 octets */
		*p++ = 0xf0 | c >> 18;
		*p++ = 0x80 | ((c >> 12) & 0x3f);
		*p++ = 0x80 | ((c >> 6) & 0x3f);
		*p++ = 0x80 | (c & 0x3f);
	}

	*ptr = p;
}

/**
 * camel_utf8_getc:
 * @ptr: 
 * 
 * Get a Unicode character from a utf8 stream.  @ptr will be advanced
 * to the next character position.  Invalid utf8 characters will be
 * silently skipped.  @ptr should point to a NUL terminated array.
 * 
 * Return value: The next Unicode character.  @ptr will be advanced to
 * the next character always.
 **/
guint32
camel_utf8_getc(const unsigned char **ptr)
{
	register unsigned char *p = (unsigned char *)*ptr;
	register unsigned char c, r;
	register guint32 v, m;

again:
	r = *p++;
loop:
	if (r < 0x80) {
		*ptr = p;
		v = r;
	} else if (r < 0xf8) { /* valid start char? (max 4 octets) */
		v = r;
		m = 0x7f80;	/* used to mask out the length bits */
		do {
			c = *p++;
			if ((c & 0xc0) != 0x80) {
				r = c;
				goto loop;
			}
			v = (v<<6) | (c & 0x3f);
			r<<=1;
			m<<=5;
		} while (r & 0x40);
		
		*ptr = p;

		v &= ~m;
	} else {
		goto again;
	}

	return v;
}

void
g_string_append_u(GString *out, guint32 c)
{
	unsigned char buffer[8];
	unsigned char *p = buffer;

	camel_utf8_putc(&p, c);
	*p = 0;
	g_string_append(out, buffer);
}

static char *utf7_alphabet =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";

static unsigned char utf7_rank[256] = {
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x3e,0x3f,0xff,0xff,0xff,
	0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,
	0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xff,0xff,0xff,0xff,0xff,
	0xff,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,
	0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
};

/**
 * camel_utf7_utf8:
 * @ptr: 
 * 
 * Convert a modified utf7 string to utf8.  If the utf7 string
 * contains 8 bit characters, they are treated as iso-8859-1.
 * 
 * The IMAP rules [rfc2060] are used in the utf7 encoding.
 *
 * Return value: The converted string.
 **/
char *
camel_utf7_utf8(const char *ptr)
{
	const unsigned char *p = (unsigned char *)ptr;
	unsigned int c;
	guint32 v=0, x;
	GString *out;
	int i=0;
	int state = 0;
	char *ret;

	out = g_string_new("");
	do {
		c = *p++;
		switch(state) {
		case 0:
			if (c == '&')
				state = 1;
			else
				g_string_append_u(out, c);
			break;
		case 1:
			if (c == '-') {
				g_string_append_c(out, '&');
				state = 0;
			} else if (utf7_rank[c] != 0xff) {
				v = utf7_rank[c];
				i = 6;
				state = 2;
			} else {
				/* invalid */
				g_string_append(out, "&-");
				state = 0;
			}
			break;
		case 2:
			if (c == '-') {
				state = 0;
			} else if (utf7_rank[c] != 0xff) {
				v = (v<<6) | utf7_rank[c];
				i+=6;
				if (i >= 16) {
					x = (v >> (i-16)) & 0xffff;
					g_string_append_u(out, x);
					i-=16;
				}
			} else {
				g_string_append_u(out, c);
				state = 0;
			}
			break;
		}
	} while (c);

	ret = g_strdup(out->str);
	g_string_free(out, TRUE);

	return ret;
}

static void utf7_closeb64(GString *out, guint32 v, guint32 i)
{
	guint32 x;

	if (i>0) {
		x = (v << (6-i)) & 0x3f;
		g_string_append_c(out, utf7_alphabet[x]);
	}
	g_string_append_c(out, '-');
}

/**
 * camel_utf8_utf7:
 * @ptr: 
 * 
 * Convert a utf8 string to a modified utf7 format.
 *
 * The IMAP rules [rfc2060] are used in the utf7 encoding.
 * 
 * Return value: 
 **/
char *
camel_utf8_utf7(const char *ptr)
{
	const unsigned char *p = (unsigned char *)ptr;
	unsigned int c;
	guint32 x, v = 0;
	int state = 0;
	GString *out;
	int i = 0;
	char *ret;

	out = g_string_new("");

	while ( (c = camel_utf8_getc(&p)) ) {
		if (c >= 0x20 && c <= 0x7e) {
			if (state == 1) {
				utf7_closeb64(out, v, i);
				state = 0;
				i = 0;
			}
			if (c == '&')
				g_string_append(out, "&-");
			else
				g_string_append_c(out, c);
		} else {
			if (state == 0) {
				g_string_append_c(out, '&');
				state = 1;
			}
			v = (v << 16) | c;
			i += 16;
			while (i >= 6) {
				x = (v >> (i-6)) & 0x3f;
				g_string_append_c(out, utf7_alphabet[x]);
				i -= 6;
			}
		}
	}

	if (state == 1)
		utf7_closeb64(out, v, i);

	ret = g_strdup(out->str);
	g_string_free(out, TRUE);

	return ret;
}