1 files changed, 1199 insertions, 0 deletions
diff --git a/camel/camel-mime-parser.c b/camel/camel-mime-parser.c
new file mode 100644
index 0000000000..cee6454707
--- /dev/null
+++ b/camel/camel-mime-parser.c
@@ -0,0 +1,1199 @@
+/*
+ *  Copyright (C) 2000 Helix Code Inc.
+ *
+ *  Authors: Michael Zucchi <notzed@helixcode.com>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Library General Public License
+ *  as published by the Free Software Foundation; either version 2 of
+ *  the License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public
+ *  License along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* What should hopefully be a fast mail parser */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <string.h>
+
+#include <stdio.h>
+#include <errno.h>
+
+#include <unicode.h>
+
+#include "camel-mime-parser.h"
+#include "camel-mime-utils.h"
+#include "camel-mime-filter.h"
+#include "camel-stream.h"
+#include "camel-seekable-stream.h"
+
+#define r(x)
+#define h(x)
+#define c(x)
+#define d(x)
+
+#define SCAN_BUF 4096		/* size of read buffer */
+#define SCAN_HEAD 128		/* headroom guaranteed to be before each read buffer */
+
+/* a little hacky, but i couldn't be bothered renaming everything */
+#define _header_scan_state _CamelMimeParserPrivate
+#define _PRIVATE(o) (((CamelMimeParser *)(o))->priv)
+
+/*
+ * Private parser state.  Through the #define above this is also
+ * struct _CamelMimeParserPrivate, i.e. what _PRIVATE(parser) points at.
+ * Instances are created by folder_scan_init() and released by
+ * folder_scan_close().
+ */
+struct _header_scan_state {
+
+    /* global state */
+
+	enum _header_state state;	/* current state of the step machine, see folder_scan_step() */
+
+	/* for building headers during scanning: outbuf..outend is the
+	   allocated buffer, outptr is the current write position */
+	char *outbuf;
+	char *outptr;
+	char *outend;
+
+	int fd;			/* input for a fd input, -1 when unused */
+	CamelStream *stream;	/* or for a stream, NULL when unused */
+
+	/* for scanning input buffers */
+	char *realbuf;		/* the real buffer, SCAN_HEAD*2 + SCAN_BUF bytes */
+	char *inbuf;		/* points to a subset of the allocated memory (realbuf + SCAN_HEAD); the */
+	char *inptr;		/* underflow (upto SCAN_HEAD) is for use by filters so they dont copy all data. */
+	char *inend;		/* inptr is the current read position, inend the end of valid data */
+
+	int atleast;		/* number of bytes folder_read() tries to keep available past inptr */
+
+	int seek;		/* current offset to start of buffer */
+
+	int midline;		/* are we mid-line interrupted? */
+	int scan_from;		/* do we care about From lines? */
+
+	int start_of_from;	/* where from started */
+	int start_of_headers;	/* where headers started from the last scan */
+
+	int header_start;	/* start of last header, or -1 */
+
+	/* NOTE(review): top_part, top_start and pending are not referenced by
+	   the code visible in this file - confirm whether they are still needed */
+	struct _header_scan_stack *top_part;	/* top of message header */
+	int top_start;		/* offset of start */
+
+	struct _header_scan_stack *pending; /* if we're pending part info, from the wrong part end */
+
+	/* filters to apply to all content before output */
+	int filterid;		/* id of next filter */
+	struct _header_scan_filter *filters;	/* singly linked list, in the order the filters were added */
+
+    /* per message/part info */
+	struct _header_scan_stack *parts;	/* stack of open parts, innermost first */
+
+};
+
+/*
+ * One entry on the stack of parts currently being parsed.  Entries are
+ * linked through 'parent' with the innermost part at s->parts; they are
+ * pushed by folder_push_part() and freed by folder_pull_part().
+ */
+struct _header_scan_stack {
+	struct _header_scan_stack *parent;	/* enclosing part, or NULL for the outermost entry */
+
+	enum _header_state savestate; /* state at invocation of this part; restored when the part is pulled */
+
+	struct _header_raw *headers;	/* headers for this part */
+
+	struct _header_content_type *content_type;	/* decoded Content-Type, or NULL if none was found */
+
+	char *boundary;		/* for multipart/ * boundaries, including leading -- and trailing -- for the final part */
+	int boundarylen;	/* length of boundary, including leading -- (but not the trailing --) */
+};
+
+/*
+ * Node of the list of content filters attached to a parser.
+ * 'next' should stay the first member: list handling code in this file
+ * may treat the address of the list head pointer as if it were a node.
+ */
+struct _header_scan_filter {
+	struct _header_scan_filter *next;	/* next filter in the chain, NULL at the end */
+	int id;					/* identifier handed back by camel_mime_parser_filter_add() */
+	CamelMimeFilter *filter;		/* the filter object; a reference is taken when it is added */
+};
+
+/* forward declarations for the scanner implementation in the second half of this file */
+static void folder_scan_step(struct _header_scan_state *s, char **databuffer, int *datalength);
+static int folder_scan_init_with_fd(struct _header_scan_state *s, int fd);
+static int folder_scan_init_with_stream(struct _header_scan_state *s, CamelStream *stream);
+static struct _header_scan_state *folder_scan_init(void);
+static void folder_scan_close(struct _header_scan_state *s);
+static struct _header_scan_stack *folder_scan_content(struct _header_scan_state *s, int *lastone, char **data, int *length);
+static struct _header_scan_stack *folder_scan_header(struct _header_scan_state *s, int *lastone);
+static int folder_scan_skip_line(struct _header_scan_state *s);
+static off_t folder_seek(struct _header_scan_state *s, off_t offset, int whence);
+static off_t folder_tell(struct _header_scan_state *s);
+
+/* GTK object boilerplate */
+static void camel_mime_parser_class_init (CamelMimeParserClass *klass);
+static void camel_mime_parser_init       (CamelMimeParser *obj);
+
+/*
+ * Printable names of the parser states, indexed by the numeric value of
+ * enum _header_state (see the uses of states[s->state]).  Only used for
+ * debug/trace output.
+ *
+ * NOTE(review): the "<invalid>" entry pads the table so that the *_END
+ * names line up with the enum values - confirm against the enum in
+ * camel-mime-parser.h whenever a state is added.
+ */
+static char *states[] = {
+	"HSCAN_INITIAL",
+	"HSCAN_FROM",		/* got 'From' line */
+	"HSCAN_HEADER",		/* toplevel header */
+	"HSCAN_BODY",		/* scanning body of message */
+	"HSCAN_MULTIPART",	/* got multipart header */
+	"HSCAN_MESSAGE",	/* rfc822 message */
+
+	"HSCAN_PART",		/* part of a multipart */
+	"<invalid>",
+
+	"HSCAN_EOF",		/* end of file */
+	"HSCAN_FROM_END",
+	"HSCAN_HEADER_END",
+	"HSCAN_BODY_END",
+	"HSCAN_MULTIPART_END",
+	"HSCAN_MESSAGE_END",
+};
+
+/* parent class, looked up in camel_mime_parser_class_init() and used to chain finalize */
+static GtkObjectClass *camel_mime_parser_parent;
+
+/* no signals are defined yet; LAST_SIGNAL only sizes the (empty) table */
+enum SIGNALS {
+	LAST_SIGNAL
+};
+
+static guint signals[LAST_SIGNAL] = { 0 };
+
+/*
+ * Return the GTK type id of CamelMimeParser, registering the type with
+ * gtk_type_unique() the first time it is asked for.
+ */
+guint
+camel_mime_parser_get_type (void)
+{
+	static guint parser_type = 0;
+
+	/* already registered: nothing more to do */
+	if (parser_type)
+		return parser_type;
+
+	{
+		GtkTypeInfo info = {
+			"CamelMimeParser",
+			sizeof (CamelMimeParser),
+			sizeof (CamelMimeParserClass),
+			(GtkClassInitFunc) camel_mime_parser_class_init,
+			(GtkObjectInitFunc) camel_mime_parser_init,
+			(GtkArgSetFunc) NULL,
+			(GtkArgGetFunc) NULL
+		};
+
+		parser_type = gtk_type_unique (gtk_object_get_type (), &info);
+	}
+
+	return parser_type;
+}
+
+/* GtkObject finalize handler: release the scanner state, then chain up to the parent class */
+static void
+finalise(GtkObject *o)
+{
+	folder_scan_close(_PRIVATE(o));
+
+	camel_mime_parser_parent->finalize (o);
+}
+
+/*
+ * Class initialiser: remember the parent class so finalise() can chain
+ * up, install the finalize handler and register the (empty) signal table.
+ */
+static void
+camel_mime_parser_class_init (CamelMimeParserClass *klass)
+{
+	GtkObjectClass *gtk_class;
+
+	gtk_class = (GtkObjectClass *) klass;
+	camel_mime_parser_parent = gtk_type_class (gtk_object_get_type ());
+
+	gtk_class->finalize = finalise;
+
+	gtk_object_class_add_signals (gtk_class, signals, LAST_SIGNAL);
+}
+
+/* Instance initialiser: give the new object a freshly initialised scanner state */
+static void
+camel_mime_parser_init (CamelMimeParser *obj)
+{
+	_PRIVATE(obj) = folder_scan_init();
+}
+
+/**
+ * camel_mime_parser_new:
+ *
+ * Create a new CamelMimeParser object.  The parser has no input until
+ * camel_mime_parser_init_with_fd() or camel_mime_parser_init_with_stream()
+ * is called on it.
+ *
+ * Return value: A new CamelMimeParser object.
+ **/
+CamelMimeParser *
+camel_mime_parser_new (void)
+{
+	return CAMEL_MIME_PARSER ( gtk_type_new (camel_mime_parser_get_type ()));
+}
+
+
+/*
+ * Append the filter mf to the end of the parser's filter chain.
+ * A reference is taken on mf.
+ *
+ * Returns the id assigned to the filter, for use with
+ * camel_mime_parser_filter_remove().
+ */
+int
+camel_mime_parser_filter_add(CamelMimeParser *m, CamelMimeFilter *mf)
+{
+	struct _header_scan_state *s = _PRIVATE(m);
+	struct _header_scan_filter **tail;
+	struct _header_scan_filter *node;
+
+	node = g_malloc(sizeof(*node));
+	node->next = NULL;
+	node->filter = mf;
+	gtk_object_ref((GtkObject *)mf);
+
+	/* hand out the current id and advance the counter, stepping over -1 */
+	node->id = s->filterid;
+	s->filterid += 1;
+	if (s->filterid == -1)
+		s->filterid += 1;
+
+	/* find the link that terminates the list and hang the new node there */
+	for (tail = &s->filters; *tail != NULL; tail = &(*tail)->next)
+		;
+	*tail = node;
+
+	return node->id;
+}
+
+/*
+ * Remove every filter whose id matches from the parser's filter chain,
+ * dropping the reference taken by camel_mime_parser_filter_add() and
+ * freeing the list node.  Unknown ids are silently ignored.
+ */
+void
+camel_mime_parser_filter_remove(CamelMimeParser *m, int id)
+{
+	struct _header_scan_state *s = _PRIVATE(m);
+	struct _header_scan_filter **link, *old;
+
+	link = &s->filters;
+	while (*link) {
+		old = *link;
+		if (old->id == id) {
+			/* unlink; *link now refers to the successor, which must
+			   still be examined, so do not advance here.
+			   there should only be a single matching id, but
+			   scan the whole lot anyway */
+			*link = old->next;
+			gtk_object_unref((GtkObject *)old->filter);
+			g_free(old);
+		} else {
+			link = &old->next;
+		}
+	}
+}
+
+/*
+ * Look up the header 'name' among the raw headers of the part currently
+ * on top of the part stack, via header_raw_find().
+ *
+ * Returns NULL when there is no current part or it has no headers;
+ * otherwise whatever header_raw_find() returns ('offset' is passed
+ * straight through to it).
+ */
+const char *
+camel_mime_parser_header(CamelMimeParser *m, const char *name, int *offset)
+{
+	struct _header_scan_state *s = _PRIVATE(m);
+
+	if (s->parts == NULL || s->parts->headers == NULL)
+		return NULL;
+
+	return header_raw_find(&s->parts->headers, name, offset);
+}
+
+/*
+ * Return the raw header list of the part currently on top of the part
+ * stack, or NULL when no part has been parsed yet.  The list is still
+ * owned by the parser.
+ */
+struct _header_raw *
+camel_mime_parser_headers_raw(CamelMimeParser *m)
+{
+	struct _header_scan_state *s = _PRIVATE(m);
+
+	return s->parts ? s->parts->headers : NULL;
+}
+
+/*
+ * Make the parser read from the file descriptor fd.
+ * Returns 0 on success, -1 if the initial read failed.
+ */
+int
+camel_mime_parser_init_with_fd(CamelMimeParser *m, int fd)
+{
+	return folder_scan_init_with_fd(_PRIVATE(m), fd);
+}
+
+/*
+ * Make the parser read from the given CamelStream.
+ * Returns 0 on success, -1 if the initial read failed.
+ */
+int
+camel_mime_parser_init_with_stream(CamelMimeParser *m, CamelStream *stream)
+{
+	return folder_scan_init_with_stream(_PRIVATE(m), stream);
+}
+
+/*
+ * Tell the parser whether it should look for "From " lines
+ * (non-zero scan_from) before parsing headers.
+ */
+void
+camel_mime_parser_scan_from(CamelMimeParser *m, int scan_from)
+{
+	_PRIVATE(m)->scan_from = scan_from;
+}
+
+/*
+ * Return the decoded Content-Type of the part currently on top of the
+ * part stack.  The result is NULL when there is no current part, or when
+ * that part had no content type recorded.
+ */
+struct _header_content_type *
+camel_mime_parser_content_type(CamelMimeParser *m)
+{
+	struct _header_scan_state *s = _PRIVATE(m);
+
+	/* FIXME: should this search up until its found the 'right'
+	   content-type?  can it? */
+	return s->parts ? s->parts->content_type : NULL;
+}
+
+/*
+ * Advance the parser by one step (see folder_scan_step()) and return the
+ * state it ended up in.  databuffer/datalength are handed through to the
+ * scanner, which uses them to return content data.
+ */
+enum _header_state
+camel_mime_parser_step(CamelMimeParser *m, char **databuffer, int *datalength)
+{
+	struct _header_scan_state *scan = _PRIVATE(m);
+
+	folder_scan_step(scan, databuffer, datalength);
+
+	return scan->state;
+}
+
+/* Return the absolute input offset of the parser's current read position */
+off_t camel_mime_parser_tell(CamelMimeParser *m)
+{
+	return folder_tell(_PRIVATE(m));
+}
+
+/* Return the offset recorded when the most recent header scan began, -1 before any scan */
+off_t camel_mime_parser_tell_start_headers(CamelMimeParser *m)
+{
+	return _PRIVATE(m)->start_of_headers;
+}
+
+/* Return the offset recorded for the most recent "From " line, or -1 if there is none */
+off_t camel_mime_parser_tell_start_from(CamelMimeParser *m)
+{
+	return _PRIVATE(m)->start_of_from;
+}
+
+/*
+ * Reposition the parser's input; off/whence are interpreted as for
+ * lseek().  Returns the new offset, or -1 on failure.
+ */
+off_t camel_mime_parser_seek(CamelMimeParser *m, off_t off, int whence)
+{
+	return folder_seek(_PRIVATE(m), off, whence);
+}
+
+/* ********************************************************************** */
+/*    Implementation							  */
+/* ********************************************************************** */
+
+/*
+ * Read the next bit of data, trying to ensure that more than 'atleast'
+ * bytes are available past inptr.
+ *
+ * If enough data is already buffered nothing is read.  Otherwise the
+ * unconsumed tail is moved to the front of inbuf, the rest of the buffer
+ * is refilled from the stream or fd, and seek/inptr/inend are updated to
+ * describe the new buffer contents.
+ *
+ * A failed read is treated as "no more data": the bytes that were already
+ * buffered stay available and the pointers remain consistent with them.
+ *
+ * Returns the number of bytes available from inptr to inend.
+ */
+static int
+folder_read(struct _header_scan_state *s)
+{
+	int len;
+	int inoffset;
+
+	if (s->inptr<s->inend-s->atleast)
+		return s->inend-s->inptr;
+
+	/* check for any remaining bytes (under the atleast limit) */
+	inoffset = s->inend - s->inptr;
+	if (inoffset>0 && s->inptr != s->inbuf) {
+		/* source and destination can overlap when the tail sits close
+		   to the start of the buffer, so memcpy() is not safe here */
+		memmove(s->inbuf, s->inptr, inoffset);
+	}
+	if (s->stream) {
+		len = camel_stream_read(s->stream, s->inbuf+inoffset, SCAN_BUF-inoffset);
+	} else {
+		len = read(s->fd, s->inbuf+inoffset, SCAN_BUF-inoffset);
+	}
+	r(printf("read %d bytes, offset = %d\n", len, inoffset));
+
+	/* the tail has been moved regardless of the outcome of the read, so
+	   the pointers must follow it even if the read failed */
+	if (len<0)
+		len = 0;
+
+	/* add on the last read block */
+	s->seek += s->inptr - s->inbuf;
+	s->inptr = s->inbuf;
+	s->inend = s->inbuf+len+inoffset;
+	r(printf("content = %d '%.*s'\n", s->inend - s->inptr,  s->inend - s->inptr, s->inptr));
+
+	return s->inend-s->inptr;
+}
+
+/*
+ * Return the current absolute position of the data pointer: the offset
+ * of the start of the input buffer plus how far inptr has advanced into it.
+ */
+static off_t
+folder_tell(struct _header_scan_state *s)
+{
+	int consumed = s->inptr - s->inbuf;
+
+	return s->seek + consumed;
+}
+
+/*
+ * Reposition the input and refill the read buffer from the new position.
+ *
+ * Stream inputs must be seekable; otherwise errno is set to EINVAL and
+ * -1 is returned.  fd inputs are repositioned with lseek().
+ *
+ * Returns the new offset, or -1 if either the seek or the read that
+ * follows it failed.  Only the buffer pointers and 'seek' are reset; the
+ * parser state and part stack are left as they were.
+ *
+ * need some way to prime the parser state, so this actually works for
+ * other than top-level messages
+ */
+static off_t
+folder_seek(struct _header_scan_state *s, off_t offset, int whence)
+{
+	off_t pos;
+	int got;
+
+	if (s->stream == NULL) {
+		pos = lseek(s->fd, offset, whence);
+	} else if (CAMEL_IS_SEEKABLE_STREAM(s->stream)) {
+		/* NOTE: assumes whence seekable stream == whence libc, which is probably
+		   the case (or bloody well should've been) */
+		pos = camel_seekable_stream_seek((CamelSeekableStream *)s->stream, offset, whence);
+	} else {
+		pos = -1;
+		errno = EINVAL;
+	}
+
+	if (pos == -1)
+		return pos;
+
+	/* the buffered data no longer matches the input position: drop it */
+	s->seek = pos;
+	s->inptr = s->inbuf;
+	s->inend = s->inbuf;
+
+	if (s->stream)
+		got = camel_stream_read(s->stream, s->inbuf, SCAN_BUF);
+	else
+		got = read(s->fd, s->inbuf, SCAN_BUF);
+
+	if (got < 0)
+		return -1;
+
+	s->inend = s->inbuf+got;
+	return pos;
+}
+
+/* Push h onto the part stack; the previous top of the stack becomes its parent */
+static void
+folder_push_part(struct _header_scan_state *s, struct _header_scan_stack *h)
+{
+	struct _header_scan_stack *enclosing = s->parts;
+
+	s->parts = h;
+	h->parent = enclosing;
+}
+
+/*
+ * Pop the top entry off the part stack and free it together with its
+ * boundary string, raw headers and content type.  Warns (and does
+ * nothing else) if the stack is already empty.
+ */
+static void
+folder_pull_part(struct _header_scan_state *s)
+{
+	struct _header_scan_stack *top = s->parts;
+
+	if (top == NULL) {
+		g_warning("Header stack underflow!\n");
+		return;
+	}
+
+	s->parts = top->parent;
+
+	g_free(top->boundary);
+	header_raw_clear(&top->headers);
+	header_content_type_unref(top->content_type);
+	g_free(top);
+}
+
+/*
+ * Advance the input position past the end of the current line.
+ *
+ * s->atleast is lowered to 1 for the duration of the scan and restored
+ * to the caller's value before returning.
+ *
+ * Returns 0 once a '\n' has been consumed, or -1 if folder_read() stops
+ * providing more than one byte before a newline is seen.
+ */
+static int
+folder_scan_skip_line(struct _header_scan_state *s)
+{
+	int atleast = s->atleast;
+	register char *inptr, *inend, c;
+	int len;
+
+	s->atleast = 1;
+
+	while ( (len = folder_read(s)) > 0 && len > s->atleast) { /* ensure we have at least enough room here */
+		inptr = s->inptr;
+		/* the scan stops one byte short of the end of the buffered data */
+		inend = s->inend-1;
+
+		/* sentinel, so that 'c' is not mistaken for a newline if the
+		   loop below does not run at all */
+		c = -1;
+		while (inptr<inend
+		       && (c = *inptr++)!='\n')
+			;
+
+		/* everything scanned so far is consumed, newline or not */
+		s->inptr = inptr;
+
+		if (c=='\n') {
+			s->atleast = atleast;
+			return 0;
+		}
+	}
+
+	s->atleast = atleast;
+
+	return -1;		/* not found */
+}
+
+/*
+ * Check whether the text at 'boundary' starts with the boundary marker of
+ * any part on the stack, innermost part first.
+ *
+ * The caller must guarantee that at least s->atleast bytes are readable
+ * at 'boundary'.  Only markers of at most s->atleast-2 bytes are
+ * considered, which leaves the two bytes following a matched marker
+ * inside that range, so they can always be inspected for the closing "--".
+ *
+ * Returns the matching part and sets *lastone to whether the marker is
+ * followed by "--"; returns NULL (leaving *lastone untouched) when no
+ * part matches.
+ */
+static struct _header_scan_stack *
+folder_boundary_check(struct _header_scan_state *s, const char *boundary, int *lastone)
+{
+	struct _header_scan_stack *part;
+	int len = s->atleast-2;	/* make sure we dont access past the buffer */
+
+	h(printf("checking boundary marker upto %d bytes\n", len));
+	for (part = s->parts; part; part = part->parent) {
+		h(printf("  boundary: %s\n", part->boundary));
+		h(printf("   against: '%.*s'\n", len, boundary));
+		if (part->boundary
+		    && part->boundarylen <= len
+		    && memcmp(boundary, part->boundary, part->boundarylen)==0) {
+			h(printf("matched boundary: %s\n", part->boundary));
+			/* boundarylen <= len, so boundarylen+1 <= atleast-1:
+			   both bytes are within the guaranteed range */
+			*lastone = (boundary[part->boundarylen]=='-'
+				    && boundary[part->boundarylen+1]=='-');
+			return part;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Append headerlen bytes starting at 'start' to the header assembly
+ * buffer, growing the buffer first if the data plus a terminating NUL
+ * would not fit in the space left after outptr.
+ */
+static void
+folder_scan_header_append(struct _header_scan_state *s, const char *start, int headerlen)
+{
+	int used = s->outptr - s->outbuf;
+	int size = s->outend - s->outbuf;
+
+	/* compare against the space actually left, not the total size:
+	   folded and mid-line headers accumulate in outbuf across calls */
+	if (used + headerlen + 1 > size) {
+		size = (size+headerlen)*2+1;
+		s->outbuf = g_realloc(s->outbuf, size);
+		s->outptr = s->outbuf + used;
+		s->outend = s->outbuf + size;
+	}
+	memcpy(s->outptr, start, headerlen);
+	s->outptr += headerlen;
+}
+
+/*
+ * Scan a block of headers from the current input position.
+ *
+ * Lines are collected into s->outbuf; continuation lines (starting with
+ * a space or tab) are kept together with the line they continue.  Each
+ * completed header is handed to header_raw_append_parse() along with the
+ * offset at which it started.  Scanning stops at a blank line, at a
+ * boundary marker of any enclosing part, or when the input runs out, in
+ * which case whatever has been collected is recorded as a final
+ * (possibly truncated) header.
+ *
+ * s->atleast is raised while scanning so that the longest boundary on
+ * the part stack fits in the read-ahead, and restored before returning.
+ *
+ * *lastone is cleared, and set by folder_boundary_check() if a boundary
+ * marker is what ended the scan.
+ *
+ * Returns a newly allocated stack entry holding the headers found.  It is
+ * not pushed onto the part stack here; that is left to the caller.
+ */
+static struct _header_scan_stack *
+folder_scan_header(struct _header_scan_state *s, int *lastone)
+{
+	int atleast = s->atleast;
+	register char *inptr, *inend;
+	char *start;
+	int len;
+	struct _header_scan_stack *part, *overpart = s->parts;
+	struct _header_scan_stack *h;
+
+	h(printf("scanning first bit\n"));
+
+	h = g_malloc0(sizeof(*h));
+
+	/* FIXME: this info should be cached ? */
+	part = s->parts;
+	s->atleast = 5;
+	while (part) {
+		if (part->boundary)
+			s->atleast = MAX(s->atleast, part->boundarylen+2);
+		part = part->parent;
+	}
+
+	*lastone = FALSE;
+retry:
+
+	while ((len = folder_read(s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
+		inptr = s->inptr;
+		inend = s->inend-s->atleast;
+		start = inptr;
+
+		while (inptr<=inend) {
+			register int c=-1;
+			/*printf("  '%.20s'\n", inptr);*/
+
+			if (!s->midline
+			    && (part = folder_boundary_check(s, inptr, lastone))) {
+				if ((s->outptr>s->outbuf) || (inptr-start))
+					goto header_truncated; /* may not actually be truncated */
+
+				goto normal_exit;
+			}
+
+			/* goto next line */
+			while (inptr<=inend && (c = *inptr++)!='\n')
+				;
+
+			/* collect what was just scanned; the buffer only needs to
+			   grow for *huge* headers, and then probably only once */
+			folder_scan_header_append(s, start, inptr-start);
+
+			h(printf("outbuf[0] = %02x '%c' oubuf[1] = %02x '%c'\n",
+				 s->outbuf[0], isprint(s->outbuf[0])?s->outbuf[0]:'.',
+				 s->outbuf[1], isprint(s->outbuf[1])?s->outbuf[1]:'.'));
+
+			if (s->header_start == -1)
+				s->header_start = (start-s->inbuf) + s->seek;
+
+			if (c!='\n') {
+				/* ran out of buffer before the end of the line */
+				s->midline = TRUE;
+			} else {
+				/* a following space or tab means the header is
+				   continued on the next line: keep collecting */
+				if (!(inptr[0] == ' ' || inptr[0] == '\t')) {
+					/* a blank line on its own ends the headers */
+					if (s->outbuf[0] == '\n'
+					    || (s->outbuf[0] == '\r' && s->outbuf[1]=='\n')) {
+						goto header_done;
+					}
+
+					s->outptr[0] = 0;
+
+					d(printf("header %.10s at %d\n", s->outbuf, s->header_start));
+
+					header_raw_append_parse(&h->headers, s->outbuf, s->header_start);
+					if (inptr[0]=='\n'
+					    || (inptr[0] == '\r' && inptr[1]=='\n')) {
+						inptr++;
+						goto header_done;
+					}
+					s->outptr = s->outbuf;
+					s->header_start = -1;
+				}
+				s->midline = FALSE;
+				start = inptr;
+			}
+		}
+		s->inptr = inptr;
+	}
+
+	/* ok, we're at the end of the data, just make sure we're not missing out some small
+	   truncated header markers */
+	if (overpart) {
+		overpart = overpart->parent;
+		while (overpart) {
+			if (overpart->boundary && (overpart->boundarylen+2) < s->atleast) {
+				s->atleast = overpart->boundarylen+2;
+				h(printf("Retrying next smaller part ...\n"));
+				goto retry;
+			}
+			overpart = overpart->parent;
+		}
+	}
+
+	/* anything collected so far, or still sitting unscanned in the input
+	   buffer, becomes one last header */
+	if ((s->outptr > s->outbuf) || s->inend > s->inptr) {
+		start = s->inptr;
+		inptr = s->inend;
+		goto header_truncated;
+	}
+
+	s->atleast = atleast;
+
+	return h;
+
+header_truncated:
+
+	folder_scan_header_append(s, start, inptr-start);
+	s->outptr[0] = 0;
+
+	if (s->header_start == -1)
+		s->header_start = (start-s->inbuf) + s->seek;
+
+	if (s->outbuf[0] == '\n'
+	    || (s->outbuf[0] == '\r' && s->outbuf[1]=='\n')) {
+		goto header_done;
+	}
+
+	header_raw_append_parse(&h->headers, s->outbuf, s->header_start);
+
+header_done:
+	s->outptr = s->outbuf;
+normal_exit:
+	s->inptr = inptr;
+	s->atleast = atleast;
+	s->header_start = -1;
+	return h;
+}
+
+/*
+ * Scan content from the current input position up to the next boundary
+ * marker of any part on the stack, or as far as the buffered data allows.
+ *
+ * On return *data / *length describe the chunk of content that was
+ * scanned.  The chunk is not copied: *data points into the parser's
+ * input buffer.  *lastone is cleared, and set by folder_boundary_check()
+ * if a boundary marker was found.
+ *
+ * Returns:
+ *  - the part whose boundary marker was found.  If the marker is right at
+ *    the current position *length is 0, otherwise the content in front of
+ *    the marker is returned and the marker is left in the input;
+ *  - s->parts when a chunk of content is returned without a boundary
+ *    having been reached.  s->midline is set to TRUE if the chunk was
+ *    dumped because the read-ahead ran out (label content_mid);
+ *  - NULL, with *length set to 0 and *data untouched, when no input
+ *    remains.
+ *
+ * s->atleast is raised while scanning so that the longest boundary on
+ * the part stack fits in the read-ahead, and restored before returning.
+ */
+static struct _header_scan_stack *
+folder_scan_content(struct _header_scan_state *s, int *lastone, char **data, int *length)
+{
+	int atleast = s->atleast;
+	register char *inptr, *inend;
+	char *start;
+	int len;
+	struct _header_scan_stack *part, *overpart = s->parts;
+	int already_packed = FALSE;
+
+	/*printf("scanning content\n");*/
+
+	/* FIXME: this info should be cached ? */
+	/* need room for the longest boundary on the stack plus a trailing "--",
+	   and never less than 5 bytes */
+	part = s->parts;
+	s->atleast = 5;
+	while (part) {
+		if (part->boundary) {
+			c(printf("boundary: %s\n", part->boundary));
+			s->atleast = MAX(s->atleast, part->boundarylen+2);
+		}
+		part = part->parent;
+	}
+/*	s->atleast = MAX(s->atleast, 5);*/
+#if 0
+	if (s->parts)
+		s->atleast = MAX(s->atleast, s->parts->boundarylen+2);
+#endif
+	*lastone = FALSE;
+
+retry:
+	c(printf("atleast = %d\n", s->atleast));
+	
+	while ((len = folder_read(s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
+		inptr = s->inptr;
+		/* last position at which a full 'atleast' bytes are still readable */
+		inend = s->inend-s->atleast;
+		start = inptr;
+
+		c(printf("inptr = %p, inend = %p\n", inptr, inend));
+
+		while (inptr<=inend) {
+			/* boundaries are only looked for at the start of a line */
+			if (!s->midline
+			    && (part = folder_boundary_check(s, inptr, lastone))) {
+				/* hand back the content in front of the marker first, if any */
+				if ( (inptr-start) )
+					goto content;
+				
+				goto normal_exit;
+			}
+			/* goto the next line */
+			while (inptr<=inend && (*inptr++)!='\n')
+				;
+			
+			s->midline = FALSE;
+		}
+
+		/* *sigh* so much for the beautiful simplicity of the code so far - here we
+		   have the snot to deal with the nasty end-cases that come from the read-ahead
+		   buffers we use */
+		/* what this does, is if we are somewhere near the end of the buffer,
+		   force it to the front, and re-read, ensuring we bunch as much together
+		   as possible, for the final read, without copying too much of the time */
+		/* make sure we dont loop forever, but also make sure we try smaller
+		   boundaries, if there are any, so we dont miss any. */
+		/* this is not needed for the header scanner, since it copies its own
+		   data */
+		c(printf("start offset = %d  atleast = %d\n", start-s->inbuf, s->atleast));
+		if (start > (s->inbuf + s->atleast)) {
+			/* force a re-scan of this data */
+			s->inptr = start;
+			/* a second pass over the same data got no further: give up
+			   on this boundary length */
+			if (already_packed)
+				goto smaller_boundary;
+			c(printf("near the end, try and bunch things up a bit first\n"));
+			already_packed = TRUE;
+		} else {
+			c(printf("dumping what i've got ...\n"));
+			/* what would be nice here, is if that we're at eof, we bunch the last
+			   little bit in the same content, but i dont think this is easy */
+			goto content_mid;
+		}
+	}
+
+	c(printf("length read = %d\n", len));
+smaller_boundary:
+
+	/* ok, we're at the end of the data, just make sure we're not missing out some small
+	   truncated header markers */
+	if (overpart) {
+		overpart = overpart->parent;
+		while (overpart) {
+			if (overpart->boundary && (overpart->boundarylen+2) < s->atleast) {
+				s->atleast = overpart->boundarylen+2;
+				c(printf("Retrying next smaller part ...\n"));
+				goto retry;
+			}
+			overpart = overpart->parent;
+		}
+	}
+
+	/* whatever is left in the buffer is returned as a final chunk of content */
+	if (s->inend > s->inptr) {
+		start = s->inptr;
+		inptr = s->inend;
+		goto content;
+	}
+
+	/* nothing left at all */
+	*length = 0;
+	s->atleast = atleast;
+	return NULL;
+
+content_mid:
+	/* the chunk ends in the middle of a line */
+	s->midline = TRUE;
+content:
+	/* content is attributed to the innermost part */
+	part = s->parts;
+normal_exit:
+	s->atleast = atleast;
+	s->inptr = inptr;
+
+	*data = start;
+	*length = inptr-start;
+
+/*	printf("got %scontent: %.*s", s->midline?"partial ":"", inptr-start, start);*/
+
+	return part;
+}
+
+
+/*
+ * Free the scanner state: the input and header buffers, every entry
+ * still on the part stack, the filter chain (dropping the reference
+ * camel_mime_parser_filter_add() took on each filter) and finally the
+ * state structure itself.
+ *
+ * NOTE(review): the input fd / stream are not closed or unreferenced
+ * here - confirm the intended ownership with the callers.
+ */
+static void
+folder_scan_close(struct _header_scan_state *s)
+{
+	struct _header_scan_filter *f, *fn;
+
+	g_free(s->realbuf);
+	g_free(s->outbuf);
+	while (s->parts)
+		folder_pull_part(s);
+
+	/* release any filters that were never removed explicitly */
+	f = s->filters;
+	while (f) {
+		fn = f->next;
+		gtk_object_unref((GtkObject *)f->filter);
+		g_free(f);
+		f = fn;
+	}
+
+	g_free(s);
+}
+
+
+/*
+ * Allocate and initialise a scanner state with no input attached.
+ *
+ * The structure is zero-filled first so that members which are not set
+ * explicitly below (top_part, top_start, pending) start out as NULL / 0
+ * rather than as indeterminate values.
+ *
+ * Returns the new state; release it with folder_scan_close().
+ */
+static struct _header_scan_state *
+folder_scan_init(void)
+{
+	struct _header_scan_state *s;
+
+	s = g_malloc0(sizeof(*s));
+
+	s->fd = -1;
+	s->stream = NULL;
+
+	s->outbuf = g_malloc(1024);
+	s->outptr = s->outbuf;
+	s->outend = s->outbuf+1024;
+
+	/* inbuf starts SCAN_HEAD bytes into the allocation, leaving headroom
+	   in front of (and behind) the SCAN_BUF bytes used for reading */
+	s->realbuf = g_malloc(SCAN_BUF + SCAN_HEAD*2);
+	s->inbuf = s->realbuf + SCAN_HEAD;
+	s->inptr = s->inbuf;
+	s->inend = s->inbuf;
+	s->atleast = 0;
+
+	s->seek = 0;		/* current character position in file of the last read block */
+
+	s->header_start = -1;
+
+	s->start_of_from = -1;
+	s->start_of_headers = -1;
+
+	s->midline = FALSE;
+	s->scan_from = FALSE;
+
+	s->filters = NULL;
+	s->filterid = 1;
+
+	s->parts = NULL;
+
+	s->state = HSCAN_INITIAL;
+	return s;
+}
+
+/*
+ * Attach the file descriptor fd as the scanner's input and prime the
+ * input buffer with a first read from it.
+ *
+ * On success any stream that was attached before is unreferenced and
+ * detached, and 0 is returned.  If the read fails -1 is returned and the
+ * previous input is left attached.
+ */
+static int
+folder_scan_init_with_fd(struct _header_scan_state *s, int fd)
+{
+	int got;
+
+	got = read(fd, s->inbuf, SCAN_BUF);
+	if (got < 0)
+		return -1;
+
+	s->inend = s->inbuf+got;
+	s->fd = fd;
+
+	/* only one kind of input at a time */
+	if (s->stream) {
+		gtk_object_unref((GtkObject *)s->stream);
+		s->stream = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Attach 'stream' as the scanner's input and prime the input buffer with
+ * a first read from it.
+ *
+ * On success any file descriptor that was attached before is closed and
+ * detached, and 0 is returned.  If the read fails -1 is returned and the
+ * previous input is left attached.
+ *
+ * NOTE(review): no reference is taken on the stream here, although
+ * folder_scan_init_with_fd() unreferences a stream it replaces - confirm
+ * who is meant to own the reference.
+ */
+static int
+folder_scan_init_with_stream(struct _header_scan_state *s, CamelStream *stream)
+{
+	int got;
+
+	got = camel_stream_read(stream, s->inbuf, SCAN_BUF);
+	if (got < 0)
+		return -1;
+
+	s->inend = s->inbuf+got;
+	s->stream = stream;
+
+	/* only one kind of input at a time */
+	if (s->fd != -1) {
+		close(s->fd);
+		s->fd = -1;
+	}
+
+	return 0;
+}
+
+#define USE_FROM
+
+/*
+ * Run the parser state machine until it has something to report.
+ *
+ * The outcome is left in s->state.  When content is being returned,
+ * *databuffer / *datalength describe it.
+ *
+ * Roughly, per state on entry:
+ *  HSCAN_INITIAL   - with scan_from set, search for a "From " line and
+ *                    move to HSCAN_FROM (or HSCAN_EOF if none is found);
+ *                    without it, go straight on to header parsing.
+ *  HSCAN_FROM      - parse a block of headers, push a new part for them
+ *                    and move to HSCAN_HEADER, HSCAN_MULTIPART or
+ *                    HSCAN_MESSAGE depending on the Content-Type.
+ *  HSCAN_HEADER,
+ *  HSCAN_BODY      - return the next chunk of body content, passed through
+ *                    the filter chain; HSCAN_BODY_END once there is none.
+ *  HSCAN_MULTIPART - skip to the next boundary of this multipart and parse
+ *                    the headers of the sub-part that follows;
+ *                    HSCAN_MULTIPART_END when the multipart is finished.
+ *  HSCAN_MESSAGE   - parse the headers of the embedded message.
+ *  HSCAN_*_END     - pop the finished part and resume in the state that
+ *                    was saved for it.
+ *  HSCAN_EOF       - nothing left to do.
+ */
+static void
+folder_scan_step(struct _header_scan_state *s, char **databuffer, int *datalength)
+{
+	struct _header_scan_stack *h, *hb;
+	const char *content;
+	const char *bound;
+	int type;
+	int state;
+	struct _header_content_type *ct = NULL;
+	struct _header_scan_filter *f;
+	size_t presize;
+
+/*	printf("\nSCAN PASS: state = %d '%s'\n", s->state, states[s->state]);*/
+
+tail_recurse:
+	d({
+		printf("\nSCAN STACK:\n");
+		printf("  '%s' :\n", states[s->state]);
+		hb = s->parts;
+		while (hb) {
+			printf("  '%s' : %s\n", states[hb->savestate], hb->boundary);
+			hb = hb->parent;
+		}
+		printf("\n");
+	});
+
+	switch (s->state) {
+
+	case HSCAN_INITIAL:
+#ifdef USE_FROM
+		if (s->scan_from) {
+			/* treat "From " as the boundary of a pseudo-part, so the
+			   content scanner can be used to find the next From line */
+			/* FIXME: it would be nice not to have to allocate this every pass */
+			h = g_malloc0(sizeof(*h));
+			h->boundary = g_strdup("From ");
+			h->boundarylen = strlen(h->boundary);
+			folder_push_part(s, h);
+			
+			/* discard anything in front of the From line */
+			h = s->parts;
+			do {
+				hb = folder_scan_content(s, &state, databuffer, datalength);
+			} while (hb==h && *datalength>0);
+			
+			if (*datalength==0 && hb==h) {
+				d(printf("found 'From '\n"));
+				s->start_of_from = folder_tell(s);
+				folder_scan_skip_line(s);
+				h->savestate = HSCAN_INITIAL;
+				s->state = HSCAN_FROM;
+			} else {
+				/* no From line found */
+				folder_pull_part(s);
+				s->state = HSCAN_EOF;
+			}
+			return;
+		} else {
+			s->start_of_from = -1;
+		}
+
+#endif
+		/* fall through: not scanning for From lines, so the headers start right here */
+	case HSCAN_FROM:
+		s->start_of_headers = folder_tell(s);
+		h = folder_scan_header(s, &state);
+		/* the state to resume in once this part has been pulled again */
+#ifdef USE_FROM
+		if (s->scan_from)
+			h->savestate = HSCAN_FROM_END;
+		else
+#endif
+			h->savestate = HSCAN_EOF;
+
+		/* FIXME: should this check for MIME-Version: 1.0 as well? */
+
+		type = HSCAN_HEADER;
+		if ( (content = header_raw_find(&h->headers, "Content-Type", NULL))
+		     && (ct = header_content_type_decode(content))) {
+			if (!strcasecmp(ct->type, "multipart")) {
+				bound = header_content_type_param(ct, "boundary");
+				if (bound) {
+					d(printf("multipart, boundary = %s\n", bound));
+					/* the stored string is "--boundary--", but boundarylen
+					   only covers "--boundary"; the allocation has room for
+					   the trailing "--" and the terminating NUL */
+					h->boundarylen = strlen(bound)+2;
+					h->boundary = g_malloc(h->boundarylen+3);
+					sprintf(h->boundary, "--%s--", bound);
+					type = HSCAN_MULTIPART;
+				} else {
+					g_warning("Multipart with no boundary, treating as text/plain");
+				}
+			} else if (!strcasecmp(ct->type, "message")) {
+				if (!strcasecmp(ct->subtype, "rfc822")
+				    /*|| !strcasecmp(ct->subtype, "partial")*/) {
+					type = HSCAN_MESSAGE;
+				}
+			}
+		}
+		h->content_type = ct;
+		folder_push_part(s, h);
+		s->state = type;
+		return;
+
+	case HSCAN_HEADER:
+		s->state = HSCAN_BODY;
+
+		/* fall through */
+	case HSCAN_BODY:
+		h = s->parts;
+		*datalength = 0;
+		presize = SCAN_HEAD;
+		f = s->filters;
+
+		do {
+			hb = folder_scan_content(s, &state, databuffer, datalength);
+			if (*datalength>0) {
+				d(printf("Content raw: '%.*s'\n", *datalength, *databuffer));
+
+				/* run the chunk through each filter in turn; every filter
+				   takes the previous one's output as its input */
+				while (f) {
+					camel_mime_filter_filter(f->filter, *databuffer, *datalength, presize,
+								 databuffer, datalength, &presize);
+					f = f->next;
+				}
+				return;
+			}
+		} while (hb==h && *datalength>0);
+
+		/* check for any filter completion data */
+		/* NOTE(review): this is reached with *datalength == 0, and
+		   *databuffer holds whatever the caller passed in if no content
+		   was scanned - confirm the filters cope with that */
+		while (f) {
+			camel_mime_filter_filter(f->filter, *databuffer, *datalength, presize,
+						 databuffer, datalength, &presize);
+			f = f->next;
+		}
+		if (*datalength > 0)
+			return;
+
+		s->state = HSCAN_BODY_END;
+		break;
+
+	case HSCAN_MULTIPART:
+		h = s->parts;
+		do {
+			/* content between the parts (preamble etc) is scanned but not returned */
+			do {
+				hb = folder_scan_content(s, &state, databuffer, datalength);
+				if (*datalength>0) {
+					/* FIXME: needs a state to return this shit??? */
+					d(printf("Multipart Content: '%.*s'\n", *datalength, *databuffer));
+				}
+			} while (hb==h && *datalength>0);
+			if (*datalength==0 && hb==h) {
+				d(printf("got boundary: %s\n", hb->boundary));
+				folder_scan_skip_line(s);
+				/* 'state' is the lastone flag: zero means another part follows */
+				if (!state) {
+					s->state = HSCAN_FROM;
+					folder_scan_step(s, databuffer, datalength);
+					s->parts->savestate = HSCAN_MULTIPART; /* set return state for the new head part */
+					return;
+				}
+			} else {
+				/* hit a boundary of an enclosing part, or ran out of data */
+				break;
+			}
+		} while (1);
+
+		s->state = HSCAN_MULTIPART_END;
+		break;
+
+	case HSCAN_MESSAGE:
+		/* parse the headers of the embedded message as a new part, which
+		   returns to HSCAN_MESSAGE_END once it has been pulled */
+		s->state = HSCAN_FROM;
+		folder_scan_step(s, databuffer, datalength);
+		s->parts->savestate = HSCAN_MESSAGE_END;
+		break;
+
+	case HSCAN_FROM_END:
+	case HSCAN_BODY_END:
+	case HSCAN_MULTIPART_END:
+	case HSCAN_MESSAGE_END:
+		/* the current part is finished: resume in the state saved for it */
+		s->state = s->parts->savestate;
+		folder_pull_part(s);
+		/* states with the HSCAN_END bit set are reported to the caller,
+		   anything else is processed straight away */
+		if (s->state & HSCAN_END)
+			return;
+		goto tail_recurse;
+
+	case HSCAN_EOF:
+		return;
+
+	default:
+		g_warning("Invalid state in camel-mime-parser: %d", s->state);
+		break;
+	}
+
+	return;
+}
+
+#ifdef STANDALONE
+/*
+ * Stand-alone test driver, only built when STANDALONE is defined.
+ *
+ * For every file named on the command line it steps the scanner until
+ * HSCAN_EOF, printing the state after each step and pushing/pulling
+ * charset and transfer-encoding filters around each body.
+ *
+ * NOTE(review): this code looks out of date with respect to the rest of
+ * the file and probably does not build as is:
+ *  - folder_scan_init() is declared above as taking no arguments, but is
+ *    passed the fd here, and the fd is never attached with
+ *    folder_scan_init_with_fd();
+ *  - header_raw_find() is called with two arguments here and with three
+ *    elsewhere in this file;
+ *  - folder_push_filter_charset(), folder_push_filter_save(),
+ *    folder_push_filter_mime() and folder_filter_pull() are not defined
+ *    in the part of the file visible here.
+ */
+int main(int argc, char **argv)
+{
+	int fd;
+	struct _header_scan_state *s;
+	char *data;
+	int len;
+	int state;
+	char *name = "/tmp/evmail/Inbox";
+	struct _header_scan_stack *h;
+	int i;
+	int attach = 0;
+
+	if (argc==2)
+		name = argv[1];
+
+	printf("opening: %s", name);
+
+	/* note: only files given on the command line are processed; the
+	   default name above is printed but never opened */
+	for (i=1;i<argc;i++) {
+		const char *encoding = NULL, *charset = NULL;
+		char *attachname;
+
+		name = argv[i];
+		printf("opening: %s", name);
+		
+		fd = open(name, O_RDONLY);
+		if (fd==-1) {
+			perror("Cannot open mailbox");
+			exit(1);
+		}
+		s = folder_scan_init(fd);
+		s->scan_from = FALSE;
+#if 0
+		h = g_malloc0(sizeof(*h));
+		h->savestate = HSCAN_EOF;
+		folder_push_part(s, h);
+#endif	
+		while (s->state != HSCAN_EOF) {
+			folder_scan_step(s, &data, &len);
+			printf("\n -- PARSER STEP RETURN -- %d '%s'\n\n", s->state, states[s->state]);
+			switch (s->state) {
+			case HSCAN_HEADER:
+				/* convert any charset other than us-ascii to UTF-8 */
+				if (s->parts->content_type
+				    && (charset = header_content_type_param(s->parts->content_type, "charset"))) {
+					if (strcasecmp(charset, "us-ascii")) {
+						folder_push_filter_charset(s, "UTF-8", charset);
+					} else {
+						charset = NULL;
+					}
+				} else {
+					charset = NULL;
+				}
+
+				/* the raw header value still has its leading space, hence
+				   the " base64" / " quoted-printable" comparisons below */
+				encoding = header_raw_find(&s->parts->headers, "Content-transfer-encoding");
+				printf("encoding = '%s'\n", encoding);
+				if (encoding && !strncasecmp(encoding, " base64", 7)) {
+					printf("adding base64 filter\n");
+					attachname = g_strdup_printf("attach.%d.%d", i, attach++);
+					folder_push_filter_save(s, attachname);
+					g_free(attachname);
+					folder_push_filter_mime(s, 0);
+				}
+				if (encoding && !strncasecmp(encoding, " quoted-printable", 17)) {
+					printf("adding quoted-printable filter\n");
+					attachname = g_strdup_printf("attach.%d.%d", i, attach++);
+					folder_push_filter_save(s, attachname);
+					g_free(attachname);
+					folder_push_filter_mime(s, 1);
+				}
+
+				break;
+			case HSCAN_BODY:
+				break;
+			case HSCAN_BODY_END:
+				/* undo the filters pushed for this body: two for an encoded
+				   body (save + decode), one for a charset conversion */
+				if (encoding && !strncasecmp(encoding, " base64", 7)) {
+					printf("removing filters\n");
+					folder_filter_pull(s);
+					folder_filter_pull(s);
+				}
+				if (encoding && !strncasecmp(encoding, " quoted-printable", 17)) {
+					printf("removing filters\n");
+					folder_filter_pull(s);
+					folder_filter_pull(s);
+				}
+				if (charset) {
+					folder_filter_pull(s);
+					charset = NULL;
+				}
+				encoding = NULL;
+				break;
+			default:
+				break;
+			}
+		}
+		folder_scan_close(s);
+		close(fd);
+	}
+	return 0;
+}
+
+#endif /* STANDALONE */