aboutsummaryrefslogblamecommitdiffstats
path: root/camel/camel-mime-parser.c
blob: 2f1d9fd70d910a290a44acdec5cf89309ec6658d (plain) (tree)










































































                                                                                                       
                                                                                            

























































































































































































































































                                                                                                                            






                                                   




                                                                              








                                                            





























                                                                       

















                                                              





























































































































































































































































                                                                                                                 

                                                                                       
                                                            




























































                                                                                                         
                                                         
                            

























































































































































                                                                                                              



                                                         
























                                                                                               
                      

























                                                              

                                     


















                                                                               

                                                                 
                                   
                                                    









































































































































































































































































































                                                                                                                           
/*
 *  Copyright (C) 2000 Helix Code Inc.
 *
 *  Authors: Michael Zucchi <notzed@helixcode.com>
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public License
 *  as published by the Free Software Foundation; either version 2 of
 *  the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public
 *  License along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/* What should hopefully be a fast mail parser */

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

#include <string.h>

#include <stdio.h>
#include <errno.h>

#include <unicode.h>

#include "camel-mime-parser.h"
#include "camel-mime-utils.h"
#include "camel-mime-filter.h"
#include "camel-stream.h"
#include "camel-seekable-stream.h"

#define r(x)
#define h(x)
#define c(x)
#define d(x)

#define SCAN_BUF 4096       /* size of read buffer */
#define SCAN_HEAD 128       /* headroom guaranteed to be before each read buffer */

/* a little hacky, but i couldn't be bothered renaming everything */
#define _header_scan_state _CamelMimeParserPrivate
#define _PRIVATE(o) (((CamelMimeParser *)(o))->priv)

struct _header_scan_state {

    /* global state */

    enum _header_state state;

    /* for building headers during scanning */
    char *outbuf;
    char *outptr;
    char *outend;

    int fd;         /* input for a fd input */
    CamelStream *stream;    /* or for a stream */

    /* for scanning input buffers */
    char *realbuf;      /* the real buffer, SCAN_HEAD*2 + SCAN_BUF bytes */
    char *inbuf;        /* points to a subset of the allocated memory, the underflow */
    char *inptr;        /* (upto SCAN_HEAD) is for use by filters so they dont copy all data */
    char *inend;

    int atleast;

    int seek;       /* current offset to start of buffer */
    int unstep;     /* how many states to 'unstep' (repeat the current state) */

    int midline;        /* are we mid-line interrupted? */
    int scan_from;      /* do we care about From lines? */

    int start_of_from;  /* where from started */
    int start_of_headers;   /* where headers started from the last scan */

    int header_start;   /* start of last header, or -1 */

    struct _header_scan_stack *top_part;    /* top of message header */
    int top_start;      /* offset of start */

    struct _header_scan_stack *pending; /* if we're pending part info, from the wrong part end */

    /* filters to apply to all content before output */
    int filterid;       /* id of next filter */
    struct _header_scan_filter *filters;

    /* per message/part info */
    struct _header_scan_stack *parts;

};

struct _header_scan_stack {
    struct _header_scan_stack *parent;

    enum _header_state savestate; /* state at invocation of this part */

    struct _header_raw *headers;    /* headers for this part */

    struct _header_content_type *content_type;

    char *boundary;     /* for multipart/ * boundaries, including leading -- and trailing -- for the final part */
    int boundarylen;    /* length of boundary, including leading -- */
};

struct _header_scan_filter {
    struct _header_scan_filter *next;
    int id;
    CamelMimeFilter *filter;
};

static void folder_scan_step(struct _header_scan_state *s, char **databuffer, int *datalength);
static int folder_scan_init_with_fd(struct _header_scan_state *s, int fd);
static int folder_scan_init_with_stream(struct _header_scan_state *s, CamelStream *stream);
static struct _header_scan_state *folder_scan_init(void);
static void folder_scan_close(struct _header_scan_state *s);
static struct _header_scan_stack *folder_scan_content(struct _header_scan_state *s, int *lastone, char **data, int *length);
static struct _header_scan_stack *folder_scan_header(struct _header_scan_state *s, int *lastone);
static int folder_scan_skip_line(struct _header_scan_state *s);
static off_t folder_seek(struct _header_scan_state *s, off_t offset, int whence);
static off_t folder_tell(struct _header_scan_state *s);

static void camel_mime_parser_class_init (CamelMimeParserClass *klass);
static void camel_mime_parser_init       (CamelMimeParser *obj);

static char *states[] = {
    "HSCAN_INITIAL",
    "HSCAN_FROM",       /* got 'From' line */
    "HSCAN_HEADER",     /* toplevel header */
    "HSCAN_BODY",       /* scanning body of message */
    "HSCAN_MULTIPART",  /* got multipart header */
    "HSCAN_MESSAGE",        /* rfc822 message */

    "HSCAN_PART",       /* part of a multipart */
    "<invalid>",

    "HSCAN_EOF",        /* end of file */
    "HSCAN_FROM_END",
    "HSCAN_HEAER_END",
    "HSCAN_BODY_END",
    "HSCAN_MULTIPART_END",
    "HSCAN_MESSAGE_END",
};

static GtkObjectClass *camel_mime_parser_parent;

enum SIGNALS {
    LAST_SIGNAL
};

static guint signals[LAST_SIGNAL] = { 0 };

guint
camel_mime_parser_get_type (void)
{
    static guint type = 0;
    
    if (!type) {
        GtkTypeInfo type_info = {
            "CamelMimeParser",
            sizeof (CamelMimeParser),
            sizeof (CamelMimeParserClass),
            (GtkClassInitFunc) camel_mime_parser_class_init,
            (GtkObjectInitFunc) camel_mime_parser_init,
            (GtkArgSetFunc) NULL,
            (GtkArgGetFunc) NULL
        };
        
        type = gtk_type_unique (gtk_object_get_type (), &type_info);
    }
    
    return type;
}

static void
finalise(GtkObject *o)
{
    struct _header_scan_state *s = _PRIVATE(o);

    folder_scan_close(s);

    ((GtkObjectClass *)camel_mime_parser_parent)->finalize (o);
}

static void
camel_mime_parser_class_init (CamelMimeParserClass *klass)
{
    GtkObjectClass *object_class = (GtkObjectClass *) klass;
    
    camel_mime_parser_parent = gtk_type_class (gtk_object_get_type ());

    object_class->finalize = finalise;

    gtk_object_class_add_signals (object_class, signals, LAST_SIGNAL);
}

static void
camel_mime_parser_init (CamelMimeParser *obj)
{
    struct _header_scan_state *s;

    s = folder_scan_init();
    _PRIVATE(obj) = s;
}

/**
 * camel_mime_parser_new:
 *
 * Create a new CamelMimeParser object.
 * 
 * Return value: A new CamelMimeParser widget.
 **/
CamelMimeParser *
camel_mime_parser_new (void)
{
    CamelMimeParser *new = CAMEL_MIME_PARSER ( gtk_type_new (camel_mime_parser_get_type ()));
    return new;
}


int
camel_mime_parser_filter_add(CamelMimeParser *m, CamelMimeFilter *mf)
{
    struct _header_scan_state *s = _PRIVATE(m);
    struct _header_scan_filter *f, *new;

    new = g_malloc(sizeof(*new));
    new->filter = mf;
    new->id = s->filterid++;
    if (s->filterid == -1)
        s->filterid++;
    new->next = 0;
    gtk_object_ref((GtkObject *)mf);

    /* yes, this is correct, since 'next' is the first element of the struct */
    f = (struct _header_scan_filter *)&s->filters;
    while (f->next)
        f = f->next;
    f->next = new;
    return new->id;
}

void
camel_mime_parser_filter_remove(CamelMimeParser *m, int id)
{
    struct _header_scan_state *s = _PRIVATE(m);
    struct _header_scan_filter *f, *old;
    
    f = (struct _header_scan_filter *)&s->filters;
    while (f && f->next) {
        old = f->next;
        if (old->id == id) {
            gtk_object_unref((GtkObject *)old->filter);
            f->next = old->next;
            g_free(old);
            /* there should only be a single matching id, but
               scan the whole lot anyway */
        }
        f = f->next;
    }
}

const char *
camel_mime_parser_header(CamelMimeParser *m, const char *name, int *offset)
{
    struct _header_scan_state *s = _PRIVATE(m);

    if (s->parts &&
        s->parts->headers) {
        return header_raw_find(&s->parts->headers, name, offset);
    }
    return NULL;
}

struct _header_raw *
camel_mime_parser_headers_raw(CamelMimeParser *m)
{
    struct _header_scan_state *s = _PRIVATE(m);

    if (s->parts)
        return s->parts->headers;
    return NULL;
}

int
camel_mime_parser_init_with_fd(CamelMimeParser *m, int fd)
{
    struct _header_scan_state *s = _PRIVATE(m);

    return folder_scan_init_with_fd(s, fd);
}

int
camel_mime_parser_init_with_stream(CamelMimeParser *m, CamelStream *stream)
{
    struct _header_scan_state *s = _PRIVATE(m);

    return folder_scan_init_with_stream(s, stream);
}

void
camel_mime_parser_scan_from(CamelMimeParser *m, int scan_from)
{
    struct _header_scan_state *s = _PRIVATE(m);
    s->scan_from = scan_from;
}

struct _header_content_type *
camel_mime_parser_content_type(CamelMimeParser *m)
{
    struct _header_scan_state *s = _PRIVATE(m);

    /* FIXME: should this search up until its found the 'right'
       content-type?  can it? */
    if (s->parts)
        return s->parts->content_type;
    return NULL;
}

void camel_mime_parser_unstep(CamelMimeParser *m)
{
    struct _header_scan_state *s = _PRIVATE(m);

    s->unstep++;
}

enum _header_state
camel_mime_parser_step(CamelMimeParser *m, char **databuffer, int *datalength)
{
    struct _header_scan_state *s = _PRIVATE(m);

    d(printf("OLD STATE:  '%s' :\n", states[s->state]));

    if (s->unstep <= 0)
        folder_scan_step(s, databuffer, datalength);
    else
        s->unstep--;

    d(printf("NEW STATE:  '%s' :\n", states[s->state]));

    return s->state;
}

off_t camel_mime_parser_tell(CamelMimeParser *m)
{
    struct _header_scan_state *s = _PRIVATE(m);

    return folder_tell(s);
}

off_t camel_mime_parser_tell_start_headers(CamelMimeParser *m)
{
    struct _header_scan_state *s = _PRIVATE(m);

    return s->start_of_headers;
}

off_t camel_mime_parser_tell_start_from(CamelMimeParser *m)
{
    struct _header_scan_state *s = _PRIVATE(m);

    return s->start_of_from;
}

off_t camel_mime_parser_seek(CamelMimeParser *m, off_t off, int whence)
{
    struct _header_scan_state *s = _PRIVATE(m);
    return folder_seek(s, off, whence);
}

enum _header_state camel_mime_parser_state(CamelMimeParser *m)
{
    struct _header_scan_state *s = _PRIVATE(m);
    return s->state;
}

CamelStream *camel_mime_parser_stream(CamelMimeParser *m)
{
    struct _header_scan_state *s = _PRIVATE(m);
    return s->stream;
}

int camel_mime_parser_fd(CamelMimeParser *m)
{
    struct _header_scan_state *s = _PRIVATE(m);
    return s->fd;
}

/* ********************************************************************** */
/*    Implementation                              */
/* ********************************************************************** */

/* read the next bit of data, ensure there is enough room 'atleast' bytes */
static int
folder_read(struct _header_scan_state *s)
{
    int len;
    int inoffset;

    if (s->inptr<s->inend-s->atleast)
        return s->inend-s->inptr;

    /* check for any remaning bytes (under the atleast limit( */
    inoffset = s->inend - s->inptr;
    if (inoffset>0) {
        memcpy(s->inbuf, s->inptr, inoffset);
    }
    if (s->stream) {
        len = camel_stream_read(s->stream, s->inbuf+inoffset, SCAN_BUF-inoffset);
    } else {
        len = read(s->fd, s->inbuf+inoffset, SCAN_BUF-inoffset);
    }
    r(printf("read %d bytes, offset = %d\n", len, inoffset));
    if (len>=0) {
        /* add on the last read block */
        s->seek += s->inptr - s->inbuf;
        s->inptr = s->inbuf;
        s->inend = s->inbuf+len+inoffset;
        r(printf("content = %d '%.*s'\n",s->inend - s->inptr,  s->inend - s->inptr, s->inptr));
    }
    r(printf("content = %d '%.*s'\n", s->inend - s->inptr,  s->inend - s->inptr, s->inptr));
    return s->inend-s->inptr;
}

/* return the current absolute position of the data pointer */
static off_t
folder_tell(struct _header_scan_state *s)
{
    return s->seek + (s->inptr - s->inbuf);
}

/*
  need some way to prime the parser state, so this actually works for 
  other than top-level messages
*/
static off_t
folder_seek(struct _header_scan_state *s, off_t offset, int whence)
{
    off_t newoffset;
    int len;

    if (s->stream) {
        if (CAMEL_IS_SEEKABLE_STREAM(s->stream)) {
            /* NOTE: assumes whence seekable stream == whence libc, which is probably
               the case (or bloody well should've been) */
            newoffset = camel_seekable_stream_seek((CamelSeekableStream *)s->stream, offset, whence);
        } else {
            newoffset = -1;
            errno = EINVAL;
        }
    } else {
        newoffset = lseek(s->fd, offset, whence);
    }
    if (newoffset != -1) {
        s->seek = newoffset;
        s->inptr = s->inbuf;
        s->inend = s->inbuf;
        if (s->stream)
            len = camel_stream_read(s->stream, s->inbuf, SCAN_BUF);
        else
            len = read(s->fd, s->inbuf, SCAN_BUF);
        if (len>=0)
            s->inend = s->inbuf+len;
        else
            newoffset = -1;
    }
    return newoffset;
}

static void
folder_push_part(struct _header_scan_state *s, struct _header_scan_stack *h)
{
    h->parent = s->parts;
    s->parts = h;
}

static void
folder_pull_part(struct _header_scan_state *s)
{
    struct _header_scan_stack *h;

    h = s->parts;
    if (h) {
        s->parts = h->parent;
        g_free(h->boundary);
        header_raw_clear(&h->headers);
        header_content_type_unref(h->content_type);
        g_free(h);
    } else {
        g_warning("Header stack underflow!\n");
    }
}

static int
folder_scan_skip_line(struct _header_scan_state *s)
{
    int atleast = s->atleast;
    register char *inptr, *inend, c;
    int len;

    s->atleast = 1;

    while ( (len = folder_read(s)) > 0 && len > s->atleast) { /* ensure we have at least enough room here */
        inptr = s->inptr;
        inend = s->inend-1;

        c = -1;
        while (inptr<inend
               && (c = *inptr++)!='\n')
            ;

        s->inptr = inptr;

        if (c=='\n') {
            s->atleast = atleast;
            return 0;
        }
    }

    s->atleast = atleast;

    return -1;      /* not found */
}

static struct _header_scan_stack *
folder_boundary_check(struct _header_scan_state *s, const char *boundary, int *lastone)
{
    struct _header_scan_stack *part;
    int len = s->atleast-2; /* make sure we dont access past the buffer */

    h(printf("checking boundary marker upto %d bytes\n", len));
    part = s->parts;
    while (part) {
        h(printf("  boundary: %s\n", part->boundary));
        h(printf("   against: '%.*s'\n", len, boundary));
        if (part->boundary
            && part->boundarylen <= len
            && memcmp(boundary, part->boundary, part->boundarylen)==0) {
            h(printf("matched boundary: %s\n", part->boundary));
            /* again, make sure we're in range */
            if (part->boundarylen <= len+2) {
                h(printf("checking lastone\n"));
                *lastone = (boundary[part->boundarylen]=='-'
                        && boundary[part->boundarylen+1]=='-');
            } else {
                h(printf("not enough room to check last one?\n"));
                *lastone = FALSE;
            }
            /*printf("ok, we found it! : %s \n", (*lastone)?"Last one":"More to come?");*/
            return part;
        }
        part = part->parent;
    }
    return NULL;
}

static struct _header_scan_stack *
folder_scan_header(struct _header_scan_state *s, int *lastone)
{
    int atleast = s->atleast;
    register char *inptr, *inend;
    char *start;
    int len;
    struct _header_scan_stack *part, *overpart = s->parts;
    struct _header_scan_stack *h;

    h(printf("scanning first bit\n"));

    h = g_malloc0(sizeof(*h));

    /* FIXME: this info should be cached ? */
    part = s->parts;
    s->atleast = 5;
    while (part) {
        if (part->boundary)
            s->atleast = MAX(s->atleast, part->boundarylen+2);
        part = part->parent;
    }
#if 0
    s->atleast = MAX(s->atleast, 5);
    if (s->parts)
        s->atleast = MAX(s->atleast, s->parts->boundarylen+2);
#endif

    *lastone = FALSE;
retry:

    while ((len = folder_read(s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
        inptr = s->inptr;
        inend = s->inend-s->atleast;
        start = inptr;

        while (inptr<=inend) {
            register int c=-1;
            /*printf("  '%.20s'\n", inptr);*/

            if (!s->midline
                && (part = folder_boundary_check(s, inptr, lastone))) {
                if ((s->outptr>s->outbuf) || (inptr-start))
                    goto header_truncated; /* may not actually be truncated */
                
                goto normal_exit;
            }

            /* goto next line */
            while (inptr<=inend && (c = *inptr++)!='\n')
                ;

            /* allocate/append - this wont get executed unless we have *huge* headers,
               and then probably only once */
            {
                register int headerlen = inptr-start;
                register int len = (s->outend - s->outbuf);
                char *outnew;
                
                if (headerlen >= len) {
                    len = (len+headerlen)*2+1;
                    outnew = g_realloc(s->outbuf, len);
                    s->outptr = s->outptr - s->outbuf + outnew;
                    s->outbuf = outnew;
                    s->outend = outnew + len;
                }
                memcpy(s->outptr, start, headerlen);
                s->outptr += headerlen;
            }

            h(printf("outbuf[0] = %02x '%c' oubuf[1] = %02x '%c'\n",
                 s->outbuf[0], isprint(s->outbuf[0])?s->outbuf[0]:'.',
                 s->outbuf[1], isprint(s->outbuf[1])?s->outbuf[1]:'.'));

            if (s->header_start == -1)
                s->header_start = (start-s->inbuf) + s->seek;

            if (c!='\n') {
                s->midline = TRUE;
            } else {
                if (!(inptr[0] == ' ' || inptr[0] == '\t')) {
                    if (s->outbuf[0] == '\n'
                        || (s->outbuf[0] == '\r' && s->outbuf[1]=='\n')) {
                        goto header_done;
                    }

                    /* we always have at least _1_ char here ... */
                    if (s->outptr[-1] == '\n')
                        s->outptr--;
                    s->outptr[0] = 0;

                    d(printf("header %.10s at %d\n", s->outbuf, s->header_start));

                    header_raw_append_parse(&h->headers, s->outbuf, s->header_start);
                    if (inptr[0]=='\n'
                        || (inptr[0] == '\r' && inptr[1]=='\n')) {
                        inptr++;
                        goto header_done;
                    }
                    s->outptr = s->outbuf;
                    s->header_start = -1;
                }
                s->midline = FALSE;
                start = inptr;
            }
        }
        s->inptr = inptr;
    }

    /* ok, we're at the end of the data, just make sure we're not missing out some small
       truncated header markers */
    if (overpart) {
        overpart = overpart->parent;
        while (overpart) {
            if (overpart->boundary && (overpart->boundarylen+2) < s->atleast) {
                s->atleast = overpart->boundarylen+2;
                h(printf("Retrying next smaller part ...\n"));
                goto retry;
            }
            overpart = overpart->parent;
        }
    }

    if ((s->outptr > s->outbuf) || s->inend > s->inptr) {
        start = s->inptr;
        inptr = s->inend;
        goto header_truncated;
    }

    s->atleast = atleast;

    return h;

header_truncated:

    {
        register int headerlen = inptr-start;
        register int len = (s->outend - s->outbuf);
        char *outnew;
        
        if (headerlen >= len) {
            len = (len+headerlen)*2+1;
            outnew = g_realloc(s->outbuf, len);
            s->outptr = s->outptr - s->outbuf + outnew;
            s->outbuf = outnew;
            s->outend = outnew + len;
        }
        memcpy(s->outptr, start, headerlen);
        s->outptr += headerlen;
    }
    if (s->outptr>s->outbuf && s->outptr[-1] == '\n')
        s->outptr--;
    s->outptr[0] = 0;

    if (s->header_start == -1)
        s->header_start = (start-s->inbuf) + s->seek;

    if (s->outbuf[0] == '\n'
        || (s->outbuf[0] == '\r' && s->outbuf[1]=='\n')) {
        goto header_done;
    }

    header_raw_append_parse(&h->headers, s->outbuf, s->header_start);

header_done:
    part = s->parts;

    s->outptr = s->outbuf;
normal_exit:
    s->inptr = inptr;
    s->atleast = atleast;
    s->header_start = -1;
    return h;
}

static struct _header_scan_stack *
folder_scan_content(struct _header_scan_state *s, int *lastone, char **data, int *length)
{
    int atleast = s->atleast;
    register char *inptr, *inend;
    char *start;
    int len;
    struct _header_scan_stack *part, *overpart = s->parts;
    int already_packed = FALSE;

    /*printf("scanning content\n");*/

    /* FIXME: this info should be cached ? */
    part = s->parts;
    s->atleast = 5;
    while (part) {
        if (part->boundary) {
            c(printf("boundary: %s\n", part->boundary));
            s->atleast = MAX(s->atleast, part->boundarylen+2);
        }
        part = part->parent;
    }
/*  s->atleast = MAX(s->atleast, 5);*/
#if 0
    if (s->parts)
        s->atleast = MAX(s->atleast, s->parts->boundarylen+2);
#endif
    *lastone = FALSE;

retry:
    c(printf("atleast = %d\n", s->atleast));
    
    while ((len = folder_read(s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
        inptr = s->inptr;
        inend = s->inend-s->atleast;
        start = inptr;

        c(printf("inptr = %p, inend = %p\n", inptr, inend));

        while (inptr<=inend) {
            if (!s->midline
                && (part = folder_boundary_check(s, inptr, lastone))) {
                if ( (inptr-start) )
                    goto content;
                
                goto normal_exit;
            }
            /* goto the next line */
            while (inptr<=inend && (*inptr++)!='\n')
                ;
            
            s->midline = FALSE;
        }

        /* *sigh* so much for the beautiful simplicity of the code so far - here we
           have the snot to deal with the nasty end-cases that come from the read-ahead
           buffers we use */
        /* what this does, is if we are somewhere near the end of the buffer,
           force it to the front, and re-read, ensuring we bunch as much together
           as possible, for the final read, without copying too much of the time */
        /* make sure we dont loop forever, but also make sure we try smaller
           boundaries, if there are any, so we dont miss any. */
        /* this is not needed for the header scanner, since it copies its own
           data */
        c(printf("start offset = %d  atleast = %d\n", start-s->inbuf, s->atleast));
        if (start > (s->inbuf + s->atleast)) {
            /* force a re-scan of this data */
            s->inptr = start;
            if (already_packed)
                goto smaller_boundary;
            c(printf("near the end, try and bunch things up a bit first\n"));
            already_packed = TRUE;
        } else {
            c(printf("dumping what i've got ...\n"));
            /* what would be nice here, is if that we're at eof, we bunch the last
               little bit in the same content, but i dont think this is easy */
            goto content_mid;
        }
    }

    c(printf("length read = %d\n", len));
smaller_boundary:

    /* ok, we're at the end of the data, just make sure we're not missing out some small
       truncated header markers */
    if (overpart) {
        overpart = overpart->parent;
        while (overpart) {
            if (overpart->boundary && (overpart->boundarylen+2) < s->atleast) {
                s->atleast = overpart->boundarylen+2;
                c(printf("Retrying next smaller part ...\n"));
                goto retry;
            }
            overpart = overpart->parent;
        }
    }

    if (s->inend > s->inptr) {
        start = s->inptr;
        inptr = s->inend;
        goto content;
    }

    *length = 0;
    s->atleast = atleast;
    return NULL;

content_mid:
    s->midline = TRUE;
content:
    part = s->parts;
normal_exit:
    s->atleast = atleast;
    s->inptr = inptr;

    *data = start;
    *length = inptr-start;

/*  printf("got %scontent: %.*s", s->midline?"partial ":"", inptr-start, start);*/

    return part;
}


static void
folder_scan_close(struct _header_scan_state *s)
{
    g_free(s->realbuf);
    g_free(s->outbuf);
    while (s->parts)
        folder_pull_part(s);
    if (s->fd != -1)
        close(s->fd);
    if (s->stream)
        gtk_object_unref((GtkObject *)s->stream);
    g_free(s);
}


static struct _header_scan_state *
folder_scan_init(void)
{
    struct _header_scan_state *s;

    s = g_malloc(sizeof(*s));

    s->fd = -1;
    s->stream = NULL;

    s->outbuf = g_malloc(1024);
    s->outptr = s->outbuf;
    s->outend = s->outbuf+1024;

    s->realbuf = g_malloc(SCAN_BUF + SCAN_HEAD*2);
    s->inbuf = s->realbuf + SCAN_HEAD;
    s->inptr = s->inbuf;
    s->inend = s->inbuf;
    s->atleast = 0;

    s->seek = 0;        /* current character position in file of the last read block */
    s->unstep = 0;

    s->header_start = -1;

    s->start_of_from = -1;
    s->start_of_headers = -1;

    s->midline = FALSE;
    s->scan_from = FALSE;

    s->filters = NULL;
    s->filterid = 1;

    s->parts = NULL;

    s->state = HSCAN_INITIAL;
    return s;
}

static int
folder_scan_init_with_fd(struct _header_scan_state *s, int fd)
{
    int len;

    len = read(fd, s->inbuf, SCAN_BUF);
    if (len>=0) {
        s->inend = s->inbuf+len;
        if (s->fd != -1)
            close(s->fd);
        s->fd = fd;
        if (s->stream) {
            gtk_object_unref((GtkObject *)s->stream);
            s->stream = NULL;
        }
        return 0;
    } else {
        return -1;
    }
}

static int
folder_scan_init_with_stream(struct _header_scan_state *s, CamelStream *stream)
{
    int len;

    len = camel_stream_read(stream, s->inbuf, SCAN_BUF);
    if (len>=0) {
        s->inend = s->inbuf+len;
        if (s->stream)
            gtk_object_unref((GtkObject *)s->stream);
        s->stream = stream;
        gtk_object_ref((GtkObject *)stream);
        if (s->fd != -1) {
            close(s->fd);
            s->fd = -1;
        }
        return 0;
    } else {
        return -1;
    }
}

#define USE_FROM

static void
folder_scan_step(struct _header_scan_state *s, char **databuffer, int *datalength)
{
    struct _header_scan_stack *h, *hb;
    const char *content;
    const char *bound;
    int type;
    int state;
    struct _header_content_type *ct = NULL;
    struct _header_scan_filter *f;
    size_t presize;

/*  printf("\nSCAN PASS: state = %d '%s'\n", s->state, states[s->state]);*/

tail_recurse:
    d({
        printf("\nSCAN STACK:\n");
        printf("  '%s' :\n", states[s->state]);
        hb = s->parts;
        while (hb) {
            printf("  '%s' : %s\n", states[hb->savestate], hb->boundary);
            hb = hb->parent;
        }
        printf("\n");
    });

    switch (s->state) {

    case HSCAN_INITIAL:
#ifdef USE_FROM
        if (s->scan_from) {
            /* FIXME: it would be nice not to have to allocate this every pass */
            h = g_malloc0(sizeof(*h));
            h->boundary = g_strdup("From ");
            h->boundarylen = strlen(h->boundary);
            folder_push_part(s, h);
            
            h = s->parts;
            do {
                hb = folder_scan_content(s, &state, databuffer, datalength);
            } while (hb==h && *datalength>0);
            
            if (*datalength==0 && hb==h) {
                d(printf("found 'From '\n"));
                s->start_of_from = folder_tell(s);
                folder_scan_skip_line(s);
                h->savestate = HSCAN_INITIAL;
                s->state = HSCAN_FROM;
            } else {
                folder_pull_part(s);
                s->state = HSCAN_EOF;
            }
            return;
        } else {
            s->start_of_from = -1;
        }

#endif
    case HSCAN_FROM:
        s->start_of_headers = folder_tell(s);
        h = folder_scan_header(s, &state);
#ifdef USE_FROM
        if (s->scan_from)
            h->savestate = HSCAN_FROM_END;
        else
#endif
            h->savestate = HSCAN_EOF;

        /* FIXME: should this check for MIME-Version: 1.0 as well? */

        type = HSCAN_HEADER;
        if ( (content = header_raw_find(&h->headers, "Content-Type", NULL))
             && (ct = header_content_type_decode(content))) {
            if (!strcasecmp(ct->type, "multipart")) {
                bound = header_content_type_param(ct, "boundary");
                if (bound) {
                    d(printf("multipart, boundary = %s\n", bound));
                    h->boundarylen = strlen(bound)+2;
                    h->boundary = g_malloc(h->boundarylen+3);
                    sprintf(h->boundary, "--%s--", bound);
                    type = HSCAN_MULTIPART;
                } else {
                    g_warning("Multipart with no boundary, treating as text/plain");
                }
            } else if (!strcasecmp(ct->type, "message")) {
                if (!strcasecmp(ct->subtype, "rfc822")
                    /*|| !strcasecmp(ct->subtype, "partial")*/) {
                    type = HSCAN_MESSAGE;
                }
            }
        }
        h->content_type = ct;
        folder_push_part(s, h);
        s->state = type;
        return;

    case HSCAN_HEADER:
        s->state = HSCAN_BODY;

    case HSCAN_BODY:
        h = s->parts;
        *datalength = 0;
        presize = SCAN_HEAD;
        f = s->filters;

        do {
            hb = folder_scan_content(s, &state, databuffer, datalength);
            if (*datalength>0) {
                d(printf("Content raw: '%.*s'\n", *datalength, *databuffer));

                while (f) {
                    camel_mime_filter_filter(f->filter, *databuffer, *datalength, presize,
                                 databuffer, datalength, &presize);
                    f = f->next;
                }
                return;
            }
        } while (hb==h && *datalength>0);

        /* check for any filter completion data */
        while (f) {
            camel_mime_filter_filter(f->filter, *databuffer, *datalength, presize,
                         databuffer, datalength, &presize);
            f = f->next;
        }
        if (*datalength > 0)
            return;

        s->state = HSCAN_BODY_END;
        break;

    case HSCAN_MULTIPART:
        h = s->parts;
        do {
            do {
                hb = folder_scan_content(s, &state, databuffer, datalength);
                if (*datalength>0) {
                    /* FIXME: needs a state to return this shit??? */
                    d(printf("Multipart Content: '%.*s'\n", *datalength, *databuffer));
                }
            } while (hb==h && *datalength>0);
            if (*datalength==0 && hb==h) {
                d(printf("got boundary: %s\n", hb->boundary));
                folder_scan_skip_line(s);
                if (!state) {
                    s->state = HSCAN_FROM;
                    folder_scan_step(s, databuffer, datalength);
                    s->parts->savestate = HSCAN_MULTIPART; /* set return state for the new head part */
                    return;
                }
            } else {
                break;
            }
        } while (1);

        s->state = HSCAN_MULTIPART_END;
        break;

    case HSCAN_MESSAGE:
        s->state = HSCAN_FROM;
        folder_scan_step(s, databuffer, datalength);
        s->parts->savestate = HSCAN_MESSAGE_END;
        break;

    case HSCAN_FROM_END:
    case HSCAN_BODY_END:
    case HSCAN_MULTIPART_END:
    case HSCAN_MESSAGE_END:
        s->state = s->parts->savestate;
        folder_pull_part(s);
        if (s->state & HSCAN_END)
            return;
        goto tail_recurse;

    case HSCAN_EOF:
        return;

    default:
        g_warning("Invalid state in camel-mime-parser: %d", s->state);
        break;
    }

    return;
}

#ifdef STANDALONE
int main(int argc, char **argv)
{
    int fd;
    struct _header_scan_state *s;
    char *data;
    int len;
    int state;
    char *name = "/tmp/evmail/Inbox";
    struct _header_scan_stack *h;
    int i;
    int attach = 0;

    if (argc==2)
        name = argv[1];

    printf("opening: %s", name);

    for (i=1;i<argc;i++) {
        const char *encoding = NULL, *charset = NULL;
        char *attachname;

        name = argv[i];
        printf("opening: %s", name);
        
        fd = open(name, O_RDONLY);
        if (fd==-1) {
            perror("Cannot open mailbox");
            exit(1);
        }
        s = folder_scan_init(fd);
        s->scan_from = FALSE;
#if 0
        h = g_malloc0(sizeof(*h));
        h->savestate = HSCAN_EOF;
        folder_push_part(s, h);
#endif  
        while (s->state != HSCAN_EOF) {
            folder_scan_step(s, &data, &len);
            printf("\n -- PARSER STEP RETURN -- %d '%s'\n\n", s->state, states[s->state]);
            switch (s->state) {
            case HSCAN_HEADER:
                if (s->parts->content_type
                    && (charset = header_content_type_param(s->parts->content_type, "charset"))) {
                    if (strcasecmp(charset, "us-ascii")) {
                        folder_push_filter_charset(s, "UTF-8", charset);
                    } else {
                        charset = NULL;
                    }
                } else {
                    charset = NULL;
                }

                encoding = header_raw_find(&s->parts->headers, "Content-transfer-encoding");
                printf("encoding = '%s'\n", encoding);
                if (encoding && !strncasecmp(encoding, " base64", 7)) {
                    printf("adding base64 filter\n");
                    attachname = g_strdup_printf("attach.%d.%d", i, attach++);
                    folder_push_filter_save(s, attachname);
                    g_free(attachname);
                    folder_push_filter_mime(s, 0);
                }
                if (encoding && !strncasecmp(encoding, " quoted-printable", 17)) {
                    printf("adding quoted-printable filter\n");
                    attachname = g_strdup_printf("attach.%d.%d", i, attach++);
                    folder_push_filter_save(s, attachname);
                    g_free(attachname);
                    folder_push_filter_mime(s, 1);
                }

                break;
            case HSCAN_BODY:
                break;
            case HSCAN_BODY_END:
                if (encoding && !strncasecmp(encoding, " base64", 7)) {
                    printf("removing filters\n");
                    folder_filter_pull(s);
                    folder_filter_pull(s);
                }
                if (encoding && !strncasecmp(encoding, " quoted-printable", 17)) {
                    printf("removing filters\n");
                    folder_filter_pull(s);
                    folder_filter_pull(s);
                }
                if (charset) {
                    folder_filter_pull(s);
                    charset = NULL;
                }
                encoding = NULL;
                break;
            default:
                break;
            }
        }
        folder_scan_close(s);
        close(fd);
    }
    return 0;
}

#endif /* STANDALONE */