diff options
Diffstat (limited to 'camel')
-rw-r--r-- | camel/ChangeLog | 23 | ||||
-rw-r--r-- | camel/Makefile.am | 22 | ||||
-rw-r--r-- | camel/camel-folder-summary.c | 66 | ||||
-rw-r--r-- | camel/camel-mime-parser.c | 220 | ||||
-rw-r--r-- | camel/camel-mime-parser.h | 3 | ||||
-rw-r--r-- | camel/camel-mime-utils.c | 72 | ||||
-rw-r--r-- | camel/providers/mbox/camel-mbox-folder.c | 2 |
7 files changed, 362 insertions, 46 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog index ec34de00d5..d2f2b6a732 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,3 +1,26 @@ +2000-05-04 NotZed <NotZed@HelixCode.com> + + * providers/mbox/camel-mbox-folder.c (summary_get_message_info): + Maxcount is minimum of the max and the requested count, not the + maximum :) + + * camel-mime-parser.c (folder_scan_content): Properly set midline, + so we dont falsely catch offset boundary markers (i.e. From inside + content). + (folder_read): Set a sentinal on the end of the read data (\n) so + we dont have to check the buffer boundary in the inner loop. + (mempool_*): New experimental memory management routines, speed + up simple structure parsing by about 25% ... not compiled in by + default. Something similar may be needed for camel-mime-utils to + address performance issues with g_malloc and friends. + + * camel-mime-utils.c: Added a macro w(x) used to wrap all warnings + about mime/rfc violations, so they can be turned off. + + * camel-folder-summary.c (summary_build_content_info): Step after + the end of a message ... + Turn into a stand-alone program for testing and profiling. + 2000-05-04 Dan Winship <danw@helixcode.com> * providers/pop3/camel-pop3-store.c (pop3_connect): Don't fall diff --git a/camel/Makefile.am b/camel/Makefile.am index 513cd25648..c3948235ed 100644 --- a/camel/Makefile.am +++ b/camel/Makefile.am @@ -38,7 +38,6 @@ libcamel_la_SOURCES = \ camel-exception.c \ camel-folder.c \ camel-folder-search.c \ - camel-folder-summary.c \ camel-medium.c \ camel-marshal-utils.c \ camel-mime-message.c \ @@ -76,13 +75,15 @@ libcamel_la_SOURCES = \ camel-internet-address.c \ $(pthread_SRC) +# camel-folder-summary.c \ +# camel-folder-summary.h \ + libcamelinclude_HEADERS = \ camel.h \ camel-data-wrapper.h \ camel-exception.h \ camel-folder.h \ camel-folder-search.h \ - camel-folder-summary.h \ camel-marshal-utils.h \ camel-medium.h \ camel-mime-message.h \ @@ -129,3 +130,20 @@ libcamel_la_LDFLAGS = -version-info 0:0:0 -rpath $(libdir) EXTRA_DIST = \ $(libcamel_extra_sources) \ README + +noinst_PROGRAMS = \ + camel-folder-summary + +camel_folder_summary_SOURCES = \ + camel-folder-summary.c + +camel_folder_summary_LDADD = \ + ../camel/libcamel.la \ + ../e-util/libeutil.la \ + ../libibex/libibex.la \ + $(GNOME_LIBDIR) \ + $(GNOMEUI_LIBS) \ + $(INTLLIBS) \ + $(PTHREAD_LIB) \ + $(EXTRA_GNOME_LIBS) + diff --git a/camel/camel-folder-summary.c b/camel/camel-folder-summary.c index c54100375f..bb3857ee39 100644 --- a/camel/camel-folder-summary.c +++ b/camel/camel-folder-summary.c @@ -1028,7 +1028,8 @@ summary_build_content_info(CamelFolderSummary *s, CamelMimeParser *mp) } else { g_error("Parsing failed: no content of a message?"); } - if (!(state == HSCAN_MESSAGE_END)) { + state = camel_mime_parser_step(mp, &buffer, &len); + if (state != HSCAN_MESSAGE_END) { g_error("Bad parser state: Expecing MESSAGE_END or MESSAGE_EOF, got: %d", state); camel_mime_parser_unstep(mp); } @@ -1039,3 +1040,66 @@ summary_build_content_info(CamelFolderSummary *s, CamelMimeParser *mp) return info; } + +#if 1 +int main(int argc, char **argv) +{ + CamelMimeParser *mp; + int fd; + CamelFolderSummary *s; + char *buffer; + int len; + extern int strdup_count, malloc_count, free_count; + + gtk_init(&argc, &argv); + +#if 0 + { + int i; + char *s; + char buf[1024]; + + for (i=0;i<434712;i++) { + memcpy(buf, " ", 50); + buf[50] = 0; +#if 0 + s = g_strdup(buf); + g_free(s); +#endif + } + return 0; + } +#endif + + if (argc < 2 ) { + printf("usage: %s mbox\n", argv[0]); + return 1; + } + + fd = open(argv[1], O_RDONLY); + + mp = camel_mime_parser_new(); + camel_mime_parser_scan_from(mp, TRUE); +/* camel_mime_parser_set_header_regex(mp, "^(content-[^:]*|subject|from|to|date):");*/ + camel_mime_parser_init_with_fd(mp, fd); + + s = camel_folder_summary_new(); + camel_folder_summary_set_build_content(s, TRUE); + + while (camel_mime_parser_step(mp, &buffer, &len) == HSCAN_FROM) { + /*printf("Parsing message ...\n");*/ + camel_folder_summary_add_from_parser(s, mp); + if (camel_mime_parser_step(mp, &buffer, &len) != HSCAN_FROM_END) { + g_warning("Uknown state encountered, excpecting %d, got %d\n", HSCAN_FROM_END, camel_mime_parser_state(mp)); + break; + } + } + printf("summarised %d messages\n", camel_folder_summary_count(s)); + + printf("g_strdup count = %d\n", strdup_count); + printf("g_malloc count = %d\n", malloc_count); + printf("g_free count = %d\n", free_count); + return 0; +} + +#endif diff --git a/camel/camel-mime-parser.c b/camel/camel-mime-parser.c index cf81eed25b..95ecc4d994 100644 --- a/camel/camel-mime-parser.c +++ b/camel/camel-mime-parser.c @@ -32,6 +32,8 @@ #include <unicode.h> +#include <regex.h> + #include <glib.h> #include "camel-mime-parser.h" #include "camel-mime-utils.h" @@ -44,6 +46,138 @@ #define c(x) #define d(x) +/*#define MEMPOOL*/ + +#if 0 +extern int strdup_count; +extern int malloc_count; +extern int free_count; + +#define g_strdup(x) (strdup_count++, g_strdup(x)) +#define g_malloc(x) (malloc_count++, g_malloc(x)) +#define g_free(x) (free_count++, g_free(x)) + +#endif + + +#ifdef MEMPOOL +typedef struct _MemPoolNode { + struct _MemPoolNode *next; + + int free; + char data[1]; +} MemPoolNode; + +typedef struct _MemPoolThresholdNode { + struct _MemPoolThresholdNode *next; + char data[1]; +} MemPoolThresholdNode; + +typedef struct _MemPool { + int blocksize; + int threshold; + struct _MemPoolNode *blocks; + struct _MemPoolThresholdNode *threshold_blocks; +} MemPool; + +MemPool *mempool_new(int blocksize, int threshold); +void *mempool_alloc(MemPool *pool, int size); +void mempool_flush(MemPool *pool, int freeall); +void mempool_free(MemPool *pool); + +MemPool *mempool_new(int blocksize, int threshold) +{ + MemPool *pool; + + pool = g_malloc(sizeof(*pool)); + if (threshold >= blocksize) + threshold = blocksize * 2 / 3; + pool->blocksize = blocksize; + pool->threshold = threshold; + pool->blocks = NULL; + pool->threshold_blocks = NULL; + return pool; +} + +void *mempool_alloc(MemPool *pool, int size) +{ + if (size>=pool->threshold) { + MemPoolThresholdNode *n; + + n = g_malloc(sizeof(*n) - sizeof(char) + size); + n->next = pool->threshold_blocks; + pool->threshold_blocks = n; + return &n->data[0]; + } else { + MemPoolNode *n; + + n = pool->blocks; + while (n) { + if (n->free >= size) { + n->free -= size; + return &n->data[n->free]; + } + n = n->next; + } + + n = g_malloc(sizeof(*n) - sizeof(char) + pool->blocksize); + n->next = pool->blocks; + pool->blocks = n; + n->free = pool->blocksize - size; + return &n->data[n->free]; + } +} + +void mempool_flush(MemPool *pool, int freeall) +{ + MemPoolThresholdNode *tn, *tw; + MemPoolNode *pw, *pn; + + tw = pool->threshold_blocks; + while (tw) { + tn = tw->next; + g_free(tw); + tw = tn; + } + pool->threshold_blocks = NULL; + + if (freeall) { + pw = pool->blocks; + while (pw) { + pn = pw->next; + g_free(pw); + pw = pn; + } + pool->blocks = NULL; + } else { + pw = pool->blocks; + while (pw) { + pw->free = pool->blocksize; + pw = pw->next; + } + } +} + +void mempool_free(MemPool *pool) +{ + if (pool) { + mempool_flush(pool, 1); + g_free(pool); + } +} +#endif + + + + + + + + + + + + #define SCAN_BUF 4096 /* size of read buffer */ #define SCAN_HEAD 128 /* headroom guaranteed to be before each read buffer */ @@ -103,6 +237,9 @@ struct _header_scan_stack { enum _header_state savestate; /* state at invocation of this part */ +#ifdef MEMPOOL + MemPool *pool; /* memory pool to keep track of headers/etc at this level */ +#endif struct _header_raw *headers; /* headers for this part */ struct _header_content_type *content_type; @@ -673,6 +810,8 @@ folder_read(struct _header_scan_state *s) r(printf("content = %d '%.*s'\n",s->inend - s->inptr, s->inend - s->inptr, s->inptr)); } r(printf("content = %d '%.*s'\n", s->inend - s->inptr, s->inend - s->inptr, s->inptr)); + /* set a sentinal, for the inner loops to check against */ + s->inend[0] = '\n'; return s->inend-s->inptr; } @@ -737,7 +876,11 @@ folder_pull_part(struct _header_scan_state *s) if (h) { s->parts = h->parent; g_free(h->boundary); +#ifdef MEMPOOL + mempool_free(h->pool); +#else header_raw_clear(&h->headers); +#endif header_content_type_unref(h->content_type); g_free(h); } else { @@ -809,6 +952,48 @@ folder_boundary_check(struct _header_scan_state *s, const char *boundary, int *l return NULL; } +#ifdef MEMPOOL +static void +header_append_mempool(struct _header_scan_state *s, struct _header_scan_stack *h, char *header, int offset) +{ + struct _header_raw *l, *n; + char *content; + + d(printf("Header: %s: %s\n", name, value)); + + content = strchr(header, ':'); + if (content) { + register int len; + n = mempool_alloc(h->pool, sizeof(*n)); + n->next = NULL; + + len = content-header; + n->name = mempool_alloc(h->pool, len+1); + memcpy(n->name, header, len); + n->name[len] = 0; + + content++; + + len = s->outptr - content; + n->value = mempool_alloc(h->pool, len+1); + memcpy(n->value, content, len); + n->value[len] = 0; + + n->offset = offset; + + l = (struct _header_raw *)&h->headers; + while (l->next) { + l = l->next; + } + l->next = n; + } + +} + +#define header_raw_append_parse(a, b, c) (header_append_mempool(s, h, b, c)) + +#endif + /* Copy the string start->inptr into the header buffer (s->outbuf), grow if necessary and track the start offset of the header */ @@ -835,15 +1020,18 @@ static struct _header_scan_stack * folder_scan_header(struct _header_scan_state *s, int *lastone) { int atleast = s->atleast; - register char *inptr, *inend; char *start; int len; struct _header_scan_stack *part, *overpart = s->parts; struct _header_scan_stack *h; + register char *inptr, *inend; h(printf("scanning first bit\n")); h = g_malloc0(sizeof(*h)); +#ifdef MEMPOOL + h->pool = mempool_new(8192, 4096); +#endif /* FIXME: this info should be cached ? */ part = s->parts; @@ -868,7 +1056,6 @@ retry: start = inptr; while (inptr<=inend) { - register int c=-1; /*printf(" '%.20s'\n", inptr);*/ if (!s->midline @@ -880,18 +1067,24 @@ retry: } /* goto next line */ - while (inptr<=inend && (c = *inptr++)!='\n') + while ((*inptr++)!='\n') ; + /* check against the real buffer end, not our 'atleast limited' end */ + if (inptr>= s->inend) { + inptr--; + s->midline = TRUE; + } else { + s->midline = FALSE; + } + header_append(s, start, inptr); h(printf("outbuf[0] = %02x '%c' oubuf[1] = %02x '%c'\n", s->outbuf[0], isprint(s->outbuf[0])?s->outbuf[0]:'.', s->outbuf[1], isprint(s->outbuf[1])?s->outbuf[1]:'.')); - if (c!='\n') { - s->midline = TRUE; - } else { + if (!s->midline) { if (!(inptr[0] == ' ' || inptr[0] == '\t')) { if (s->outbuf[0] == '\n' || (s->outbuf[0] == '\r' && s->outbuf[1]=='\n')) { @@ -906,6 +1099,7 @@ retry: d(printf("header %.10s at %d\n", s->outbuf, s->header_start)); header_raw_append_parse(&h->headers, s->outbuf, s->header_start); + if (inptr[0]=='\n' || (inptr[0] == '\r' && inptr[1]=='\n')) { inptr++; @@ -914,7 +1108,6 @@ retry: s->outptr = s->outbuf; s->header_start = -1; } - s->midline = FALSE; start = inptr; } } @@ -1018,11 +1211,17 @@ retry: goto normal_exit; } + /* goto the next line */ - while (inptr<=inend && (*inptr++)!='\n') + while ((*inptr++)!='\n') ; - - s->midline = FALSE; + /* check against the real buffer end, not our 'atleast limited' end */ + if (inptr>= s->inend) { + inptr--; + s->midline = TRUE; + } else { + s->midline = FALSE; + } } /* *sigh* so much for the beautiful simplicity of the code so far - here we @@ -1517,3 +1716,4 @@ int main(int argc, char **argv) } #endif /* STANDALONE */ + diff --git a/camel/camel-mime-parser.h b/camel/camel-mime-parser.h index cbc7b6095e..1a84cefa28 100644 --- a/camel/camel-mime-parser.h +++ b/camel/camel-mime-parser.h @@ -86,6 +86,9 @@ int camel_mime_parser_fd(CamelMimeParser *m); /* scan 'From' separators? */ void camel_mime_parser_scan_from(CamelMimeParser *, int); +/* what headers to save, MUST include ^Content-Type: */ +int camel_mime_parser_set_header_regex(CamelMimeParser *m, char *matchstr); + /* normal interface */ enum _header_state camel_mime_parser_step(CamelMimeParser *, char **, int *); void camel_mime_parser_unstep(CamelMimeParser *); diff --git a/camel/camel-mime-utils.c b/camel/camel-mime-utils.c index 3f4c4d99d3..ef5d038a52 100644 --- a/camel/camel-mime-utils.c +++ b/camel/camel-mime-utils.c @@ -38,6 +38,17 @@ #include "camel-mime-utils.h" +int strdup_count = 0; +int malloc_count = 0; +int free_count = 0; + +#define g_strdup(x) (strdup_count++, g_strdup(x)) +#define g_malloc(x) (malloc_count++, g_malloc(x)) +#define g_free(x) (free_count++, g_free(x)) + +/* for all warnings ... */ +#define w(x) + #define d(x) #define d2(x) @@ -720,7 +731,7 @@ rfc2047_decode_word(const char *in, int len) decoded = g_strdup(outbase); } } else { - g_warning("Cannot decode charset, header display may be corrupt: %s: %s", encname, strerror(errno)); + w(g_warning("Cannot decode charset, header display may be corrupt: %s: %s", encname, strerror(errno))); /* TODO: Should this do this, or just leave the encoded strings? */ decword[inlen] = 0; decoded = g_strdup(decword); @@ -799,7 +810,7 @@ char *rfc2047_encode_word(const char *in, int len, char *type) char *buffer, *out, *ascii; size_t inlen, outlen, enclen; - printf("Converting '%.*s' to %s\n", len, in, type); + d(printf("Converting '%.*s' to %s\n", len, in, type)); /* convert utf8->encoding */ outlen = len*6; @@ -815,7 +826,7 @@ char *rfc2047_encode_word(const char *in, int len, char *type) type = "UTF-8"; } else { if (unicode_iconv(ic, &in, &inlen, &out, &outlen) == -1) { - g_warning("Conversion problem: conversion truncated: %s", strerror(errno)); + w(g_warning("Conversion problem: conversion truncated: %s", strerror(errno))); } unicode_iconv_close(ic); } @@ -829,7 +840,7 @@ char *rfc2047_encode_word(const char *in, int len, char *type) out += quoted_encode(buffer, enclen, out); sprintf(out, "?="); - printf("converted = %s\n", ascii); + d(printf("converted = %s\n", ascii)); return g_strdup(ascii); } @@ -868,7 +879,7 @@ header_encode_string(const unsigned char *in) const char *newinptr; newinptr = unicode_get_utf8(inptr, &c); if (newinptr == NULL) { - g_warning("Invalid UTF-8 sequence encountered (pos %d, char '%c'): %s", (inptr-in), inptr[0], in); + w(g_warning("Invalid UTF-8 sequence encountered (pos %d, char '%c'): %s", (inptr-in), inptr[0], in)); inptr++; continue; } @@ -1221,14 +1232,14 @@ header_decode_domain(const char **in) g_string_append(domain, " ]"); inptr++; } else { - g_warning("closing ']' not found in domain: %s", *in); + w(g_warning("closing ']' not found in domain: %s", *in)); } } else { char *a = header_decode_atom(&inptr); if (a) { g_string_append(domain, a); } else { - g_warning("missing atom from domain-ref"); + w(g_warning("missing atom from domain-ref")); break; } } @@ -1270,7 +1281,7 @@ header_decode_addrspec(const char **in) g_string_append(addr, word); header_decode_lwsp(&inptr); } else { - g_warning("Invalid address spec: %s", *in); + w(g_warning("Invalid address spec: %s", *in)); } } if (*inptr == '@') { @@ -1280,13 +1291,13 @@ header_decode_addrspec(const char **in) if (word) { g_string_append(addr, word); } else { - g_warning("Invalid address, missing domain: %s", *in); + w(g_warning("Invalid address, missing domain: %s", *in)); } } else { - g_warning("Invalid addr-spec, missing @: %s", *in); + w(g_warning("Invalid addr-spec, missing @: %s", *in)); } } else { - g_warning("invalid addr-spec, no local part"); + w(g_warning("invalid addr-spec, no local part")); } /* FIXME: return null on error? */ @@ -1357,20 +1368,20 @@ header_decode_mailbox(const char **in) if (*inptr == ':') { inptr++; } else { - g_warning("broken route-address, missing ':': %s", *in); + w(g_warning("broken route-address, missing ':': %s", *in)); } } pre = header_decode_word(&inptr); header_decode_lwsp(&inptr); } else { - g_warning("broken address? %s", *in); + w(g_warning("broken address? %s", *in)); } } if (pre) { g_string_append(addr, pre); } else { - g_warning("No local-part for email address: %s", *in); + w(g_warning("No local-part for email address: %s", *in)); } /* should be at word '.' localpart */ @@ -1393,7 +1404,7 @@ header_decode_mailbox(const char **in) dom = header_decode_domain(&inptr); g_string_append(addr, dom); } else { - g_warning("invalid address, no '@' domain part at %c: %s", *inptr, *in); + w(g_warning("invalid address, no '@' domain part at %c: %s", *inptr, *in)); } if (closeme) { @@ -1401,7 +1412,7 @@ header_decode_mailbox(const char **in) if (*inptr == '>') { inptr++; } else { - g_warning("invalid route address, no closing '>': %s", *in); + w(g_warning("invalid route address, no closing '>': %s", *in)); } } else if (name == NULL) { /* check for comment after address */ char *text, *tmp; @@ -1469,7 +1480,7 @@ header_decode_address(const char **in) if (*inptr == ';') { inptr++; } else { - g_warning("Invalid group spec, missing closing ';': %s", *in); + w(g_warning("Invalid group spec, missing closing ';': %s", *in)); } } else { inptr++; @@ -1505,18 +1516,15 @@ header_msgid_decode(const char *in) if (*inptr == '>') { inptr++; } else { - g_warning("Missing closing '>' on message id: %s", in); + w(g_warning("Missing closing '>' on message id: %s", in)); } } else { - g_warning("Cannot find message id in: %s", in); + w(g_warning("Cannot find message id in: %s", in)); } } else { - g_warning("missing opening '<' on message id: %s", in); + w(g_warning("missing opening '<' on message id: %s", in)); } - if (msgid) { - d(printf("Got message id: %s\n", msgid)); - } return msgid; } @@ -1555,11 +1563,11 @@ header_address_decode(const char *in) } while (inptr != last); if (*inptr) { - g_warning("Invalid input detected at %c (%d): %s\n or at: %s", *inptr, inptr-in, in, inptr); + w(g_warning("Invalid input detected at %c (%d): %s\n or at: %s", *inptr, inptr-in, in, inptr)); } if (inptr == last) { - g_warning("detected invalid input loop at : %s", last); + w(g_warning("detected invalid input loop at : %s", last)); } return list; @@ -1661,11 +1669,11 @@ header_content_type_decode(const char *in) subtype = decode_token(&inptr); } if (subtype == NULL && (!strcasecmp(type, "text"))) { - g_warning("text type with no subtype, resorting to text/plain: %s", in); + w(g_warning("text type with no subtype, resorting to text/plain: %s", in)); subtype = g_strdup("plain"); } if (subtype == NULL) { - g_warning("MIME type with no subtype: %s", in); + w(g_warning("MIME type with no subtype: %s", in)); } t = header_content_type_new(type, subtype); @@ -1710,9 +1718,9 @@ header_content_type_format(struct _header_content_type *ct) out = g_string_new(""); if (ct->type == NULL) { g_string_sprintfa(out, "text/plain"); - g_warning("Content-Type with no main type"); + w(g_warning("Content-Type with no main type")); } else if (ct->subtype == NULL) { - g_warning("Content-Type with no sub type: %s", ct->type); + w(g_warning("Content-Type with no sub type: %s", ct->type)); if (!strcasecmp(ct->type, "multipart")) g_string_sprintfa(out, "%s/mixed", ct->type); else @@ -1747,7 +1755,7 @@ CamelMimeDisposition *header_disposition_decode(const char *in) d->refcount = 1; d->disposition = decode_token(&inptr); if (d->disposition == NULL) - g_warning("Empty disposition type"); + w(g_warning("Empty disposition type")); d->params = header_param_list_decode(&inptr); return d; } @@ -1872,7 +1880,7 @@ header_decode_date(const char *in, int *saveoffset) if (*inptr == ',') inptr++; else - g_warning("day not followed by ','"); + w(g_warning("day not followed by ','")); } } tm.tm_mday = header_decode_int(&inptr); @@ -1975,7 +1983,7 @@ check_header(struct _header_raw *h) p = h->value; while (p && *p) { if (!isascii(*p)) { - g_warning("Appending header violates rfc: %s: %s", h->name, h->value); + w(g_warning("Appending header violates rfc: %s: %s", h->name, h->value)); return; } p++; diff --git a/camel/providers/mbox/camel-mbox-folder.c b/camel/providers/mbox/camel-mbox-folder.c index 7a0151f515..bc1913cb0d 100644 --- a/camel/providers/mbox/camel-mbox-folder.c +++ b/camel/providers/mbox/camel-mbox-folder.c @@ -934,7 +934,7 @@ GPtrArray *summary_get_message_info (CamelFolder *folder, int first, int count) CamelMboxFolder *mbox_folder = (CamelMboxFolder *)folder; maxcount = camel_mbox_summary_message_count(mbox_folder->summary); - maxcount = MAX(count, maxcount); + maxcount = MIN(count, maxcount); for (i=first;i<maxcount;i++) g_ptr_array_add(array, g_ptr_array_index(mbox_folder->summary->messages, i)); |