aboutsummaryrefslogtreecommitdiffstats
path: root/camel/camel-mime-parser.c
diff options
context:
space:
mode:
authorNot Zed <NotZed@HelixCode.com>2000-11-08 17:13:52 +0800
committerMichael Zucci <zucchi@src.gnome.org>2000-11-08 17:13:52 +0800
commit2b97bd935b8d4becffb4df667a2ee6162c706465 (patch)
treedd75b0c2f60651e8910fda93a5910d6699f94ef7 /camel/camel-mime-parser.c
parent18c2d259b26f9a6d3d86e1b0a183750d4f482d72 (diff)
downloadgsoc2013-evolution-2b97bd935b8d4becffb4df667a2ee6162c706465.tar
gsoc2013-evolution-2b97bd935b8d4becffb4df667a2ee6162c706465.tar.gz
gsoc2013-evolution-2b97bd935b8d4becffb4df667a2ee6162c706465.tar.bz2
gsoc2013-evolution-2b97bd935b8d4becffb4df667a2ee6162c706465.tar.lz
gsoc2013-evolution-2b97bd935b8d4becffb4df667a2ee6162c706465.tar.xz
gsoc2013-evolution-2b97bd935b8d4becffb4df667a2ee6162c706465.tar.zst
gsoc2013-evolution-2b97bd935b8d4becffb4df667a2ee6162c706465.zip
Keep track of the caller bestenc flags that make sense.
2000-11-08 Not Zed <NotZed@HelixCode.com> * camel-mime-message.c (find_best_encoding): Keep track of the caller bestenc flags that make sense. * camel-mime-filter-bestenc.c (filter): Added code to detect when we have "^From " lines in the sequence of text. (camel_mime_filter_bestenc_get_best_encoding): Added a new flag CAMEL_BESTENC_NO_FROM: if set, it will not allow any lines matching "^From " to appear in the output - currently forcing base64 encoding to achieve this. * camel-mime-parser.c (folder_scan_step): Call camel_mime-filter_complete() once we're done, rather than filter_filter(). (folder_scan_content): Some fixes for state changing; so that when we do find another boundary it is properly flagged. Since we strip the last \n off all data, we must take that into account too. Sigh. Fixes a rather nasty set of bugs where multipart messages could start including following messages as parts, etc. (struct _header_scan_stack): Added new parameter, boundarylenfinal, which holds the length of the final boundary, if it is different (e.g. for From lines, whihc aren't) (folder_scan_step): Setup teh boundarylenfinal value when creating a new boundary. (folder_scan_content): Hmm, if we hit the end-of-buffer sentinal, reset the scanner back to leave 'atleast' chars in the buffer still, dump that content, and retry again. Stops us losing a check for a boundary on some data we haven't really looked at yet! (folder_scan_content): Use boundarylenfinal to calculate 'atleast'. (folder_scan_header): And here too. (folder_boundary_check): Use the atleast value directly, dont truncate it. Use the boundarylen/boundarylenfinal values directly too. (struct _header_scan_stack): Add an atleast parameter to cache the atleast info. (folder_push_part): Determine/set 'atleast', every time we add a new part. (folder_scan_header): Get the cached atleast info from the current part. (folder_scan_content): And here too. (folder_scan_header): Fix a problem where a part starting with " text" would be interpreted as a followon header wrongly. * camel-mime-filter-charset.c (complete): Add some assertions to find a bug. svn path=/trunk/; revision=6500
Diffstat (limited to 'camel/camel-mime-parser.c')
-rw-r--r--camel/camel-mime-parser.c237
1 files changed, 111 insertions, 126 deletions
diff --git a/camel/camel-mime-parser.c b/camel/camel-mime-parser.c
index efa808170e..6b68013401 100644
--- a/camel/camel-mime-parser.c
+++ b/camel/camel-mime-parser.c
@@ -260,7 +260,9 @@ struct _header_scan_stack {
int prestage; /* used to determine if it is a pre-boundary or post-boundary data segment */
char *boundary; /* for multipart/ * boundaries, including leading -- and trailing -- for the final part */
- int boundarylen; /* length of boundary, including leading -- */
+ int boundarylen; /* actual length of boundary, including leading -- if there is one */
+ int boundarylenfinal; /* length of boundary, including trailing -- if there is one */
+ int atleast; /* the biggest boundary from here to the parent */
};
struct _header_scan_filter {
@@ -946,6 +948,11 @@ folder_seek(struct _header_scan_state *s, off_t offset, int whence)
static void
folder_push_part(struct _header_scan_state *s, struct _header_scan_stack *h)
{
+ if (s->parts && s->parts->atleast > h->boundarylenfinal)
+ h->atleast = s->parts->atleast;
+ else
+ h->atleast = h->boundarylenfinal;
+
h->parent = s->parts;
s->parts = h;
}
@@ -1003,30 +1010,38 @@ folder_scan_skip_line(struct _header_scan_state *s)
s->atleast = atleast;
- return -1; /* not found */
-}
+ return -1; /* not found */
+ }
/* TODO: Is there any way to make this run faster? It gets called a lot ... */
static struct _header_scan_stack *
folder_boundary_check(struct _header_scan_state *s, const char *boundary, int *lastone)
{
struct _header_scan_stack *part;
- int len = s->atleast-2; /* make sure we dont access past the buffer */
-
+ int len = s->atleast; /* make sure we dont access past the buffer */
+
h(printf("checking boundary marker upto %d bytes\n", len));
part = s->parts;
while (part) {
h(printf(" boundary: %s\n", part->boundary));
- h(printf(" against: '%.*s'\n", len, boundary));
+ h(printf(" against: '%.*s'\n", s->atleast, boundary));
if (part->boundary
&& part->boundarylen <= len
&& memcmp(boundary, part->boundary, part->boundarylen)==0) {
h(printf("matched boundary: %s\n", part->boundary));
/* again, make sure we're in range */
- if (part->boundarylen <= len+2) {
- h(printf("checking lastone\n"));
- *lastone = (boundary[part->boundarylen]=='-'
- && boundary[part->boundarylen+1]=='-');
+ if (part->boundarylenfinal <= len) {
+ int extra = part->boundarylenfinal - part->boundarylen;
+
+ /* check the extra stuff on an final boundary, normally -- for mime parts */
+ if (extra>0) {
+ *lastone = memcmp(&boundary[part->boundarylen],
+ &part->boundary[part->boundarylen],
+ extra) == 0;
+ } else {
+ *lastone = TRUE;
+ }
+ h(printf("checking lastone = %s\n", *lastone?"TRUE":"FALSE"));
} else {
h(printf("not enough room to check last one?\n"));
*lastone = FALSE;
@@ -1045,34 +1060,34 @@ header_append_mempool(struct _header_scan_state *s, struct _header_scan_stack *h
{
struct _header_raw *l, *n;
char *content;
-
+
content = strchr(header, ':');
if (content) {
register int len;
n = mempool_alloc(h->pool, sizeof(*n));
n->next = NULL;
-
+
len = content-header;
n->name = mempool_alloc(h->pool, len+1);
memcpy(n->name, header, len);
n->name[len] = 0;
-
+
content++;
-
+
len = s->outptr - content;
n->value = mempool_alloc(h->pool, len+1);
memcpy(n->value, content, len);
n->value[len] = 0;
-
+
n->offset = offset;
-
+
l = (struct _header_raw *)&h->headers;
while (l->next) {
l = l->next;
}
l->next = n;
}
-
+
}
#define header_raw_append_parse(a, b, c) (header_append_mempool(s, h, b, c))
@@ -1119,37 +1134,28 @@ folder_scan_header(struct _header_scan_state *s, int *lastone)
h->pool = mempool_new(8192, 4096);
#endif
- /* FIXME: this info should be cached ? */
part = s->parts;
- s->atleast = 5;
- while (part) {
- if (part->boundary)
- s->atleast = MAX(s->atleast, part->boundarylen+2);
- part = part->parent;
- }
-#if 0
- s->atleast = MAX(s->atleast, 5);
- if (s->parts)
- s->atleast = MAX(s->atleast, s->parts->boundarylen+2);
-#endif
-
+ if (part)
+ s->atleast = part->atleast;
+ else
+ s->atleast = 1;
*lastone = FALSE;
retry:
-
+
while ((len = folder_read(s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
inptr = s->inptr;
inend = s->inend-s->atleast;
-
+
while (inptr<=inend) {
/*printf(" '%.20s'\n", inptr);*/
-
+
start = inptr;
-
+
if (!s->midline) {
if ((part = folder_boundary_check(s, inptr, lastone))) {
if ((s->outptr>s->outbuf) || (inptr-start))
goto header_truncated; /* may not actually be truncated */
-
+
goto normal_exit;
}
@@ -1164,15 +1170,15 @@ retry:
*start = ' ';
}
}
-
+
/* goto next line */
while ((*inptr++)!='\n')
;
-
+
g_assert(inptr<=s->inend+1);
-
+
header_append(s, start, inptr-1);
-
+
/* check against the real buffer end, not our 'atleast limited' end */
/* also make sure we have at least 1 char lookahead, so even if we found a \n at
the end, well, make out we didn't, and re-scan it next pass */
@@ -1182,7 +1188,7 @@ retry:
} else {
s->midline = FALSE;
}
-
+
h(printf("midline = %s\n", s->midline?"TRUE":"FALSE"));
h(printf("outbuf[0] = %02x '%c' oubuf[1] = %02x '%c'\n",
s->outbuf[0], isprint(s->outbuf[0])?s->outbuf[0]:'.',
@@ -1190,7 +1196,9 @@ retry:
h(printf("inptr[0] = %02x '%c' inptr[1] = %02x '%c'\n",
inptr[0], isprint(inptr[0])?inptr[0]:'.',
inptr[1], isprint(inptr[1])?inptr[1]:'.'));
-
+
+ /* this is wont handle no headers \n\n foobr - treats it as continuation */
+/*
if (!s->midline
&& !(inptr[0] == ' ' || inptr[0] == '\t')) {
h(printf("ok, checking\n"));
@@ -1200,23 +1208,34 @@ retry:
h(printf("header done?\n"));
goto header_done;
}
+*/
+ if (!s->midline) {
+ h(printf("ok, checking\n"));
+ if (s->outbuf == s->outptr
+ || s->outbuf[0] == '\n'
+ || (s->outbuf[0] == '\r' && s->outbuf[1]=='\n')) {
+ h(printf("header done?\n"));
+ goto header_done;
+ }
- /* we always have at least _1_ char here ... */
- if (s->outptr > s->outbuf && s->outptr[-1] == '\n')
- s->outptr--;
- s->outptr[0] = 0;
-
- d(printf("header '%.10s' at %d\n", s->outbuf, s->header_start));
+ if (!(inptr[0] == ' ' || inptr[0] == '\t')) {
+ /* we always have at least _1_ char here ... */
+ if (s->outptr > s->outbuf && s->outptr[-1] == '\n')
+ s->outptr--;
+ s->outptr[0] = 0;
- header_raw_append_parse(&h->headers, s->outbuf, s->header_start);
+ d(printf("header '%.10s' at %d\n", s->outbuf, s->header_start));
+
+ header_raw_append_parse(&h->headers, s->outbuf, s->header_start);
- if (inptr[0]=='\n'
- || (inptr[0] == '\r' && inptr[1]=='\n')) {
- inptr++;
- goto header_done;
+ if (inptr[0]=='\n'
+ || (inptr[0] == '\r' && inptr[1]=='\n')) {
+ inptr++;
+ goto header_done;
+ }
+ s->outptr = s->outbuf;
+ s->header_start = -1;
}
- s->outptr = s->outbuf;
- s->header_start = -1;
}
}
s->inptr = inptr;
@@ -1227,43 +1246,43 @@ retry:
if (overpart) {
overpart = overpart->parent;
while (overpart) {
- if (overpart->boundary && (overpart->boundarylen+2) < s->atleast) {
- s->atleast = overpart->boundarylen+2;
+ if (overpart->boundary && (overpart->boundarylenfinal) < s->atleast) {
+ s->atleast = overpart->boundarylenfinal;
h(printf("Retrying next smaller part ...\n"));
goto retry;
}
overpart = overpart->parent;
}
}
-
+
if ((s->outptr > s->outbuf) || s->inend > s->inptr) {
start = s->inptr;
inptr = s->inend;
goto header_truncated;
}
-
+
s->atleast = atleast;
-
+
return h;
-
+
header_truncated:
-
+
header_append(s, start, inptr);
-
+
if (s->outptr>s->outbuf && s->outptr[-1] == '\n')
s->outptr--;
s->outptr[0] = 0;
-
+
if (s->outbuf[0] == '\n'
|| (s->outbuf[0] == '\r' && s->outbuf[1]=='\n')) {
goto header_done;
}
-
+
header_raw_append_parse(&h->headers, s->outbuf, s->header_start);
-
+
header_done:
part = s->parts;
-
+
s->outptr = s->outbuf;
normal_exit:
s->inptr = inptr;
@@ -1281,28 +1300,16 @@ folder_scan_content(struct _header_scan_state *s, int *lastone, char **data, int
char *start;
int len;
struct _header_scan_stack *part, *overpart = s->parts;
- int already_packed = FALSE;
int onboundary = FALSE;
c(printf("scanning content\n"));
- /* FIXME: this info should be cached ? */
part = s->parts;
- s->atleast = 5;
- while (part) {
- if (part->boundary) {
- c(printf("boundary: %s\n", part->boundary));
- s->atleast = MAX(s->atleast, part->boundarylen+2);
- }
- part = part->parent;
- }
-/* s->atleast = MAX(s->atleast, 5);*/
-#if 0
- if (s->parts)
- s->atleast = MAX(s->atleast, s->parts->boundarylen+2);
-#endif
+ if (part)
+ s->atleast = part->atleast;
+ else
+ s->atleast = 1;
*lastone = FALSE;
-
retry:
c(printf("atleast = %d\n", s->atleast));
@@ -1316,65 +1323,43 @@ retry:
while (inptr<=inend) {
if (!s->midline
&& (part = folder_boundary_check(s, inptr, lastone))) {
- if ( (inptr-start) ) {
- onboundary = TRUE;
+ onboundary = TRUE;
+
+ /* since we truncate the boundary data, we need at least 1 char here spare,
+ to remain in the same state */
+ if ( (inptr-start) > 1)
goto content;
- }
-
- goto normal_exit;
- }
+
+ /* otherwise, jump to the state of the boundary we actually found */
+ goto normal_exit; }
/* goto the next line */
while ((*inptr++)!='\n')
;
- /* check against the real buffer end, not our 'atleast limited' end */
- if (inptr> s->inend) {
- inptr--;
+ /* check the sentinal, if we went past the atleast limit, and reset it to there */
+ if (inptr > inend+1) {
s->midline = TRUE;
+ inptr = inend+1;
} else {
s->midline = FALSE;
}
-
- g_assert(inptr<=s->inend);
}
- /* *sigh* so much for the beautiful simplicity of the code so far - here we
- have the snot to deal with the nasty end-cases that come from the read-ahead
- buffers we use */
- /* what this does, is if we are somewhere near the end of the buffer,
- force it to the front, and re-read, ensuring we bunch as much together
- as possible, for the final read, without copying too much of the time */
- /* make sure we dont loop forever, but also make sure we try smaller
- boundaries, if there are any, so we dont miss any. */
- /* this is not needed for the header scanner, since it copies its own
- data */
- c(printf("start offset = %d atleast = %d\n", start-s->inbuf, s->atleast));
- if (start > (s->inbuf + s->atleast)) {
- /* force a re-scan of this data */
- s->inptr = start;
- if (already_packed)
- goto smaller_boundary;
- c(printf("near the end, try and bunch things up a bit first\n"));
- already_packed = TRUE;
- } else {
- c(printf("dumping what i've got ...\n"));
- /* what would be nice here, is if that we're at eof, we bunch the last
- little bit in the same content, but i dont think this is easy */
- goto content_mid;
- }
+ c(printf("ran out of input, dumping what i have (%d) bytes midline = %s\n",
+ inptr-start, s->midline?"TRUE":"FALSE"));
+ goto content;
}
c(printf("length read = %d\n", len));
-smaller_boundary:
/* ok, we're at the end of the data, just make sure we're not missing out some small
truncated header markers */
if (overpart) {
overpart = overpart->parent;
while (overpart) {
- if (overpart->boundary && (overpart->boundarylen+2) < s->atleast) {
- s->atleast = overpart->boundarylen+2;
+ if (overpart->boundary && (overpart->boundarylenfinal) < s->atleast) {
+ s->atleast = overpart->boundarylenfinal;
c(printf("Retrying next smaller part ...\n"));
goto retry;
}
@@ -1392,8 +1377,6 @@ smaller_boundary:
s->atleast = atleast;
return NULL;
-content_mid:
- s->midline = TRUE;
content:
part = s->parts;
normal_exit:
@@ -1402,12 +1385,12 @@ normal_exit:
*data = start;
/* if we hit a boundary, we should not include the closing \n */
- if (onboundary)
+ if (onboundary && (inptr-start)>0)
*length = inptr-start-1;
else
*length = inptr-start;
-/* printf("got %scontent: %.*s", s->midline?"partial ":"", inptr-start, start);*/
+ /*printf("got %scontent: '%.*s'\n", s->midline?"partial ":"", inptr-start, start);*/
return part;
}
@@ -1559,6 +1542,7 @@ tail_recurse:
h = g_malloc0(sizeof(*h));
h->boundary = g_strdup("From ");
h->boundarylen = strlen(h->boundary);
+ h->boundarylenfinal = h->boundarylen;
folder_push_part(s, h);
h = s->parts;
@@ -1602,6 +1586,7 @@ tail_recurse:
if (bound) {
d(printf("multipart, boundary = %s\n", bound));
h->boundarylen = strlen(bound)+2;
+ h->boundarylenfinal = h->boundarylen+2;
h->boundary = g_malloc(h->boundarylen+3);
sprintf(h->boundary, "--%s--", bound);
type = HSCAN_MULTIPART;
@@ -1668,8 +1653,8 @@ tail_recurse:
/* check for any filter completion data */
if (*datalength > 0) {
while (f) {
- camel_mime_filter_filter(f->filter, *databuffer, *datalength, presize,
- databuffer, datalength, &presize);
+ camel_mime_filter_complete(f->filter, *databuffer, *datalength, presize,
+ databuffer, datalength, &presize);
f = f->next;
}
}