diff options
author | Not Zed <NotZed@Ximian.com> | 2001-03-14 15:06:54 +0800 |
---|---|---|
committer | Michael Zucci <zucchi@src.gnome.org> | 2001-03-14 15:06:54 +0800 |
commit | 0e9c0268a8ae99b88d7b98539181c0e938e5cb53 (patch) | |
tree | 448e5a9d7b6797e4a396c942a8370c5db2a99540 /camel/camel-mime-filter-html.c | |
parent | 83b4aa2b1d586cd9014394343b303d71e31ad3b8 (diff) | |
download | gsoc2013-evolution-0e9c0268a8ae99b88d7b98539181c0e938e5cb53.tar gsoc2013-evolution-0e9c0268a8ae99b88d7b98539181c0e938e5cb53.tar.gz gsoc2013-evolution-0e9c0268a8ae99b88d7b98539181c0e938e5cb53.tar.bz2 gsoc2013-evolution-0e9c0268a8ae99b88d7b98539181c0e938e5cb53.tar.lz gsoc2013-evolution-0e9c0268a8ae99b88d7b98539181c0e938e5cb53.tar.xz gsoc2013-evolution-0e9c0268a8ae99b88d7b98539181c0e938e5cb53.tar.zst gsoc2013-evolution-0e9c0268a8ae99b88d7b98539181c0e938e5cb53.zip |
re-constify inbuf, to remove a warning.
2001-03-14 Not Zed <NotZed@Ximian.com>
* camel-mime-filter-charset.c (filter, complete): re-constify
inbuf, to remove a warning.
* camel-mime-parser.c (folder_scan_step): When we're out of data,
run the filter_complete. For some reason the logic that was there
was never being run, always try it now, i think it was to work
around a buggy filter, rather than fix it the right way.
* camel-folder-summary.c (summary_build_content_info): If indexing
html parts, use the html filter to convert it to some indexable
format.
(summary_build_content_info): Reset the filters before adding them
back to the stream, if they get re-used in a given instance
(likely).
* Makefile.am (libcamelinclude_HEADERS): Added
camel-mime-filter-html.[ch].
(INCLUDES): Added xml cflags
2001-03-05 Not Zed <NotZed@Ximian.com>
* camel-folder-search.c (camel_folder_search_class_init): Setup a
new function, "uid" which matches uids.
(search_uid): Implement the "match uid" command.
svn path=/trunk/; revision=8705
Diffstat (limited to 'camel/camel-mime-filter-html.c')
-rw-r--r-- | camel/camel-mime-filter-html.c | 322 |
1 files changed, 322 insertions, 0 deletions
diff --git a/camel/camel-mime-filter-html.c b/camel/camel-mime-filter-html.c new file mode 100644 index 0000000000..06d0ef252b --- /dev/null +++ b/camel/camel-mime-filter-html.c @@ -0,0 +1,322 @@ +/* + * Copyright (C) 2001 Ximian Inc. + * + * Authors: Michael Zucchi <notzed@helixcode.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public License + * as published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include "camel-mime-filter-html.h" + +#include <stdio.h> +#include <string.h> +#include <stdarg.h> + + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> + +#include "xmlmemory.h" +#include "HTMLparser.h" +#include "HTMLtree.h" + +#define d(x) + +static void camel_mime_filter_html_class_init (CamelMimeFilterHTMLClass *klass); +static void camel_mime_filter_html_init (CamelObject *o); +static void camel_mime_filter_html_finalize (CamelObject *o); + +static CamelMimeFilterClass *camel_mime_filter_html_parent; + +struct _CamelMimeFilterHTMLPrivate { + htmlParserCtxtPtr ctxt; +}; + +/* ********************************************************************** */ + +/* HTML parser */ + +#define ARRAY_LEN(x) (sizeof(x)/sizeof((x)[0])) + +static struct { + char *element; + char *remap; +} map_start[] = { + { "p", "\n\n" }, + { "br", "\n" }, + { "h1", "\n" }, { "h2", "\n" }, { "h3", "\n" }, { "h4", "\n" }, { "h5", "\n" }, { "h6", "\n" }, +}; + + +static struct { + char *element; + char *remap; +} map_end[] = { + { "h1", "\n" }, { "h2", "\n" }, { "h3", "\n" }, { "h4", "\n" }, { "h5", "\n" }, { "h6", "\n" }, +}; + +static void +characters(void *ctx, const xmlChar *ch, int len) +{ + CamelMimeFilter *mf = ctx; + + memcpy(mf->outptr, ch, len); + mf->outptr+= len; +} + +#if 0 +/* we probably dont want to index comments */ +static void +comment(void *ctx, const xmlChar *value) +{ + CamelMimeFilter *mf = ctx; + + mf->outptr += sprintf(mf->outptr, " %s \n", value); +} +#endif + +/* we map element starts to stuff sometimes, so we can properly break up + words and lines. + This is very dumb, and needs to be smarter: e.g. + <b>F</b>\nooBar should -> "FooBar" +*/ +static void +startElement(void *ctx, const xmlChar *name, const xmlChar **atts) +{ + int i; + CamelMimeFilter *mf = ctx; + + /* we grab all "content" from "meta" tags, and dump it in the output, + it might be useful for searching with. 
This should probably be pickier */ + if (!strcasecmp(name, "meta")) { + if (atts) { + for (i=0;atts[i];i+=2) { + if (!strcmp(atts[i], "content")) + mf->outptr += sprintf(mf->outptr, " %s \n", atts[i+1]); + } + } + return; + } + + /* FIXME: use a hashtable */ + for (i=0;i<ARRAY_LEN(map_start);i++) { + if (!strcasecmp(map_start[i].element, name)) { + characters(ctx, map_start[i].remap, strlen(map_start[i].remap)); + break; + } + } +} + +static void +endElement(void *ctx, const xmlChar *name) +{ + int i; + + /* FIXME: use a hashtable */ + for (i=0;i<ARRAY_LEN(map_end);i++) { + if (!strcasecmp(map_end[i].element, name)) { + characters(ctx, map_end[i].remap, strlen(map_end[i].remap)); + break; + } + } +} + +/* dum de dum, well we can print out some crap for now */ +static void +warning(void *ctx, const char *msg, ...) +{ + va_list args; + + va_start(args, msg); + fprintf(stdout, "SAX.warning: "); + vfprintf(stdout, msg, args); + va_end(args); +} + +static void +error(void *ctx, const char *msg, ...) +{ + va_list args; + + va_start(args, msg); + fprintf(stdout, "SAX.error: "); + vfprintf(stdout, msg, args); + va_end(args); +} + +static void +fatalError(void *ctx, const char *msg, ...) 
+{ + va_list args; + + va_start(args, msg); + fprintf(stdout, "SAX.fatalError: "); + vfprintf(stdout, msg, args); + va_end(args); +} + +static xmlSAXHandler indexSAXHandler = { + NULL, /* internalSubset */ + NULL, /*isStandalone,*/ + NULL, /*hasInternalSubset,*/ + NULL, /*hasExternalSubset,*/ + NULL, /*resolveEntity,*/ + NULL, /*getEntity,*/ + NULL, /*entityDecl,*/ + NULL, /*notationDecl,*/ + NULL, /*attributeDecl,*/ + NULL, /*elementDecl,*/ + NULL, /*unparsedEntityDecl,*/ + NULL, /*setDocumentLocator,*/ + NULL, /*startDocument,*/ + NULL, /*endDocument,*/ + startElement, + endElement, + NULL, /*reference,*/ + characters, + NULL, /*ignorableWhitespace,*/ + NULL, /*processingInstruction,*/ + NULL, /*comment,*/ + warning, + error, + fatalError, + NULL, /*getParameterEntity,*/ +}; + + +/* ********************************************************************** */ + + +CamelType +camel_mime_filter_html_get_type (void) +{ + static CamelType type = CAMEL_INVALID_TYPE; + + if (type == CAMEL_INVALID_TYPE) { + type = camel_type_register (camel_mime_filter_get_type (), "CamelMimeFilterHTML", + sizeof (CamelMimeFilterHTML), + sizeof (CamelMimeFilterHTMLClass), + (CamelObjectClassInitFunc) camel_mime_filter_html_class_init, + NULL, + (CamelObjectInitFunc) camel_mime_filter_html_init, + (CamelObjectFinalizeFunc) camel_mime_filter_html_finalize); + } + + return type; +} + +static void +camel_mime_filter_html_finalize(CamelObject *o) +{ + CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)o; + + if (f->priv->ctxt) + htmlFreeParserCtxt(f->priv->ctxt); +} + +static void +camel_mime_filter_html_init (CamelObject *o) +{ + CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)o; + + f->priv = g_malloc0(sizeof(*f->priv)); +} + +static void +complete(CamelMimeFilter *mf, char *in, size_t len, size_t prespace, char **out, size_t *outlenptr, size_t *outprespace) +{ + CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)mf; + + camel_mime_filter_set_size(mf, len*2+256, FALSE); + mf->outptr = mf->outbuf; + 
+ d(printf("converting html end:\n%.*s\n", (int)len, in)); + + if (f->priv->ctxt == NULL) { + f->priv->ctxt = htmlCreatePushParserCtxt(&indexSAXHandler, f, in, len, "", 0); + len = 0; + } + + htmlParseChunk(f->priv->ctxt, in, len, 1); + + *out = mf->outbuf; + *outlenptr = mf->outptr - mf->outbuf; + *outprespace = mf->outbuf - mf->outreal; + + d(printf("converted html end:\n%.*s\n", (int)*outlenptr, *out)); +} + +static void +filter(CamelMimeFilter *mf, char *in, size_t len, size_t prespace, char **out, size_t *outlenptr, size_t *outprespace) +{ + CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)mf; + + camel_mime_filter_set_size(mf, len*2+16, FALSE); + mf->outptr = mf->outbuf; + + d(printf("converting html:\n%.*s\n", (int)len, in)); + + if (f->priv->ctxt == NULL) + f->priv->ctxt = htmlCreatePushParserCtxt(&indexSAXHandler, f, in, len, "", 0); + else + htmlParseChunk(f->priv->ctxt, in, len, 0); + + *out = mf->outbuf; + *outlenptr = mf->outptr - mf->outbuf; + *outprespace = mf->outbuf - mf->outreal; + + d(printf("converted html:\n%.*s\n", (int)*outlenptr, *out)); +} + +static void +reset(CamelMimeFilter *mf) +{ + CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)mf; + + if (f->priv->ctxt != NULL) { + htmlFreeParserCtxt(f->priv->ctxt); + f->priv->ctxt = NULL; + } +} + +static void +camel_mime_filter_html_class_init (CamelMimeFilterHTMLClass *klass) +{ + CamelMimeFilterClass *filter_class = (CamelMimeFilterClass *) klass; + + camel_mime_filter_html_parent = CAMEL_MIME_FILTER_CLASS (camel_type_get_global_classfuncs (camel_mime_filter_get_type ())); + + filter_class->reset = reset; + filter_class->filter = filter; + filter_class->complete = complete; +} + +/** + * camel_mime_filter_html_new: + * + * Create a new CamelMimeFilterHTML object. + * + * Return value: A new CamelMimeFilterHTML widget. 
+ **/ +CamelMimeFilterHTML * +camel_mime_filter_html_new (void) +{ + CamelMimeFilterHTML *new = CAMEL_MIME_FILTER_HTML ( camel_object_new (camel_mime_filter_html_get_type ())); + return new; +} + |