diff options
Diffstat (limited to 'camel/camel-mime-filter-html.c')
-rw-r--r-- | camel/camel-mime-filter-html.c | 322 |
1 files changed, 322 insertions, 0 deletions
diff --git a/camel/camel-mime-filter-html.c b/camel/camel-mime-filter-html.c new file mode 100644 index 0000000000..06d0ef252b --- /dev/null +++ b/camel/camel-mime-filter-html.c @@ -0,0 +1,322 @@ +/* + * Copyright (C) 2001 Ximian Inc. + * + * Authors: Michael Zucchi <notzed@helixcode.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public License + * as published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "camel-mime-filter-html.h" + +#include <stdio.h> +#include <string.h> +#include <stdarg.h> + + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> + +#include "xmlmemory.h" +#include "HTMLparser.h" +#include "HTMLtree.h" + +#define d(x) + +static void camel_mime_filter_html_class_init (CamelMimeFilterHTMLClass *klass); +static void camel_mime_filter_html_init (CamelObject *o); +static void camel_mime_filter_html_finalize (CamelObject *o); + +static CamelMimeFilterClass *camel_mime_filter_html_parent; + +struct _CamelMimeFilterHTMLPrivate { + htmlParserCtxtPtr ctxt; +}; + +/* ********************************************************************** */ + +/* HTML parser */ + +#define ARRAY_LEN(x) (sizeof(x)/sizeof((x)[0])) + +static struct { + char *element; + char *remap; +} map_start[] = { + { "p", "\n\n" }, + { "br", "\n" }, + { "h1", "\n" }, { "h2", "\n" }, { "h3", "\n" }, { "h4", "\n" }, { "h5", "\n" }, { "h6", "\n" }, +}; + + +static struct { + char *element; + char *remap; +} map_end[] = { + { "h1", "\n" }, { "h2", "\n" }, { "h3", "\n" }, { "h4", "\n" }, { "h5", "\n" }, { "h6", "\n" }, +}; + +static void +characters(void *ctx, const xmlChar *ch, int len) +{ + CamelMimeFilter *mf = ctx; + + memcpy(mf->outptr, ch, len); + mf->outptr+= len; +} + +#if 0 +/* we probably dont want to index comments */ +static void +comment(void *ctx, const xmlChar *value) +{ + CamelMimeFilter *mf = ctx; + + mf->outptr += sprintf(mf->outptr, " %s \n", value); +} +#endif + +/* we map element starts to stuff sometimes, so we can properly break up + words and lines. + This is very dumb, and needs to be smarter: e.g. + <b>F</b>\nooBar should -> "FooBar" +*/ +static void +startElement(void *ctx, const xmlChar *name, const xmlChar **atts) +{ + int i; + CamelMimeFilter *mf = ctx; + + /* we grab all "content" from "meta" tags, and dump it in the output, + it might be useful for searching with. This should probably be pickier */ + if (!strcasecmp(name, "meta")) { + if (atts) { + for (i=0;atts[i];i+=2) { + if (!strcmp(atts[i], "content")) + mf->outptr += sprintf(mf->outptr, " %s \n", atts[i+1]); + } + } + return; + } + + /* FIXME: use a hashtable */ + for (i=0;i<ARRAY_LEN(map_start);i++) { + if (!strcasecmp(map_start[i].element, name)) { + characters(ctx, map_start[i].remap, strlen(map_start[i].remap)); + break; + } + } +} + +static void +endElement(void *ctx, const xmlChar *name) +{ + int i; + + /* FIXME: use a hashtable */ + for (i=0;i<ARRAY_LEN(map_end);i++) { + if (!strcasecmp(map_end[i].element, name)) { + characters(ctx, map_end[i].remap, strlen(map_end[i].remap)); + break; + } + } +} + +/* dum de dum, well we can print out some crap for now */ +static void +warning(void *ctx, const char *msg, ...) +{ + va_list args; + + va_start(args, msg); + fprintf(stdout, "SAX.warning: "); + vfprintf(stdout, msg, args); + va_end(args); +} + +static void +error(void *ctx, const char *msg, ...) +{ + va_list args; + + va_start(args, msg); + fprintf(stdout, "SAX.error: "); + vfprintf(stdout, msg, args); + va_end(args); +} + +static void +fatalError(void *ctx, const char *msg, ...) +{ + va_list args; + + va_start(args, msg); + fprintf(stdout, "SAX.fatalError: "); + vfprintf(stdout, msg, args); + va_end(args); +} + +static xmlSAXHandler indexSAXHandler = { + NULL, /* internalSubset */ + NULL, /*isStandalone,*/ + NULL, /*hasInternalSubset,*/ + NULL, /*hasExternalSubset,*/ + NULL, /*resolveEntity,*/ + NULL, /*getEntity,*/ + NULL, /*entityDecl,*/ + NULL, /*notationDecl,*/ + NULL, /*attributeDecl,*/ + NULL, /*elementDecl,*/ + NULL, /*unparsedEntityDecl,*/ + NULL, /*setDocumentLocator,*/ + NULL, /*startDocument,*/ + NULL, /*endDocument,*/ + startElement, + endElement, + NULL, /*reference,*/ + characters, + NULL, /*ignorableWhitespace,*/ + NULL, /*processingInstruction,*/ + NULL, /*comment,*/ + warning, + error, + fatalError, + NULL, /*getParameterEntity,*/ +}; + + +/* ********************************************************************** */ + + +CamelType +camel_mime_filter_html_get_type (void) +{ + static CamelType type = CAMEL_INVALID_TYPE; + + if (type == CAMEL_INVALID_TYPE) { + type = camel_type_register (camel_mime_filter_get_type (), "CamelMimeFilterHTML", + sizeof (CamelMimeFilterHTML), + sizeof (CamelMimeFilterHTMLClass), + (CamelObjectClassInitFunc) camel_mime_filter_html_class_init, + NULL, + (CamelObjectInitFunc) camel_mime_filter_html_init, + (CamelObjectFinalizeFunc) camel_mime_filter_html_finalize); + } + + return type; +} + +static void +camel_mime_filter_html_finalize(CamelObject *o) +{ + CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)o; + + if (f->priv->ctxt) + htmlFreeParserCtxt(f->priv->ctxt); +} + +static void +camel_mime_filter_html_init (CamelObject *o) +{ + CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)o; + + f->priv = g_malloc0(sizeof(*f->priv)); +} + +static void +complete(CamelMimeFilter *mf, char *in, size_t len, size_t prespace, char **out, size_t *outlenptr, size_t *outprespace) +{ + CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)mf; + + camel_mime_filter_set_size(mf, len*2+256, FALSE); + mf->outptr = mf->outbuf; + + d(printf("converting html end:\n%.*s\n", (int)len, in)); + + if (f->priv->ctxt == NULL) { + f->priv->ctxt = htmlCreatePushParserCtxt(&indexSAXHandler, f, in, len, "", 0); + len = 0; + } + + htmlParseChunk(f->priv->ctxt, in, len, 1); + + *out = mf->outbuf; + *outlenptr = mf->outptr - mf->outbuf; + *outprespace = mf->outbuf - mf->outreal; + + d(printf("converted html end:\n%.*s\n", (int)*outlenptr, *out)); +} + +static void +filter(CamelMimeFilter *mf, char *in, size_t len, size_t prespace, char **out, size_t *outlenptr, size_t *outprespace) +{ + CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)mf; + + camel_mime_filter_set_size(mf, len*2+16, FALSE); + mf->outptr = mf->outbuf; + + d(printf("converting html:\n%.*s\n", (int)len, in)); + + if (f->priv->ctxt == NULL) + f->priv->ctxt = htmlCreatePushParserCtxt(&indexSAXHandler, f, in, len, "", 0); + else + htmlParseChunk(f->priv->ctxt, in, len, 0); + + *out = mf->outbuf; + *outlenptr = mf->outptr - mf->outbuf; + *outprespace = mf->outbuf - mf->outreal; + + d(printf("converted html:\n%.*s\n", (int)*outlenptr, *out)); +} + +static void +reset(CamelMimeFilter *mf) +{ + CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)mf; + + if (f->priv->ctxt != NULL) { + htmlFreeParserCtxt(f->priv->ctxt); + f->priv->ctxt = NULL; + } +} + +static void +camel_mime_filter_html_class_init (CamelMimeFilterHTMLClass *klass) +{ + CamelMimeFilterClass *filter_class = (CamelMimeFilterClass *) klass; + + camel_mime_filter_html_parent = CAMEL_MIME_FILTER_CLASS (camel_type_get_global_classfuncs (camel_mime_filter_get_type ())); + + filter_class->reset = reset; + filter_class->filter = filter; + filter_class->complete = complete; +} + +/** + * camel_mime_filter_html_new: + * + * Create a new CamelMimeFilterHTML object. + * + * Return value: A new CamelMimeFilterHTML widget. + **/ +CamelMimeFilterHTML * +camel_mime_filter_html_new (void) +{ + CamelMimeFilterHTML *new = CAMEL_MIME_FILTER_HTML ( camel_object_new (camel_mime_filter_html_get_type ())); + return new; +} + |