/*
 *  Copyright (C) 2001 Ximian Inc.
 *
 *  Authors: Michael Zucchi
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public License
 *  as published by the Free Software Foundation; either version 2 of
 *  the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public
 *  License along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "camel-mime-filter-html.h"

/* NOTE(review): the header names of the next eight directives are missing
   from this copy of the file (presumably <...> system headers dropped during
   extraction).  Restore them from the upstream file before building. */
#include
#include
#include
#include
#include
#include
#include
#include

#include "xmlmemory.h"
#include "HTMLparser.h"
#include "HTMLtree.h"

/* Debug hook: expands to nothing, so every d(...) statement in this file
   compiles away. */
#define d(x)

static void camel_mime_filter_html_class_init (CamelMimeFilterHTMLClass *klass);
static void camel_mime_filter_html_init (CamelObject *o);
static void camel_mime_filter_html_finalize (CamelObject *o);

/* Parent class pointer, assigned in camel_mime_filter_html_class_init(). */
static CamelMimeFilterClass *camel_mime_filter_html_parent;

/* Per-instance private data: the libxml HTML push-parser context.  It is
   created on demand by filter()/complete() and released by reset(). */
struct _CamelMimeFilterHTMLPrivate {
	htmlParserCtxtPtr ctxt;
};

/* ********************************************************************** */

/* HTML parser */

/* Number of elements in a true array (not valid on a pointer). */
#define ARRAY_LEN(x) (sizeof(x)/sizeof((x)[0]))

/* Element name -> replacement text pairs for opening tags.
   NOTE(review): the loop that consumes this table is truncated in this copy
   of the file; presumably the remap text is emitted when the named element
   starts -- confirm against upstream. */
static struct {
	char *element;
	char *remap;
} map_start[] = {
	{ "p", "\n\n" },
	{ "br", "\n" },
	{ "h1", "\n" },
	{ "h2", "\n" },
	{ "h3", "\n" },
	{ "h4", "\n" },
	{ "h5", "\n" },
	{ "h6", "\n" },
};

/* Element name -> replacement text pairs for closing tags.
   NOTE(review): no consumer of this table is visible in this copy of the
   file; presumably used by an end-element callback -- confirm. */
static struct {
	char *element;
	char *remap;
} map_end[] = {
	{ "h1", "\n" },
	{ "h2", "\n" },
	{ "h3", "\n" },
	{ "h4", "\n" },
	{ "h5", "\n" },
	{ "h6", "\n" },
};

/* Append len bytes of character data to the filter's output.
 *
 * ctx: treated as a CamelMimeFilter *; filter()/complete() pass the filter
 *      object as the parser's user data.
 * ch:  bytes to copy (not required to be NUL-terminated; only len is used).
 * len: number of bytes to copy.
 *
 * NOTE(review): presumably installed as the SAX "characters" callback; the
 * handler table is not visible in this copy of the file.
 * NOTE(review): the copy is not bounds-checked against the output buffer;
 * it relies entirely on the size requested by the caller before parsing. */
static void
characters(void *ctx, const xmlChar *ch, int len)
{
	CamelMimeFilter *mf = ctx;

	memcpy(mf->outptr, ch, len);
	/* advance the write cursor past what was just copied */
	mf->outptr+= len;
}

#if 0
/* we probably don't want to index comments */
static void
comment(void *ctx, const xmlChar *value)
{
	CamelMimeFilter *mf = ctx;

	/* disabled code: would write the comment text, padded with spaces and
	   followed by a newline, at the output cursor */
	mf->outptr += sprintf(mf->outptr, " %s \n", value);
}
#endif

/* we map element starts to stuff sometimes, so we can properly break up
   words and lines.
   This is very dumb, and needs to be smarter: e.g.
   F\nooBar should -> "FooBar" */
/* ctx:  treated as a CamelMimeFilter * (the parser's user data).
   name: element name, compared case-insensitively against "meta".
   atts: NULL, or a NULL-terminated array walked in name/value pairs. */
static void
startElement(void *ctx, const xmlChar *name, const xmlChar **atts)
{
	int i;
	CamelMimeFilter *mf = ctx;

	/* we grab all "content" from "meta" tags, and dump it in the output,
	   it might be useful for searching with.  This should probably be pickier */
	if (!strcasecmp(name, "meta")) {
		if (atts) {
			/* atts[i] is the attribute name, atts[i+1] its value */
			for (i=0;atts[i];i+=2) {
				/* NOTE(review): atts[i+1] is formatted with %s without
				   a NULL check, and sprintf is unbounded -- confirm the
				   parser never supplies a NULL value and that the
				   output buffer is always large enough. */
				if (!strcmp(atts[i], "content"))
					mf->outptr += sprintf(mf->outptr, " %s \n", atts[i+1]);
			}
		}
		/* meta elements never reach the remap loop below */
		return;
	}

	/* FIXME: use a hashtable */
	/* NOTE(review): this copy of the file breaks off inside the loop header
	   below and resumes at the tail of a later function that frees
	   f->priv->ctxt (presumably camel_mime_filter_html_finalize, which is
	   forward-declared above).  Everything in between is missing --
	   including, judging by later references, the definition of
	   indexSAXHandler.  Restore the lost text from the upstream file; the
	   fragment is reproduced untouched. */
	for (i=0;ipriv->ctxt)
		htmlFreeParserCtxt(f->priv->ctxt);
}

/* Instance initialiser: allocates the zero-filled private structure, so
   priv->ctxt starts out NULL. */
static void
camel_mime_filter_html_init (CamelObject *o)
{
	CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)o;

	f->priv = g_malloc0(sizeof(*f->priv));
}

/* Final-chunk entry point, installed as filter_class->complete.
 *
 * mf:          the filter; also accessed as CamelMimeFilterHTML for priv.
 * in, len:     last piece of input.
 * prespace:    not used by this function.
 * out:         receives mf->outbuf.
 * outlenptr:   receives the number of bytes written past mf->outbuf.
 * outprespace: receives the distance from mf->outreal to mf->outbuf.
 *
 * Output is produced by the parser callbacks advancing mf->outptr. */
static void
complete(CamelMimeFilter *mf, char *in, size_t len, size_t prespace, char **out, size_t *outlenptr, size_t *outprespace)
{
	CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)mf;

	/* request an output buffer of len*2+256 bytes and rewind the cursor.
	   NOTE(review): the meaning of the FALSE argument is not visible
	   here -- presumably "do not keep existing contents"; confirm. */
	camel_mime_filter_set_size(mf, len*2+256, FALSE);
	mf->outptr = mf->outbuf;

	d(printf("converting html end:\n%.*s\n", (int)len, in));

	if (f->priv->ctxt == NULL) {
		/* no parser yet: create one with this data as its initial chunk */
		f->priv->ctxt = htmlCreatePushParserCtxt(&indexSAXHandler, f, in, len, "", 0);
		/* presumably so the same bytes are not fed a second time below */
		len = 0;
	}

	/* last argument 1: libxml's "terminate" flag, i.e. no more input follows */
	htmlParseChunk(f->priv->ctxt, in, len, 1);

	*out = mf->outbuf;
	*outlenptr = mf->outptr - mf->outbuf;
	*outprespace = mf->outbuf - mf->outreal;

	d(printf("converted html end:\n%.*s\n", (int)*outlenptr, *out));
}

/* Streaming entry point, installed as filter_class->filter.  Parameters have
   the same roles as in complete(); prespace is likewise unused. */
static void
filter(CamelMimeFilter *mf, char *in, size_t len, size_t prespace, char **out, size_t *outlenptr, size_t *outprespace)
{
	CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)mf;

	/* request an output buffer of len*2+16 bytes and rewind the cursor */
	camel_mime_filter_set_size(mf, len*2+16, FALSE);
	mf->outptr = mf->outbuf;

	d(printf("converting html:\n%.*s\n", (int)len, in));

	/* first call: the parser is created with this chunk as its initial data */
	if (f->priv->ctxt == NULL)
		f->priv->ctxt =
htmlCreatePushParserCtxt(&indexSAXHandler, f, in, len, "", 0);
	else
		/* parser already exists: feed it this chunk; last argument 0 is
		   libxml's "terminate" flag, i.e. more input may follow */
		htmlParseChunk(f->priv->ctxt, in, len, 0);

	/* report whatever the parser callbacks wrote during this call */
	*out = mf->outbuf;
	*outlenptr = mf->outptr - mf->outbuf;
	*outprespace = mf->outbuf - mf->outreal;

	d(printf("converted html:\n%.*s\n", (int)*outlenptr, *out));
}

/* Installed as filter_class->reset: frees the parser context, if any, and
   clears the pointer so the next filter()/complete() call creates a new one. */
static void
reset(CamelMimeFilter *mf)
{
	CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)mf;

	if (f->priv->ctxt != NULL) {
		htmlFreeParserCtxt(f->priv->ctxt);
		f->priv->ctxt = NULL;
	}
}

/* Class initialiser: records the parent CamelMimeFilter class and installs
   this file's reset, filter and complete implementations. */
static void
camel_mime_filter_html_class_init (CamelMimeFilterHTMLClass *klass)
{
	CamelMimeFilterClass *filter_class = (CamelMimeFilterClass *) klass;

	camel_mime_filter_html_parent = CAMEL_MIME_FILTER_CLASS (camel_type_get_global_classfuncs (camel_mime_filter_get_type ()));

	filter_class->reset = reset;
	filter_class->filter = filter;
	filter_class->complete = complete;
}

/**
 * camel_mime_filter_html_new:
 *
 * Create a new CamelMimeFilterHTML object.
 *
 * Return value: A new CamelMimeFilterHTML object.
 **/
CamelMimeFilterHTML *
camel_mime_filter_html_new (void)
{
	CamelMimeFilterHTML *new = CAMEL_MIME_FILTER_HTML ( camel_object_new (camel_mime_filter_html_get_type ()));
	return new;
}