aboutsummaryrefslogtreecommitdiffstats
path: root/camel/camel-mime-filter-html.c
diff options
context:
space:
mode:
author Not Zed <NotZed@Ximian.com> 2001-03-14 15:06:54 +0800
committer Michael Zucci <zucchi@src.gnome.org> 2001-03-14 15:06:54 +0800
commit 0e9c0268a8ae99b88d7b98539181c0e938e5cb53 (patch)
tree 448e5a9d7b6797e4a396c942a8370c5db2a99540 /camel/camel-mime-filter-html.c
parent 83b4aa2b1d586cd9014394343b303d71e31ad3b8 (diff)
download gsoc2013-evolution-0e9c0268a8ae99b88d7b98539181c0e938e5cb53.tar
gsoc2013-evolution-0e9c0268a8ae99b88d7b98539181c0e938e5cb53.tar.gz
gsoc2013-evolution-0e9c0268a8ae99b88d7b98539181c0e938e5cb53.tar.bz2
gsoc2013-evolution-0e9c0268a8ae99b88d7b98539181c0e938e5cb53.tar.lz
gsoc2013-evolution-0e9c0268a8ae99b88d7b98539181c0e938e5cb53.tar.xz
gsoc2013-evolution-0e9c0268a8ae99b88d7b98539181c0e938e5cb53.tar.zst
gsoc2013-evolution-0e9c0268a8ae99b88d7b98539181c0e938e5cb53.zip
re-constify inbuf, to remove a warning.
2001-03-14 Not Zed <NotZed@Ximian.com> * camel-mime-filter-charset.c (filter, complete): re-constify inbuf, to remove a warning. * camel-mime-parser.c (folder_scan_step): When we're out of data, run the filter_complete. For some reason the logic that was there was never being run, always try it now, i think it was to work around a buggy filter, rather than fix it the right way. * camel-folder-summary.c (summary_build_content_info): If indexing html parts, use the html filter to convert it to some indexable format. (summary_build_content_info): Reset the filters before adding them back to the stream, if they get re-used in a given instance (likely). * Makefile.am (libcamelinclude_HEADERS): Added camel-mime-filter-html.[ch]. (INCLUDES): Added xml cflags 2001-03-05 Not Zed <NotZed@Ximian.com> * camel-folder-search.c (camel_folder_search_class_init): Setup a new function, "uid" which matches uids. (search_uid): Implement the "match uid" command. svn path=/trunk/; revision=8705
Diffstat (limited to 'camel/camel-mime-filter-html.c')
-rw-r--r-- camel/camel-mime-filter-html.c 322
1 files changed, 322 insertions, 0 deletions
diff --git a/camel/camel-mime-filter-html.c b/camel/camel-mime-filter-html.c
new file mode 100644
index 0000000000..06d0ef252b
--- /dev/null
+++ b/camel/camel-mime-filter-html.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (C) 2001 Ximian Inc.
+ *
+ * Authors: Michael Zucchi <notzed@helixcode.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public License
+ * as published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "camel-mime-filter-html.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#include "xmlmemory.h"
+#include "HTMLparser.h"
+#include "HTMLtree.h"
+
+#define d(x)
+
+static void camel_mime_filter_html_class_init (CamelMimeFilterHTMLClass *klass);
+static void camel_mime_filter_html_init (CamelObject *o);
+static void camel_mime_filter_html_finalize (CamelObject *o);
+
+static CamelMimeFilterClass *camel_mime_filter_html_parent;
+
+struct _CamelMimeFilterHTMLPrivate { /* private per-instance state of the HTML filter */
+ htmlParserCtxtPtr ctxt; /* libxml push-parser context; created lazily by filter()/complete(), NULL again after reset() */
+};
+
+/* ********************************************************************** */
+
+/* HTML parser */
+
+#define ARRAY_LEN(x) (sizeof(x)/sizeof((x)[0]))
+
+/* Element start tags that are replaced by whitespace in the output, so that
+   the text on either side of the tag does not run together.  The table is
+   read-only: the strings are literals and nothing modifies the entries. */
+static const struct {
+	const char *element;	/* tag name, matched case-insensitively */
+	const char *remap;	/* text emitted in place of the tag */
+} map_start[] = {
+	{ "p", "\n\n" },
+	{ "br", "\n" },
+	{ "h1", "\n" }, { "h2", "\n" }, { "h3", "\n" }, { "h4", "\n" }, { "h5", "\n" }, { "h6", "\n" },
+};
+
+
+/* Element end tags that are replaced by whitespace in the output (a line
+   break after each heading).  The table is read-only: the strings are
+   literals and nothing modifies the entries. */
+static const struct {
+	const char *element;	/* tag name, matched case-insensitively */
+	const char *remap;	/* text emitted in place of the closing tag */
+} map_end[] = {
+	{ "h1", "\n" }, { "h2", "\n" }, { "h3", "\n" }, { "h4", "\n" }, { "h5", "\n" }, { "h6", "\n" },
+};
+
+/* SAX "characters" callback: append len bytes of text delivered by the
+   parser to the filter's output buffer.
+
+   ctx is the CamelMimeFilter that receives the output, ch the text (not
+   NUL-terminated), len its length in bytes.
+
+   The output size chosen by filter()/complete() is only an estimate based
+   on the current input chunk, and a push parser may deliver text it held
+   back from an earlier chunk, so make sure there is room before copying
+   instead of writing past the end of the buffer. */
+static void
+characters(void *ctx, const xmlChar *ch, int len)
+{
+	CamelMimeFilter *mf = ctx;
+	size_t used;
+
+	if (ch == NULL || len <= 0)
+		return;
+
+	used = mf->outptr - mf->outbuf;
+
+	/* keep == TRUE: what has been written so far must survive the resize
+	   (NOTE(review): relies on camel_mime_filter_set_size() preserving
+	   the buffer contents when keep is set -- confirm) */
+	camel_mime_filter_set_size(mf, used + len, TRUE);
+
+	/* the buffer may have moved, so re-derive the write position */
+	mf->outptr = mf->outbuf + used;
+
+	memcpy(mf->outptr, ch, len);
+	mf->outptr += len;
+}
+
+#if 0
+/* Disabled: we probably don't want to index comments.  If compiled in, this would append each comment's text to the filter output. */
+static void
+comment(void *ctx, const xmlChar *value)
+{
+ CamelMimeFilter *mf = ctx;
+
+ mf->outptr += sprintf(mf->outptr, " %s \n", value);
+}
+#endif
+
+/* SAX start-of-element callback.
+
+   The "content" attribute of every <meta> tag is copied to the output,
+   preceded by a space and followed by a space and a newline, since it
+   might be useful for searching with; this should probably be pickier.
+   Any other tag listed in map_start is replaced by its whitespace remap,
+   so we can properly break up words and lines.
+
+   This is very dumb, and needs to be smarter: e.g.
+   <b>F</b>\nooBar should -> "FooBar"
+
+   ctx is the CamelMimeFilter receiving the output, name the element name,
+   atts a NULL-terminated array of name/value pairs, or NULL when the
+   element has no attributes. */
+static void
+startElement(void *ctx, const xmlChar *name, const xmlChar **atts)
+{
+	int i;
+
+	if (!strcasecmp((const char *)name, "meta")) {
+		if (atts) {
+			for (i=0;atts[i];i+=2) {
+				const char *value = (const char *)atts[i+1];
+
+				/* an attribute written without a value can carry a
+				   NULL value pointer; skip it rather than hand
+				   NULL to a string function */
+				if (value == NULL || strcmp((const char *)atts[i], "content") != 0)
+					continue;
+
+				/* same bytes as the old " %s \n" format, but sent
+				   through characters() so that every write to the
+				   output buffer happens in one place */
+				characters(ctx, (const xmlChar *)" ", 1);
+				characters(ctx, (const xmlChar *)value, strlen(value));
+				characters(ctx, (const xmlChar *)" \n", 2);
+			}
+		}
+		return;
+	}
+
+	/* FIXME: use a hashtable */
+	for (i=0;i<ARRAY_LEN(map_start);i++) {
+		if (!strcasecmp(map_start[i].element, (const char *)name)) {
+			characters(ctx, (const xmlChar *)map_start[i].remap, strlen(map_start[i].remap));
+			break;
+		}
+	}
+}
+
+/* SAX end-of-element callback: if the closing tag is listed in map_end,
+   emit its replacement text through characters().  Only the first
+   matching entry is used. */
+static void
+endElement(void *ctx, const xmlChar *name)
+{
+	int idx = 0;
+
+	/* FIXME: use a hashtable */
+	while (idx < ARRAY_LEN(map_end)) {
+		if (strcasecmp(map_end[idx].element, name) == 0) {
+			characters(ctx, map_end[idx].remap, strlen(map_end[idx].remap));
+			return;
+		}
+		idx++;
+	}
+}
+
+/* SAX warning callback.  For now the parser's printf-style message is
+   simply printed on stdout, prefixed with "SAX.warning: ".  ctx is not
+   used. */
+static void
+warning(void *ctx, const char *msg, ...)
+{
+	va_list ap;
+
+	fputs("SAX.warning: ", stdout);
+
+	va_start(ap, msg);
+	vfprintf(stdout, msg, ap);
+	va_end(ap);
+}
+
+static void
+error(void *ctx, const char *msg, ...)
+{
+ va_list args;
+
+ va_start(args, msg);
+ fprintf(stdout, "SAX.error: ");
+ vfprintf(stdout, msg, args);
+ va_end(args);
+}
+
+static void
+fatalError(void *ctx, const char *msg, ...)
+{
+ va_list args;
+
+ va_start(args, msg);
+ fprintf(stdout, "SAX.fatalError: ");
+ vfprintf(stdout, msg, args);
+ va_end(args);
+}
+
+static xmlSAXHandler indexSAXHandler = { /* SAX callbacks used to turn HTML into indexable text; the initializers are positional, so their order must follow the field order of xmlSAXHandler */
+ NULL, /* internalSubset */
+ NULL, /* isStandalone */
+ NULL, /* hasInternalSubset */
+ NULL, /* hasExternalSubset */
+ NULL, /* resolveEntity */
+ NULL, /* getEntity */
+ NULL, /* entityDecl */
+ NULL, /* notationDecl */
+ NULL, /* attributeDecl */
+ NULL, /* elementDecl */
+ NULL, /* unparsedEntityDecl */
+ NULL, /* setDocumentLocator */
+ NULL, /* startDocument */
+ NULL, /* endDocument */
+ startElement, /* startElement: meta "content" values and whitespace for tags in map_start */
+ endElement, /* endElement: whitespace for tags in map_end */
+ NULL, /* reference */
+ characters, /* characters: text is copied to the filter output */
+ NULL, /* ignorableWhitespace */
+ NULL, /* processingInstruction */
+ NULL, /* comment: not indexed */
+ warning, /* warning: printed on stdout */
+ error, /* error: printed on stdout */
+ fatalError, /* fatalError: printed on stdout */
+ NULL, /* getParameterEntity */
+};
+
+
+/* ********************************************************************** */
+
+
+/* Return the CamelType of CamelMimeFilterHTML.  The type is registered
+   with camel_type_register() on the first call, using
+   camel_mime_filter_get_type() as its first argument, and the cached
+   value is returned on every later call. */
+CamelType
+camel_mime_filter_html_get_type (void)
+{
+	static CamelType html_type = CAMEL_INVALID_TYPE;
+
+	if (html_type != CAMEL_INVALID_TYPE)
+		return html_type;
+
+	html_type = camel_type_register (camel_mime_filter_get_type (), "CamelMimeFilterHTML",
+					 sizeof (CamelMimeFilterHTML),
+					 sizeof (CamelMimeFilterHTMLClass),
+					 (CamelObjectClassInitFunc) camel_mime_filter_html_class_init,
+					 NULL,
+					 (CamelObjectInitFunc) camel_mime_filter_html_init,
+					 (CamelObjectFinalizeFunc) camel_mime_filter_html_finalize);
+
+	return html_type;
+}
+
+/* Finalize callback registered in camel_mime_filter_html_get_type():
+   release the parser context, if one is still open, and the private
+   structure that camel_mime_filter_html_init() allocated with
+   g_malloc0() (it was previously never freed). */
+static void
+camel_mime_filter_html_finalize(CamelObject *o)
+{
+	CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)o;
+
+	if (f->priv == NULL)
+		return;
+
+	if (f->priv->ctxt)
+		htmlFreeParserCtxt(f->priv->ctxt);
+
+	g_free(f->priv);
+	f->priv = NULL;
+}
+
+/* Instance init callback registered in camel_mime_filter_html_get_type():
+   allocate the zero-filled private structure, so the parser context
+   pointer starts out NULL. */
+static void
+camel_mime_filter_html_init (CamelObject *o)
+{
+	CamelMimeFilterHTML *html = (CamelMimeFilterHTML *)o;
+
+	html->priv = g_malloc0(sizeof(*html->priv));
+}
+
+/* "complete" method of the filter: feed the last piece of input to the
+   HTML parser, tell it the document has ended, and hand back whatever
+   text the SAX callbacks wrote.
+
+   mf is the filter; in/len the final input (prespace is not used here).
+   On return *out points at the start of the output buffer, *outlenptr is
+   the number of bytes written to it and *outprespace the distance between
+   mf->outreal and mf->outbuf. */
+static void
+complete(CamelMimeFilter *mf, char *in, size_t len, size_t prespace, char **out, size_t *outlenptr, size_t *outprespace)
+{
+	CamelMimeFilterHTML *f = (CamelMimeFilterHTML *)mf;
+
+	camel_mime_filter_set_size(mf, len*2+256, FALSE);
+	mf->outptr = mf->outbuf;
+
+	d(printf("converting html end:\n%.*s\n", (int)len, in));
+
+	if (f->priv->ctxt == NULL) {
+		/* creating the context is given this data as its initial
+		   chunk, so it must not be pushed a second time below */
+		f->priv->ctxt = htmlCreatePushParserCtxt(&indexSAXHandler, f, in, len, "", 0);
+		len = 0;
+	}
+
+	/* context creation can fail and leave ctxt NULL; in that case there
+	   is nothing to terminate and the output stays empty */
+	if (f->priv->ctxt != NULL)
+		htmlParseChunk(f->priv->ctxt, in, len, 1);
+
+	*out = mf->outbuf;
+	*outlenptr = mf->outptr - mf->outbuf;
+	*outprespace = mf->outbuf - mf->outreal;
+
+	d(printf("converted html end:\n%.*s\n", (int)*outlenptr, *out));
+}
+
+/* "filter" method of the filter: push one chunk of HTML into the parser
+   and hand back the text the SAX callbacks wrote for it.
+
+   The first chunk is handed to htmlCreatePushParserCtxt() as the initial
+   data of a new parser context; later chunks go through htmlParseChunk()
+   with the terminate flag clear.
+
+   mf is the filter; in/len the input chunk (prespace is not used here).
+   On return *out points at the start of the output buffer, *outlenptr is
+   the number of bytes written to it and *outprespace the distance between
+   mf->outreal and mf->outbuf. */
+static void
+filter(CamelMimeFilter *mf, char *in, size_t len, size_t prespace, char **out, size_t *outlenptr, size_t *outprespace)
+{
+	CamelMimeFilterHTML *html = (CamelMimeFilterHTML *)mf;
+
+	camel_mime_filter_set_size(mf, len*2+16, FALSE);
+	mf->outptr = mf->outbuf;
+
+	d(printf("converting html:\n%.*s\n", (int)len, in));
+
+	if (html->priv->ctxt != NULL) {
+		htmlParseChunk(html->priv->ctxt, in, len, 0);
+	} else {
+		html->priv->ctxt = htmlCreatePushParserCtxt(&indexSAXHandler, html, in, len, "", 0);
+	}
+
+	*outprespace = mf->outbuf - mf->outreal;
+	*outlenptr = mf->outptr - mf->outbuf;
+	*out = mf->outbuf;
+
+	d(printf("converted html:\n%.*s\n", (int)*outlenptr, *out));
+}
+
+/* "reset" method of the filter: drop the current parser context, if any,
+   so that the next call to filter() or complete() starts a new one. */
+static void
+reset(CamelMimeFilter *mf)
+{
+	CamelMimeFilterHTML *html = (CamelMimeFilterHTML *)mf;
+
+	if (html->priv->ctxt == NULL)
+		return;
+
+	htmlFreeParserCtxt(html->priv->ctxt);
+	html->priv->ctxt = NULL;
+}
+
+/* Class init callback registered in camel_mime_filter_html_get_type():
+   remember the class functions of the CamelMimeFilter type and install
+   this file's filter, complete and reset methods. */
+static void
+camel_mime_filter_html_class_init (CamelMimeFilterHTMLClass *klass)
+{
+	CamelMimeFilterClass *mime_filter_class = (CamelMimeFilterClass *) klass;
+
+	camel_mime_filter_html_parent = CAMEL_MIME_FILTER_CLASS (camel_type_get_global_classfuncs (camel_mime_filter_get_type ()));
+
+	mime_filter_class->filter = filter;
+	mime_filter_class->complete = complete;
+	mime_filter_class->reset = reset;
+}
+
+/**
+ * camel_mime_filter_html_new:
+ *
+ * Create a new CamelMimeFilterHTML object with camel_object_new().
+ *
+ * Return value: The new CamelMimeFilterHTML object.
+ **/
+CamelMimeFilterHTML *
+camel_mime_filter_html_new (void)
+{
+	return CAMEL_MIME_FILTER_HTML ( camel_object_new (camel_mime_filter_html_get_type ()));
+}
+