/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ /* * Authors: Jeffrey Stedfast * * Copyright 2001 Ximian, Inc. (www.ximian.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA. * */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include "camel-mime-filter-tohtml.h" #define d(x) struct _UrlRegexPattern { unsigned int mask; char *pattern; char *prefix; regex_t *preg; regmatch_t matches; }; static struct _UrlRegexPattern patterns[] = { { CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "(news|nntp|telnet|file|ftp|http|https)://([-a-z0-9]+(:[-a-z0-9]+)?@)?[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-a-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "", NULL, { 0, 0 } }, { CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "www\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "http://", NULL, { 0, 0 } }, { CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "ftp\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "ftp://", NULL, { 0, 0 } }, { CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES, "([-_a-z0-9.\\+]+@[-_a-z0-9.]+)", "mailto:", NULL, { 0, 0 } } }; #define NUM_URL_REGEX_PATTERNS (sizeof (patterns) / sizeof (patterns[0])) static void camel_mime_filter_tohtml_class_init (CamelMimeFilterToHTMLClass *klass); static void camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter); static void camel_mime_filter_tohtml_finalize (CamelObject *obj); static CamelMimeFilterClass *camel_mime_filter_tohtml_parent; CamelType camel_mime_filter_tohtml_get_type (void) { static CamelType type = CAMEL_INVALID_TYPE; if (type == CAMEL_INVALID_TYPE) { type = camel_type_register (camel_mime_filter_get_type (), "CamelMimeFilterToHTML", sizeof (CamelMimeFilterToHTML), sizeof (CamelMimeFilterToHTMLClass), (CamelObjectClassInitFunc) camel_mime_filter_tohtml_class_init, NULL, (CamelObjectInitFunc) camel_mime_filter_tohtml_init, (CamelObjectFinalizeFunc) camel_mime_filter_tohtml_finalize); } return type; } static void camel_mime_filter_tohtml_finalize (CamelObject *obj) { CamelMimeFilterToHTML *filter = (CamelMimeFilterToHTML *) obj; int i; for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) { if (filter->patterns[i].preg) { regfree (filter->patterns[i].preg); g_free (filter->patterns[i].preg); } } g_free (filter->patterns); } static void camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter) { int i; /* FIXME: use a global set of patterns instead? */ filter->patterns = g_malloc (sizeof (patterns)); memcpy (filter->patterns, patterns, sizeof (patterns)); for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) { filter->patterns[i].preg = g_malloc (sizeof (regex_t)); if (regcomp (filter->patterns[i].preg, patterns[i].pattern, REG_EXTENDED) == -1) { /* error building the regex_t so we can't use this pattern */ filter->patterns[i].preg = NULL; filter->patterns[i].mask = 0; } } filter->flags = 0; filter->colour = 0; filter->column = 0; filter->pre_open = FALSE; } static char * check_size (CamelMimeFilter *filter, char *outptr, char **outend, size_t len) { size_t offset; if (*outend - outptr >= len) return outptr; offset = outptr - filter->outbuf; camel_mime_filter_set_size (filter, filter->outsize + len, TRUE); *outend = filter->outbuf + filter->outsize; return filter->outbuf + offset; } static int citation_depth (const char *in) { register const char *inptr = in; int depth = 1; if (*inptr++ != '>') return 0; /* check that it isn't an escaped From line */ if (!strncmp (inptr, "From", 4)) return 0; while (*inptr != '\n') { if (*inptr == ' ') inptr++; if (*inptr++ != '>') break; depth++; } return depth; } static char * writeln (CamelMimeFilter *filter, const char *in, const char *inend, char *outptr, char **outend) { CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter; register const char *inptr = in; while (inptr < inend) { unsigned char u; outptr = check_size (filter, outptr, outend, 9); switch ((u = (unsigned char) *inptr++)) { case '<': outptr = g_stpcpy (outptr, "<"); html->column++; break; case '>': outptr = g_stpcpy (outptr, ">"); html->column++; break; case '&': outptr = g_stpcpy (outptr, "&"); html->column++; break; case '"': outptr = g_stpcpy (outptr, """); html->column++; break; case '\t': if (html->flags & (CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES)) { do { outptr = check_size (filter, outptr, outend, 7); outptr = g_stpcpy (outptr, " "); html->column++; } while (html->column % 8); break; } /* otherwise, FALL THROUGH */ case ' ': if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES) { if (inptr == (in + 1) || *inptr == ' ' || *inptr == '\t') { outptr = g_stpcpy (outptr, " "); html->column++; break; } } /* otherwise, FALL THROUGH */ default: if (!(u >= 0x20 && u < 0x80)) { if (html->flags & CAMEL_MIME_FILTER_TOHTML_ESCAPE_8BIT) *outptr++ = '?'; else outptr += g_snprintf (outptr, 9, "&#%d;", (int) u); } else { *outptr++ = (char) u; } html->column++; break; } } return outptr; } static void html_convert (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace, char **out, size_t *outlen, size_t *outprespace, gboolean flush) { CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter; register char *inptr, *outptr; char *start, *outend; const char *inend; int depth; camel_mime_filter_set_size (filter, inlen * 2 + 6, FALSE); inptr = in; inend = in + inlen; outptr = filter->outbuf; outend = filter->outbuf + filter->outsize; if (html->flags & CAMEL_MIME_FILTER_TOHTML_PRE && !html->pre_open) { outptr = g_stpcpy (outptr, "
");
		html->pre_open = TRUE;
	}
	
	start = inptr;
	while (inptr < inend && *inptr != '\n')
		inptr++;
	
	while (inptr < inend) {
		html->column = 0;
		depth = 0;
		
		if (html->flags & CAMEL_MIME_FILTER_TOHTML_MARK_CITATION) {
			if ((depth = citation_depth (start)) > 0) {
				char font[25];
				
				/* FIXME: we could easily support multiple colour depths here */
				
				g_snprintf (font, 25, "", html->colour);
				
				outptr = check_size (filter, outptr, &outend, 25);
				outptr = g_stpcpy (outptr, font);
			} else if (*start == '>') {
				/* >From line */
				start++;
			}
		} else if (html->flags & CAMEL_MIME_FILTER_TOHTML_CITE) {
			outptr = check_size (filter, outptr, &outend, 6);
			outptr = g_stpcpy (outptr, "> ");
			html->column += 2;
		}
		
#define CONVERT_URLS (CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS | CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES)
		if (html->flags & CONVERT_URLS) {
			struct _UrlRegexPattern *fmatch, *pat;
			size_t matchlen, len;
			regoff_t offset;
			char *linebuf;
			char save;
			int i;
			
			len = inptr - start;
			linebuf = g_malloc (len + 1);
			memcpy (linebuf, start, len);
			linebuf[len] = '\0';
			
			start = linebuf;
			save = '\0';
			
			do {
				/* search for all of our patterns */
				offset = 0;
				fmatch = NULL;
				for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) {
					pat = html->patterns + i;
					if ((html->flags & pat->mask) &&
					    !regexec (pat->preg, start, 1, &pat->matches, 0)) {
						if (pat->matches.rm_so < offset) {
							*(start + offset) = save;
							fmatch = NULL;
						}
						
						if (!fmatch) {
							fmatch = pat;
							offset = pat->matches.rm_so;
							
							/* optimisation so we don't have to search the
							   entire line buffer for the next pattern */
							save = *(start + offset);
							*(start + offset) = '\0';
						}
					}
				}
				
				if (fmatch) {
					/* restore our char */
					*(start + offset) = save;
					
					/* write out anything before the first regex match */
					outptr = writeln (filter, start, start + offset, outptr, &outend);
					start += offset;
					len -= offset;
					
#define MATCHLEN(matches) (matches.rm_eo - matches.rm_so)
					matchlen = MATCHLEN (fmatch->matches);
					
					i = 20 + strlen (fmatch->prefix) + matchlen + matchlen;
					outptr = check_size (filter, outptr, &outend, i);
					
					/* write out the href tag */
					outptr = g_stpcpy (outptr, "prefix);
					memcpy (outptr, start, matchlen);
					outptr += matchlen;
					outptr = g_stpcpy (outptr, "\">");
					
					/* now write the matched string */
					memcpy (outptr, start, matchlen);
					html->column += matchlen;
					outptr += matchlen;
					start += matchlen;
					len -= matchlen;
					
					/* close the href tag */
					outptr = g_stpcpy (outptr, "");
				} else {
					/* nothing matched so write out the remainder of this line buffer */
					outptr = writeln (filter, start, start + len, outptr, &outend);
					break;
				}
			} while (len > 0);
			
			g_free (linebuf);
		} else {
			outptr = writeln (filter, start, inptr, outptr, &outend);
		}
		
		if ((html->flags & CAMEL_MIME_FILTER_TOHTML_MARK_CITATION) && depth > 0) {
			outptr = check_size (filter, outptr, &outend, 8);
			outptr = g_stpcpy (outptr, "");
		}
		
		if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_NL) {
			outptr = check_size (filter, outptr, &outend, 5);
			outptr = g_stpcpy (outptr, "
"); } *outptr++ = '\n'; start = ++inptr; while (inptr < inend && *inptr != '\n') inptr++; } if (flush) { /* flush the rest of our input buffer */ if (start < inend) outptr = writeln (filter, start, inend, outptr, &outend); if (html->pre_open) { /* close the pre-tag */ outptr = check_size (filter, outptr, &outend, 10); outptr = g_stpcpy (outptr, "
"); } } else if (start < inend) { /* backup */ camel_mime_filter_backup (filter, start, (unsigned) (inend - start)); } *out = filter->outbuf; *outlen = outptr - filter->outbuf; *outprespace = filter->outpre; } static void filter_filter (CamelMimeFilter *filter, char *in, size_t len, size_t prespace, char **out, size_t *outlen, size_t *outprespace) { html_convert (filter, in, len, prespace, out, outlen, outprespace, FALSE); } static void filter_complete (CamelMimeFilter *filter, char *in, size_t len, size_t prespace, char **out, size_t *outlen, size_t *outprespace) { html_convert (filter, in, len, prespace, out, outlen, outprespace, TRUE); } static void filter_reset (CamelMimeFilter *filter) { CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter; html->column = 0; html->pre_open = FALSE; } static void camel_mime_filter_tohtml_class_init (CamelMimeFilterToHTMLClass *klass) { CamelMimeFilterClass *filter_class = (CamelMimeFilterClass *) klass; camel_mime_filter_tohtml_parent = CAMEL_MIME_FILTER_CLASS (camel_type_get_global_classfuncs (camel_mime_filter_get_type ())); filter_class->reset = filter_reset; filter_class->filter = filter_filter; filter_class->complete = filter_complete; } /** * camel_mime_filter_tohtml_new: * @flags: * @colour: * * Creates a new CamelMimeFilterToHTML object. * * Returns a new CamelMimeFilter object. **/ CamelMimeFilter * camel_mime_filter_tohtml_new (guint32 flags, guint32 colour) { CamelMimeFilterToHTML *new; new = CAMEL_MIME_FILTER_TOHTML (camel_object_new (camel_mime_filter_tohtml_get_type ())); new->flags = flags; new->colour = colour; return CAMEL_MIME_FILTER (new); }