From 2e5b2bbb530b642dda56b332cfd93810ac415098 Mon Sep 17 00:00:00 2001 From: nobody Date: Tue, 1 Apr 2003 17:15:53 +0000 Subject: This commit was manufactured by cvs2svn to create tag 'EVOLUTION_1_2_4'. svn path=/tags/EVOLUTION_1_2_4/; revision=20614 --- camel/camel-mime-filter-tohtml.c | 573 +++++++++++++++++++++++---------------- 1 file changed, 337 insertions(+), 236 deletions(-) (limited to 'camel/camel-mime-filter-tohtml.c') diff --git a/camel/camel-mime-filter-tohtml.c b/camel/camel-mime-filter-tohtml.c index 60c4686824..b638a315b4 100644 --- a/camel/camel-mime-filter-tohtml.c +++ b/camel/camel-mime-filter-tohtml.c @@ -27,37 +27,15 @@ #include #include +#include -#include "camel-url-scanner.h" #include "camel-mime-filter-tohtml.h" -#include "camel-utf8.h" #define d(x) -#define CONVERT_WEB_URLS CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS -#define CONVERT_ADDRSPEC CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES - -static struct { - unsigned int mask; - urlpattern_t pattern; -} patterns[] = { - { CONVERT_WEB_URLS, { "file://", "", camel_url_file_start, camel_url_file_end } }, - { CONVERT_WEB_URLS, { "ftp://", "", camel_url_web_start, camel_url_web_end } }, - { CONVERT_WEB_URLS, { "http://", "", camel_url_web_start, camel_url_web_end } }, - { CONVERT_WEB_URLS, { "https://", "", camel_url_web_start, camel_url_web_end } }, - { CONVERT_WEB_URLS, { "news://", "", camel_url_web_start, camel_url_web_end } }, - { CONVERT_WEB_URLS, { "nntp://", "", camel_url_web_start, camel_url_web_end } }, - { CONVERT_WEB_URLS, { "telnet://", "", camel_url_web_start, camel_url_web_end } }, - { CONVERT_WEB_URLS, { "www.", "http://", camel_url_web_start, camel_url_web_end } }, - { CONVERT_WEB_URLS, { "ftp.", "ftp://", camel_url_web_start, camel_url_web_end } }, - { CONVERT_ADDRSPEC, { "@", "mailto:", camel_url_addrspec_start, camel_url_addrspec_end } }, -}; - -#define NUM_URL_PATTERNS (sizeof (patterns) / sizeof (patterns[0])) - static void camel_mime_filter_tohtml_class_init (CamelMimeFilterToHTMLClass *klass); -static void camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter); -static void camel_mime_filter_tohtml_finalize (CamelObject *obj); +static void camel_mime_filter_tohtml_init (CamelObject *o); +static void camel_mime_filter_tohtml_finalize (CamelObject *o); static CamelMimeFilterClass *camel_mime_filter_tohtml_parent; @@ -82,22 +60,15 @@ camel_mime_filter_tohtml_get_type (void) } static void -camel_mime_filter_tohtml_finalize (CamelObject *obj) +camel_mime_filter_tohtml_finalize (CamelObject *o) { - CamelMimeFilterToHTML *filter = (CamelMimeFilterToHTML *) obj; - - camel_url_scanner_free (filter->scanner); + ; } static void -camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter) +camel_mime_filter_tohtml_init (CamelObject *o) { - filter->scanner = camel_url_scanner_new (); - - filter->flags = 0; - filter->colour = 0; - filter->column = 0; - filter->pre_open = FALSE; + ; } @@ -118,97 +89,188 @@ check_size (CamelMimeFilter *filter, char *outptr, char **outend, size_t len) return filter->outbuf + offset; } -static int -citation_depth (const char *in) + +static unsigned short special_chars[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 7, 4, 3, 0, 0, 0, 0, 7, 3, 7, 0, 0, 7, 12, 12, 1, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 7, 3, 0, 7, 4, + 1, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 3, 7, 3, 0, 4, + 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 7, 4, 0, 0, +}; + + +#define IS_NON_ADDR (1 << 0) +#define IS_NON_URL (1 << 1) +#define IS_GARBAGE (1 << 2) +#define IS_DOMAIN (1 << 3) + +#define NON_EMAIL_CHARS "()<>@,;:\\\"/[]`'|\n\t " +#define NON_URL_CHARS "()<>,;\\\"[]`'|\n\t " +#define TRAILING_URL_GARBAGE ",.!?;:>)}\\`'-_|\n\t " + +#define is_addr_char(c) ((unsigned char) (c) < 128 && !(special_chars[(unsigned char) (c)] & IS_NON_ADDR)) +#define is_url_char(c) ((unsigned char) (c) < 128 && !(special_chars[(unsigned char) (c)] & IS_NON_URL)) +#define is_trailing_garbage(c) ((unsigned char) (c) > 127 || (special_chars[(unsigned char) (c)] & IS_GARBAGE)) +#define is_domain_name_char(c) ((unsigned char) (c) < 128 && (special_chars[(unsigned char) (c)] & IS_DOMAIN)) + + +#if 0 +static void +table_init (void) { - register const char *inptr = in; - int depth = 1; + int max, ch, i; + char *c; + + memset (special_chars, 0, sizeof (special_chars)); + for (c = NON_EMAIL_CHARS; *c; c++) + special_chars[(int) *c] |= IS_NON_ADDR; + for (c = NON_URL_CHARS; *c; c++) + special_chars[(int) *c] |= IS_NON_URL; + for (c = TRAILING_URL_GARBAGE; *c; c++) + special_chars[(int) *c] |= IS_GARBAGE; + +#define is_ascii_alpha(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z')) + + for (ch = 0; ch < 128; ch++) { + if (is_ascii_alpha (ch) || isdigit (ch) || ch == '.' || ch == '-') + special_chars[ch] |= IS_DOMAIN; + } - if (*inptr++ != '>') - return 0; + max = sizeof (special_chars) / sizeof (special_chars[0]); + printf ("static unsigned short special_chars[%d] = {", max); + for (i = 0; i < max; i++) { + if (i % 16 == 0) + printf ("\n\t"); + printf ("%3d,", special_chars[i]); + } + printf ("\n};\n"); +} +#endif + +static char * +url_extract (char **in, int inlen, gboolean check, gboolean *backup) +{ + unsigned char *inptr, *inend, *p; + char *url; - /* check that it isn't an escaped From line */ - if (!strncmp (inptr, "From", 4)) - return 0; + inptr = (unsigned char *) *in; + inend = inptr + inlen; - while (*inptr != '\n') { - if (*inptr == ' ') - inptr++; - - if (*inptr++ != '>') - break; - - depth++; + while (inptr < inend && is_url_char (*inptr)) + inptr++; + + if ((char *) inptr == *in) + return NULL; + + /* back up if we probably went too far. */ + while (inptr > (unsigned char *) *in && is_trailing_garbage (*(inptr - 1))) + inptr--; + + if (check) { + /* make sure we weren't fooled. */ + p = memchr (*in, ':', (char *) inptr - *in); + if (!p) + return NULL; } - return depth; + if (inptr == inend && backup) { + *backup = TRUE; + return NULL; + } + + url = g_strndup (*in, (char *) inptr - *in); + *in = inptr; + + return url; } static char * -writeln (CamelMimeFilter *filter, const char *in, const char *inend, char *outptr, char **outend) +email_address_extract (char **in, char *inend, char *start, char **outptr, gboolean *backup) { - CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter; - const char *inptr = in; - - while (inptr < inend) { - guint32 u; + char *addr, *pre, *end, *dot; + + /* *in points to the '@'. Look backward for a valid local-part */ + pre = *in; + while (pre - 1 >= start && is_addr_char (*(pre - 1))) + pre--; + + if (pre == *in) + return NULL; + + /* Now look forward for a valid domain part */ + for (end = *in + 1, dot = NULL; end < inend && is_domain_name_char (*end); end++) { + if (*end == '.' && !dot) + dot = end; + } + + if (end >= inend && backup) { + *backup = TRUE; + *outptr -= (*in - pre); + *in = pre; + return NULL; + } + + if (!dot) + return NULL; + + /* Remove trailing garbage */ + while (end > *in && is_trailing_garbage (*(end - 1))) + end--; + if (dot > end) + return NULL; + + addr = g_strndup (pre, end - pre); + *outptr -= (*in - pre); + *in = end; + + return addr; +} - outptr = check_size (filter, outptr, outend, 16); +static gboolean +is_citation (char *inptr, char *inend, gboolean saw_citation, gboolean *backup) +{ + if (*inptr != '>') + return FALSE; + + if (inend - inptr >= 6) { + /* make sure this isn't just mbox From-magling... */ + if (strncmp (inptr, ">From ", 6) != 0) + return TRUE; + } else if (backup) { + /* we don't have enough data to tell, so return */ + *backup = TRUE; + return saw_citation; + } + + /* if the previous line was a citation, then say this one is too */ + if (saw_citation) + return TRUE; + + /* otherwise it was just an isolated ">From " line */ + return FALSE; +} - u = camel_utf8_getc_limit ((const unsigned char **) &inptr, inend); - switch (u) { - case 0xffff: - g_warning("Truncated utf8 buffer"); - return outptr; - case '<': - outptr = g_stpcpy (outptr, "<"); - html->column++; - break; - case '>': - outptr = g_stpcpy (outptr, ">"); - html->column++; - break; - case '&': - outptr = g_stpcpy (outptr, "&"); - html->column++; - break; - case '"': - outptr = g_stpcpy (outptr, """); - html->column++; - break; - case '\t': - if (html->flags & (CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES)) { - do { - outptr = check_size (filter, outptr, outend, 7); - outptr = g_stpcpy (outptr, " "); - html->column++; - } while (html->column % 8); - break; - } - /* otherwise, FALL THROUGH */ - case ' ': - if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES - && ((inptr == (in + 1) || *inptr == ' ' || *inptr == '\t'))) { - outptr = g_stpcpy (outptr, " "); - html->column++; - break; - } - /* otherwise, FALL THROUGH */ - default: - if (u >= 20 && u <0x80) - *outptr++ = u; - else { - if (html->flags & CAMEL_MIME_FILTER_TOHTML_ESCAPE_8BIT) - *outptr++ = '?'; - else - outptr += sprintf(outptr, "&#%u;", u); - } - html->column++; - break; - } +static gboolean +is_protocol (char *inptr, char *inend, gboolean *backup) +{ + if (inend - inptr >= 8) { + if (!strncasecmp (inptr, "http://", 7) || + !strncasecmp (inptr, "https://", 8) || + !strncasecmp (inptr, "ftp://", 6) || + !strncasecmp (inptr, "nntp://", 7) || + !strncasecmp (inptr, "mailto:", 7) || + !strncasecmp (inptr, "news:", 5) || + !strncasecmp (inptr, "file:", 5)) + return TRUE; + } else if (backup) { + *backup = TRUE; + return FALSE; } - return outptr; + return FALSE; } static void @@ -216,128 +278,193 @@ html_convert (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace, char **out, size_t *outlen, size_t *outprespace, gboolean flush) { CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter; - register char *inptr, *outptr; - char *start, *outend; - const char *inend; - int depth; + char *inptr, *inend, *outptr, *outend, *start; + gboolean backup = FALSE; camel_mime_filter_set_size (filter, inlen * 2 + 6, FALSE); - inptr = in; + inptr = start = in; inend = in + inlen; outptr = filter->outbuf; outend = filter->outbuf + filter->outsize; if (html->flags & CAMEL_MIME_FILTER_TOHTML_PRE && !html->pre_open) { - outptr = g_stpcpy (outptr, "
");
+		outptr += sprintf (outptr, "%s", "
");
 		html->pre_open = TRUE;
 	}
 	
-	start = inptr;
-	while (inptr < inend && *inptr != '\n')
-		inptr++;
-	
 	while (inptr < inend) {
-		html->column = 0;
-		depth = 0;
+		unsigned char u;
 		
-		if (html->flags & CAMEL_MIME_FILTER_TOHTML_MARK_CITATION) {
-			if ((depth = citation_depth (start)) > 0) {
-				char font[25];
-				
-				/* FIXME: we could easily support multiple colour depths here */
-				
-				g_snprintf (font, 25, "", html->colour);
-				
-				outptr = check_size (filter, outptr, &outend, 25);
-				outptr = g_stpcpy (outptr, font);
-			} else if (*start == '>') {
-				/* >From line */
-				start++;
+		if (html->flags & CAMEL_MIME_FILTER_TOHTML_MARK_CITATION && html->column == 0) {
+			html->saw_citation = is_citation (inptr, inend, html->saw_citation,
+							  flush ? &backup : NULL);
+			if (backup)
+				break;
+			
+			if (html->saw_citation) {
+				if (!html->coloured) {
+					char font[25];
+					
+					g_snprintf (font, 25, "", html->colour);
+					
+					outptr = check_size (filter, outptr, &outend, 25);
+					outptr += sprintf (outptr, "%s", font);
+					html->coloured = TRUE;
+				}
+			} else if (html->coloured) {
+				outptr = check_size (filter, outptr, &outend, 10);
+				outptr += sprintf (outptr, "%s", "");
+				html->coloured = FALSE;
 			}
-		} else if (html->flags & CAMEL_MIME_FILTER_TOHTML_CITE) {
+			
+			/* display mbox-mangled ">From " as "From " */
+			if (*inptr == '>' && !html->saw_citation)
+				inptr++;
+		} else if (html->flags & CAMEL_MIME_FILTER_TOHTML_CITE && html->column == 0) {
 			outptr = check_size (filter, outptr, &outend, 6);
-			outptr = g_stpcpy (outptr, "> ");
-			html->column += 2;
+			outptr += sprintf (outptr, "%s", "> ");
 		}
 		
-#define CONVERT_URLS (CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS | CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES)
-		if (html->flags & CONVERT_URLS) {
-			size_t matchlen, buflen, len;
-			urlmatch_t match;
-			
-			len = inptr - start;
+		if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS && isalpha ((int) *inptr)) {
+			char *refurl = NULL, *dispurl = NULL;
 			
-			do {
-				if (camel_url_scanner_scan (html->scanner, start, len, &match)) {
-					/* write out anything before the first regex match */
-					outptr = writeln (filter, start, start + match.um_so,
-							  outptr, &outend);
-					
-					start += match.um_so;
-					len -= match.um_so;
-					
-					matchlen = match.um_eo - match.um_so;
-					
-					buflen = 20 + strlen (match.prefix) + matchlen + matchlen;
-					outptr = check_size (filter, outptr, &outend, buflen);
-					
-					/* write out the href tag */
-					outptr = g_stpcpy (outptr, "");
-					
-					/* now write the matched string */
-					memcpy (outptr, start, matchlen);
-					html->column += matchlen;
-					outptr += matchlen;
-					start += matchlen;
-					len -= matchlen;
-					
-					/* close the href tag */
-					outptr = g_stpcpy (outptr, "");
-				} else {
-					/* nothing matched so write out the remainder of this line buffer */
-					outptr = writeln (filter, start, start + len, outptr, &outend);
+			if (is_protocol (inptr, inend, flush ? &backup : NULL)) {
+				dispurl = url_extract (&inptr, inend - inptr, TRUE,
+						       flush ? &backup : NULL);
+				if (backup)
 					break;
+				
+				if (dispurl)
+					refurl = g_strdup (dispurl);
+			} else {
+				if (backup)
+					break;
+				
+				if (!strncasecmp (inptr, "www.", 4) && ((unsigned char) inptr[4]) < 0x80
+				    && isalnum ((int) inptr[4])) {
+					dispurl = url_extract (&inptr, inend - inptr, FALSE,
+							      flush ? &backup : NULL);
+					if (backup)
+						break;
+					
+					if (dispurl)
+						refurl = g_strdup_printf ("http://%s", dispurl);
 				}
-			} while (len > 0);
-		} else {
-			outptr = writeln (filter, start, inptr, outptr, &outend);
-		}
-		
-		if ((html->flags & CAMEL_MIME_FILTER_TOHTML_MARK_CITATION) && depth > 0) {
-			outptr = check_size (filter, outptr, &outend, 8);
-			outptr = g_stpcpy (outptr, "");
+			}
+			
+			if (dispurl) {
+				outptr = check_size (filter, outptr, &outend,
+						     strlen (refurl) +
+						     strlen (dispurl) + 15);
+				outptr += sprintf (outptr, "%s",
+						   refurl, dispurl);
+				html->column += strlen (dispurl);
+				g_free (refurl);
+				g_free (dispurl);
+			}
+			
+			if (inptr >= inend)
+				break;
 		}
 		
-		if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_NL) {
-			outptr = check_size (filter, outptr, &outend, 5);
-			outptr = g_stpcpy (outptr, "
"); + if (*inptr == '@' && (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES)) { + char *addr, *outaddr; + + addr = email_address_extract (&inptr, inend, start, &outptr, + flush ? &backup : NULL); + if (backup) + break; + + if (addr) { + outaddr = g_strdup_printf ("%s", + addr, addr); + outptr = check_size (filter, outptr, &outend, strlen (outaddr)); + outptr += sprintf (outptr, "%s", outaddr); + html->column += strlen (addr); + g_free (addr); + g_free (outaddr); + } } - *outptr++ = '\n'; + outptr = check_size (filter, outptr, &outend, 32); - start = ++inptr; - while (inptr < inend && *inptr != '\n') - inptr++; + switch ((u = (unsigned char) *inptr++)) { + case '<': + outptr += sprintf (outptr, "%s", "<"); + html->column++; + break; + + case '>': + outptr += sprintf (outptr, "%s", ">"); + html->column++; + break; + + case '&': + outptr += sprintf (outptr, "%s", "&"); + html->column++; + break; + + case '"': + outptr += sprintf (outptr, "%s", """); + html->column++; + break; + + case '\n': + if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_NL) + outptr += sprintf (outptr, "%s", "
"); + + *outptr++ = '\n'; + start = inptr; + html->column = 0; + break; + + case '\t': + if (html->flags & (CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES)) { + do { + outptr = check_size (filter, outptr, &outend, 7); + outptr += sprintf (outptr, "%s", " "); + html->column++; + } while (html->column % 8); + break; + } + /* otherwise, FALL THROUGH */ + + case ' ': + if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES) { + if (inptr == in || (inptr < inend && (*(inptr + 1) == ' ' || + *(inptr + 1) == '\t' || + *(inptr - 1) == '\n'))) { + outptr += sprintf (outptr, "%s", " "); + html->column++; + break; + } + } + /* otherwise, FALL THROUGH */ + + default: + if ((u >= 0x20 && u < 0x80) || + (u == '\r' || u == '\t') || html->flags & CAMEL_MIME_FILTER_TOHTML_PRESERVE_8BIT) { + /* Default case, just copy. */ + *outptr++ = (char) u; + } else { + if (html->flags & CAMEL_MIME_FILTER_TOHTML_ESCAPE_8BIT) + *outptr++ = '?'; + else + outptr += g_snprintf (outptr, 9, "&#%d;", (int) u); + } + html->column++; + break; + } } - if (flush) { - /* flush the rest of our input buffer */ - if (start < inend) - outptr = writeln (filter, start, inend, outptr, &outend); - - if (html->pre_open) { - /* close the pre-tag */ - outptr = check_size (filter, outptr, &outend, 10); - outptr = g_stpcpy (outptr, "
"); - } - } else if (start < inend) { - /* backup */ - camel_mime_filter_backup (filter, start, (unsigned) (inend - start)); + if (inptr < inend) + camel_mime_filter_backup (filter, inptr, inend - inptr); + + if (flush && html->pre_open) { + outptr = check_size (filter, outptr, &outend, 10); + outptr += sprintf (outptr, "%s", "
"); + html->pre_open = FALSE; } *out = filter->outbuf; @@ -366,6 +493,8 @@ filter_reset (CamelMimeFilter *filter) html->column = 0; html->pre_open = FALSE; + html->saw_citation = FALSE; + html->coloured = FALSE; } static void @@ -394,39 +523,11 @@ CamelMimeFilter * camel_mime_filter_tohtml_new (guint32 flags, guint32 colour) { CamelMimeFilterToHTML *new; - int i; new = CAMEL_MIME_FILTER_TOHTML (camel_object_new (camel_mime_filter_tohtml_get_type ())); new->flags = flags; new->colour = colour; - for (i = 0; i < NUM_URL_PATTERNS; i++) { - if (patterns[i].mask & flags) - camel_url_scanner_add (new->scanner, &patterns[i].pattern); - } - return CAMEL_MIME_FILTER (new); } - - -char * -camel_text_to_html (const char *in, guint32 flags, guint32 colour) -{ - CamelMimeFilter *filter; - size_t outlen, outpre; - char *outbuf; - - g_return_val_if_fail (in != NULL, NULL); - - filter = camel_mime_filter_tohtml_new (flags, colour); - - camel_mime_filter_complete (filter, (char *) in, strlen (in), 0, - &outbuf, &outlen, &outpre); - - outbuf = g_strndup (outbuf, outlen); - - camel_object_unref (filter); - - return outbuf; -} -- cgit v1.2.3