From 1de04ce7bdef50857a26f32a326abcd30ff10020 Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Tue, 10 Dec 2002 17:40:34 +0000 Subject: Use camel-url-scanner instead of regex. 2002-12-10 Jeffrey Stedfast * camel-mime-filter-tohtml.c (html_convert): Use camel-url-scanner instead of regex. svn path=/trunk/; revision=19087 --- camel/ChangeLog | 5 ++ camel/camel-mime-filter-tohtml.c | 126 ++++++++++++--------------------------- camel/camel-mime-filter-tohtml.h | 2 +- 3 files changed, 43 insertions(+), 90 deletions(-) diff --git a/camel/ChangeLog b/camel/ChangeLog index 592dd9c74b..eaecfaccf7 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,3 +1,8 @@ +2002-12-10 Jeffrey Stedfast + + * camel-mime-filter-tohtml.c (html_convert): Use camel-url-scanner + instead of regex. + 2002-12-09 Jeffrey Stedfast * camel-url-scanner.c (camel_url_addrspec_end): Fixed to not be diff --git a/camel/camel-mime-filter-tohtml.c b/camel/camel-mime-filter-tohtml.c index db84b7f9fe..ab6cca1fd9 100644 --- a/camel/camel-mime-filter-tohtml.c +++ b/camel/camel-mime-filter-tohtml.c @@ -27,31 +27,32 @@ #include #include -#include -#include +#include "camel-url-scanner.h" #include "camel-mime-filter-tohtml.h" #define d(x) +#define CONVERT_WEB_URLS CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS +#define CONVERT_ADDRSPEC CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES -struct _UrlRegexPattern { +static struct { unsigned int mask; - char *pattern; - char *prefix; - regex_t *preg; - regmatch_t matches; + urlpattern_t pattern; +} patterns[] = { + { CONVERT_WEB_URLS, { "file://", "", camel_url_file_start, camel_url_file_end } }, + { CONVERT_WEB_URLS, { "ftp://", "", camel_url_web_start, camel_url_web_end } }, + { CONVERT_WEB_URLS, { "http://", "", camel_url_web_start, camel_url_web_end } }, + { CONVERT_WEB_URLS, { "https://", "", camel_url_web_start, camel_url_web_end } }, + { CONVERT_WEB_URLS, { "news://", "", camel_url_web_start, camel_url_web_end } }, + { CONVERT_WEB_URLS, { "nntp://", "", camel_url_web_start, camel_url_web_end } }, + { CONVERT_WEB_URLS, { "telnet://", "", camel_url_web_start, camel_url_web_end } }, + { CONVERT_WEB_URLS, { "www.", "http://", camel_url_web_start, camel_url_web_end } }, + { CONVERT_WEB_URLS, { "ftp.", "ftp://", camel_url_web_start, camel_url_web_end } }, + { CONVERT_ADDRSPEC, { "@", "mailto:", camel_url_addrspec_start, camel_url_addrspec_end } }, }; -static struct _UrlRegexPattern patterns[] = { - { CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "(news|nntp|telnet|file|ftp|http|https)://([-a-z0-9]+(:[-a-z0-9]+)?@)?[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-a-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "", NULL, { 0, 0 } }, - { CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "www\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "http://", NULL, { 0, 0 } }, - { CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "ftp\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "ftp://", NULL, { 0, 0 } }, - { CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES, "([-_a-z0-9.\\+]+@[-_a-z0-9.]+)", "mailto:", NULL, { 0, 0 } } -}; - -#define NUM_URL_REGEX_PATTERNS (sizeof (patterns) / sizeof (patterns[0])) - +#define NUM_URL_PATTERNS (sizeof (patterns) / sizeof (patterns[0])) static void camel_mime_filter_tohtml_class_init (CamelMimeFilterToHTMLClass *klass); static void camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter); @@ -83,35 +84,14 @@ static void camel_mime_filter_tohtml_finalize (CamelObject *obj) { CamelMimeFilterToHTML *filter = (CamelMimeFilterToHTML *) obj; - int i; - for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) { - if (filter->patterns[i].preg) { - regfree (filter->patterns[i].preg); - g_free (filter->patterns[i].preg); - } - } - - g_free (filter->patterns); + camel_url_scanner_free (filter->scanner); } static void camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter) { - int i; - - /* FIXME: use a global set of patterns instead? */ - filter->patterns = g_malloc (sizeof (patterns)); - memcpy (filter->patterns, patterns, sizeof (patterns)); - - for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) { - filter->patterns[i].preg = g_malloc (sizeof (regex_t)); - if (regcomp (filter->patterns[i].preg, patterns[i].pattern, REG_EXTENDED) == -1) { - /* error building the regex_t so we can't use this pattern */ - filter->patterns[i].preg = NULL; - filter->patterns[i].mask = 0; - } - } + filter->scanner = camel_url_scanner_new (); filter->flags = 0; filter->colour = 0; @@ -279,64 +259,28 @@ html_convert (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace, #define CONVERT_URLS (CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS | CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES) if (html->flags & CONVERT_URLS) { - struct _UrlRegexPattern *fmatch, *pat; - size_t matchlen, len; - regoff_t offset; - char *linebuf; - char save; - int i; + size_t matchlen, buflen, len; + urlmatch_t match; len = inptr - start; - linebuf = g_malloc (len + 1); - memcpy (linebuf, start, len); - linebuf[len] = '\0'; - - start = linebuf; - save = '\0'; do { - /* search for all of our patterns */ - offset = 0; - fmatch = NULL; - for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) { - pat = html->patterns + i; - if ((html->flags & pat->mask) && - !regexec (pat->preg, start, 1, &pat->matches, 0)) { - if (pat->matches.rm_so < offset) { - *(start + offset) = save; - fmatch = NULL; - } - - if (!fmatch) { - fmatch = pat; - offset = pat->matches.rm_so; - - /* optimisation so we don't have to search the - entire line buffer for the next pattern */ - save = *(start + offset); - *(start + offset) = '\0'; - } - } - } - - if (fmatch) { - /* restore our char */ - *(start + offset) = save; - + if (camel_url_scanner_scan (html->scanner, start, len, &match)) { /* write out anything before the first regex match */ - outptr = writeln (filter, start, start + offset, outptr, &outend); - start += offset; - len -= offset; + outptr = writeln (filter, start, start + match.um_so, + outptr, &outend); -#define MATCHLEN(matches) (matches.rm_eo - matches.rm_so) - matchlen = MATCHLEN (fmatch->matches); + start += match.um_so; + len -= match.um_so; - i = 20 + strlen (fmatch->prefix) + matchlen + matchlen; - outptr = check_size (filter, outptr, &outend, i); + matchlen = match.um_eo - match.um_so; + + buflen = 20 + strlen (match.prefix) + matchlen + matchlen; + outptr = check_size (filter, outptr, &outend, buflen); /* write out the href tag */ outptr = g_stpcpy (outptr, "prefix); + outptr = g_stpcpy (outptr, match.prefix); memcpy (outptr, start, matchlen); outptr += matchlen; outptr = g_stpcpy (outptr, "\">"); @@ -356,8 +300,6 @@ html_convert (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace, break; } } while (len > 0); - - g_free (linebuf); } else { outptr = writeln (filter, start, inptr, outptr, &outend); } @@ -448,11 +390,17 @@ CamelMimeFilter * camel_mime_filter_tohtml_new (guint32 flags, guint32 colour) { CamelMimeFilterToHTML *new; + int i; new = CAMEL_MIME_FILTER_TOHTML (camel_object_new (camel_mime_filter_tohtml_get_type ())); new->flags = flags; new->colour = colour; + for (i = 0; i < NUM_URL_PATTERNS; i++) { + if (patterns[i].mask & flags) + camel_url_scanner_add (new->scanner, &patterns[i].pattern); + } + return CAMEL_MIME_FILTER (new); } diff --git a/camel/camel-mime-filter-tohtml.h b/camel/camel-mime-filter-tohtml.h index 0dff90d651..2c1c07e5e3 100644 --- a/camel/camel-mime-filter-tohtml.h +++ b/camel/camel-mime-filter-tohtml.h @@ -51,7 +51,7 @@ typedef struct _CamelMimeFilterToHTML CamelMimeFilterToHTML; struct _CamelMimeFilterToHTML { CamelMimeFilter parent; - struct _UrlRegexPattern *patterns; + struct _CamelUrlScanner *scanner; guint32 flags; guint32 colour; -- cgit v1.2.3