From 1a403012d41b1619f4d946921184425b62b322a9 Mon Sep 17 00:00:00 2001 From: Not Zed Date: Thu, 25 Jan 2001 02:04:28 +0000 Subject: Changed the subject search to handle case sensitive when it is mixed case. 2001-01-25 Not Zed * tests/folder/test3.c: Changed the subject search to handle case sensitive when it is mixed case. * Makefile.am (libcamel_la_SOURCES): Added camel-search-private.c (noinst_HEADERS): Added camel-search-private.h * camel-folder-search.c (check_header): New function to do the work of the various header checks. (search_header_matches): (search_header_starts_with): (search_header_ends_with): (search_header_contains): Use check_header to do the work. (build_match_regex): Removed. * camel-search-private.c (header_soundex): New function to match words to phrases using soundex algorithm. * camel-filter-search.c (soundexcmp): Removed. (check_match): Moved to search-private.h 2001-01-24 Not Zed * camel-search-private.c (camel_search_build_match_regex): Added extra flags, so the same function can be used for start/end/whole/partial matches. (camel_search_header_match): Convenience function to check a single header against all sorts of different matches. * providers/imap/camel-imap-search.c (imap_body_contains): Fix for e_sexp api changes. * camel-folder-search.c: Fix for e_sexp api changes. (search_header_contains): Free args/quit on unknown header. (search_header_matches): " (search_header_starts_with): " (search_header_ends_with): " (match_message): Add an exception argument. (search_body_contains): Free args/quit on fatal error. (message_body_contains): Removed (moved to camel-search-private.c), fixed callers. * camel-filter-search.c: Fix for e_sexp api changes. (build_match_regex, message_body_contains): Moved into camel-search-private.c Fixed callers. (check_header): moved guts to camel-search-private, and changed to use regex's for everything. Just calls that with the right args. 
(check_header): GEts the header, decodes it, and checks for failure, and whatnot. (check_match): Removed. (header_soundex): Changed significantly. Now it soundexes each word in the header separately, and compares it to the first argument. * tests/folder/test9.c (main): Fix for api changes. (main): Added tests to see that invalid match and action rules are properly detected. * camel-filter-driver.c (camel_filter_driver_filter_mbox): Remove the 'finished message' bit. (camel_filter_driver_filter_message): Remove an accidentally checked in debug. (camel_filter_driver_filter_message): Fix for e-sexp api changes. svn path=/trunk/; revision=7796 --- camel/camel-filter-search.c | 260 +++++--------------------------------------- 1 file changed, 30 insertions(+), 230 deletions(-) (limited to 'camel/camel-filter-search.c') diff --git a/camel/camel-filter-search.c b/camel/camel-filter-search.c index c8c5290d37..b7590fe197 100644 --- a/camel/camel-filter-search.c +++ b/camel/camel-filter-search.c @@ -29,9 +29,6 @@ #include #include -#warning "Fixme: remove gal/widgets/e-unicode dependency" -#include - #include "e-util/e-sexp.h" #include "camel-mime-message.h" @@ -39,6 +36,7 @@ #include "camel-exception.h" #include "camel-multipart.h" #include "camel-stream-mem.h" +#include "camel-search-private.h" #define d(x) @@ -98,165 +96,8 @@ static struct { { "get-source", (ESExpFunc *) get_source, 0 }, }; -/* builds the regex into pattern */ -/* taken from camel-folder-search, with added isregex & exception parameter */ -/* Basically, we build a new regex, either based on subset regex's, or substrings, - that can be executed once over the whoel body, to match anything suitable. - This is more efficient than multiple searches, and probably most (naive) strstr - implementations, over long content. - - A small issue is that case-insenstivity wont work entirely correct for utf8 strings. 
*/ -static int -build_match_regex(regex_t *pattern, int isregex, int argc, struct _ESExpResult **argv, CamelException *ex) -{ - GString *match = g_string_new(""); - int c, i, count=0, err; - char *word; - - /* build a regex pattern we can use to match the words, we OR them together */ - if (argc>1) - g_string_append_c(match, '('); - for (i=0;itype == ESEXP_RES_STRING) { - if (count > 0) - g_string_append_c(match, '|'); - /* escape any special chars (not sure if this list is complete) */ - word = argv[i]->value.string; - if (isregex) { - g_string_append(match, word); - } else { - while ((c = *word++)) { - if (strchr("*\\.()[]^$+", c) != NULL) { - g_string_append_c(match, '\\'); - } - g_string_append_c(match, c); - } - } - count++; - } else { - g_warning("Invalid type passed to body-contains match function"); - } - } - if (argc>1) - g_string_append_c(match, ')'); - err = regcomp(pattern, match->str, REG_EXTENDED|REG_ICASE|REG_NOSUB); - if (err != 0) { - /* regerror gets called twice to get the full error string - length to do proper posix error reporting */ - int len = regerror(err, pattern, 0, 0); - char *buffer = g_malloc0(len + 1); - - regerror(err, pattern, buffer, len); - camel_exception_setv(ex, CAMEL_EXCEPTION_SYSTEM, - _("Regular expression compilation failed: %s: %s"), - match->str, buffer); - - regfree(pattern); - } - d(printf("Built regex: '%s'\n", match->str)); - g_string_free(match, TRUE); - return err; -} - -static unsigned char soundex_table[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 49, 50, 51, 0, 49, 50, 0, 0, 50, 50, 52, 53, 53, 0, - 49, 50, 54, 50, 51, 0, 49, 0, 50, 0, 50, 0, 0, 0, 0, 0, - 0, 0, 49, 50, 51, 0, 49, 50, 0, 0, 50, 50, 52, 53, 53, 0, - 49, 50, 54, 50, 51, 0, 49, 0, 50, 0, 50, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static void -soundexify (const gchar *sound, gchar code[5]) -{ - guchar *c, last = '\0'; - gint n; - - for (c = (guchar *) sound; *c && !isalpha (*c); c++); - code[0] = toupper (*c); - memset (code + 1, '0', 3); - for (n = 1; *c && n < 5; c++) { - guchar ch = soundex_table[*c]; - - if (ch && ch != last) { - code[n++] = ch; - last = ch; - } - } - code[4] = '\0'; -} - -static gint -soundexcmp (const gchar *sound1, const gchar *sound2) -{ - gchar code1[5], code2[5]; - - soundexify (sound1, code1); - soundexify (sound2, code2); - - return strcmp (code1, code2); -} - -static gboolean check_match(const char *value, const char *match, int how) -{ - const char *p; - - while (*value && isspace(*value)) - value++; - - if (strlen(value) < strlen(match)) - return FALSE; - - /* from dan the man, if we have mixed case, perform a case-sensitive match, - otherwise not */ - p = match; - while (*p) { - if (isupper(*p)) { - switch(how) { - case 0: /* is */ - return strcmp(value, match) == 0; - case 1: /* contains */ - return strstr(value, match) != NULL; - case 2: /* starts with */ - return strncmp(value, match, strlen(match)) == 0; - case 3: /* ends with */ - return strcmp(value+strlen(value)-strlen(match), match) == 0; - case 4: /* soundex */ - return soundexcmp(value, match) == 0; - } - return FALSE; - } - p++; - } - switch(how) { - case 0: /* is */ - return strcasecmp(value, match) == 0; - case 1: /* contains */ - return e_utf8_strstrcase(value, match) != NULL; - case 2: /* starts with */ - return strncasecmp(value, match, strlen(match)) == 0; - case 3: /* ends with */ - return strcasecmp(value+strlen(value)-strlen(match), match) == 0; - case 4: /* 
soundex */ - return soundexcmp(value, match) == 0; - } - - return FALSE; -} - static ESExpResult * -check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms, int how) +check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms, camel_search_match_t how) { gboolean matched = FALSE; ESExpResult *r; @@ -268,7 +109,7 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessa if (header) { for (i=1;itype == ESEXP_RES_STRING - && check_match(header, argv[i]->value.string, how)) { + && camel_search_header_match(header, argv[i]->value.string, how)) { matched = TRUE; break; } @@ -276,7 +117,7 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessa } } - r = e_sexp_result_new (ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = matched; return r; @@ -285,32 +126,32 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessa static ESExpResult * header_contains (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - return check_header(f, argc, argv, fms, 1); + return check_header(f, argc, argv, fms, CAMEL_SEARCH_MATCH_CONTAINS); } static ESExpResult * header_matches (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - return check_header(f, argc, argv, fms, 0); + return check_header(f, argc, argv, fms, CAMEL_SEARCH_MATCH_EXACT); } static ESExpResult * header_starts_with (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - return check_header(f, argc, argv, fms, 2); + return check_header(f, argc, argv, fms, CAMEL_SEARCH_MATCH_STARTS); } static ESExpResult * header_ends_with (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - return check_header(f, argc, argv, fms, 3); + return check_header(f, argc, argv, fms, CAMEL_SEARCH_MATCH_ENDS); } static ESExpResult * header_soundex 
(struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - return check_header(f, argc, argv, fms, 4); + return check_header(f, argc, argv, fms, CAMEL_SEARCH_MATCH_SOUNDEX); } static ESExpResult * @@ -325,7 +166,7 @@ header_exists (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMes matched = camel_medium_get_header (CAMEL_MEDIUM (fms->message), argv[i]->value.string) != NULL; } - r = e_sexp_result_new (ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = matched; return r; @@ -334,14 +175,14 @@ header_exists (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMes static ESExpResult * header_regex (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - ESExpResult *r = e_sexp_result_new (ESEXP_RES_BOOL); + ESExpResult *r = e_sexp_result_new(f, ESEXP_RES_BOOL); regex_t pattern; const char *contents; if (argc>1 && argv[0]->type == ESEXP_RES_STRING && (contents = camel_medium_get_header (CAMEL_MEDIUM (fms->message), argv[0]->value.string)) - && build_match_regex(&pattern, TRUE, argc-1, argv+1, fms->ex) == 0) { + && camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_REGEX|CAMEL_SEARCH_MATCH_ICASE, argc-1, argv+1, fms->ex) == 0) { r->value.bool = regexec(&pattern, contents, 0, NULL, 0) == 0; regfree(&pattern); } else @@ -378,11 +219,11 @@ get_full_header (CamelMimeMessage *message) static ESExpResult * header_full_regex (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - ESExpResult *r = e_sexp_result_new (ESEXP_RES_BOOL); + ESExpResult *r = e_sexp_result_new(f, ESEXP_RES_BOOL); regex_t pattern; char *contents; - if (build_match_regex(&pattern, TRUE, argc, argv, fms->ex) == 0) { + if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_REGEX|CAMEL_SEARCH_MATCH_ICASE, argc-1, argv+1, fms->ex) == 0) { contents = get_full_header (fms->message); r->value.bool = regexec(&pattern, contents, 0, NULL, 0) == 0; 
g_free(contents); @@ -402,61 +243,20 @@ match_all (struct _ESExp *f, int argc, struct _ESExpTerm **argv, FilterMessageSe if (argc > 0) return e_sexp_term_eval(f, argv[0]); - r = e_sexp_result_new (ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = FALSE; return r; } -/* performs a 'slow' content-based match */ -/* taken directly from camel-folder-search.c */ -static gboolean -message_body_contains(CamelDataWrapper *object, regex_t *pattern) -{ - CamelDataWrapper *containee; - int truth = FALSE; - int parts, i; - - containee = camel_medium_get_content_object(CAMEL_MEDIUM(object)); - - if (containee == NULL) - return FALSE; - - /* TODO: I find it odd that get_part and get_content_object do not - add a reference, probably need fixing for multithreading */ - - /* using the object types is more accurate than using the mime/types */ - if (CAMEL_IS_MULTIPART(containee)) { - parts = camel_multipart_get_number(CAMEL_MULTIPART(containee)); - for (i=0;imime_type, "text", "*")) { - /* for all other text parts, we look inside, otherwise we dont care */ - CamelStreamMem *mem = (CamelStreamMem *)camel_stream_mem_new(); - - camel_data_wrapper_write_to_stream(containee, (CamelStream *)mem); - camel_stream_write((CamelStream *)mem, "", 1); - truth = regexec(pattern, mem->buffer->data, 0, NULL, 0) == 0; - camel_object_unref((CamelObject *)mem); - } - return truth; -} - static ESExpResult * body_contains (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - ESExpResult *r = e_sexp_result_new (ESEXP_RES_BOOL); + ESExpResult *r = e_sexp_result_new(f, ESEXP_RES_BOOL); regex_t pattern; - if (build_match_regex(&pattern, FALSE, argc, argv, fms->ex) == 0) { - r->value.bool = message_body_contains((CamelDataWrapper *)fms->message, &pattern); + if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, fms->ex) == 0) { + r->value.bool = camel_search_message_body_contains((CamelDataWrapper *)fms->message, 
&pattern); regfree(&pattern); } else r->value.bool = FALSE; @@ -467,11 +267,11 @@ body_contains (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMes static ESExpResult * body_regex (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - ESExpResult *r = e_sexp_result_new (ESEXP_RES_BOOL); + ESExpResult *r = e_sexp_result_new(f, ESEXP_RES_BOOL); regex_t pattern; - if (build_match_regex(&pattern, TRUE, argc, argv, fms->ex) == 0) { - r->value.bool = message_body_contains((CamelDataWrapper *)fms->message, &pattern); + if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE|CAMEL_SEARCH_MATCH_REGEX, argc, argv, fms->ex) == 0) { + r->value.bool = camel_search_message_body_contains((CamelDataWrapper *)fms->message, &pattern); regfree(&pattern); } else r->value.bool = FALSE; @@ -495,7 +295,7 @@ user_flag (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessage } } - r = e_sexp_result_new (ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = truth; return r; @@ -510,7 +310,7 @@ system_flag (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessa if (argc == 1) truth = camel_system_flag_get (fms->info->flags, argv[0]->value.string); - r = e_sexp_result_new (ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = truth; return r; @@ -524,7 +324,7 @@ user_tag (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageS tag = camel_tag_get (&fms->info->user_tags, argv[0]->value.string); - r = e_sexp_result_new (ESEXP_RES_STRING); + r = e_sexp_result_new(f, ESEXP_RES_STRING); r->value.string = g_strdup (tag ? 
tag : ""); return r; @@ -535,7 +335,7 @@ get_sent_date (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMes { ESExpResult *r; - r = e_sexp_result_new(ESEXP_RES_INT); + r = e_sexp_result_new(f, ESEXP_RES_INT); r->value.number = camel_mime_message_get_date(fms->message, NULL); return r; @@ -546,7 +346,7 @@ get_received_date (struct _ESExp *f, int argc, struct _ESExpResult **argv, Filte { ESExpResult *r; - r = e_sexp_result_new(ESEXP_RES_INT); + r = e_sexp_result_new(f, ESEXP_RES_INT); r->value.number = camel_mime_message_get_date_received(fms->message, NULL); return r; @@ -557,7 +357,7 @@ get_current_date (struct _ESExp *f, int argc, struct _ESExpResult **argv, Filter { ESExpResult *r; - r = e_sexp_result_new (ESEXP_RES_INT); + r = e_sexp_result_new(f, ESEXP_RES_INT); r->value.number = time (NULL); return r; @@ -571,7 +371,7 @@ get_score (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessage tag = camel_tag_get (&fms->info->user_tags, "score"); - r = e_sexp_result_new (ESEXP_RES_INT); + r = e_sexp_result_new(f, ESEXP_RES_INT); if (tag) r->value.number = atoi (tag); else @@ -585,7 +385,7 @@ get_source (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessag { ESExpResult *r; - r = e_sexp_result_new (ESEXP_RES_STRING); + r = e_sexp_result_new(f, ESEXP_RES_STRING); r->value.string = g_strdup (fms->source); return r; @@ -632,7 +432,7 @@ gboolean camel_filter_search_match(CamelMimeMessage *message, CamelMessageInfo * else retval = FALSE; - e_sexp_result_free (result); + e_sexp_result_free (sexp, result); e_sexp_unref(sexp); return retval; -- cgit v1.2.3