From 1a403012d41b1619f4d946921184425b62b322a9 Mon Sep 17 00:00:00 2001 From: Not Zed Date: Thu, 25 Jan 2001 02:04:28 +0000 Subject: Changed the subject search to handle case sensitive when it is mixed case. 2001-01-25 Not Zed * tests/folder/test3.c: Changed the subject search to handle case sensitive when it is mixed case. * Makefile.am (libcamel_la_SOURCES): Added camel-search-private.c (noinst_HEADERS): Added camel-search-private.h * camel-folder-search.c (check_header): New function to do the work of the various header checks. (search_header_matches): (search_header_starts_with): (search_header_ends_with): (search_header_contains): Use check_header to do the work. (build_match_regex): Removed. * camel-search-private.c (header_soundex): New function to match words to phrases using soundex algorithm. * camel-filter-search.c (soundexcmp): Removed. (check_match): Moved to search-private.h 2001-01-24 Not Zed * camel-search-private.c (camel_search_build_match_regex): Added extra flags, so the same function can be used for start/end/whole/partial matches. (camel_search_header_match): Convenience function to check a single header against all sorts of different matches. * providers/imap/camel-imap-search.c (imap_body_contains): Fix for e_sexp api changes. * camel-folder-search.c: Fix for e_sexp api changes. (search_header_contains): Free args/quit on unknown header. (search_header_matches): " (search_header_starts_with): " (search_header_ends_with): " (match_message): Add an exception argument. (search_body_contains): Free args/quit on fatal error. (message_body_contains): Removed (moved to camel-search-private.c), fixed callers. * camel-filter-search.c: Fix for e_sexp api changes. (build_match_regex, message_body_contains): Moved into camel-search-private.c. Fixed callers. (check_header): moved guts to camel-search-private, and changed to use regexes for everything. Just calls that with the right args. 
(check_header): Gets the header, decodes it, and checks for failure, and whatnot. (check_match): Removed. (header_soundex): Changed significantly. Now it soundexes each word in the header separately, and compares it to the first argument. * tests/folder/test9.c (main): Fix for api changes. (main): Added tests to see that invalid match and action rules are properly detected. * camel-filter-driver.c (camel_filter_driver_filter_mbox): Remove the 'finished message' bit. (camel_filter_driver_filter_message): Remove an accidentally checked in debug. (camel_filter_driver_filter_message): Fix for e-sexp api changes. svn path=/trunk/; revision=7796 --- camel/ChangeLog | 55 +++++ camel/Makefile.am | 4 +- camel/camel-filter-driver.c | 2 +- camel/camel-filter-search.c | 260 +++------------------ camel/camel-folder-search.c | 381 +++++-------------------------- camel/providers/imap/camel-imap-search.c | 4 +- camel/tests/folder/test3.c | 2 +- 7 files changed, 143 insertions(+), 565 deletions(-) diff --git a/camel/ChangeLog b/camel/ChangeLog index 8542d1f0d1..2f5dbaf1ee 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,5 +1,59 @@ +2001-01-25 Not Zed + + * tests/folder/test3.c: Changed the subject search to handle case + sensitive when it is mixed case. + + * Makefile.am (libcamel_la_SOURCES): Added camel-ssearch-private.c + (noinst_HEADERS): Added camel-search-private.h + + * camel-folder-search.c (check_header): New function to do the + work of the various header checks. + (search_header_matches): + (search_header_starts_with): + (search_header_ends_with): + (search_header_contains): Use check_header to do the work. + (build_match_regex): Removed. + + * camel-search-private.c (header_soundex): New + function to match words to phrases using soundex algorithm. + + * camel-filter-search.c (soundexcmp): Removed. 
+ (check_match): Moved to search-private.h + 2001-01-24 Not Zed + * camel-search-private.c (camel_search_build_match_regex): Added + extra flags, so the same function can be used for + start/end/whole/partial matches. + (camel_search_header_match): Convenience function to check a + single header against all sorts of different matches. + + * providers/imap/camel-imap-search.c (imap_body_contains): Fix for + e_sexp api changes. + + * camel-folder-search.c: Fix for e_sexp api changes. + (search_header_contains): Free args/quit on unknown header. + (search_header_matches): " + (search_header_starts_with): " + (search_header_ends_with): " + (match_message): Add an exception argument. + (search_body_contains): Free args/quit on fatal error. + (message_body_contains): Removed (moved to + camel-search-private.c), fixed callers. + + * camel-filter-search.c: Fix for e_sexp api changes. + (build_match_regex, message_body_contains): Moved into + camel-filter-private.c Fixed callers. + (check_header): moved guts to camel-search-private, and + changed to use regex's for everything. Just calls that with the + right args. + (check_header): GEts the header, decodes it, and checks for + failure, and whatnot. + (check_match): Removed. + (header_soundex): Changed significantly. Now it soundexes each + word in the header separately, and compares it to the first + argument. + * tests/folder/test9.c (main): Fix for api changes. (main): Added tests to see that invalid match and action rules are properly detected. @@ -8,6 +62,7 @@ the 'finished message' bit. (camel_filter_driver_filter_message): Remove an accidentally checked in debug. + (camel_filter_driver_filter_message): Fix for e-sexp api changes. 
2001-01-23 Dan Winship diff --git a/camel/Makefile.am b/camel/Makefile.am index 8564db65d5..063ec03f08 100644 --- a/camel/Makefile.am +++ b/camel/Makefile.am @@ -48,6 +48,7 @@ libcamel_la_SOURCES = \ camel-object.c \ camel-provider.c \ camel-remote-store.c \ + camel-search-private.c \ camel-seekable-stream.c \ camel-seekable-substream.c \ camel-service.c \ @@ -130,7 +131,8 @@ libcamel_la_LIBADD = $(top_builddir)/e-util/libeutil.la $(UNICODE_LIBS) noinst_HEADERS = \ camel-charset-map-private.h \ - camel-private.h + camel-private.h \ + camel-search-private.h EXTRA_DIST = \ README diff --git a/camel/camel-filter-driver.c b/camel/camel-filter-driver.c index e125fd595d..58e8de5412 100644 --- a/camel/camel-filter-driver.c +++ b/camel/camel-filter-driver.c @@ -772,7 +772,7 @@ camel_filter_driver_filter_message (CamelFilterDriver *driver, CamelMimeMessage camel_exception_setv(ex, 1, _("Error executing filter: %s: %s"), e_sexp_error(p->eval), node->action); goto error; } - e_sexp_result_free (r); + e_sexp_result_free (p->eval, r); if (p->terminated) break; } diff --git a/camel/camel-filter-search.c b/camel/camel-filter-search.c index c8c5290d37..b7590fe197 100644 --- a/camel/camel-filter-search.c +++ b/camel/camel-filter-search.c @@ -29,9 +29,6 @@ #include #include -#warning "Fixme: remove gal/widgets/e-unicode dependency" -#include - #include "e-util/e-sexp.h" #include "camel-mime-message.h" @@ -39,6 +36,7 @@ #include "camel-exception.h" #include "camel-multipart.h" #include "camel-stream-mem.h" +#include "camel-search-private.h" #define d(x) @@ -98,165 +96,8 @@ static struct { { "get-source", (ESExpFunc *) get_source, 0 }, }; -/* builds the regex into pattern */ -/* taken from camel-folder-search, with added isregex & exception parameter */ -/* Basically, we build a new regex, either based on subset regex's, or substrings, - that can be executed once over the whoel body, to match anything suitable. 
- This is more efficient than multiple searches, and probably most (naive) strstr - implementations, over long content. - - A small issue is that case-insenstivity wont work entirely correct for utf8 strings. */ -static int -build_match_regex(regex_t *pattern, int isregex, int argc, struct _ESExpResult **argv, CamelException *ex) -{ - GString *match = g_string_new(""); - int c, i, count=0, err; - char *word; - - /* build a regex pattern we can use to match the words, we OR them together */ - if (argc>1) - g_string_append_c(match, '('); - for (i=0;itype == ESEXP_RES_STRING) { - if (count > 0) - g_string_append_c(match, '|'); - /* escape any special chars (not sure if this list is complete) */ - word = argv[i]->value.string; - if (isregex) { - g_string_append(match, word); - } else { - while ((c = *word++)) { - if (strchr("*\\.()[]^$+", c) != NULL) { - g_string_append_c(match, '\\'); - } - g_string_append_c(match, c); - } - } - count++; - } else { - g_warning("Invalid type passed to body-contains match function"); - } - } - if (argc>1) - g_string_append_c(match, ')'); - err = regcomp(pattern, match->str, REG_EXTENDED|REG_ICASE|REG_NOSUB); - if (err != 0) { - /* regerror gets called twice to get the full error string - length to do proper posix error reporting */ - int len = regerror(err, pattern, 0, 0); - char *buffer = g_malloc0(len + 1); - - regerror(err, pattern, buffer, len); - camel_exception_setv(ex, CAMEL_EXCEPTION_SYSTEM, - _("Regular expression compilation failed: %s: %s"), - match->str, buffer); - - regfree(pattern); - } - d(printf("Built regex: '%s'\n", match->str)); - g_string_free(match, TRUE); - return err; -} - -static unsigned char soundex_table[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 49, 50, 51, 0, 49, 50, 0, 0, 50, 50, 52, 53, 53, 0, - 49, 50, 54, 50, 51, 0, 49, 0, 50, 0, 50, 
0, 0, 0, 0, 0, - 0, 0, 49, 50, 51, 0, 49, 50, 0, 0, 50, 50, 52, 53, 53, 0, - 49, 50, 54, 50, 51, 0, 49, 0, 50, 0, 50, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static void -soundexify (const gchar *sound, gchar code[5]) -{ - guchar *c, last = '\0'; - gint n; - - for (c = (guchar *) sound; *c && !isalpha (*c); c++); - code[0] = toupper (*c); - memset (code + 1, '0', 3); - for (n = 1; *c && n < 5; c++) { - guchar ch = soundex_table[*c]; - - if (ch && ch != last) { - code[n++] = ch; - last = ch; - } - } - code[4] = '\0'; -} - -static gint -soundexcmp (const gchar *sound1, const gchar *sound2) -{ - gchar code1[5], code2[5]; - - soundexify (sound1, code1); - soundexify (sound2, code2); - - return strcmp (code1, code2); -} - -static gboolean check_match(const char *value, const char *match, int how) -{ - const char *p; - - while (*value && isspace(*value)) - value++; - - if (strlen(value) < strlen(match)) - return FALSE; - - /* from dan the man, if we have mixed case, perform a case-sensitive match, - otherwise not */ - p = match; - while (*p) { - if (isupper(*p)) { - switch(how) { - case 0: /* is */ - return strcmp(value, match) == 0; - case 1: /* contains */ - return strstr(value, match) != NULL; - case 2: /* starts with */ - return strncmp(value, match, strlen(match)) == 0; - case 3: /* ends with */ - return strcmp(value+strlen(value)-strlen(match), match) == 0; - case 4: /* soundex */ - return soundexcmp(value, match) == 0; - } - return FALSE; - } - p++; - } - switch(how) { - case 0: /* is */ - return strcasecmp(value, match) == 0; - case 1: /* contains */ - return e_utf8_strstrcase(value, 
match) != NULL; - case 2: /* starts with */ - return strncasecmp(value, match, strlen(match)) == 0; - case 3: /* ends with */ - return strcasecmp(value+strlen(value)-strlen(match), match) == 0; - case 4: /* soundex */ - return soundexcmp(value, match) == 0; - } - - return FALSE; -} - static ESExpResult * -check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms, int how) +check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms, camel_search_match_t how) { gboolean matched = FALSE; ESExpResult *r; @@ -268,7 +109,7 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessa if (header) { for (i=1;itype == ESEXP_RES_STRING - && check_match(header, argv[i]->value.string, how)) { + && camel_search_header_match(header, argv[i]->value.string, how)) { matched = TRUE; break; } @@ -276,7 +117,7 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessa } } - r = e_sexp_result_new (ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = matched; return r; @@ -285,32 +126,32 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessa static ESExpResult * header_contains (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - return check_header(f, argc, argv, fms, 1); + return check_header(f, argc, argv, fms, CAMEL_SEARCH_MATCH_CONTAINS); } static ESExpResult * header_matches (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - return check_header(f, argc, argv, fms, 0); + return check_header(f, argc, argv, fms, CAMEL_SEARCH_MATCH_EXACT); } static ESExpResult * header_starts_with (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - return check_header(f, argc, argv, fms, 2); + return check_header(f, argc, argv, fms, CAMEL_SEARCH_MATCH_STARTS); } static ESExpResult * header_ends_with (struct _ESExp *f, int argc, 
struct _ESExpResult **argv, FilterMessageSearch *fms) { - return check_header(f, argc, argv, fms, 3); + return check_header(f, argc, argv, fms, CAMEL_SEARCH_MATCH_ENDS); } static ESExpResult * header_soundex (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - return check_header(f, argc, argv, fms, 4); + return check_header(f, argc, argv, fms, CAMEL_SEARCH_MATCH_SOUNDEX); } static ESExpResult * @@ -325,7 +166,7 @@ header_exists (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMes matched = camel_medium_get_header (CAMEL_MEDIUM (fms->message), argv[i]->value.string) != NULL; } - r = e_sexp_result_new (ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = matched; return r; @@ -334,14 +175,14 @@ header_exists (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMes static ESExpResult * header_regex (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - ESExpResult *r = e_sexp_result_new (ESEXP_RES_BOOL); + ESExpResult *r = e_sexp_result_new(f, ESEXP_RES_BOOL); regex_t pattern; const char *contents; if (argc>1 && argv[0]->type == ESEXP_RES_STRING && (contents = camel_medium_get_header (CAMEL_MEDIUM (fms->message), argv[0]->value.string)) - && build_match_regex(&pattern, TRUE, argc-1, argv+1, fms->ex) == 0) { + && camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_REGEX|CAMEL_SEARCH_MATCH_ICASE, argc-1, argv+1, fms->ex) == 0) { r->value.bool = regexec(&pattern, contents, 0, NULL, 0) == 0; regfree(&pattern); } else @@ -378,11 +219,11 @@ get_full_header (CamelMimeMessage *message) static ESExpResult * header_full_regex (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - ESExpResult *r = e_sexp_result_new (ESEXP_RES_BOOL); + ESExpResult *r = e_sexp_result_new(f, ESEXP_RES_BOOL); regex_t pattern; char *contents; - if (build_match_regex(&pattern, TRUE, argc, argv, fms->ex) == 0) { + if 
(camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_REGEX|CAMEL_SEARCH_MATCH_ICASE, argc-1, argv+1, fms->ex) == 0) { contents = get_full_header (fms->message); r->value.bool = regexec(&pattern, contents, 0, NULL, 0) == 0; g_free(contents); @@ -402,61 +243,20 @@ match_all (struct _ESExp *f, int argc, struct _ESExpTerm **argv, FilterMessageSe if (argc > 0) return e_sexp_term_eval(f, argv[0]); - r = e_sexp_result_new (ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = FALSE; return r; } -/* performs a 'slow' content-based match */ -/* taken directly from camel-folder-search.c */ -static gboolean -message_body_contains(CamelDataWrapper *object, regex_t *pattern) -{ - CamelDataWrapper *containee; - int truth = FALSE; - int parts, i; - - containee = camel_medium_get_content_object(CAMEL_MEDIUM(object)); - - if (containee == NULL) - return FALSE; - - /* TODO: I find it odd that get_part and get_content_object do not - add a reference, probably need fixing for multithreading */ - - /* using the object types is more accurate than using the mime/types */ - if (CAMEL_IS_MULTIPART(containee)) { - parts = camel_multipart_get_number(CAMEL_MULTIPART(containee)); - for (i=0;imime_type, "text", "*")) { - /* for all other text parts, we look inside, otherwise we dont care */ - CamelStreamMem *mem = (CamelStreamMem *)camel_stream_mem_new(); - - camel_data_wrapper_write_to_stream(containee, (CamelStream *)mem); - camel_stream_write((CamelStream *)mem, "", 1); - truth = regexec(pattern, mem->buffer->data, 0, NULL, 0) == 0; - camel_object_unref((CamelObject *)mem); - } - return truth; -} - static ESExpResult * body_contains (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - ESExpResult *r = e_sexp_result_new (ESEXP_RES_BOOL); + ESExpResult *r = e_sexp_result_new(f, ESEXP_RES_BOOL); regex_t pattern; - if (build_match_regex(&pattern, FALSE, argc, argv, fms->ex) == 0) { - r->value.bool = 
message_body_contains((CamelDataWrapper *)fms->message, &pattern); + if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, fms->ex) == 0) { + r->value.bool = camel_search_message_body_contains((CamelDataWrapper *)fms->message, &pattern); regfree(&pattern); } else r->value.bool = FALSE; @@ -467,11 +267,11 @@ body_contains (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMes static ESExpResult * body_regex (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageSearch *fms) { - ESExpResult *r = e_sexp_result_new (ESEXP_RES_BOOL); + ESExpResult *r = e_sexp_result_new(f, ESEXP_RES_BOOL); regex_t pattern; - if (build_match_regex(&pattern, TRUE, argc, argv, fms->ex) == 0) { - r->value.bool = message_body_contains((CamelDataWrapper *)fms->message, &pattern); + if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE|CAMEL_SEARCH_MATCH_REGEX, argc, argv, fms->ex) == 0) { + r->value.bool = camel_search_message_body_contains((CamelDataWrapper *)fms->message, &pattern); regfree(&pattern); } else r->value.bool = FALSE; @@ -495,7 +295,7 @@ user_flag (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessage } } - r = e_sexp_result_new (ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = truth; return r; @@ -510,7 +310,7 @@ system_flag (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessa if (argc == 1) truth = camel_system_flag_get (fms->info->flags, argv[0]->value.string); - r = e_sexp_result_new (ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = truth; return r; @@ -524,7 +324,7 @@ user_tag (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessageS tag = camel_tag_get (&fms->info->user_tags, argv[0]->value.string); - r = e_sexp_result_new (ESEXP_RES_STRING); + r = e_sexp_result_new(f, ESEXP_RES_STRING); r->value.string = g_strdup (tag ? 
tag : ""); return r; @@ -535,7 +335,7 @@ get_sent_date (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMes { ESExpResult *r; - r = e_sexp_result_new(ESEXP_RES_INT); + r = e_sexp_result_new(f, ESEXP_RES_INT); r->value.number = camel_mime_message_get_date(fms->message, NULL); return r; @@ -546,7 +346,7 @@ get_received_date (struct _ESExp *f, int argc, struct _ESExpResult **argv, Filte { ESExpResult *r; - r = e_sexp_result_new(ESEXP_RES_INT); + r = e_sexp_result_new(f, ESEXP_RES_INT); r->value.number = camel_mime_message_get_date_received(fms->message, NULL); return r; @@ -557,7 +357,7 @@ get_current_date (struct _ESExp *f, int argc, struct _ESExpResult **argv, Filter { ESExpResult *r; - r = e_sexp_result_new (ESEXP_RES_INT); + r = e_sexp_result_new(f, ESEXP_RES_INT); r->value.number = time (NULL); return r; @@ -571,7 +371,7 @@ get_score (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessage tag = camel_tag_get (&fms->info->user_tags, "score"); - r = e_sexp_result_new (ESEXP_RES_INT); + r = e_sexp_result_new(f, ESEXP_RES_INT); if (tag) r->value.number = atoi (tag); else @@ -585,7 +385,7 @@ get_source (struct _ESExp *f, int argc, struct _ESExpResult **argv, FilterMessag { ESExpResult *r; - r = e_sexp_result_new (ESEXP_RES_STRING); + r = e_sexp_result_new(f, ESEXP_RES_STRING); r->value.string = g_strdup (fms->source); return r; @@ -632,7 +432,7 @@ gboolean camel_filter_search_match(CamelMimeMessage *message, CamelMessageInfo * else retval = FALSE; - e_sexp_result_free (result); + e_sexp_result_free (sexp, result); e_sexp_unref(sexp); return retval; diff --git a/camel/camel-folder-search.c b/camel/camel-folder-search.c index 45409de3c9..d2d69c4d08 100644 --- a/camel/camel-folder-search.c +++ b/camel/camel-folder-search.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000 Helix Code Inc. + * Copyright (C) 2000,2001 Ximian Inc. 
* * Authors: Michael Zucchi * @@ -30,8 +30,6 @@ #include #include -#warning "Fixme: remove gal/widgets/e-unicode dependency" -#include #include "camel-folder-search.h" #include "string-utils.h" @@ -41,6 +39,7 @@ #include "camel-mime-message.h" #include "camel-stream-mem.h" #include "e-util/e-memory.h" +#include "camel-search-private.h" #define d(x) #define r(x) @@ -364,7 +363,7 @@ camel_folder_search_execute_expression(CamelFolderSearch *search, const char *ex g_ptr_array_add(matches, e_mempool_strdup(pool, g_ptr_array_index(r->value.ptrarray, i))); } } - e_sexp_result_free(r); + e_sexp_result_free(search->sexp, r); /* instead of putting the mempool_hash in the structure, we keep the api clean by putting a reference to it in a hashtable. Lets us do some debugging and catch unfree'd results as well. */ @@ -440,10 +439,10 @@ search_dummy(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolder ESExpResult *r; if (search->current == NULL) { - r = e_sexp_result_new(ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = FALSE; } else { - r = e_sexp_result_new(ESEXP_RES_ARRAY_PTR); + r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR); r->value.ptrarray = g_ptr_array_new(); } @@ -459,7 +458,7 @@ search_match_all(struct _ESExp *f, int argc, struct _ESExpTerm **argv, CamelFold if (argc>1) { g_warning("match-all only takes a single argument, other arguments ignored"); } - r = e_sexp_result_new(ESEXP_RES_ARRAY_PTR); + r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR); r->value.ptrarray = g_ptr_array_new(); /* we are only matching a single message? 
*/ @@ -475,7 +474,7 @@ search_match_all(struct _ESExp *f, int argc, struct _ESExpTerm **argv, CamelFold g_warning("invalid syntax, matches require a single bool result"); e_sexp_fatal_error(f, _("(match-all) requires a single bool result")); } - e_sexp_result_free(r1); + e_sexp_result_free(f, r1); } else { g_ptr_array_add(r->value.ptrarray, (char *)camel_message_info_uid(search->current)); } @@ -503,7 +502,7 @@ search_match_all(struct _ESExp *f, int argc, struct _ESExpTerm **argv, CamelFold g_warning("invalid syntax, matches require a single bool result"); e_sexp_fatal_error(f, _("(match-all) requires a single bool result")); } - e_sexp_result_free(r1); + e_sexp_result_free(f, r1); } else { g_ptr_array_add(r->value.ptrarray, (char *)camel_message_info_uid(search->current)); } @@ -514,12 +513,12 @@ search_match_all(struct _ESExp *f, int argc, struct _ESExpTerm **argv, CamelFold } static ESExpResult * -search_header_contains(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolderSearch *search) +check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolderSearch *search, camel_search_match_t how) { ESExpResult *r; int truth = FALSE; - r(printf("executing header-contains\n")); + r(printf("executing check-header\n")); /* are we inside a match-all? 
*/ if (search->current && argc>1 @@ -544,240 +543,50 @@ search_header_contains(struct _ESExp *f, int argc, struct _ESExpResult **argv, C } else if (!strcasecmp(headername, "cc")) { header = camel_message_info_cc(search->current); } else { - g_warning("Performing query on unknown header: %s", headername); + e_sexp_resultv_free(f, argc, argv); + e_sexp_fatal_error(f, _("Performing query on unknown header: %s"), headername); } if (header) { /* performs an OR of all words */ for (i=1;itype == ESEXP_RES_STRING - && e_utf8_strstrcase (header, argv[i]->value.string)) { - r(printf("%s got a match with %s of %s\n", - camel_message_info_uid(search->current), - header, argv[i]->value.string)); + && camel_search_header_match(header, argv[i]->value.string, how)) { truth = TRUE; - break; } } } } /* TODO: else, find all matches */ - r = e_sexp_result_new(ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = truth; return r; } +static ESExpResult * +search_header_contains(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolderSearch *search) +{ + return check_header(f, argc, argv, search, CAMEL_SEARCH_MATCH_CONTAINS); +} + static ESExpResult * search_header_matches(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolderSearch *search) { - ESExpResult *r; - - r(printf ("executing header-matches\n")); - - if (search->current && argc == 2) { - char *headername; - const char *header = NULL; - char strbuf[32]; - gboolean truth = FALSE; - - /* only a subset of headers are supported .. 
*/ - headername = argv[0]->value.string; - if (!strcasecmp (headername, "subject")) { - header = camel_message_info_subject (search->current); - } else if (!strcasecmp (headername, "date")) { - /* FIXME: not a very useful form of the date */ - sprintf (strbuf, "%d", (int)search->current->date_sent); - header = strbuf; - } else if (!strcasecmp (headername, "from")) { - header = camel_message_info_from (search->current); - } else if (!strcasecmp (headername, "to")) { - header = camel_message_info_to (search->current); - } else if (!strcasecmp (headername, "cc")) { - header = camel_message_info_cc (search->current); - } else { - g_warning ("Performing query on unknown header: %s", headername); - } - - if (header && argv[1]->type == ESEXP_RES_STRING) { - /* danw says to use search-engine style matching... - * This means that if the search match string is - * lowercase then compare case-insensitive else - * compare case-sensitive. */ - gboolean is_lowercase = TRUE; - char *match = argv[1]->value.string; - char *c; - - /* remove any leading white space... 
*/ - for ( ; *header && isspace (*header); header++); - - for (c = match; *c; c++) { - if (isalpha (*c) && isupper (*c)) { - is_lowercase = FALSE; - break; - } - } - - if (is_lowercase) { - if (!g_strcasecmp (header, match)) - truth = TRUE; - } else { - if (!strcmp (header, match)) - truth = TRUE; - } - } - - r = e_sexp_result_new (ESEXP_RES_BOOL); - r->value.bool = truth; - } else { - r = e_sexp_result_new (ESEXP_RES_ARRAY_PTR); - r->value.ptrarray = g_ptr_array_new (); - } - - return r; + return check_header(f, argc, argv, search, CAMEL_SEARCH_MATCH_EXACT); } static ESExpResult * search_header_starts_with (struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolderSearch *search) { - ESExpResult *r; - - r(printf ("executing header-starts-with\n")); - - if (search->current && argc == 2) { - char *headername, *match; - const char *header = NULL; - char strbuf[32]; - gboolean truth = FALSE; - - /* only a subset of headers are supported .. */ - headername = argv[0]->value.string; - if (!strcasecmp (headername, "subject")) { - header = camel_message_info_subject (search->current); - } else if (!strcasecmp (headername, "date")) { - /* FIXME: not a very useful form of the date */ - sprintf (strbuf, "%d", (int)search->current->date_sent); - header = strbuf; - } else if (!strcasecmp (headername, "from")) { - header = camel_message_info_from (search->current); - } else if (!strcasecmp (headername, "to")) { - header = camel_message_info_to (search->current); - } else if (!strcasecmp (headername, "cc")) { - header = camel_message_info_cc (search->current); - } else { - g_warning ("Performing query on unknown header: %s", headername); - } - - match = argv[1]->value.string; - - if (header && strlen (header) >= strlen (match)) { - /* danw says to use search-engine style matching... - * This means that if the search match string is - * lowercase then compare case-insensitive else - * compare case-sensitive. 
*/ - gboolean is_lowercase = TRUE; - char *c; - - /* remove any leading white space... */ - for ( ; *header && isspace (*header); header++); - - for (c = match; *c; c++) { - if (isalpha (*c) && isupper (*c)) { - is_lowercase = FALSE; - break; - } - } - - if (is_lowercase) { - if (!g_strncasecmp (header, match, strlen (match))) - truth = TRUE; - } else { - if (!strncmp (header, match, strlen (match))) - truth = TRUE; - } - } - - r = e_sexp_result_new (ESEXP_RES_BOOL); - r->value.bool = truth; - } else { - r = e_sexp_result_new (ESEXP_RES_ARRAY_PTR); - r->value.ptrarray = g_ptr_array_new (); - } - - return r; + return check_header(f, argc, argv, search, CAMEL_SEARCH_MATCH_STARTS); } static ESExpResult * search_header_ends_with (struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolderSearch *search) { - ESExpResult *r; - - r(printf ("executing header-ends-with\n")); - - if (search->current && argc == 2) { - char *headername, *match; - const char *header = NULL; - char strbuf[32]; - gboolean truth = FALSE; - - /* only a subset of headers are supported .. */ - headername = argv[0]->value.string; - if (!strcasecmp (headername, "subject")) { - header = camel_message_info_subject (search->current); - } else if (!strcasecmp (headername, "date")) { - /* FIXME: not a very useful form of the date */ - sprintf (strbuf, "%d", (int)search->current->date_sent); - header = strbuf; - } else if (!strcasecmp (headername, "from")) { - header = camel_message_info_from (search->current); - } else if (!strcasecmp (headername, "to")) { - header = camel_message_info_to (search->current); - } else if (!strcasecmp (headername, "cc")) { - header = camel_message_info_cc (search->current); - } else { - g_warning ("Performing query on unknown header: %s", headername); - } - - match = argv[1]->value.string; - - if (header && strlen (header) >= strlen (match)) { - /* danw says to use search-engine style matching... 
- * This means that if the search match string is - * lowercase then compare case-insensitive else - * compare case-sensitive. */ - gboolean is_lowercase = TRUE; - char *c, *end; - - /* remove any leading white space... */ - for ( ; *header && isspace (*header); header++); - - for (c = match; *c; c++) { - if (isalpha (*c) && isupper (*c)) { - is_lowercase = FALSE; - break; - } - } - - end = (char *) header + strlen (header) - strlen (match); - - if (is_lowercase) { - if (!g_strncasecmp (header, match, strlen (match))) - truth = TRUE; - } else { - if (!strncmp (header, match, strlen (match))) - truth = TRUE; - } - } - - r = e_sexp_result_new (ESEXP_RES_BOOL); - r->value.bool = truth; - } else { - r = e_sexp_result_new (ESEXP_RES_ARRAY_PTR); - r->value.ptrarray = g_ptr_array_new (); - } - - return r; + return check_header(f, argc, argv, search, CAMEL_SEARCH_MATCH_ENDS); } static ESExpResult * @@ -788,17 +597,13 @@ search_header_exists (struct _ESExp *f, int argc, struct _ESExpResult **argv, Ca r(printf ("executing header-exists\n")); if (search->current) { - const gchar *value = NULL; - + r = e_sexp_result_new(f, ESEXP_RES_BOOL); if (argc == 1 && argv[0]->type == ESEXP_RES_STRING) - value = camel_medium_get_header (CAMEL_MEDIUM (search->current), - argv[0]->value.string); + r->value.bool = camel_medium_get_header(CAMEL_MEDIUM(search->current), argv[0]->value.string) != NULL; - r = e_sexp_result_new (ESEXP_RES_BOOL); - r->value.bool = value ? 
TRUE : FALSE; } else { - r = e_sexp_result_new (ESEXP_RES_ARRAY_PTR); - r->value.ptrarray = g_ptr_array_new (); + r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR); + r->value.ptrarray = g_ptr_array_new(); } return r; @@ -817,104 +622,17 @@ g_lib_sux_htor(char *key, int value, struct _glib_sux_donkeys *fuckup) g_ptr_array_add(fuckup->uids, key); } -/* performs a 'slow' content-based match */ -/* there is also an identical copy of this in camel-filter-search.c */ -static gboolean -message_body_contains(CamelDataWrapper *object, regex_t *pattern) -{ - CamelDataWrapper *containee; - int truth = FALSE; - int parts, i; - - containee = camel_medium_get_content_object(CAMEL_MEDIUM(object)); - - if (containee == NULL) - return FALSE; - - /* TODO: I find it odd that get_part and get_content_object do not - add a reference, probably need fixing for multithreading */ - - /* using the object types is more accurate than using the mime/types */ - if (CAMEL_IS_MULTIPART(containee)) { - parts = camel_multipart_get_number(CAMEL_MULTIPART(containee)); - for (i=0;imime_type, "text", "*")) { - /* for all other text parts, we look inside, otherwise we dont care */ - CamelStreamMem *mem = (CamelStreamMem *)camel_stream_mem_new(); - - camel_data_wrapper_write_to_stream(containee, (CamelStream *)mem); - camel_stream_write((CamelStream *)mem, "", 1); - truth = regexec(pattern, mem->buffer->data, 0, NULL, 0) == 0; - camel_object_unref((CamelObject *)mem); - } - return truth; -} - -/* builds the regex into pattern */ static int -build_match_regex(regex_t *pattern, int argc, struct _ESExpResult **argv) -{ - GString *match = g_string_new(""); - int c, i, count=0, err; - char *word; - - /* build a regex pattern we can use to match the words, we OR them together */ - if (argc>1) - g_string_append_c(match, '('); - for (i=0;itype == ESEXP_RES_STRING) { - if (count > 0) - g_string_append_c(match, '|'); - /* escape any special chars (not sure if this list is complete) */ - word = argv[i]->value.string; 
- while ((c = *word++)) { - if (strchr("*\\.()[]^$+", c) != NULL) { - g_string_append_c(match, '\\'); - } - g_string_append_c(match, c); - } - count++; - } else { - g_warning("Invalid type passed to body-contains match function"); - } - } - if (argc>1) - g_string_append_c(match, ')'); - err = regcomp(pattern, match->str, REG_EXTENDED|REG_ICASE|REG_NOSUB); - if (err != 0) { - char buffer[1024]; /* dont really care if its longer than this ... */ - - regerror(err, pattern, buffer, 1023); - g_warning("Internal error with search pattern: %s: %s", match->str, buffer); - regfree(pattern); - } - d(printf("Built regex: '%s'\n", match->str)); - g_string_free(match, TRUE); - return err; -} - -static int -match_message(CamelFolder *folder, const char *uid, regex_t *pattern) +match_message(CamelFolder *folder, const char *uid, regex_t *pattern, CamelException *ex) { CamelMimeMessage *msg; int truth = FALSE; - CamelException *ex; - ex = camel_exception_new(); msg = camel_folder_get_message(folder, uid, ex); if (!camel_exception_is_set(ex) && msg!=NULL) { - truth = message_body_contains((CamelDataWrapper *)msg, pattern); + truth = camel_search_message_body_contains((CamelDataWrapper *)msg, pattern); camel_object_unref((CamelObject *)msg); } - camel_exception_free(ex); return truth; } @@ -928,28 +646,29 @@ search_body_contains(struct _ESExp *f, int argc, struct _ESExpResult **argv, Cam if (search->current) { int truth = FALSE; - r = e_sexp_result_new(ESEXP_RES_BOOL); if (search->body_index) { for (i=0;itype == ESEXP_RES_STRING) { truth = ibex_find_name(search->body_index, (char *)camel_message_info_uid(search->current), argv[i]->value.string); } else { - g_warning("Invalid type passed to body-contains match function"); + e_sexp_resultv_free(f, argc, argv); + e_sexp_fatal_error(f, _("Invalid type in body-contains, expecting string")); } } } else if (search->folder) { /* we do a 'slow' direct search */ - if (build_match_regex(&pattern, argc, argv) == 0) { - truth = 
match_message(search->folder, camel_message_info_uid(search->current), &pattern); + if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_REGEX|CAMEL_SEARCH_MATCH_ICASE, argc, argv, search->priv->ex) == 0) { + truth = match_message(search->folder, camel_message_info_uid(search->current), &pattern, search->priv->ex); regfree(&pattern); } } else { g_warning("Cannot perform indexed body query with no index or folder set"); } + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = truth; } else { - r = e_sexp_result_new(ESEXP_RES_ARRAY_PTR); + r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR); if (search->body_index) { if (argc==1) { @@ -969,7 +688,9 @@ search_body_contains(struct _ESExp *f, int argc, struct _ESExpResult **argv, Cam } g_ptr_array_free(pa, FALSE); } else { - g_warning("invalid type passed to body-contains"); + e_sexp_result_free(f, r); + e_sexp_resultv_free(f, argc, argv); + e_sexp_fatal_error(f, _("Invalid type in body-contains, expecting string")); } } lambdafoo.uids = g_ptr_array_new(); @@ -980,12 +701,12 @@ search_body_contains(struct _ESExp *f, int argc, struct _ESExpResult **argv, Cam } else if (search->folder) { /* do a slow search */ r->value.ptrarray = g_ptr_array_new(); - if (build_match_regex(&pattern, argc, argv) == 0) { + if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_REGEX|CAMEL_SEARCH_MATCH_ICASE, argc, argv, search->priv->ex) == 0) { if (search->summary) { for (i=0;isummary->len;i++) { CamelMessageInfo *info = g_ptr_array_index(search->summary, i); - if (match_message(search->folder, camel_message_info_uid(info), &pattern)) + if (match_message(search->folder, camel_message_info_uid(info), &pattern, search->priv->ex)) g_ptr_array_add(r->value.ptrarray, (char *)camel_message_info_uid(info)); } } /* else? 
we could always get the summary from the folder, but then @@ -1020,10 +741,10 @@ search_user_flag(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFo break; } } - r = e_sexp_result_new(ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = truth; } else { - r = e_sexp_result_new(ESEXP_RES_ARRAY_PTR); + r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR); r->value.ptrarray = g_ptr_array_new(); } @@ -1043,10 +764,10 @@ search_system_flag (struct _ESExp *f, int argc, struct _ESExpResult **argv, Came if (argc == 1) truth = camel_system_flag_get (search->current->flags, argv[0]->value.string); - r = e_sexp_result_new (ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = truth; } else { - r = e_sexp_result_new (ESEXP_RES_ARRAY_PTR); + r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR); r->value.ptrarray = g_ptr_array_new (); } @@ -1065,10 +786,10 @@ static ESExpResult *search_user_tag(struct _ESExp *f, int argc, struct _ESExpRes if (argc == 1) { value = camel_tag_get(&search->current->user_tags, argv[0]->value.string); } - r = e_sexp_result_new(ESEXP_RES_STRING); + r = e_sexp_result_new(f, ESEXP_RES_STRING); r->value.string = g_strdup(value?value:""); } else { - r = e_sexp_result_new(ESEXP_RES_ARRAY_PTR); + r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR); r->value.ptrarray = g_ptr_array_new(); } @@ -1084,11 +805,11 @@ search_get_sent_date(struct _ESExp *f, int argc, struct _ESExpResult **argv, Cam /* are we inside a match-all? */ if (s->current) { - r = e_sexp_result_new (ESEXP_RES_INT); + r = e_sexp_result_new(f, ESEXP_RES_INT); r->value.number = s->current->date_sent; } else { - r = e_sexp_result_new (ESEXP_RES_ARRAY_PTR); + r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR); r->value.ptrarray = g_ptr_array_new (); } @@ -1104,11 +825,11 @@ search_get_received_date(struct _ESExp *f, int argc, struct _ESExpResult **argv, /* are we inside a match-all? 
*/ if (s->current) { - r = e_sexp_result_new (ESEXP_RES_INT); + r = e_sexp_result_new(f, ESEXP_RES_INT); r->value.number = s->current->date_received; } else { - r = e_sexp_result_new (ESEXP_RES_ARRAY_PTR); + r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR); r->value.ptrarray = g_ptr_array_new (); } @@ -1122,7 +843,7 @@ search_get_current_date(struct _ESExp *f, int argc, struct _ESExpResult **argv, r(printf("executing get-current-date\n")); - r = e_sexp_result_new (ESEXP_RES_INT); + r = e_sexp_result_new(f, ESEXP_RES_INT); r->value.number = time (NULL); return r; } diff --git a/camel/providers/imap/camel-imap-search.c b/camel/providers/imap/camel-imap-search.c index 97ed02b6c2..06cf4a8d30 100644 --- a/camel/providers/imap/camel-imap-search.c +++ b/camel/providers/imap/camel-imap-search.c @@ -83,13 +83,13 @@ imap_body_contains (struct _ESExp *f, int argc, struct _ESExpResult **argv, if (s->current) { uid = camel_message_info_uid (s->current); - r = e_sexp_result_new (ESEXP_RES_BOOL); + r = e_sexp_result_new(f, ESEXP_RES_BOOL); r->value.bool = FALSE; response = camel_imap_command (store, s->folder, NULL, "UID SEARCH UID %s BODY \"%s\"", uid, value); } else { - r = e_sexp_result_new(ESEXP_RES_ARRAY_PTR); + r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR); r->value.ptrarray = g_ptr_array_new (); response = camel_imap_command (store, s->folder, NULL, "UID SEARCH BODY \"%s\"", diff --git a/camel/tests/folder/test3.c b/camel/tests/folder/test3.c index e231c4911a..c29ef00a43 100644 --- a/camel/tests/folder/test3.c +++ b/camel/tests/folder/test3.c @@ -78,7 +78,7 @@ static struct { char *expr; } searches[] = { { { 100, 50, 0 }, "(header-contains \"subject\" \"subject\")" }, - { { 100, 50, 0 }, "(header-contains \"subject\" \"Subject\")" }, + { { 0, 0, 0 }, "(header-contains \"subject\" \"Subject\")" }, { { 100, 50, 0 }, "(body-contains \"content\")" }, { { 100, 50, 0 }, "(body-contains \"Content\")" }, -- cgit v1.2.3