diff options
-rw-r--r-- | camel/ChangeLog | 11 | ||||
-rw-r--r-- | camel/camel-search-private.c | 79 | ||||
-rw-r--r-- | camel/tests/folder/test9.c | 6 | ||||
-rw-r--r-- | camel/tests/misc/Makefile.am | 5 | ||||
-rw-r--r-- | camel/tests/misc/README | 2 | ||||
-rw-r--r-- | camel/tests/misc/split.c | 113 |
6 files changed, 187 insertions, 29 deletions
diff --git a/camel/ChangeLog b/camel/ChangeLog index ac20408330..cd1dafa405 100644 --- a/camel/ChangeLog +++ b/camel/ChangeLog @@ -1,3 +1,14 @@ +2002-09-20 Not Zed <NotZed@Ximian.com> + + * tests/misc/split.c: New regression test for word + splitting/search input parsing code. + + * tests/folder/test9.c (main): Fix for filter_driver api change. + + * camel-search-private.c (camel_search_words_split): Handle "'s + and \'s to escape characters. For fat, lazy, slobs who dont like + anything changing. + 2002-09-19 Not Zed <NotZed@Ximian.com> * camel-filter-search.c (camel_filter_search_match): Take new diff --git a/camel/camel-search-private.c b/camel/camel-search-private.c index 5adf2b146f..7202254294 100644 --- a/camel/camel-search-private.c +++ b/camel/camel-search-private.c @@ -531,44 +531,77 @@ loop: return v; } +static void +output_c(GString *w, guint32 c, int *type) +{ + int utf8len; + char utf8[8]; + + if (!g_unichar_isalnum(c)) + *type = CAMEL_SEARCH_WORD_COMPLEX | (*type & CAMEL_SEARCH_WORD_8BIT); + else + c = g_unichar_tolower(c); + + if (c > 0x80) + *type |= CAMEL_SEARCH_WORD_8BIT; + + /* FIXME: use camel_utf8_putc */ + utf8len = g_unichar_to_utf8(c, utf8); + utf8[utf8len] = 0; + g_string_append(w, utf8); +} + +static void +output_w(GString *w, GPtrArray *list, int type) +{ + struct _camel_search_word *word; + + if (w->len) { + word = g_malloc0(sizeof(*word)); + word->word = g_strdup(w->str); + word->type = type; + g_ptr_array_add(list, word); + g_string_truncate(w, 0); + } +} + struct _camel_search_words * camel_search_words_split(const unsigned char *in) { int type = CAMEL_SEARCH_WORD_SIMPLE, all = 0; GString *w; - struct _camel_search_word *word; struct _camel_search_words *words; GPtrArray *list = g_ptr_array_new(); guint32 c; - int utf8len; - char utf8[8]; + int inquote = 0; words = g_malloc0(sizeof(*words)); w = g_string_new(""); do { c = camel_utf8_getc(&in); - if (c == 0 || g_unichar_isspace(c)) { - if (w->len) { - word = g_malloc0(sizeof(*word)); - word->word = g_strdup(w->str); - word->type = type; - g_ptr_array_add(list, word); - all |= type; - type = CAMEL_SEARCH_WORD_SIMPLE; - g_string_truncate(w, 0); - } + + if (c == 0 + || (inquote && c == '"') + || (!inquote && g_unichar_isspace(c))) { + output_w(w, list, type); + all |= type; + type = CAMEL_SEARCH_WORD_SIMPLE; + inquote = 0; } else { - if (!g_unichar_isalnum(c)) - type = CAMEL_SEARCH_WORD_COMPLEX; - else - c = g_unichar_tolower(c); - if (c > 0x80) - type |= CAMEL_SEARCH_WORD_8BIT; - - utf8len = g_unichar_to_utf8(c, utf8); - utf8[utf8len] = 0; - g_string_append(w, utf8); + if (c == '\\') { + c = camel_utf8_getc(&in); + if (c) + output_c(w, c, &type); + else { + output_w(w, list, type); + all |= type; + } + } else if (c == '\"') { + inquote = 1; + } else { + output_c(w, c, &type); + } } } while (c); diff --git a/camel/tests/folder/test9.c b/camel/tests/folder/test9.c index 87efb2246c..79d34c76a5 100644 --- a/camel/tests/folder/test9.c +++ b/camel/tests/folder/test9.c @@ -167,7 +167,7 @@ int main(int argc, char **argv) pull(); push("Building filters"); - driver = camel_filter_driver_new(); + driver = camel_filter_driver_new(session); camel_filter_driver_set_folder_func(driver, get_folder, NULL); for (i=0;i<ARRAY_LEN(rules);i++) { camel_filter_driver_add_rule(driver, rules[i].name, rules[i].match, rules[i].action); @@ -188,7 +188,7 @@ int main(int argc, char **argv) push("Testing broken match rules"); for (i=0;i<ARRAY_LEN(brokens);i++) { push("rule %s", brokens[i].match); - driver = camel_filter_driver_new(); + driver = camel_filter_driver_new(session); camel_filter_driver_set_folder_func(driver, get_folder, NULL); camel_filter_driver_add_rule(driver, brokens[i].name, brokens[i].match, brokens[i].action); camel_filter_driver_filter_mbox(driver, "/tmp/camel-test/inbox", NULL, ex); @@ -202,7 +202,7 @@ int main(int argc, char **argv) push("Testing broken action rules"); for (i=0;i<ARRAY_LEN(brokena);i++) { push("rule %s", brokena[i].action); - driver = camel_filter_driver_new(); + driver = camel_filter_driver_new(session); camel_filter_driver_set_folder_func(driver, get_folder, NULL); camel_filter_driver_add_rule(driver, brokena[i].name, brokena[i].match, brokena[i].action); camel_filter_driver_filter_mbox(driver, "/tmp/camel-test/inbox", NULL, ex); diff --git a/camel/tests/misc/Makefile.am b/camel/tests/misc/Makefile.am index 44d28cb0a5..4ad681158d 100644 --- a/camel/tests/misc/Makefile.am +++ b/camel/tests/misc/Makefile.am @@ -20,9 +20,10 @@ LDADD = \ check_PROGRAMS = \ url \ - utf7 + utf7 \ + split -TESTS = url utf7 +TESTS = url utf7 split diff --git a/camel/tests/misc/README b/camel/tests/misc/README index 17cbba017a..e92f579cf6 100644 --- a/camel/tests/misc/README +++ b/camel/tests/misc/README @@ -1,4 +1,4 @@ url URL parsing utf7 UTF7 and UTF8 processing - +split word splitting for searching diff --git a/camel/tests/misc/split.c b/camel/tests/misc/split.c new file mode 100644 index 0000000000..254b91974f --- /dev/null +++ b/camel/tests/misc/split.c @@ -0,0 +1,113 @@ +#include <config.h> + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <glib.h> +#include <e-util/e-sexp.h> +#include <camel/camel-exception.h> +#include <camel/camel-search-private.h> + +#include "camel-test.h" + +/* TODO: should put utf8 stuff here too */ + +static struct { + char *word; + int count; + struct { + char *word; + int type; + } splits[5]; +} split_tests[] = { + { "simple", 1, { { "simple", CAMEL_SEARCH_WORD_SIMPLE } } }, + { "two words", 2, { { "two", CAMEL_SEARCH_WORD_SIMPLE }, {"words" , CAMEL_SEARCH_WORD_SIMPLE } } }, + { "compl;ex", 1, { { "compl;ex", CAMEL_SEARCH_WORD_COMPLEX } } }, + { "compl;ex simple", 2, { { "compl;ex", CAMEL_SEARCH_WORD_COMPLEX} , {"simple", CAMEL_SEARCH_WORD_SIMPLE} } }, + { "\"quoted\"", 1, { { "quoted", CAMEL_SEARCH_WORD_SIMPLE } } }, + { "\"quoted double\"", 1, { { "quoted double", CAMEL_SEARCH_WORD_COMPLEX } } }, + { "\"quoted double\" compl;ex", 2, { { "quoted double", CAMEL_SEARCH_WORD_COMPLEX }, { "compl;ex", CAMEL_SEARCH_WORD_COMPLEX } } }, + { "\"quoted double \\\" escaped\"", 1, { { "quoted double \" escaped", CAMEL_SEARCH_WORD_COMPLEX } } }, + { "\"quoted\\\"double\" \\\" escaped\\\"", 3, { { "quoted\"double", CAMEL_SEARCH_WORD_COMPLEX }, {"\"", CAMEL_SEARCH_WORD_COMPLEX}, { "escaped\"", CAMEL_SEARCH_WORD_COMPLEX } } }, + { "\\\"escaped", 1, { { "\"escaped", CAMEL_SEARCH_WORD_COMPLEX } } }, + +}; +#define SPLIT_LENGTH (sizeof(split_tests)/sizeof(split_tests[0])) + +static struct { + char *word; + int count; + struct { + char *word; + int type; + } splits[5]; +} simple_tests[] = { + { "simple", 1, { {"simple", CAMEL_SEARCH_WORD_SIMPLE } } }, + { "simpleCaSe", 1, { { "simplecase", CAMEL_SEARCH_WORD_SIMPLE } } }, + { "two words", 2, { { "two", CAMEL_SEARCH_WORD_SIMPLE }, { "words", CAMEL_SEARCH_WORD_SIMPLE } } }, + { "two wordscAsE", 2, { { "two", CAMEL_SEARCH_WORD_SIMPLE} , { "wordscase", CAMEL_SEARCH_WORD_SIMPLE } } }, + { "compl;ex", 2, { { "compl", CAMEL_SEARCH_WORD_SIMPLE }, { "ex", CAMEL_SEARCH_WORD_SIMPLE } } }, + { "compl;ex simple", 3, { { "compl", CAMEL_SEARCH_WORD_SIMPLE }, { "ex", CAMEL_SEARCH_WORD_SIMPLE }, { "simple", CAMEL_SEARCH_WORD_SIMPLE } } }, + { "\"quoted compl;ex\" simple", 4, { { "quoted", CAMEL_SEARCH_WORD_SIMPLE}, { "compl", CAMEL_SEARCH_WORD_SIMPLE }, { "ex", CAMEL_SEARCH_WORD_SIMPLE }, { "simple", CAMEL_SEARCH_WORD_SIMPLE } } }, + { "\\\" \"quoted\"compl;ex\" simple", 4, { { "quoted", CAMEL_SEARCH_WORD_SIMPLE}, { "compl", CAMEL_SEARCH_WORD_SIMPLE }, { "ex", CAMEL_SEARCH_WORD_SIMPLE }, { "simple", CAMEL_SEARCH_WORD_SIMPLE } } }, +}; + +#define SIMPLE_LENGTH (sizeof(simple_tests)/sizeof(simple_tests[0])) + +int +main (int argc, char **argv) +{ + int i, j; + struct _camel_search_words *words, *tmp; + + camel_test_init(argc, argv); + + camel_test_start("Search splitting"); + + for (i=0; i<SPLIT_LENGTH; i++) { + camel_test_push("split %d '%s'", i, split_tests[i].word); + + words = camel_search_words_split(split_tests[i].word); + check(words != NULL); + check_msg(words->len == split_tests[i].count, "words->len = %d, count = %d", words->len, split_tests[i].count); + + for (j=0;j<words->len;j++) { + check_msg(strcmp(split_tests[i].splits[j].word, words->words[j]->word) == 0, + "'%s' != '%s'", split_tests[i].splits[j].word, words->words[j]->word); + check(split_tests[i].splits[j].type == words->words[j]->type); + } + + camel_search_words_free(words); + camel_test_pull(); + } + + camel_test_end(); + + camel_test_start("Search splitting - simple"); + + for (i=0; i<SIMPLE_LENGTH; i++) { + camel_test_push("simple split %d '%s'", i, simple_tests[i].word); + + tmp = camel_search_words_split(simple_tests[i].word); + check(tmp != NULL); + + words = camel_search_words_simple(tmp); + check(words != NULL); + check_msg(words->len == simple_tests[i].count, "words->len = %d, count = %d", words->len, simple_tests[i].count); + + for (j=0;j<words->len;j++) { + check_msg(strcmp(simple_tests[i].splits[j].word, words->words[j]->word) == 0, + "'%s' != '%s'", simple_tests[i].splits[j].word, words->words[j]->word); + check(simple_tests[i].splits[j].type == words->words[j]->type); + } + + camel_search_words_free(words); + camel_search_words_free(tmp); + camel_test_pull(); + } + + camel_test_end(); + + return 0; +} |