From 4c66cbd9f159141dc99b162f7f93018e71d214e0 Mon Sep 17 00:00:00 2001
From: Not Zed <NotZed@Ximian.com>
Date: Fri, 20 Sep 2002 05:30:27 +0000
Subject: New regression test for word splitting/search input parsing code.

2002-09-20  Not Zed  <NotZed@Ximian.com>

        * tests/misc/split.c: New regression test for word
        splitting/search input parsing code.

        * tests/folder/test9.c (main): Fix for filter_driver api change.

        * camel-search-private.c (camel_search_words_split): Handle "'s
        and \'s to escape characters.  For fat, lazy slobs who don't like
        anything changing.

svn path=/trunk/; revision=18130
---
 camel/ChangeLog              |  11 +++++
 camel/camel-search-private.c |  79 +++++++++++++++++++++---------
 camel/tests/folder/test9.c   |   6 +--
 camel/tests/misc/Makefile.am |   5 +-
 camel/tests/misc/README      |   2 +-
 camel/tests/misc/split.c     | 113 +++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 187 insertions(+), 29 deletions(-)
 create mode 100644 camel/tests/misc/split.c

diff --git a/camel/ChangeLog b/camel/ChangeLog
index ac20408330..cd1dafa405 100644
--- a/camel/ChangeLog
+++ b/camel/ChangeLog
@@ -1,3 +1,14 @@
+2002-09-20  Not Zed  <NotZed@Ximian.com>
+
+	* tests/misc/split.c: New regression test for word
+	splitting/search input parsing code.
+
+	* tests/folder/test9.c (main): Fix for filter_driver api change.
+
+	* camel-search-private.c (camel_search_words_split): Handle "'s
+	and \'s to escape characters.  For fat, lazy slobs who don't like
+	anything changing.
+
 2002-09-19  Not Zed  <NotZed@Ximian.com>
 
 	* camel-filter-search.c (camel_filter_search_match): Take new
diff --git a/camel/camel-search-private.c b/camel/camel-search-private.c
index 5adf2b146f..7202254294 100644
--- a/camel/camel-search-private.c
+++ b/camel/camel-search-private.c
@@ -531,44 +531,77 @@ loop:
 	return v;
 }
 
+static void
+output_c(GString *w, guint32 c, int *type)	/* append c to word buffer w as UTF-8 and fold its class into *type */
+{
+	int utf8len;
+	char utf8[8];	/* holds one encoded character plus the NUL stored below */
+
+	if (!g_unichar_isalnum(c))	/* non-alphanumeric: type becomes COMPLEX, only the 8BIT bit is carried over */
+		*type = CAMEL_SEARCH_WORD_COMPLEX | (*type & CAMEL_SEARCH_WORD_8BIT);
+	else	/* alphanumerics are stored lower-cased */
+		c = g_unichar_tolower(c);
+
+	if (c >= 0x80)	/* inclusive bound: U+0080 is already outside 7-bit ASCII (was c > 0x80) */
+		*type |= CAMEL_SEARCH_WORD_8BIT;
+
+	/* FIXME: use camel_utf8_putc */
+	utf8len = g_unichar_to_utf8(c, utf8);
+	utf8[utf8len] = 0;	/* g_unichar_to_utf8 does not NUL-terminate; g_string_append needs a C string */
+	g_string_append(w, utf8);
+}
+
+static void
+output_w(GString *w, GPtrArray *list, int type)	/* flush buffer w into list as one word entry of the given type */
+{
+	struct _camel_search_word *entry;
+
+	if (w->len == 0)	/* nothing accumulated: add no entry and leave w untouched */
+		return;
+	entry = g_malloc0(sizeof(*entry));
+	entry->word = g_strdup(w->str);	/* the entry gets its own copy of the text */
+	entry->type = type;
+	g_ptr_array_add(list, entry);
+	g_string_truncate(w, 0);	/* empty the buffer so the next word starts clean */
+}
+
 struct _camel_search_words *
 camel_search_words_split(const unsigned char *in)
 {
 	int type = CAMEL_SEARCH_WORD_SIMPLE, all = 0;
 	GString *w;
-	struct _camel_search_word *word;
 	struct _camel_search_words *words;
 	GPtrArray *list = g_ptr_array_new();
 	guint32 c;
-	int utf8len;
-	char utf8[8];
+	int inquote = 0;
 
 	words = g_malloc0(sizeof(*words));	
 	w = g_string_new("");
 
 	do {
 		c = camel_utf8_getc(&in);
-		if (c == 0 || g_unichar_isspace(c)) {
-			if (w->len) {
-				word = g_malloc0(sizeof(*word));
-				word->word = g_strdup(w->str);
-				word->type = type;
-				g_ptr_array_add(list, word);
-				all |= type;
-				type = CAMEL_SEARCH_WORD_SIMPLE;
-				g_string_truncate(w, 0);
-			}
+
+		if (c == 0
+		    || (inquote && c == '"')
+		    || (!inquote && g_unichar_isspace(c))) {
+			output_w(w, list, type);
+			all |= type;
+			type = CAMEL_SEARCH_WORD_SIMPLE;
+			inquote = 0;
 		} else {
-			if (!g_unichar_isalnum(c))
-				type = CAMEL_SEARCH_WORD_COMPLEX;
-			else
-				c = g_unichar_tolower(c);
-			if (c > 0x80)
-				type |= CAMEL_SEARCH_WORD_8BIT;
-
-			utf8len = g_unichar_to_utf8(c, utf8);
-			utf8[utf8len] = 0;
-			g_string_append(w, utf8);
+			if (c == '\\') {
+				c = camel_utf8_getc(&in);
+				if (c)
+					output_c(w, c, &type);
+				else {
+					output_w(w, list, type);
+					all |= type;
+				}
+			} else if (c == '\"') {
+				inquote = 1;
+			} else {
+				output_c(w, c, &type);
+			}
 		}
 	} while (c);
 
diff --git a/camel/tests/folder/test9.c b/camel/tests/folder/test9.c
index 87efb2246c..79d34c76a5 100644
--- a/camel/tests/folder/test9.c
+++ b/camel/tests/folder/test9.c
@@ -167,7 +167,7 @@ int main(int argc, char **argv)
 	pull();
 
 	push("Building filters");
-	driver = camel_filter_driver_new();
+	driver = camel_filter_driver_new(session);
 	camel_filter_driver_set_folder_func(driver, get_folder, NULL);
 	for (i=0;i<ARRAY_LEN(rules);i++) {
 		camel_filter_driver_add_rule(driver, rules[i].name, rules[i].match, rules[i].action);
@@ -188,7 +188,7 @@ int main(int argc, char **argv)
 	push("Testing broken match rules");
 	for (i=0;i<ARRAY_LEN(brokens);i++) {
 		push("rule %s", brokens[i].match);
-		driver = camel_filter_driver_new();
+		driver = camel_filter_driver_new(session);
 		camel_filter_driver_set_folder_func(driver, get_folder, NULL);
 		camel_filter_driver_add_rule(driver, brokens[i].name, brokens[i].match, brokens[i].action);
 		camel_filter_driver_filter_mbox(driver, "/tmp/camel-test/inbox", NULL, ex);
@@ -202,7 +202,7 @@ int main(int argc, char **argv)
 	push("Testing broken action rules");
 	for (i=0;i<ARRAY_LEN(brokena);i++) {
 		push("rule %s", brokena[i].action);
-		driver = camel_filter_driver_new();
+		driver = camel_filter_driver_new(session);
 		camel_filter_driver_set_folder_func(driver, get_folder, NULL);
 		camel_filter_driver_add_rule(driver, brokena[i].name, brokena[i].match, brokena[i].action);
 		camel_filter_driver_filter_mbox(driver, "/tmp/camel-test/inbox", NULL, ex);
diff --git a/camel/tests/misc/Makefile.am b/camel/tests/misc/Makefile.am
index 44d28cb0a5..4ad681158d 100644
--- a/camel/tests/misc/Makefile.am
+++ b/camel/tests/misc/Makefile.am
@@ -20,9 +20,10 @@ LDADD = \
 
 check_PROGRAMS =  	\
 	url		\
-	utf7
+	utf7		\
+	split
 
-TESTS = url utf7
+TESTS = url utf7 split
 
 
 
diff --git a/camel/tests/misc/README b/camel/tests/misc/README
index 17cbba017a..e92f579cf6 100644
--- a/camel/tests/misc/README
+++ b/camel/tests/misc/README
@@ -1,4 +1,4 @@
 
 url	URL parsing
 utf7	UTF7 and UTF8 processing
-
+split	word splitting for searching
diff --git a/camel/tests/misc/split.c b/camel/tests/misc/split.c
new file mode 100644
index 0000000000..254b91974f
--- /dev/null
+++ b/camel/tests/misc/split.c
@@ -0,0 +1,113 @@
+#include <config.h>
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <glib.h>
+#include <e-util/e-sexp.h>
+#include <camel/camel-exception.h>
+#include <camel/camel-search-private.h>
+
+#include "camel-test.h"
+
+/* TODO: add UTF-8 (non-ASCII) test inputs to these tables as well */
+
+static struct {
+	char *word;	/* input string handed to camel_search_words_split() */
+	int count;	/* expected words->len of the result */
+	struct {
+		char *word;	/* expected text of result word j */
+		int type;	/* expected CAMEL_SEARCH_WORD_* type of result word j */
+	} splits[5];	/* expectations indexed by result position; unlisted entries are zero-initialised */
+} split_tests[] = {
+	{ "simple", 1, { { "simple", CAMEL_SEARCH_WORD_SIMPLE } } },
+	{ "two words", 2, { { "two", CAMEL_SEARCH_WORD_SIMPLE }, {"words" , CAMEL_SEARCH_WORD_SIMPLE } } },
+	{ "compl;ex", 1, { { "compl;ex", CAMEL_SEARCH_WORD_COMPLEX } } },	/* punctuation stays in the word and makes it COMPLEX */
+	{ "compl;ex simple", 2, { { "compl;ex", CAMEL_SEARCH_WORD_COMPLEX} , {"simple", CAMEL_SEARCH_WORD_SIMPLE} } },
+	{ "\"quoted\"", 1, { { "quoted", CAMEL_SEARCH_WORD_SIMPLE } } },	/* surrounding quotes are not part of the word */
+	{ "\"quoted double\"", 1, { { "quoted double", CAMEL_SEARCH_WORD_COMPLEX } } },	/* a space inside quotes stays in the word */
+	{ "\"quoted double\" compl;ex", 2, { { "quoted double", CAMEL_SEARCH_WORD_COMPLEX }, { "compl;ex", CAMEL_SEARCH_WORD_COMPLEX } } },
+	{ "\"quoted double \\\" escaped\"", 1, { { "quoted double \" escaped", CAMEL_SEARCH_WORD_COMPLEX } } },	/* backslash-quote inside quotes is a literal quote */
+	{ "\"quoted\\\"double\" \\\" escaped\\\"", 3, { { "quoted\"double", CAMEL_SEARCH_WORD_COMPLEX }, {"\"", CAMEL_SEARCH_WORD_COMPLEX}, { "escaped\"", CAMEL_SEARCH_WORD_COMPLEX } } },
+	{ "\\\"escaped", 1, { { "\"escaped", CAMEL_SEARCH_WORD_COMPLEX } } },	/* backslash-quote outside quotes does not open a quoted run */
+
+};
+#define SPLIT_LENGTH (sizeof(split_tests)/sizeof(split_tests[0]))
+
+static struct {
+	char *word;	/* input string: split first, then passed through camel_search_words_simple() */
+	int count;	/* expected words->len of the simplified result */
+	struct {
+		char *word;	/* expected text of simplified word j */
+		int type;	/* expected CAMEL_SEARCH_WORD_* type of simplified word j */
+	} splits[5];	/* expectations indexed by result position; unlisted entries are zero-initialised */
+} simple_tests[] = {
+	{ "simple", 1, { {"simple", CAMEL_SEARCH_WORD_SIMPLE } } },
+	{ "simpleCaSe", 1, { { "simplecase", CAMEL_SEARCH_WORD_SIMPLE } } },	/* output is expected lower-cased */
+	{ "two words", 2, { { "two", CAMEL_SEARCH_WORD_SIMPLE }, { "words", CAMEL_SEARCH_WORD_SIMPLE } } },
+	{ "two wordscAsE", 2, { { "two", CAMEL_SEARCH_WORD_SIMPLE} ,  { "wordscase", CAMEL_SEARCH_WORD_SIMPLE } } },
+	{ "compl;ex", 2, { { "compl", CAMEL_SEARCH_WORD_SIMPLE }, { "ex", CAMEL_SEARCH_WORD_SIMPLE } } },	/* expected: punctuation breaks the word in two */
+	{ "compl;ex simple", 3, { { "compl", CAMEL_SEARCH_WORD_SIMPLE }, { "ex", CAMEL_SEARCH_WORD_SIMPLE }, { "simple", CAMEL_SEARCH_WORD_SIMPLE } } },
+	{ "\"quoted compl;ex\" simple", 4, { { "quoted", CAMEL_SEARCH_WORD_SIMPLE}, { "compl", CAMEL_SEARCH_WORD_SIMPLE }, { "ex", CAMEL_SEARCH_WORD_SIMPLE }, { "simple", CAMEL_SEARCH_WORD_SIMPLE } } },
+	{ "\\\" \"quoted\"compl;ex\" simple", 4, { { "quoted", CAMEL_SEARCH_WORD_SIMPLE}, { "compl", CAMEL_SEARCH_WORD_SIMPLE }, { "ex", CAMEL_SEARCH_WORD_SIMPLE }, { "simple", CAMEL_SEARCH_WORD_SIMPLE } } },
+};
+
+#define SIMPLE_LENGTH (sizeof(simple_tests)/sizeof(simple_tests[0]))
+
+int
+main (int argc, char **argv)	/* runs both tables through the splitter and compares against the expectations */
+{
+	int i, j;
+	struct _camel_search_words *words, *tmp;
+
+	camel_test_init(argc, argv);
+
+	camel_test_start("Search splitting");	/* pass 1: camel_search_words_split() alone, driven by split_tests */
+
+	for (i=0; i<SPLIT_LENGTH; i++) {
+		camel_test_push("split %d '%s'", i, split_tests[i].word);
+
+		words = camel_search_words_split(split_tests[i].word);
+		check(words != NULL);
+		check_msg(words->len == split_tests[i].count, "words->len = %d, count = %d", words->len, split_tests[i].count);
+		/* NOTE(review): the loop below is bounded by words->len, not by count or the 5-entry splits array; presumably a failed check above stops the test first - confirm */
+		for (j=0;j<words->len;j++) {
+			check_msg(strcmp(split_tests[i].splits[j].word, words->words[j]->word) == 0,
+				  "'%s' != '%s'", split_tests[i].splits[j].word, words->words[j]->word);
+			check(split_tests[i].splits[j].type == words->words[j]->type);
+		}
+
+		camel_search_words_free(words);
+		camel_test_pull();
+	}
+
+	camel_test_end();
+
+	camel_test_start("Search splitting - simple");	/* pass 2: split, then camel_search_words_simple(), driven by simple_tests */
+
+	for (i=0; i<SIMPLE_LENGTH; i++) {
+		camel_test_push("simple split %d '%s'", i, simple_tests[i].word);
+
+		tmp = camel_search_words_split(simple_tests[i].word);
+		check(tmp != NULL);
+
+		words = camel_search_words_simple(tmp);	/* only the simplified list is compared; tmp is the intermediate split */
+		check(words != NULL);
+		check_msg(words->len == simple_tests[i].count, "words->len = %d, count = %d", words->len, simple_tests[i].count);
+		/* NOTE(review): same loop bound as in pass 1 - words->len rather than count or the splits array size */
+		for (j=0;j<words->len;j++) {
+			check_msg(strcmp(simple_tests[i].splits[j].word, words->words[j]->word) == 0,
+				  "'%s' != '%s'", simple_tests[i].splits[j].word, words->words[j]->word);
+			check(simple_tests[i].splits[j].type == words->words[j]->type);
+		}
+
+		camel_search_words_free(words);	/* both lists are released separately */
+		camel_search_words_free(tmp);
+		camel_test_pull();
+	}
+
+	camel_test_end();
+
+	return 0;
+}
-- 
cgit v1.2.3