From ab7d6ef097bf18a3ebb97c4d73947fe8d2f059c9 Mon Sep 17 00:00:00 2001
From: nobody <nobody@localhost>
Date: Sat, 28 Dec 2002 04:49:29 +0000
Subject: This commit was manufactured by cvs2svn to create tag 'GHEX_2_0_0'.

svn path=/tags/GHEX_2_0_0/; revision=19188
---
 camel/camel-search-private.c | 683 -------------------------------------------
 1 file changed, 683 deletions(-)
 delete mode 100644 camel/camel-search-private.c

(limited to 'camel/camel-search-private.c')

diff --git a/camel/camel-search-private.c b/camel/camel-search-private.c
deleted file mode 100644
index 80152e7410..0000000000
--- a/camel/camel-search-private.c
+++ /dev/null
@@ -1,683 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-/*
- *  Authors: Jeffrey Stedfast <fejj@ximian.com>
- *	     Michael Zucchi <NotZed@Ximian.com>
- *
- *  Copyright 2000 Ximian, Inc. (www.ximian.com)
- *  Copyright 2001 Ximian Inc. (www.ximian.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- *
- */
-
-/* (from glibc headers:
-   POSIX says that <sys/types.h> must be included (by the caller) before <regex.h>.  */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <sys/types.h>
-#include <regex.h>
-#include <string.h>
-#include <ctype.h>
-#include <stdio.h>
-
-#include "camel-exception.h"
-#include "camel-mime-message.h"
-#include "camel-multipart.h"
-#include "camel-stream-mem.h"
-#include "e-util/e-sexp.h"
-
-#include "camel-search-private.h"
-
-#include <glib/gunicode.h>
-
-#define d(x)
-
-/* builds the regex into pattern */
-/* taken from camel-folder-search, with added isregex & exception parameter */
-/* Basically, we build a new regex, either based on subset regex's, or substrings,
-   that can be executed once over the whoel body, to match anything suitable.
-   This is more efficient than multiple searches, and probably most (naive) strstr
-   implementations, over long content.
-
-   A small issue is that case-insenstivity wont work entirely correct for utf8 strings. */
-int
-camel_search_build_match_regex (regex_t *pattern, camel_search_flags_t type, int argc,
-				struct _ESExpResult **argv, CamelException *ex)
-{
-	GString *match = g_string_new("");
-	int c, i, count=0, err;
-	char *word;
-	int flags;
-	
-	/* build a regex pattern we can use to match the words, we OR them together */
-	if (argc>1)
-		g_string_append_c (match, '(');
-	for (i = 0; i < argc; i++) {
-		if (argv[i]->type == ESEXP_RES_STRING) {
-			if (count > 0)
-				g_string_append_c (match, '|');
-			
-			word = argv[i]->value.string;
-			if (type & CAMEL_SEARCH_MATCH_REGEX) {
-				/* no need to escape because this should already be a valid regex */
-				g_string_append (match, word);
-			} else {
-				/* escape any special chars (not sure if this list is complete) */
-				if (type & CAMEL_SEARCH_MATCH_START)
-					g_string_append_c (match, '^');
-				while ((c = *word++)) {
-					if (strchr ("*\\.()[]^$+", c) != NULL) {
-						g_string_append_c (match, '\\');
-					}
-					g_string_append_c (match, c);
-				}
-				if (type & CAMEL_SEARCH_MATCH_END)
-					g_string_append_c (match, '^');
-			}
-			count++;
-		} else {
-			g_warning("Invalid type passed to body-contains match function");
-		}
-	}
-	if (argc > 1)
-		g_string_append_c (match, ')');
-	flags = REG_EXTENDED|REG_NOSUB;
-	if (type & CAMEL_SEARCH_MATCH_ICASE)
-		flags |= REG_ICASE;
-	if (type & CAMEL_SEARCH_MATCH_NEWLINE)
-		flags |= REG_NEWLINE;
-	err = regcomp (pattern, match->str, flags);
-	if (err != 0) {
-		/* regerror gets called twice to get the full error string 
-		   length to do proper posix error reporting */
-		int len = regerror (err, pattern, 0, 0);
-		char *buffer = g_malloc0 (len + 1);
-		
-		regerror (err, pattern, buffer, len);
-		camel_exception_setv (ex, CAMEL_EXCEPTION_SYSTEM,
-				      _("Regular expression compilation failed: %s: %s"),
-				      match->str, buffer);
-		
-		regfree (pattern);
-	}
-	d(printf("Built regex: '%s'\n", match->str));
-	g_string_free (match, TRUE);
-	
-	return err;
-}
-
-static unsigned char soundex_table[256] = {
-	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	  0,  0, 49, 50, 51,  0, 49, 50,  0,  0, 50, 50, 52, 53, 53,  0,
-	 49, 50, 54, 50, 51,  0, 49,  0, 50,  0, 50,  0,  0,  0,  0,  0,
-	  0,  0, 49, 50, 51,  0, 49, 50,  0,  0, 50, 50, 52, 53, 53,  0,
-	 49, 50, 54, 50, 51,  0, 49,  0, 50,  0, 50,  0,  0,  0,  0,  0,
-	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-};
-
-static void
-soundexify (const gchar *sound, gchar code[5])
-{
-	guchar *c, last = '\0';
-	gint n;
-	
-	for (c = (guchar *) sound; *c && !isalpha (*c); c++);
-	code[0] = toupper (*c);
-	memset (code + 1, '0', 3);
-	for (n = 1; *c && n < 5; c++) {
-		guchar ch = soundex_table[*c];
-		
-		if (ch && ch != last) {
-			code[n++] = ch;
-			last = ch;
-		}
-	}
-	code[4] = '\0';
-}
-
-static gboolean
-header_soundex (const char *header, const char *match)
-{
-	char mcode[5], hcode[5];
-	const char *p;
-	char c;
-	GString *word;
-	int truth = FALSE;
-	
-	soundexify (match, mcode);
-	
-	/* split the header into words, and soundexify and compare each one */
-	/* FIXME: Should this convert to utf8, and split based on that, and what not?
-	   soundex only makes sense for us-ascii though ... */
-	
-	word = g_string_new("");
-	p = header;
-	do {
-		c = *p++;
-		if (c == 0 || isspace (c)) {
-			if (word->len > 0) {
-				soundexify (word->str, hcode);
-				if (strcmp (hcode, mcode) == 0)
-					truth = TRUE;
-			}
-			g_string_truncate (word, 0);
-		} else if (isalpha (c))
-			g_string_append_c (word, c);
-	} while (c && !truth);
-	g_string_free (word, TRUE);
-	
-	return truth;
-}
-
-/* FIXME: This is stupidly slow and needs to be removed */
-static gunichar
-utf8_get (const char **inp)
-{
-	const unsigned char *p = *inp;
-	gunichar c;
-
-	if (p == NULL)
-		return 0;
-	
-	c = g_utf8_get_char (p);
-	*inp = g_unichar_validate (c) ? g_utf8_next_char (p) : NULL;
-	
-	return c;
-}
-
-const char *
-camel_ustrstrcase (const char *haystack, const char *needle)
-{
-	gunichar *nuni, *puni;
-	gunichar u;
-	const char *p;
-	
-	g_return_val_if_fail (haystack != NULL, NULL);
-	g_return_val_if_fail (needle != NULL, NULL);
-	
-	if (strlen (needle) == 0)
-		return haystack;
-	if (strlen (haystack) == 0)
-		return NULL;
-	
-	puni = nuni = alloca (sizeof (gunichar) * strlen (needle));
-	
-	p = needle;
-	while ((u = utf8_get (&p)))
-		*puni++ = g_unichar_tolower (u);
-	
-	/* NULL means there was illegal utf-8 sequence */
-	if (!p)
-		return NULL;
-	
-	p = haystack;
-	while ((u = utf8_get (&p))) {
-		gunichar c;
-		
-		c = g_unichar_tolower (u);
-		/* We have valid stripped char */
-		if (c == nuni[0]) {
-			const gchar *q = p;
-			gint npos = 1;
-			
-			while (nuni + npos < puni) {
-				u = utf8_get (&q);
-				if (!q || !u)
-					return NULL;
-				
-				c = g_unichar_tolower (u);				
-				if (c != nuni[npos])
-					break;
-				
-				npos++;
-			}
-			
-			if (nuni + npos == puni)
-				return p;
-		}
-	}
-	
-	return NULL;
-}
-
-#define CAMEL_SEARCH_COMPARE(x, y, z) G_STMT_START {   \
-	if ((x) == (z)) {                              \
-		if ((y) == (z))                        \
-			return 0;                      \
-		else                                   \
-			return -1;                     \
-	} else if ((y) == (z))                         \
-		return 1;                              \
-} G_STMT_END
-
-static int
-camel_ustrcasecmp (const char *s1, const char *s2)
-{
-	gunichar u1, u2 = 0;
-	
-	CAMEL_SEARCH_COMPARE (s1, s2, NULL);
-	
-	u1 = utf8_get (&s1);
-	u2 = utf8_get (&s2);
-	while (u1 && u2) {
-		u1 = g_unichar_tolower (u1);
-		u2 = g_unichar_tolower (u2);
-		if (u1 < u2)
-			return -1;
-		else if (u1 > u2)
-			return 1;
-		
-		u1 = utf8_get (&s1);
-		u2 = utf8_get (&s2);
-	}
-	
-	/* end of one of the strings ? */
-	CAMEL_SEARCH_COMPARE (u1, u2, 0);
-	
-	/* if we have invalid utf8 sequence ?  */
-	CAMEL_SEARCH_COMPARE (s1, s2, NULL);
-	
-	return 0;
-}
-
-static int
-camel_ustrncasecmp (const char *s1, const char *s2, size_t len)
-{
-	gunichar u1, u2 = 0;
-	
-	CAMEL_SEARCH_COMPARE (s1, s2, NULL);
-	
-	u1 = utf8_get (&s1);
-	u2 = utf8_get (&s2);
-	while (len > 0 && u1 && u2) {
-		u1 = g_unichar_tolower (u1);
-		u2 = g_unichar_tolower (u2);
-		if (u1 < u2)
-			return -1;
-		else if (u1 > u2)
-			return 1;
-		
-		len--;
-		u1 = utf8_get (&s1);
-		u2 = utf8_get (&s2);
-	}
-	
-	if (len == 0)
-		return 0;
-	
-	/* end of one of the strings ? */
-	CAMEL_SEARCH_COMPARE (u1, u2, 0);
-	
-	/* if we have invalid utf8 sequence ?  */
-	CAMEL_SEARCH_COMPARE (s1, s2, NULL);
-	
-	return 0;
-}
-
-/* value is the match value suitable for exact match if required */
-static int
-header_match(const char *value, const char *match, camel_search_match_t how)
-{
-	const char *p;
-	int vlen, mlen;
-	
-	if (how == CAMEL_SEARCH_MATCH_SOUNDEX)
-		return header_soundex (value, match);
-
-	vlen = strlen(value);
-	mlen = strlen(match);
-	if (vlen < mlen)
-		return FALSE;
-	
-	/* from dan the man, if we have mixed case, perform a case-sensitive match,
-	   otherwise not */
-	p = match;
-	while (*p) {
-		if (isupper(*p)) {
-			switch (how) {
-			case CAMEL_SEARCH_MATCH_EXACT:
-				return strcmp(value, match) == 0;
-			case CAMEL_SEARCH_MATCH_CONTAINS:
-				return strstr(value, match) != NULL;
-			case CAMEL_SEARCH_MATCH_STARTS:
-				return strncmp(value, match, mlen) == 0;
-			case CAMEL_SEARCH_MATCH_ENDS:
-				return strcmp(value + vlen - mlen, match) == 0;
-			default:
-				break;
-			}
-			return FALSE;
-		}
-		p++;
-	}
-	
-	switch (how) {
-	case CAMEL_SEARCH_MATCH_EXACT:
-		return camel_ustrcasecmp(value, match) == 0;
-	case CAMEL_SEARCH_MATCH_CONTAINS:
-		return camel_ustrstrcase(value, match) != NULL;
-	case CAMEL_SEARCH_MATCH_STARTS:
-		return camel_ustrncasecmp(value, match, mlen) == 0;
-	case CAMEL_SEARCH_MATCH_ENDS:
-		return camel_ustrcasecmp(value + vlen - mlen, match) == 0;
-	default:
-		break;
-	}
-	
-	return FALSE;
-}
-
-/* searhces for match inside value, if match is mixed case, hten use case-sensitive,
-   else insensitive */
-gboolean
-camel_search_header_match (const char *value, const char *match, camel_search_match_t how, camel_search_t type, const char *default_charset)
-{
-	const char *name, *addr;
-	int truth = FALSE, i;
-	CamelInternetAddress *cia;
-	char *v, *vdom, *mdom;
-
-	while (*value && isspace (*value))
-		value++;
-	
-	switch(type) {
-	case CAMEL_SEARCH_TYPE_ENCODED:
-		v = header_decode_string(value, default_charset); /* FIXME: Find header charset */
-		truth = header_match(v, match, how);
-		g_free(v);
-		break;
-	case CAMEL_SEARCH_TYPE_MLIST:
-		/* Special mailing list old-version domain hack
-		   If one of the mailing list names doesn't have an @ in it, its old-style, so
-		   only match against the pre-domain part, which should be common */
-
-		vdom = strchr(value, '@');
-		mdom = strchr(match, '@');
-		if (mdom == NULL && vdom != NULL) {
-			v = alloca(vdom-value+1);
-			memcpy(v, value, vdom-value);
-			v[vdom-value] = 0;
-			value = (char *)v;
-		} else if (mdom != NULL && vdom == NULL) {
-			v = alloca(mdom-match+1);
-			memcpy(v, match, mdom-match);
-			v[mdom-match] = 0;
-			match = (char *)v;
-		}
-		/* Falls through */
-	case CAMEL_SEARCH_TYPE_ASIS:
-		truth = header_match(value, match, how);
-		break;
-	case CAMEL_SEARCH_TYPE_ADDRESS_ENCODED:
-	case CAMEL_SEARCH_TYPE_ADDRESS:
-		/* possible simple case to save some work if we can */
-		if (header_match(value, match, how))
-			return TRUE;
-
-		/* Now we decode any addresses, and try asis matches on name and address parts */
-		cia = camel_internet_address_new();
-		if (type == CAMEL_SEARCH_TYPE_ADDRESS_ENCODED)
-			camel_address_decode((CamelAddress *)cia, value);
-		else
-			camel_address_unformat((CamelAddress *)cia, value);
-
-		for (i=0; !truth && camel_internet_address_get(cia, i, &name, &addr);i++)
-			truth = (name && header_match(name, match, how)) || (addr && header_match(addr, match, how));
-
-		camel_object_unref((CamelObject *)cia);
-		break;
-	}
-
-	return truth;
-}
-
-/* performs a 'slow' content-based match */
-/* there is also an identical copy of this in camel-filter-search.c */
-gboolean
-camel_search_message_body_contains (CamelDataWrapper *object, regex_t *pattern)
-{
-	CamelDataWrapper *containee;
-	int truth = FALSE;
-	int parts, i;
-	
-	containee = camel_medium_get_content_object (CAMEL_MEDIUM (object));
-	
-	if (containee == NULL)
-		return FALSE;
-	
-	/* using the object types is more accurate than using the mime/types */
-	if (CAMEL_IS_MULTIPART (containee)) {
-		parts = camel_multipart_get_number (CAMEL_MULTIPART (containee));
-		for (i = 0; i < parts && truth == FALSE; i++) {
-			CamelDataWrapper *part = (CamelDataWrapper *)camel_multipart_get_part (CAMEL_MULTIPART (containee), i);
-			if (part)
-				truth = camel_search_message_body_contains (part, pattern);
-		}
-	} else if (CAMEL_IS_MIME_MESSAGE (containee)) {
-		/* for messages we only look at its contents */
-		truth = camel_search_message_body_contains ((CamelDataWrapper *)containee, pattern);
-	} else if (header_content_type_is(CAMEL_DATA_WRAPPER (containee)->mime_type, "text", "*")) {
-		/* for all other text parts, we look inside, otherwise we dont care */
-		CamelStreamMem *mem = (CamelStreamMem *)camel_stream_mem_new ();
-		
-		camel_data_wrapper_write_to_stream (containee, CAMEL_STREAM (mem));
-		camel_stream_write (CAMEL_STREAM (mem), "", 1);
-		truth = regexec (pattern, mem->buffer->data, 0, NULL, 0) == 0;
-		camel_object_unref (CAMEL_OBJECT (mem));
-	}
-	
-	return truth;
-}
-
-static __inline__ guint32
-camel_utf8_getc(const unsigned char **ptr)
-{
-	register unsigned char *p = (unsigned char *)*ptr;
-	register unsigned char c, r;
-	register guint32 v, m;
-
-again:
-	r = *p++;
-loop:
-	if (r < 0x80) {
-		*ptr = p;
-		v = r;
-	} else if (r < 0xfe) { /* valid start char? */
-		v = r;
-		m = 0x7f80;	/* used to mask out the length bits */
-		do {
-			c = *p++;
-			if ((c & 0xc0) != 0x80) {
-				r = c;
-				goto loop;
-			}
-			v = (v<<6) | (c & 0x3f);
-			r<<=1;
-			m<<=5;
-		} while (r & 0x40);
-		
-		*ptr = p;
-
-		v &= ~m;
-	} else {
-		goto again;
-	}
-
-	return v;
-}
-
-static void
-output_c(GString *w, guint32 c, int *type)
-{
-	int utf8len;
-	char utf8[8];
-
-	if (!g_unichar_isalnum(c))
-		*type = CAMEL_SEARCH_WORD_COMPLEX | (*type & CAMEL_SEARCH_WORD_8BIT);
-	else
-		c = g_unichar_tolower(c);
-
-	if (c > 0x80)
-		*type |= CAMEL_SEARCH_WORD_8BIT;
-
-	/* FIXME: use camel_utf8_putc */
-	utf8len = g_unichar_to_utf8(c, utf8);
-	utf8[utf8len] = 0;
-	g_string_append(w, utf8);
-}
-
-static void
-output_w(GString *w, GPtrArray *list, int type)
-{
-	struct _camel_search_word *word;
-
-	if (w->len) {
-		word = g_malloc0(sizeof(*word));
-		word->word = g_strdup(w->str);
-		word->type = type;
-		g_ptr_array_add(list, word);
-		g_string_truncate(w, 0);
-	}
-}
-
-struct _camel_search_words *
-camel_search_words_split(const unsigned char *in)
-{
-	int type = CAMEL_SEARCH_WORD_SIMPLE, all = 0;
-	GString *w;
-	struct _camel_search_words *words;
-	GPtrArray *list = g_ptr_array_new();
-	guint32 c;
-	int inquote = 0;
-
-	words = g_malloc0(sizeof(*words));	
-	w = g_string_new("");
-
-	do {
-		c = camel_utf8_getc(&in);
-
-		if (c == 0
-		    || (inquote && c == '"')
-		    || (!inquote && g_unichar_isspace(c))) {
-			output_w(w, list, type);
-			all |= type;
-			type = CAMEL_SEARCH_WORD_SIMPLE;
-			inquote = 0;
-		} else {
-			if (c == '\\') {
-				c = camel_utf8_getc(&in);
-				if (c)
-					output_c(w, c, &type);
-				else {
-					output_w(w, list, type);
-					all |= type;
-				}
-			} else if (c == '\"') {
-				inquote = 1;
-			} else {
-				output_c(w, c, &type);
-			}
-		}
-	} while (c);
-
-	g_string_free(w, TRUE);
-	words->len = list->len;
-	words->words = (struct _camel_search_word **)list->pdata;
-	words->type = all;
-	g_ptr_array_free(list, FALSE);
-
-	return words;
-}
-
-/* takes an existing 'words' list, and converts it to another consisting of
-   only simple words, with any punctuation etc stripped */
-struct _camel_search_words *
-camel_search_words_simple(struct _camel_search_words *wordin)
-{
-	int i;
-	const unsigned char *ptr, *start, *last;
-	int type = CAMEL_SEARCH_WORD_SIMPLE, all = 0;
-	GPtrArray *list = g_ptr_array_new();
-	struct _camel_search_word *word;
-	struct _camel_search_words *words;
-	guint32 c;
-
-	words = g_malloc0(sizeof(*words));	
-
-	for (i=0;i<wordin->len;i++) {
-		if ((wordin->words[i]->type & CAMEL_SEARCH_WORD_COMPLEX) == 0) {
-			word = g_malloc0(sizeof(*word));
-			word->type = wordin->words[i]->type;
-			word->word = g_strdup(wordin->words[i]->word);
-			g_ptr_array_add(list, word);
-		} else {
-			ptr = wordin->words[i]->word;
-			start = last = ptr;
-			do {
-				c = camel_utf8_getc(&ptr);
-				if (c == 0 || !g_unichar_isalnum(c)) {
-					if (last > start) {
-						word = g_malloc0(sizeof(*word));
-						word->word = g_strndup(start, last-start);
-						word->type = type;
-						g_ptr_array_add(list, word);
-						all |= type;
-						type = CAMEL_SEARCH_WORD_SIMPLE;
-					}
-					start = ptr;
-				}
-				if (c > 0x80)
-					type = CAMEL_SEARCH_WORD_8BIT;
-				last = ptr;
-			} while (c);
-		}
-	}
-
-	words->len = list->len;
-	words->words = (struct _camel_search_word **)list->pdata;
-	words->type = all;
-	g_ptr_array_free(list, FALSE);
-
-	return words;
-}
-
-void
-camel_search_words_free(struct _camel_search_words *words)
-{
-	int i;
-
-	for (i=0;i<words->len;i++) {
-		struct _camel_search_word *word = words->words[i];
-
-		g_free(word->word);
-		g_free(word);
-	}
-	g_free(words->words);
-	g_free(words);
-}
-
-- 
cgit v1.2.3