aboutsummaryrefslogtreecommitdiffstats
path: root/camel/camel-search-private.c
diff options
context:
space:
mode:
Diffstat (limited to 'camel/camel-search-private.c')
-rw-r--r--camel/camel-search-private.c135
1 files changed, 82 insertions, 53 deletions
diff --git a/camel/camel-search-private.c b/camel/camel-search-private.c
index 1162986af2..3b0daef962 100644
--- a/camel/camel-search-private.c
+++ b/camel/camel-search-private.c
@@ -50,7 +50,8 @@
A small issue is that case-insenstivity wont work entirely correct for utf8 strings. */
int
-camel_search_build_match_regex(regex_t *pattern, camel_search_flags_t type, int argc, struct _ESExpResult **argv, CamelException *ex)
+camel_search_build_match_regex (regex_t *pattern, camel_search_flags_t type, int argc,
+ struct _ESExpResult **argv, CamelException *ex)
{
GString *match = g_string_new("");
int c, i, count=0, err;
@@ -64,11 +65,13 @@ camel_search_build_match_regex(regex_t *pattern, camel_search_flags_t type, int
if (argv[i]->type == ESEXP_RES_STRING) {
if (count > 0)
g_string_append_c(match, '|');
- /* escape any special chars (not sure if this list is complete) */
+
word = argv[i]->value.string;
if (type & CAMEL_SEARCH_MATCH_REGEX) {
+ /* no need to escape because this should already be a valid regex */
g_string_append(match, word);
} else {
+ /* escape any special chars (not sure if this list is complete) */
if (type & CAMEL_SEARCH_MATCH_START)
g_string_append_c(match, '^');
while ((c = *word++)) {
@@ -182,7 +185,10 @@ header_soundex(const char *header, const char *match)
return truth;
}
-static guint16 utf8_get(const char **inp)
+#if 0
+/* Why do it this way when the unicode lib already has a function to do this? */
+static unicode_char_t
+utf8_get (const char **inp)
{
guint32 c, v = 0, s, shift;
const unsigned char *p = *inp;
@@ -216,56 +222,78 @@ static guint16 utf8_get(const char **inp)
*inp = p;
return v;
}
+#endif
-static const char *
-camel_ustrstrcase(const char *haystack, const char *needle)
+static unicode_char_t
+utf8_get (const char **inp)
{
- unicode_char_t *uni, *puni, *suni, u, v;
- const char *p, *s, *l;
-
- if (haystack == NULL || needle == NULL)
- return NULL;
-
- if (needle[0] == 0)
- return haystack;
-
- if (haystack[0] == 0)
- return NULL;
+ const unsigned char *p = *inp;
+ unicode_char_t c;
+
+ g_return_val_if_fail (p != NULL, 0);
+
+ p = unicode_get_utf8 (p, &c);
+ *inp = p;
+
+ return c;
+}
- puni = uni = alloca(sizeof(*uni)*(strlen(needle)+1));
+static const char *
+camel_ustrstrcase (const char *haystack, const char *needle)
+{
+ unicode_char_t *nuni, *puni;
+ unicode_char_t u;
+ const char *p;
+
+ g_return_val_if_fail (haystack != NULL, NULL);
+ g_return_val_if_fail (needle != NULL, NULL);
+ g_return_val_if_fail (strlen (needle) != 0, haystack);
+ g_return_val_if_fail (strlen (haystack) != 0, NULL);
+
+ puni = nuni = alloca (sizeof (unicode_char_t) * strlen (needle));
+
p = needle;
- while ((u = utf8_get(&p)))
- *puni++ = unicode_tolower(u);
-
- if (p == NULL)
+ while ((u = utf8_get (&p)))
+ *puni++ = unicode_tolower (u);
+
+ /* NULL means there was illegal utf-8 sequence */
+ if (!p)
return NULL;
-
- l = p = haystack;
- while ( (u = utf8_get(&p)) ) {
- v = unicode_tolower(u);
- if (uni[0] == v) {
- s = p;
- suni = uni+1;
- while (suni < puni) {
- u = utf8_get(&s);
- v = unicode_tolower(u);
- if (v != *suni)
- goto next;
- suni++;
+
+ p = haystack;
+ while ((u = utf8_get (&p))) {
+ unicode_char_t c;
+
+ c = unicode_tolower (u);
+ /* We have valid stripped char */
+ if (c == nuni[0]) {
+ const gchar *q = p;
+ gint npos = 1;
+
+ while (nuni + npos < puni) {
+ u = utf8_get (&q);
+ if (!q || !u)
+ return NULL;
+
+ c = unicode_tolower (u);
+ if (c != nuni[npos])
+ break;
+
+ npos++;
}
- return l;
+
+ if (nuni + npos == puni)
+ return p;
}
- next:
- l = p;
}
-
+
return NULL;
}
static int
-camel_ustrcasecmp(const char *s1, const char *s2)
+camel_ustrcasecmp (const char *s1, const char *s2)
{
- guint16 u1, u2=0;
+ unicode_char_t u1, u2 = 0;
if (s1 == NULL) {
if (s2 == NULL)
@@ -275,7 +303,7 @@ camel_ustrcasecmp(const char *s1, const char *s2)
}
if (s2 == NULL)
return 1;
-
+
while ((u1 = utf8_get(&s1)) && (u2 = utf8_get(&s2))) {
u1 = unicode_tolower(u1);
u2 = unicode_tolower(u2);
@@ -302,9 +330,9 @@ camel_ustrcasecmp(const char *s1, const char *s2)
}
static int
-camel_ustrncasecmp(const char *s1, const char *s2, size_t len)
+camel_ustrncasecmp (const char *s1, const char *s2, size_t len)
{
- guint16 u1, u2=0;
+ unicode_char_t u1, u2 = 0;
if (s1 == NULL) {
if (s2 == NULL)
@@ -314,7 +342,7 @@ camel_ustrncasecmp(const char *s1, const char *s2, size_t len)
}
if (s2 == NULL)
return 1;
-
+
while (len > 0 && (u1 = utf8_get(&s1)) && (u2 = utf8_get(&s2))) {
u1 = unicode_tolower(u1);
u2 = unicode_tolower(u2);
@@ -347,19 +375,20 @@ camel_ustrncasecmp(const char *s1, const char *s2, size_t len)
/* searhces for match inside value, if match is mixed case, hten use case-sensitive,
else insensitive */
-gboolean camel_search_header_match(const char *value, const char *match, camel_search_match_t how)
+gboolean
+camel_search_header_match (const char *value, const char *match, camel_search_match_t how)
{
const char *p;
-
- if (how == CAMEL_SEARCH_MATCH_SOUNDEX)
- return header_soundex(value, match);
-
- while (*value && isspace(*value))
+
+ while (*value && isspace (*value))
value++;
-
- if (strlen(value) < strlen(match))
+
+ if (how == CAMEL_SEARCH_MATCH_SOUNDEX)
+ return header_soundex (value, match);
+
+ if (strlen (value) < strlen (match))
return FALSE;
-
+
/* from dan the man, if we have mixed case, perform a case-sensitive match,
otherwise not */
p = match;