From 345d090ac854ad26d2a19827d56f3d48382a1256 Mon Sep 17 00:00:00 2001 From: Not Zed Date: Mon, 14 Jan 2002 09:35:52 +0000 Subject: Rewritten to use a cache for body searches when online. Will need some 2002-01-14 Not Zed * providers/imap/camel-imap-search.c (imap_body_contains): Rewritten to use a cache for body searches when online. Will need some heavy testing but so far seems to be beneficial. * providers/imap/camel-imap-folder.c (imap_search_by_expression, search_by_uids): dont initialise search object here. (camel_imap_folder_new): Setup search object here with pointer to cache dir. 2001-12-01 Not Zed * camel-store-summary.[ch]: New class to store a store's folder list in. Not yet completed. svn path=/trunk/; revision=15314 --- camel/providers/imap/camel-imap-search.c | 474 ++++++++++++++++++++++++------- 1 file changed, 370 insertions(+), 104 deletions(-) (limited to 'camel/providers/imap/camel-imap-search.c') diff --git a/camel/providers/imap/camel-imap-search.c b/camel/providers/imap/camel-imap-search.c index 2adaaef244..b184843f27 100644 --- a/camel/providers/imap/camel-imap-search.c +++ b/camel/providers/imap/camel-imap-search.c @@ -4,8 +4,9 @@ /* * Authors: * Dan Winship + * Michael Zucchi * - * Copyright 2000, 2001 Ximian, Inc. + * Copyright 2000, 2001, 2002 Ximian, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -35,10 +36,58 @@ #include "camel-imap-search.h" #include "camel-imap-private.h" #include "camel-imap-utils.h" +#include "camel-imap-summary.h" -static ESExpResult * -imap_body_contains (struct _ESExp *f, int argc, struct _ESExpResult **argv, - CamelFolderSearch *s); +#include "e-util/md5-utils.h" /* md5 hash building */ +#include "camel-mime-utils.h" /* base64 encoding */ + +#include "camel-seekable-stream.h" + +#define d(x) x + +/* + File is: + BODY (as in body search) + Last uid when search performed + termcount: number of search terms + matchcount: number of matches + term0, term1 ... + match0, match1, match2, ... +*/ + +/* size of in-memory cache */ +#define MATCH_CACHE_SIZE (32) + +/* Also takes care of 'endianness' file magic */ +#define MATCH_MARK (('B' << 24) | ('O' << 16) | ('D' << 8) | 'Y') + +/* on-disk header, in native endianness format, matches follow */ +struct _match_header { + guint32 mark; + guint32 validity; /* uidvalidity for this folder */ + guint32 lastuid; + guint32 termcount; + guint32 matchcount; +}; + +/* in-memory record */ +struct _match_record { + struct _match_record *next; + struct _match_record *prev; + + char hash[17]; + + guint32 lastuid; + guint32 validity; + + unsigned int termcount; + char **terms; + GArray *matches; +}; + + +static void free_match(CamelImapSearch *is, struct _match_record *mr); +static ESExpResult *imap_body_contains (struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolderSearch *s); static CamelFolderSearchClass *imap_search_parent_class; @@ -49,12 +98,33 @@ camel_imap_search_class_init (CamelImapSearchClass *camel_imap_search_class) CamelFolderSearchClass *camel_folder_search_class = CAMEL_FOLDER_SEARCH_CLASS (camel_imap_search_class); - imap_search_parent_class = camel_type_get_global_classfuncs (camel_folder_search_get_type ()); + imap_search_parent_class = (CamelFolderSearchClass *)camel_type_get_global_classfuncs (camel_folder_search_get_type ()); /* virtual method overload */ camel_folder_search_class->body_contains = imap_body_contains; } +static void +camel_imap_search_init(CamelImapSearch *is) +{ + e_dlist_init(&is->matches); + is->matches_hash = g_hash_table_new(g_str_hash, g_str_equal); + is->matches_count = 0; + is->lastuid = 0; +} + +static void +camel_imap_search_finalise(CamelImapSearch *is) +{ + struct _match_record *mr; + + while ( (mr = (struct _match_record *)e_dlist_remtail(&is->matches)) ) + free_match(is, mr); + g_hash_table_destroy(is->matches_hash); + if (is->cache) + camel_object_unref((CamelObject *)is->cache); +} + CamelType camel_imap_search_get_type (void) { @@ -65,138 +135,334 @@ camel_imap_search_get_type (void) CAMEL_FOLDER_SEARCH_TYPE, "CamelImapSearch", sizeof (CamelImapSearch), sizeof (CamelImapSearchClass), - (CamelObjectClassInitFunc) camel_imap_search_class_init, - NULL, NULL, NULL); + (CamelObjectClassInitFunc) camel_imap_search_class_init, NULL, + (CamelObjectInitFunc) camel_imap_search_init, + (CamelObjectFinalizeFunc) camel_imap_search_finalise); } return camel_imap_search_type; } +/** + * camel_imap_search_new: + * + * Return value: A new CamelImapSearch widget. + **/ +CamelFolderSearch * +camel_imap_search_new (const char *cachedir) +{ + CamelFolderSearch *new = CAMEL_FOLDER_SEARCH (camel_object_new (camel_imap_search_get_type ())); + CamelImapSearch *is = (CamelImapSearch *)new; + + camel_folder_search_construct (new); + + is->cache = camel_data_cache_new(cachedir, 0, NULL); + if (is->cache) { + /* Expire entries after 14 days of inactivity */ + camel_data_cache_set_expire_access(is->cache, 60*60*24*14); + } + + return new; +} + + +static void +hash_match(char hash[17], int argc, struct _ESExpResult **argv) +{ + MD5Context ctx; + unsigned char digest[16]; + unsigned int state = 0, save = 0; + int i; + + md5_init(&ctx); + for (i=0;itype == ESEXP_RES_STRING) + md5_update(&ctx, argv[i]->value.string, strlen(argv[i]->value.string)); + } + md5_final(&ctx, digest); + + base64_encode_close(digest, 12, FALSE, hash, &state, &save); + + for (i=0;i<16;i++) { + if (hash[i] == '+') + hash[i] = ','; + if (hash[i] == '/') + hash[i] = '_'; + } + + hash[16] = 0; +} + static int -cmp_uid(const void *ap, const void *bp) +save_match(CamelImapSearch *is, struct _match_record *mr) { - unsigned int a, b; + guint32 mark = MATCH_MARK; + int ret = 0; + struct _match_header header; + CamelStream *stream; - a = strtoul(((char **)ap)[0], NULL, 10); - b = strtoul(((char **)bp)[0], NULL, 10); - if (acache == NULL) return -1; - else if (a>b) - return 1; + + stream = camel_data_cache_add(is->cache, "search/body-contains", mr->hash, NULL); + if (stream == NULL) + return -1; + + d(printf("Saving search cache entry to '%s': %s\n", mr->hash, mr->terms[0])); + + /* we write the whole thing, then re-write the header magic, saves fancy sync code */ + memcpy(&header.mark, " ", 4); + header.termcount = 0; + header.matchcount = mr->matches->len; + header.lastuid = mr->lastuid; + header.validity = mr->validity; + + if (camel_stream_write(stream, (char *)&header, sizeof(header)) != sizeof(header) + || camel_stream_write(stream, mr->matches->data, mr->matches->len*sizeof(guint32)) != mr->matches->len*sizeof(guint32) + || camel_seekable_stream_seek((CamelSeekableStream *)stream, 0, CAMEL_STREAM_SET) == -1 + || camel_stream_write(stream, (char *)&mark, sizeof(mark)) != sizeof(mark)) { + d(printf(" saving failed, removing cache entry\n")); + camel_data_cache_remove(is->cache, "search/body-contains", mr->hash, NULL); + ret = -1; + } + + camel_object_unref((CamelObject *)stream); + return ret; +} + +static void +free_match(CamelImapSearch *is, struct _match_record *mr) +{ + int i; + + for (i=0;itermcount;i++) + g_free(mr->terms[i]); + g_free(mr->terms); + g_array_free(mr->matches, TRUE); + g_free(mr); +} + +static struct _match_record * +load_match(CamelImapSearch *is, char hash[17], int argc, struct _ESExpResult **argv) +{ + struct _match_record *mr; + CamelStream *stream = NULL; + struct _match_header header; + int i; + + mr = g_malloc0(sizeof(*mr)); + mr->matches = g_array_new(0, 0, sizeof(guint32)); + g_assert(strlen(hash) == 16); + strcpy(mr->hash, hash); + mr->terms = g_malloc0(sizeof(mr->terms[0]) * argc); + for (i=0;itype == ESEXP_RES_STRING) { + mr->termcount++; + mr->terms[i] = g_strdup(argv[i]->value.string); + } + } + + d(printf("Loading search cache entry to '%s': %s\n", mr->hash, mr->terms[0])); + + memset(&header, 0, sizeof(header)); + if (is->cache) + stream = camel_data_cache_get(is->cache, "search/body-contains", mr->hash, NULL); + if (stream != NULL) { + /* 'cause i'm gonna be lazy, i'm going to have the termcount == 0 for now, + and not load or save them since i can't think of a nice way to do it, the hash + should be sufficient to key it */ + /* This check should also handle endianness changes, we just throw away + the data (its only a cache) */ + if (camel_stream_read(stream, (char *)&header, sizeof(header)) == sizeof(header) + && header.validity == is->validity + && header.mark == MATCH_MARK + && header.termcount == 0) { + d(printf(" found %d matches\n", header.matchcount)); + g_array_set_size(mr->matches, header.matchcount); + camel_stream_read(stream, mr->matches->data, sizeof(guint32)*header.matchcount); + } else { + d(printf(" file format invalid/validity changed\n")); + memset(&header, 0, sizeof(header)); + } + camel_object_unref((CamelObject *)stream); + } else { + d(printf(" no cache entry found\n")); + } + + mr->validity = header.validity; + if (mr->validity != is->validity) + mr->lastuid = 0; + else + mr->lastuid = header.lastuid; + + return mr; +} + +static int +sync_match(CamelImapSearch *is, struct _match_record *mr) +{ + char *p, *result, *lasts = NULL; + CamelImapResponse *response; + guint32 uid; + CamelFolder *folder = ((CamelFolderSearch *)is)->folder; + CamelImapStore *store = (CamelImapStore *)folder->parent_store; + + if (mr->lastuid >= is->lastuid && mr->validity == is->validity) + return 0; + + d(printf("updating match record for uid's %d:%d\n", mr->lastuid+1, is->lastuid)); + + /* TODO: Handle multiple search terms */ + + response = camel_imap_command (store, folder, NULL, + "UID SEARCH UID %d:%d BODY \"%s\"", + mr->lastuid+1, is->lastuid, mr->terms[0]); + if (!response) + return -1; + result = camel_imap_response_extract (store, response, "SEARCH", NULL); + if (!result) + return -1; + + p = result + sizeof ("* SEARCH"); + for (p = strtok_r (p, " ", &lasts); p; p = strtok_r (NULL, " ", &lasts)) { + uid = strtoul(p, NULL, 10); + g_array_append_vals(mr->matches, &uid, 1); + } + g_free(result); + + mr->validity = is->validity; + mr->lastuid = is->lastuid; + save_match(is, mr); return 0; } +static struct _match_record * +get_match(CamelImapSearch *is, int argc, struct _ESExpResult **argv) +{ + char hash[17]; + struct _match_record *mr; + + hash_match(hash, argc, argv); + + mr = g_hash_table_lookup(is->matches_hash, hash); + if (mr == NULL) { + while (is->matches_count >= MATCH_CACHE_SIZE) { + mr = (struct _match_record *)e_dlist_remtail(&is->matches); + if (mr) { + printf("expiring match '%s' (%s)\n", mr->hash, mr->terms[0]); + g_hash_table_remove(is->matches_hash, mr->hash); + free_match(is, mr); + is->matches_count--; + } else { + is->matches_count = 0; + } + } + mr = load_match(is, hash, argc, argv); + g_hash_table_insert(is->matches_hash, mr->hash, mr); + is->matches_count++; + } else { + e_dlist_remove((EDListNode *)mr); + } + + e_dlist_addhead(&is->matches, (EDListNode *)mr); + + /* what about offline mode? */ + /* We could cache those results too, or should we cache them elsewhere? */ + sync_match(is, mr); + + return mr; +} + static ESExpResult * -imap_body_contains (struct _ESExp *f, int argc, struct _ESExpResult **argv, - CamelFolderSearch *s) +imap_body_contains (struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolderSearch *s) { CamelImapStore *store = CAMEL_IMAP_STORE (s->folder->parent_store); - char *value = argv[0]->value.string; - CamelImapResponse *response = NULL; - char *result, *p, *lasts = NULL, *real_uid; - const char *uid = ""; + CamelImapSearch *is = (CamelImapSearch *)s; + char *uid; ESExpResult *r; - CamelMessageInfo *info; + CamelMessageInfo *info; GHashTable *uid_hash = NULL; - char *set; - GPtrArray *sorted; - int i; + GPtrArray *array; + int i, j; + struct _match_record *mr; + guint32 uidn, *uidp; + + d(printf("Performing body search '%s'\n", argv[0]->value.string)); + + /* TODO: Cache offline searches too? */ /* If offline, search using the parent class, which can handle this manually */ if (!camel_disco_store_check_online (CAMEL_DISCO_STORE (store), NULL)) return imap_search_parent_class->body_contains(f, argc, argv, s); - if (s->current) { - uid = camel_message_info_uid (s->current); - r = e_sexp_result_new (f, ESEXP_RES_BOOL); - r->value.bool = FALSE; - response = camel_imap_command (store, s->folder, NULL, - "UID SEARCH UID %s BODY \"%s\"", - uid, value); - } else { - r = e_sexp_result_new (f, ESEXP_RES_ARRAY_PTR); - - if (argc == 1 && *value == '\0' && s->folder) { - /* optimise the match "" case - match everything */ + /* optimise the match "" case - match everything */ + if (argc == 1 && argv[0]->value.string[0] == '\0') { + if (s->current) { + r = e_sexp_result_new(f, ESEXP_RES_BOOL); + r->value.bool = TRUE; + } else { + r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR); r->value.ptrarray = g_ptr_array_new (); for (i = 0; i < s->summary->len; i++) { - CamelMessageInfo *info = g_ptr_array_index (s->summary, i); - g_ptr_array_add (r->value.ptrarray, (char *)camel_message_info_uid (info)); + info = g_ptr_array_index(s->summary, i); + g_ptr_array_add(r->value.ptrarray, (char *)camel_message_info_uid(info)); } + } + } else if (s->summary->len == 0) { + /* nothing to match case, do nothing (should be handled higher up?) */ + if (s->current) { + r = e_sexp_result_new(f, ESEXP_RES_BOOL); + r->value.bool = FALSE; } else { - /* If searching a (reasonably small) subset of - the real folder size, then use a - message-set to optimise it */ - /* TODO: This peeks a bunch of 'private'ish data */ - if (s->summary->len < camel_folder_get_message_count(s->folder)/2) { - sorted = g_ptr_array_new(); - g_ptr_array_set_size(sorted, s->summary->len); - for (i=0;isummary->len;i++) - sorted->pdata[i] = (void *)camel_message_info_uid((CamelMessageInfo *)s->summary->pdata[i]); - qsort(sorted->pdata, sorted->len, sizeof(sorted->pdata[0]), cmp_uid); - set = imap_uid_array_to_set(s->folder->summary, sorted); - response = camel_imap_command (store, s->folder, NULL, - "UID SEARCH UID %s BODY \"%s\"", - set, value); - g_free(set); - g_ptr_array_free(sorted, TRUE); - } else { - response = camel_imap_command (store, s->folder, NULL, - "UID SEARCH BODY \"%s\"", - value); - } - + r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR); r->value.ptrarray = g_ptr_array_new (); } - } - - if (!response) - return r; - result = camel_imap_response_extract (store, response, "SEARCH", NULL); - if (!result) - return r; - - p = result + sizeof ("* SEARCH"); - for (p = strtok_r (p, " ", &lasts); p; p = strtok_r (NULL, " ", &lasts)) { + } else { + int truth = FALSE; + + /* setup lastuid/validity for synchronising */ + info = g_ptr_array_index(s->summary, s->summary->len-1); + is->lastuid = strtoul(camel_message_info_uid(info), NULL, 10); + is->validity = ((CamelImapSummary *)(s->folder->summary))->validity; + + mr = get_match(is, argc, argv); + if (s->current) { - if (!strcmp (uid, p)) { - r->value.bool = TRUE; - break; - } + uidn = strtoul(camel_message_info_uid(s->current), NULL, 10); + uidp = (guint32 *)mr->matches->data; + j = mr->matches->len; + for (i=0;ivalue.bool = truth; } else { - /* if we need to setup a hash of summary items, this way we get - access to the summary memory which is locked for the duration of - the search, and wont vanish on us */ - if (uid_hash == NULL) { - uid_hash = g_hash_table_new (g_str_hash, g_str_equal); - for (i = 0; i < s->summary->len; i++) { - info = s->summary->pdata[i]; - g_hash_table_insert (uid_hash, (char *)camel_message_info_uid (info), info); - } + r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR); + array = r->value.ptrarray = g_ptr_array_new(); + + /* We use a hash to map the uid numbers to uid strings as required by the search api */ + /* We use the summary's strings so we dont need to alloc more */ + uid_hash = g_hash_table_new(NULL, NULL); + for (i = 0; i < s->summary->len; i++) { + info = s->summary->pdata[i]; + uid = (char *)camel_message_info_uid(info); + uidn = strtoul(uid, NULL, 10); + g_hash_table_insert(uid_hash, (void *)uidn, uid); } - if (g_hash_table_lookup_extended (uid_hash, p, (void *)&real_uid, (void *)&info)) - g_ptr_array_add (r->value.ptrarray, real_uid); + + uidp = (guint32 *)mr->matches->data; + j = mr->matches->len; + for (i=0;i