aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libibex/ChangeLog14
-rw-r--r--libibex/wordindex.h2
-rw-r--r--libibex/wordindexmem.c61
3 files changed, 76 insertions, 1 deletions
diff --git a/libibex/ChangeLog b/libibex/ChangeLog
index f4ed9f5f28..de9cb8dd40 100644
--- a/libibex/ChangeLog
+++ b/libibex/ChangeLog
@@ -1,3 +1,17 @@
+2000-11-16 Not Zed <NotZed@HelixCode.com>
+
+ * wordindexmem.c (ibex_create_word_index_mem): Initialise nameinit
+ & namecache.
+ (contains_name): On first call, load all names into memory. We
+ usually do a whole lot of lookups in a row, and this saves a lot
+ of penalties on a big list, for not too much a memory hit.
+ (find_name): If we have the namelist in memory do a quick
+ short-circuit check to see if we have to do further processing.
+ (unindex_name): Cross check the namecache, if it is active.
+ Remove it there too/or exit (no work to do).
+ (word_flush): If we have the namecache active, destroy it now, as
+ it is not needed anymore (for now).
+
2000-10-30 Kjartan Maraas <kmaraas@gnome.org>
* hash.c: #include <stdlib.h> to remove warning.
diff --git a/libibex/wordindex.h b/libibex/wordindex.h
index 353f4dddc6..3a60accb70 100644
--- a/libibex/wordindex.h
+++ b/libibex/wordindex.h
@@ -61,6 +61,8 @@ struct _IBEXWord {
struct _list wordnodes; /* LRU list of wordcache structures */
int wordcount; /* how much space used in cache */
int precount;
+ GHashTable *namecache; /* a list of names (only), cached for quick reference */
+ int nameinit;
};
diff --git a/libibex/wordindexmem.c b/libibex/wordindexmem.c
index f0cc7336f8..4c0bca7cef 100644
--- a/libibex/wordindexmem.c
+++ b/libibex/wordindexmem.c
@@ -136,6 +136,8 @@ ibex_create_word_index_mem(struct _memcache *bc, blockid_t *wordroot, blockid_t
ibex_list_new(&idx->wordnodes);
idx->wordcount = 0;
idx->precount = 0;
+ idx->namecache = g_hash_table_new(g_str_hash, g_str_equal);
+ idx->nameinit = 0;
idx->klass = &ibex_word_index_mem_class;
/* we use the same block array storage for both indexes at the moment */
@@ -271,6 +273,19 @@ static void unindex_name(struct _IBEXWord *idx, const char *name)
d(printf("unindexing %s\n", name));
+ /* if we have a namecache, check that to see if we need to remove that item, or there is no work here */
+ if (idx->nameinit) {
+ char *oldkey;
+ gboolean oldval;
+
+ if (g_hash_table_lookup_extended(idx->namecache, name, (void *)&oldkey, (void *)&oldval)) {
+ g_hash_table_remove(idx->namecache, oldkey);
+ g_free(oldkey);
+ } else {
+ return;
+ }
+ }
+
/* lookup the hash key */
nameid = idx->nameindex->klass->find(idx->nameindex, name, strlen(name));
/* get the block for this key */
@@ -330,7 +345,30 @@ static void unindex_name(struct _IBEXWord *idx, const char *name)
/* index contains (any) data for name */
static gboolean contains_name(struct _IBEXWord *idx, const char *name)
{
- return idx->nameindex->klass->find(idx->nameindex, name, strlen(name)) != 0;
+ struct _IBEXCursor *idc;
+ nameid_t wordid;
+ char *key;
+ int len;
+
+ /* load all the names into memory, since we're *usually* about to do a lot of these */
+
+ /* Note that because of the (poor) hash algorithm, this is >> faster than
+ looking up every key in turn. Basically because all keys are stored packed
+ in the same list, not in buckets of keys for the same hash (among other reasons) */
+
+ if (!idx->nameinit) {
+ printf("pre-loading all name info into memory\n");
+ idc = idx->nameindex->klass->get_cursor(idx->nameindex);
+ while ( (wordid = idc->klass->next(idc)) ) {
+ key = idc->index->klass->get_key(idc->index, wordid, &len);
+ g_hash_table_insert(idx->namecache, key, (void *)TRUE);
+ }
+ idc->klass->close(idc);
+ idx->nameinit = TRUE;
+ }
+
+ return (gboolean)g_hash_table_lookup(idx->namecache, name);
+ /*return idx->nameindex->klass->find(idx->nameindex, name, strlen(name)) != 0;*/
}
/* returns all matches for word */
@@ -391,6 +429,10 @@ static gboolean find_name(struct _IBEXWord *idx, const char *name, const char *w
struct _wordcache *cache;
int i;
+ /* if we have the namelist in memory, quick-check that */
+ if (idx->nameinit && g_hash_table_lookup(idx->namecache, name) == NULL)
+ return FALSE;
+
/* lookup the hash key for the name */
nameid = idx->nameindex->klass->find(idx->nameindex, name, strlen(name));
/* get the block for this name */
@@ -607,6 +649,10 @@ static void add_list(struct _IBEXWord *idx, const char *name, GPtrArray *words)
d(cache_sanity(idx));
+ /* make sure we keep the namecache in sync, if it is active */
+ if (idx->nameinit && g_hash_table_lookup(idx->namecache, name) == NULL)
+ g_hash_table_insert(idx->namecache, g_strdup(name), (void *)TRUE);
+
/* get the nameid and block start for this name */
add_index_key(idx->nameindex, name, &nameid, &nameblock, &nametail);
@@ -694,6 +740,14 @@ word_sync(struct _IBEXWord *idx)
return 0;
}
+static gboolean
+free_key(void *key, void *value, void *data)
+{
+ g_free(key);
+
+ return TRUE;
+}
+
/* sync and flush any in-memory data to disk and free it */
static int
word_flush(struct _IBEXWord *idx)
@@ -702,6 +756,10 @@ word_flush(struct _IBEXWord *idx)
g_hash_table_foreach_remove(idx->wordcache, sync_free_value, idx);
idx->wordcount = 0;
+ if (idx->nameinit) {
+ g_hash_table_foreach_remove(idx->namecache, free_key, NULL);
+ idx->nameinit = FALSE;
+ }
return 0;
}
@@ -715,6 +773,7 @@ static int word_close(struct _IBEXWord *idx)
idx->wordstore->klass->close(idx->wordstore);*/
idx->wordindex->klass->close(idx->wordindex);
g_hash_table_destroy(idx->wordcache);
+ g_hash_table_destroy(idx->namecache);
g_free(idx);
return 0;