From 9aae808cd0b4cf8bd35f6c0205e30c79f62632ef Mon Sep 17 00:00:00 2001
From: Not Zed <NotZed@HelixCode.com>
Date: Wed, 25 Oct 2000 13:59:44 +0000
Subject: Bugfixes, performance improvemnts.  Should scale up much better than
 before, and be more bugfree than ever!

2000-10-25  Not Zed  <NotZed@HelixCode.com>

 	* ibex_internal.h (IBEX_VERSION): Bumped to another version.  The
 	file format hasn't changed, but earlier bugs may create invalid
 	files.

 	* block.c (ibex_block_read): Use the root data directly.
 	(ibex_block_cache_open): As well.
 	(ibex_block_get): And here too.
 	(ibex_block_cache_sync): Sync the root block directly here.

 	* block.h: Pad root block out to 1024 bytes.
 	Added root block to struct _memcache.

 	* disktail.c (tail_get): Dirty the root block.
 	(tail_get): Fix for changes to root access.
 	(disk_remove): And here too.

 	* wordindexmem.c (sync_cache_entry): Handle the case of not having
 	any files in the list, which can happen now.
 	(word_index_pre): Make sure we set the wordid on the new cache
 	entry.

 	* ibex_block.c (ibex_save): Sigh.  Pass the right argument to
 	index_post.

	* block.c (ibex_block_cache_open): Create a word_index_mem for
 	indexing the words, rather than a word_index.

 	* ibex_block.c (ibex_index_buffer): If we haven't called index_pre
 	yet, do it before indexing anything.
 	(ibex_save): If wehave called index_pre previously, call
 	index_post.
 	(ibex_close): And same for here.

 	* index.h: Added a cursor class, and cursor retrieval function for
 	iterating through an index's keys.

 	* wordindexmem.c (ibex_create_word_index_mem): New word class,
 	similar to wordindex, but meant to be faster for updates.
 	(word_index_pre): Implement.  We load all keys into memory.
 	(word_index_post): Implement.  We sync and free all keys.
 	(find): Remove lru code, its no longer a cache, but a lookup
 	table.
 	(add_index_cache): Remove lru code here too.
 	(find_name): And here.
 	(word_flush): Flush the hashtable direct.
 	(word_close): Call flush to flush, rather than doing it ourselves.
 	(add_index_cache): If we are in an index state, we can assume a
 	cache miss == a new word.
 	(word_index_post): Maintain whether or not we are in an index
 	state, and the depth of the state.
 	(word_index_pre): Likewise.  Dont reread the index if we have
 	already.
 	(cache_sanity): Fixed for struct changes.

 	* wordindex.h (IBEXWordClass): Added functions to prepare/cleanup
 	for lots of indexing.  i.e. can be used to optimise indexing speed
 	at the cost of extra memory usage during the indexing process.

	* hash.c (hash_cursor_create): Create a new cursor for iterating through a
 	hashtable.
 	(hash_cursor_close): 'close' the cursor. It is upto the
 	application to close any cursors it creates.
 	(hash_cursor_next): Goto the next key id.
 	(hash_cursor_next_key): Goto the next key, reutrn the key.
 	(hash_get_cursor): Return a cursor object.

	* wordindex.c (word_index_post):
 	(word_index_pre): Added (empty) callbacks for pre/post functions.

svn path=/trunk/; revision=6165
---
 libibex/hash.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

(limited to 'libibex/hash.c')

diff --git a/libibex/hash.c b/libibex/hash.c
index 9395e1e00b..0581d22fcb 100644
--- a/libibex/hash.c
+++ b/libibex/hash.c
@@ -42,6 +42,14 @@
 
 typedef guint32 hashid_t;
 
+struct _HASHCursor {
+	struct _IBEXCursor cursor;
+
+	hashid_t block;
+	unsigned int index;
+	unsigned int size;
+};
+
 static struct _IBEXIndex *hash_create(struct _memcache *bc, int size);
 static struct _IBEXIndex *hash_open(struct _memcache *bc, blockid_t root);
 static int hash_sync(struct _IBEXIndex *index);
@@ -53,6 +61,12 @@ static hashid_t hash_insert(struct _IBEXIndex *index, const char *key, int keyle
 static char *hash_get_key(struct _IBEXIndex *index, hashid_t hashbucket, int *len);
 static void hash_set_data_block(struct _IBEXIndex *index, hashid_t keyid, blockid_t blockid, blockid_t tail);
 static blockid_t hash_get_data_block(struct _IBEXIndex *index, hashid_t keyid, blockid_t *tail);
+static struct _IBEXCursor *hash_get_cursor(struct _IBEXIndex *index);
+
+static struct _IBEXCursor *hash_cursor_create(struct _IBEXIndex *);
+static void hash_cursor_close(struct _IBEXCursor *);
+static guint32 hash_cursor_next(struct _IBEXCursor *);
+static char *hash_cursor_next_key(struct _IBEXCursor *, int *keylenptr);
 
 struct _IBEXIndexClass ibex_hash_class = {
 	hash_create, hash_open,
@@ -63,6 +77,13 @@ struct _IBEXIndexClass ibex_hash_class = {
 	hash_get_key,
 	hash_set_data_block,
 	hash_get_data_block,
+	hash_get_cursor,
+};
+
+struct _IBEXCursorClass ibex_hash_cursor_class = {
+	hash_cursor_close,
+	hash_cursor_next,
+	hash_cursor_next_key
 };
 
 /* the reason we have the tail here is that otherwise we need to
@@ -197,6 +218,12 @@ static int hash_close(struct _IBEXIndex *index)
 	return 0;
 }
 
+/* get an iterator class */
+static struct _IBEXCursor *hash_get_cursor(struct _IBEXIndex *index)
+{
+	return hash_cursor_create(index);
+}
+
 /* convert a hashbucket id into a name */
 static char *
 hash_get_key(struct _IBEXIndex *index, hashid_t hashbucket, int *len)
@@ -620,6 +647,74 @@ hash_insert(struct _IBEXIndex *index, const char *key, int keylen)
 	return HASH_KEY(keybucket, 0);
 }
 
+/* hash cursor functions */
+static struct _IBEXCursor *
+hash_cursor_create(struct _IBEXIndex *idx)
+{
+	struct _HASHCursor *idc;
+	struct _hashroot *hashroot;
+
+	idc = g_malloc(sizeof(*idc));
+	idc->cursor.klass = &ibex_hash_cursor_class;
+	idc->cursor.index = idx;
+	idc->block = 0;
+	idc->index = 0;
+
+	hashroot = (struct _hashroot *)ibex_block_read(idx->blocks, idx->root);
+	idc->size = hashroot->size;
+
+	return &idc->cursor;
+}
+
+static void
+hash_cursor_close(struct _IBEXCursor *idc)
+{
+	g_free(idc);
+}
+
+static guint32
+hash_cursor_next(struct _IBEXCursor *idc)
+{
+	struct _HASHCursor *hc = (struct _HASHCursor *)idc;
+	struct _hashroot *hashroot;
+	struct _hashblock *bucket;
+	struct _hashtableblock *table;
+
+	/* get the next bucket chain */
+	if (hc->block != 0) {
+		int ind;
+
+		bucket = (struct _hashblock *)ibex_block_read(idc->index->blocks, HASH_BLOCK(hc->block));
+		ind = HASH_INDEX(hc->block);
+
+		g_assert(ind < bucket->used);
+
+		hc->block = bucket->hb_keys[ind].next;
+	}
+
+	if (hc->block == 0) {
+		hashroot = (struct _hashroot *)ibex_block_read(idc->index->blocks, idc->index->root);
+		while (hc->block == 0 && hc->index < hc->size) {
+			table = (struct _hashtableblock *)
+				ibex_block_read(idc->index->blocks,
+						hashroot->table[hc->index / (BLOCK_SIZE/sizeof(blockid_t))]);
+			hc->block = table->buckets[hc->index % (BLOCK_SIZE/sizeof(blockid_t))];
+
+			hc->index++;
+		}
+	}
+
+	return hc->block;
+}
+
+static char *
+hash_cursor_next_key(struct _IBEXCursor *idc, int *keylenptr)
+{
+	/* TODO: this could be made slightly mroe efficient going to the structs direct.
+	   but i'm lazy today */
+	return idc->index->klass->get_key(idc->index, idc->klass->next(idc), keylenptr);
+}
+
 /* debug */
 void ibex_hash_dump(struct _IBEXIndex *index);
 static void ibex_hash_dump_rec(struct _IBEXIndex *index, int *words, int *wordslen);
-- 
cgit v1.2.3