From 9aae808cd0b4cf8bd35f6c0205e30c79f62632ef Mon Sep 17 00:00:00 2001 From: Not Zed Date: Wed, 25 Oct 2000 13:59:44 +0000 Subject: Bugfixes, performance improvements. Should scale up much better than before, and be more bugfree than ever! 2000-10-25 Not Zed * ibex_internal.h (IBEX_VERSION): Bumped to another version. The file format hasn't changed, but earlier bugs may create invalid files. * block.c (ibex_block_read): Use the root data directly. (ibex_block_cache_open): As well. (ibex_block_get): And here too. (ibex_block_cache_sync): Sync the root block directly here. * block.h: Pad root block out to 1024 bytes. Added root block to struct _memcache. * disktail.c (tail_get): Dirty the root block. (tail_get): Fix for changes to root access. (disk_remove): And here too. * wordindexmem.c (sync_cache_entry): Handle the case of not having any files in the list, which can happen now. (word_index_pre): Make sure we set the wordid on the new cache entry. * ibex_block.c (ibex_save): Sigh. Pass the right argument to index_post. * block.c (ibex_block_cache_open): Create a word_index_mem for indexing the words, rather than a word_index. * ibex_block.c (ibex_index_buffer): If we haven't called index_pre yet, do it before indexing anything. (ibex_save): If we have called index_pre previously, call index_post. (ibex_close): And same for here. * index.h: Added a cursor class, and cursor retrieval function for iterating through an index's keys. * wordindexmem.c (ibex_create_word_index_mem): New word class, similar to wordindex, but meant to be faster for updates. (word_index_pre): Implement. We load all keys into memory. (word_index_post): Implement. We sync and free all keys. (find): Remove lru code, it's no longer a cache, but a lookup table. (add_index_cache): Remove lru code here too. (find_name): And here. (word_flush): Flush the hashtable direct. (word_close): Call flush to flush, rather than doing it ourselves. 
(add_index_cache): If we are in an index state, we can assume a cache miss == a new word. (word_index_post): Maintain whether or not we are in an index state, and the depth of the state. (word_index_pre): Likewise. Don't reread the index if we have already. (cache_sanity): Fixed for struct changes. * wordindex.h (IBEXWordClass): Added functions to prepare/cleanup for lots of indexing. i.e. can be used to optimise indexing speed at the cost of extra memory usage during the indexing process. * hash.c (hash_cursor_create): Create a new cursor for iterating through a hashtable. (hash_cursor_close): 'close' the cursor. It is up to the application to close any cursors it creates. (hash_cursor_next): Go to the next key id. (hash_cursor_next_key): Go to the next key, return the key. (hash_get_cursor): Return a cursor object. * wordindex.c (word_index_post): (word_index_pre): Added (empty) callbacks for pre/post functions. svn path=/trunk/; revision=6165 --- libibex/hash.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) (limited to 'libibex/hash.c') diff --git a/libibex/hash.c b/libibex/hash.c index 9395e1e00b..0581d22fcb 100644 --- a/libibex/hash.c +++ b/libibex/hash.c @@ -42,6 +42,14 @@ typedef guint32 hashid_t; +struct _HASHCursor { + struct _IBEXCursor cursor; + + hashid_t block; + unsigned int index; + unsigned int size; +}; + static struct _IBEXIndex *hash_create(struct _memcache *bc, int size); static struct _IBEXIndex *hash_open(struct _memcache *bc, blockid_t root); static int hash_sync(struct _IBEXIndex *index); @@ -53,6 +61,12 @@ static hashid_t hash_insert(struct _IBEXIndex *index, const char *key, int keyle static char *hash_get_key(struct _IBEXIndex *index, hashid_t hashbucket, int *len); static void hash_set_data_block(struct _IBEXIndex *index, hashid_t keyid, blockid_t blockid, blockid_t tail); static blockid_t hash_get_data_block(struct _IBEXIndex *index, hashid_t keyid, blockid_t *tail); +static struct 
_IBEXCursor *hash_get_cursor(struct _IBEXIndex *index); + +static struct _IBEXCursor *hash_cursor_create(struct _IBEXIndex *); +static void hash_cursor_close(struct _IBEXCursor *); +static guint32 hash_cursor_next(struct _IBEXCursor *); +static char *hash_cursor_next_key(struct _IBEXCursor *, int *keylenptr); struct _IBEXIndexClass ibex_hash_class = { hash_create, hash_open, @@ -63,6 +77,13 @@ struct _IBEXIndexClass ibex_hash_class = { hash_get_key, hash_set_data_block, hash_get_data_block, + hash_get_cursor, +}; + +struct _IBEXCursorClass ibex_hash_cursor_class = { + hash_cursor_close, + hash_cursor_next, + hash_cursor_next_key }; /* the reason we have the tail here is that otherwise we need to @@ -197,6 +218,12 @@ static int hash_close(struct _IBEXIndex *index) return 0; } +/* get an iterator class */ +static struct _IBEXCursor *hash_get_cursor(struct _IBEXIndex *index) +{ + return hash_cursor_create(index); +} + /* convert a hashbucket id into a name */ static char * hash_get_key(struct _IBEXIndex *index, hashid_t hashbucket, int *len) @@ -620,6 +647,74 @@ hash_insert(struct _IBEXIndex *index, const char *key, int keylen) return HASH_KEY(keybucket, 0); } +/* hash cursor functions */ +static struct _IBEXCursor * +hash_cursor_create(struct _IBEXIndex *idx) +{ + struct _HASHCursor *idc; + struct _hashroot *hashroot; + + idc = g_malloc(sizeof(*idc)); + idc->cursor.klass = &ibex_hash_cursor_class; + idc->cursor.index = idx; + idc->block = 0; + idc->index = 0; + + hashroot = (struct _hashroot *)ibex_block_read(idx->blocks, idx->root); + idc->size = hashroot->size; + + return &idc->cursor; +} + +static void +hash_cursor_close(struct _IBEXCursor *idc) +{ + g_free(idc); +} + +static guint32 +hash_cursor_next(struct _IBEXCursor *idc) +{ + struct _HASHCursor *hc = (struct _HASHCursor *)idc; + struct _hashroot *hashroot; + struct _hashblock *bucket; + struct _hashtableblock *table; + + /* get the next bucket chain */ + if (hc->block != 0) { + int ind; + + bucket = (struct 
_hashblock *)ibex_block_read(idc->index->blocks, HASH_BLOCK(hc->block)); + ind = HASH_INDEX(hc->block); + + g_assert(ind < bucket->used); + + hc->block = bucket->hb_keys[ind].next; + } + + if (hc->block == 0) { + hashroot = (struct _hashroot *)ibex_block_read(idc->index->blocks, idc->index->root); + while (hc->block == 0 && hc->index < hc->size) { + table = (struct _hashtableblock *) + ibex_block_read(idc->index->blocks, + hashroot->table[hc->index / (BLOCK_SIZE/sizeof(blockid_t))]); + hc->block = table->buckets[hc->index % (BLOCK_SIZE/sizeof(blockid_t))]; + + hc->index++; + } + } + + return hc->block; +} + +static char * +hash_cursor_next_key(struct _IBEXCursor *idc, int *keylenptr) +{ + /* TODO: this could be made slightly mroe efficient going to the structs direct. + but i'm lazy today */ + return idc->index->klass->get_key(idc->index, idc->klass->next(idc), keylenptr); +} + /* debug */ void ibex_hash_dump(struct _IBEXIndex *index); static void ibex_hash_dump_rec(struct _IBEXIndex *index, int *words, int *wordslen); -- cgit v1.2.3