about | summary | refs | log | tree | commit | diff | stats
path: root/libibex/testindex.c
diff options
context:
space:
mode:
Diffstat (limited to 'libibex/testindex.c')
-rw-r--r-- libibex/testindex.c 158
1 files changed, 158 insertions, 0 deletions
diff --git a/libibex/testindex.c b/libibex/testindex.c
new file mode 100644
index 0000000000..e21d73ff06
--- /dev/null
+++ b/libibex/testindex.c
@@ -0,0 +1,158 @@
+/* Test code for libibex */
+
+#include <stdio.h>
+#include <glib.h>
+#include <errno.h>
+#include <string.h>
+#include "ibex_internal.h"
+
+/*
+ * Prototyped by hand in this file; main() calls it with ib->words once
+ * every 1000 simulated files and once more after the last one.
+ * NOTE(review): presumably prints memory statistics for the word index --
+ * confirm against its definition.
+ */
+void word_index_mem_dump_info(struct _IBEXWord *idx);
+
+/*
+ The following is a routine to generate a Gaussian distribution
+ of pseudo random numbers, to make the results a little more
+ meaningful
+*/
+
+/* boxmuller.c Implements the Polar form of the Box-Muller
+ Transformation
+
+ (c) Copyright 1994, Everett F. Carter Jr.
+ Permission is granted by the author to use
+ this software for any application provided this
+ copyright notice is preserved.
+
+*/
+
+#include <stdlib.h>
+#include <math.h>
+
+/* Uniform pseudo-random float in [0, 1], derived from rand(). */
+#define ranf() ((float)rand()/(float)RAND_MAX)
+
+/*
+ * Normal random variate generator (polar form of Box-Muller).
+ *
+ * m: mean of the distribution
+ * s: standard deviation of the distribution
+ *
+ * Returns m + y * s, where y is a standard normal variate.
+ *
+ * Each pass through the rejection loop produces two variates.  The
+ * second one is kept, unscaled, in static storage and consumed by the
+ * next call (using that call's m and s), so the function keeps state
+ * between calls.
+ */
+static float box_muller(float m, float s)
+{
+	float x1, x2, w, y1;
+	static float y2;
+	static int use_last = 0;
+
+	if (use_last) {
+		/* use value from previous call */
+		y1 = y2;
+		use_last = 0;
+	} else {
+		/*
+		 * Pick a point strictly inside the unit circle.  The origin
+		 * has to be rejected too: with w == 0, log(w) is -inf and
+		 * the scale factor below becomes inf, so x1 * w would be
+		 * 0 * inf, i.e. NaN.
+		 */
+		do {
+			x1 = 2.0 * ranf() - 1.0;
+			x2 = 2.0 * ranf() - 1.0;
+			w = x1 * x1 + x2 * x2;
+		} while (w >= 1.0 || w == 0.0);
+
+		w = sqrt((-2.0 * log(w)) / w);
+		y1 = x1 * w;
+		y2 = x2 * w;
+		use_last = 1;
+	}
+
+	return m + y1 * s;
+}
+
+/*
+ * Gets a word from words, using m and s as distribution values.
+ *
+ * Draws box_muller(m, s) variates until one truncates to a valid index
+ * into words, then returns the pointer stored at that index (not a copy).
+ * Returns NULL if words is empty, since no draw could ever be in range.
+ */
+static char *getword(GPtrArray *words, float m, float s)
+{
+	float pick;
+
+	if (words->len == 0)
+		return NULL;
+
+	/*
+	 * Range-check while still in floating point: converting a NaN or
+	 * out-of-range float to int is undefined.  Values in (-1, 0)
+	 * truncate to index 0, so they stay accepted, exactly as they were
+	 * when the check was done after the (int) conversion.
+	 */
+	do {
+		pick = box_muller(m, s);
+	} while (!(pick > -1.0f && pick < (float)words->len));
+
+	return words->pdata[(int)pick];
+}
+
+
+/*
+ * Index stress test.
+ *
+ * Reads the system word list into memory, opens (creating if needed)
+ * "test.ibex", then simulates indexing a large number of files.  Each
+ * simulated file is named after a dictionary word and is given a buffer
+ * of space-separated words drawn with getword().  The word index memory
+ * info is dumped every 1000 files and once at the end.
+ *
+ * Returns 0 on success, 1 if the word list cannot be opened, is empty,
+ * or the index file cannot be opened.
+ */
+int main(int argc, char **argv)
+{
+	int i, j;
+	guint w;
+	GPtrArray *words = g_ptr_array_new();
+	char line[256];
+	int len;
+	FILE *file;
+	float m, s;
+	ibex *ib;
+	GString *buffer = g_string_new("");
+	int files;
+	char *dict;
+	int status = 1;
+
+	/* fixed seed so every run generates the same data */
+	srand(0xABADF00D);
+
+	files = 80000;
+	dict = "/usr/dict/words";
+
+	/* read words into an array */
+	file = fopen(dict, "r");
+	if (file == NULL) {
+		fprintf(stderr, "Cannot open word file: %s: %s\n", dict, strerror(errno));
+		goto out;
+	}
+	while (fgets(line, sizeof(line), file) != NULL) {
+		/* strip the newline fgets() leaves in place */
+		len = strlen(line);
+		if (len>0 && line[len-1]=='\n') {
+			line[len-1]=0;
+		}
+		g_ptr_array_add(words, g_strdup(line));
+	}
+	fclose(file);
+
+	/* words->len is a guint, hence %u */
+	fprintf(stderr, "Read %u words\n", words->len);
+
+	/*
+	 * Nothing below works without words: the name lookup would take
+	 * j modulo zero and getword() would have nothing to pick from.
+	 */
+	if (words->len == 0) {
+		fprintf(stderr, "Word file is empty: %s\n", dict);
+		goto out;
+	}
+
+	/* *shrug* arbitrary values really */
+	m = words->len/2;
+	/* well, the average vocabulary of a mailbox is about 10K words */
+	s = 1000.0;
+
+	printf("mean is %f, s is %f\n", m, s);
+
+	/* open ibex file */
+	ib = ibex_open("test.ibex", O_RDWR|O_CREAT, 0600);
+	if (ib == NULL) {
+		/* no trailing newline: perror() appends ": <reason>\n" itself */
+		perror("Creating ibex file");
+		goto out;
+	}
+
+	printf("Adding %d files\n", files);
+
+	/* simulate adding new words to a bunch of files */
+	for (j=0;j<files;j++) {
+		/* names cycle through the word list, so they only stay
+		   unique while j is below the number of words read */
+		char *name = words->pdata[j % words->len];
+		/* something like 60 words in a typical message, say */
+		int count = (int)box_muller(60.0, 20.0);
+
+		if (j%1000 == 0)
+			word_index_mem_dump_info(ib->words);
+
+		/* cache the name info */
+		ibex_contains_name(ib, name);
+
+		/*printf("Adding %d words to '%s'\n", count, name);*/
+
+		g_string_truncate(buffer, 0);
+
+		/* build up the word buffer (left empty when count <= 0) */
+		for (i=0;i<count;i++) {
+			if (i>0)
+				g_string_append_c(buffer, ' ');
+			g_string_append(buffer, getword(words, m, s));
+		}
+
+		/* and index it */
+		ibex_index_buffer(ib, name, buffer->str, buffer->len, NULL);
+	}
+
+	word_index_mem_dump_info(ib->words);
+
+	ibex_close(ib);
+	status = 0;
+
+out:
+	/* the array only owns its pointer table; the strings are ours */
+	for (w = 0; w < words->len; w++)
+		g_free(words->pdata[w]);
+	g_ptr_array_free(words, TRUE);
+	g_string_free(buffer, TRUE);
+
+	return status;
+}
+