/* Test code for libibex */ #include <stdio.h> #include <glib.h> #include <errno.h> #include <string.h> #include "ibex_internal.h" void word_index_mem_dump_info(struct _IBEXWord *idx); /* The following is a routine to generate a Gaussian distribution of pseudo random numbers, to make the results a little more meaningful */ /* boxmuller.c Implements the Polar form of the Box-Muller Transformation (c) Copyright 1994, Everett F. Carter Jr. Permission is granted by the author to use this software for any application provided this copyright notice is preserved. */ #include <stdlib.h> #include <math.h> #define ranf() ((float)rand()/(float)RAND_MAX) static float box_muller(float m, float s) /* normal random variate generator */ { /* mean m, standard deviation s */ float x1, x2, w, y1; static float y2; static int use_last = 0; if (use_last) /* use value from previous call */ { y1 = y2; use_last = 0; } else { do { x1 = 2.0 * ranf() - 1.0; x2 = 2.0 * ranf() - 1.0; w = x1 * x1 + x2 * x2; } while ( w >= 1.0 ); w = sqrt( (-2.0 * log( w ) ) / w ); y1 = x1 * w; y2 = x2 * w; use_last = 1; } return( m + y1 * s ); } /* gets a word from words, using m and s as distribution values */ static char *getword(GPtrArray *words, float m, float s) { int index; do { index = (int)box_muller(m, s); } while (index<0 || index>=words->len); return words->pdata[index]; } int main(int argc, char **argv) { int i, j; GPtrArray *words = g_ptr_array_new(); char line[256]; int len; FILE *file; float m, s; ibex *ib; GString *buffer = g_string_new(""); int files; char *dict; srand(0xABADF00D); files = 80000; dict = "/usr/dict/words"; /* read words into an array */ file = fopen(dict, "r"); if (file == NULL) { fprintf(stderr, "Cannot open word file: %s: %s\n", dict, strerror(errno)); return 1; } while (fgets(line, sizeof(line), file) != NULL) { len = strlen(line); if (len>0 && line[len-1]=='\n') { line[len-1]=0; } g_ptr_array_add(words, g_strdup(line)); } fclose(file); fprintf(stderr, "Read %d words\n", words->len); /* *shrug* arbitrary values really */ m = words->len/2; /* well, the average vocabulary of a mailbox is about 10K words */ s = 1000.0; printf("mean is %f, s is %f\n", m, s); /* open ibex file */ ib = ibex_open("test.ibex", O_RDWR|O_CREAT, 0600); if (ib == NULL) { perror("Creating ibex file\n"); return 1; } printf("Adding %d files\n", files); /* simulate adding new words to a bunch of files */ for (j=0;j<files;j++) { /* always new name */ char *name = words->pdata[j % words->len]; /* something like 60 words in a typical message, say */ int count = (int)box_muller(60.0, 20.0); if (j%1000 == 0) word_index_mem_dump_info(ib->words); /* cache the name info */ ibex_contains_name(ib, name); /*printf("Adding %d words to '%s'\n", count, name);*/ g_string_truncate(buffer, 0); /* build up the word buffer */ for (i=0;i<count;i++) { if (i>0) g_string_append_c(buffer, ' '); g_string_append(buffer, getword(words, m, s)); } /* and index it */ ibex_index_buffer(ib, name, buffer->str, buffer->len, NULL); } word_index_mem_dump_info(ib->words); ibex_close(ib); return 0; }