/* Test code for libibex */

#include <stdio.h>
#include <glib.h>
#include <errno.h>
#include <string.h>
#include "ibex_internal.h"

/* Not defined in this file.  main() calls it with ib->words, periodically
   while indexing and once more at the end; going by the name it presumably
   dumps memory-usage information for the word index -- TODO confirm.
   NOTE(review): the prototype is spelled out here, presumably because
   ibex_internal.h does not declare it -- confirm. */
void word_index_mem_dump_info(struct _IBEXWord *idx);

/*
  The following is a routine to generate a Gaussian distribution
  of pseudo random numbers, to make the results a little more
  meaningful
*/

/* boxmuller.c           Implements the Polar form of the Box-Muller
                         Transformation

                      (c) Copyright 1994, Everett F. Carter Jr.
                          Permission is granted by the author to use
                          this software for any application provided this
                          copyright notice is preserved.

*/

#include <stdlib.h>
#include <math.h>

#define ranf() ((float)rand()/(float)RAND_MAX)

static float box_muller(float m, float s)      /* normal random variate generator */
{                                       /* mean m, standard deviation s */
        float x1, x2, w, y1;
        static float y2;
        static int use_last = 0;

        if (use_last)                   /* use value from previous call */
        {
                y1 = y2;
                use_last = 0;
        }
        else
        {
                do {
                        x1 = 2.0 * ranf() - 1.0;
                        x2 = 2.0 * ranf() - 1.0;
                        w = x1 * x1 + x2 * x2;
                } while ( w >= 1.0 );

                w = sqrt( (-2.0 * log( w ) ) / w );
                y1 = x1 * w;
                y2 = x2 * w;
                use_last = 1;
        }

        return( m + y1 * s );
}

/*
 * Gets a word from words, using m and s as distribution values.
 *
 * words: array of strings to pick from
 * m, s:  mean and standard deviation passed to box_muller(); the sample,
 *        truncated to an int, is used as the array index
 *
 * Samples that fall outside the array are discarded and redrawn.
 *
 * Returns the array's own pointer to the chosen word (not a copy), or
 * NULL if words is NULL or empty.
 */
static char *getword(GPtrArray *words, float m, float s)
{
	int index;

	/* with no elements there is no valid index, and the rejection loop
	   below would never terminate */
	if (words == NULL || words->len == 0)
		return NULL;

	do {
		index = (int)box_muller(m, s);
		/* index is known to be non-negative before the unsigned compare */
	} while (index < 0 || (guint)index >= words->len);

	return words->pdata[index];
}


int main(int argc, char **argv)
{
	int i, j;
	GPtrArray *words = g_ptr_array_new();
	char line[256];
	int len;
	FILE *file;
	float m, s;
	ibex *ib;
	GString *buffer = g_string_new("");
	int files;
	char *dict;

	srand(0xABADF00D);

	files = 80000;
	dict = "/usr/dict/words";

	/* read words into an array */
	file = fopen(dict, "r");
	if (file == NULL) {
		fprintf(stderr, "Cannot open word file: %s: %s\n", dict, strerror(errno));
		return 1;
	}
	while (fgets(line, sizeof(line), file) != NULL) {
		len = strlen(line);
		if (len>0 && line[len-1]=='\n') {
			line[len-1]=0;
		}
		g_ptr_array_add(words, g_strdup(line));
	}
	fclose(file);
	
	fprintf(stderr, "Read %d words\n", words->len);

	/* *shrug* arbitrary values really */
	m = words->len/2;
	/* well, the average vocabulary of a mailbox is about 10K words */
	s = 1000.0;

	printf("mean is %f, s is %f\n", m, s);

	/* open ibex file */
	ib = ibex_open("test.ibex", O_RDWR|O_CREAT, 0600);
	if (ib == NULL) {
		perror("Creating ibex file\n");
		return 1;
	}

	printf("Adding %d files\n", files);

	/* simulate adding new words to a bunch of files */
	for (j=0;j<files;j++) {
		/* always new name */
		char *name = words->pdata[j % words->len];
		/* something like 60 words in a typical message, say */
		int count = (int)box_muller(60.0, 20.0);

		if (j%1000 == 0)
			word_index_mem_dump_info(ib->words);

		/* cache the name info */
		ibex_contains_name(ib, name);

		/*printf("Adding %d words to '%s'\n", count, name);*/

		g_string_truncate(buffer, 0);

		/* build up the word buffer */
		for (i=0;i<count;i++) {
			if (i>0)
				g_string_append_c(buffer, ' ');
			g_string_append(buffer, getword(words, m, s));
		}

		/* and index it */
		ibex_index_buffer(ib, name, buffer->str, buffer->len, NULL);
	}

	word_index_mem_dump_info(ib->words);

	ibex_close(ib);

	return 0;
}