If we have the namecache active, and there is no name there, we add it

2000-11-17 Not Zed <NotZed@HelixCode.com> * wordindexmem.c (add_list): If we have the namecache active, and there is no name there, we add it directly and dont look it up first. * testindex.c: Some performance testing & stat gathering stuff. svn path=/trunk/; revision=6677
author: Not Zed <NotZed@HelixCode.com> 2000-11-27 10:05:14 +0800
committer: Michael Zucci <zucchi@src.gnome.org> 2000-11-27 10:05:14 +0800
commit: 6c45c449cae597f00cf57d5c9914489871ba916e (patch)
tree: 0f4e0184070be3b08dea0ca97641f18d99add985 /libibex/testindex.c
parent: 918ead73323ed08bc182e5827a1cab2e719f48b6 (diff)
download: gsoc2013-evolution-6c45c449cae597f00cf57d5c9914489871ba916e.tar
gsoc2013-evolution-6c45c449cae597f00cf57d5c9914489871ba916e.tar.gz
gsoc2013-evolution-6c45c449cae597f00cf57d5c9914489871ba916e.tar.bz2
gsoc2013-evolution-6c45c449cae597f00cf57d5c9914489871ba916e.tar.lz
gsoc2013-evolution-6c45c449cae597f00cf57d5c9914489871ba916e.tar.xz
gsoc2013-evolution-6c45c449cae597f00cf57d5c9914489871ba916e.tar.zst
gsoc2013-evolution-6c45c449cae597f00cf57d5c9914489871ba916e.zip
1 files changed, 158 insertions, 0 deletions
diff --git a/libibex/testindex.c b/libibex/testindex.c
new file mode 100644
index 0000000000..e21d73ff06
--- /dev/null
+++ b/libibex/testindex.c
@@ -0,0 +1,158 @@
+/* Test code for libibex */
+
+#include <stdio.h>
+#include <glib.h>
+#include <errno.h>
+#include <string.h>
+#include "ibex_internal.h"
+
+void word_index_mem_dump_info(struct _IBEXWord *idx);
+
+/*
+  The following is a routine to generate a Gaussian distribution
+  of pseudo random numbers, to make the results a little more
+  meaningful
+*/
+
+/* boxmuller.c           Implements the Polar form of the Box-Muller
+                         Transformation
+
+                      (c) Copyright 1994, Everett F. Carter Jr.
+                          Permission is granted by the author to use
+                          this software for any application provided this
+                          copyright notice is preserved.
+
+*/
+
+#include <stdlib.h>
+#include <math.h>
+
+#define ranf() ((float)rand()/(float)RAND_MAX)
+
+/* Normal random variate (mean m, standard deviation s) via the polar form of
+   the Box-Muller transform.  Each pass yields two variates; the second one is
+   kept in static storage for the next call, so this is not reentrant. */
+static float box_muller(float m, float s)
+{
+        float x1, x2, w, y1;
+        static float y2;
+        static int use_last = 0;
+
+        if (use_last) {                 /* use value from previous call */
+                y1 = y2;
+                use_last = 0;
+        } else {
+                /* Pick a point inside the unit circle.  The origin is rejected
+                   as well: w == 0 would make log(w)/w below non-finite. */
+                do {
+                        x1 = 2.0 * ranf() - 1.0;
+                        x2 = 2.0 * ranf() - 1.0;
+                        w = x1 * x1 + x2 * x2;
+                } while (w >= 1.0 || w == 0.0);
+                w = sqrt((-2.0 * log(w)) / w);
+                y1 = x1 * w;
+                y2 = x2 * w;
+                use_last = 1;
+        }
+        return m + y1 * s;
+}
+
+/* Picks a word from words at a Gaussian index (mean m, standard deviation s),
+   redrawing until the index lands inside the array; NULL if words is empty. */
+static char *getword(GPtrArray *words, float m, float s)
+{
+	int index;
+
+	if (words->len == 0)	/* no draw could ever be in range */
+		return NULL;
+	do {
+		index = (int)box_muller(m, s);
+	} while (index < 0 || (guint)index >= words->len);
+	return words->pdata[index];
+}
+
+/* Performance test: indexes `files' synthetic messages built from a word list
+   into test.ibex, calling word_index_mem_dump_info() every 1000 messages.
+   Returns 1 if the word list is unreadable or empty or ibex_open() fails. */
+int main(int argc, char **argv)
+{
+	int i, j;
+	GPtrArray *words = g_ptr_array_new();
+	char line[256];
+	int len;
+	FILE *file;
+	float m, s;
+	ibex *ib;
+	GString *buffer = g_string_new("");
+	int files = 80000;
+	const char *dict = "/usr/dict/words";
+
+	srand(0xABADF00D);
+
+	/* read words into an array */
+	file = fopen(dict, "r");
+	if (file == NULL) {
+		fprintf(stderr, "Cannot open word file: %s: %s\n", dict, strerror(errno));
+		return 1;
+	}
+	while (fgets(line, sizeof(line), file) != NULL) {
+		len = strlen(line);
+		if (len>0 && line[len-1]=='\n') {
+			line[len-1]=0;
+		}
+		g_ptr_array_add(words, g_strdup(line));
+	}
+	fclose(file);
+
+	fprintf(stderr, "Read %u words\n", words->len);
+	/* an empty list cannot work: j % words->len below would divide by zero */
+	if (words->len == 0) {
+		fprintf(stderr, "No words in word file: %s\n", dict);
+		return 1;
+	}
+
+	/* *shrug* arbitrary values really */
+	m = words->len/2;
+	/* well, the average vocabulary of a mailbox is about 10K words */
+	s = 1000.0;
+
+	printf("mean is %f, s is %f\n", m, s);
+
+	ib = ibex_open("test.ibex", O_RDWR|O_CREAT, 0600);
+	if (ib == NULL) {
+		/* no newline here: perror appends ": <error text>\n" itself */
+		perror("Creating ibex file");
+		return 1;
+	}
+
+	printf("Adding %d files\n", files);
+
+	/* simulate adding new words to a bunch of files */
+	for (j=0;j<files;j++) {
+		/* names come from the word list, wrapping around at its end */
+		char *name = words->pdata[j % words->len];
+		/* something like 60 words in a typical message, say */
+		int count = (int)box_muller(60.0, 20.0);
+
+		if (j%1000 == 0)
+			word_index_mem_dump_info(ib->words);
+
+		/* cache the name info */
+		ibex_contains_name(ib, name);
+
+		/* build up the word buffer; a count <= 0 leaves it empty */
+		g_string_truncate(buffer, 0);
+		for (i=0;i<count;i++) {
+			if (i>0)
+				g_string_append_c(buffer, ' ');
+			g_string_append(buffer, getword(words, m, s));
+		}
+
+		/* and index it */
+		ibex_index_buffer(ib, name, buffer->str, buffer->len, NULL);
+	}
+
+	word_index_mem_dump_info(ib->words);
+	ibex_close(ib);
+	return 0;
+}
+
author	Not Zed <NotZed@HelixCode.com>	2000-11-27 10:05:14 +0800
committer	Michael Zucci <zucchi@src.gnome.org>	2000-11-27 10:05:14 +0800
commit	6c45c449cae597f00cf57d5c9914489871ba916e (patch)
tree	0f4e0184070be3b08dea0ca97641f18d99add985 /libibex/testindex.c
parent	918ead73323ed08bc182e5827a1cab2e719f48b6 (diff)
download	gsoc2013-evolution-6c45c449cae597f00cf57d5c9914489871ba916e.tar gsoc2013-evolution-6c45c449cae597f00cf57d5c9914489871ba916e.tar.gz gsoc2013-evolution-6c45c449cae597f00cf57d5c9914489871ba916e.tar.bz2 gsoc2013-evolution-6c45c449cae597f00cf57d5c9914489871ba916e.tar.lz gsoc2013-evolution-6c45c449cae597f00cf57d5c9914489871ba916e.tar.xz gsoc2013-evolution-6c45c449cae597f00cf57d5c9914489871ba916e.tar.zst gsoc2013-evolution-6c45c449cae597f00cf57d5c9914489871ba916e.zip