/*
Copyright 2000 Helix Code Inc.
*/
/* file.c: index file read/write ops */
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "ibex_internal.h"
static unsigned long read_number(FILE *f);
static void write_number(FILE *f, unsigned long n);
static char *get_compressed_word(FILE *f, char **lastword);
static gint free_file(gpointer key, gpointer value, gpointer data);
static void free_word(gpointer key, gpointer value, gpointer data);
/* The file format is:
*
* version string (currently "ibex1")
* file count
* list of compressed filenames, separated by \0
* word count
* list of compressed words, each followed by \0, a count, and that
* many references.
*
* All numbers are stored 7-bit big-endian, with the high bit telling
* whether or not the number continues to the next byte.
*
* compressed text consists of a byte telling how many characters the
* line has in common with the line before it, followed by the rest of
* the string. Obviously this only really works if the lists are sorted.
*/
ibex *
ibex_open(char *file, gboolean create)
{
ibex *ib;
FILE *f;
char vbuf[sizeof(IBEX_VERSION) - 1];
char *word, *lastword;
unsigned long nfiles, nwords, nrefs, ref;
ibex_file **ibfs = NULL;
int i;
GPtrArray *refs;
f = fopen(file, "r");
if (!f && (errno != ENOENT || !create))
{
if (errno == 0)
errno = ENOMEM;
return NULL;
}
ib = g_malloc(sizeof(ibex));
ib->dirty = FALSE;
ib->path = g_strdup(file);
ib->files = g_tree_new(strcmp);
ib->words = g_hash_table_new(g_str_hash, g_str_equal);
ib->oldfiles = g_ptr_array_new();
if (!f)
return ib;
/* Check version. */
if (fread(vbuf, 1, sizeof(vbuf), f) != sizeof(vbuf))
{
if (feof(f))
errno = EINVAL;
goto errout;
}
if (strncmp(vbuf, IBEX_VERSION, sizeof(vbuf) != 0))
{
errno = EINVAL;
goto errout;
}
/* Read list of files. */
nfiles = read_number(f);
ibfs = g_malloc(nfiles * sizeof(ibex_file *));
lastword = NULL;
for (i = 0; i < nfiles; i++)
{
ibfs[i] = g_malloc(sizeof(ibex_file));
ibfs[i]->name = get_compressed_word(f, &lastword);
if (!ibfs[i]->name)
goto errout;
ibfs[i]->index = 0;
g_tree_insert(ib->files, ibfs[i]->name, ibfs[i]);
}
/* Read list of words. */
nwords = read_number(f);
lastword = NULL;
for (i = 0; i < nwords; i++)
{
word = get_compressed_word(f, &lastword);
if (!word)
goto errout;
nrefs = read_number(f);
refs = g_ptr_array_new();
g_ptr_array_set_size(refs, nrefs);
while (nrefs--)
{
ref = read_number(f);
if (ref >= nfiles)
goto errout;
refs->pdata[nrefs] = ibfs[ref];
}
g_hash_table_insert(ib->words, word, refs);
}
g_free(ibfs);
fclose(f);
return ib;
errout:
fclose(f);
g_tree_traverse(ib->files, free_file, G_IN_ORDER, NULL);
g_tree_destroy(ib->files);
g_hash_table_foreach(ib->words, free_word, NULL);
g_hash_table_destroy(ib->words);
g_ptr_array_free(ib->oldfiles, TRUE);
if (ibfs)
g_free(ibfs);
g_free(ib->path);
g_free(ib);
return NULL;
}
struct ibex_write_data {
unsigned long index;
FILE *f;
char *lastname;
};
static int
get_prefix(struct ibex_write_data *iwd, char *name)
{
int i = 0;
if (iwd->lastname)
{
while (!strncmp(iwd->lastname, name, i + 1))
i++;
}
iwd->lastname = name;
return i;
}
static gint
write_file(gpointer key, gpointer value, gpointer data)
{
char *file = key;
ibex_file *ibf = value;
struct ibex_write_data *iwd = data;
int prefix;
ibf->index = iwd->index++;
prefix = get_prefix(iwd, file);
fprintf(iwd->f, "%c%s", prefix, file + prefix);
fputc(0, iwd->f);
return FALSE;
}
static void
store_word(gpointer key, gpointer value, gpointer data)
{
GTree *wtree = data;
g_tree_insert(wtree, key, value);
}
static gint
write_word(gpointer key, gpointer value, gpointer data)
{
char *word = key;
GPtrArray *refs = value;
struct ibex_write_data *iwd = data;
ibex_file *ibf;
int i, ind, prefix;
for (i = ind = 0; i < refs->len; i++)
{
ibf = g_ptr_array_index(refs, i);
if (ibf->index == -1)
{
g_ptr_array_remove_index_fast(refs, i);
i--;
}
else
ind++;
}
if (ind != 0)
{
prefix = get_prefix(iwd, word);
fprintf(iwd->f, "%c%s", prefix, word + prefix);
fputc(0, iwd->f);
write_number(iwd->f, ind);
for (i = 0; i < refs->len; i++)
{
ibf = g_ptr_array_index(refs, i);
write_number(iwd->f, ibf->index);
}
}
return FALSE;
}
int
ibex_write(ibex *ib)
{
struct ibex_write_data iwd;
GTree *wtree;
char *tmpfile;
tmpfile = g_strdup_printf("%s~", ib->path);
iwd.f = fopen(tmpfile, "w");
if (!iwd.f)
{
if (errno == 0)
errno = ENOMEM;
g_free(tmpfile);
return -1;
}
fputs(IBEX_VERSION, iwd.f);
if (ferror(iwd.f))
goto lose;
iwd.index = 0;
iwd.lastname = NULL;
write_number(iwd.f, g_tree_nnodes(ib->files));
if (ferror(iwd.f))
goto lose;
g_tree_traverse(ib->files, write_file, G_IN_ORDER, &iwd);
if (ferror(iwd.f))
goto lose;
iwd.lastname = NULL;
write_number(iwd.f, g_hash_table_size(ib->words));
if (ferror(iwd.f))
goto lose;
wtree = g_tree_new(strcmp);
g_hash_table_foreach(ib->words, store_word, wtree);
g_tree_traverse(wtree, write_word, G_IN_ORDER, &iwd);
g_tree_destroy(wtree);
if (ferror(iwd.f))
goto lose;
if (fclose(iwd.f) == 0 && rename(tmpfile, ib->path) == 0)
{
g_free(tmpfile);
ib->dirty = FALSE;
return 0;
}
lose:
unlink(tmpfile);
g_free(tmpfile);
return -1;
}
int
ibex_close(ibex *ib)
{
ibex_file *ibf;
if (ib->dirty && ibex_write(ib) == -1)
return -1;
g_tree_traverse(ib->files, free_file, G_IN_ORDER, NULL);
g_tree_destroy(ib->files);
g_hash_table_foreach(ib->words, free_word, NULL);
g_hash_table_destroy(ib->words);
while (ib->oldfiles->len)
{
ibf = g_ptr_array_remove_index(ib->oldfiles, 0);
g_free(ibf->name);
g_free(ibf);
}
g_ptr_array_free(ib->oldfiles, TRUE);
g_free(ib->path);
g_free(ib);
return 0;
}
static gint
free_file(gpointer key, gpointer value, gpointer data)
{
ibex_file *ibf = value;
g_free(ibf->name);
g_free(ibf);
return FALSE;
}
static void
free_word(gpointer key, gpointer value, gpointer data)
{
g_free(key);
g_ptr_array_free(value, TRUE);
}
static char *
get_compressed_word(FILE *f, char **lastword)
{
char *buf, *p;
int c, size;
c = getc(f);
if (c == EOF)
return NULL;
size = c + 10;
buf = g_malloc(size);
if (*lastword)
strncpy(buf, *lastword, c);
p = buf + c;
do
{
c = getc(f);
if (c == EOF)
return NULL;
if (p == buf + size)
{
buf = g_realloc(buf, size + 10);
p = buf + size;
size += 10;
}
*p++ = c;
}
while (c != 0);
*lastword = buf;
return buf;
}
static void
write_number(FILE *f, unsigned long number)
{
int i, flag = 0;
char buf[4];
i = 4;
do
{
buf[--i] = (number & 0x7F) | flag;
number = number >> 7;
flag = 0x80;
}
while (number != 0);
fwrite(buf + i, 1, 4 - i, f);
}
static unsigned long
read_number(FILE *f)
{
int byte;
unsigned long num;
num = 0;
do
{
byte = getc(f);
num = num << 7 | (byte & 0x7F);
}
while (byte & 0x80);
return num;
}