/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ /* * Copyright (C) 2000 Helix Code, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public License * as published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with the Gnome Library; see the file COPYING.LIB. If not, * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ /* file.c: index file read/write ops */ #include <ctype.h> #include <errno.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include "ibex_internal.h" static unsigned long read_number (FILE *f); static void write_number (FILE *f, unsigned long n); static char *get_compressed_word (FILE *f, char **lastword); static gint free_file (gpointer key, gpointer value, gpointer data); static void free_word (gpointer key, gpointer value, gpointer data); /* The file format is: * * version string (currently "ibex1") * file count * list of compressed filenames, separated by \0 * word count * list of compressed words, each followed by \0, a count, and that * many references. * * All numbers are stored 7-bit big-endian, with the high bit telling * whether or not the number continues to the next byte. * * compressed text consists of a byte telling how many characters the * line has in common with the line before it, followed by the rest of * the string. Obviously this only really works if the lists are sorted. */ ibex * ibex_open (char *file, gboolean create) { ibex *ib; FILE *f; char vbuf[sizeof (IBEX_VERSION) - 1]; char *word, *lastword; unsigned long nfiles, nwords, nrefs, ref; ibex_file **ibfs = NULL; int i; GPtrArray *refs; f = fopen (file, "r"); if (!f && (errno != ENOENT || !create)) { if (errno == 0) errno = ENOMEM; return NULL; } ib = g_malloc (sizeof (ibex)); ib->dirty = FALSE; ib->path = g_strdup (file); ib->files = g_tree_new (strcmp); ib->words = g_hash_table_new (g_str_hash, g_str_equal); ib->oldfiles = g_ptr_array_new (); if (!f) return ib; /* Check version. */ if (fread (vbuf, 1, sizeof (vbuf), f) != sizeof (vbuf)) { if (feof (f)) errno = EINVAL; goto errout; } if (strncmp (vbuf, IBEX_VERSION, sizeof (vbuf) != 0)) { errno = EINVAL; goto errout; } /* Read list of files. */ nfiles = read_number (f); ibfs = g_malloc (nfiles * sizeof (ibex_file *)); lastword = NULL; for (i = 0; i < nfiles; i++) { ibfs[i] = g_malloc (sizeof (ibex_file)); ibfs[i]->name = get_compressed_word (f, &lastword); if (!ibfs[i]->name) goto errout; ibfs[i]->index = 0; g_tree_insert (ib->files, ibfs[i]->name, ibfs[i]); } /* Read list of words. */ nwords = read_number (f); lastword = NULL; for (i = 0; i < nwords; i++) { word = get_compressed_word (f, &lastword); if (!word) goto errout; nrefs = read_number (f); refs = g_ptr_array_new (); g_ptr_array_set_size (refs, nrefs); while (nrefs--) { ref = read_number (f); if (ref >= nfiles) goto errout; refs->pdata[nrefs] = ibfs[ref]; } g_hash_table_insert (ib->words, word, refs); } g_free (ibfs); fclose (f); return ib; errout: fclose (f); g_tree_traverse (ib->files, free_file, G_IN_ORDER, NULL); g_tree_destroy (ib->files); g_hash_table_foreach (ib->words, free_word, NULL); g_hash_table_destroy (ib->words); g_ptr_array_free (ib->oldfiles, TRUE); if (ibfs) g_free (ibfs); g_free (ib->path); g_free (ib); return NULL; } struct ibex_write_data { unsigned long index; FILE *f; char *lastname; }; static int get_prefix (struct ibex_write_data *iwd, char *name) { int i = 0; if (iwd->lastname) { while (!strncmp (iwd->lastname, name, i + 1)) i++; } iwd->lastname = name; return i; } static gint write_file (gpointer key, gpointer value, gpointer data) { char *file = key; ibex_file *ibf = value; struct ibex_write_data *iwd = data; int prefix; ibf->index = iwd->index++; prefix = get_prefix (iwd, file); fprintf (iwd->f, "%c%s", prefix, file + prefix); fputc (0, iwd->f); return FALSE; } static void store_word (gpointer key, gpointer value, gpointer data) { GTree *wtree = data; g_tree_insert (wtree, key, value); } static gint write_word (gpointer key, gpointer value, gpointer data) { char *word = key; GPtrArray *refs = value; struct ibex_write_data *iwd = data; ibex_file *ibf; int i, ind, prefix; for (i = ind = 0; i < refs->len; i++) { ibf = g_ptr_array_index (refs, i); if (ibf->index == -1) { g_ptr_array_remove_index_fast (refs, i); i--; } else ind++; } if (ind != 0) { prefix = get_prefix (iwd, word); fprintf (iwd->f, "%c%s", prefix, word + prefix); fputc (0, iwd->f); write_number (iwd->f, ind); for (i = 0; i < refs->len; i++) { ibf = g_ptr_array_index (refs, i); write_number (iwd->f, ibf->index); } } return FALSE; } int ibex_write (ibex *ib) { struct ibex_write_data iwd; GTree *wtree; char *tmpfile; tmpfile = g_strdup_printf ("%s~", ib->path); iwd.f = fopen (tmpfile, "w"); if (!iwd.f) { if (errno == 0) errno = ENOMEM; g_free (tmpfile); return -1; } fputs (IBEX_VERSION, iwd.f); if (ferror (iwd.f)) goto lose; iwd.index = 0; iwd.lastname = NULL; write_number (iwd.f, g_tree_nnodes (ib->files)); if (ferror (iwd.f)) goto lose; g_tree_traverse (ib->files, write_file, G_IN_ORDER, &iwd); if (ferror (iwd.f)) goto lose; iwd.lastname = NULL; write_number (iwd.f, g_hash_table_size (ib->words)); if (ferror (iwd.f)) goto lose; wtree = g_tree_new (strcmp); g_hash_table_foreach (ib->words, store_word, wtree); g_tree_traverse (wtree, write_word, G_IN_ORDER, &iwd); g_tree_destroy (wtree); if (ferror (iwd.f)) goto lose; if (fclose (iwd.f) == 0 && rename (tmpfile, ib->path) == 0) { g_free (tmpfile); ib->dirty = FALSE; return 0; } lose: unlink (tmpfile); g_free (tmpfile); return -1; } int ibex_close (ibex *ib) { ibex_file *ibf; if (ib->dirty && ibex_write (ib) == -1) return -1; g_tree_traverse (ib->files, free_file, G_IN_ORDER, NULL); g_tree_destroy (ib->files); g_hash_table_foreach (ib->words, free_word, NULL); g_hash_table_destroy (ib->words); while (ib->oldfiles->len) { ibf = g_ptr_array_remove_index (ib->oldfiles, 0); g_free (ibf->name); g_free (ibf); } g_ptr_array_free (ib->oldfiles, TRUE); g_free (ib->path); g_free (ib); return 0; } static gint free_file (gpointer key, gpointer value, gpointer data) { ibex_file *ibf = value; g_free (ibf->name); g_free (ibf); return FALSE; } static void free_word (gpointer key, gpointer value, gpointer data) { g_free (key); g_ptr_array_free (value, TRUE); } static char * get_compressed_word (FILE *f, char **lastword) { char *buf, *p; int c, size; c = getc (f); if (c == EOF) return NULL; size = c + 10; buf = g_malloc (size); if (*lastword) strncpy (buf, *lastword, c); p = buf + c; do { c = getc (f); if (c == EOF) return NULL; if (p == buf + size) { buf = g_realloc (buf, size + 10); p = buf + size; size += 10; } *p++ = c; } while (c != 0); *lastword = buf; return buf; } static void write_number (FILE *f, unsigned long number) { int i, flag = 0; char buf[4]; i = 4; do { buf[--i] = (number & 0x7F) | flag; number = number >> 7; flag = 0x80; } while (number != 0); fwrite (buf + i, 1, 4 - i, f); } static unsigned long read_number (FILE *f) { int byte; unsigned long num; num = 0; do { byte = getc (f); num = num << 7 | (byte & 0x7F); } while (byte & 0x80); return num; }