diff options
Diffstat (limited to 'libibex/file.c')
-rw-r--r-- | libibex/file.c | 451 |
1 files changed, 0 insertions, 451 deletions
diff --git a/libibex/file.c b/libibex/file.c deleted file mode 100644 index f011312cab..0000000000 --- a/libibex/file.c +++ /dev/null @@ -1,451 +0,0 @@ -/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ -/* - * Copyright (C) 2000 Helix Code, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public License - * as published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public - * License along with the Gnome Library; see the file COPYING.LIB. If not, - * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. - */ - -/* file.c: index file read/write ops */ - -#include <ctype.h> -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#include "ibex_internal.h" - -static unsigned long read_number (FILE *f); -static void write_number (FILE *f, unsigned long n); -static char *get_compressed_word (FILE *f, char **lastword); - -static gint free_file (gpointer key, gpointer value, gpointer data); -static void free_word (gpointer key, gpointer value, gpointer data); - -/* The file format is: - * - * version string (currently "ibex1") - * file count - * list of compressed filenames, separated by \0 - * word count - * list of compressed words, each followed by \0, a count, and that - * many references. - * - * All numbers are stored 7-bit big-endian, with the high bit telling - * whether or not the number continues to the next byte. - * - * compressed text consists of a byte telling how many characters the - * line has in common with the line before it, followed by the rest of - * the string. Obviously this only really works if the lists are sorted. - */ - -/** - * ibex_open: open (or possibly create) an ibex index - * @file: the name of the file - * @flags: open flags, see open(2). - * @mode: If O_CREAT is passed in flags, then the file mode - * to create the new file with. It will be anded with the current - * umask. - * - * Open and/or create the named ibex file and return a handle to it. - * - * Return value: an ibex handle, or NULL if an error occurred. - **/ -ibex * -ibex_open (char *file, int flags, int mode) -{ - ibex *ib; - FILE *f; - char vbuf[sizeof (IBEX_VERSION) - 1]; - char *word, *lastword; - unsigned long nfiles, nwords, nrefs, ref; - ibex_file **ibfs = NULL; - int i; - GPtrArray *refs; - int fd; - char *modestr; - - fd = open(file, flags, mode); - if (fd == -1) { - printf("open failed :(\n"); - return NULL; - } - - /* yuck, this is because we use FILE * interface - internally */ - switch (flags & O_ACCMODE) { - case O_RDONLY: - modestr = "r"; - break; - case O_RDWR: - if (flags & O_APPEND) - modestr = "a+"; - else - modestr = "w+"; - break; - case O_WRONLY: - if (flags & O_APPEND) - modestr = "a"; - else - modestr = "w"; - break; - default: - if (flags & O_APPEND) - modestr = "a+"; - else - modestr = "r+"; - break; - } - - f = fdopen(fd, modestr); - if (f == NULL) { - printf("fdopen failed, modestr = '%s'\n", modestr); - if (errno == 0) - errno = ENOMEM; - return NULL; - } - - ib = g_malloc (sizeof (ibex)); - ib->dirty = FALSE; - ib->path = g_strdup (file); - ib->files = g_tree_new (strcmp); - ib->words = g_hash_table_new (g_str_hash, g_str_equal); - ib->oldfiles = g_ptr_array_new (); - - if (!f) - return ib; - - /* Check version. If its empty, then we have just created it */ - if (fread (vbuf, 1, sizeof (vbuf), f) != sizeof (vbuf)) { - if (feof (f)) { - return ib; - } - } - if (strncmp (vbuf, IBEX_VERSION, sizeof (vbuf) != 0)) { - errno = EINVAL; - goto errout; - } - - /* Read list of files. */ - nfiles = read_number (f); - ibfs = g_malloc (nfiles * sizeof (ibex_file *)); - lastword = NULL; - for (i = 0; i < nfiles; i++) { - ibfs[i] = g_malloc (sizeof (ibex_file)); - ibfs[i]->name = get_compressed_word (f, &lastword); - if (!ibfs[i]->name) - goto errout; - ibfs[i]->index = 0; - g_tree_insert (ib->files, ibfs[i]->name, ibfs[i]); - } - - /* Read list of words. */ - nwords = read_number (f); - lastword = NULL; - for (i = 0; i < nwords; i++) { - word = get_compressed_word (f, &lastword); - if (!word) - goto errout; - - nrefs = read_number (f); - refs = g_ptr_array_new (); - g_ptr_array_set_size (refs, nrefs); - while (nrefs--) { - ref = read_number (f); - if (ref >= nfiles) - goto errout; - refs->pdata[nrefs] = ibfs[ref]; - } - - g_hash_table_insert (ib->words, word, refs); - } - - g_free (ibfs); - fclose (f); - return ib; - -errout: - - fclose (f); - g_tree_traverse (ib->files, free_file, G_IN_ORDER, NULL); - g_tree_destroy (ib->files); - g_hash_table_foreach (ib->words, free_word, NULL); - g_hash_table_destroy (ib->words); - g_ptr_array_free (ib->oldfiles, TRUE); - if (ibfs) - g_free (ibfs); - g_free (ib->path); - g_free (ib); - - return NULL; -} - -struct ibex_write_data { - unsigned long index; - FILE *f; - char *lastname; -}; - -/* This is an internal function to find the longest common initial - * prefix between the last-written word and the current word. - */ -static int -get_prefix (struct ibex_write_data *iwd, char *name) -{ - int i = 0; - if (iwd->lastname) { - while (!strncmp (iwd->lastname, name, i + 1)) - i++; - } - iwd->lastname = name; - return i; -} - -static gint -write_file (gpointer key, gpointer value, gpointer data) -{ - char *file = key; - ibex_file *ibf = value; - struct ibex_write_data *iwd = data; - int prefix; - - ibf->index = iwd->index++; - prefix = get_prefix (iwd, file); - fprintf (iwd->f, "%c%s", prefix, file + prefix); - fputc (0, iwd->f); - return FALSE; -} - -static void -store_word (gpointer key, gpointer value, gpointer data) -{ - GTree *wtree = data; - - g_tree_insert (wtree, key, value); -} - -static gint -write_word (gpointer key, gpointer value, gpointer data) -{ - char *word = key; - GPtrArray *refs = value; - struct ibex_write_data *iwd = data; - ibex_file *ibf; - int i, ind, prefix; - - for (i = ind = 0; i < refs->len; i++) { - ibf = g_ptr_array_index (refs, i); - if (ibf->index == -1) { - g_ptr_array_remove_index_fast (refs, i); - i--; - } else - ind++; - } - - if (ind != 0) { - prefix = get_prefix (iwd, word); - fprintf (iwd->f, "%c%s", prefix, word + prefix); - fputc (0, iwd->f); - - write_number (iwd->f, ind); - - for (i = 0; i < refs->len; i++) { - ibf = g_ptr_array_index (refs, i); - write_number (iwd->f, ibf->index); - } - } - return FALSE; -} - -/** - * ibex_write: Write an ibex out to disk. - * @ib: the ibex - * - * This writes an ibex to disk. - * - * Return value: 0 for success, -1 for failure (in which case errno - * is set). - **/ -int -ibex_write (ibex *ib) -{ - struct ibex_write_data iwd; - GTree *wtree; - char *tmpfile; - - tmpfile = g_strdup_printf ("%s~", ib->path); - iwd.f = fopen (tmpfile, "w"); - if (!iwd.f) { - if (errno == 0) - errno = ENOMEM; - g_free (tmpfile); - return -1; - } - - fputs (IBEX_VERSION, iwd.f); - if (ferror (iwd.f)) - goto lose; - - iwd.index = 0; - iwd.lastname = NULL; - write_number (iwd.f, g_tree_nnodes (ib->files)); - if (ferror (iwd.f)) - goto lose; - g_tree_traverse (ib->files, write_file, G_IN_ORDER, &iwd); - if (ferror (iwd.f)) - goto lose; - - iwd.lastname = NULL; - write_number (iwd.f, g_hash_table_size (ib->words)); - if (ferror (iwd.f)) - goto lose; - wtree = g_tree_new (strcmp); - g_hash_table_foreach (ib->words, store_word, wtree); - g_tree_traverse (wtree, write_word, G_IN_ORDER, &iwd); - g_tree_destroy (wtree); - if (ferror (iwd.f)) - goto lose; - - if (fclose (iwd.f) == 0 && rename (tmpfile, ib->path) == 0) { - g_free (tmpfile); - ib->dirty = FALSE; - return 0; - } - -lose: - unlink (tmpfile); - g_free (tmpfile); - return -1; -} - -/** - * ibex_close: Write out the ibex file (if it has changed) and free - * the data associated with it. - * @ib: the ibex - * - * If this ibex file has been modified since it was opened, this will - * call ibex_write() to write it out to disk. It will then free all data - * associated with the ibex. After calling ibex_close(), @ib will no - * longer be a valid ibex. - * - * Return value: 0 on success, -1 on an ibex_write() failure (in which - * case @ib will not be destroyed). - **/ -int -ibex_close (ibex *ib) -{ - ibex_file *ibf; - - if (ib->dirty && ibex_write (ib) == -1) - return -1; - - g_tree_traverse (ib->files, free_file, G_IN_ORDER, NULL); - g_tree_destroy (ib->files); - g_hash_table_foreach (ib->words, free_word, NULL); - g_hash_table_destroy (ib->words); - - while (ib->oldfiles->len) { - ibf = g_ptr_array_remove_index (ib->oldfiles, 0); - g_free (ibf->name); - g_free (ibf); - } - g_ptr_array_free (ib->oldfiles, TRUE); - g_free (ib->path); - g_free (ib); - - return 0; -} - -static gint -free_file (gpointer key, gpointer value, gpointer data) -{ - ibex_file *ibf = value; - - g_free (ibf->name); - g_free (ibf); - return FALSE; -} - -static void -free_word (gpointer key, gpointer value, gpointer data) -{ - g_free (key); - g_ptr_array_free (value, TRUE); -} - -static char * -get_compressed_word (FILE *f, char **lastword) -{ - char *buf, *p; - int c, size; - - c = getc (f); - if (c == EOF) - return NULL; - - size = c + 10; - buf = g_malloc (size); - if (*lastword) - strncpy (buf, *lastword, c); - p = buf + c; - do { - c = getc (f); - if (c == EOF) - return NULL; - if (p == buf + size) { - buf = g_realloc (buf, size + 10); - p = buf + size; - size += 10; - } - *p++ = c; - } while (c != 0); - - *lastword = buf; - return buf; -} - -static void -write_number (FILE *f, unsigned long number) -{ - int i, flag = 0; - char buf[4]; - - i = 4; - do { - buf[--i] = (number & 0x7F) | flag; - number = number >> 7; - flag = 0x80; - } while (number != 0); - - fwrite (buf + i, 1, 4 - i, f); -} - -static unsigned long -read_number (FILE *f) -{ - int byte; - unsigned long num; - - num = 0; - do { - byte = getc (f); - num = num << 7 | (byte & 0x7F); - } while (byte & 0x80); - - return num; -} - |