aboutsummaryrefslogtreecommitdiffstats
path: root/libibex
diff options
context:
space:
mode:
Diffstat (limited to 'libibex')
-rw-r--r--libibex/.cvsignore7
-rw-r--r--libibex/COPYING340
-rw-r--r--libibex/ChangeLog9
-rw-r--r--libibex/Makefile30
-rw-r--r--libibex/TODO61
-rw-r--r--libibex/file.c383
-rw-r--r--libibex/find.c100
-rw-r--r--libibex/ibex.h74
-rw-r--r--libibex/ibex_internal.h26
-rw-r--r--libibex/index.c101
-rw-r--r--libibex/lookup.c74
-rw-r--r--libibex/mkindex.c75
-rw-r--r--libibex/words.c263
13 files changed, 0 insertions, 1543 deletions
diff --git a/libibex/.cvsignore b/libibex/.cvsignore
deleted file mode 100644
index fd6b811c68..0000000000
--- a/libibex/.cvsignore
+++ /dev/null
@@ -1,7 +0,0 @@
-.deps
-Makefile
-Makefile.in
-.libs
-.deps
-*.lo
-*.la
diff --git a/libibex/COPYING b/libibex/COPYING
deleted file mode 100644
index d60c31a97a..0000000000
--- a/libibex/COPYING
+++ /dev/null
@@ -1,340 +0,0 @@
- GNU GENERAL PUBLIC LICENSE
- Version 2, June 1991
-
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.
- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The licenses for most software are designed to take away your
-freedom to share and change it. By contrast, the GNU General Public
-License is intended to guarantee your freedom to share and change free
-software--to make sure the software is free for all its users. This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it. (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.) You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
- To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have. You must make sure that they, too, receive or can get the
-source code. And you must show them these terms so they know their
-rights.
-
- We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
- Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software. If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
- Finally, any free program is threatened constantly by software
-patents. We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary. To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- GNU GENERAL PUBLIC LICENSE
- TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
- 0. This License applies to any program or other work which contains
-a notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License. The "Program", below,
-refers to any such program or work, and a "work based on the Program"
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language. (Hereinafter, translation is included without limitation in
-the term "modification".) Each licensee is addressed as "you".
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope. The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the
-Program (independent of having been made by running the Program).
-Whether that is true depends on what the Program does.
-
- 1. You may copy and distribute verbatim copies of the Program's
-source code as you receive it, in any medium, provided that you
-conspicuously and appropriately publish on each copy an appropriate
-copyright notice and disclaimer of warranty; keep intact all the
-notices that refer to this License and to the absence of any warranty;
-and give any other recipients of the Program a copy of this License
-along with the Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a fee.
-
- 2. You may modify your copy or copies of the Program or any portion
-of it, thus forming a work based on the Program, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
- a) You must cause the modified files to carry prominent notices
- stating that you changed the files and the date of any change.
-
- b) You must cause any work that you distribute or publish, that in
- whole or in part contains or is derived from the Program or any
- part thereof, to be licensed as a whole at no charge to all third
- parties under the terms of this License.
-
- c) If the modified program normally reads commands interactively
- when run, you must cause it, when started running for such
- interactive use in the most ordinary way, to print or display an
- announcement including an appropriate copyright notice and a
- notice that there is no warranty (or else, saying that you provide
- a warranty) and that users may redistribute the program under
- these conditions, and telling the user how to view a copy of this
- License. (Exception: if the Program itself is interactive but
- does not normally print such an announcement, your work based on
- the Program is not required to print an announcement.)
-
-These requirements apply to the modified work as a whole. If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works. But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
- 3. You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
- a) Accompany it with the complete corresponding machine-readable
- source code, which must be distributed under the terms of Sections
- 1 and 2 above on a medium customarily used for software interchange; or,
-
- b) Accompany it with a written offer, valid for at least three
- years, to give any third party, for a charge no more than your
- cost of physically performing source distribution, a complete
- machine-readable copy of the corresponding source code, to be
- distributed under the terms of Sections 1 and 2 above on a medium
- customarily used for software interchange; or,
-
- c) Accompany it with the information you received as to the offer
- to distribute corresponding source code. (This alternative is
- allowed only for noncommercial distribution and only if you
- received the program in object code or executable form with such
- an offer, in accord with Subsection b above.)
-
-The source code for a work means the preferred form of the work for
-making modifications to it. For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable. However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
- 4. You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License. Any attempt
-otherwise to copy, modify, sublicense or distribute the Program is
-void, and will automatically terminate your rights under this License.
-However, parties who have received copies, or rights, from you under
-this License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
- 5. You are not required to accept this License, since you have not
-signed it. However, nothing else grants you permission to modify or
-distribute the Program or its derivative works. These actions are
-prohibited by law if you do not accept this License. Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
- 6. Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions. You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties to
-this License.
-
- 7. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all. For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices. Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
- 8. If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded. In such case, this License incorporates
-the limitation as if written in the body of this License.
-
- 9. The Free Software Foundation may publish revised and/or new versions
-of the General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number. If the Program
-specifies a version number of this License which applies to it and "any
-later version", you have the option of following the terms and conditions
-either of that version or of any later version published by the Free
-Software Foundation. If the Program does not specify a version number of
-this License, you may choose any version ever published by the Free Software
-Foundation.
-
- 10. If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the author
-to ask for permission. For software which is copyrighted by the Free
-Software Foundation, write to the Free Software Foundation; we sometimes
-make exceptions for this. Our decision will be guided by the two goals
-of preserving the free status of all derivatives of our free software and
-of promoting the sharing and reuse of software generally.
-
- NO WARRANTY
-
- 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
- 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGES.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
- Gnomovision version 69, Copyright (C) year name of author
- Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, the commands you use may
-be called something other than `show w' and `show c'; they could even be
-mouse-clicks or menu items--whatever suits your program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the program, if
-necessary. Here is a sample; alter the names:
-
- Yoyodyne, Inc., hereby disclaims all copyright interest in the program
- `Gnomovision' (which makes passes at compilers) written by James Hacker.
-
- <signature of Ty Coon>, 1 April 1989
- Ty Coon, President of Vice
-
-This General Public License does not permit incorporating your program into
-proprietary programs. If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library. If this is what you want to do, use the GNU Library General
-Public License instead of this License.
diff --git a/libibex/ChangeLog b/libibex/ChangeLog
deleted file mode 100644
index 4aa2d91b0e..0000000000
--- a/libibex/ChangeLog
+++ /dev/null
@@ -1,9 +0,0 @@
-2000-02-14 NotZed <notzed@zedzone.helixcode.com>
-
- * Makefile: Hack together a build using libtool. This should all
- be auto*'d at some point I guess.
-
-2000-02-13 NotZed <notzed@zedzone.helixcode.com>
-
- * Added ChangeLog file.
-
diff --git a/libibex/Makefile b/libibex/Makefile
deleted file mode 100644
index 8cc2c1b873..0000000000
--- a/libibex/Makefile
+++ /dev/null
@@ -1,30 +0,0 @@
-OBJS=file.lo index.lo find.lo words.lo
-MKINDEXOBJS=mkindex.lo
-LOOKUPOBJS=lookup.lo
-
-CFLAGS=${PROF} -g -Wall -Wstrict-prototypes -Wmissing-prototypes `glib-config --cflags` `unicode-config --cflags`
-LDFLAGS=${PROF}
-
-LIBTOOL=sh ../libtool
-
-all: libibex.la mkindex lookup
-
-%.lo: %.c
- $(LIBTOOL) --mode compile $(CC) $(CFLAGS) -c -o $@ $<
-
-libibex.la: ${OBJS}
- $(LIBTOOL) --mode link $(CC) -static $^ -o $@
-
-mkindex: ${MKINDEXOBJS} libibex.la
- ${LIBTOOL) --mode link $(CC) ${LDFLAGS} -o mkindex ${MKINDEXOBJS} libibex.la \
- `glib-config --libs` `unicode-config --libs`
-
-lookup: ${LOOKUPOBJS} libibex.la
- $(LIBTOOL) --mode link ${CC} ${LDFLAGS} -o lookup ${LOOKUPOBJS} libibex.la \
- `glib-config --libs` `unicode-config --libs`
-
-clean:
- rm -f ${OBJS} libibex.la
- rm -f ${MKINDEXOBJS} mkindex
- rm -f ${LOOKUPOBJS} lookup
- rm -f *.core *~ INDEX \ No newline at end of file
diff --git a/libibex/TODO b/libibex/TODO
deleted file mode 100644
index a087c8d1f3..0000000000
--- a/libibex/TODO
+++ /dev/null
@@ -1,61 +0,0 @@
-Stability
----------
-* ibex_open should never crash, and should never return NULL without
-errno being set. Should check for errors when reading.
-
-
-Performance
------------
-* Profiling, keep thinking about data structures, etc.
-
-* Check memory usage
-
-* See if writing the "inverse image" of long ref streams helps
-compression without hurting performance now. (ie, if a word appears in
-more than half of the files, write out the list of files it _doesn't_
-appear in). (I tried this before, and it wasn't working well, but the
-file format and data structures have changed a lot.)
-
-* We could save a noticeable chunk of time if normalize_word computed
-the hash of the word and then we could pass that into
-g_hash_table_insert somehow.
-
-* Make a copy of the buffer to be indexed (or provide interface for
-caller to say ibex can munge the provided data) and then use that
-rather than constantly copying things. ?
-
-
-Functionality
--------------
-* ibex file locking
-
-* specify file mode in ibex_open
-
-* ibex_find* need to normalize the search words... should this be done
-by the caller or by ibex_find?
-
-* Needs to be some way to do a secondary search after getting results
-back from ibex_find* (ie, for "foo near bar"). This either has to be
-done by ibex, or requires us to export the normalize interface.
-
-* Does there need to be an ibex_find_any, or is that easy enough for the
-caller to do?
-
-* utf8_trans needs to cover at least two more code pages. This is
-tricky because it's not clear whether some of the letters there should
-be translated to ASCII or left as UTF8. This requires some
-investigation.
-
-* ibex_index_* need to ignore HTML tags.
- NAME = [A-Za-z][A-Za-z0-9.-]*
- </?{NAME}(\s*{NAME}(\s*=\s*({NAME}|"[^"]*"|'[^']*')))*>
- <!(--([^-]*|-[^-])--\s*)*>
-
- ugh. ok, simplifying, we get:
- <[^!](([^"'>]*("[^"]*"|'[^']*'))*> or
- <!(--([^-]*|-[^-])--\s*)*>
-
- which is still not simple. sigh.
-
-* ibex_index_* need to recognize and ignore "non-text". Particularly
-BinHex and uuencoding.
diff --git a/libibex/file.c b/libibex/file.c
deleted file mode 100644
index e9eaab7f51..0000000000
--- a/libibex/file.c
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- Copyright 2000 Helix Code Inc.
-*/
-
-/* file.c: index file read/write ops */
-
-#include <ctype.h>
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "ibex_internal.h"
-
-static unsigned long read_number(FILE *f);
-static void write_number(FILE *f, unsigned long n);
-static char *get_compressed_word(FILE *f, char **lastword);
-
-static gint free_file(gpointer key, gpointer value, gpointer data);
-static void free_word(gpointer key, gpointer value, gpointer data);
-
-/* The file format is:
- *
- * version string (currently "ibex1")
- * file count
- * list of compressed filenames, separated by \0
- * word count
- * list of compressed words, each followed by \0, a count, and that
- * many references.
- *
- * All numbers are stored 7-bit big-endian, with the high bit telling
- * whether or not the number continues to the next byte.
- *
- * compressed text consists of a byte telling how many characters the
- * line has in common with the line before it, followed by the rest of
- * the string. Obviously this only really works if the lists are sorted.
- */
-ibex *
-ibex_open(char *file, gboolean create)
-{
- ibex *ib;
- FILE *f;
- char vbuf[sizeof(IBEX_VERSION) - 1];
- char *word, *lastword;
- unsigned long nfiles, nwords, nrefs, ref;
- ibex_file **ibfs = NULL;
- int i;
- GPtrArray *refs;
-
- f = fopen(file, "r");
- if (!f && (errno != ENOENT || !create))
- {
- if (errno == 0)
- errno = ENOMEM;
- return NULL;
- }
-
- ib = g_malloc(sizeof(ibex));
- ib->dirty = FALSE;
- ib->path = g_strdup(file);
- ib->files = g_tree_new(strcmp);
- ib->words = g_hash_table_new(g_str_hash, g_str_equal);
- ib->oldfiles = g_ptr_array_new();
-
- if (!f)
- return ib;
-
- /* Check version. */
- if (fread(vbuf, 1, sizeof(vbuf), f) != sizeof(vbuf))
- {
- if (feof(f))
- errno = EINVAL;
- goto errout;
- }
- if (strncmp(vbuf, IBEX_VERSION, sizeof(vbuf) != 0))
- {
- errno = EINVAL;
- goto errout;
- }
-
- /* Read list of files. */
- nfiles = read_number(f);
- ibfs = g_malloc(nfiles * sizeof(ibex_file *));
- lastword = NULL;
- for (i = 0; i < nfiles; i++)
- {
- ibfs[i] = g_malloc(sizeof(ibex_file));
- ibfs[i]->name = get_compressed_word(f, &lastword);
- if (!ibfs[i]->name)
- goto errout;
- ibfs[i]->index = 0;
- g_tree_insert(ib->files, ibfs[i]->name, ibfs[i]);
- }
-
- /* Read list of words. */
- nwords = read_number(f);
- lastword = NULL;
- for (i = 0; i < nwords; i++)
- {
- word = get_compressed_word(f, &lastword);
- if (!word)
- goto errout;
-
- nrefs = read_number(f);
- refs = g_ptr_array_new();
- g_ptr_array_set_size(refs, nrefs);
- while (nrefs--)
- {
- ref = read_number(f);
- if (ref >= nfiles)
- goto errout;
- refs->pdata[nrefs] = ibfs[ref];
- }
-
- g_hash_table_insert(ib->words, word, refs);
- }
-
- g_free(ibfs);
- fclose(f);
- return ib;
-
-errout:
- fclose(f);
- g_tree_traverse(ib->files, free_file, G_IN_ORDER, NULL);
- g_tree_destroy(ib->files);
- g_hash_table_foreach(ib->words, free_word, NULL);
- g_hash_table_destroy(ib->words);
- g_ptr_array_free(ib->oldfiles, TRUE);
- if (ibfs)
- g_free(ibfs);
- g_free(ib->path);
- g_free(ib);
-
- return NULL;
-}
-
-struct ibex_write_data {
- unsigned long index;
- FILE *f;
- char *lastname;
-};
-
-static int
-get_prefix(struct ibex_write_data *iwd, char *name)
-{
- int i = 0;
- if (iwd->lastname)
- {
- while (!strncmp(iwd->lastname, name, i + 1))
- i++;
- }
- iwd->lastname = name;
- return i;
-}
-
-static gint
-write_file(gpointer key, gpointer value, gpointer data)
-{
- char *file = key;
- ibex_file *ibf = value;
- struct ibex_write_data *iwd = data;
- int prefix;
-
- ibf->index = iwd->index++;
- prefix = get_prefix(iwd, file);
- fprintf(iwd->f, "%c%s", prefix, file + prefix);
- fputc(0, iwd->f);
- return FALSE;
-}
-
-static void
-store_word(gpointer key, gpointer value, gpointer data)
-{
- GTree *wtree = data;
-
- g_tree_insert(wtree, key, value);
-}
-
-static gint
-write_word(gpointer key, gpointer value, gpointer data)
-{
- char *word = key;
- GPtrArray *refs = value;
- struct ibex_write_data *iwd = data;
- ibex_file *ibf;
- int i, ind, prefix;
-
- for (i = ind = 0; i < refs->len; i++)
- {
- ibf = g_ptr_array_index(refs, i);
- if (ibf->index == -1)
- {
- g_ptr_array_remove_index_fast(refs, i);
- i--;
- }
- else
- ind++;
- }
-
- if (ind != 0)
- {
- prefix = get_prefix(iwd, word);
- fprintf(iwd->f, "%c%s", prefix, word + prefix);
- fputc(0, iwd->f);
-
- write_number(iwd->f, ind);
-
- for (i = 0; i < refs->len; i++)
- {
- ibf = g_ptr_array_index(refs, i);
- write_number(iwd->f, ibf->index);
- }
- }
- return FALSE;
-}
-
-int
-ibex_write(ibex *ib)
-{
- struct ibex_write_data iwd;
- GTree *wtree;
- char *tmpfile;
-
- tmpfile = g_strdup_printf("%s~", ib->path);
- iwd.f = fopen(tmpfile, "w");
- if (!iwd.f)
- {
- if (errno == 0)
- errno = ENOMEM;
- g_free(tmpfile);
- return -1;
- }
-
- fputs(IBEX_VERSION, iwd.f);
- if (ferror(iwd.f))
- goto lose;
-
- iwd.index = 0;
- iwd.lastname = NULL;
- write_number(iwd.f, g_tree_nnodes(ib->files));
- if (ferror(iwd.f))
- goto lose;
- g_tree_traverse(ib->files, write_file, G_IN_ORDER, &iwd);
- if (ferror(iwd.f))
- goto lose;
-
- iwd.lastname = NULL;
- write_number(iwd.f, g_hash_table_size(ib->words));
- if (ferror(iwd.f))
- goto lose;
- wtree = g_tree_new(strcmp);
- g_hash_table_foreach(ib->words, store_word, wtree);
- g_tree_traverse(wtree, write_word, G_IN_ORDER, &iwd);
- g_tree_destroy(wtree);
- if (ferror(iwd.f))
- goto lose;
-
- if (fclose(iwd.f) == 0 && rename(tmpfile, ib->path) == 0)
- {
- g_free(tmpfile);
- ib->dirty = FALSE;
- return 0;
- }
-
-lose:
- unlink(tmpfile);
- g_free(tmpfile);
- return -1;
-}
-
-int
-ibex_close(ibex *ib)
-{
- ibex_file *ibf;
-
- if (ib->dirty && ibex_write(ib) == -1)
- return -1;
-
- g_tree_traverse(ib->files, free_file, G_IN_ORDER, NULL);
- g_tree_destroy(ib->files);
- g_hash_table_foreach(ib->words, free_word, NULL);
- g_hash_table_destroy(ib->words);
-
- while (ib->oldfiles->len)
- {
- ibf = g_ptr_array_remove_index(ib->oldfiles, 0);
- g_free(ibf->name);
- g_free(ibf);
- }
- g_ptr_array_free(ib->oldfiles, TRUE);
- g_free(ib->path);
- g_free(ib);
-
- return 0;
-}
-
-static gint
-free_file(gpointer key, gpointer value, gpointer data)
-{
- ibex_file *ibf = value;
-
- g_free(ibf->name);
- g_free(ibf);
- return FALSE;
-}
-
-static void
-free_word(gpointer key, gpointer value, gpointer data)
-{
- g_free(key);
- g_ptr_array_free(value, TRUE);
-}
-
-static char *
-get_compressed_word(FILE *f, char **lastword)
-{
- char *buf, *p;
- int c, size;
-
- c = getc(f);
- if (c == EOF)
- return NULL;
-
- size = c + 10;
- buf = g_malloc(size);
- if (*lastword)
- strncpy(buf, *lastword, c);
- p = buf + c;
- do
- {
- c = getc(f);
- if (c == EOF)
- return NULL;
- if (p == buf + size)
- {
- buf = g_realloc(buf, size + 10);
- p = buf + size;
- size += 10;
- }
- *p++ = c;
- }
- while (c != 0);
-
- *lastword = buf;
- return buf;
-}
-
-static void
-write_number(FILE *f, unsigned long number)
-{
- int i, flag = 0;
- char buf[4];
-
- i = 4;
- do
- {
- buf[--i] = (number & 0x7F) | flag;
- number = number >> 7;
- flag = 0x80;
- }
- while (number != 0);
-
- fwrite(buf + i, 1, 4 - i, f);
-}
-
-static unsigned long
-read_number(FILE *f)
-{
- int byte;
- unsigned long num;
-
- num = 0;
- do
- {
- byte = getc(f);
- num = num << 7 | (byte & 0x7F);
- }
- while (byte & 0x80);
-
- return num;
-}
-
diff --git a/libibex/find.c b/libibex/find.c
deleted file mode 100644
index 0937331a83..0000000000
--- a/libibex/find.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- Copyright 2000 Helix Code Inc.
-*/
-
-/* find.c: index file searching ops */
-
-#include <string.h>
-
-#include "ibex_internal.h"
-
-GPtrArray *
-ibex_find(ibex *ib, char *word)
-{
- GPtrArray *refs, *ret;
- ibex_file *ibf;
- int i;
-
- ret = g_ptr_array_new();
- refs = g_hash_table_lookup(ib->words, word);
- if (refs)
- {
- for (i = 0; i < refs->len; i++)
- {
- ibf = g_ptr_array_index(refs, i);
- g_ptr_array_add(ret, ibf->name);
- }
- }
- return ret;
-}
-
-static gint
-build_array(gpointer key, gpointer value, gpointer data)
-{
- char *name = key;
- unsigned int count = GPOINTER_TO_UINT(value);
- GPtrArray *ret = data;
-
- if (count == 1)
- g_ptr_array_add(ret, name);
- return FALSE;
-}
-
-GPtrArray *
-ibex_find_all(ibex *ib, GPtrArray *words)
-{
- GTree *work;
- GPtrArray *wrefs, *ret;
- int i, j, count;
- char *word;
- ibex_file *ibf;
-
- if (words->len == 0)
- return g_ptr_array_new();
- else if (words->len == 1)
- return ibex_find(ib, g_ptr_array_index(words, 0));
-
- work = g_tree_new(strcmp);
- for (i = 0; i < words->len; i++)
- {
- word = g_ptr_array_index(words, i);
- wrefs = g_hash_table_lookup(ib->words, word);
- if (!wrefs)
- {
- /* One of the words isn't even in the index. */
- g_tree_destroy(work);
- return g_ptr_array_new();
- }
-
- if (i == 0)
- {
- /* Copy the references into a tree, using the filenames as
- * keys and the size of words as the value.
- */
- for (j = 0; j < wrefs->len; j++)
- {
- ibf = g_ptr_array_index(wrefs, j);
- g_tree_insert(work, ibf->name, GUINT_TO_POINTER(words->len));
- }
- }
- else
- {
- /* Increment the counts in the working tree for the references
- * for this word.
- */
- for (j = 0; j < wrefs->len; j++)
- {
- ibf = g_ptr_array_index(wrefs, j);
- count = GPOINTER_TO_UINT(g_tree_lookup(work, ibf->name));
- if (count)
- g_tree_insert(work, ibf->name, GUINT_TO_POINTER(count - 1));
- }
- }
- }
-
- /* Build an array with the refs that contain all the words. */
- ret = g_ptr_array_new();
- g_tree_traverse(work, build_array, G_IN_ORDER, ret);
- g_tree_destroy(work);
- return ret;
-}
diff --git a/libibex/ibex.h b/libibex/ibex.h
deleted file mode 100644
index e430fc700d..0000000000
--- a/libibex/ibex.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- Copyright 2000 Helix Code Inc.
-*/
-
-#ifndef _IBEX_H
-#define _IBEX_H
-
-#include <sys/types.h>
-#include <glib.h>
-
-struct ibex;
-typedef struct ibex ibex;
-
-/* All functions that can fail set errno and return NULL or -1 on
- * failure.
- */
-
-/* Open the named ibex index file. If CREATE is true, create the file
- * if it doesn't already exist.
- */
-ibex *ibex_open(char *file, gboolean create);
-
-/* Write the ibex to disk. */
-int ibex_write(ibex *ib);
-
-/* Write the ibex to disk if it has changed, and free all memory
- * associated with it.
- */
-int ibex_close(ibex *ib);
-
-
-
-/* Index the named file. (If the FILENAME is already in the index,
- * remove the old copy.
- */
-int ibex_index_file(ibex *ib, char *filename);
-
-/* Index LEN bytes off FD, using NAME as the filename in the index.
- * (If NAME already exists in the index, this adds more data to it.)
- */
-int ibex_index_fd(ibex *ib, char *name, int fd, size_t len);
-
-/* Like ibex_index_fd, but with a buffer rather than a file descriptor.
- * The buffer does not need to be '\0'-terminated. If UNREAD is not
- * NULL, then the indexer won't assume that the buffer ends on a word
- * boundary, and will return (in UNREAD) the number of bytes from the
- * end of the buffer that it didn't use, if any.
- */
-int ibex_index_buffer(ibex *ib, char *name, char *buffer,
- size_t len, size_t *unread);
-
-/* Remove entries for a given file from the index. (Most of the removal
- * isn't actually done until the file is written out to disk, so this
- * is very fast.)
- */
-void ibex_unindex(ibex *ib, char *name);
-
-/* Rename a file in the index. (This is also fast.) */
-void ibex_rename(ibex *ib, char *oldfilename, char *newfilename);
-
-
-
-/* Find a word in the index. Returns an array of strings: the caller
- * should free the array, but should not free or modify the strings.
- */
-GPtrArray *ibex_find(ibex *ib, char *word);
-
-/* Return all the files containing all of the words in the given
- * array. Returned data is like with ibex_find.
- */
-GPtrArray *ibex_find_all(ibex *ib, GPtrArray *words);
-
-#endif /* ! _IBEX_H */
-
diff --git a/libibex/ibex_internal.h b/libibex/ibex_internal.h
deleted file mode 100644
index 1aabfb98c0..0000000000
--- a/libibex/ibex_internal.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- Copyright 2000 Helix Code Inc.
-*/
-#include <glib.h>
-
-#include "ibex.h"
-
-#define IBEX_VERSION "ibex1"
-
-struct ibex {
- char *path;
- GTree *files;
- GHashTable *words;
- GPtrArray *oldfiles;
- gboolean dirty;
-};
-
-struct ibex_file {
- char *name;
- long index;
-};
-typedef struct ibex_file ibex_file;
-
-gint ibex__strcmp(gconstpointer a, gconstpointer b);
-
-#define IBEX_BUFSIZ 1024
diff --git a/libibex/index.c b/libibex/index.c
deleted file mode 100644
index aef891182e..0000000000
--- a/libibex/index.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- Copyright 2000 Helix Code Inc.
-*/
-/* index.c: high-level indexing ops */
-
-#include <errno.h>
-#include <fcntl.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include "ibex_internal.h"
-
-/* Index a file, given its name. (Replace any previously indexed contents
- * of this file with the new contents.)
- */
-int
-ibex_index_file(ibex *ib, char *filename)
-{
- int fd;
- int status;
- struct stat st;
-
- fd = open(filename, O_RDONLY);
- if (fd < 0)
- return -1;
-
- if (fstat(fd, &st) == -1)
- {
- close(fd);
- return -1;
- }
- if (!S_ISREG(st.st_mode))
- {
- close(fd);
- errno = EINVAL;
- return -1;
- }
-
- ibex_unindex(ib, filename);
- status = ibex_index_fd(ib, filename, fd, st.st_size);
- close(fd);
- return status;
-}
-
-/* Given a file descriptor and a name to refer to it by, index LEN
- * bytes of data from it.
- */
-int
-ibex_index_fd(ibex *ib, char *name, int fd, size_t len)
-{
- char *buf;
- int off = 0, nread, status;
-
- buf = g_malloc(len);
- do
- {
- nread = read(fd, buf + off, len - off);
- if (nread == -1)
- {
- g_free(buf);
- return -1;
- }
- off += nread;
- }
- while (off != len);
-
- status = ibex_index_buffer(ib, name, buf, len, NULL);
- g_free(buf);
-
- return status;
-}
-
-void
-ibex_unindex(ibex *ib, char *name)
-{
- ibex_file *ibf;
-
- ibf = g_tree_lookup(ib->files, name);
- if (ibf)
- {
- ibf->index = -1;
- g_tree_remove(ib->files, name);
- g_ptr_array_add(ib->oldfiles, ibf);
- }
-}
-
-void
-ibex_rename(ibex *ib, char *oldname, char *newname)
-{
- ibex_file *ibf;
-
- ibf = g_tree_lookup(ib->files, oldname);
- if (ibf)
- {
- g_tree_remove(ib->files, oldname);
- g_free(ibf->name);
- ibf->name = g_strdup(newname);
- g_tree_insert(ib->files, ibf->name, ibf);
- }
-}
diff --git a/libibex/lookup.c b/libibex/lookup.c
deleted file mode 100644
index d6bdec06a5..0000000000
--- a/libibex/lookup.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- Copyright 2000 Helix Code Inc.
-*/
-
-/* lookup.c: a simple client, part 2 */
-
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "ibex.h"
-
-extern int optind;
-extern char *optarg;
-
-static void
-usage(void)
-{
- fprintf(stderr, "Usage: lookup [-f indexfile] word ...\n");
- exit(1);
-}
-
-int
-main(int argc, char **argv)
-{
- ibex *ib;
- GPtrArray *ans, *words;
- int opt, i;
- char *file = "INDEX";
-
- while ((opt = getopt(argc, argv, "f:")) != -1)
- {
- switch (opt)
- {
- case 'f':
- file = optarg;
- break;
-
- default:
- usage();
- break;
- }
- }
- argc -= optind;
- argv += optind;
-
- if (argc == 0)
- usage();
-
- ib = ibex_open(file, FALSE);
- if (!ib)
- {
- printf("Couldn't open %s: %s\n", file, strerror(errno));
- exit(1);
- }
-
- words = g_ptr_array_new();
- while (argc--)
- g_ptr_array_add(words, argv[argc]);
-
- ans = ibex_find_all(ib, words);
- if (ans)
- {
- for (i = 0; i < ans->len; i++)
- printf("%s\n", (char *)g_ptr_array_index(ans, i));
- exit(0);
- }
- else
- {
- printf("Nope.\n");
- exit(1);
- }
-}
diff --git a/libibex/mkindex.c b/libibex/mkindex.c
deleted file mode 100644
index 8018caef69..0000000000
--- a/libibex/mkindex.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- Copyright 2000 Helix Code Inc.
-*/
-/* mkindex.c: a simple client, part 1 */
-
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "ibex.h"
-
-extern int optind;
-extern char *optarg;
-
-static void
-usage(void)
-{
- fprintf(stderr, "Usage: mkindex [-f indexfile] file ...\n");
- exit(1);
-}
-
-int
-main(int argc, char **argv)
-{
- ibex *ib;
- int opt;
- char *file = "INDEX";
-
- while ((opt = getopt(argc, argv, "f:")) != -1)
- {
- switch (opt)
- {
- case 'f':
- file = optarg;
- break;
-
- default:
- usage();
- break;
- }
- }
- argc -= optind;
- argv += optind;
-
- if (argc == 0)
- usage();
-
- ib = ibex_open(file, TRUE);
- if (!ib)
- {
- fprintf(stderr, "Couldn't open index file %s: %s\n",
- file, strerror(errno));
- exit(1);
- }
-
- while (argc--)
- {
- if (ibex_index_file(ib, argv[argc]) == -1)
- {
- fprintf(stderr, "Couldn't index %s: %s\n", argv[argc],
- strerror(errno));
- exit(1);
- }
- }
-
-
- if (ibex_close(ib) != 0)
- {
- fprintf(stderr, "Failed to write index file %s: %s\n",
- file, strerror(errno));
- exit(1);
- }
- exit(0);
-}
diff --git a/libibex/words.c b/libibex/words.c
deleted file mode 100644
index 4776634251..0000000000
--- a/libibex/words.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- Copyright 2000 Helix Code Inc.
-*/
-/* words.c: low-level indexing ops */
-
-#include <ctype.h>
-#include <errno.h>
-#include <string.h>
-
-#include <unicode.h>
-
-#include "ibex_internal.h"
-
-static signed char utf8_trans[] = {
- 'A', 'A', 'A', 'A', 'A', 'A', -1, 'C', 'E', 'E', 'E', 'E', 'I', 'I',
- 'I', 'I', -2, 'N', 'O', 'O', 'O', 'O', 'O', '*', 'O', 'U', 'U', 'U',
- 'U', 'Y', -3, -4, 'a', 'a', 'a', 'a', 'a', 'a', -5, 'c', 'e', 'e',
- 'e', 'e', 'i', 'i', 'i', 'i', -6, 'n', 'o', 'o', 'o', 'o', 'o', '/',
- 'o', 'u', 'u', 'u', 'u', 'y', -7, 'y', 'A', 'a', 'A', 'a', 'A', 'a',
- 'C', 'c', 'C', 'c', 'C', 'c', 'C', 'c', 'D', 'd', 'D', 'd', 'E', 'e',
- 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'G', 'g', 'G', 'g', 'G', 'g',
- 'G', 'g', 'H', 'h', 'H', 'h', 'I', 'i', 'I', 'i', 'I', 'i', 'I', 'i',
- 'I', 'i', -8, -9, 'J', 'j', 'K', 'k', 'k', 'L', 'l', 'L', 'l', 'L',
- 'l', 'L', 'l', 'L', 'l', 'N', 'n', 'N', 'n', 'N', 'n', 'n', -10, -11,
- 'O', 'o', 'O', 'o', 'O', 'o', -12, -13, 'R', 'r', 'R', 'r', 'R', 'r',
- 'S', 'r', 'S', 's', 'S', 's', 'S', 's', 'T', 't', 'T', 't', 'T', 't',
- 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'W', 'w',
- 'Y', 'y', 'Y', 'Z', 'z', 'Z', 'z', 'Z', 'z', 's'
-};
-
-static char *utf8_long_trans[] = {
- "AE", "TH", "TH", "ss", "ae", "th", "th", "IJ", "ij", "NG", "ng", "OE", "oe"
-};
-
-/* This is a bit weird. It takes pointers to the start and end (actually
- * just past the end) of a UTF-8-encoded word, and a buffer at least 1
- * byte longer than the length of the word. It copies the word into the
- * buffer in all lowercase without accents, and splits up ligatures.
- * (Since any ligature would be a multi-byte character in UTF-8, splitting
- * them into two US-ASCII characters won't overrun the buffer.)
- *
- * It is not safe to call this routine with bad UTF-8.
- */
-static void
-normalize_word(char *start, char *end, char *buf)
-{
- unsigned char *s, *d;
- unicode_char_t uc;
-
- s = (unsigned char *)start;
- d = (unsigned char *)buf;
- while (s < (unsigned char *)end)
- {
- if (*s < 0x80)
- {
- /* US-ASCII character: copy unless it's an apostrophe. */
- if (*s != '\'')
- *d++ = tolower(*s);
- s++;
- }
- else
- {
- char *next = unicode_get_utf8(s, &uc);
- if (uc >= 0xc0 && uc < 0xc0 + sizeof(utf8_trans))
- {
- signed char ch = utf8_trans[uc - 0xc0];
- if (ch > 0)
- *d++ = tolower(ch);
- else
- {
- *d++ = tolower(utf8_long_trans[-ch - 1][0]);
- *d++ = tolower(utf8_long_trans[-ch - 1][1]);
- }
- s = next;
- }
- else
- {
- while (s < (unsigned char *)next)
- *d++ = *s++;
- }
- }
- }
- *d = '\0';
-}
-
-enum { IBEX_ALPHA, IBEX_NONALPHA, IBEX_INVALID, IBEX_INCOMPLETE };
-
-/* This incorporates parts of libunicode, because there's no way to
- * force libunicode to not read past a certain point.
- */
-static int
-utf8_category(char *sp, char **snp, char *send)
-{
- unsigned char *p = (unsigned char *)sp, **np = (unsigned char **)snp;
- unsigned char *end = (unsigned char *)send;
-
- if (isascii(*p))
- {
- *np = p + 1;
- if (isalpha(*p) || *p == '\'')
- return IBEX_ALPHA;
- return IBEX_NONALPHA;
- }
- else
- {
- unicode_char_t uc;
- int more;
-
- if ((*p & 0xe0) == 0xc0)
- {
- more = 1;
- uc = *p & 0x1f;
- }
- else if ((*p & 0xf0) == 0xe0)
- {
- more = 2;
- uc = *p & 0x0f;
- }
- else if ((*p & 0xf8) == 0xf0)
- {
- more = 3;
- uc = *p & 0x07;
- }
- else if ((*p & 0xfc) == 0xf8)
- {
- more = 4;
- uc = *p & 0x03;
- }
- else if ((*p & 0xfe) == 0xfc)
- {
- more = 5;
- uc = *p & 0x01;
- }
- else
- return IBEX_INVALID;
-
- if (p + more > end)
- return IBEX_INCOMPLETE;
-
- while (more--)
- {
- if ((*++p & 0xc0) != 0x80)
- return IBEX_INVALID;
- uc <<= 6;
- uc |= *p & 0x3f;
- }
-
- *np = p + 1;
- if (unicode_isalpha(uc))
- return IBEX_ALPHA;
- else
- return IBEX_NONALPHA;
- }
-}
-
-static ibex_file *
-get_ibex_file(ibex *ib, char *name)
-{
- ibex_file *ibf;
-
- ibf = g_tree_lookup(ib->files, name);
- if (!ibf)
- {
- ibf = g_malloc(sizeof(ibex_file));
- ibf->name = strdup(name);
- ibf->index = 0;
- g_tree_insert(ib->files, ibf->name, ibf);
- ib->dirty = TRUE;
- }
- return ibf;
-}
-
-static void
-ref_word(ibex *ib, ibex_file *ibf, char *word)
-{
- GPtrArray *refs;
-
- refs = g_hash_table_lookup(ib->words, word);
- if (!refs)
- {
- refs = g_ptr_array_new();
- g_hash_table_insert(ib->words, g_strdup(word), refs);
- g_ptr_array_add(refs, ibf);
- ib->dirty = TRUE;
- }
- else if (g_ptr_array_index(refs, refs->len - 1) != ibf)
- {
- g_ptr_array_add(refs, ibf);
- ib->dirty = TRUE;
- }
-}
-
-int
-ibex_index_buffer(ibex *ib, char *name, char *buffer,
- size_t len, size_t *unread)
-{
- char *p, *q, *nq, *end, *word;
- ibex_file *ibf = get_ibex_file(ib, name);
- int wordsiz, cat;
-
- end = buffer + len;
- wordsiz = 20;
- word = g_malloc(wordsiz);
-
- p = buffer;
- while (p < end)
- {
- while (p < end)
- {
- cat = utf8_category(p, &q, end);
- if (cat != IBEX_NONALPHA)
- break;
- p = q;
- }
- if (p == end)
- {
- g_free(word);
- return 0;
- }
- else if (cat == IBEX_INVALID)
- {
- errno = EINVAL;
- g_free(word);
- return -1;
- }
- else if (cat == IBEX_INCOMPLETE)
- q = end;
-
- while (q < end)
- {
- cat = utf8_category(q, &nq, end);
- if (cat != IBEX_ALPHA)
- break;
- q = nq;
- }
- if (cat == IBEX_INVALID || (cat == IBEX_INCOMPLETE && !unread))
- {
- errno = EINVAL;
- g_free(word);
- return -1;
- }
- else if (cat == IBEX_INCOMPLETE || (q == end && unread))
- {
- *unread = end - p;
- g_free(word);
- return 0;
- }
-
- if (wordsiz < q - p + 1)
- {
- wordsiz = q - p + 1;
- word = g_realloc(word, wordsiz);
- }
- normalize_word(p, q, word);
- ref_word(ib, ibf, word);
- p = q;
- }
-
- if (unread)
- *unread = 0;
- g_free(word);
- return 0;
-}