aboutsummaryrefslogtreecommitdiffstats
path: root/libibex
diff options
context:
space:
mode:
Diffstat (limited to 'libibex')
-rw-r--r--libibex/.cvsignore9
-rw-r--r--libibex/COPYING.LIB481
-rw-r--r--libibex/ChangeLog390
-rw-r--r--libibex/Makefile.am40
-rw-r--r--libibex/TODO61
-rw-r--r--libibex/block.c595
-rw-r--r--libibex/block.h114
-rw-r--r--libibex/diskarray.c255
-rw-r--r--libibex/disktail.c818
-rw-r--r--libibex/dumpindex.c63
-rw-r--r--libibex/hash.c859
-rw-r--r--libibex/ibex.h106
-rw-r--r--libibex/ibex_block.c345
-rw-r--r--libibex/ibex_db.c512
-rw-r--r--libibex/ibex_internal.h51
-rw-r--r--libibex/index.h103
-rw-r--r--libibex/lookup.c83
-rw-r--r--libibex/mkindex.c84
-rw-r--r--libibex/testindex.c200
-rw-r--r--libibex/wordindex.c646
-rw-r--r--libibex/wordindex.h74
-rw-r--r--libibex/wordindexmem.c889
22 files changed, 0 insertions, 6778 deletions
diff --git a/libibex/.cvsignore b/libibex/.cvsignore
deleted file mode 100644
index b0d27ba734..0000000000
--- a/libibex/.cvsignore
+++ /dev/null
@@ -1,9 +0,0 @@
-.deps
-Makefile
-Makefile.in
-.libs
-.deps
-*.lo
-*.la
-testindex
-dumpindex
diff --git a/libibex/COPYING.LIB b/libibex/COPYING.LIB
deleted file mode 100644
index eb685a5ec9..0000000000
--- a/libibex/COPYING.LIB
+++ /dev/null
@@ -1,481 +0,0 @@
- GNU LIBRARY GENERAL PUBLIC LICENSE
- Version 2, June 1991
-
- Copyright (C) 1991 Free Software Foundation, Inc.
- 675 Mass Ave, Cambridge, MA 02139, USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-[This is the first released version of the library GPL. It is
- numbered 2 because it goes with version 2 of the ordinary GPL.]
-
- Preamble
-
- The licenses for most software are designed to take away your
-freedom to share and change it. By contrast, the GNU General Public
-Licenses are intended to guarantee your freedom to share and change
-free software--to make sure the software is free for all its users.
-
- This license, the Library General Public License, applies to some
-specially designated Free Software Foundation software, and to any
-other libraries whose authors decide to use it. You can use it for
-your libraries, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
- To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if
-you distribute copies of the library, or if you modify it.
-
- For example, if you distribute copies of the library, whether gratis
-or for a fee, you must give the recipients all the rights that we gave
-you. You must make sure that they, too, receive or can get the source
-code. If you link a program with the library, you must provide
-complete object files to the recipients so that they can relink them
-with the library, after making changes to the library and recompiling
-it. And you must show them these terms so they know their rights.
-
- Our method of protecting your rights has two steps: (1) copyright
-the library, and (2) offer you this license which gives you legal
-permission to copy, distribute and/or modify the library.
-
- Also, for each distributor's protection, we want to make certain
-that everyone understands that there is no warranty for this free
-library. If the library is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original
-version, so that any problems introduced by others will not reflect on
-the original authors' reputations.
-
- Finally, any free program is threatened constantly by software
-patents. We wish to avoid the danger that companies distributing free
-software will individually obtain patent licenses, thus in effect
-transforming the program into proprietary software. To prevent this,
-we have made it clear that any patent must be licensed for everyone's
-free use or not licensed at all.
-
- Most GNU software, including some libraries, is covered by the ordinary
-GNU General Public License, which was designed for utility programs. This
-license, the GNU Library General Public License, applies to certain
-designated libraries. This license is quite different from the ordinary
-one; be sure to read it in full, and don't assume that anything in it is
-the same as in the ordinary license.
-
- The reason we have a separate public license for some libraries is that
-they blur the distinction we usually make between modifying or adding to a
-program and simply using it. Linking a program with a library, without
-changing the library, is in some sense simply using the library, and is
-analogous to running a utility program or application program. However, in
-a textual and legal sense, the linked executable is a combined work, a
-derivative of the original library, and the ordinary General Public License
-treats it as such.
-
- Because of this blurred distinction, using the ordinary General
-Public License for libraries did not effectively promote software
-sharing, because most developers did not use the libraries. We
-concluded that weaker conditions might promote sharing better.
-
- However, unrestricted linking of non-free programs would deprive the
-users of those programs of all benefit from the free status of the
-libraries themselves. This Library General Public License is intended to
-permit developers of non-free programs to use free libraries, while
-preserving your freedom as a user of such programs to change the free
-libraries that are incorporated in them. (We have not seen how to achieve
-this as regards changes in header files, but we have achieved it as regards
-changes in the actual functions of the Library.) The hope is that this
-will lead to faster development of free libraries.
-
- The precise terms and conditions for copying, distribution and
-modification follow. Pay close attention to the difference between a
-"work based on the library" and a "work that uses the library". The
-former contains code derived from the library, while the latter only
-works together with the library.
-
- Note that it is possible for a library to be covered by the ordinary
-General Public License rather than by this special one.
-
- GNU LIBRARY GENERAL PUBLIC LICENSE
- TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
- 0. This License Agreement applies to any software library which
-contains a notice placed by the copyright holder or other authorized
-party saying it may be distributed under the terms of this Library
-General Public License (also called "this License"). Each licensee is
-addressed as "you".
-
- A "library" means a collection of software functions and/or data
-prepared so as to be conveniently linked with application programs
-(which use some of those functions and data) to form executables.
-
- The "Library", below, refers to any such software library or work
-which has been distributed under these terms. A "work based on the
-Library" means either the Library or any derivative work under
-copyright law: that is to say, a work containing the Library or a
-portion of it, either verbatim or with modifications and/or translated
-straightforwardly into another language. (Hereinafter, translation is
-included without limitation in the term "modification".)
-
- "Source code" for a work means the preferred form of the work for
-making modifications to it. For a library, complete source code means
-all the source code for all modules it contains, plus any associated
-interface definition files, plus the scripts used to control compilation
-and installation of the library.
-
- Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope. The act of
-running a program using the Library is not restricted, and output from
-such a program is covered only if its contents constitute a work based
-on the Library (independent of the use of the Library in a tool for
-writing it). Whether that is true depends on what the Library does
-and what the program that uses the Library does.
-
- 1. You may copy and distribute verbatim copies of the Library's
-complete source code as you receive it, in any medium, provided that
-you conspicuously and appropriately publish on each copy an
-appropriate copyright notice and disclaimer of warranty; keep intact
-all the notices that refer to this License and to the absence of any
-warranty; and distribute a copy of this License along with the
-Library.
-
- You may charge a fee for the physical act of transferring a copy,
-and you may at your option offer warranty protection in exchange for a
-fee.
-
- 2. You may modify your copy or copies of the Library or any portion
-of it, thus forming a work based on the Library, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
- a) The modified work must itself be a software library.
-
- b) You must cause the files modified to carry prominent notices
- stating that you changed the files and the date of any change.
-
- c) You must cause the whole of the work to be licensed at no
- charge to all third parties under the terms of this License.
-
- d) If a facility in the modified Library refers to a function or a
- table of data to be supplied by an application program that uses
- the facility, other than as an argument passed when the facility
- is invoked, then you must make a good faith effort to ensure that,
- in the event an application does not supply such function or
- table, the facility still operates, and performs whatever part of
- its purpose remains meaningful.
-
- (For example, a function in a library to compute square roots has
- a purpose that is entirely well-defined independent of the
- application. Therefore, Subsection 2d requires that any
- application-supplied function or table used by this function must
- be optional: if the application does not supply it, the square
- root function must still compute square roots.)
-
-These requirements apply to the modified work as a whole. If
-identifiable sections of that work are not derived from the Library,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works. But when you
-distribute the same sections as part of a whole which is a work based
-on the Library, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote
-it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Library.
-
-In addition, mere aggregation of another work not based on the Library
-with the Library (or with a work based on the Library) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
- 3. You may opt to apply the terms of the ordinary GNU General Public
-License instead of this License to a given copy of the Library. To do
-this, you must alter all the notices that refer to this License, so
-that they refer to the ordinary GNU General Public License, version 2,
-instead of to this License. (If a newer version than version 2 of the
-ordinary GNU General Public License has appeared, then you can specify
-that version instead if you wish.) Do not make any other change in
-these notices.
-
- Once this change is made in a given copy, it is irreversible for
-that copy, so the ordinary GNU General Public License applies to all
-subsequent copies and derivative works made from that copy.
-
- This option is useful when you wish to copy part of the code of
-the Library into a program that is not a library.
-
- 4. You may copy and distribute the Library (or a portion or
-derivative of it, under Section 2) in object code or executable form
-under the terms of Sections 1 and 2 above provided that you accompany
-it with the complete corresponding machine-readable source code, which
-must be distributed under the terms of Sections 1 and 2 above on a
-medium customarily used for software interchange.
-
- If distribution of object code is made by offering access to copy
-from a designated place, then offering equivalent access to copy the
-source code from the same place satisfies the requirement to
-distribute the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
- 5. A program that contains no derivative of any portion of the
-Library, but is designed to work with the Library by being compiled or
-linked with it, is called a "work that uses the Library". Such a
-work, in isolation, is not a derivative work of the Library, and
-therefore falls outside the scope of this License.
-
- However, linking a "work that uses the Library" with the Library
-creates an executable that is a derivative of the Library (because it
-contains portions of the Library), rather than a "work that uses the
-library". The executable is therefore covered by this License.
-Section 6 states terms for distribution of such executables.
-
- When a "work that uses the Library" uses material from a header file
-that is part of the Library, the object code for the work may be a
-derivative work of the Library even though the source code is not.
-Whether this is true is especially significant if the work can be
-linked without the Library, or if the work is itself a library. The
-threshold for this to be true is not precisely defined by law.
-
- If such an object file uses only numerical parameters, data
-structure layouts and accessors, and small macros and small inline
-functions (ten lines or less in length), then the use of the object
-file is unrestricted, regardless of whether it is legally a derivative
-work. (Executables containing this object code plus portions of the
-Library will still fall under Section 6.)
-
- Otherwise, if the work is a derivative of the Library, you may
-distribute the object code for the work under the terms of Section 6.
-Any executables containing that work also fall under Section 6,
-whether or not they are linked directly with the Library itself.
-
- 6. As an exception to the Sections above, you may also compile or
-link a "work that uses the Library" with the Library to produce a
-work containing portions of the Library, and distribute that work
-under terms of your choice, provided that the terms permit
-modification of the work for the customer's own use and reverse
-engineering for debugging such modifications.
-
- You must give prominent notice with each copy of the work that the
-Library is used in it and that the Library and its use are covered by
-this License. You must supply a copy of this License. If the work
-during execution displays copyright notices, you must include the
-copyright notice for the Library among them, as well as a reference
-directing the user to the copy of this License. Also, you must do one
-of these things:
-
- a) Accompany the work with the complete corresponding
- machine-readable source code for the Library including whatever
- changes were used in the work (which must be distributed under
- Sections 1 and 2 above); and, if the work is an executable linked
- with the Library, with the complete machine-readable "work that
- uses the Library", as object code and/or source code, so that the
- user can modify the Library and then relink to produce a modified
- executable containing the modified Library. (It is understood
- that the user who changes the contents of definitions files in the
- Library will not necessarily be able to recompile the application
- to use the modified definitions.)
-
- b) Accompany the work with a written offer, valid for at
- least three years, to give the same user the materials
- specified in Subsection 6a, above, for a charge no more
- than the cost of performing this distribution.
-
- c) If distribution of the work is made by offering access to copy
- from a designated place, offer equivalent access to copy the above
- specified materials from the same place.
-
- d) Verify that the user has already received a copy of these
- materials or that you have already sent this user a copy.
-
- For an executable, the required form of the "work that uses the
-Library" must include any data and utility programs needed for
-reproducing the executable from it. However, as a special exception,
-the source code distributed need not include anything that is normally
-distributed (in either source or binary form) with the major
-components (compiler, kernel, and so on) of the operating system on
-which the executable runs, unless that component itself accompanies
-the executable.
-
- It may happen that this requirement contradicts the license
-restrictions of other proprietary libraries that do not normally
-accompany the operating system. Such a contradiction means you cannot
-use both them and the Library together in an executable that you
-distribute.
-
- 7. You may place library facilities that are a work based on the
-Library side-by-side in a single library together with other library
-facilities not covered by this License, and distribute such a combined
-library, provided that the separate distribution of the work based on
-the Library and of the other library facilities is otherwise
-permitted, and provided that you do these two things:
-
- a) Accompany the combined library with a copy of the same work
- based on the Library, uncombined with any other library
- facilities. This must be distributed under the terms of the
- Sections above.
-
- b) Give prominent notice with the combined library of the fact
- that part of it is a work based on the Library, and explaining
- where to find the accompanying uncombined form of the same work.
-
- 8. You may not copy, modify, sublicense, link with, or distribute
-the Library except as expressly provided under this License. Any
-attempt otherwise to copy, modify, sublicense, link with, or
-distribute the Library is void, and will automatically terminate your
-rights under this License. However, parties who have received copies,
-or rights, from you under this License will not have their licenses
-terminated so long as such parties remain in full compliance.
-
- 9. You are not required to accept this License, since you have not
-signed it. However, nothing else grants you permission to modify or
-distribute the Library or its derivative works. These actions are
-prohibited by law if you do not accept this License. Therefore, by
-modifying or distributing the Library (or any work based on the
-Library), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Library or works based on it.
-
- 10. Each time you redistribute the Library (or any work based on the
-Library), the recipient automatically receives a license from the
-original licensor to copy, distribute, link with or modify the Library
-subject to these terms and conditions. You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties to
-this License.
-
- 11. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Library at all. For example, if a patent
-license would not permit royalty-free redistribution of the Library by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Library.
-
-If any portion of this section is held invalid or unenforceable under any
-particular circumstance, the balance of the section is intended to apply,
-and the section as a whole is intended to apply in other circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system which is
-implemented by public license practices. Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
- 12. If the distribution and/or use of the Library is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Library under this License may add
-an explicit geographical distribution limitation excluding those countries,
-so that distribution is permitted only in or among countries not thus
-excluded. In such case, this License incorporates the limitation as if
-written in the body of this License.
-
- 13. The Free Software Foundation may publish revised and/or new
-versions of the Library General Public License from time to time.
-Such new versions will be similar in spirit to the present version,
-but may differ in detail to address new problems or concerns.
-
-Each version is given a distinguishing version number. If the Library
-specifies a version number of this License which applies to it and
-"any later version", you have the option of following the terms and
-conditions either of that version or of any later version published by
-the Free Software Foundation. If the Library does not specify a
-license version number, you may choose any version ever published by
-the Free Software Foundation.
-
- 14. If you wish to incorporate parts of the Library into other free
-programs whose distribution conditions are incompatible with these,
-write to the author to ask for permission. For software which is
-copyrighted by the Free Software Foundation, write to the Free
-Software Foundation; we sometimes make exceptions for this. Our
-decision will be guided by the two goals of preserving the free status
-of all derivatives of our free software and of promoting the sharing
-and reuse of software generally.
-
- NO WARRANTY
-
- 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
-WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
-EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
-OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
-KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
-LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
-THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
-WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
-AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
-FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
-CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
-LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
-RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
-FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
-SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGES.
-
- END OF TERMS AND CONDITIONS
-
- Appendix: How to Apply These Terms to Your New Libraries
-
- If you develop a new library, and you want it to be of the greatest
-possible use to the public, we recommend making it free software that
-everyone can redistribute and change. You can do so by permitting
-redistribution under these terms (or, alternatively, under the terms of the
-ordinary General Public License).
-
- To apply these terms, attach the following notices to the library. It is
-safest to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least the
-"copyright" line and a pointer to where the full notice is found.
-
- <one line to give the library's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-Also add information on how to contact you by electronic and paper mail.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the library, if
-necessary. Here is a sample; alter the names:
-
- Yoyodyne, Inc., hereby disclaims all copyright interest in the
- library `Frob' (a library for tweaking knobs) written by James Random Hacker.
-
- <signature of Ty Coon>, 1 April 1990
- Ty Coon, President of Vice
-
-That's all there is to it!
diff --git a/libibex/ChangeLog b/libibex/ChangeLog
deleted file mode 100644
index 2d3fe7bace..0000000000
--- a/libibex/ChangeLog
+++ /dev/null
@@ -1,390 +0,0 @@
-2000-12-24 Not Zed <NotZed@HelixCode.com>
-
- * Merge from camel-mt-branch.
-
-2000-12-18 Not Zed <NotZed@HelixCode.com>
-
- * dumpindex.c (main): Same here.
-
- * testindex.c (main): Add a g_thread_init(). Sigh, glib's thread
- stuff is snot.
- (read_words): Setup another flat-out thread to test
- multithreadedness at little bit.
-
- * ibex_block.c (ibex_index_buffer): Add locking around internal
- calls.
- (ibex_open): Init the locking mutex.
- (ibex_close): Free the locking mutex.
- (ibex_unindex):
- (ibex_find):
- (ibex_find_name):
- (ibex_contains_name): Add locking around internal calls.
-
- * ibex_internal.h (struct ibex): Add a lock. Include config.h
-
-2000-12-13 Christopher James Lahey <clahey@helixcode.com>
-
- * disktail.c (tail_compress):
- (tail_get): Added some casts to get rid of warnings.
- (tail_dump): #if 0ed this out to get rid of a warning.
- (ibex_diskarray_dump): Added a prototype.
-
- * ibex_block.c (ibex_index_buffer): Assigned cat the value 0 to
- start off with to avoid a warning.
-
-2000-12-12 Christopher James Lahey <clahey@helixcode.com>
-
- * wordindex.c (cache_sanity): Made cache_sanity only be included
- if d(x) is defined as x.
-
- * wordindexmem.c: Made node_sanity and cache_sanity only be
- included if d(x) is defined as x or if MALLOC_CHECK is defined.
- Made sync_value only be included if d(x) is defined as x.
-
-2000-11-28 Not Zed <NotZed@HelixCode.com>
-
- * index.h: Turn off index stats by default.
-
- * ibex_block.c (ibex_save): And here.
- (ibex_close): Debug out printfs.
-
- * wordindexmem.c (ibex_create_word_index_mem): And here.
- (num): Made buf static.
-
- * block.c (ibex_block_cache_open): Debug out some printfs.
- (ibex_block_read): And here.
-
-2000-11-17 Not Zed <NotZed@HelixCode.com>
-
- * wordindexmem.c (add_list): If we have the namecache active, and
- there is no name there, we add it directly and dont look it up
- first.
-
- * testindex.c: Some performance testing & stat gathering stuff.
-
-2000-11-16 Not Zed <NotZed@HelixCode.com>
-
- * wordindexmem.c (ibex_create_word_index_mem): Initialise nameinit
- & namecache.
- (contains_name): On first call, load all names into memory. We
- usually do a whole lot of lookups in a row, and this saves a lot
- of penalties on a big list, for not too much a memory hit.
- (find_name): If we have the namelist in memory do a quick
- short-circuit check to see if we have to do further processing.
- (unindex_name): Cross check the namecache, if it is active.
- Remove it there too/or exit (no work to do).
- (word_flush): If we have the namecache active, destroy it now, as
- it is not needed anymore (for now).
-
-2000-10-30 Kjartan Maraas <kmaraas@gnome.org>
-
- * hash.c: #include <stdlib.h> to remove warning.
- * wordindex.c: #include <stdlib.h> and <string.h>.
-
-2000-10-26 Not Zed <NotZed@HelixCode.com>
-
- * block.c (ibex_block_cache_open): Use IBEX_VERSION rather than
- hardcoded version string.
-
- * ibex_internal.h (IBEX_VERSION): Bumped version again. This time
- I did change the index format.
- (IBEX_VERSION): moved into block.h
-
- * hash.c (struct _hashroot): Add a linked list of keys to the table.
- (struct _hashblock): Added a next pointer as a block number.
- (hash_insert): Link new key blocks into the key block list.
- (struct _HASHCursor): Renamed block to key and added a block item.
- (hash_cursor_next): Changed to go through the linked list of all
- hash items rather than through each hash chain separately. >>
- faster.
- (ibex_hash_dump_rec): Remove a warning.
-
-2000-10-25 <jpr@helixcode.com>
-
- * ibex_block.c: No longer include <db.h>
-
-2000-10-25 Not Zed <NotZed@HelixCode.com>
-
- * ibex_internal.h (IBEX_VERSION): Bumped to another version. The
- file format hasn't changed, but earlier bugs may create invalid
- files.
-
- * block.c (ibex_block_read): Use the root data directly.
- (ibex_block_cache_open): As well.
- (ibex_block_get): And here too.
- (ibex_block_cache_sync): Sync the root block directly here.
-
- * block.h: Pad root block out to 1024 bytes.
- Added root block to struct _memcache.
-
- * disktail.c (tail_get): Dirty the root block.
- (tail_get): Fix for changes to root access.
- (disk_remove): And here too.
-
- * wordindexmem.c (sync_cache_entry): Handle the case of not having
- any files in the list, which can happen now.
- (word_index_pre): Make sure we set the wordid on the new cache
- entry.
-
- * ibex_block.c (ibex_save): Sigh. Pass the right argument to
- index_post.
-
-2000-10-24 JP Rosevear <jpr@helixcode.com>
-
- * .cvsignore: Shush
-
-2000-10-24 Not Zed <NotZed@HelixCode.com>
-
- * block.c (ibex_block_cache_open): Create a word_index_mem for
- indexing the words, rather than a word_index.
-
- * ibex_block.c (ibex_index_buffer): If we haven't called index_pre
- yet, do it before indexing anything.
- (ibex_save): If wehave called index_pre previously, call
- index_post.
- (ibex_close): And same for here.
-
- * index.h: Added a cursor class, and cursor retrieval function for
- iterating through an index's keys.
-
- * wordindexmem.c (ibex_create_word_index_mem): New word class,
- similar to wordindex, but meant to be faster for updates.
- (word_index_pre): Implement. We load all keys into memory.
- (word_index_post): Implement. We sync and free all keys.
- (find): Remove lru code, its no longer a cache, but a lookup
- table.
- (add_index_cache): Remove lru code here too.
- (find_name): And here.
- (word_flush): Flush the hashtable direct.
- (word_close): Call flush to flush, rather than doing it ourselves.
- (add_index_cache): If we are in an index state, we can assume a
- cache miss == a new word.
- (word_index_post): Maintain whether or not we are in an index
- state, and the depth of the state.
- (word_index_pre): Likewise. Dont reread the index if we have
- already.
- (cache_sanity): Fixed for struct changes.
-
- * wordindex.h (IBEXWordClass): Added functions to prepare/cleanup
- for lots of indexing. i.e. can be used to optimise indexing speed
- at the cost of extra memory usage during the indexing process.
-
- * dumpindex.c: Dumps the contents of indexs.
-
- * hash.c (ibex_hash_dump_rec): Also print the word count.
- (hash_cursor_create): Create a new cursor for iterating through a
- hashtable.
- (hash_cursor_close): 'close' the cursor. It is upto the
- application to close any cursors it creates.
- (hash_cursor_next): Goto the next key id.
- (hash_cursor_next_key): Goto the next key, reutrn the key.
- (hash_get_cursor): Return a cursor object.
-
- * wordindex.c (unindex_name): Cross-check the cache as well.
- (word_index_post):
- (word_index_pre): Added (empty) callbacks for pre/post functions.
-
-2000-10-12 Not Zed <NotZed@HelixCode.com>
-
- * ibex_internal.h (struct ibex): Bumped ibex rev.
-
- * block.c (ibex_block_cache_open): Bumped the ibex file revision
- because of the hash table size change.
-
- * index.h: Added some stat stuff.
-
- * wordindex.c (struct _wordcache): Changed files[] to be a pointer
- to an allocated block/or an individual item.
- (find): Fix for changes to struct.
- (find_name): "
- (sync_cache_entry): "
- (add): "
- (add_list): "
- (add_index_cache): Free the cache file array if it was created.
- (word_flush): And here.
- (word_close): And here too.
- (ibex_create_word_index): Double the size of the hashtables.
- (word_flush): Make sure we reset the wordcount to 0 if we remove
- the list items. DOH.
- (add_index_cache): Use a slightly more sohpisticated aging
- algorithm to remove expired nodes.
-
-2000-10-10 Not Zed <NotZed@HelixCode.com>
-
- * hash.c (hash_find):
- (hash_remove):
- (hash_insert): Truncate key if it is too big to fit in a
- single block to MAX_KEYLEN bytes.
-
-2000-09-28 Not Zed <NotZed@HelixCode.com>
-
- * block.c (ibex_block_free): Make sure we map the 'free' block to
- a block number when unlinking a block (fixes a lot of assertion
- failures).
- (ibex_block_cache_open): Initialise sync flag on root block. If
- it is not set on open then the index could be in an invalid state,
- and should be rescanned.
- (ibex_block_cache_sync): Sync root block last, and set the sync
- flag.
- (ibex_block_cache_open): Mirror root block flags in block_cache
- struct.
- (ibex_block_cache_sync): Likewise.
- (ibex_block_read): If we write a dirty block, then we clear the
- sync flag if its still set; we are no longer synced.
-
-2000-09-19 Not Zed <NotZed@HelixCode.com>
-
- ** Merged from IBEX_DISK branch to head.
-
- * file.c:
- * find.c:
- * words.c:
- * index.c: Removed unused files.
-
- * block.h: Changed block to use only 24 bits for next and 8 for
- used, and fixed all relevant code. Some cleanup.
-
- * disktail.c (tail_get): If we use an empty tail node, then make
- sure we make it dirty.
-
-2000-09-15 Not Zed <NotZed@HelixCode.com>
-
- * wordindex.c (word_close): Free hashtable on exit too.
-
- * disktail.c: Implemented tail-node storage for the end of long
- lists, or for short lists. Should save significant disk space
- (5x?).
- Implemented special case for 1-item lists, where the tailnode
- pointer is used to store the index entry.
-
-2000-09-14 Not Zed <NotZed@HelixCode.com>
-
- * wordindex.c (add_index_key): Keys also handle tails.
-
- * hash.c (hash_set_data_block): Added new parameter to keys - a
- tail block (a full 32 bit block pointer).
- (hash_get_data_block): And same here.
-
-2000-09-12 Not Zed <NotZed@HelixCode.com>
-
- * wordindex.c (word_close): Dont close namestore twice.
-
-2000-09-11 Not Zed <NotZed@HelixCode.com>
-
- ** Redid almost everything, on-disk hash table to store an index
- to index records, mroe on the way to modularisation (more to go),
- now stores reverse indexes for deleting.
-
-2000-08-31 Not Zed <NotZed@HelixCode.com>
-
- * block.c (add_key_mem): Initialise a memory based array for newly
- added index entries.
- (add_record): Changed to cache updates in memory until we hit a
- limit, and then flush them to disk.
- (get_record): Merge in-memory records with disk records.
- (remove_record): Remove from memory first, and if that fails, goto
- disk.
- (find_record): Check memory first, then disk if that fails.
- (add_datum_list): oops, copy size * sizeof(blockid_t)
- (add_indexed): Make sure we link in the head node when we create a
- new one.
-
-2000-08-09 Christopher James Lahey <clahey@helixcode.com>
-
- * file.c, find.c: Fixed some warnings.
-
-2000-05-11 NotZed <NotZed@HelixCode.com>
-
- * index.c (ibex_unindex): Make sure we mark the ibex as dirty.
-
-2000-05-07 NotZed <NotZed@HelixCode.com>
-
- * file.c (ibex_save): New function, only write out the ibex if it
- has changed.
-
-2000-05-07 <notzed@helixcode.com>
-
- * file.c (ibex_open): Also close the fd after we're done.
-
- * find.c (ibex_contains_name): New function to find out if a file
- is indexed.
-
-2000-05-02 Matt Loper <matt@helixcode.com>
-
- * Makefile.am: set G_LOG_DOMAIN.
-
-2000-04-12 NotZed <NotZed@HelixCode.com>
-
- * find.c (ibex_dump_all): Debug function to dump the whole index
- to stdout.
-
- * words.c (get_ibex_file): Use g_strdup(), not strdup().
-
-2000-04-11 NotZed <NotZed@HelixCode.com>
-
- * file.c (write_word): Always write out all words we have (even if
- its 0 ... the file expects it). No longer check for removed files.
- (store_word): Check for removed files here, and only add to the
- ordered tree if we have references left to this word.
- (ibex_write): First insert into the tree, to determine the
- wordcount to be saved in the output file, and then write that.
- (ibex_open): Remove some debug.
-
- * words.c (ibex_index_buffer): Always set 'unread', if it is a
- valid pointer (dont rely on caller to initialise it).
-
-2000-03-26 NotZed <NotZed@HelixCode.com>
-
- * lookup.c (main): Fixed call to ibex_open.
-
- * mkindex.c (main): Fixed call to ibex_open.
-
- * file.c (ibex_open): Changed to accept flags and mode equivalent
- to open(2).
-
-2000-02-25 Dan Winship <danw@helixcode.com>
-
- * *.c: add gtk-doc-style comments
-
-2000-02-21 Matt Loper <matt@helixcode.com>
-
- * .cvsignore: Added mkindex.
-
-2000-02-21 NotZed <NotZed@HelixCode.com>
-
- * Makefile.am: change noinst_LIBRARIES to noinst_LTLIBRARIES, and
- supply -static to LDFLAGS. Duh, and changed LDADD back to
- libibex.la.
-
-2000-02-20 Matt Loper <matt@helixcode.com>
-
- * Makefile.am: changed mkindex_LDADD to libibex.a instead of
- libibex.la.
-
-2000-02-19 Matt Loper <matt@helixcode.com>
-
- * .cvsignore: added lookup.
-
-2000-02-18 Miguel de Icaza <miguel@nuclecu.unam.mx>
-
- * Makefile.am (lookup_LDADD): For now. make a libibex.a library so
- we can link it with the camel provider. I hate libtool
-
-2000-02-16 Dan Winship <danw@helixcode.com>
-
- * Makefile.am: automakify
-
-2000-02-16 NotZed <NotZed@HelixCode.com>
-
- * find.[ch] (ibex_find_name): Finds if a word is indexed under a
- given name.
-
-2000-02-14 NotZed <notzed@zedzone.helixcode.com>
-
- * Makefile: Hack together a build using libtool. This should all
- be auto*'d at some point I guess.
-
-2000-02-13 NotZed <notzed@zedzone.helixcode.com>
-
- * Added ChangeLog file.
-
diff --git a/libibex/Makefile.am b/libibex/Makefile.am
deleted file mode 100644
index ab7c92206c..0000000000
--- a/libibex/Makefile.am
+++ /dev/null
@@ -1,40 +0,0 @@
-## Process this file with automake to produce Makefile.in
-
-noinst_LTLIBRARIES = libibex.la
-
-libibex_la_SOURCES = \
- wordindex.c wordindexmem.c \
- block.c ibex.h \
- hash.c \
- disktail.c \
- ibex_block.c
-
-libibex_la_LDFLAGS = -static
-
-noinst_HEADERS = \
- ibex_internal.h \
- block.h \
- wordindex.h \
- index.h
-
-INCLUDES = -I$(srcdir) $(GLIB_CFLAGS) $(UNICODE_CFLAGS) \
- $(THREADS_CFLAGS) \
- -DG_LOG_DOMAIN=\"libibex\"
-
-
-noinst_PROGRAMS = dumpindex testindex
-
-dumpindex_SOURCES = dumpindex.c
-dumpindex_LDADD = libibex.la $(GLIB_LIBS) $(UNICODE_LIBS) $(THREADS_LIBS)
-
-testindex_SOURCES = testindex.c
-testindex_LDADD = libibex.la $(GLIB_LIBS) $(UNICODE_LIBS) $(THREADS_LIBS) -lm
-
-#noinst_PROGRAMS = mkindex lookup
-#
-#mkindex_SOURCES = mkindex.c
-#mkindex_LDADD = libibex.la $(GLIB_LIBS) $(UNICODE_LIBS)
-#
-#lookup_SOURCES = lookup.c
-#lookup_LDADD = libibex.la $(GLIB_LIBS) $(UNICODE_LIBS)
-
diff --git a/libibex/TODO b/libibex/TODO
deleted file mode 100644
index a087c8d1f3..0000000000
--- a/libibex/TODO
+++ /dev/null
@@ -1,61 +0,0 @@
-Stability
----------
-* ibex_open should never crash, and should never return NULL without
-errno being set. Should check for errors when reading.
-
-
-Performance
------------
-* Profiling, keep thinking about data structures, etc.
-
-* Check memory usage
-
-* See if writing the "inverse image" of long ref streams helps
-compression without hurting performance now. (ie, if a word appears in
-more than half of the files, write out the list of files it _doesn't_
-appear in). (I tried this before, and it wasn't working well, but the
-file format and data structures have changed a lot.)
-
-* We could save a noticeable chunk of time if normalize_word computed
-the hash of the word and then we could pass that into
-g_hash_table_insert somehow.
-
-* Make a copy of the buffer to be indexed (or provide interface for
-caller to say ibex can munge the provided data) and then use that
-rather than constantly copying things. ?
-
-
-Functionality
--------------
-* ibex file locking
-
-* specify file mode in ibex_open
-
-* ibex_find* need to normalize the search words... should this be done
-by the caller or by ibex_find?
-
-* Needs to be some way to do a secondary search after getting results
-back from ibex_find* (ie, for "foo near bar"). This either has to be
-done by ibex, or requires us to export the normalize interface.
-
-* Does there need to be an ibex_find_any, or is that easy enough for the
-caller to do?
-
-* utf8_trans needs to cover at least two more code pages. This is
-tricky because it's not clear whether some of the letters there should
-be translated to ASCII or left as UTF8. This requires some
-investigation.
-
-* ibex_index_* need to ignore HTML tags.
- NAME = [A-Za-z][A-Za-z0-9.-]*
- </?{NAME}(\s*{NAME}(\s*=\s*({NAME}|"[^"]*"|'[^']*')))*>
- <!(--([^-]*|-[^-])--\s*)*>
-
- ugh. ok, simplifying, we get:
- <[^!](([^"'>]*("[^"]*"|'[^']*'))*> or
- <!(--([^-]*|-[^-])--\s*)*>
-
- which is still not simple. sigh.
-
-* ibex_index_* need to recognize and ignore "non-text". Particularly
-BinHex and uuencoding.
diff --git a/libibex/block.c b/libibex/block.c
deleted file mode 100644
index b9057bc109..0000000000
--- a/libibex/block.c
+++ /dev/null
@@ -1,595 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
- *
- * Copyright (C) 2000 Helix Code, Inc.
- *
- * Authors: Michael Zucchi <notzed@helixcode.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public License
- * as published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with the Gnome Library; see the file COPYING.LIB. If not,
- * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-/*
- block file/cache/utility functions
-*/
-
-#include <stdio.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <glib.h>
-
-#include "block.h"
-
-/*#define MALLOC_CHECK*/
-
-#ifdef MALLOC_CHECK
-#include <mcheck.h>
-#endif
-
-#define d(x)
-/*#define DEBUG*/
-
-int block_log;
-
-#ifdef IBEX_STATS
-static void
-init_stats(struct _memcache *index)
-{
- index->stats = g_hash_table_new(g_direct_hash, g_direct_equal);
-}
-
-static void
-dump_1_stat(int id, struct _stat_info *info, struct _memcache *index)
-{
- printf("%d %d %d %d %d\n", id, info->read, info->write, info->cache_hit, info->cache_miss);
-}
-
-static void
-dump_stats(struct _memcache *index)
-{
- printf("Block reads writes hits misses\n");
- g_hash_table_foreach(index->stats, dump_1_stat, index);
-}
-
-static void
-add_read(struct _memcache *index, int id)
-{
- struct _stat_info *info;
-
- info = g_hash_table_lookup(index->stats, (void *)id);
- if (info == NULL) {
- info = g_malloc0(sizeof(*info));
- g_hash_table_insert(index->stats, (void *)id, info);
- }
- info->read++;
-}
-
-static void
-add_write(struct _memcache *index, int id)
-{
- struct _stat_info *info;
-
- info = g_hash_table_lookup(index->stats, (void *)id);
- if (info == NULL) {
- info = g_malloc0(sizeof(*info));
- g_hash_table_insert(index->stats, (void *)id, info);
- }
- info->write++;
-}
-
-static void
-add_hit(struct _memcache *index, int id)
-{
- struct _stat_info *info;
-
- info = g_hash_table_lookup(index->stats, (void *)id);
- if (info == NULL) {
- info = g_malloc0(sizeof(*info));
- g_hash_table_insert(index->stats, (void *)id, info);
- }
- info->cache_hit++;
-}
-
-static void
-add_miss(struct _memcache *index, int id)
-{
- struct _stat_info *info;
-
- info = g_hash_table_lookup(index->stats, (void *)id);
- if (info == NULL) {
- info = g_malloc0(sizeof(*info));
- g_hash_table_insert(index->stats, (void *)id, info);
- }
- info->cache_miss++;
-}
-#endif /* IBEX_STATS */
-
-#ifdef MALLOC_CHECK
-static void
-checkmem(void *p)
-{
- if (p) {
- int status = mprobe(p);
-
- switch (status) {
- case MCHECK_HEAD:
- printf("Memory underrun at %p\n", p);
- abort();
- case MCHECK_TAIL:
- printf("Memory overrun at %p\n", p);
- abort();
- case MCHECK_FREE:
- printf("Double free %p\n", p);
- abort();
- }
- }
-}
-#endif
-
-/* simple list routines (for simplified memory management of cache/lists) */
-
-/**
- * ibex_list_new:
- * @v:
- *
- * Initialise a list header. A list header must always be initialised
- * before use.
- **/
-void ibex_list_new(struct _list *v)
-{
- v->head = (struct _listnode *)&v->tail;
- v->tail = 0;
- v->tailpred = (struct _listnode *)&v->head;
-}
-
-/**
- * ibex_list_addhead:
- * @l: List.
- * @n: Node to append.
- *
- * Prepend a listnode to the head of the list @l.
- *
- * Return value: Always @n.
- **/
-struct _listnode *ibex_list_addhead(struct _list *l, struct _listnode *n)
-{
- n->next = l->head;
- n->prev = (struct _listnode *)&l->head;
- l->head->prev = n;
- l->head = n;
- return n;
-}
-
-/**
- * ibex_list_addtail:
- * @l:
- * @n:
- *
- * Append a listnode to the end of the list @l.
- *
- * Return value: Always the same as @n.
- **/
-struct _listnode *ibex_list_addtail(struct _list *l, struct _listnode *n)
-{
- n->next = (struct _listnode *)&l->tail;
- n->prev = l->tailpred;
- l->tailpred->next = n;
- l->tailpred = n;
- return n;
-}
-
-/**
- * ibex_list_remove:
- * @n: The node to remove.
- *
- * Remove a listnode from a list.
- *
- * Return value: Always the same as @n.
- **/
-struct _listnode *ibex_list_remove(struct _listnode *n)
-{
- n->next->prev = n->prev;
- n->prev->next = n->next;
- return n;
-}
-
-static struct _memblock *
-memblock_addr(struct _block *block)
-{
- return (struct _memblock *)(((char *)block) - G_STRUCT_OFFSET(struct _memblock, data));
-}
-
-/* read/sync the rootblock into the block_cache structure */
-static int
-ibex_block_read_root(struct _memcache *block_cache)
-{
- lseek(block_cache->fd, 0, SEEK_SET);
- if (read(block_cache->fd, &block_cache->root, sizeof(block_cache->root)) != sizeof(block_cache->root)) {
-
- return -1;
- }
- return 0;
-}
-
-static int
-ibex_block_sync_root(struct _memcache *block_cache)
-{
- lseek(block_cache->fd, 0, SEEK_SET);
- if (write(block_cache->fd, &block_cache->root, sizeof(block_cache->root)) != sizeof(block_cache->root)) {
- return -1;
- }
- return fsync(block_cache->fd);
-}
-
-
-/**
- * ibex_block_dirty:
- * @block:
- *
- * Dirty a block. This will cause it to be written to disk on
- * a cache sync, or when the block is flushed from the cache.
- **/
-void
-ibex_block_dirty(struct _block *block)
-{
- memblock_addr(block)->flags |= BLOCK_DIRTY;
-}
-
-static void
-sync_block(struct _memcache *block_cache, struct _memblock *memblock)
-{
- if (block_log)
- printf("writing block %d\n", memblock->block);
-
- lseek(block_cache->fd, memblock->block, SEEK_SET);
- if (write(block_cache->fd, &memblock->data, sizeof(memblock->data)) != -1) {
- memblock->flags &= ~BLOCK_DIRTY;
- }
-#ifdef IBEX_STATS
- add_write(block_cache, memblock->block);
-#endif
-}
-
-/**
- * ibex_block_cache_sync:
- * @block_cache:
- *
- * Ensure the block cache is fully synced to disk.
- **/
-void
-ibex_block_cache_sync(struct _memcache *block_cache)
-{
- struct _memblock *memblock;
-
- memblock = (struct _memblock *)block_cache->nodes.head;
- while (memblock->next) {
-#ifdef MALLOC_CHECK
- checkmem(memblock);
-#endif
- if (memblock->flags & BLOCK_DIRTY) {
- sync_block(block_cache, memblock);
- }
- memblock = memblock->next;
- }
-
- block_cache->root.flags |= IBEX_ROOT_SYNCF;
- if (ibex_block_sync_root(block_cache) != 0) {
- block_cache->root.flags &= ~IBEX_ROOT_SYNCF;
- }
-
-#ifdef IBEX_STATS
- dump_stats(block_cache);
-#endif
-
-}
-
-#ifdef MALLOC_CHECK
-static void
-check_cache(struct _memcache *block_cache)
-{
- struct _memblock *mw, *mn;
-
- checkmem(block_cache);
- checkmem(block_cache->index);
-
- mw = (struct _memblock *)block_cache->nodes.head;
- mn = mw->next;
- while (mn) {
- checkmem(mw);
- mw = mn;
- mn = mn->next;
- }
-}
-#endif
-
-/**
- * ibex_block_cache_flush:
- * @block_cache:
- *
- * Ensure the block cache is fully synced to disk, and then flush
- * its contents from memory.
- **/
-void
-ibex_block_cache_flush(struct _memcache *block_cache)
-{
- struct _memblock *mw, *mn;
-
- ibex_block_cache_sync(block_cache);
-
- mw = (struct _memblock *)block_cache->nodes.head;
- mn = mw->next;
- while (mn) {
- g_hash_table_remove(block_cache->index, (void *)mw->block);
- g_free(mw);
- mw = mn;
- mn = mn->next;
- }
- ibex_list_new(&block_cache->nodes);
-}
-
-/**
- * ibex_block_read:
- * @block_cache:
- * @blockid:
- *
- * Read the data of a block by blockid. The data contents is backed by
- * the block cache, and should be considered static.
- *
- * TODO; should this return a NULL block on error?
- *
- * Return value: The address of the block data (which may be cached).
- **/
-struct _block *
-ibex_block_read(struct _memcache *block_cache, blockid_t blockid)
-{
- struct _memblock *memblock;
-
-#ifdef MALLOC_CHECK
- check_cache(block_cache);
-#endif
-
- /* nothing can read the root block directly */
- g_assert(blockid != 0);
- g_assert(blockid < block_cache->root.roof);
-
- memblock = g_hash_table_lookup(block_cache->index, (void *)blockid);
-
-#ifdef MALLOC_CHECK
- check_cache(block_cache);
-#endif
-
- if (memblock) {
- d(printf("foudn blockid in cache %d = %p\n", blockid, &memblock->data));
-
- /* 'access' page */
- ibex_list_remove((struct _listnode *)memblock);
- ibex_list_addtail(&block_cache->nodes, (struct _listnode *)memblock);
-
-#ifdef MALLOC_CHECK
- check_cache(block_cache);
-#endif
-
-#ifdef IBEX_STATS
- add_hit(block_cache, memblock->block);
-#endif
-
-#ifdef MALLOC_CHECK
- check_cache(block_cache);
-#endif
-
- return &memblock->data;
- }
-#ifdef IBEX_STATS
- add_miss(block_cache, blockid);
- add_read(block_cache, blockid);
-#endif
- if (block_log)
- printf("miss block %d\n", blockid);
-
- d(printf("loading blockid from disk %d\n", blockid));
- memblock = g_malloc(sizeof(*memblock));
- memblock->block = blockid;
- memblock->flags = 0;
- lseek(block_cache->fd, blockid, SEEK_SET);
- memset(&memblock->data, 0, sizeof(memblock->data));
- read(block_cache->fd, &memblock->data, sizeof(memblock->data));
-
- ibex_list_addtail(&block_cache->nodes, (struct _listnode *)memblock);
- g_hash_table_insert(block_cache->index, (void *)blockid, memblock);
- if (block_cache->count >= CACHE_SIZE) {
- struct _memblock *old = (struct _memblock *)block_cache->nodes.head;
- d(printf("discaring cache block %d\n", old->block));
- g_hash_table_remove(block_cache->index, (void *)old->block);
- ibex_list_remove((struct _listnode *)old);
- if (old->flags & BLOCK_DIRTY) {
- /* are we about to un-sync the file? update root and sync it */
- if (block_cache->root.flags & IBEX_ROOT_SYNCF) {
- d(printf("Unsyncing root block\n"));
-
- block_cache->root.flags &= ~IBEX_ROOT_SYNCF;
- if (ibex_block_sync_root(block_cache) != 0) {
- /* what do we do? i dont know! */
- g_warning("Could not sync root block of index: %s", strerror(errno));
- }
- }
- sync_block(block_cache, old);
- }
- g_free(old);
- } else {
- block_cache->count++;
- }
-
- d(printf(" --- cached blocks : %d\n", block_cache->count));
-
-#ifdef MALLOC_CHECK
- check_cache(block_cache);
-#endif
- return &memblock->data;
-}
-
-/**
- * ibex_block_cache_open:
- * @name:
- * @flags: Flags as to open(2), should use O_RDWR and optionally O_CREAT.
- * @mode: Mose as to open(2)
- *
- * Open a block file.
- *
- * FIXME; this currently also initialises the word and name indexes
- * because their pointers are stored in the root block. Should be
- * upto the caller to manage these pointers/data.
- *
- * Return value: NULL if the backing file could not be opened.
- **/
-struct _memcache *
-ibex_block_cache_open(const char *name, int flags, int mode)
-{
- struct _memcache *block_cache = g_malloc0(sizeof(*block_cache));
-
- d(printf("opening ibex file: %s", name));
-
- /* setup cache */
- ibex_list_new(&block_cache->nodes);
- block_cache->count = 0;
- block_cache->index = g_hash_table_new(g_direct_hash, g_direct_equal);
- block_cache->fd = open(name, flags, mode);
-
- if (block_cache->fd == -1) {
- g_hash_table_destroy(block_cache->index);
- g_free(block_cache);
- return NULL;
- }
-
- ibex_block_read_root(block_cache);
- if (block_cache->root.roof == 0
- || memcmp(block_cache->root.version, IBEX_VERSION, 4)
- || ((block_cache->root.flags & IBEX_ROOT_SYNCF) == 0)) {
- d(printf("Initialising superblock\n"));
- /* reset root data */
- memcpy(block_cache->root.version, IBEX_VERSION, 4);
- block_cache->root.roof = 1024;
- block_cache->root.free = 0;
- block_cache->root.words = 0;
- block_cache->root.names = 0;
- block_cache->root.tail = 0; /* list of tail blocks */
- block_cache->root.flags = 0;
- ibex_block_sync_root(block_cache);
- /* reset the file contents */
- ftruncate(block_cache->fd, 1024);
- } else {
- d(printf("superblock already initialised:\n"
- " roof = %d\n free = %d\n words = %d\n names = %d\n tail = %d\n",
- block_cache->root.roof, block_cache->root.free,
- block_cache->root.words, block_cache->root.names, block_cache->root.tail));
- }
- /* FIXME: this should be moved higher up in the object tree */
- {
- struct _IBEXWord *ibex_create_word_index_mem(struct _memcache *bc, blockid_t *wordroot, blockid_t *nameroot);
-
- block_cache->words = ibex_create_word_index_mem(block_cache, &block_cache->root.words,&block_cache->root.names);
- }
-
-#ifdef IBEX_STATS
- init_stats(block_cache);
-#endif
-
- return block_cache;
-}
-
-/**
- * ibex_block_cache_close:
- * @block_cache:
- *
- * Close the block file, sync any remaining cached data
- * to disk, and free all resources.
- **/
-void
-ibex_block_cache_close(struct _memcache *block_cache)
-{
- struct _memblock *mw, *mn;
-
- ibex_block_cache_sync(block_cache);
- close(block_cache->fd);
-
- mw = (struct _memblock *)block_cache->nodes.head;
- mn = mw->next;
- while (mn) {
- g_free(mw);
- mw = mn;
- mn = mw->next;
- }
-
- g_hash_table_destroy(block_cache->index);
-
- g_free(block_cache);
-}
-
-/**
- * ibex_block_free:
- * @block_cache:
- * @blockid:
- *
- * Return a block to the free pool.
- **/
-void
-ibex_block_free(struct _memcache *block_cache, blockid_t blockid)
-{
- struct _block *block = ibex_block_read(block_cache, blockid);
-
- block->next = block_number(block_cache->root.free);
- block_cache->root.free = blockid;
- ibex_block_dirty((struct _block *)block);
-}
-
-/**
- * ibex_block_get:
- * @block_cache:
- *
- * Allocate a new block, or access a previously freed block and return
- * its block id. The block will have zeroed contents.
- *
- * Return value: 0 if there are no blocks left (disk full/read only
- * file, etc).
- **/
-blockid_t
-ibex_block_get(struct _memcache *block_cache)
-{
- struct _block *block;
- blockid_t head;
-
- if (block_cache->root.free) {
- head = block_cache->root.free;
- block = ibex_block_read(block_cache, head);
- block_cache->root.free = block_location(block->next);
- } else {
- /* TODO: check the block will fit first */
- /* TODO: no need to read this block, can allocate it manually (saves a syscall/read) */
- head = block_cache->root.roof;
- block_cache->root.roof += BLOCK_SIZE;
- block = ibex_block_read(block_cache, head);
- }
-
- g_assert(head != 0);
-
- d(printf("new block = %d\n", head));
- block->next = 0;
- block->used = 0;
- ibex_block_dirty(block);
- return head;
-}
diff --git a/libibex/block.h b/libibex/block.h
deleted file mode 100644
index dbc8fd5ad8..0000000000
--- a/libibex/block.h
+++ /dev/null
@@ -1,114 +0,0 @@
-
-/* public interfaces for block io routines */
-
-#ifndef _BLOCK_H
-#define _BLOCK_H
-
-/*#define IBEX_STATS*/ /* define to get/dump block access stats */
-
-#include <glib.h>
-
-/* version of file format */
-#define IBEX_VERSION "ibx6"
-
-typedef guint32 nameid_t;
-typedef guint32 blockid_t;
-
-#define BLOCK_BITS (8)
-#define BLOCK_SIZE (1<<BLOCK_BITS)
-#define CACHE_SIZE 256 /* blocks in disk cache */
-
-/* root block */
-struct _root {
- char version[4];
-
- blockid_t free; /* list of free blocks */
- blockid_t roof; /* top of allocated space, everything below is in a free or used list */
- blockid_t tail; /* list of 'tail' blocks */
-
- blockid_t words; /* root of words index */
- blockid_t names; /* root of names index */
-
- char flags; /* state flags */
-
- /* makes structure fill up to 1024 bytes */
- char dummy[1024 - (sizeof(char)*5) - (sizeof(blockid_t)*5)];
-};
-
-#define IBEX_ROOT_SYNCF (1<<0) /* file is synced */
-
-/* basic disk structure for (data) blocks */
-struct _block {
- unsigned int next:32-BLOCK_BITS; /* next block */
- unsigned int used:BLOCK_BITS; /* number of elements used */
-
- nameid_t bl_data[(BLOCK_SIZE-4)/4]; /* references */
-};
-
-/* custom list structure, for a simple/efficient cache */
-struct _listnode {
- struct _listnode *next;
- struct _listnode *prev;
-};
-struct _list {
- struct _listnode *head;
- struct _listnode *tail;
- struct _listnode *tailpred;
-};
-
-void ibex_list_new(struct _list *v);
-struct _listnode *ibex_list_addhead(struct _list *l, struct _listnode *n);
-struct _listnode *ibex_list_addtail(struct _list *l, struct _listnode *n);
-struct _listnode *ibex_list_remove(struct _listnode *n);
-
-/* in-memory structure for block cache */
-struct _memblock {
- struct _memblock *next;
- struct _memblock *prev;
-
- blockid_t block;
- int flags;
-
- struct _block data;
-};
-#define BLOCK_DIRTY (1<<0)
-
-struct _memcache {
- struct _list nodes;
- int count; /* nodes in cache */
-
- GHashTable *index; /* blockid->memblock mapping */
- int fd; /* file fd */
-
-#ifdef IBEX_STATS
- GHashTable *stats;
-#endif
- struct _root root; /* root block */
-
- /* temporary here */
- struct _IBEXWord *words; /* word index */
-};
-
-#ifdef IBEX_STATS
-struct _stat_info {
- int read;
- int write;
- int cache_hit;
- int cache_miss;
-};
-#endif /* IBEX_STATS */
-
-struct _memcache *ibex_block_cache_open(const char *name, int flags, int mode);
-void ibex_block_cache_close(struct _memcache *block_cache);
-void ibex_block_cache_sync(struct _memcache *block_cache);
-void ibex_block_cache_flush(struct _memcache *block_cache);
-
-blockid_t ibex_block_get(struct _memcache *block_cache);
-void ibex_block_free(struct _memcache *block_cache, blockid_t blockid);
-void ibex_block_dirty(struct _block *block);
-struct _block *ibex_block_read(struct _memcache *block_cache, blockid_t blockid);
-
-#define block_number(x) ((x)>>BLOCK_BITS)
-#define block_location(x) ((x)<<BLOCK_BITS)
-
-#endif /* ! _BLOCK_H */
diff --git a/libibex/diskarray.c b/libibex/diskarray.c
deleted file mode 100644
index 2664e0f6b5..0000000000
--- a/libibex/diskarray.c
+++ /dev/null
@@ -1,255 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
- *
- * Copyright (C) 2000 Helix Code, Inc.
- *
- * Authors: Michael Zucchi <notzed@helixcode.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public License
- * as published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with the Gnome Library; see the file COPYING.LIB. If not,
- * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-/* a disk based array storage class */
-
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-#include <glib.h>
-
-#include "block.h"
-#include "index.h"
-
-#define d(x)
-/*#define DEBUG*/
-
-static struct _IBEXStore *disk_create(struct _memcache *bc);
-static int disk_sync(struct _IBEXStore *store);
-static int disk_close(struct _IBEXStore *store);
-
-static blockid_t disk_add(struct _IBEXStore *store, blockid_t head, nameid_t data);
-static blockid_t disk_add_list(struct _IBEXStore *store, blockid_t head, GArray *data);
-static blockid_t disk_remove(struct _IBEXStore *store, blockid_t head, nameid_t data);
-static void disk_free(struct _IBEXStore *store, blockid_t head);
-
-static gboolean disk_find(struct _IBEXStore *store, blockid_t head, nameid_t data);
-static GArray *disk_get(struct _IBEXStore *store, blockid_t head);
-
-struct _IBEXStoreClass ibex_diskarray_class = {
- disk_create, disk_sync, disk_close,
- disk_add, disk_add_list,
- disk_remove, disk_free,
- disk_find, disk_get
-};
-
-static struct _IBEXStore *disk_create(struct _memcache *bc)
-{
- struct _IBEXStore *store;
-
- store = g_malloc0(sizeof(*store));
- store->klass = &ibex_diskarray_class;
- store->blocks = bc;
-
- return store;
-}
-
-static int disk_sync(struct _IBEXStore *store)
-{
- /* no cache, nop */
- return 0;
-}
-
-static int disk_close(struct _IBEXStore *store)
-{
- g_free(store);
- return 0;
-}
-
-static blockid_t
-disk_add(struct _IBEXStore *store, blockid_t head, nameid_t data)
-{
- struct _block *block;
- struct _block *newblock;
- blockid_t new;
-
- if (head == 0) {
- head = ibex_block_get(store->blocks);
- }
- block = ibex_block_read(store->blocks, head);
-
- d(printf("adding record %d to block %d (next = %d)\n", data, head, block->next));
-
- if (block->used < sizeof(block->bl_data)/sizeof(block->bl_data[0])) {
- d(printf("adding record into block %d %d\n", head, data));
- block->bl_data[block->used] = data;
- block->used++;
- ibex_block_dirty(block);
- return head;
- } else {
- new = ibex_block_get(store->blocks);
- newblock = ibex_block_read(store->blocks, new);
- newblock->next = head;
- newblock->bl_data[0] = data;
- newblock->used = 1;
- d(printf("adding record into new %d %d, next =%d\n", new, data, newblock->next));
- ibex_block_dirty(newblock);
- return new;
- }
-}
-
-static blockid_t
-disk_add_list(struct _IBEXStore *store, blockid_t head, GArray *data)
-{
- struct _block *block;
- struct _block *newblock;
- blockid_t new;
- int copied = 0;
- int left, space, tocopy;
-
- if (head == 0) {
- head = ibex_block_get(store->blocks);
- }
- block = ibex_block_read(store->blocks, head);
-
- while (copied < data->len) {
- left = data->len - copied;
- space = sizeof(block->bl_data)/sizeof(block->bl_data[0]) - block->used;
- if (space) {
- tocopy = MIN(left, space);
- memcpy(block->bl_data+block->used, &g_array_index(data, blockid_t, copied), tocopy*sizeof(blockid_t));
- block->used += tocopy;
- ibex_block_dirty(block);
- } else {
- new = ibex_block_get(store->blocks);
- newblock = ibex_block_read(store->blocks, new);
- newblock->next = head;
- tocopy = MIN(left, sizeof(block->bl_data)/sizeof(block->bl_data[0]));
- memcpy(newblock->bl_data, &g_array_index(data, blockid_t, copied), tocopy*sizeof(blockid_t));
- newblock->used = tocopy;
- block = newblock;
- head = new;
- ibex_block_dirty(newblock);
- }
- copied += tocopy;
- }
- return head;
-}
-
-static blockid_t
-disk_remove(struct _IBEXStore *store, blockid_t head, nameid_t data)
-{
- blockid_t node = head;
-
- d(printf("removing %d from %d\n", data, head));
- while (node) {
- struct _block *block = ibex_block_read(store->blocks, node);
- int i;
-
- for (i=0;i<block->used;i++) {
- if (block->bl_data[i] == data) {
- struct _block *start = ibex_block_read(store->blocks, head);
-
- start->used--;
- block->bl_data[i] = start->bl_data[start->used];
- if (start->used == 0) {
- struct _root *root = (struct _root *)ibex_block_read(store->blocks, 0);
- blockid_t new;
-
- d(printf("dropping block %d, new head = %d\n", head, start->next));
- new = start->next;
- start->next = root->free;
- root->free = head;
- head = new;
- ibex_block_dirty((struct _block *)root);
- }
- ibex_block_dirty(block);
- ibex_block_dirty(start);
- return head;
- }
- }
- node = block->next;
- }
- return head;
-}
-
-static void disk_free(struct _IBEXStore *store, blockid_t head)
-{
- blockid_t next;
- struct _block *block;
-
- while (head) {
- block = ibex_block_read(store->blocks, head);
- next = block->next;
- ibex_block_free(store->blocks, head);
- head = next;
- }
-}
-
-static gboolean
-disk_find(struct _IBEXStore *store, blockid_t head, nameid_t data)
-{
- blockid_t node = head;
-
- d(printf("finding %d from %d\n", data, head));
- while (node) {
- struct _block *block = ibex_block_read(store->blocks, node);
- int i;
-
- for (i=0;i<block->used;i++) {
- if (block->bl_data[i] == data) {
- return TRUE;
- }
- }
- node = block->next;
- }
- return FALSE;
-}
-
-static GArray *
-disk_get(struct _IBEXStore *store, blockid_t head)
-{
- GArray *result = g_array_new(0, 0, sizeof(nameid_t));
-
- while (head) {
- struct _block *block = ibex_block_read(store->blocks, head);
-
- d(printf("getting data from block %d\n", head));
-
- g_array_append_vals(result, block->bl_data, block->used);
- head = block->next;
- d(printf("next = %d\n", head));
- }
- return result;
-}
-
-void
-ibex_diskarray_dump(struct _memcache *blocks, blockid_t head)
-{
- blockid_t node = head;
-
- printf("dumping list %d\n", node);
- while (node) {
- struct _block *block = ibex_block_read(blocks, node);
- int i;
-
- printf(" block %d used %d\n ", node, block->used);
- for (i=0;i<block->used;i++) {
- printf(" %d", block->bl_data[i]);
- }
- printf("\n");
- node = block->next;
- }
-}
diff --git a/libibex/disktail.c b/libibex/disktail.c
deleted file mode 100644
index 88f5ca6c55..0000000000
--- a/libibex/disktail.c
+++ /dev/null
@@ -1,818 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
- *
- * Copyright (C) 2000 Helix Code, Inc.
- *
- * Authors: Michael Zucchi <notzed@helixcode.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public License
- * as published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with the Gnome Library; see the file COPYING.LIB. If not,
- * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-/* a disk based array storage class that stores the tails of data lists
- in common blocks */
-
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-#include <glib.h>
-#include <string.h>
-
-#include <stdio.h>
-
-#include "block.h"
-#include "index.h"
-
-#define d(x)
-/*#define DEBUG*/
-
-/* marker to define which root keys indicate a single length key */
-#define BLOCK_ONE (1<<BLOCK_BITS)
-
-/* tail blocks only contain tail data ... */
-/* and we pack it in, similar to the key data, only more compact */
-struct _tailblock {
- unsigned int next:32-BLOCK_BITS; /* only needs to point to block numbers */
- unsigned int used:BLOCK_BITS; /* how many entries are used */
- union {
- unsigned char offset[BLOCK_SIZE-4]; /* works upto blocksize of 1024 bytes */
- nameid_t data[(BLOCK_SIZE-4)/4];
- } tailblock_u;
-};
-#define tb_offset tailblock_u.offset
-#define tb_data tailblock_u.data
-
-/* map a tail index to a block index */
-#define TAIL_INDEX(b) ((b) & (BLOCK_SIZE-1))
-/* map a tail index to a block number */
-#define TAIL_BLOCK(b) ((b) & ~(BLOCK_SIZE-1))
-/* map a block + index to a tailid */
-#define TAIL_KEY(b, i) (((b) & ~(BLOCK_SIZE-1)) | ((i) & (BLOCK_SIZE-1)))
-
-#define TAIL_THRESHOLD ((BLOCK_SIZE-8)/6)
-
-static struct _IBEXStore *disk_create(struct _memcache *bc);
-static int disk_sync(struct _IBEXStore *store);
-static int disk_close(struct _IBEXStore *store);
-
-static blockid_t disk_add(struct _IBEXStore *store, blockid_t *head, blockid_t *tail, nameid_t data);
-static blockid_t disk_add_list(struct _IBEXStore *store, blockid_t *head, blockid_t *tail, GArray *data);
-static blockid_t disk_remove(struct _IBEXStore *store, blockid_t *head, blockid_t *tail, nameid_t data);
-static void disk_free(struct _IBEXStore *store, blockid_t head, blockid_t tail);
-
-static gboolean disk_find(struct _IBEXStore *store, blockid_t head, blockid_t tail, nameid_t data);
-static GArray *disk_get(struct _IBEXStore *store, blockid_t head, blockid_t tail);
-
-struct _IBEXStoreClass ibex_diskarray_class = {
- disk_create, disk_sync, disk_close,
- disk_add, disk_add_list,
- disk_remove, disk_free,
- disk_find, disk_get
-};
-
-
-static int
-tail_info(struct _tailblock *bucket, nameid_t tailid, blockid_t **startptr)
-{
- blockid_t *start, *end;
- int index;
-
- /* get start/end of area to zap */
- index = TAIL_INDEX(tailid);
- start = &bucket->tb_data[bucket->tb_offset[index]];
- if (index == 0) {
- end = &bucket->tb_data[sizeof(bucket->tb_data)/sizeof(bucket->tb_data[0])];
- } else {
- end = &bucket->tb_data[bucket->tb_offset[index-1]];
- }
- if (startptr)
- *startptr = start;
- return end-start;
-}
-
-/* compresses (or expand) the bucket entry, to the new size */
-static void
-tail_compress(struct _tailblock *bucket, int index, int newsize)
-{
- int i;
- blockid_t *start, *end, *newstart;
-
- /* get start/end of area to zap */
- start = &bucket->tb_data[bucket->tb_offset[index]];
- if (index == 0) {
- end = &bucket->tb_data[sizeof(bucket->tb_data)/sizeof(bucket->tb_data[0])];
- } else {
- end = &bucket->tb_data[bucket->tb_offset[index-1]];
- }
-
- if (end-start == newsize)
- return;
-
- /*
- XXXXXXXXXXXXXXXXXXXXXXXXXXXXXyyyyy
- 0 20 25
-
- newsize = 0
- end = 25
- newstart = 0
- start = 20
-
- newstart+(end-start)-newsize = 5
- i = start-newstart
-
- XXXXXXXXXXXXXXXXXXXXXXXXXXXXXyyyyy
- 0 20 25
-
- newsize = 2
- end = 25
- newstart = 0
- start = 20
-
- newstart+(end-start)-newsize = 3
- i = start-newstart + MIN(end-start, newsize)) = 22
-
- XXXXXXXXXXXXXXXXXXXXXXXXXXXXX
- 5 25
- newsize = 5
- end = 25
- start = 25
- newstart = 5
-
- newstart+(end-start)-newsize = 0
- i = start-newstart = 20
-
- XXXXXXXXXXXXXXXXXXXXXXXXXXXXXyy
- 3 23 25
- newsize = 5
- end = 25
- start = 23
- newstart = 3
-
- newstart+(end-start)-newsize = 0
- i = start-newstart + MIN(end-start, newsize) = 22
-
- */
-
-
- /* fixup data */
- newstart = &bucket->tb_data[bucket->tb_offset[bucket->used-1]];
-
- g_assert(newstart+(end-start)-newsize <= &bucket->tb_data[sizeof(bucket->tb_data)/sizeof(bucket->tb_data[0])]);
- g_assert(newstart + (start-newstart) + MIN(end-start, newsize) <= &bucket->tb_data[sizeof(bucket->tb_data)/sizeof(bucket->tb_data[0])]);
- g_assert(newstart+(end-start)-newsize >= (blockid_t *) &bucket->tb_offset[bucket->used]);
- g_assert(newstart + (start-newstart) + MIN(end-start, newsize) >= (blockid_t *) &bucket->tb_offset[bucket->used]);
-
- memmove(newstart+(end-start)-newsize, newstart, ((start-newstart)+MIN(end-start, newsize)) * sizeof(blockid_t));
-
- /* fixup key pointers */
- for (i=index;i<bucket->used;i++) {
- bucket->tb_offset[i] += (end-start)-newsize;
- }
- ibex_block_dirty((struct _block *)bucket);
-}
-
-/*
- returns the number of blockid's free
-*/
-static int
-tail_space(struct _tailblock *tail)
-{
- if (tail->used == 0)
- return sizeof(tail->tb_data)/sizeof(tail->tb_data[0])-1;
-
- return &tail->tb_data[tail->tb_offset[tail->used-1]]
- - (blockid_t *)&tail->tb_offset[tail->used];
-}
-
-#if 0
-static void
-tail_dump(struct _memcache *blocks, blockid_t tailid)
-{
- int i;
- struct _tailblock *tail = (struct _tailblock *)ibex_block_read(blocks, TAIL_BLOCK(tailid));
-
- printf("Block %d, used %d\n", tailid, tail->used);
- for (i=0;i<sizeof(struct _tailblock)/sizeof(unsigned int);i++) {
- printf(" %08x", ((unsigned int *)tail)[i]);
- }
- printf("\n");
-}
-#endif
-
-static blockid_t
-tail_get(struct _memcache *blocks, int size)
-{
- blockid_t tailid;
- struct _tailblock *tail;
- int freeindex;
- blockid_t *end;
- int i, count = 0;
-
- d(printf("looking for a tail node with %d items in it\n", size));
-
- /* look for a node with enough space, if we dont find it fairly
- quickly, just quit. needs a better free algorithm i think ... */
- tailid = blocks->root.tail;
- while (tailid && count<5) {
- int space;
-
- d(printf(" checking tail node %d\n", tailid));
-
- tail = (struct _tailblock *)ibex_block_read(blocks, tailid);
-
- if (tail->used == 0) {
- /* assume its big enough ... */
- tail->used = 1;
- tail->tb_offset[0] = sizeof(tail->tb_data)/sizeof(tail->tb_data[0]) - size;
- d(printf("allocated %d (%d), used %d\n", tailid, tailid, tail->used));
- ibex_block_dirty((struct _block *)tail);
-
- g_assert(&tail->tb_offset[tail->used-1]
- < (unsigned char *) &tail->tb_data[tail->tb_offset[tail->used-1]]);
-
- return tailid;
- }
-
- g_assert(&tail->tb_offset[tail->used-1]
- < (unsigned char *) &tail->tb_data[tail->tb_offset[tail->used-1]]);
-
- /* see if we have a free slot first */
- freeindex = -1;
- end = &tail->tb_data[sizeof(tail->tb_data)/sizeof(tail->tb_data[0])];
- for (i=0;i<tail->used;i++) {
- if (end == &tail->tb_data[tail->tb_offset[i]]) {
- freeindex = i;
- break;
- }
- end = &tail->tb_data[tail->tb_offset[i]];
- }
-
- /* determine how much space we have available - incl any extra header we might need */
- space = ((char *)&tail->tb_data[tail->tb_offset[tail->used-1]])
- - ((char *)&tail->tb_offset[tail->used])
- /* FIXMEL work out why this is out a little bit */
- - 8;
- if (freeindex == -1)
- space -= sizeof(tail->tb_offset[0]);
-
- /* if we have enough, set it up, creating a new entry if necessary */
- /* for some really odd reason the compiler promotes this expression to unsigned, hence
- the requirement for the space>0 check ... */
- if (space>0 && space > size*sizeof(blockid_t)) {
- d(printf("space = %d, size = %d size*sizeof() = %d truth = %d\n", space, size, size*sizeof(blockid_t), space>size*sizeof(blockid_t)));
- if (freeindex == -1) {
- freeindex = tail->used;
- tail->tb_offset[tail->used] = tail->tb_offset[tail->used-1];
- tail->used++;
- }
- tail_compress(tail, freeindex, size);
- ibex_block_dirty((struct _block *)tail);
- d(printf("allocated %d (%d), used %d\n", tailid, TAIL_KEY(tailid, freeindex), tail->used));
- return TAIL_KEY(tailid, freeindex);
- }
- count++;
- tailid = block_location(tail->next);
- }
-
- d(printf("allocating new data node for tail data\n"));
- tailid = ibex_block_get(blocks);
- tail = (struct _tailblock *)ibex_block_read(blocks, tailid);
- tail->next = block_number(blocks->root.tail);
- blocks->root.tail = tailid;
- tail->used = 1;
- tail->tb_offset[0] = sizeof(tail->tb_data)/sizeof(tail->tb_data[0]) - size;
- ibex_block_dirty((struct _block *)tail);
- d(printf("allocated %d (%d), used %d\n", tailid, TAIL_KEY(tailid, 0), tail->used));
-
- g_assert(&tail->tb_offset[tail->used-1]
- < (unsigned char *) &tail->tb_data[tail->tb_offset[tail->used-1]]);
-
- return TAIL_KEY(tailid, 0);
-}
-
-static void
-tail_free(struct _memcache *blocks, blockid_t tailid)
-{
- struct _tailblock *tail;
-
- d(printf("freeing tail id %d\n", tailid));
-
- if (tailid == 0)
- return;
-
- tail = (struct _tailblock *)ibex_block_read(blocks, TAIL_BLOCK(tailid));
- d(printf(" tail %d used %d\n", tailid, tail->used));
- g_assert(tail->used);
- if (TAIL_INDEX(tailid) == tail->used - 1) {
- tail->used --;
- } else {
- tail_compress(tail, TAIL_INDEX(tailid), 0);
- }
- ibex_block_dirty((struct _block *)tail);
-}
-
-static struct _IBEXStore *disk_create(struct _memcache *bc)
-{
- struct _IBEXStore *store;
-
- store = g_malloc0(sizeof(*store));
- store->klass = &ibex_diskarray_class;
- store->blocks = bc;
-
- return store;
-}
-
-static int disk_sync(struct _IBEXStore *store)
-{
- /* no cache, nop */
- return 0;
-}
-
-static int disk_close(struct _IBEXStore *store)
-{
- g_free(store);
- return 0;
-}
-
-static blockid_t
-disk_add(struct _IBEXStore *store, blockid_t *headptr, blockid_t *tailptr, nameid_t data)
-{
- struct _block *block;
- struct _block *newblock;
- blockid_t new, head = *headptr /*, tail = *tailptr*/;
-
- g_error("Inbimplemented");
-
- if (head == 0) {
- head = ibex_block_get(store->blocks);
- }
- block = ibex_block_read(store->blocks, head);
-
- d(printf("adding record %d to block %d (next = %d)\n", data, head, block->next));
-
- if (block->used < sizeof(block->bl_data)/sizeof(block->bl_data[0])) {
- (printf("adding record into block %d %d\n", head, data));
- block->bl_data[block->used] = data;
- block->used++;
- ibex_block_dirty(block);
- return head;
- } else {
- new = ibex_block_get(store->blocks);
- newblock = ibex_block_read(store->blocks, new);
- newblock->next = block_number(head);
- newblock->bl_data[0] = data;
- newblock->used = 1;
- d(printf("adding record into new %d %d, next =%d\n", new, data, newblock->next));
- ibex_block_dirty(newblock);
- return new;
- }
-}
-
-static blockid_t
-disk_add_blocks_internal(struct _IBEXStore *store, blockid_t *headptr, blockid_t *tailptr, GArray *data)
-{
- blockid_t head = *headptr, tail = *tailptr, new;
- int tocopy;
- struct _tailblock *tailblock;
- struct _block *block, *newblock;
- int space, copied = 0, left;
-
- /* assumes this funciton is in control of any tail creation */
- g_assert(tail == 0);
-
- d(printf("Adding %d items to block list\n", data->len));
-
- if (head == 0) {
- head = ibex_block_get(store->blocks);
- d(printf("allocating new head %d\n", head));
- }
- block = ibex_block_read(store->blocks, head);
-
- /* ensure the first block is full before proceeding */
- space = sizeof(block->bl_data)/sizeof(block->bl_data[0]) - block->used;
- if (space) {
- tocopy = MIN(data->len, space);
- memcpy(block->bl_data+block->used, &g_array_index(data, blockid_t, copied), tocopy*sizeof(blockid_t));
- block->used += tocopy;
- ibex_block_dirty(block);
- copied = tocopy;
- d(printf("copied %d items to left over last node\n", tocopy));
- }
-
- while (copied < data->len) {
- left = data->len - copied;
- /* do we drop the rest in a tail? */
- if (left < TAIL_THRESHOLD) {
- d(printf("Storing remaining %d items in tail\n", left));
- tocopy = left;
- new = tail_get(store->blocks, tocopy);
- tailblock = (struct _tailblock *)ibex_block_read(store->blocks, TAIL_BLOCK(new));
- memcpy(&tailblock->tb_data[tailblock->tb_offset[TAIL_INDEX(new)]],
- &g_array_index(data, blockid_t, copied), tocopy*sizeof(blockid_t));
- *tailptr = new;
- } else {
- new = ibex_block_get(store->blocks);
- newblock = (struct _block *)ibex_block_read(store->blocks, new);
- newblock->next = block_number(head);
- tocopy = MIN(left, sizeof(block->bl_data)/sizeof(block->bl_data[0]));
- d(printf("storing %d items in own block\n", tocopy));
- memcpy(newblock->bl_data, &g_array_index(data, blockid_t, copied), tocopy*sizeof(blockid_t));
- newblock->used = tocopy;
- block = newblock;
- head = new;
- ibex_block_dirty(newblock);
- }
- copied += tocopy;
- }
-
- *headptr = head;
- return head;
-}
-/*
- case 1:
- no head, no tail, adding a lot of data
- add blocks until the last, which goes in a tail node
- case 2:
- no head, no tail, adding a little data
- just add a tail node
- case 3:
- no head, tail, total exceeds a block
- merge as much as possible into new full blocks, then the remainder in the tail
- case 4:
- no head, tail, room in existing tail for data
- add new data to tail node
- case 5:
- no head, tail, no room in existing tail for data
- add a new tail node, copy data across, free old tail node
-*/
-
-static blockid_t
-disk_add_list(struct _IBEXStore *store, blockid_t *headptr, blockid_t *tailptr, GArray *data)
-{
- blockid_t new, head = *headptr, tail = *tailptr, *start;
- struct _tailblock *tailblock, *tailnew;
- int len;
- GArray *tmpdata = NULL;
-
- d(printf("adding %d items head=%d tail=%d\n", data->len, head, tail));
- if (data->len == 0)
- return head;
-
- /* store length=1 data in the tail pointer */
- if (head == 0 && tail == 0 && data->len == 1) {
- *headptr = BLOCK_ONE;
- *tailptr = g_array_index(data, blockid_t, 0);
- return BLOCK_ONE;
- }
- /* if we got length=1 data to append to, upgrade it to a real block list */
- if (head == BLOCK_ONE) {
- tmpdata = g_array_new(0, 0, sizeof(blockid_t));
- g_array_append_vals(tmpdata, data->data, data->len);
- g_array_append_val(tmpdata, tail);
- head = *headptr = 0;
- tail = *tailptr = 0;
- }
-
- /* if we have no head, then check the tail */
- if (head == 0) {
- if (tail == 0) {
- if (data->len >= TAIL_THRESHOLD) {
- /* add normally */
- head = disk_add_blocks_internal(store, headptr, tailptr, data);
- } else {
- /* else add to a tail block */
- new = tail_get(store->blocks, data->len);
- d(printf("adding %d items to a tail block %d\n", data->len, new));
- tailnew = (struct _tailblock *)ibex_block_read(store->blocks, TAIL_BLOCK(new));
- memcpy(&tailnew->tb_data[tailnew->tb_offset[TAIL_INDEX(new)]],
- data->data, data->len*sizeof(blockid_t));
- *tailptr = new;
- ibex_block_dirty((struct _block *)tailnew);
- }
- } else {
- tailblock = (struct _tailblock *)ibex_block_read(store->blocks, TAIL_BLOCK(tail));
- len = tail_info(tailblock, tail, &start);
- /* case 3 */
- if (len + data->len >= TAIL_THRESHOLD) {
- /* this is suboptimal, but should work - merge the tail data with
- our new data, and write it out */
- if (tmpdata == NULL) {
- tmpdata = g_array_new(0, 0, sizeof(blockid_t));
- g_array_append_vals(tmpdata, data->data, data->len);
- }
- g_array_append_vals(tmpdata, start, len);
- *tailptr = 0;
- tail_free(store->blocks, tail);
- head = disk_add_blocks_internal(store, headptr, tailptr, tmpdata);
- } else if (tail_space(tailblock) >= data->len) {
- /* can we just expand this in our node, or do we need a new one? */
- tail_compress(tailblock, TAIL_INDEX(tail), data->len + len);
- memcpy(&tailblock->tb_data[tailblock->tb_offset[TAIL_INDEX(tail)] + len],
- data->data, data->len * sizeof(blockid_t));
- ibex_block_dirty((struct _block *)tailblock);
- } else {
- /* we need to allocate a new tail node */
- new = tail_get(store->blocks, data->len + len);
- /* and copy the data across */
- tailnew = (struct _tailblock *)ibex_block_read(store->blocks, TAIL_BLOCK(new));
- memcpy(&tailnew->tb_data[tailnew->tb_offset[TAIL_INDEX(new)]],
- start, len*sizeof(blockid_t));
- memcpy(&tailnew->tb_data[tailnew->tb_offset[TAIL_INDEX(new)]+len],
- data->data, data->len*sizeof(blockid_t));
- tail_free(store->blocks, tail);
- ibex_block_dirty((struct _block *)tailnew);
- *tailptr = new;
- }
- }
- } else {
- if (tail) {
- /* read/merge the tail with the new data, rewrite out.
- suboptimal, but it should be 'ok' ? */
- tailblock = (struct _tailblock *)ibex_block_read(store->blocks, TAIL_BLOCK(tail));
- len = tail_info(tailblock, tail, &start);
- tmpdata = g_array_new(0, 0, sizeof(blockid_t));
- g_array_append_vals(tmpdata, start, len);
- g_array_append_vals(tmpdata, data->data, data->len);
- *tailptr = 0;
- tail_free(store->blocks, tail);
- head = disk_add_blocks_internal(store, headptr, tailptr, tmpdata);
- } else {
- head = disk_add_blocks_internal(store, headptr, tailptr, data);
- }
- }
- if (tmpdata)
- g_array_free(tmpdata, TRUE);
-
- return head;
-}
-
-static blockid_t
-disk_remove(struct _IBEXStore *store, blockid_t *headptr, blockid_t *tailptr, nameid_t data)
-{
- blockid_t head = *headptr, node = *headptr, tail = *tailptr;
- int i;
-
- /* special case for 1-item nodes */
- if (head == BLOCK_ONE) {
- if (tail == data) {
- *tailptr = 0;
- *headptr = 0;
- head = 0;
- }
- return head;
- }
-
- d(printf("removing %d from %d\n", data, head));
- while (node) {
- struct _block *block = ibex_block_read(store->blocks, node);
-
- for (i=0;i<block->used;i++) {
- if (block->bl_data[i] == data) {
- struct _block *start = ibex_block_read(store->blocks, head);
-
- d(printf("removing data from block %d\n ", head));
-
- start->used--;
- block->bl_data[i] = start->bl_data[start->used];
- if (start->used == 0) {
- blockid_t new;
-
- d(printf("dropping block %d, new head = %d\n", head, start->next));
- new = block_location(start->next);
- start->next = block_number(store->blocks->root.free);
- store->blocks->root.free = head;
- head = new;
- }
- ibex_block_dirty(block);
- ibex_block_dirty(start);
- *headptr = head;
- return head;
- }
- }
- node = block_location(block->next);
- }
-
- if (tail) {
- struct _tailblock *tailblock = (struct _tailblock *)ibex_block_read(store->blocks, TAIL_BLOCK(tail));
- int len;
- blockid_t *start;
-
- len = tail_info(tailblock, tail, &start);
- for (i=0;i<len;i++) {
- if (start[i] == data) {
- for (;i<len-1;i++)
- start[i] = start[i+1];
- if (len == 1)
- *tailptr = 0;
- if (len == 1 && (tailblock->used-1) == TAIL_INDEX(tail)) {
- d(printf("dropping/unlinking tailblock %d (%d) used = %d\n",
- TAIL_BLOCK(tail), tail, tailblock->used));
- tailblock->used--;
- /* drop/unlink block? */
- ibex_block_dirty((struct _block *)tailblock);
- *tailptr = 0;
- } else {
- tail_compress(tailblock, TAIL_INDEX(tail), len-1);
- ibex_block_dirty((struct _block *)tailblock);
- }
- }
- }
-
- }
-
- return head;
-}
-
-static void disk_free(struct _IBEXStore *store, blockid_t head, blockid_t tail)
-{
- blockid_t next;
- struct _block *block;
-
- if (head == BLOCK_ONE)
- return;
-
- while (head) {
- d(printf("freeing block %d\n", head));
- block = ibex_block_read(store->blocks, head);
- next = block_location(block->next);
- ibex_block_free(store->blocks, head);
- head = next;
- }
- if (tail) {
- struct _tailblock *tailblock = (struct _tailblock *)ibex_block_read(store->blocks, TAIL_BLOCK(tail));
- d(printf("freeing tail block %d (%d)\n", TAIL_BLOCK(tail), tail));
- tail_compress(tailblock, TAIL_INDEX(tail), 0);
- ibex_block_dirty((struct _block *)tailblock);
- }
-}
-
-static gboolean
-disk_find(struct _IBEXStore *store, blockid_t head, blockid_t tail, nameid_t data)
-{
- blockid_t node = head;
- int i;
-
- d(printf("finding %d from %d\n", data, head));
-
- if (head == BLOCK_ONE)
- return data == tail;
-
- /* first check full blocks */
- while (node) {
- struct _block *block = ibex_block_read(store->blocks, node);
-
- for (i=0;i<block->used;i++) {
- if (block->bl_data[i] == data) {
- return TRUE;
- }
- }
- node = block_location(block->next);
- }
-
- /* then check tail */
- if (tail) {
- struct _tailblock *tailblock = (struct _tailblock *)ibex_block_read(store->blocks, TAIL_BLOCK(tail));
- int len;
- blockid_t *start;
-
- len = tail_info(tailblock, tail, &start);
- for (i=0;i<len;i++) {
- if (start[i] == data)
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
-static GArray *
-disk_get(struct _IBEXStore *store, blockid_t head, blockid_t tail)
-{
- GArray *result = g_array_new(0, 0, sizeof(nameid_t));
-
- if (head == BLOCK_ONE) {
- g_array_append_val(result, tail);
- return result;
- }
-
- while (head) {
- struct _block *block = ibex_block_read(store->blocks, head);
-
- d(printf("getting data from block %d\n", head));
-
- g_array_append_vals(result, block->bl_data, block->used);
- head = block_location(block->next);
- d(printf("next = %d\n", head));
- }
-
- /* chuck on any tail data as well */
- if (tail) {
- struct _tailblock *tailblock;
- int len;
- blockid_t *start;
-
- tailblock = (struct _tailblock *)ibex_block_read(store->blocks, TAIL_BLOCK(tail));
- len = tail_info(tailblock, tail, &start);
- g_array_append_vals(result, start, len);
- }
- return result;
-}
-
-void ibex_diskarray_dump(struct _memcache *blocks, blockid_t head, blockid_t tail);
-
-void
-ibex_diskarray_dump(struct _memcache *blocks, blockid_t head, blockid_t tail)
-{
- blockid_t node = head;
-
- printf("dumping list %d tail %d\n", node, tail);
- if (head == BLOCK_ONE) {
- printf(" 1 length index: %d\n", tail);
- return;
- }
-
- while (node) {
- struct _block *block = ibex_block_read(blocks, node);
- int i;
-
- printf(" block %d used %d\n ", node, block->used);
- for (i=0;i<block->used;i++) {
- printf(" %d", block->bl_data[i]);
- }
- printf("\n");
- node = block_location(block->next);
- }
-
- printf("tail: ");
- if (tail) {
- struct _tailblock *tailblock;
- int len;
- blockid_t *start;
- int i;
-
- tailblock = (struct _tailblock *)ibex_block_read(blocks, TAIL_BLOCK(tail));
- len = tail_info(tailblock, tail, &start);
- for (i=0;i<len;i++)
- printf(" %d", start[i]);
- }
- printf("\n");
-}
-
-#if 0
-int main(int argc, char **argv)
-{
- struct _memcache *bc;
- struct _IBEXStore *disk;
- int i;
- blockid_t data[100];
- GArray adata;
- blockid_t head=0, tail=0;
-
- for (i=0;i<100;i++) {
- data[i] = i;
- }
-
- bc = ibex_block_cache_open("index.db", O_CREAT|O_RDWR, 0600);
- disk = ibex_diskarray_class.create(bc);
-
- head = 0;
- tail = 0;
- adata.data = data;
- adata.len = 70;
- for (i=0;i<100;i++) {
- printf("Loop %d\n", i);
- ibex_diskarray_class.add_list(disk, &head, &tail, &adata);
- ibex_diskarray_dump(bc, head, tail);
- }
-
-#if 0
- for (i=1;i<100;i++) {
- printf("inserting %d items\n", i);
- adata.data = data;
- adata.len = i;
- head=0;
- tail=0;
- ibex_diskarray_class.add_list(disk, &head, &tail, &adata);
- ibex_diskarray_dump(bc, head, tail);
- }
-#endif
- ibex_diskarray_class.close(disk);
- ibex_block_cache_close(bc);
- return 0;
-}
-
-#endif
diff --git a/libibex/dumpindex.c b/libibex/dumpindex.c
deleted file mode 100644
index 410a7083d6..0000000000
--- a/libibex/dumpindex.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- Dump the hash tables from an ibex file.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "ibex_internal.h"
-
-extern void ibex_hash_dump(struct _IBEXIndex *index);
-
-static void
-index_iterate(struct _IBEXIndex *index)
-{
- struct _IBEXCursor *idc;
- int len;
- char *key;
- int total = 0, totallen = 0;
-
- idc = index->klass->get_cursor(index);
- key = idc->klass->next_key(idc, &len);
- while (len) {
- total++;
- totallen += len;
- printf("%s\n", key);
- g_free(key);
- key = idc->klass->next_key(idc, &len);
- }
- g_free(key);
-
- idc->klass->close(idc);
-
- printf("Iterate Totals: %d items, total bytes %d\n", total, totallen);
-}
-
-int main(int argc, char **argv)
-{
- ibex *ib;
-
-#ifdef ENABLE_THREADS
- g_thread_init(0);
-#endif
-
- if (argc != 2) {
- printf("Usage: %s ibexfile\n", argv[0]);
- return 1;
- }
- ib = ibex_open(argv[1], O_RDONLY, 0);
- if (ib == NULL) {
- perror("Opening ibex file\n");
- return 1;
- }
-
- ibex_hash_dump(ib->words->wordindex);
- ibex_hash_dump(ib->words->nameindex);
-
- index_iterate(ib->words->wordindex);
- index_iterate(ib->words->nameindex);
-
- ibex_close(ib);
-
- return 0;
-}
diff --git a/libibex/hash.c b/libibex/hash.c
deleted file mode 100644
index f79ee8f55e..0000000000
--- a/libibex/hash.c
+++ /dev/null
@@ -1,859 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
- *
- * Copyright (C) 2000 Helix Code, Inc.
- *
- * Authors: Michael Zucchi <notzed@helixcode.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public License
- * as published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with the Gnome Library; see the file COPYING.LIB. If not,
- * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-/* hash based index mechanism */
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <string.h>
-
-#include "block.h"
-#include "index.h"
-
-#define d(x)
-
-#define HASH_SIZE (1024)
-#define KEY_THRESHOLD (sizeof(struct _hashkey) + 4) /* minimum number of free bytes we worry about
- maintaining free blocks for */
-#define ARRAY_LEN(a) (sizeof(a)/sizeof(a[0]))
-
-typedef guint32 hashid_t;
-
-struct _HASHCursor {
- struct _IBEXCursor cursor;
-
- hashid_t key;
- hashid_t block;
- unsigned int index;
- unsigned int size;
-};
-
-static struct _IBEXIndex *hash_create(struct _memcache *bc, int size);
-static struct _IBEXIndex *hash_open(struct _memcache *bc, blockid_t root);
-static int hash_sync(struct _IBEXIndex *index);
-static int hash_close(struct _IBEXIndex *index);
-
-static hashid_t hash_find(struct _IBEXIndex *index, const char *key, int keylen);
-static void hash_remove(struct _IBEXIndex *index, const char *key, int keylen);
-static hashid_t hash_insert(struct _IBEXIndex *index, const char *key, int keylen);
-static char *hash_get_key(struct _IBEXIndex *index, hashid_t hashbucket, int *len);
-static void hash_set_data_block(struct _IBEXIndex *index, hashid_t keyid, blockid_t blockid, blockid_t tail);
-static blockid_t hash_get_data_block(struct _IBEXIndex *index, hashid_t keyid, blockid_t *tail);
-static struct _IBEXCursor *hash_get_cursor(struct _IBEXIndex *index);
-
-static struct _IBEXCursor *hash_cursor_create(struct _IBEXIndex *);
-static void hash_cursor_close(struct _IBEXCursor *);
-static guint32 hash_cursor_next(struct _IBEXCursor *);
-static char *hash_cursor_next_key(struct _IBEXCursor *, int *keylenptr);
-
-struct _IBEXIndexClass ibex_hash_class = {
- hash_create, hash_open,
- hash_sync, hash_close,
- hash_find,
- hash_remove,
- hash_insert,
- hash_get_key,
- hash_set_data_block,
- hash_get_data_block,
- hash_get_cursor,
-};
-
-struct _IBEXCursorClass ibex_hash_cursor_class = {
- hash_cursor_close,
- hash_cursor_next,
- hash_cursor_next_key
-};
-
-/* the reason we have the tail here is that otherwise we need to
- have a 32 bit blockid for the root node; which would make this
- structure the same size anyway, with about 24 wasted bits */
-struct _hashkey {
- blockid_t next; /* next in hash chain */
- blockid_t tail;
- unsigned int root:32-BLOCK_BITS;
- unsigned int keyoffset:BLOCK_BITS;
-};
-
-struct _hashblock {
- unsigned int next:32-BLOCK_BITS; /* next block, linked list of all key blocks: block number */
- unsigned int used:BLOCK_BITS; /* number of elements used */
- union {
- struct _hashkey keys[(BLOCK_SIZE-4)/sizeof(struct _hashkey)];
- char keydata[BLOCK_SIZE-4];
- } hashblock_u;
-};
-#define hb_keys hashblock_u.keys
-#define hb_keydata hashblock_u.keydata
-
-/* size of block overhead + 2 key block overhead */
-#define MAX_KEYLEN (BLOCK_SIZE - 4 - 12 - 12)
-
-/* root block for a hash index */
-struct _hashroot {
- hashid_t free; /* free list */
- guint32 size; /* how big the hash table is */
- hashid_t keys; /* linked list of blocks */
- hashid_t table[(BLOCK_SIZE-8)/sizeof(hashid_t)]; /* pointers to blocks of pointers */
-};
-
-struct _hashtableblock {
- hashid_t buckets[BLOCK_SIZE/sizeof(hashid_t)];
-};
-
-/* map a hash index to a block index */
-#define HASH_INDEX(b) ((b) & (BLOCK_SIZE-1))
-/* map a hash index to a block number */
-#define HASH_BLOCK(b) ((b) & ~(BLOCK_SIZE-1))
-/* map a block + index to a hash key */
-#define HASH_KEY(b, i) (((b) & ~(BLOCK_SIZE-1)) | ((i) & (BLOCK_SIZE-1)))
-
-/* taken from tdb/gdbm */
-static unsigned int hash_key(const unsigned char *key, int keylen)
-{
- char *newkey;
- newkey = alloca(keylen+1);
- memcpy(newkey, key, keylen);
- newkey[keylen]=0;
- return g_str_hash(newkey);
-#if 0
- unsigned int value; /* Used to compute the hash value. */
- unsigned int i; /* Used to cycle through random values. */
-
- /* Set the initial value from the key size. */
- value = 0x238F13AF * keylen;
- for (i=0; i < keylen; i++) {
- value = (value + (key[i] << (i*5 % 24)));
- }
-
- value = (1103515243 * value + 12345);
-
- return value;
-#endif
-}
-
-/* create a new hash table, return a pointer to its root block */
-static struct _IBEXIndex *
-hash_create(struct _memcache *bc, int size)
-{
- blockid_t root, block;
- struct _hashroot *hashroot;
- int i;
- struct _hashtableblock *table;
- struct _IBEXIndex *index;
-
- g_assert(size<=10240);
-
- d(printf("initialising hash table, size = %d\n", size));
-
- index = g_malloc0(sizeof(*index));
- index->blocks = bc;
- index->klass = &ibex_hash_class;
- root = ibex_block_get(bc);
- index->root = root;
- d(printf(" root = %d\n", root));
- hashroot = (struct _hashroot *)ibex_block_read(bc, root);
- hashroot->free = 0;
- hashroot->size = size;
- ibex_block_dirty((struct _block *)hashroot);
- for (i=0;i<size/(BLOCK_SIZE/sizeof(blockid_t));i++) {
- d(printf("initialising hash table index block %d\n", i));
- block = hashroot->table[i] = ibex_block_get(bc);
- table = (struct _hashtableblock *)ibex_block_read(bc, block);
- memset(table, 0, sizeof(table));
- ibex_block_dirty((struct _block *)table);
- }
-
- return index;
-}
-
-static struct _IBEXIndex *
-hash_open(struct _memcache *bc, blockid_t root)
-{
- struct _IBEXIndex *index;
-
- /* FIXME: check a 'magic', and the root for validity */
-
- index = g_malloc0(sizeof(*index));
- index->blocks = bc;
- index->root = root;
- index->klass = &ibex_hash_class;
-
- return index;
-}
-
-
-static int hash_sync(struct _IBEXIndex *index)
-{
- /* nop, index always synced on disk (at least, to blocks) */
- return 0;
-}
-
-static int hash_close(struct _IBEXIndex *index)
-{
-#ifdef INDEX_STAT
- printf("Performed %d lookups, average %f depth\n", index->lookups, (double)index->lookup_total/index->lookups);
-#endif
- g_free(index);
- return 0;
-}
-
-/* get an iterator class */
-static struct _IBEXCursor *hash_get_cursor(struct _IBEXIndex *index)
-{
- return hash_cursor_create(index);
-}
-
-/* convert a hashbucket id into a name */
-static char *
-hash_get_key(struct _IBEXIndex *index, hashid_t hashbucket, int *len)
-{
- struct _hashblock *bucket;
- int ind;
- char *ret, *start, *end;
-
- if (hashbucket == 0) {
- if (len)
- *len = 0;
- return g_strdup("");
- }
-
- bucket = (struct _hashblock *)ibex_block_read(index->blocks, HASH_BLOCK(hashbucket));
- ind = HASH_INDEX(hashbucket);
-
- g_assert(ind < bucket->used);
-
- start = &bucket->hb_keydata[bucket->hb_keys[ind].keyoffset];
- if (ind == 0) {
- end = &bucket->hb_keydata[sizeof(bucket->hb_keydata)/sizeof(bucket->hb_keydata[0])];
- } else {
- end = &bucket->hb_keydata[bucket->hb_keys[ind-1].keyoffset];
- }
-
- ret = g_malloc(end-start+1);
- memcpy(ret, start, end-start);
- ret[end-start]=0;
- if (len)
- *len = end-start;
- return ret;
-}
-
-/* sigh, this is fnugly code ... */
-static hashid_t
-hash_find(struct _IBEXIndex *index, const char *key, int keylen)
-{
- struct _hashroot *hashroot;
- guint32 hash;
- int hashentry;
- blockid_t hashtable;
- hashid_t hashbucket;
- struct _hashtableblock *table;
-
- g_assert(index != 0);
- g_assert(index->root != 0);
-
- d(printf("finding hash %.*s\n", keylen, key));
-
- /* truncate the key to the maximum size */
- if (keylen > MAX_KEYLEN)
- keylen = MAX_KEYLEN;
-
- hashroot = (struct _hashroot *)ibex_block_read(index->blocks, index->root);
-
- /* find the table containing this entry */
- hash = hash_key(key, keylen) % hashroot->size;
- hashtable = hashroot->table[hash / (BLOCK_SIZE/sizeof(blockid_t))];
- g_assert(hashtable != 0);
- table = (struct _hashtableblock *)ibex_block_read(index->blocks, hashtable);
- hashentry = hash % (BLOCK_SIZE/sizeof(blockid_t));
- /* and its bucket */
- hashbucket = table->buckets[hashentry];
-
-#ifdef INDEX_STAT
- index->lookups++;
-#endif
- /* go down the bucket chain, reading each entry till we are done ... */
- while (hashbucket != 0) {
- struct _hashblock *bucket;
- char *start, *end;
- int ind;
-
-#ifdef INDEX_STAT
- index->lookup_total++;
-#endif
-
- d(printf(" checking bucket %d\n", hashbucket));
-
- /* get the real bucket id from the hashbucket id */
- bucket = (struct _hashblock *)ibex_block_read(index->blocks, HASH_BLOCK(hashbucket));
- /* and get the key number within the block */
- ind = HASH_INDEX(hashbucket);
-
- g_assert(ind < bucket->used);
-
- start = &bucket->hb_keydata[bucket->hb_keys[ind].keyoffset];
- if (ind == 0) {
- end = &bucket->hb_keydata[sizeof(bucket->hb_keydata)/sizeof(bucket->hb_keydata[0])];
- } else {
- end = &bucket->hb_keydata[bucket->hb_keys[ind-1].keyoffset];
- }
- if ( (end-start) == keylen
- && memcmp(start, key, keylen) == 0) {
- return hashbucket;
- }
- hashbucket = bucket->hb_keys[ind].next;
- }
- return 0;
-}
-
-static int
-hash_info(struct _hashblock *bucket, int index)
-{
- char *start, *end;
-
- g_assert(index < bucket->used);
-
- start = &bucket->hb_keydata[bucket->hb_keys[index].keyoffset];
- if (index == 0) {
- end = &bucket->hb_keydata[sizeof(bucket->hb_keydata)/sizeof(bucket->hb_keydata[0])];
- } else {
- end = &bucket->hb_keydata[bucket->hb_keys[index-1].keyoffset];
- }
-
- return end-start;
-}
-
-
-/* TODO: get rid of hash_compress/remove and just have one a-la the disktail code */
-
-/* compresses the bucket 'bucket', removing data
- at index 'index' */
-static void
-hash_compress(struct _hashblock *bucket, int index)
-{
- int i;
- char *start, *end, *newstart;
-
- /* get start/end of area to zap */
- start = &bucket->hb_keydata[bucket->hb_keys[index].keyoffset];
- if (index == 0) {
- end = &bucket->hb_keydata[sizeof(bucket->hb_keydata)/sizeof(bucket->hb_keydata[0])];
- } else {
- end = &bucket->hb_keydata[bucket->hb_keys[index-1].keyoffset];
- }
-
- if (start == end)
- return;
-
- /* fixup data */
- newstart = &bucket->hb_keydata[bucket->hb_keys[bucket->used-1].keyoffset];
- memmove(newstart+(end-start), newstart, start-newstart);
-
- /* fixup key pointers */
- for (i=index;i<bucket->used;i++) {
- bucket->hb_keys[i].keyoffset += (end-start);
- }
- ibex_block_dirty((struct _block *)bucket);
-}
-
-/* make room 'len' for the key 'index' */
-/* assumes key 'index' is already empty (0 length) */
-static void
-hash_expand(struct _hashblock *bucket, int index, int len)
-{
- int i;
- char *end, *newstart;
-
- /* get start/end of area to zap */
- if (index == 0) {
- end = &bucket->hb_keydata[sizeof(bucket->hb_keydata)/sizeof(bucket->hb_keydata[0])];
- } else {
- end = &bucket->hb_keydata[bucket->hb_keys[index-1].keyoffset];
- }
-
- /* fixup data */
- newstart = &bucket->hb_keydata[bucket->hb_keys[bucket->used-1].keyoffset];
- memmove(newstart-len, newstart, end-newstart);
-
- /* fixup key pointers */
- for (i=index;i<bucket->used;i++) {
- bucket->hb_keys[i].keyoffset -= len;
- }
- ibex_block_dirty((struct _block *)bucket);
-}
-
-static void
-hash_remove(struct _IBEXIndex *index, const char *key, int keylen)
-{
- struct _hashroot *hashroot;
- guint32 hash;
- int hashentry;
- blockid_t hashtable;
- hashid_t hashbucket, hashprev;
- struct _hashtableblock *table;
-
- g_assert(index != 0);
- g_assert(index->root != 0);
-
- d(printf("removing hash %.*s\n", keylen, key));
-
- /* truncate the key to the maximum size */
- if (keylen > MAX_KEYLEN)
- keylen = MAX_KEYLEN;
-
- hashroot = (struct _hashroot *)ibex_block_read(index->blocks, index->root);
-
- /* find the table containing this entry */
- hash = hash_key(key, keylen) % hashroot->size;
- hashtable = hashroot->table[hash / (BLOCK_SIZE/sizeof(blockid_t))];
- table = (struct _hashtableblock *)ibex_block_read(index->blocks, hashtable);
- hashentry = hash % (BLOCK_SIZE/sizeof(blockid_t));
- /* and its bucket */
- hashbucket = table->buckets[hashentry];
-
- /* go down the bucket chain, reading each entry till we are done ... */
- hashprev = 0;
- while (hashbucket != 0) {
- struct _hashblock *bucket;
- char *start, *end;
- int ind;
-
- d(printf(" checking bucket %d\n", hashbucket));
-
- /* get the real bucket id from the hashbucket id */
- bucket = (struct _hashblock *)ibex_block_read(index->blocks, HASH_BLOCK(hashbucket));
- /* and get the key number within the block */
- ind = HASH_INDEX(hashbucket);
-
- g_assert(ind < bucket->used);
-
- start = &bucket->hb_keydata[bucket->hb_keys[ind].keyoffset];
- if (ind == 0) {
- end = &bucket->hb_keydata[sizeof(bucket->hb_keydata)/sizeof(bucket->hb_keydata[0])];
- } else {
- end = &bucket->hb_keydata[bucket->hb_keys[ind-1].keyoffset];
- }
- if ( (end-start) == keylen
- && memcmp(start, key, keylen) == 0) {
- struct _hashblock *prevbucket;
-
- if (hashprev == 0) {
- /* unlink from hash chain */
- table->buckets[hashentry] = bucket->hb_keys[HASH_INDEX(hashbucket)].next;
- /* link into free list */
- bucket->hb_keys[HASH_INDEX(hashbucket)].next = hashroot->free;
- hashroot->free = hashbucket;
- /* compress away data */
- hash_compress(bucket, HASH_INDEX(hashbucket));
- ibex_block_dirty((struct _block *)bucket);
- ibex_block_dirty((struct _block *)table);
- ibex_block_dirty((struct _block *)hashroot);
- } else {
- prevbucket = (struct _hashblock *)ibex_block_read(index->blocks, HASH_BLOCK(hashprev));
- prevbucket->hb_keys[HASH_INDEX(hashprev)].next =
- bucket->hb_keys[ind].next;
- /* link into free list */
- bucket->hb_keys[ind].next = hashroot->free;
- hashroot->free = hashbucket;
- /* compress entry */
- hash_compress(bucket, ind);
- ibex_block_dirty((struct _block *)bucket);
- ibex_block_dirty((struct _block *)prevbucket);
- ibex_block_dirty((struct _block *)hashroot);
- }
- return;
- }
- hashprev = hashbucket;
- hashbucket = bucket->hb_keys[ind].next;
- }
-}
-
-/* set where the datablock is located */
-static void
-hash_set_data_block(struct _IBEXIndex *index, hashid_t keyid, blockid_t blockid, blockid_t tail)
-{
- struct _hashblock *bucket;
-
- d(printf("setting data block hash %d to %d tail %d\n", keyid, blockid, tail));
-
- /* map to a block number */
- g_assert((blockid & (BLOCK_SIZE-1)) == 0);
- blockid >>= BLOCK_BITS;
-
- bucket = (struct _hashblock *)ibex_block_read(index->blocks, HASH_BLOCK(keyid));
- if (bucket->hb_keys[HASH_INDEX(keyid)].root != blockid
- || bucket->hb_keys[HASH_INDEX(keyid)].tail != tail) {
- bucket->hb_keys[HASH_INDEX(keyid)].tail = tail;
- bucket->hb_keys[HASH_INDEX(keyid)].root = blockid;
- ibex_block_dirty((struct _block *)bucket);
- }
-}
-
-static blockid_t
-hash_get_data_block(struct _IBEXIndex *index, hashid_t keyid, blockid_t *tail)
-{
- struct _hashblock *bucket;
-
- d(printf("getting data block hash %d\n", keyid));
-
- if (keyid == 0) {
- if (tail)
- *tail = 0;
- return 0;
- }
-
- bucket = (struct _hashblock *)ibex_block_read(index->blocks, HASH_BLOCK(keyid));
- if (tail)
- *tail = bucket->hb_keys[HASH_INDEX(keyid)].tail;
- return bucket->hb_keys[HASH_INDEX(keyid)].root << BLOCK_BITS;
-}
-
-static hashid_t
-hash_insert(struct _IBEXIndex *index, const char *key, int keylen)
-{
- struct _hashroot *hashroot;
- guint32 hash;
- int hashentry;
- blockid_t hashtable;
- hashid_t hashbucket, keybucket, keyprev, keyfree;
- struct _hashtableblock *table;
- struct _hashblock *bucket;
- int count;
-
- g_assert(index != 0);
- g_assert(index->root != 0);
-
- /* truncate the key to the maximum size */
- if (keylen > MAX_KEYLEN)
- keylen = MAX_KEYLEN;
-
- d(printf("inserting hash %.*s\n", keylen, key));
-
- hashroot = (struct _hashroot *)ibex_block_read(index->blocks, index->root);
-
- /* find the table containing this entry */
- hash = hash_key(key, keylen) % hashroot->size;
- hashtable = hashroot->table[hash / (BLOCK_SIZE/sizeof(blockid_t))];
- table = (struct _hashtableblock *)ibex_block_read(index->blocks, hashtable);
- hashentry = hash % (BLOCK_SIZE/sizeof(blockid_t));
- /* and its bucket */
- hashbucket = table->buckets[hashentry];
-
- /* now look for a free slot, first try the free list */
- /* but dont try too hard if our key is just too long ... so just scan upto
- 4 blocks, but if we dont find a space, tough ... */
- keybucket = hashroot->free;
- keyprev = 0;
- count = 0;
- while (keybucket && count<4) {
- int space;
-
- d(printf(" checking free %d\n", keybucket));
-
- /* read the bucket containing this free key */
- bucket = (struct _hashblock *)ibex_block_read(index->blocks, HASH_BLOCK(keybucket));
-
- /* check if there is enough space for the key */
- space = &bucket->hb_keydata[bucket->hb_keys[bucket->used-1].keyoffset]
- - (char *)&bucket->hb_keys[bucket->used];
- if (space >= keylen) {
- hash_expand(bucket, HASH_INDEX(keybucket), keylen);
- memcpy(&bucket->hb_keydata[bucket->hb_keys[HASH_INDEX(keybucket)].keyoffset],
- key, keylen);
-
- /* check if there is free space still in this node, and there are no other empty blocks */
- keyfree = bucket->hb_keys[HASH_INDEX(keybucket)].next;
- if ((space-keylen) >= KEY_THRESHOLD) {
- int i;
- int head = ARRAY_LEN(bucket->hb_keydata);
- int found = FALSE;
-
- for (i=0;i<bucket->used;i++) {
- if (bucket->hb_keys[i].keyoffset == head) {
- /* already have a free slot in this block, leave it */
- found = TRUE;
- break;
- }
- head = bucket->hb_keys[i].keyoffset;
- }
- if (!found) {
- /* we should link in a new free slot for this node */
- bucket->hb_keys[bucket->used].next = bucket->hb_keys[HASH_INDEX(keybucket)].next;
- bucket->hb_keys[bucket->used].keyoffset = bucket->hb_keys[bucket->used-1].keyoffset;
- keyfree = HASH_KEY(HASH_BLOCK(keybucket), bucket->used);
- bucket->used++;
- }
- }
- /* link 'keyfree' back to the parent ... */
- if (keyprev == 0) {
- hashroot->free = keyfree;
- ibex_block_dirty((struct _block *)hashroot);
- } else {
- struct _hashblock *prevbucket;
- prevbucket = (struct _hashblock *)ibex_block_read(index->blocks, HASH_BLOCK(keyprev));
- prevbucket->hb_keys[HASH_INDEX(keyprev)].next = keyfree;
- ibex_block_dirty((struct _block *)prevbucket);
- }
-
- /* link into the hash chain */
- bucket->hb_keys[HASH_INDEX(keybucket)].next = hashbucket;
- bucket->hb_keys[HASH_INDEX(keybucket)].root = 0;
- bucket->hb_keys[HASH_INDEX(keybucket)].tail = 0;
- table->buckets[hashentry] = keybucket;
- ibex_block_dirty((struct _block *)table);
- ibex_block_dirty((struct _block *)bucket);
-
- d(printf(" new key id %d\n", keybucket));
- d(printf(" new free id %d\n", hashroot->free));
-
- return keybucket;
- }
-
- count++;
- keyprev = keybucket;
- keybucket = bucket->hb_keys[HASH_INDEX(keybucket)].next;
- }
-
- /* else create a new block ... */
- keybucket = ibex_block_get(index->blocks);
- bucket = (struct _hashblock *)ibex_block_read(index->blocks, keybucket);
-
- d(printf("creating new key bucket %d\n", keybucket));
-
- memset(bucket, 0, sizeof(*bucket));
-
- bucket->used = 2;
- /* first block, is the new key */
- bucket->hb_keys[0].keyoffset = ARRAY_LEN(bucket->hb_keydata) - keylen;
- memcpy(&bucket->hb_keydata[bucket->hb_keys[0].keyoffset], key, keylen);
- bucket->hb_keys[0].next = hashbucket;
- bucket->hb_keys[0].root = 0;
- bucket->hb_keys[0].tail = 0;
-
- table->buckets[hashentry] = HASH_KEY(keybucket, 0);
-
- /* next block is a free block, link into free list */
- bucket->hb_keys[1].keyoffset = bucket->hb_keys[0].keyoffset;
- bucket->hb_keys[1].next = hashroot->free;
- hashroot->free = HASH_KEY(keybucket, 1);
-
- /* link new block into keys list */
- bucket->next = block_number(hashroot->keys);
- hashroot->keys = keybucket;
-
- ibex_block_dirty((struct _block *)hashroot);
- ibex_block_dirty((struct _block *)table);
- ibex_block_dirty((struct _block *)bucket);
-
- d(printf(" new key id %d\n", HASH_KEY(keybucket, 0)));
- d(printf(" new free id %d\n", hashroot->free));
-
- return HASH_KEY(keybucket, 0);
-}
-
-/* hash cursor functions */
-static struct _IBEXCursor *
-hash_cursor_create(struct _IBEXIndex *idx)
-{
- struct _HASHCursor *idc;
- struct _hashroot *hashroot;
-
- idc = g_malloc(sizeof(*idc));
- idc->cursor.klass = &ibex_hash_cursor_class;
- idc->cursor.index = idx;
- idc->key = 0;
- idc->index = 0;
-
- hashroot = (struct _hashroot *)ibex_block_read(idx->blocks, idx->root);
- idc->size = hashroot->size;
- idc->block = hashroot->keys;
-
- return &idc->cursor;
-}
-
-static void
-hash_cursor_close(struct _IBEXCursor *idc)
-{
- g_free(idc);
-}
-
-static guint32
-hash_cursor_next(struct _IBEXCursor *idc)
-{
- struct _HASHCursor *hc = (struct _HASHCursor *)idc;
- struct _hashblock *bucket;
-
- while (hc->block != 0) {
- bucket = (struct _hashblock *)ibex_block_read(idc->index->blocks, hc->block);
- while (hc->index < bucket->used) {
- if (hash_info(bucket, hc->index) > 0) {
- hc->key = HASH_KEY(hc->block, hc->index);
- hc->index++;
- if (hc->index == bucket->used) {
- hc->index = 0;
- hc->block = block_location(bucket->next);
- }
- return hc->key;
- }
- hc->index++;
- }
- hc->index = 0;
- hc->block = block_location(bucket->next);
- }
-
- return hc->block;
-}
-
-static char *
-hash_cursor_next_key(struct _IBEXCursor *idc, int *keylenptr)
-{
- /* TODO: this could be made slightly mroe efficient going to the structs direct.
- but i'm lazy today */
- return idc->index->klass->get_key(idc->index, idc->klass->next(idc), keylenptr);
-}
-
-/* debug */
-void ibex_hash_dump(struct _IBEXIndex *index);
-static void ibex_hash_dump_rec(struct _IBEXIndex *index, int *words, int *wordslen);
-
-void ibex_hash_dump(struct _IBEXIndex *index)
-{
- int words = 0, wordslen=0;
-
- ibex_hash_dump_rec(index, &words, &wordslen);
-
- printf("Total words = %d, bytes = %d, ave length = %f\n", words, wordslen, (double)wordslen/(double)words);
-}
-
-
-static void
-ibex_hash_dump_rec(struct _IBEXIndex *index, int *words, int *wordslen)
-{
- int i;
- struct _hashtableblock *table;
- struct _hashblock *bucket;
- struct _hashroot *hashroot;
- blockid_t hashtable;
- hashid_t hashbucket;
- extern void ibex_diskarray_dump(struct _memcache *blocks, blockid_t head, blockid_t tail);
-
-
- printf("Walking hash tree:\n");
- hashroot = (struct _hashroot *)ibex_block_read(index->blocks, index->root);
- for (i=0;i<hashroot->size;i++) {
- printf("Hash table chain: %d\n", i);
- hashtable = hashroot->table[i / (BLOCK_SIZE/sizeof(blockid_t))];
- table = (struct _hashtableblock *)ibex_block_read(index->blocks, hashtable);
- hashbucket = table->buckets[i % (BLOCK_SIZE/sizeof(blockid_t))];
- while (hashbucket) {
- int len;
-
- *words = *words + 1;
-
- bucket = (struct _hashblock *)ibex_block_read(index->blocks, HASH_BLOCK(hashbucket));
- printf(" bucket %d: [used %d]", hashbucket, bucket->used);
- if (HASH_INDEX(hashbucket) == 0) {
- len = ARRAY_LEN(bucket->hb_keydata) -
- bucket->hb_keys[HASH_INDEX(hashbucket)].keyoffset;
- } else {
- len = bucket->hb_keys[HASH_INDEX(hashbucket)-1].keyoffset -
- bucket->hb_keys[HASH_INDEX(hashbucket)].keyoffset;
- }
- printf("'%.*s' = %d next=%d\n", len, &bucket->hb_keydata[bucket->hb_keys[HASH_INDEX(hashbucket)].keyoffset],
- bucket->hb_keys[HASH_INDEX(hashbucket)].root,
- bucket->hb_keys[HASH_INDEX(hashbucket)].next);
-
- *wordslen = *wordslen + len;
-
- ibex_diskarray_dump(index->blocks,
- bucket->hb_keys[HASH_INDEX(hashbucket)].root << BLOCK_BITS,
- bucket->hb_keys[HASH_INDEX(hashbucket)].tail);
-
- hashbucket = bucket->hb_keys[HASH_INDEX(hashbucket)].next;
- }
- /* make sure its still in the cache */
- hashroot = (struct _hashroot *)ibex_block_read(index->blocks, index->root);
- }
-
- hashbucket = hashroot->free;
- printf("Dumping free lists ..\n");
- while (hashbucket) {
- printf(" %d", hashbucket);
- bucket = (struct _hashblock *)ibex_block_read(index->blocks, HASH_BLOCK(hashbucket));
- hashbucket = bucket->hb_keys[HASH_INDEX(hashbucket)].next;
- }
- printf("\n");
-}
-
-#if 0
-int main(int argc, char **argv)
-{
- struct _memcache *bc;
- struct _IBEXIndex *hash;
- int i;
-
- bc = ibex_block_cache_open("index.db", O_CREAT|O_RDWR, 0600);
- hash = ibex_hash_class.create(bc, 1024);
- for (i=0;i<10000;i++) {
- char key[16];
- sprintf(key, "key %d", i);
- ibex_hash_class.insert(hash, key, strlen(key));
- }
-
- for (i=500;i<1000;i++) {
- char key[16];
- sprintf(key, "key %d", i);
- ibex_hash_class.remove(hash, key, strlen(key));
- }
-
- for (i=500;i<1000;i++) {
- char key[16];
- sprintf(key, "key %d", i);
- ibex_hash_class.insert(hash, key, strlen(key));
- }
-
-
- ibex_hash_dump(hash);
-
- for (i=0;i<2000;i++) {
- char key[16], *lookup;
- hashid_t keyid;
- blockid_t root, tail;
-
- sprintf(key, "key %d", i);
- keyid = ibex_hash_class.find(hash, key, strlen(key));
- lookup = ibex_hash_class.get_key(hash, keyid, 0);
- root = ibex_hash_class.get_data(hash, keyid, &tail);
- printf("key %s = %d = '%s' root:%d tail:%d \n", key, keyid, lookup, root, tail);
- g_free(lookup);
- }
-
- ibex_hash_class.close(hash);
- ibex_block_cache_close(bc);
- return 0;
-}
-
-#endif
diff --git a/libibex/ibex.h b/libibex/ibex.h
deleted file mode 100644
index 03635e275d..0000000000
--- a/libibex/ibex.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-/*
- * Copyright (C) 2000 Helix Code, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public License
- * as published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with the Gnome Library; see the file COPYING.LIB. If not,
- * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-#ifndef IBEX_H
-#define IBEX_H
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <glib.h>
-
-struct ibex;
-typedef struct ibex ibex;
-
-/* All functions that can fail set errno and return NULL or -1 on
- * failure.
- */
-
-/* Open the named ibex index file. If CREATE is true, create the file
- * if it doesn't already exist.
- */
-ibex *ibex_open (char *file, int flags, int mode);
-
-/* Write the ibex to disk. */
-int ibex_write (ibex *ib);
-
-/* only save if ibex has changed. */
-int ibex_save (ibex *ib);
-
-/* Write the ibex to disk if it has changed, and free all memory
- * associated with it.
- */
-int ibex_close (ibex *ib);
-
-
-
-/* Index the named file. (If the FILENAME is already in the index,
- * remove the old copy.
- */
-int ibex_index_file (ibex *ib, char *filename);
-
-/* Index LEN bytes off FD, using NAME as the filename in the index.
- * (If NAME already exists in the index, this adds more data to it.)
- */
-int ibex_index_fd (ibex *ib, char *name, int fd, size_t len);
-
-/* Like ibex_index_fd, but with a buffer rather than a file descriptor.
- * The buffer does not need to be '\0'-terminated. If UNREAD is not
- * NULL, then the indexer won't assume that the buffer ends on a word
- * boundary, and will return (in UNREAD) the number of bytes from the
- * end of the buffer that it didn't use, if any.
- */
-int ibex_index_buffer (ibex *ib, char *name, char *buffer,
- size_t len, size_t *unread);
-
-/* Remove entries for a given file from the index. (Most of the removal
- * isn't actually done until the file is written out to disk, so this
- * is very fast.)
- */
-void ibex_unindex (ibex *ib, char *name);
-
-/* Rename a file in the index. (This is also fast.) */
-void ibex_rename (ibex *ib, char *oldfilename, char *newfilename);
-
-
-
-/* Find a word in the index. Returns an array of strings: the caller
- * should free the array, but should not free or modify the strings.
- */
-GPtrArray *ibex_find (ibex *ib, char *word);
-
-/* Find if a word is contained in a specific name reference.
- */
-gboolean ibex_find_name (ibex *ib, char *name, char *word);
-
-/* has a file been indexed?
- */
-gboolean ibex_contains_name(ibex *ib, char *name);
-
-/* Return all the files containing all of the words in the given
- * array. Returned data is like with ibex_find.
- */
-GPtrArray *ibex_find_all (ibex *ib, GPtrArray *words);
-
-/* Debug function, dumps the whole index tree, showing removed files as well */
-void ibex_dump_all (ibex *ib);
-
-#endif /* ! IBEX_H */
-
diff --git a/libibex/ibex_block.c b/libibex/ibex_block.c
deleted file mode 100644
index faeee232ac..0000000000
--- a/libibex/ibex_block.c
+++ /dev/null
@@ -1,345 +0,0 @@
-
-#include <glib.h>
-
-#include <stdio.h>
-#include <unicode.h>
-#include <ctype.h>
-#include <string.h>
-#include <errno.h>
-
-#include "ibex_internal.h"
-
-#define d(x)
-
-static signed char utf8_trans[] = {
- 'A', 'A', 'A', 'A', 'A', 'A', -1, 'C', 'E', 'E', 'E', 'E', 'I', 'I',
- 'I', 'I', -2, 'N', 'O', 'O', 'O', 'O', 'O', '*', 'O', 'U', 'U', 'U',
- 'U', 'Y', -3, -4, 'a', 'a', 'a', 'a', 'a', 'a', -5, 'c', 'e', 'e',
- 'e', 'e', 'i', 'i', 'i', 'i', -6, 'n', 'o', 'o', 'o', 'o', 'o', '/',
- 'o', 'u', 'u', 'u', 'u', 'y', -7, 'y', 'A', 'a', 'A', 'a', 'A', 'a',
- 'C', 'c', 'C', 'c', 'C', 'c', 'C', 'c', 'D', 'd', 'D', 'd', 'E', 'e',
- 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'G', 'g', 'G', 'g', 'G', 'g',
- 'G', 'g', 'H', 'h', 'H', 'h', 'I', 'i', 'I', 'i', 'I', 'i', 'I', 'i',
- 'I', 'i', -8, -9, 'J', 'j', 'K', 'k', 'k', 'L', 'l', 'L', 'l', 'L',
- 'l', 'L', 'l', 'L', 'l', 'N', 'n', 'N', 'n', 'N', 'n', 'n', -10, -11,
- 'O', 'o', 'O', 'o', 'O', 'o', -12, -13, 'R', 'r', 'R', 'r', 'R', 'r',
- 'S', 'r', 'S', 's', 'S', 's', 'S', 's', 'T', 't', 'T', 't', 'T', 't',
- 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'W', 'w',
- 'Y', 'y', 'Y', 'Z', 'z', 'Z', 'z', 'Z', 'z', 's'
-};
-
-static char *utf8_long_trans[] = {
- "AE", "TH", "TH", "ss", "ae", "th", "th", "IJ", "ij",
- "NG", "ng", "OE", "oe"
-};
-
-/* This is a bit weird. It takes pointers to the start and end (actually
- * just past the end) of a UTF-8-encoded word, and a buffer at least 1
- * byte longer than the length of the word. It copies the word into the
- * buffer in all lowercase without accents, and splits up ligatures.
- * (Since any ligature would be a multi-byte character in UTF-8, splitting
- * them into two US-ASCII characters won't overrun the buffer.)
- *
- * It is not safe to call this routine with bad UTF-8.
- */
-static void
-ibex_normalise_word(char *start, char *end, char *buf)
-{
- unsigned char *s, *d;
- unicode_char_t uc;
-
- s = (unsigned char *)start;
- d = (unsigned char *)buf;
- while (s < (unsigned char *)end) {
- if (*s < 0x80) {
- /* US-ASCII character: copy unless it's
- * an apostrophe.
- */
- if (*s != '\'')
- *d++ = tolower (*s);
- s++;
- } else {
- char *next = unicode_get_utf8 (s, &uc);
- if (uc >= 0xc0 && uc < 0xc0 + sizeof (utf8_trans)) {
- signed char ch = utf8_trans[uc - 0xc0];
- if (ch > 0)
- *d++ = tolower (ch);
- else {
- *d++ = tolower (utf8_long_trans[-ch - 1][0]);
- *d++ = tolower (utf8_long_trans[-ch - 1][1]);
- }
- s = next;
- } else {
- while (s < (unsigned char *)next)
- *d++ = *s++;
- }
- }
- }
- *d = '\0';
-}
-
-enum { IBEX_ALPHA, IBEX_NONALPHA, IBEX_INVALID, IBEX_INCOMPLETE };
-
-/* This incorporates parts of libunicode, because there's no way to
- * force libunicode to not read past a certain point.
- */
-static int
-utf8_category (char *sp, char **snp, char *send)
-{
- unsigned char *p = (unsigned char *)sp, **np = (unsigned char **)snp;
- unsigned char *end = (unsigned char *)send;
-
- if (isascii (*p)) {
- *np = p + 1;
- if (isalpha (*p) || *p == '\'')
- return IBEX_ALPHA;
- return IBEX_NONALPHA;
- } else {
- unicode_char_t uc;
- int more;
-
- if ((*p & 0xe0) == 0xc0) {
- more = 1;
- uc = *p & 0x1f;
- } else if ((*p & 0xf0) == 0xe0) {
- more = 2;
- uc = *p & 0x0f;
- } else if ((*p & 0xf8) == 0xf0) {
- more = 3;
- uc = *p & 0x07;
- } else if ((*p & 0xfc) == 0xf8) {
- more = 4;
- uc = *p & 0x03;
- } else if ((*p & 0xfe) == 0xfc) {
- more = 5;
- uc = *p & 0x01;
- } else
- return IBEX_INVALID;
-
- if (p + more > end)
- return IBEX_INCOMPLETE;
-
- while (more--) {
- if ((*++p & 0xc0) != 0x80)
- return IBEX_INVALID;
- uc <<= 6;
- uc |= *p & 0x3f;
- }
-
- *np = p + 1;
- if (unicode_isalpha (uc))
- return IBEX_ALPHA;
- else
- return IBEX_NONALPHA;
- }
-}
-
-/**
- * ibex_index_buffer: the lowest-level ibex indexing interface
- * @ib: an ibex
- * @name: the name of the file being indexed
- * @buffer: a buffer containing data from the file
- * @len: the length of @buffer
- * @unread: an output argument containing the number of unread bytes
- *
- * This routine indexes up to @len bytes from @buffer into @ib.
- * If @unread is NULL, the indexer assumes that the buffer ends on a
- * word boundary, and will index all the way to the end of the
- * buffer. If @unread is not NULL, and the buffer ends with an
- * alphabetic character, the indexer will assume that the buffer has
- * been cut off in the middle of a word, and return the number of
- * un-indexed bytes at the end of the buffer in *@unread. The caller
- * should then read in more data through whatever means it has
- * and pass in the unread bytes from the original buffer, followed
- * by the new data, on its next call.
- *
- * Return value: 0 on success, -1 on failure.
- **/
-int
-ibex_index_buffer (ibex *ib, char *name, char *buffer, size_t len, size_t *unread)
-{
- char *p, *q, *nq, *end, *word;
- int wordsiz, cat = 0;
- GHashTable *words = g_hash_table_new(g_str_hash, g_str_equal);
- GPtrArray *wordlist = g_ptr_array_new();
- int i, ret=-1;
-
- if (unread)
- *unread = 0;
-
- end = buffer + len;
- wordsiz = 20;
- word = g_malloc (wordsiz);
-
- p = buffer;
- while (p < end) {
- while (p < end) {
- cat = utf8_category (p, &q, end);
- if (cat != IBEX_NONALPHA)
- break;
- p = q;
- }
- if (p == end) {
- goto done;
- } else if (cat == IBEX_INVALID) {
- goto error;
- } else if (cat == IBEX_INCOMPLETE)
- q = end;
-
- while (q < end) {
- cat = utf8_category (q, &nq, end);
- if (cat != IBEX_ALPHA)
- break;
- q = nq;
- }
- if (cat == IBEX_INVALID ||
- (cat == IBEX_INCOMPLETE && !unread)) {
- goto error;
- } else if (cat == IBEX_INCOMPLETE || (q == end && unread)) {
- *unread = end - p;
- goto done;
- }
-
- if (wordsiz < q - p + 1) {
- wordsiz = q - p + 1;
- word = g_realloc (word, wordsiz);
- }
- ibex_normalise_word (p, q, word);
- if (word[0]) {
- if (g_hash_table_lookup(words, word) == 0) {
- char *newword = g_strdup(word);
- g_ptr_array_add(wordlist, newword);
- g_hash_table_insert(words, newword, name);
- }
- }
- p = q;
- }
-done:
- IBEX_LOCK(ib);
-
- d(printf("name %s count %d size %d\n", name, wordlist->len, len));
- if (!ib->predone) {
- ib->words->klass->index_pre(ib->words);
- ib->predone = TRUE;
- }
- ib->words->klass->add_list(ib->words, name, wordlist);
-
- IBEX_UNLOCK(ib);
-
- ret = 0;
-error:
- for (i=0;i<wordlist->len;i++)
- g_free(wordlist->pdata[i]);
- g_ptr_array_free(wordlist, TRUE);
- g_hash_table_destroy(words);
- g_free (word);
- return ret;
-}
-
-
-ibex *ibex_open (char *file, int flags, int mode)
-{
- ibex *ib;
-
- ib = g_malloc0(sizeof(*ib));
- ib->blocks = ibex_block_cache_open(file, flags, mode);
- if (ib->blocks == 0) {
- g_warning("create: Error occured?: %s\n", strerror(errno));
- g_free(ib);
- return NULL;
- }
- /* FIXME: the blockcache or the wordindex needs to manage the other one */
- ib->words = ib->blocks->words;
-
-#ifdef ENABLE_THREADS
- ib->lock = g_mutex_new();
-#endif
- return ib;
-}
-
-int ibex_save (ibex *ib)
-{
- d(printf("syncing database\n"));
-
- IBEX_LOCK(ib);
-
- if (ib->predone) {
- ib->words->klass->index_post(ib->words);
- ib->predone = FALSE;
- }
- ib->words->klass->sync(ib->words);
- /* FIXME: some return */
- ibex_block_cache_sync(ib->blocks);
-
- IBEX_UNLOCK(ib);
-
- return 0;
-}
-
-int ibex_close (ibex *ib)
-{
- int ret = 0;
-
- d(printf("closing database\n"));
-
- if (ib->predone) {
- ib->words->klass->index_post(ib->words);
- ib->predone = FALSE;
- }
-
- ib->words->klass->close(ib->words);
- ibex_block_cache_close(ib->blocks);
-#ifdef ENABLE_THREADS
- g_mutex_free(ib->lock);
-#endif
- g_free(ib);
- return ret;
-}
-
-void ibex_unindex (ibex *ib, char *name)
-{
- d(printf("trying to unindex '%s'\n", name));
- IBEX_LOCK(ib);
- ib->words->klass->unindex_name(ib->words, name);
- IBEX_UNLOCK(ib);
-}
-
-GPtrArray *ibex_find (ibex *ib, char *word)
-{
- char *normal;
- int len;
- GPtrArray *ret;
-
- len = strlen(word);
- normal = alloca(len+1);
- ibex_normalise_word(word, word+len, normal);
- IBEX_LOCK(ib);
- ret = ib->words->klass->find(ib->words, normal);
- IBEX_UNLOCK(ib);
- return ret;
-}
-
-gboolean ibex_find_name (ibex *ib, char *name, char *word)
-{
- char *normal;
- int len;
- gboolean ret;
-
- len = strlen(word);
- normal = alloca(len+1);
- ibex_normalise_word(word, word+len, normal);
- IBEX_LOCK(ib);
- ret = ib->words->klass->find_name(ib->words, name, normal);
- IBEX_UNLOCK(ib);
- return ret;
-}
-
-gboolean ibex_contains_name(ibex *ib, char *name)
-{
- gboolean ret;
-
- IBEX_LOCK(ib);
- ret = ib->words->klass->contains_name(ib->words, name);
- IBEX_UNLOCK(ib);
- return ret;
-}
diff --git a/libibex/ibex_db.c b/libibex/ibex_db.c
deleted file mode 100644
index 1ef0b3d1bc..0000000000
--- a/libibex/ibex_db.c
+++ /dev/null
@@ -1,512 +0,0 @@
-
-#include <glib.h>
-
-#include <db.h>
-#include <stdio.h>
-#include <unicode.h>
-#include <ctype.h>
-
-#include "ibex_internal.h"
-
-#define d(x)
-
-/*
- Uses 2 databases:
-
- names - HASH - lists which files are stored
- words - BTREE - index words to files
-
-*/
-
-static int
-db_delete_name(DB *db, const char *name)
-{
- DBC *cursor;
- int err, namelen;
- DBT key, data;
-
- printf("called to delete name %s\n", name);
- return 0;
-
- err = db->cursor(db, NULL, &cursor, 0);
- if (err != 0) {
- printf("Error occured?: %s\n", db_strerror(err));
- return err;
- }
-
- memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
- namelen = strlen(name);
-
- err = cursor->c_get(cursor, &key, &data, DB_FIRST);
- while (err == 0) {
- if (data.size == namelen && memcmp(data.data, name, namelen) == 0) {
- d(printf("Found match, removing it\n"));
- cursor->c_del(cursor, 0);
- } else {
- data.size = namelen;
- data.data = (void *)name;
- if (cursor->c_get(cursor, &key, &data, DB_GET_BOTH) == 0) {
- d(printf("seek to match, removing it\n"));
- cursor->c_del(cursor, 0);
- } else {
- d(printf("seek to match, not found\n"));
- }
- }
- err = cursor->c_get(cursor, &key, &data, DB_NEXT_NODUP);
- }
-
- cursor->c_close(cursor);
-
- return 0;
-}
-
-static int
-db_index_words(DB *db, char *name, char **words)
-{
- DBT key, data;
- int err = 0;
- char *word;
-
- memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
- data.data = name;
- data.size = strlen(name);
- for (;(word=*words);words++) {
- /* normalise word first ... */
- key.data = word;
- key.size = strlen(word);
-
- err = db->put(db, NULL, &key, &data, 0);
- if (err != 0)
- printf("Error occured?: %s\n", db_strerror(err));
- }
-
- return err;
-}
-
-static int
-db_index_word(DB *db, char *name, char *word)
-{
- DBT key, data;
- int err = 0;
-
- memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
- data.data = name;
- data.size = strlen(name);
- key.data = word;
- key.size = strlen(word);
-
- err = db->put(db, NULL, &key, &data, 0);
- if (err != 0)
- printf("Error occured?: %s\n", db_strerror(err));
-
- return err;
-}
-
-static GPtrArray *
-db_find(DB *db, char *word)
-{
- DBT key, data;
- int err;
- DBC *cursor;
- GPtrArray *matches = g_ptr_array_new();
-
- memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
-
- err = db->cursor(db, NULL, &cursor, 0);
- if (err != 0) {
- printf("Error occured?: %s\n", db_strerror(err));
- return matches;
- }
-
- memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
-
- key.size = strlen(word);
- key.data = word;
-
- err = cursor->c_get(cursor, &key, &data, DB_SET);
- while (err == 0) {
- char *name;
- name = g_malloc(data.size+1);
- memcpy(name, data.data, data.size);
- name[data.size] = '\0';
- g_ptr_array_add(matches, name);
- err = cursor->c_get(cursor, &key, &data, DB_NEXT_DUP);
- }
- if (err != DB_NOTFOUND) {
- printf("Error occured?: %s\n", db_strerror(err));
- /* what to do? doesn't matter too much its just a search ... */
- }
-
- cursor->c_close(cursor);
-
- return matches;
-}
-
-/* check that db contains key @word, optionally with contents @name */
-static gboolean
-db_contains_word(DB *db, char *name, char *word)
-{
- DBT key, data;
-
- memset(&key, 0, sizeof(key));
- memset(&data, 0, sizeof(data));
-
- if (name != NULL) {
- data.size = strlen(name);
- data.data = name;
- }
- key.size = strlen(word);
- key.data = word;
-
- return db->get(db, NULL, &key, &data, name?DB_GET_BOTH:DB_SET) == 0;
-}
-
-static int
-db_delete_word(DB *db, char *word)
-{
- DBT key;
-
- memset(&key, 0, sizeof(key));
- key.size = strlen(word);
- key.data = word;
- return db->del(db, NULL, &key, 0);
-}
-
-
-static signed char utf8_trans[] = {
- 'A', 'A', 'A', 'A', 'A', 'A', -1, 'C', 'E', 'E', 'E', 'E', 'I', 'I',
- 'I', 'I', -2, 'N', 'O', 'O', 'O', 'O', 'O', '*', 'O', 'U', 'U', 'U',
- 'U', 'Y', -3, -4, 'a', 'a', 'a', 'a', 'a', 'a', -5, 'c', 'e', 'e',
- 'e', 'e', 'i', 'i', 'i', 'i', -6, 'n', 'o', 'o', 'o', 'o', 'o', '/',
- 'o', 'u', 'u', 'u', 'u', 'y', -7, 'y', 'A', 'a', 'A', 'a', 'A', 'a',
- 'C', 'c', 'C', 'c', 'C', 'c', 'C', 'c', 'D', 'd', 'D', 'd', 'E', 'e',
- 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'G', 'g', 'G', 'g', 'G', 'g',
- 'G', 'g', 'H', 'h', 'H', 'h', 'I', 'i', 'I', 'i', 'I', 'i', 'I', 'i',
- 'I', 'i', -8, -9, 'J', 'j', 'K', 'k', 'k', 'L', 'l', 'L', 'l', 'L',
- 'l', 'L', 'l', 'L', 'l', 'N', 'n', 'N', 'n', 'N', 'n', 'n', -10, -11,
- 'O', 'o', 'O', 'o', 'O', 'o', -12, -13, 'R', 'r', 'R', 'r', 'R', 'r',
- 'S', 'r', 'S', 's', 'S', 's', 'S', 's', 'T', 't', 'T', 't', 'T', 't',
- 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'W', 'w',
- 'Y', 'y', 'Y', 'Z', 'z', 'Z', 'z', 'Z', 'z', 's'
-};
-
-static char *utf8_long_trans[] = {
- "AE", "TH", "TH", "ss", "ae", "th", "th", "IJ", "ij",
- "NG", "ng", "OE", "oe"
-};
-
-/* This is a bit weird. It takes pointers to the start and end (actually
- * just past the end) of a UTF-8-encoded word, and a buffer at least 1
- * byte longer than the length of the word. It copies the word into the
- * buffer in all lowercase without accents, and splits up ligatures.
- * (Since any ligature would be a multi-byte character in UTF-8, splitting
- * them into two US-ASCII characters won't overrun the buffer.)
- *
- * It is not safe to call this routine with bad UTF-8.
- */
-static void
-ibex_normalise_word(char *start, char *end, char *buf)
-{
- unsigned char *s, *d;
- unicode_char_t uc;
-
- s = (unsigned char *)start;
- d = (unsigned char *)buf;
- while (s < (unsigned char *)end) {
- if (*s < 0x80) {
- /* US-ASCII character: copy unless it's
- * an apostrophe.
- */
- if (*s != '\'')
- *d++ = tolower (*s);
- s++;
- } else {
- char *next = unicode_get_utf8 (s, &uc);
- if (uc >= 0xc0 && uc < 0xc0 + sizeof (utf8_trans)) {
- signed char ch = utf8_trans[uc - 0xc0];
- if (ch > 0)
- *d++ = tolower (ch);
- else {
- *d++ = tolower (utf8_long_trans[-ch - 1][0]);
- *d++ = tolower (utf8_long_trans[-ch - 1][1]);
- }
- s = next;
- } else {
- while (s < (unsigned char *)next)
- *d++ = *s++;
- }
- }
- }
- *d = '\0';
-}
-
-enum { IBEX_ALPHA, IBEX_NONALPHA, IBEX_INVALID, IBEX_INCOMPLETE };
-
-/* This incorporates parts of libunicode, because there's no way to
- * force libunicode to not read past a certain point.
- */
-static int
-utf8_category (char *sp, char **snp, char *send)
-{
- unsigned char *p = (unsigned char *)sp, **np = (unsigned char **)snp;
- unsigned char *end = (unsigned char *)send;
-
- if (isascii (*p)) {
- *np = p + 1;
- if (isalpha (*p) || *p == '\'')
- return IBEX_ALPHA;
- return IBEX_NONALPHA;
- } else {
- unicode_char_t uc;
- int more;
-
- if ((*p & 0xe0) == 0xc0) {
- more = 1;
- uc = *p & 0x1f;
- } else if ((*p & 0xf0) == 0xe0) {
- more = 2;
- uc = *p & 0x0f;
- } else if ((*p & 0xf8) == 0xf0) {
- more = 3;
- uc = *p & 0x07;
- } else if ((*p & 0xfc) == 0xf8) {
- more = 4;
- uc = *p & 0x03;
- } else if ((*p & 0xfe) == 0xfc) {
- more = 5;
- uc = *p & 0x01;
- } else
- return IBEX_INVALID;
-
- if (p + more > end)
- return IBEX_INCOMPLETE;
-
- while (more--) {
- if ((*++p & 0xc0) != 0x80)
- return IBEX_INVALID;
- uc <<= 6;
- uc |= *p & 0x3f;
- }
-
- *np = p + 1;
- if (unicode_isalpha (uc))
- return IBEX_ALPHA;
- else
- return IBEX_NONALPHA;
- }
-}
-
-static void
-do_insert_words(char *key, char *name, ibex *ib)
-{
- db_index_word(ib->words, name, key);
- g_free(key);
-}
-
-static void
-do_free_words(char *key, char *name, void *data)
-{
- g_free(key);
-}
-
-/**
- * ibex_index_buffer: the lowest-level ibex indexing interface
- * @ib: an ibex
- * @name: the name of the file being indexed
- * @buffer: a buffer containing data from the file
- * @len: the length of @buffer
- * @unread: an output argument containing the number of unread bytes
- *
- * This routine indexes up to @len bytes from @buffer into @ib.
- * If @unread is NULL, the indexer assumes that the buffer ends on a
- * word boundary, and will index all the way to the end of the
- * buffer. If @unread is not NULL, and the buffer ends with an
- * alphabetic character, the indexer will assume that the buffer has
- * been cut off in the middle of a word, and return the number of
- * un-indexed bytes at the end of the buffer in *@unread. The caller
- * should then read in more data through whatever means it has
- * and pass in the unread bytes from the original buffer, followed
- * by the new data, on its next call.
- *
- * Return value: 0 on success, -1 on failure.
- **/
-int
-ibex_index_buffer (ibex *ib, char *name, char *buffer, size_t len, size_t *unread)
-{
- char *p, *q, *nq, *end, *word;
- int wordsiz, cat;
- GHashTable *words = g_hash_table_new(g_str_hash, g_str_equal);
-
- if (unread)
- *unread = 0;
-
- end = buffer + len;
- wordsiz = 20;
- word = g_malloc (wordsiz);
-
- p = buffer;
- while (p < end) {
- while (p < end) {
- cat = utf8_category (p, &q, end);
- if (cat != IBEX_NONALPHA)
- break;
- p = q;
- }
- if (p == end) {
- goto done;
- } else if (cat == IBEX_INVALID) {
- goto error;
- } else if (cat == IBEX_INCOMPLETE)
- q = end;
-
- while (q < end) {
- cat = utf8_category (q, &nq, end);
- if (cat != IBEX_ALPHA)
- break;
- q = nq;
- }
- if (cat == IBEX_INVALID ||
- (cat == IBEX_INCOMPLETE && !unread)) {
- goto error;
- } else if (cat == IBEX_INCOMPLETE || (q == end && unread)) {
- *unread = end - p;
- goto done;
- }
-
- if (wordsiz < q - p + 1) {
- wordsiz = q - p + 1;
- word = g_realloc (word, wordsiz);
- }
- ibex_normalise_word (p, q, word);
- if (word[0]) {
- if (g_hash_table_lookup(words, word) == 0) {
- g_hash_table_insert(words, g_strdup(word), name);
- }
- }
- p = q;
- }
-done:
- /* FIXME: do this and inserts within a transaction */
- if (!db_contains_word(ib->names, NULL, name)) {
- printf("adding '%s' to database\n", name);
- db_index_word(ib->names, "", name);
- }
- g_hash_table_foreach(words, (GHFunc)do_insert_words, ib);
- g_hash_table_destroy(words);
- g_free (word);
- return 0;
-error:
- g_hash_table_foreach(words, (GHFunc)do_free_words, NULL);
- g_hash_table_destroy(words);
- g_free (word);
- return -1;
-}
-
-
-ibex *ibex_open (char *file, int flags, int mode)
-{
- ibex *ib;
- u_int32_t dbflags = 0;
- int err;
-
- ib = g_malloc0(sizeof(*ib));
- err = db_create(&ib->words, NULL, 0);
- if (err != 0) {
- g_warning("create: Error occured?: %s\n", db_strerror(err));
- g_free(ib);
- return NULL;
- }
-
- err = ib->words->set_flags(ib->words, DB_DUP);
- if (err != 0) {
- g_warning("set flags: Error occured?: %s\n", db_strerror(err));
- ib->words->close(ib->words, 0);
- g_free(ib);
- return NULL;
- }
-
- if (flags & O_CREAT)
- dbflags |= DB_CREATE;
- if (flags & O_EXCL)
- dbflags |= DB_EXCL;
- if (flags & O_RDONLY)
- dbflags |= DB_RDONLY;
-
- /* 1MB cache size? */
- ib->words->set_cachesize(ib->words, 0, 1000000, 0);
-
- err = ib->words->open(ib->words, file, "words", DB_BTREE, dbflags, mode);
- if (err != 0) {
- printf("open: Error occured?: %s\n", db_strerror(err));
- ib->words->close(ib->words, 0);
- g_free(ib);
- return NULL;
- }
-
- /* FIXME: check returns */
- err = db_create(&ib->names, NULL, 0);
- err = ib->names->open(ib->names, file, "names", DB_HASH, dbflags, mode);
-
- return ib;
-}
-
-int ibex_save (ibex *ib)
-{
- printf("syncing database\n");
- ib->names->sync(ib->names, 0);
- return ib->words->sync(ib->words, 0);
-}
-
-int ibex_close (ibex *ib)
-{
- int ret;
-
- printf("closing database\n");
-
- ret = ib->names->close(ib->names, 0);
- ret = ib->words->close(ib->words, 0);
- g_free(ib);
- return ret;
-}
-
-void ibex_unindex (ibex *ib, char *name)
-{
- printf("trying to unindex '%s'\n", name);
- if (db_contains_word(ib->names, NULL, name)) {
- /* FIXME: do within transaction? */
- db_delete_name(ib->words, name);
- db_delete_word(ib->names, name);
- }
-}
-
-GPtrArray *ibex_find (ibex *ib, char *word)
-{
- char *normal;
- int len;
-
- len = strlen(word);
- normal = alloca(len+1);
- ibex_normalise_word(word, word+len, normal);
- return db_find(ib->words, normal);
-}
-
-gboolean ibex_find_name (ibex *ib, char *name, char *word)
-{
- char *normal;
- int len;
-
- len = strlen(word);
- normal = alloca(len+1);
- ibex_normalise_word(word, word+len, normal);
- return db_contains_word(ib->words, name, normal);
-}
-
-gboolean ibex_contains_name(ibex *ib, char *name)
-{
- return db_contains_word(ib->names, NULL, name);
-}
diff --git a/libibex/ibex_internal.h b/libibex/ibex_internal.h
deleted file mode 100644
index f2212799c6..0000000000
--- a/libibex/ibex_internal.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-/*
- * Copyright (C) 2000 Helix Code, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public License
- * as published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with the Gnome Library; see the file COPYING.LIB. If not,
- * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-#include "config.h"
-
-#include <glib.h>
-
-#include "ibex.h"
-#include "block.h"
-#include "wordindex.h"
-
-struct ibex {
- char *path;
- struct _memcache *blocks;
- struct _IBEXWord *words;
- int predone;
-
- /* sigh i hate glib's mutex stuff too */
-#ifdef ENABLE_THREADS
- GMutex *lock;
-#endif
-
-};
-
-#ifdef ENABLE_THREADS
-/*#define IBEX_LOCK(ib) (printf(__FILE__ "%d: %s: locking ibex\n", __LINE__, __FUNCTION__), g_mutex_lock(ib->lock))
- #define IBEX_UNLOCK(ib) (printf(__FILE__ "%d: %s: unlocking ibex\n", __LINE__, __FUNCTION__), g_mutex_unlock(ib->lock))*/
-#define IBEX_LOCK(ib) (g_mutex_lock(ib->lock))
-#define IBEX_UNLOCK(ib) (g_mutex_unlock(ib->lock))
-#else
-#define IBEX_LOCK(ib)
-#define IBEX_UNLOCK(ib)
-#endif
-
diff --git a/libibex/index.h b/libibex/index.h
deleted file mode 100644
index 442cba8e25..0000000000
--- a/libibex/index.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
- *
- * Copyright (C) 2000 Helix Code, Inc.
- *
- * Authors: Michael Zucchi <notzed@helixcode.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public License
- * as published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with the Gnome Library; see the file COPYING.LIB. If not,
- * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-#ifndef _INDEX_H
-#define _INDEX_H
-
-/* an indexing 'class' maps a key to 1 piece of info */
-
-/*#define INDEX_STAT*/
-
-struct _IBEXCursor {
- struct _IBEXCursorClass *klass;
- struct _IBEXIndex *index;
-};
-
-struct _IBEXCursorClass {
- void (*close)(struct _IBEXCursor *);
-
- guint32 (*next)(struct _IBEXCursor *);
- char *(*next_key)(struct _IBEXCursor *, int *keylenptr);
-};
-
-struct _IBEXIndex {
- struct _IBEXIndexClass *klass;
- struct _memcache *blocks;
- blockid_t root; /* root block of ondisk index data */
-#ifdef INDEX_STAT
- int lookups; /* how many lookups */
- int lookup_total; /* how many blocks loaded for all lookups (hash chain depth) */
-#endif
-};
-
-struct _IBEXIndexClass {
-
- struct _IBEXIndex *(*create)(struct _memcache *bc, int size);
- struct _IBEXIndex *(*open)(struct _memcache *bc, blockid_t root);
-
- int (*sync)(struct _IBEXIndex *);
- int (*close)(struct _IBEXIndex *);
-
- /* lookup a key in the index, returns the keyid of this item, or 0 if not found */
- guint32 (*find)(struct _IBEXIndex *, const char *key, int keylen);
-
- /* remove a key from the index */
- void (*remove)(struct _IBEXIndex *, const char *key, int keylen);
-
- /* insert a new key into the index, the keyid is returned */
- guint32 (*insert)(struct _IBEXIndex *, const char *key, int keylen);
-
- /* get the key contents/key length from the keyid */
- char *(*get_key)(struct _IBEXIndex *, guint32 keyid, int *keylenptr);
-
- /* set the key contents based on the keyid */
- void (*set_data)(struct _IBEXIndex *, guint32 keyid, blockid_t datablock, blockid_t tail);
-
- /* get the key contents based on the keyid */
- blockid_t (*get_data)(struct _IBEXIndex *, guint32 keyid, blockid_t *tail);
-
- /* get a cursor for iterating over all contents */
- struct _IBEXCursor *(*get_cursor)(struct _IBEXIndex *);
-};
-
-/* a storage class, stores lists of lists of id's */
-
-struct _IBEXStore {
- struct _IBEXStoreClass *klass;
- struct _memcache *blocks;
-};
-
-struct _IBEXStoreClass {
- struct _IBEXStore *(*create)(struct _memcache *bc);
- int (*sync)(struct _IBEXStore *store);
- int (*close)(struct _IBEXStore *store);
-
- blockid_t (*add)(struct _IBEXStore *store, blockid_t *head, blockid_t *tail, nameid_t data);
- blockid_t (*add_list)(struct _IBEXStore *store, blockid_t *head, blockid_t *tail, GArray *data);
- blockid_t (*remove)(struct _IBEXStore *store, blockid_t *head, blockid_t *tail, nameid_t data);
- void (*free)(struct _IBEXStore *store, blockid_t head, blockid_t tail);
-
- gboolean (*find)(struct _IBEXStore *store, blockid_t head, blockid_t tail, nameid_t data);
- GArray *(*get)(struct _IBEXStore *store, blockid_t head, blockid_t tail);
-};
-
-#endif
diff --git a/libibex/lookup.c b/libibex/lookup.c
deleted file mode 100644
index 2d01dbf850..0000000000
--- a/libibex/lookup.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-/*
- * Copyright (C) 2000 Helix Code Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/* lookup.c: a simple client, part 2 */
-
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "ibex.h"
-
-extern int optind;
-extern char *optarg;
-
-static void
-usage (void)
-{
- fprintf (stderr, "Usage: lookup [-f indexfile] word ...\n");
- exit (1);
-}
-
-int
-main (int argc, char **argv)
-{
- ibex *ib;
- GPtrArray *ans, *words;
- int opt, i;
- char *file = "INDEX";
-
- while ((opt = getopt (argc, argv, "f:")) != -1) {
- switch (opt) {
- case 'f':
- file = optarg;
- break;
-
- default:
- usage ();
- break;
- }
- }
- argc -= optind;
- argv += optind;
-
- if (argc == 0)
- usage ();
-
- ib = ibex_open (file, O_RDWR|O_CREAT, 0600);
- if (!ib) {
- printf ("Couldn't open %s: %s\n", file, strerror (errno));
- exit (1);
- }
-
- words = g_ptr_array_new ();
- while (argc--)
- g_ptr_array_add (words, argv[argc]);
-
- ans = ibex_find_all (ib, words);
- if (ans) {
- for (i = 0; i < ans->len; i++)
- printf ("%s\n", (char *)g_ptr_array_index (ans, i));
- exit (0);
- } else {
- printf ("Nope.\n");
- exit (1);
- }
-}
diff --git a/libibex/mkindex.c b/libibex/mkindex.c
deleted file mode 100644
index 151dcecb2d..0000000000
--- a/libibex/mkindex.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-/*
- * Copyright (C) 2000 Helix Code Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/* mkindex.c: a simple client, part 1 */
-
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "ibex.h"
-
-extern int optind;
-extern char *optarg;
-
-static void
-usage (void)
-{
- fprintf (stderr, "Usage: mkindex [-f indexfile] file ...\n");
- exit (1);
-}
-
-int
-main (int argc, char **argv)
-{
- ibex *ib;
- int opt;
- char *file = "INDEX";
-
- while ((opt = getopt (argc, argv, "f:")) != -1) {
- switch (opt) {
- case 'f':
- file = optarg;
- break;
-
- default:
- usage ();
- break;
- }
- }
- argc -= optind;
- argv += optind;
-
- if (argc == 0)
- usage ();
-
- ib = ibex_open (file, O_CREAT|O_RDWR, 0600);
- if (!ib) {
- fprintf (stderr, "Couldn't open index file %s: %s\n",
- file, strerror (errno));
- exit (1);
- }
-
- while (argc--) {
- if (ibex_index_file (ib, argv[argc]) == -1) {
- fprintf (stderr, "Couldn't index %s: %s\n",
- argv[argc], strerror (errno));
- exit (1);
- }
- }
-
- if (ibex_close (ib) != 0) {
- fprintf (stderr, "Failed to write index file %s: %s\n",
- file, strerror (errno));
- exit (1);
- }
- exit (0);
-}
diff --git a/libibex/testindex.c b/libibex/testindex.c
deleted file mode 100644
index a3b6a9ce03..0000000000
--- a/libibex/testindex.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/* Test code for libibex */
-
-#include <stdio.h>
-#include <glib.h>
-#include <errno.h>
-#include <string.h>
-#include "ibex_internal.h"
-
-#ifdef ENABLE_THREADS
-#include <pthread.h>
-#endif
-
-void word_index_mem_dump_info(struct _IBEXWord *idx);
-
-/*
- The following is a routine to generate a Gaussian distribution
- of pseudo random numbers, to make the results a little more
- meaningful
-*/
-
-/* boxmuller.c Implements the Polar form of the Box-Muller
- Transformation
-
- (c) Copyright 1994, Everett F. Carter Jr.
- Permission is granted by the author to use
- this software for any application provided this
- copyright notice is preserved.
-
-*/
-
-#include <stdlib.h>
-#include <math.h>
-
-#define ranf() ((float)rand()/(float)RAND_MAX)
-
-static float box_muller(float m, float s) /* normal random variate generator */
-{ /* mean m, standard deviation s */
- float x1, x2, w, y1;
- static float y2;
- static int use_last = 0;
-
- if (use_last) /* use value from previous call */
- {
- y1 = y2;
- use_last = 0;
- }
- else
- {
- do {
- x1 = 2.0 * ranf() - 1.0;
- x2 = 2.0 * ranf() - 1.0;
- w = x1 * x1 + x2 * x2;
- } while ( w >= 1.0 );
-
- w = sqrt( (-2.0 * log( w ) ) / w );
- y1 = x1 * w;
- y2 = x2 * w;
- use_last = 1;
- }
-
- return( m + y1 * s );
-}
-
-/* gets a word from words, using m and s as distribution values */
-static char *getword(GPtrArray *words, float m, float s)
-{
- int index;
-
- do {
- index = (int)box_muller(m, s);
- } while (index<0 || index>=words->len);
-
- return words->pdata[index];
-}
-
-#ifdef ENABLE_THREADS
-int do_read_words;
-
-static void *
-read_words(void *in)
-{
- ibex *ib = in;
- GPtrArray *a;
- int lastlen = 0;
- int i;
-
- while (do_read_words) {
- a = ibex_find(ib, "joneses");
- if (a->len != lastlen) {
- printf("Found %d joneses!\n", a->len);
- lastlen = a->len;
- }
- for (i=0;i<a->len;i++)
- g_free(a->pdata[i]);
- g_ptr_array_free(a, TRUE);
- }
-}
-#endif
-
-int main(int argc, char **argv)
-{
- int i, j;
- GPtrArray *words = g_ptr_array_new();
- char line[256];
- int len;
- FILE *file;
- float m, s;
- ibex *ib;
- GString *buffer = g_string_new("");
- int files;
- char *dict;
-
-#ifdef ENABLE_THREADS
- pthread_t id;
-
- g_thread_init(0);
-#endif
-
- srand(0xABADF00D);
-
- files = 8000;
- dict = "/usr/dict/words";
-
- /* read words into an array */
- file = fopen(dict, "r");
- if (file == NULL) {
- fprintf(stderr, "Cannot open word file: %s: %s\n", dict, strerror(errno));
- return 1;
- }
- while (fgets(line, sizeof(line), file) != NULL) {
- len = strlen(line);
- if (len>0 && line[len-1]=='\n') {
- line[len-1]=0;
- }
- g_ptr_array_add(words, g_strdup(line));
- }
- fclose(file);
-
- fprintf(stderr, "Read %d words\n", words->len);
-
- /* *shrug* arbitrary values really */
- m = words->len/2;
- /* well, the average vocabulary of a mailbox is about 10K words */
- s = 1000.0;
-
- printf("mean is %f, s is %f\n", m, s);
-
- /* open ibex file */
- ib = ibex_open("test.ibex", O_RDWR|O_CREAT, 0600);
- if (ib == NULL) {
- perror("Creating ibex file\n");
- return 1;
- }
-
-#ifdef ENABLE_THREADS
- do_read_words = 1;
- pthread_create(&id, 0, read_words, ib);
-#endif
- printf("Adding %d files\n", files);
-
- /* simulate adding new words to a bunch of files */
- for (j=0;j<files;j++) {
- /* always new name */
- char *name = words->pdata[j % words->len];
- /* something like 60 words in a typical message, say */
- int count = (int)box_muller(60.0, 20.0);
-
- if (j%1000 == 0)
- word_index_mem_dump_info(ib->words);
-
- /* cache the name info */
- ibex_contains_name(ib, name);
-
- /*printf("Adding %d words to '%s'\n", count, name);*/
-
- g_string_truncate(buffer, 0);
-
- /* build up the word buffer */
- for (i=0;i<count;i++) {
- if (i>0)
- g_string_append_c(buffer, ' ');
- g_string_append(buffer, getword(words, m, s));
- }
-
- /* and index it */
- ibex_index_buffer(ib, name, buffer->str, buffer->len, NULL);
- }
-
- word_index_mem_dump_info(ib->words);
-
-#ifdef ENABLE_THREADS
- do_read_words = 0;
- pthread_join(id, 0);
-#endif
-
- ibex_close(ib);
-
- return 0;
-}
-
diff --git a/libibex/wordindex.c b/libibex/wordindex.c
deleted file mode 100644
index a8592cbcef..0000000000
--- a/libibex/wordindex.c
+++ /dev/null
@@ -1,646 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
- *
- * Copyright (C) 2000 Helix Code, Inc.
- *
- * Authors: Michael Zucchi <notzed@helixcode.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public License
- * as published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with the Gnome Library; see the file COPYING.LIB. If not,
- * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-/* code to manage a word index */
-/* includes a cache for word index writes,
- but not for name index writes (currently), or any reads.
-
-Note the word cache is only needed during indexing of lots
-of words, and could then be discarded (:flush()).
-
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-#include <glib.h>
-
-#include "block.h"
-#include "index.h"
-#include "wordindex.h"
-
-#define d(x)
-
-/*#define WORDCACHE_SIZE (256)*/
-#define WORDCACHE_SIZE (4096)
-
-extern struct _IBEXStoreClass ibex_diskarray_class;
-extern struct _IBEXIndexClass ibex_hash_class;
-
-/* need 2 types of hash key?
- one that just stores the wordid / wordblock
- and one that stores the filecount/files?
-*/
-
-
-#define CACHE_FILE_COUNT (62)
-
-struct _wordcache {
- struct _wordcache *next;
- struct _wordcache *prev;
- nameid_t wordid; /* disk wordid */
- blockid_t wordblock; /* head of disk list */
- blockid_t wordtail; /* and the tail data */
- short filecount; /* how many valid items in files[] */
- short filealloc; /* how much allocated space in files[] */
- union {
- nameid_t *files; /* memory cache of files */
- nameid_t file0; /* if filecount == 1 && filealloc == 0, store directly */
- } file;
- char word[1]; /* actual word follows */
-};
-
-static void unindex_name(struct _IBEXWord *, const char *name); /* unindex all entries for name */
-static gboolean contains_name(struct _IBEXWord *, const char *name); /* index contains data for name */
-static GPtrArray *find(struct _IBEXWord *, const char *word); /* returns all matches for word */
-static gboolean find_name(struct _IBEXWord *, const char *name, const char *word); /* find if name contains word */
-static void add(struct _IBEXWord *, const char *name, const char *word); /* adds a single word to name (slow) */
-static void add_list(struct _IBEXWord *, const char *name, GPtrArray *words);/* adds a bunch of words to a given name */
-static int word_sync(struct _IBEXWord *idx);
-static int word_flush(struct _IBEXWord *idx);
-static int word_close(struct _IBEXWord *idx);
-static void word_index_pre(struct _IBEXWord *idx);
-static void word_index_post(struct _IBEXWord *idx);
-
-struct _IBEXWordClass ibex_word_index_class = {
- word_sync, word_flush, word_close,
- word_index_pre, word_index_post,
- unindex_name, contains_name,
- find, find_name,
- add, add_list
-};
-
-/* this interface isn't the best, but it'll do for now */
-struct _IBEXWord *
-ibex_create_word_index(struct _memcache *bc, blockid_t *wordroot, blockid_t *nameroot)
-{
- struct _IBEXWord *idx;
-
- idx = g_malloc0(sizeof(*idx));
- idx->wordcache = g_hash_table_new(g_str_hash, g_str_equal);
- ibex_list_new(&idx->wordnodes);
- idx->wordcount = 0;
- idx->klass = &ibex_word_index_class;
-
- /* we use the same block array storage for both indexes at the moment */
- idx->wordstore = ibex_diskarray_class.create(bc);
- idx->namestore = idx->wordstore;
-
- /* but not the same indexes! */
- if (*wordroot) {
- printf("opening wordindex root = %d\n", *wordroot);
- idx->wordindex = ibex_hash_class.open(bc, *wordroot);
- } else {
- idx->wordindex = ibex_hash_class.create(bc, 2048);
- *wordroot = idx->wordindex->root;
- printf("creating wordindex root = %d\n", *wordroot);
- }
- if (*nameroot) {
- printf("opening nameindex root = %d\n", *nameroot);
- idx->nameindex = ibex_hash_class.open(bc, *nameroot);
- } else {
- idx->nameindex = ibex_hash_class.create(bc, 2048);
- *nameroot = idx->nameindex->root;
- printf("creating nameindex root = %d\n", *nameroot);
- }
- return idx;
-}
-
-#if d(!)0
-static void
-cache_sanity(struct _wordcache *head)
-{
- while (head->next) {
- g_assert(head->filecount <= head->filealloc);
- g_assert(strlen(head->word) != 0);
- head = head->next;
- }
-}
-#endif
-
-static void word_index_pre(struct _IBEXWord *idx)
-{
-}
-
-static void word_index_post(struct _IBEXWord *idx)
-{
-}
-
-/* unindex all entries for name */
-static void unindex_name(struct _IBEXWord *idx, const char *name)
-{
- GArray *words;
- int i;
- nameid_t nameid, wordid;
- blockid_t nameblock, wordblock, newblock, nametail, wordtail, newtail;
- char *word;
- struct _wordcache *cache;
-
- d(printf("unindexing %s\n", name));
-
- /* lookup the hash key */
- nameid = idx->nameindex->klass->find(idx->nameindex, name, strlen(name));
- /* get the block for this key */
- nameblock = idx->nameindex->klass->get_data(idx->nameindex, nameid, &nametail);
- /* and the data for this block */
- words = idx->namestore->klass->get(idx->namestore, nameblock, nametail);
- /* walk it ... */
- for (i=0;i<words->len;i++) {
- /* get the word */
- wordid = g_array_index(words, nameid_t, i);
- d(printf(" word %d\n", wordid));
- /* get the data block */
- wordblock = idx->wordindex->klass->get_data(idx->wordindex, wordid, &wordtail);
- /* clear this name from it */
- newblock = wordblock;
- newtail = wordtail;
- idx->wordstore->klass->remove(idx->wordstore, &newblock, &newtail, nameid);
- if (newblock != wordblock || newtail != wordtail)
- idx->wordindex->klass->set_data(idx->wordindex, wordid, newblock, newtail);
-
- /* now check the cache as well */
- word = idx->nameindex->klass->get_key(idx->wordindex, wordid, NULL);
- if (word) {
- cache = g_hash_table_lookup(idx->wordcache, word);
- if (cache) {
- /* its there, update our head/tail pointers */
- cache->wordblock = newblock;
- cache->wordtail = newtail;
-
- /* now check that we have a data entry in it */
- if (cache->filealloc == 0 && cache->filecount == 1) {
- if (cache->file.file0 == nameid) {
- cache->filecount = 0;
- }
- } else {
- int j;
-
- for (j=0;j<cache->filecount;j++) {
- if (cache->file.files[j] == nameid) {
- cache->file.files[j] = cache->file.files[cache->filecount-1];
- cache->filecount--;
- break;
- }
- }
- }
- }
- g_free(word);
- }
- }
- g_array_free(words, TRUE);
-
- /* and remove name data and itself */
- idx->namestore->klass->free(idx->namestore, nameblock, nametail);
- idx->nameindex->klass->remove(idx->nameindex, name, strlen(name));
-}
-
-/* index contains (any) data for name */
-static gboolean contains_name(struct _IBEXWord *idx, const char *name)
-{
- return idx->nameindex->klass->find(idx->nameindex, name, strlen(name)) != 0;
-}
-
-/* returns all matches for word */
-static GPtrArray *find(struct _IBEXWord *idx, const char *word)
-{
- nameid_t wordid, nameid;
- GPtrArray *res;
- GArray *names;
- int i;
- char *new;
- struct _wordcache *cache;
- blockid_t wordblock, wordtail;
-
- res = g_ptr_array_new();
-
- cache = g_hash_table_lookup(idx->wordcache, word);
- if (cache) {
- /* freshen cache entry if we touch it */
- ibex_list_remove((struct _listnode *)cache);
- ibex_list_addtail(&idx->wordnodes, (struct _listnode *)cache);
- wordid = cache->wordid;
- wordblock = cache->wordblock;
- wordtail = cache->wordtail;
- } else {
- /* lookup the hash key */
- wordid = idx->wordindex->klass->find(idx->wordindex, word, strlen(word));
- /* get the block for this key */
- wordblock = idx->wordindex->klass->get_data(idx->wordindex, wordid, &wordtail);
- }
- /* and the data for this block */
- names = idx->wordstore->klass->get(idx->wordstore, wordblock, wordtail);
- /* .. including any memory-only data */
- if (cache) {
- if (cache->filealloc == 0 && cache->filecount == 1)
- g_array_append_val(names, cache->file.file0);
- else
- g_array_append_vals(names, cache->file.files, cache->filecount);
- }
-
- /* walk it ... converting id's back to strings */
- g_ptr_array_set_size(res, names->len);
- for (i=0;i<names->len;i++) {
- nameid = g_array_index(names, nameid_t, i);
- new = idx->nameindex->klass->get_key(idx->nameindex, nameid, NULL);
- res->pdata[i] = new;
- }
- g_array_free(names, TRUE);
- return res;
-}
-
-/* find if name contains word */
-static gboolean find_name(struct _IBEXWord *idx, const char *name, const char *word)
-{
- nameid_t wordid, nameid;
- blockid_t nameblock, nametail;
- struct _wordcache *cache;
- int i;
-
- /* lookup the hash key for the name */
- nameid = idx->nameindex->klass->find(idx->nameindex, name, strlen(name));
- /* get the block for this name */
- nameblock = idx->nameindex->klass->get_data(idx->nameindex, nameid, &nametail);
-
- /* check if there is an in-memory cache for this word, check its file there first */
- cache = g_hash_table_lookup(idx->wordcache, word);
- if (cache) {
- /* freshen cache entry if we touch it */
- ibex_list_remove((struct _listnode *)cache);
- ibex_list_addtail(&idx->wordnodes, (struct _listnode *)cache);
- if (cache->filecount == 1 && cache->filealloc == 0) {
- if (cache->file.file0 == nameid)
- return TRUE;
- } else {
- for (i=0;i<cache->filecount;i++) {
- if (cache->file.files[i] == nameid)
- return TRUE;
- }
- }
- /* not there? well we can use the wordid anyway */
- wordid = cache->wordid;
- } else {
- /* lookup the hash key for word */
- wordid = idx->wordindex->klass->find(idx->wordindex, word, strlen(word));
- }
-
- /* see if wordid is in nameblock */
- return idx->namestore->klass->find(idx->namestore, nameblock, nametail, wordid);
-}
-
-/* cache helper functions */
-/* flush a cache entry to disk, and empty it out */
-static void
-sync_cache_entry(struct _IBEXWord *idx, struct _wordcache *cache)
-{
- GArray array; /* just use this as a header */
- blockid_t oldblock, oldtail;
-
- d(printf("syncing cache entry '%s' used %d\n", cache->word, cache->filecount));
- if (cache->filecount == 1 && cache->filealloc == 0)
- array.data = (char *)&cache->file.file0;
- else
- array.data = (char *)cache->file.files;
- array.len = cache->filecount;
- oldblock = cache->wordblock;
- oldtail = cache->wordtail;
- idx->wordstore->klass->add_list(idx->wordstore, &cache->wordblock, &cache->wordtail, &array);
- if (oldblock != cache->wordblock || oldtail != cache->wordtail) {
- idx->wordindex->klass->set_data(idx->wordindex, cache->wordid, cache->wordblock, cache->wordtail);
- }
- cache->filecount = 0;
-}
-
-/* create a new key in an index, returning its id and head block */
-static void
-add_index_key(struct _IBEXIndex *wordindex, const char *word, nameid_t *wordid, blockid_t *wordblock, blockid_t *wordtail)
-{
- /* initialise cache entry - id of word entry and head block */
- *wordid = wordindex->klass->find(wordindex, word, strlen(word));
- if (*wordid == 0) {
- *wordid = wordindex->klass->insert(wordindex, word, strlen(word));
- *wordblock = 0;
- *wordtail = 0;
- } else {
- *wordblock = wordindex->klass->get_data(wordindex, *wordid, wordtail);
- }
-}
-
-/* create a new key in a cached index (only word cache so far), flushing old keys
- if too much space is being used */
-static struct _wordcache *
-add_index_cache(struct _IBEXWord *idx, const char *word)
-{
- struct _wordcache *cache;
-
- d(printf("adding %s to cache\n", word));
-
- cache = g_hash_table_lookup(idx->wordcache, word);
- if (cache == 0) {
- /* see if we have to flush off the last entry */
- if (idx->wordcount >= WORDCACHE_SIZE) {
- struct _wordcache *mincache;
- int min, count=0;
- /* remove last entry, and flush it */
- cache = (struct _wordcache *)idx->wordnodes.tailpred;
- mincache = cache;
- min = mincache->filecount;
-
- d(printf("flushing word from cache %s\n", cache->word));
- /* instead of just using the last entry, we try and find an entry with
- with only 1 item (failing that, the smallest in the range we look at) */
- /* this could probably benefit greatly from a more sophisticated aging algorithm */
- while (cache->next && count < 100) {
- if (cache->filecount == 1) {
- mincache = cache;
- break;
- }
- if (cache->filecount > 0 && cache->filecount < min) {
- mincache = cache;
- min = cache->filecount;
- }
- cache = cache->next;
- count++;
- }
- ibex_list_remove((struct _listnode *)mincache);
- g_hash_table_remove(idx->wordcache, mincache->word);
- sync_cache_entry(idx, mincache);
- if (mincache->filealloc)
- g_free(mincache->file.files);
- g_free(mincache);
- idx->wordcount--;
- }
- cache = g_malloc0(sizeof(*cache)+strlen(word));
- /* initialise cache entry - id of word entry and head block */
- add_index_key(idx->wordindex, word, &cache->wordid, &cache->wordblock, &cache->wordtail);
- /* other fields */
- strcpy(cache->word, word);
- cache->filecount = 0;
- g_hash_table_insert(idx->wordcache, cache->word, cache);
- ibex_list_addhead(&idx->wordnodes, (struct _listnode *)cache);
- idx->wordcount++;
- } else {
- /* move cache bucket ot the head of the cache list */
- ibex_list_remove((struct _listnode *)cache);
- ibex_list_addhead(&idx->wordnodes, (struct _listnode *)cache);
- }
- return cache;
-}
-
-/* adds a single word to name (slow) */
-static void add(struct _IBEXWord *idx, const char *name, const char *word)
-{
- nameid_t nameid;
- blockid_t nameblock, newblock, nametail, newtail;
- struct _wordcache *cache;
-
- g_error("Dont use wordindex::add()");
- abort();
-
- cache = add_index_cache(idx, word);
-
- /* get the nameid and block start for this name */
- add_index_key(idx->nameindex, name, &nameid, &nameblock, &nametail);
-
- /* check for repeats of the last name - dont add anything */
- if (cache->filecount == 1 && cache->filealloc == 0) {
- if (cache->file.file0 == nameid)
- return;
- } else {
- if (cache->file.files[cache->filecount] == nameid)
- return;
- }
-
- /* see if we are setting the first, drop it in the union */
- if (cache->filecount == 0 && cache->filealloc == 0) {
- cache->file.file0 = nameid;
- } else if (cache->filecount == 1 && cache->filealloc == 0) {
- nameid_t saveid;
- /* we need to allocate space for words */
- saveid = cache->file.file0;
- cache->file.files = g_malloc(sizeof(cache->file.files[0]) * CACHE_FILE_COUNT);
- /* this could possibly grow as needed, but i wont for now */
- cache->filealloc = CACHE_FILE_COUNT;
- cache->file.files[0] = saveid;
- cache->file.files[1] = nameid;
- } else {
- cache->file.files[cache->filecount] = nameid;
- }
-
- cache->filecount++;
-
- /* if we are full, force a flush now */
- if (cache->filealloc && cache->filecount >= cache->filealloc) {
- sync_cache_entry(idx, cache);
- }
-
- newtail = nametail;
- newblock = nameblock;
- idx->namestore->klass->add(idx->namestore, &newblock, &newtail, cache->wordid);
- if (newblock != nameblock || newtail != nametail) {
- idx->nameindex->klass->set_data(idx->nameindex, nameid, newblock, newtail);
- }
-}
-
-/* adds a bunch of words to a given name */
-static void add_list(struct _IBEXWord *idx, const char *name, GPtrArray *words)
-{
- int i;
- GArray *data = g_array_new(0, 0, sizeof(nameid_t));
- blockid_t nameblock, newblock, nametail, newtail;
- nameid_t nameid;
- struct _wordcache *cache;
-
- d(printf("Adding words to name %s\n", name));
-
- d(cache_sanity((struct _wordcache *)idx->wordnodes.head));
-
- /* get the nameid and block start for this name */
- add_index_key(idx->nameindex, name, &nameid, &nameblock, &nametail);
-
- d(cache_sanity((struct _wordcache *)idx->wordnodes.head));
-
- for (i=0;i<words->len;i++) {
- char *word = words->pdata[i];
-
- cache = add_index_cache(idx, word);
-
- /*d(cache_sanity((struct _wordcache *)idx->wordnodes.head));*/
-
- /* check for duplicates; doesn't catch duplicates over an overflow boundary. Watch me care. */
- if (cache->filecount == 0
- /* the 1 item case */
- || (cache->filecount == 1 && cache->filealloc == 0 && cache->file.file0 != nameid)
- /* the normal case */
- || (cache->filealloc > 0 && cache->file.files[cache->filecount-1] != nameid)) {
-
- /* see if we are setting the first, drop it in the union */
- if (cache->filecount == 0 && cache->filealloc == 0) {
- cache->file.file0 = nameid;
- } else if (cache->filecount == 1 && cache->filealloc == 0) {
- nameid_t saveid;
- /* we need to allocate space for words */
- saveid = cache->file.file0;
- cache->file.files = g_malloc(sizeof(cache->file.files[0]) * CACHE_FILE_COUNT);
- /* this could possibly grow as needed, but i wont for now */
- cache->filealloc = CACHE_FILE_COUNT;
- cache->file.files[0] = saveid;
- cache->file.files[1] = nameid;
- } else {
- cache->file.files[cache->filecount] = nameid;
- }
-
- cache->filecount++;
-
- /* if we are full, force a flush now */
- if (cache->filealloc && cache->filecount >= cache->filealloc) {
- sync_cache_entry(idx, cache);
- }
-
- /*d(cache_sanity((struct _wordcache *)idx->wordnodes.head));*/
-
- /* and append this wordid for this name in memory */
- g_array_append_val(data, cache->wordid);
- }
-
- /*d(cache_sanity((struct _wordcache *)idx->wordnodes.head));*/
- }
-
- d(cache_sanity((struct _wordcache *)idx->wordnodes.head));
-
- /* and append these word id's in one go */
- newblock = nameblock;
- newtail = nametail;
- idx->namestore->klass->add_list(idx->namestore, &newblock, &newtail, data);
- if (newblock != nameblock || newtail != nametail) {
- idx->nameindex->klass->set_data(idx->nameindex, nameid, newblock, newtail);
- }
-
- d(cache_sanity((struct _wordcache *)idx->wordnodes.head));
-
- g_array_free(data, TRUE);
-}
-
-/* sync any in-memory data to disk */
-static int
-word_sync(struct _IBEXWord *idx)
-{
- /* we just flush also, save memory */
- word_flush(idx);
-
-#if 0
- struct _wordcache *cache = (struct _wordcache *)idx->wordnodes.head;
-
- while (cache->next) {
- sync_cache_entry(idx, cache);
- cache = cache->next;
- }
-
- /*ibex_hash_dump(idx->wordindex);*/
- /*ibex_hash_dump(idx->nameindex);*/
-#endif
- return 0;
-}
-
-/* sync and flush any in-memory data to disk and free it */
-static int
-word_flush(struct _IBEXWord *idx)
-{
- struct _wordcache *cache = (struct _wordcache *)idx->wordnodes.head, *next;
- extern int block_log;
- int count= 0;
- int used=0, wasted=0;
-
- block_log = 0;
-
- next = cache->next;
- while (next) {
- count++;
- used += sizeof(struct _wordcache) + (cache->filealloc * sizeof(nameid_t));
- if (cache->filealloc)
- wasted += (cache->filealloc-cache->filecount)*sizeof(nameid_t);
- else
- wasted += (1-cache->filecount) * sizeof(nameid_t);
-
- /*printf("syncing word %s\n", cache->word);*/
- sync_cache_entry(idx, cache);
- g_hash_table_remove(idx->wordcache, cache->word);
- if (cache->filealloc)
- g_free(cache->file.files);
- g_free(cache);
- cache = next;
- next = cache->next;
- }
-
- printf("sync cache entries = %d\n used memory = %d\n wasted memory = %d\n", count, used, wasted);
-
- block_log = 0;
- ibex_list_new(&idx->wordnodes);
- idx->wordcount = 0;
- return 0;
-}
-
-static int word_close(struct _IBEXWord *idx)
-{
- struct _wordcache *cache = (struct _wordcache *)idx->wordnodes.head, *next;
- extern int block_log;
- int count= 0;
- int used=0, wasted=0;
-
- block_log = 0;
-
- next = cache->next;
- while (next) {
- count++;
- used += sizeof(struct _wordcache) + (cache->filealloc * sizeof(nameid_t));
- if (cache->filealloc)
- wasted += (cache->filealloc-cache->filecount)*sizeof(nameid_t);
- else
- wasted += (1-cache->filecount) * sizeof(nameid_t);
-
- /*printf("closing word %s\n", cache->word);*/
- sync_cache_entry(idx, cache);
- if (cache->filealloc)
- g_free(cache->file.files);
- g_free(cache);
- cache = next;
- next = cache->next;
- }
- block_log = 0;
-
- printf("cache entries = %d\n used memory = %d\n wasted memory = %d\n", count, used, wasted);
-
- idx->namestore->klass->close(idx->namestore);
- idx->nameindex->klass->close(idx->nameindex);
- /*same as namestore:
- idx->wordstore->klass->close(idx->wordstore);*/
- idx->wordindex->klass->close(idx->wordindex);
- g_hash_table_destroy(idx->wordcache);
- g_free(idx);
-
- return 0;
-}
diff --git a/libibex/wordindex.h b/libibex/wordindex.h
deleted file mode 100644
index 3a60accb70..0000000000
--- a/libibex/wordindex.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
- *
- * Copyright (C) 2000 Helix Code, Inc.
- *
- * Authors: Michael Zucchi <notzed@helixcode.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public License
- * as published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with the Gnome Library; see the file COPYING.LIB. If not,
- * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-#ifndef _WORDINDEX_H
-#define _WORDINDEX_H
-
-#include <glib.h>
-
-#include "block.h"
-#include "index.h"
-
-struct _IBEXWord;
-
-/* not used yet */
-typedef void (*IBEXNormaliseFunc)(char *source, int len, char *dest);
-
-struct _IBEXWordClass {
- int (*sync)(struct _IBEXWord *);
- int (*flush)(struct _IBEXWord *);
- int (*close)(struct _IBEXWord *);
-
- void (*index_pre)(struct _IBEXWord *); /* get ready for doing a lot of indexing. may be a nop */
- void (*index_post)(struct _IBEXWord *);
-
- void (*unindex_name)(struct _IBEXWord *, const char *name); /* unindex all entries for name */
- gboolean (*contains_name)(struct _IBEXWord *, const char *name); /* index contains data for name */
- GPtrArray *(*find)(struct _IBEXWord *, const char *word); /* returns all matches for word */
- gboolean (*find_name)(struct _IBEXWord *, const char *name, const char *word); /* find if name contains word */
- void (*add)(struct _IBEXWord *, const char *name, const char *word); /* adds a single word to name */
- void (*add_list)(struct _IBEXWord *, const char *name, GPtrArray *words);/* adds a bunch of words to a given name */
-};
-
-struct _IBEXWord {
- struct _IBEXWordClass *klass;
- struct _IBEXStore *wordstore;
- struct _IBEXIndex *wordindex;
- struct _IBEXStore *namestore;
- struct _IBEXIndex *nameindex;
-
- /* word caching info (should probably be modularised) */
- GHashTable *wordcache; /* word->struct _wordcache mapping */
- struct _list wordnodes; /* LRU list of wordcache structures */
- int wordcount; /* how much space used in cache */
- int precount;
- GHashTable *namecache; /* a list of names (only), cached for quick reference */
- int nameinit;
-};
-
-
-struct _IBEXWord *ibex_create_word_index(struct _memcache *bc, blockid_t *wordroot, blockid_t *nameroot);
-
-/* alternate implemenation */
-struct _IBEXWord *ibex_create_word_index_mem(struct _memcache *bc, blockid_t *wordroot, blockid_t *nameroot);
-
-#endif /* !_WORDINDEX_H */
diff --git a/libibex/wordindexmem.c b/libibex/wordindexmem.c
deleted file mode 100644
index 479e5b0343..0000000000
--- a/libibex/wordindexmem.c
+++ /dev/null
@@ -1,889 +0,0 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
- *
- * Copyright (C) 2000 Helix Code, Inc.
- *
- * Authors: Michael Zucchi <notzed@helixcode.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public License
- * as published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with the Gnome Library; see the file COPYING.LIB. If not,
- * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-/* this is the same as wordindex.c, but it doesn't have an LRU cache
- for word names. it has a lookup tab le that is only loaded if
- index-pre is called, otherwise it always hits disk */
-
-/* code to manage a word index */
-/* includes a cache for word index writes,
- but not for name index writes (currently), or any reads.
-
-Note the word cache is only needed during indexing of lots
-of words, and could then be discarded (:flush()).
-
-*/
-
-#include <stdio.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-#include <glib.h>
-
-#include "block.h"
-#include "index.h"
-#include "wordindex.h"
-
-/*#define MALLOC_CHECK*/
-
-#ifdef MALLOC_CHECK
-#include <mcheck.h>
-#endif
-
-#define d(x)
-
-/*#define WORDCACHE_SIZE (256)*/
-#define WORDCACHE_SIZE (4096)
-
-extern struct _IBEXStoreClass ibex_diskarray_class;
-extern struct _IBEXIndexClass ibex_hash_class;
-
-/* need 2 types of hash key?
- one that just stores the wordid / wordblock
- and one that stores the filecount/files?
-*/
-
-
-#define CACHE_FILE_COUNT (62)
-
-struct _wordcache {
- nameid_t wordid; /* disk wordid */
- blockid_t wordblock; /* head of disk list */
- blockid_t wordtail; /* and the tail data */
- short filecount; /* how many valid items in files[] */
- short filealloc; /* how much allocated space in files[] */
- union {
- nameid_t *files; /* memory cache of files */
- nameid_t file0; /* if filecount == 1 && filealloc == 0, store directly */
- } file;
- char word[1]; /* actual word follows */
-};
-
-static void unindex_name(struct _IBEXWord *, const char *name); /* unindex all entries for name */
-static gboolean contains_name(struct _IBEXWord *, const char *name); /* index contains data for name */
-static GPtrArray *find(struct _IBEXWord *, const char *word); /* returns all matches for word */
-static gboolean find_name(struct _IBEXWord *, const char *name, const char *word); /* find if name contains word */
-static void add(struct _IBEXWord *, const char *name, const char *word); /* adds a single word to name (slow) */
-static void add_list(struct _IBEXWord *, const char *name, GPtrArray *words);/* adds a bunch of words to a given name */
-static int word_sync(struct _IBEXWord *idx);
-static int word_flush(struct _IBEXWord *idx);
-static int word_close(struct _IBEXWord *idx);
-static void word_index_pre(struct _IBEXWord *idx);
-static void word_index_post(struct _IBEXWord *idx);
-
-static void sync_cache_entry(struct _IBEXWord *idx, struct _wordcache *cache);
-
-struct _IBEXWordClass ibex_word_index_mem_class = {
- word_sync, word_flush, word_close,
- word_index_pre, word_index_post,
- unindex_name, contains_name,
- find, find_name,
- add, add_list
-};
-
-#ifdef MALLOC_CHECK
-static void
-checkmem(void *p)
-{
- if (p) {
- int status = mprobe(p);
-
- switch (status) {
- case MCHECK_HEAD:
- printf("Memory underrun at %p\n", p);
- abort();
- case MCHECK_TAIL:
- printf("Memory overrun at %p\n", p);
- abort();
- case MCHECK_FREE:
- printf("Double free %p\n", p);
- abort();
- }
- }
-}
-#endif
-
-/* this interface isn't the best, but it'll do for now */
-struct _IBEXWord *
-ibex_create_word_index_mem(struct _memcache *bc, blockid_t *wordroot, blockid_t *nameroot)
-{
- struct _IBEXWord *idx;
-
- idx = g_malloc0(sizeof(*idx));
- idx->wordcache = g_hash_table_new(g_str_hash, g_str_equal);
- ibex_list_new(&idx->wordnodes);
- idx->wordcount = 0;
- idx->precount = 0;
- idx->namecache = g_hash_table_new(g_str_hash, g_str_equal);
- idx->nameinit = 0;
- idx->klass = &ibex_word_index_mem_class;
-
- /* we use the same block array storage for both indexes at the moment */
- idx->wordstore = ibex_diskarray_class.create(bc);
- idx->namestore = idx->wordstore;
-
- /* but not the same indexes! */
- if (*wordroot) {
- d(printf("opening wordindex root = %d\n", *wordroot));
- idx->wordindex = ibex_hash_class.open(bc, *wordroot);
- } else {
- idx->wordindex = ibex_hash_class.create(bc, 2048);
- *wordroot = idx->wordindex->root;
- d(printf("creating wordindex root = %d\n", *wordroot));
- }
- if (*nameroot) {
- d(printf("opening nameindex root = %d\n", *nameroot));
- idx->nameindex = ibex_hash_class.open(bc, *nameroot);
- } else {
- idx->nameindex = ibex_hash_class.create(bc, 2048);
- *nameroot = idx->nameindex->root;
- d(printf("creating nameindex root = %d\n", *nameroot));
- }
- return idx;
-}
-
-#if (d(!)0) || defined(MALLOC_CHECK)
-static void
-node_sanity(char *key, struct _wordcache *node, void *data)
-{
- g_assert(node->filecount <= node->filealloc || (node->filecount == 1 && node->filealloc == 0));
- g_assert(strlen(node->word) != 0);
-#ifdef MALLOC_CHECK
- checkmem(node);
- if (node->filealloc)
- checkmem(node->file.files);
-#endif
-}
-
-static void
-cache_sanity(struct _IBEXWord *idx)
-{
-#ifdef MALLOC_CHECK
- checkmem(idx);
-#endif
- g_hash_table_foreach(idx->wordcache, (GHFunc)node_sanity, idx);
-}
-#endif
-
-static void word_index_pre(struct _IBEXWord *idx)
-{
- struct _IBEXCursor *idc;
- struct _wordcache *cache;
- nameid_t wordid;
- char *key;
- int len;
-
- idx->precount ++;
- if (idx->precount > 1)
- return;
-
- /* want to load all words into the cache lookup table */
- d(printf("pre-loading all word info into memory\n"));
- idc = idx->wordindex->klass->get_cursor(idx->wordindex);
- while ( (wordid = idc->klass->next(idc)) ) {
- key = idc->index->klass->get_key(idc->index, wordid, &len);
- /*d(printf("Adding word %s\n", key));*/
- cache = g_malloc0(sizeof(*cache) + strlen(key));
- strcpy(cache->word, key);
- g_free(key);
- cache->wordid = wordid;
- cache->wordblock = idc->index->klass->get_data(idc->index, wordid, &cache->wordtail);
- cache->filecount = 0;
- cache->filealloc = 0;
- g_hash_table_insert(idx->wordcache, cache->word, cache);
- idx->wordcount++;
- }
-
-#ifdef MALLOC_CHECK
- cache_sanity(idx);
-#endif
-
- idc->klass->close(idc);
-
- d(printf("done\n"));
-}
-
-static gboolean
-sync_free_value(void *key, void *value, void *data)
-{
- struct _wordcache *cache = (struct _wordcache *)value;
- struct _IBEXWord *idx = (struct _IBEXWord *)data;
-
- sync_cache_entry(idx, cache);
- if (cache->filealloc)
- g_free(cache->file.files);
- g_free(cache);
-
- return TRUE;
-}
-
-#if d(!)0
-static void
-sync_value(void *key, void *value, void *data)
-{
- struct _wordcache *cache = (struct _wordcache *)value;
- struct _IBEXWord *idx = (struct _IBEXWord *)data;
-
- sync_cache_entry(idx, cache);
-}
-#endif
-
-static void word_index_post(struct _IBEXWord *idx)
-{
- idx->precount--;
- if (idx->precount > 0)
- return;
- idx->precount = 0;
-
-#ifdef MALLOC_CHECK
- cache_sanity(idx);
-#endif
-
- g_hash_table_foreach_remove(idx->wordcache, sync_free_value, idx);
- idx->wordcount = 0;
-}
-
-/* unindex all entries for name */
-static void unindex_name(struct _IBEXWord *idx, const char *name)
-{
- GArray *words;
- int i;
- nameid_t nameid, wordid;
- blockid_t nameblock, wordblock, newblock, nametail, wordtail, newtail;
- char *word;
- struct _wordcache *cache;
-
- d(printf("unindexing %s\n", name));
-
- /* if we have a namecache, check that to see if we need to remove that item, or there is no work here */
- if (idx->nameinit) {
- char *oldkey;
- gboolean oldval;
-
- if (g_hash_table_lookup_extended(idx->namecache, name, (void *)&oldkey, (void *)&oldval)) {
- g_hash_table_remove(idx->namecache, oldkey);
- g_free(oldkey);
- } else {
- return;
- }
- }
-
- /* lookup the hash key */
- nameid = idx->nameindex->klass->find(idx->nameindex, name, strlen(name));
- /* get the block for this key */
- nameblock = idx->nameindex->klass->get_data(idx->nameindex, nameid, &nametail);
- /* and the data for this block */
- words = idx->namestore->klass->get(idx->namestore, nameblock, nametail);
- /* walk it ... */
- for (i=0;i<words->len;i++) {
- /* get the word */
- wordid = g_array_index(words, nameid_t, i);
- d(printf(" word %d\n", wordid));
- /* get the data block */
- wordblock = idx->wordindex->klass->get_data(idx->wordindex, wordid, &wordtail);
- /* clear this name from it */
- newblock = wordblock;
- newtail = wordtail;
- idx->wordstore->klass->remove(idx->wordstore, &newblock, &newtail, nameid);
- if (newblock != wordblock || newtail != wordtail)
- idx->wordindex->klass->set_data(idx->wordindex, wordid, newblock, newtail);
-
- /* now check the cache as well */
- word = idx->nameindex->klass->get_key(idx->wordindex, wordid, NULL);
- if (word) {
- cache = g_hash_table_lookup(idx->wordcache, word);
- if (cache) {
- /* its there, update our head/tail pointers */
- cache->wordblock = newblock;
- cache->wordtail = newtail;
-
- /* now check that we have a data entry in it */
- if (cache->filealloc == 0 && cache->filecount == 1) {
- if (cache->file.file0 == nameid) {
- cache->filecount = 0;
- }
- } else {
- int j;
-
- for (j=0;j<cache->filecount;j++) {
- if (cache->file.files[j] == nameid) {
- cache->file.files[j] = cache->file.files[cache->filecount-1];
- cache->filecount--;
- break;
- }
- }
- }
- }
- g_free(word);
- }
- }
- g_array_free(words, TRUE);
-
- /* and remove name data and itself */
- idx->namestore->klass->free(idx->namestore, nameblock, nametail);
- idx->nameindex->klass->remove(idx->nameindex, name, strlen(name));
-}
-
-/* index contains (any) data for name */
-static gboolean contains_name(struct _IBEXWord *idx, const char *name)
-{
- struct _IBEXCursor *idc;
- nameid_t wordid;
- char *key;
- int len;
-
- /* load all the names into memory, since we're *usually* about to do a lot of these */
-
- /* Note that because of the (poor) hash algorithm, this is >> faster than
- looking up every key in turn. Basically because all keys are stored packed
- in the same list, not in buckets of keys for the same hash (among other reasons) */
-
- if (!idx->nameinit) {
- d(printf("pre-loading all name info into memory\n"));
- idc = idx->nameindex->klass->get_cursor(idx->nameindex);
- while ( (wordid = idc->klass->next(idc)) ) {
- key = idc->index->klass->get_key(idc->index, wordid, &len);
- g_hash_table_insert(idx->namecache, key, (void *)TRUE);
- }
- idc->klass->close(idc);
- idx->nameinit = TRUE;
- }
-
- return (gboolean)g_hash_table_lookup(idx->namecache, name);
- /*return idx->nameindex->klass->find(idx->nameindex, name, strlen(name)) != 0;*/
-}
-
-/* returns all matches for word */
-static GPtrArray *find(struct _IBEXWord *idx, const char *word)
-{
- nameid_t wordid, nameid;
- GPtrArray *res;
- GArray *names;
- int i;
- char *new;
- struct _wordcache *cache;
- blockid_t wordblock, wordtail;
-
- res = g_ptr_array_new();
-
- cache = g_hash_table_lookup(idx->wordcache, word);
- if (cache) {
-#if 0
- /* freshen cache entry if we touch it */
- ibex_list_remove((struct _listnode *)cache);
- ibex_list_addtail(&idx->wordnodes, (struct _listnode *)cache);
-#endif
- wordid = cache->wordid;
- wordblock = cache->wordblock;
- wordtail = cache->wordtail;
- } else {
- /* lookup the hash key */
- wordid = idx->wordindex->klass->find(idx->wordindex, word, strlen(word));
- /* get the block for this key */
- wordblock = idx->wordindex->klass->get_data(idx->wordindex, wordid, &wordtail);
- }
- /* and the data for this block */
- names = idx->wordstore->klass->get(idx->wordstore, wordblock, wordtail);
- /* .. including any memory-only data */
- if (cache) {
- if (cache->filealloc == 0 && cache->filecount == 1)
- g_array_append_val(names, cache->file.file0);
- else
- g_array_append_vals(names, cache->file.files, cache->filecount);
- }
-
- /* walk it ... converting id's back to strings */
- g_ptr_array_set_size(res, names->len);
- for (i=0;i<names->len;i++) {
- nameid = g_array_index(names, nameid_t, i);
- new = idx->nameindex->klass->get_key(idx->nameindex, nameid, NULL);
- res->pdata[i] = new;
- }
- g_array_free(names, TRUE);
- return res;
-}
-
-/* find if name contains word */
-static gboolean find_name(struct _IBEXWord *idx, const char *name, const char *word)
-{
- nameid_t wordid, nameid;
- blockid_t nameblock, nametail;
- struct _wordcache *cache;
- int i;
-
- /* if we have the namelist in memory, quick-check that */
- if (idx->nameinit && g_hash_table_lookup(idx->namecache, name) == NULL)
- return FALSE;
-
- /* lookup the hash key for the name */
- nameid = idx->nameindex->klass->find(idx->nameindex, name, strlen(name));
- /* get the block for this name */
- nameblock = idx->nameindex->klass->get_data(idx->nameindex, nameid, &nametail);
-
- /* check if there is an in-memory cache for this word, check its file there first */
- cache = g_hash_table_lookup(idx->wordcache, word);
- if (cache) {
-#if 0
- /* freshen cache entry if we touch it */
- ibex_list_remove((struct _listnode *)cache);
- ibex_list_addtail(&idx->wordnodes, (struct _listnode *)cache);
-#endif
- if (cache->filecount == 1 && cache->filealloc == 0) {
- if (cache->file.file0 == nameid)
- return TRUE;
- } else {
- for (i=0;i<cache->filecount;i++) {
- if (cache->file.files[i] == nameid)
- return TRUE;
- }
- }
- /* not there? well we can use the wordid anyway */
- wordid = cache->wordid;
- } else {
- /* lookup the hash key for word */
- wordid = idx->wordindex->klass->find(idx->wordindex, word, strlen(word));
- }
-
- /* see if wordid is in nameblock */
- return idx->namestore->klass->find(idx->namestore, nameblock, nametail, wordid);
-}
-
-/* cache helper functions */
-/* flush a cache entry to disk, and empty it out */
-static void
-sync_cache_entry(struct _IBEXWord *idx, struct _wordcache *cache)
-{
- GArray array; /* just use this as a header */
- blockid_t oldblock, oldtail;
-
- if (cache->filecount == 0)
- return;
-
- d(printf("syncing cache entry '%s' used %d\n", cache->word, cache->filecount));
- if (cache->filecount == 1 && cache->filealloc == 0)
- array.data = (char *)&cache->file.file0;
- else
- array.data = (char *)cache->file.files;
- array.len = cache->filecount;
- oldblock = cache->wordblock;
- oldtail = cache->wordtail;
- idx->wordstore->klass->add_list(idx->wordstore, &cache->wordblock, &cache->wordtail, &array);
- if (oldblock != cache->wordblock || oldtail != cache->wordtail) {
- idx->wordindex->klass->set_data(idx->wordindex, cache->wordid, cache->wordblock, cache->wordtail);
- }
- cache->filecount = 0;
-}
-
-/* create a new key in an index, returning its id and head block */
-static void
-add_index_key(struct _IBEXIndex *wordindex, const char *word, nameid_t *wordid, blockid_t *wordblock, blockid_t *wordtail)
-{
- /* initialise cache entry - id of word entry and head block */
- *wordid = wordindex->klass->find(wordindex, word, strlen(word));
-
- if (*wordid == 0) {
- *wordid = wordindex->klass->insert(wordindex, word, strlen(word));
- *wordblock = 0;
- *wordtail = 0;
- } else {
- *wordblock = wordindex->klass->get_data(wordindex, *wordid, wordtail);
- }
-}
-
-/* create a new key in a cached index (only word cache so far), flushing old keys
- if too much space is being used */
-static struct _wordcache *
-add_index_cache(struct _IBEXWord *idx, const char *word)
-{
- struct _wordcache *cache;
-
- cache = g_hash_table_lookup(idx->wordcache, word);
- if (cache == 0) {
- /*d(printf("adding %s to cache\n", word));*/
-
-#if 0
- /* see if we have to flush off the last entry */
- if (idx->wordcount >= WORDCACHE_SIZE) {
- struct _wordcache *mincache;
- int min, count=0;
- /* remove last entry, and flush it */
- cache = (struct _wordcache *)idx->wordnodes.tailpred;
- mincache = cache;
- min = mincache->filecount;
-
- d(printf("flushing word from cache %s\n", cache->word));
- /* instead of just using the last entry, we try and find an entry with
- with only 1 item (failing that, the smallest in the range we look at) */
- /* this could probably benefit greatly from a more sophisticated aging algorithm */
- while (cache->next && count < 100) {
- if (cache->filecount == 1) {
- mincache = cache;
- break;
- }
- if (cache->filecount > 0 && cache->filecount < min) {
- mincache = cache;
- min = cache->filecount;
- }
- cache = cache->next;
- count++;
- }
- ibex_list_remove((struct _listnode *)mincache);
- g_hash_table_remove(idx->wordcache, mincache->word);
- sync_cache_entry(idx, mincache);
- if (mincache->filealloc)
- g_free(mincache->file.files);
- g_free(mincache);
- idx->wordcount--;
- }
-#endif
- cache = g_malloc0(sizeof(*cache)+strlen(word));
- /* if we're in an index state, we can assume we dont have it if we dont have it in memory */
- if (idx->precount == 0) {
- /* initialise cache entry - id of word entry and head block */
- add_index_key(idx->wordindex, word, &cache->wordid, &cache->wordblock, &cache->wordtail);
- } else {
- cache->wordid = idx->wordindex->klass->insert(idx->wordindex, word, strlen(word));
- }
- /* other fields */
- strcpy(cache->word, word);
- cache->filecount = 0;
- g_hash_table_insert(idx->wordcache, cache->word, cache);
-#if 0
- ibex_list_addhead(&idx->wordnodes, (struct _listnode *)cache);
-#endif
- idx->wordcount++;
- } else {
- /*d(printf("already have %s in cache\n", word));*/
-#if 0
- /* move cache bucket ot the head of the cache list */
- ibex_list_remove((struct _listnode *)cache);
- ibex_list_addhead(&idx->wordnodes, (struct _listnode *)cache);
-#endif
- }
- return cache;
-}
-
-/* adds a single word to name (slow) */
-static void add(struct _IBEXWord *idx, const char *name, const char *word)
-{
- nameid_t nameid;
- blockid_t nameblock, newblock, nametail, newtail;
- struct _wordcache *cache;
-
- g_error("Dont use wordindex::add()");
- abort();
-
- cache = add_index_cache(idx, word);
-
- /* get the nameid and block start for this name */
- add_index_key(idx->nameindex, name, &nameid, &nameblock, &nametail);
-
- /* check for repeats of the last name - dont add anything */
- if (cache->filecount == 1 && cache->filealloc == 0) {
- if (cache->file.file0 == nameid)
- return;
- } else {
- if (cache->file.files[cache->filecount] == nameid)
- return;
- }
-
- /* see if we are setting the first, drop it in the union */
- if (cache->filecount == 0 && cache->filealloc == 0) {
- cache->file.file0 = nameid;
- } else if (cache->filecount == 1 && cache->filealloc == 0) {
- nameid_t saveid;
- /* we need to allocate space for words */
- saveid = cache->file.file0;
- cache->file.files = g_malloc(sizeof(cache->file.files[0]) * CACHE_FILE_COUNT);
- /* this could possibly grow as needed, but i wont for now */
- cache->filealloc = CACHE_FILE_COUNT;
- cache->file.files[0] = saveid;
- cache->file.files[1] = nameid;
- } else {
- cache->file.files[cache->filecount] = nameid;
- }
-
- cache->filecount++;
-
- /* if we are full, force a flush now */
- if (cache->filealloc && cache->filecount >= cache->filealloc) {
- sync_cache_entry(idx, cache);
- }
-
- newtail = nametail;
- newblock = nameblock;
- idx->namestore->klass->add(idx->namestore, &newblock, &newtail, cache->wordid);
- if (newblock != nameblock || newtail != nametail) {
- idx->nameindex->klass->set_data(idx->nameindex, nameid, newblock, newtail);
- }
-}
-
-/* adds a bunch of words to a given name */
-static void add_list(struct _IBEXWord *idx, const char *name, GPtrArray *words)
-{
- int i;
- GArray *data = g_array_new(0, 0, sizeof(nameid_t));
- blockid_t nameblock, newblock, nametail, newtail;
- nameid_t nameid;
- struct _wordcache *cache;
-
- d(printf("Adding words to name %s\n", name));
-
- d(cache_sanity(idx));
-
- /* make sure we keep the namecache in sync, if it is active */
- if (idx->nameinit && g_hash_table_lookup(idx->namecache, name) == NULL) {
- g_hash_table_insert(idx->namecache, g_strdup(name), (void *)TRUE);
- /* we know we dont have it in the disk hash either, so we insert anew (saves a lookup) */
- nameid = idx->nameindex->klass->insert(idx->nameindex, name, strlen(name));
- nameblock = 0;
- nametail = 0;
- } else {
- /* get the nameid and block start for this name */
- add_index_key(idx->nameindex, name, &nameid, &nameblock, &nametail);
- }
-
- d(cache_sanity(idx));
-
- for (i=0;i<words->len;i++) {
- char *word = words->pdata[i];
-
- cache = add_index_cache(idx, word);
-
- /*d(cache_sanity(idx));*/
-
- /* check for duplicates; doesn't catch duplicates over an overflow boundary. Watch me care. */
- if (cache->filecount == 0
- /* the 1 item case */
- || (cache->filecount == 1 && cache->filealloc == 0 && cache->file.file0 != nameid)
- /* the normal case */
- || (cache->filealloc > 0 && cache->file.files[cache->filecount-1] != nameid)) {
-
- /* see if we are setting the first, drop it in the union */
- if (cache->filecount == 0 && cache->filealloc == 0) {
- cache->file.file0 = nameid;
- } else if (cache->filecount == 1 && cache->filealloc == 0) {
- nameid_t saveid;
- /* we need to allocate space for words */
- saveid = cache->file.file0;
- cache->file.files = g_malloc(sizeof(cache->file.files[0]) * CACHE_FILE_COUNT);
- /* this could possibly grow as needed, but i wont for now */
- cache->filealloc = CACHE_FILE_COUNT;
- cache->file.files[0] = saveid;
- cache->file.files[1] = nameid;
- } else {
- cache->file.files[cache->filecount] = nameid;
- }
-
- cache->filecount++;
-
- /* if we are full, force a flush now */
- if (cache->filealloc && cache->filecount >= cache->filealloc) {
- sync_cache_entry(idx, cache);
- }
-
- /*d(cache_sanity(idx));*/
-
- /* and append this wordid for this name in memory */
- g_array_append_val(data, cache->wordid);
- }
-
- /*d(cache_sanity(idx));*/
- }
-
- d(cache_sanity(idx));
-
- /* and append these word id's in one go */
- newblock = nameblock;
- newtail = nametail;
- idx->namestore->klass->add_list(idx->namestore, &newblock, &newtail, data);
- if (newblock != nameblock || newtail != nametail) {
- idx->nameindex->klass->set_data(idx->nameindex, nameid, newblock, newtail);
- }
-
- d(cache_sanity(idx));
-
- g_array_free(data, TRUE);
-}
-
-/* sync any in-memory data to disk */
-static int
-word_sync(struct _IBEXWord *idx)
-{
- /* we just flush also, save memory */
- word_flush(idx);
-
-#if 0
- struct _wordcache *cache = (struct _wordcache *)idx->wordnodes.head;
-
- while (cache->next) {
- sync_cache_entry(idx, cache);
- cache = cache->next;
- }
-
- /*ibex_hash_dump(idx->wordindex);*/
- /*ibex_hash_dump(idx->nameindex);*/
-#endif
- return 0;
-}
-
-static gboolean
-free_key(void *key, void *value, void *data)
-{
- g_free(key);
-
- return TRUE;
-}
-
-/* sync and flush any in-memory data to disk and free it */
-static int
-word_flush(struct _IBEXWord *idx)
-{
- d(cache_sanity(idx));
-
- g_hash_table_foreach_remove(idx->wordcache, sync_free_value, idx);
- idx->wordcount = 0;
- if (idx->nameinit) {
- g_hash_table_foreach_remove(idx->namecache, free_key, NULL);
- idx->nameinit = FALSE;
- }
- return 0;
-}
-
-static int word_close(struct _IBEXWord *idx)
-{
- idx->klass->flush(idx);
-
- idx->namestore->klass->close(idx->namestore);
- idx->nameindex->klass->close(idx->nameindex);
- /*same as namestore:
- idx->wordstore->klass->close(idx->wordstore);*/
- idx->wordindex->klass->close(idx->wordindex);
- g_hash_table_destroy(idx->wordcache);
- g_hash_table_destroy(idx->namecache);
- g_free(idx);
-
- return 0;
-}
-
-/* debugging/tuning function */
-
-struct _stats {
- int memcache; /* total memory used by cache entries */
- int memfile; /* total mem ysed by file data */
- int memfileused; /* actual memory used by file data */
- int memword; /* total mem used by words */
- int file1; /* total file entries with only 1 entry */
- int total;
-};
-
-static void
-get_info(void *key, void *value, void *data)
-{
- struct _wordcache *cache = (struct _wordcache *)value;
- struct _stats *stats = (struct _stats *)data;
-
- /* round up to probable alignment, + malloc overheads */
- stats->memcache += ((sizeof(struct _wordcache) + strlen(cache->word) + 4 + 3) & ~3);
- if (cache->filealloc > 0) {
- /* size of file array data */
- stats->memcache += sizeof(nameid_t) * cache->filealloc + 4;
- /* actual used memory */
- stats->memfile += sizeof(nameid_t) * cache->filealloc;
- stats->memfileused += sizeof(nameid_t) * cache->filecount;
- }
- if (cache->filecount == 1 && cache->filealloc == 0)
- stats->file1++;
-
- stats->memword += strlen(cache->word);
- stats->total++;
-}
-
-static char *
-num(int num)
-{
- int n;
- static char buf[256];
- char *p = buf;
- char type = 0;
-
- n = num;
- if (n>1000000) {
- p+= sprintf(p, "%d ", n/1000000);
- n -= (n/1000000)*1000000;
- type = 'M';
- }
- if (n>1000) {
- if (num>1000000)
- p+= sprintf(p, "%03d ", n/1000);
- else
- p+= sprintf(p, "%d ", n/1000);
- n -= (n/1000)*1000;
- if (type == 0)
- type = 'K';
- }
- if (num > 1000)
- p += sprintf(p, "%03d", n);
- else
- p += sprintf(p, "%d", n);
-
- n = num;
- switch (type) {
- case 'M':
- p += sprintf(p, ", %d.%02dM", n/1024/1024, n*100/1024/1024);
- break;
- case 'K':
- p += sprintf(p, ", %d.%02dK", n/1024, n*100/1024);
- break;
- case 0:
- break;
- }
-
- return buf;
-}
-
-void word_index_mem_dump_info(struct _IBEXWord *idx);
-
-void word_index_mem_dump_info(struct _IBEXWord *idx)
-{
- struct _stats stats = { 0 };
- int useful;
-
- g_hash_table_foreach(idx->wordcache, get_info, &stats);
-
- useful = stats.total * sizeof(struct _wordcache) + stats.memword + stats.memfile;
-
- printf("Word Index Stats:\n");
- printf("Total word count: %d\n", stats.total);
- printf("Total memory used: %s\n", num(stats.memcache));
- printf("Total useful memory: %s\n", num(useful));
- printf("Total malloc/alignment overhead: %s\n", num(stats.memcache - useful));
- printf("Total buffer overhead: %s\n", num(stats.memfile - stats.memfileused));
- printf("Space taken by words: %s\n", num(stats.memword + stats.total));
- printf("Number of 1-word entries: %s\n", num(stats.file1));
- if (stats.memcache > 0)
- printf("%% unused space: %d %%\n", (stats.memfile - stats.memfileused) * 100 / stats.memcache);
-}
-