aboutsummaryrefslogtreecommitdiffstats
path: root/libibex/ibex_block.c
diff options
context:
space:
mode:
Diffstat (limited to 'libibex/ibex_block.c')
-rw-r--r--libibex/ibex_block.c57
1 files changed, 14 insertions, 43 deletions
diff --git a/libibex/ibex_block.c b/libibex/ibex_block.c
index faeee232ac..c1aad5e78a 100644
--- a/libibex/ibex_block.c
+++ b/libibex/ibex_block.c
@@ -2,7 +2,7 @@
#include <glib.h>
#include <stdio.h>
-#include <unicode.h>
+#include <gal/unicode/gunicode.h>
#include <ctype.h>
#include <string.h>
#include <errno.h>
@@ -46,7 +46,7 @@ static void
ibex_normalise_word(char *start, char *end, char *buf)
{
unsigned char *s, *d;
- unicode_char_t uc;
+ gunichar uc;
s = (unsigned char *)start;
d = (unsigned char *)buf;
@@ -59,7 +59,8 @@ ibex_normalise_word(char *start, char *end, char *buf)
*d++ = tolower (*s);
s++;
} else {
- char *next = unicode_get_utf8 (s, &uc);
+ char *next = g_utf8_next_char (s);
+ uc = g_utf8_get_char (s);
if (uc >= 0xc0 && uc < 0xc0 + sizeof (utf8_trans)) {
signed char ch = utf8_trans[uc - 0xc0];
if (ch > 0)
@@ -80,54 +81,24 @@ ibex_normalise_word(char *start, char *end, char *buf)
enum { IBEX_ALPHA, IBEX_NONALPHA, IBEX_INVALID, IBEX_INCOMPLETE };
-/* This incorporates parts of libunicode, because there's no way to
- * force libunicode to not read past a certain point.
- */
static int
-utf8_category (char *sp, char **snp, char *send)
+utf8_category (char *p, char **np, char *end)
{
- unsigned char *p = (unsigned char *)sp, **np = (unsigned char **)snp;
- unsigned char *end = (unsigned char *)send;
-
- if (isascii (*p)) {
+ if (isascii ((unsigned char)*p)) {
*np = p + 1;
- if (isalpha (*p) || *p == '\'')
+ if (isalpha ((unsigned char)*p) || *p == '\'')
return IBEX_ALPHA;
return IBEX_NONALPHA;
} else {
- unicode_char_t uc;
- int more;
-
- if ((*p & 0xe0) == 0xc0) {
- more = 1;
- uc = *p & 0x1f;
- } else if ((*p & 0xf0) == 0xe0) {
- more = 2;
- uc = *p & 0x0f;
- } else if ((*p & 0xf8) == 0xf0) {
- more = 3;
- uc = *p & 0x07;
- } else if ((*p & 0xfc) == 0xf8) {
- more = 4;
- uc = *p & 0x03;
- } else if ((*p & 0xfe) == 0xfc) {
- more = 5;
- uc = *p & 0x01;
- } else
- return IBEX_INVALID;
+ gunichar uc;
- if (p + more > end)
+ *np = g_utf8_find_next_char (p, end);
+ if (!*np)
return IBEX_INCOMPLETE;
-
- while (more--) {
- if ((*++p & 0xc0) != 0x80)
- return IBEX_INVALID;
- uc <<= 6;
- uc |= *p & 0x3f;
- }
-
- *np = p + 1;
- if (unicode_isalpha (uc))
+ uc = g_utf8_get_char (p);
+ if (uc == (gunichar) -1)
+ return IBEX_INVALID;
+ else if (g_unichar_isalpha (uc))
return IBEX_ALPHA;
else
return IBEX_NONALPHA;