diff options
Diffstat (limited to 'e-util/ename')
-rw-r--r-- | e-util/ename/.cvsignore | 8 | ||||
-rw-r--r-- | e-util/ename/Makefile.am | 51 | ||||
-rw-r--r-- | e-util/ename/TODO | 2 | ||||
-rw-r--r-- | e-util/ename/e-address-western.c | 446 | ||||
-rw-r--r-- | e-util/ename/e-address-western.h | 21 | ||||
-rw-r--r-- | e-util/ename/e-name-western-tables.h | 74 | ||||
-rw-r--r-- | e-util/ename/e-name-western.c | 958 | ||||
-rw-r--r-- | e-util/ename/e-name-western.h | 21 | ||||
-rw-r--r-- | e-util/ename/test-ename-western-gtk.c | 157 | ||||
-rw-r--r-- | e-util/ename/test-ename-western.c | 71 |
10 files changed, 0 insertions, 1809 deletions
diff --git a/e-util/ename/.cvsignore b/e-util/ename/.cvsignore deleted file mode 100644 index 4f8c173841..0000000000 --- a/e-util/ename/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -.deps -.libs -Makefile -Makefile.in -*.lo -*.la -test-ename-western -test-ename-western-gtk diff --git a/e-util/ename/Makefile.am b/e-util/ename/Makefile.am deleted file mode 100644 index 480502141e..0000000000 --- a/e-util/ename/Makefile.am +++ /dev/null @@ -1,51 +0,0 @@ -INCLUDES = \ - -DG_LOG_DOMAIN=\"EName\" \ - -I$(srcdir) \ - -I$(srcdir)/.. \ - -I$(top_srcdir) \ - -I. \ - -I.. \ - -I$(top_builddir) \ - -I$(includedir) \ - $(E_NAME_CFLAGS) - -ename_libs = \ - libename.la \ - $(E_NAME_LIBS) - -lib_LTLIBRARIES = libename.la - -libename_la_SOURCES = \ - e-address-western.c \ - e-name-western.c - -libenameincludedir = $(includedir)/ename - -libenameinclude_HEADERS = \ - e-address-western.h \ - e-name-western-tables.h \ - e-name-western.h - - -noinst_LTLIBRARIES = libename-static.la -libename_static_la_SOURCES = $(libename_la_SOURCES) -libename_static_la_LDFLAGS = --all-static - - -noinst_PROGRAMS = \ - test-ename-western \ - test-ename-western-gtk - -test_ename_western_SOURCES = \ - test-ename-western.c - -test_ename_western_LDADD = \ - $(ename_libs) - -test_ename_western_gtk_SOURCES = \ - test-ename-western-gtk.c - -test_ename_western_gtk_LDADD = \ - $(ename_libs) \ - $(E_UTIL_LIBS) \ - $(top_builddir)/e-util/libeutil.la diff --git a/e-util/ename/TODO b/e-util/ename/TODO deleted file mode 100644 index 669661eea7..0000000000 --- a/e-util/ename/TODO +++ /dev/null @@ -1,2 +0,0 @@ -* Support other naming systems. -* Handle misspelled suffixes better. diff --git a/e-util/ename/e-address-western.c b/e-util/ename/e-address-western.c deleted file mode 100644 index f2200a9dba..0000000000 --- a/e-util/ename/e-address-western.c +++ /dev/null @@ -1,446 +0,0 @@ -/* -------------------------------------------------- - - An address parser, yielding fields as per RFC 2426. - - Author: - Jesse Pavel (jpavel@ximian.com) - - Copyright 2000, Ximian, Inc. - -------------------------------------------------- -*/ - -#include <ctype.h> -#include <string.h> -#include <glib.h> - -#ifdef E_ADDRESS_WESTERN_TEST - -#include "e-address-western.h" - -#else - -#include <ename/e-address-western.h> -#include <gal/util/e-util.h> - -#endif - -/* These are the keywords that will distinguish the start of an extended - address. */ - -static char *extended_keywords[] = { - "apt", "apartment", "suite", NULL -}; - - - -static gboolean -e_address_western_is_line_blank (gchar *line) -{ - gboolean blank = TRUE; - gint cntr; - - /* A blank line consists of whitespace only, or a NULL line. */ - for (cntr = 0; line[cntr] != '\0'; cntr++ ) { - if (!isspace(line[cntr])) { - blank = FALSE; - break; - } - } - - return blank; -} - - - -/* In the array of lines, `lines', we will erase the line at line_num, and - shift the remaining lines, up to line number num_lines, up one position. */ - -static void -e_address_western_shift_line (gchar *lines[], gint line_num, gint num_lines) -{ - gint cntr; - - if (line_num >= (num_lines - 1)) { - /* It is the last line, so simply shift in a NULL. */ - lines[line_num] = NULL; - } - else { - for (cntr = line_num; cntr < num_lines; cntr++) - lines[cntr] = lines[cntr + 1]; - } -} - - -static void -e_address_western_remove_blank_lines (gchar *lines[], gint *linecntr) -{ - gint cntr; - - for (cntr = 0; cntr < *linecntr; cntr++) { - if (e_address_western_is_line_blank (lines[cntr])) { - /* Delete the blank line, and shift all subsequent lines up - one spot to fill its old spot. */ - e_address_western_shift_line (lines, cntr, *linecntr); - - /* Since we must check the newly shifted line, let's - not advance the counter on this next pass. */ - cntr--; - - /* There is now one less line, total. */ - *linecntr -= 1; - } - } -} - - -static gboolean -e_address_western_is_po_box (gchar *line) -{ - gboolean retval = FALSE; - - /* In which phase of processing are we? */ - enum State { FIRSTCHAR, SECONDCHAR, WHITESPACE } state; - - - /* If the first two letters of the line are `p' and `o', and these - are in turn followed by whitespace before another letter, then I - will deem the line a representation of a PO Box address. */ - - gint cntr; - - state = FIRSTCHAR; - for (cntr = 0; line[cntr] != '\0'; cntr++) { - if (state == FIRSTCHAR) { - if (isalnum(line[cntr])) { - if (tolower(line[cntr]) == 'p') - state = SECONDCHAR; - else { - retval = FALSE; - break; - } - } - } - else if (state == SECONDCHAR) { - if (isalnum (line[cntr])) { - if (tolower(line[cntr]) == 'o') - state = WHITESPACE; - else { - retval = FALSE; - break; - } - } - } - else if (state == WHITESPACE) { - if (isspace (line[cntr])) { - retval = TRUE; - break; - } - else if (isalnum (line[cntr])) { - retval = FALSE; - break; - } - } - } - - return retval; -} - -/* A line that contains a comma followed eventually by a number is - deemed to be the line in the form of <town, region postal-code>. */ - -static gboolean -e_address_western_is_postal (guchar *line) -{ - gboolean retval; - int cntr; - - if (strchr (line, ',') == NULL) - retval = FALSE; /* No comma. */ - else { - int index; - - /* Ensure that the first character after the comma is - a letter. */ - index = strcspn (line, ","); - index++; - while (isspace(line[index])) - index++; - - if (!isalpha (line[index])) - return FALSE; /* FIXME: ugly control flow. */ - - cntr = strlen(line) - 1; - - /* Go to the character immediately following the last - whitespace character. */ - while (cntr >= 0 && isspace(line[cntr])) - cntr--; - - while (cntr >= 0 && !isspace(line[cntr])) - cntr--; - - if (cntr == 0) - retval = FALSE; - else { - if (isdigit (line[cntr+1])) - retval = TRUE; - else - retval = FALSE; - } - } - - return retval; -} - -static gchar * -e_address_western_extract_po_box (gchar *line) -{ - /* Return everything from the beginning of the line to - the end of the first word that contains a number. */ - - int index; - - index = 0; - while (!isdigit(line[index])) - index++; - - while (isgraph(line[index])) - index++; - - return g_strndup (line, index); -} - -static gchar * -e_address_western_extract_locality (gchar *line) -{ - gint index; - - /* Everything before the comma is the locality. */ - index = strcspn(line, ","); - - if (index == 0) - return NULL; - else - return g_strndup (line, index); -} - - -/* Whatever resides between the comma and the start of the - postal code is deemed to be the region. */ - -static gchar * -e_address_western_extract_region (gchar *line) -{ - gint start, end; - - start = strcspn (line, ","); - start++; - while (isspace(line[start])) - start++; - - end = strlen(line) - 1; - while (isspace (line[end])) - end--; - - while (!isspace (line[end])) - end--; - - while (isspace (line[end])) - end--; - end++; - - /* Between start and end lie the string. */ - return g_strndup ( (line+start), end-start); -} - -static gchar * -e_address_western_extract_postal_code (gchar *line) -{ - int start, end; - - end = strlen (line) - 1; - while (isspace(line[end])) - end--; - - start = end; - end++; - - while (!isspace(line[start])) - start--; - start++; - - /* Between start and end lie the string. */ - return g_strndup ( (line+start), end-start); -} - - - -static void -e_address_western_extract_street (gchar *line, gchar **street, gchar **extended) -{ - const gchar *split = NULL; - gint cntr; - - for (cntr = 0; extended_keywords[cntr] != NULL; cntr++) { - split = e_strstrcase (line, extended_keywords[cntr]); - if (split != NULL) - break; - } - - if (split != NULL) { - *street = g_strndup (line, (split - line)); - *extended = g_strdup (split); - } - else { - *street = g_strdup (line); - *extended = NULL; - } - -} - - - -EAddressWestern * -e_address_western_parse (const gchar *in_address) -{ - gchar **lines; - gint linecntr, lineindex; - gchar *address; - gint cntr; - gboolean found_po_box, found_postal; - - EAddressWestern *eaw; -#if 0 - gint start, end; /* To be used to classify address lines. */ -#endif - - if (in_address == NULL) - return NULL; - - eaw = (EAddressWestern *)g_malloc (sizeof(EAddressWestern)); - eaw->po_box = NULL; - eaw->extended = NULL; - eaw->street = NULL; - eaw->locality = NULL; - eaw->region = NULL; - eaw->postal_code = NULL; - eaw->country = NULL; - - address = g_strndup (in_address, 2047); - - /* The first thing I'll do is divide the multiline input string - into lines. */ - - /* ... count the lines. */ - linecntr = 1; - lineindex = 0; - while (address[lineindex] != '\0') { - if (address[lineindex] == '\n') - linecntr++; - - lineindex++; - } - - /* ... tally them. */ - lines = (gchar **)g_malloc (sizeof(gchar *) * (linecntr+3)); - lineindex = 0; - lines[0] = &address[0]; - linecntr = 1; - while (address[lineindex] != '\0') { - if (address[lineindex] == '\n') { - lines[linecntr] = &address[lineindex + 1]; - linecntr++; - } - - lineindex++; - } - - /* Convert the newlines at the end of each line (except the last, - because it is already NULL terminated) to NULLs. */ - for (cntr = 0; cntr < (linecntr - 1); cntr++) { - *(strchr (lines[cntr], '\n')) = '\0'; - } - - e_address_western_remove_blank_lines (lines, &linecntr); - - /* Let's just test these functions. */ - found_po_box = FALSE; - found_postal = FALSE; - - for (cntr = 0; cntr < linecntr; cntr++) { - if (e_address_western_is_po_box (lines[cntr])) { - if (eaw->po_box == NULL) - eaw->po_box = e_address_western_extract_po_box (lines[cntr]); - found_po_box = TRUE; - } - else if (e_address_western_is_postal (lines[cntr])) { - if (eaw->locality == NULL) - eaw->locality = e_address_western_extract_locality (lines[cntr]); - if (eaw->region == NULL) - eaw->region = e_address_western_extract_region (lines[cntr]); - if (eaw->postal_code == NULL) - eaw->postal_code = e_address_western_extract_postal_code (lines[cntr]); - found_postal = TRUE; - } - else { - if (found_postal) { - if (eaw->country == NULL) - eaw->country = g_strdup (lines[cntr]); - else { - gchar *temp; - temp = g_strconcat (eaw->country, "\n", lines[cntr], NULL); - g_free (eaw->country); - eaw->country = temp; - } - } - else { - if (eaw->street == NULL) { - e_address_western_extract_street (lines[cntr], &eaw->street, - &eaw->extended ); - } - else { - if (eaw->extended == NULL) { - eaw->extended = g_strdup (lines[cntr]); - } - else { - gchar *temp; - temp = g_strconcat (eaw->extended, "\n", lines[cntr], NULL); - g_free (eaw->extended); - eaw->extended = temp; - } - } - } - } - } - - g_free (lines); - g_free (address); - - return eaw; -} - - -void -e_address_western_free (EAddressWestern *eaw) -{ - if (eaw == NULL) - return; - - if (eaw->po_box != NULL) - g_free(eaw->po_box); - if (eaw->extended != NULL) - g_free(eaw->extended); - if (eaw->street != NULL) - g_free(eaw->street); - if (eaw->locality != NULL) - g_free(eaw->locality); - if (eaw->region != NULL) - g_free(eaw->region); - if (eaw->postal_code != NULL) - g_free(eaw->postal_code); - if (eaw->country != NULL) - g_free(eaw->country); - - g_free (eaw); -} - diff --git a/e-util/ename/e-address-western.h b/e-util/ename/e-address-western.h deleted file mode 100644 index e6417f88c4..0000000000 --- a/e-util/ename/e-address-western.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef __E_ADDRESS_WESTERN_H__ -#define __E_ADDRESS_WESTERN_H__ - -typedef struct { - - /* Public */ - char *po_box; - char *extended; /* I'm not sure what this is. */ - char *street; - char *locality; /* For example, the city or town. */ - char *region; /* The state or province. */ - char *postal_code; - char *country; -} EAddressWestern; - -EAddressWestern *e_address_western_parse (const char *address); -void e_address_western_free (EAddressWestern *eaw); - -#endif /* ! __E_ADDRESS_WESTERN_H__ */ - - diff --git a/e-util/ename/e-name-western-tables.h b/e-util/ename/e-name-western-tables.h deleted file mode 100644 index b5459049fa..0000000000 --- a/e-util/ename/e-name-western-tables.h +++ /dev/null @@ -1,74 +0,0 @@ -#ifndef __E_NAME_WESTERN_TABLES_H__ -#define __E_NAME_WESTERN_TABLES_H__ - -char *e_name_western_pfx_table[] = { - - /* - * English. - */ - "mister", "miss.", "mr.", "mrs.", "ms.", - "miss", "mr", "mrs", "ms", "sir", - "professor", "prof.", "dr", "dr.", "doctor", - "judge", "justice", "chief justice", - "congressman", "congresswoman", "commander", - "lieutenant", "lt.", "colonel", "col.", "major", "maj.", - "general", "gen.", "admiral", "admr.", "sergeant", "sgt.", - "lord", "lady", "baron", "baroness", "duke", "duchess", - "king", "queen", "prince", "princess", - - "the most honorable", "the honorable", - "the reverend", "his holiness", - "his eminence", "his majesty", "her majesty", - "his grace", "her grace", - - "president", "vice president", "secretary", "undersecretary", - "consul", "ambassador", - - "senator", "saint", "st.", "pastor", "deacon", - "father", "bishop", "archbishop", "cardinal", "pope", - "reverend", "rev.", "rabbi", - - /* - * French. - */ - "monsieur", "m.", "mademoiselle", "melle", - "madame", "mme", "professeur", "dauphin", "dauphine", - - /* - * German - */ - "herr", "frau", "fraulein", "herr doktor", "doktor frau", "doktor frau doktor", - "frau doktor", - - - /* - * Spanish. - */ - "senor", "senora", "sra.", "senorita", "srita.", - - NULL}; - -char *e_name_western_sfx_table[] = { - - /* - * English. - */ - "junior", "senior", "jr", "sr", "I", "II", "III", "IV", "V", - "VI", "VII", "VIII", "IX", "X", "XI", "XII", "XIII", "XIV", - "XV", "XVI", "XVII", "XVIII", "XIX", "XX", "XXI", "XXII", - "phd", "ms", "md", "esq", "esq.", "esquire", - - NULL}; - -char *e_name_western_twopart_sfx_table[] = { - - /* - * English. - */ - "the first", "the second", "the third", - - NULL}; - -char *e_name_western_complex_last_table[] = {"van", "von", "de", "di", NULL}; - -#endif /* ! __E_NAME_WESTERN_TABLES_H__ */ diff --git a/e-util/ename/e-name-western.c b/e-util/ename/e-name-western.c deleted file mode 100644 index b7b2459762..0000000000 --- a/e-util/ename/e-name-western.c +++ /dev/null @@ -1,958 +0,0 @@ -/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ -/* - * A simple Western name parser. - * - * <Nat> Jamie, do you know anything about name parsing? - * <jwz> Are you going down that rat hole? Bring a flashlight. - * - * Authors: - * Nat Friedman <nat@ximian.com> - * - * Copyright 1999 - 2001, Ximian, Inc. - */ - -#include <ctype.h> -#include <string.h> -#include <glib.h> - -#include <ename/e-name-western.h> -#include <ename/e-name-western-tables.h> - -typedef struct { - int prefix_idx; - int first_idx; - int middle_idx; - int nick_idx; - int last_idx; - int suffix_idx; -} ENameWesternIdxs; - -static int -e_name_western_str_count_words (char *str) -{ - int word_count; - char *p; - - word_count = 0; - - for (p = str; p != NULL; p = strchr (p, ' ')) { - word_count ++; - p ++; - } - - return word_count; -} - -static void -e_name_western_cleanup_string (char **str) -{ - char *newstr; - char *p; - - if (*str == NULL) - return; - - /* skip any spaces and commas at the start of the string */ - p = *str; - while (isspace ((unsigned char)*p) || *p == ',') - p ++; - - /* make the copy we're going to return */ - newstr = g_strdup (p); - - if ( strlen(newstr) > 0) { - /* now search from the back, skipping over any spaces and commas */ - p = newstr + strlen (newstr) - 1; - while (isspace ((unsigned char)*p) || *p == ',') - p --; - /* advance p to after the character that caused us to exit the - previous loop, and end the string. */ - if ((! isspace ((unsigned char)*p)) && *p != ',') - p ++; - *p = '\0'; - } - - g_free (*str); - *str = newstr; -} - -static char * -e_name_western_get_words_at_idx (char *str, int idx, int num_words) -{ - char *words; - char *p; - int word_count; - int words_len; - - /* - * Walk to the end of the words. - */ - word_count = 0; - p = str + idx; - while (word_count < num_words && *p != '\0') { - while (! isspace ((unsigned char)*p) && *p != '\0') - p ++; - - while (isspace ((unsigned char)*p) && *p != '\0') - p ++; - - word_count ++; - } - - words_len = p - str - idx - 1; - - if (*p == '\0') - words_len ++; - - words = g_malloc0 (1 + words_len); - strncpy (words, str + idx, words_len); - - return words; -} - -/* - * What the fuck is wrong with glib's MAX macro. - */ -static int -e_name_western_max (const int a, const int b) -{ - if (a > b) - return a; - - return b; -} - -static gboolean -e_name_western_word_is_suffix (char *word) -{ - int i; - - for (i = 0; e_name_western_sfx_table [i] != NULL; i ++) { - int length = strlen (e_name_western_sfx_table [i]); - if (!g_strcasecmp (word, e_name_western_sfx_table [i]) || - ( !g_strncasecmp (word, e_name_western_sfx_table [i], length) && - strlen(word) == length + 1 && - word[length] == '.' )) - return TRUE; - } - - return FALSE; -} - -static char * -e_name_western_get_one_prefix_at_str (char *str) -{ - char *word; - int i; - - /* - * Check for prefixes from our table. - */ - for (i = 0; e_name_western_pfx_table [i] != NULL; i ++) { - int pfx_words; - char *words; - - pfx_words = e_name_western_str_count_words (e_name_western_pfx_table [i]); - words = e_name_western_get_words_at_idx (str, 0, pfx_words); - - if (! g_strcasecmp (words, e_name_western_pfx_table [i])) - return words; - - g_free (words); - } - - /* - * Check for prefixes we don't know about. These are always a - * sequence of more than one letters followed by a period. - */ - word = e_name_western_get_words_at_idx (str, 0, 1); - - if (strlen (word) > 2 && - isalpha ((unsigned char) word [0]) && - isalpha ((unsigned char) word [1]) && - word [strlen (word) - 1] == '.') - return word; - - g_free (word); - - return NULL; -} - -static char * -e_name_western_get_prefix_at_str (char *str) -{ - char *pfx; - char *pfx1; - char *pfx2; - char *p; - - /* Get the first prefix. */ - pfx1 = e_name_western_get_one_prefix_at_str (str); - - if (pfx1 == NULL) - return NULL; - - /* Check for a second prefix. */ - p = str + strlen (pfx1); - while (isspace ((unsigned char)*p) && *p != '\0') - p ++; - - pfx2 = e_name_western_get_one_prefix_at_str (p); - - if (pfx2 != NULL) { - int pfx_len; - - pfx_len = (p + strlen (pfx2)) - str; - pfx = g_malloc0 (pfx_len + 1); - strncpy (pfx, str, pfx_len); - } else { - pfx = g_strdup (pfx1); - } - - g_free (pfx1); - g_free (pfx2); - - return pfx; -} - -static void -e_name_western_extract_prefix (ENameWestern *name, ENameWesternIdxs *idxs) -{ - char *pfx; - - pfx = e_name_western_get_prefix_at_str (name->full); - - if (pfx == NULL) - return; - - idxs->prefix_idx = 0; - name->prefix = pfx; -} - -static gboolean -e_name_western_is_complex_last_beginning (char *word) -{ - int i; - - for (i = 0; e_name_western_complex_last_table [i] != NULL; i ++) { - - if (! g_strcasecmp ( - word, e_name_western_complex_last_table [i])) - return TRUE; - } - - return FALSE; -} - -static void -e_name_western_extract_first (ENameWestern *name, ENameWesternIdxs *idxs) -{ - /* - * If there's a prefix, then the first name is right after it. - */ - if (idxs->prefix_idx != -1) { - int first_idx; - char *p; - - first_idx = idxs->prefix_idx + strlen (name->prefix); - - /* Skip past white space. */ - p = name->full + first_idx; - while (isspace ((unsigned char)*p) && *p != '\0') - p++; - - if (*p == '\0') - return; - - idxs->first_idx = p - name->full; - name->first = e_name_western_get_words_at_idx ( - name->full, idxs->first_idx, 1); - - } else { - - /* - * Otherwise, the first name is probably the first string. - */ - idxs->first_idx = 0; - name->first = e_name_western_get_words_at_idx ( - name->full, idxs->first_idx, 1); - } - - /* - * Check that we didn't just assign the beginning of a - * compound last name to the first name. - */ - if (name->first != NULL) { - if (e_name_western_is_complex_last_beginning (name->first)) { - g_free (name->first); - name->first = NULL; - idxs->first_idx = -1; - } - } -} - -static void -e_name_western_extract_middle (ENameWestern *name, ENameWesternIdxs *idxs) -{ - char *word; - int middle_idx; - - /* - * Middle names can only exist if you have a first name. - */ - if (idxs->first_idx == -1) - return; - - middle_idx = idxs->first_idx + strlen (name->first) + 1; - - if (middle_idx > strlen (name->full)) - return; - - /* - * Search for the first space (or the terminating \0) - */ - while (isspace ((unsigned char)name->full [middle_idx]) && - name->full [middle_idx] != '\0') - middle_idx ++; - - if (name->full [middle_idx] == '\0') - return; - - /* - * Skip past the nickname, if it's there. - */ - if (name->full [middle_idx] == '\"') { - if (idxs->nick_idx == -1) - return; - - middle_idx = idxs->nick_idx + strlen (name->nick) + 1; - - while (isspace ((unsigned char)name->full [middle_idx]) && - name->full [middle_idx] != '\0') - middle_idx ++; - - if (name->full [middle_idx] == '\0') - return; - } - - /* - * Make sure this isn't the beginning of a complex last name. - */ - word = e_name_western_get_words_at_idx (name->full, middle_idx, 1); - if (e_name_western_is_complex_last_beginning (word)) { - g_free (word); - return; - } - - /* - * Make sure this isn't a suffix. - */ - e_name_western_cleanup_string (& word); - if (e_name_western_word_is_suffix (word)) { - g_free (word); - return; - } - - /* - * Make sure we didn't just grab a cute nickname. - */ - if (word [0] == '\"') { - g_free (word); - return; - } - - idxs->middle_idx = middle_idx; - name->middle = word; -} - -static void -e_name_western_extract_nickname (ENameWestern *name, ENameWesternIdxs *idxs) -{ - int idx; - int start_idx; - char *str; - - if (idxs->first_idx == -1) - return; - - if (idxs->middle_idx > idxs->first_idx) - idx = idxs->middle_idx + strlen (name->middle); - else - idx = idxs->first_idx + strlen (name->first); - - while (name->full [idx] != '\"' && name->full [idx] != '\0') - idx ++; - - if (name->full [idx] != '\"') - return; - - start_idx = idx; - - /* - * Advance to the next double quote. - */ - idx ++; - - while (name->full [idx] != '\"' && name->full [idx] != '\0') - idx ++; - - if (name->full [idx] == '\0') - return; - - str = g_malloc0 (idx - start_idx + 2); - strncpy (str, name->full + start_idx, idx - start_idx + 1); - - name->nick = str; - idxs->nick_idx = start_idx; -} - -static int -e_name_western_last_get_max_idx (ENameWestern *name, ENameWesternIdxs *idxs) -{ - int max_idx = -1; - - if (name->prefix != NULL) - max_idx = e_name_western_max ( - max_idx, idxs->prefix_idx + strlen (name->prefix)); - - if (name->first != NULL) - max_idx = e_name_western_max ( - max_idx, idxs->first_idx + strlen (name->first)); - - if (name->middle != NULL) - max_idx = e_name_western_max ( - max_idx, idxs->middle_idx + strlen (name->middle)); - - if (name->nick != NULL) - max_idx = e_name_western_max ( - max_idx, idxs->nick_idx + strlen (name->nick)); - - return max_idx; -} - -static void -e_name_western_extract_last (ENameWestern *name, ENameWesternIdxs *idxs) -{ - char *word; - int idx = -1; - - idx = e_name_western_last_get_max_idx (name, idxs); - - /* - * In the case where there is no preceding name element, the - * name is either just a first name ("Nat", "John"), is a - * single-element name ("Cher", which we treat as a first - * name), or is just a last name. The only time we can - * differentiate a last name alone from a single-element name - * or a first name alone is if it's a complex last name ("de - * Icaza", "van Josephsen"). So if there is no preceding name - * element, we check to see whether or not the first part of - * the name is the beginning of a complex name. If it is, - * we subsume the entire string. If we accidentally subsume - * the suffix, this will get fixed in the fixup routine. - */ - if (idx == -1) { - word = e_name_western_get_words_at_idx (name->full, 0, 1); - if (! e_name_western_is_complex_last_beginning (word)) { - g_free (word); - return; - } - - name->last = g_strdup (name->full); - idxs->last_idx = 0; - return; - } - - /* Skip past the white space. */ - while (isspace ((unsigned char)name->full [idx]) && name->full [idx] != '\0') - idx ++; - - if (name->full [idx] == '\0') - return; - - word = e_name_western_get_words_at_idx (name->full, idx, 1); - e_name_western_cleanup_string (& word); - if (e_name_western_word_is_suffix (word)) { - g_free (word); - return; - } - g_free (word); - - /* - * Subsume the rest of the string into the last name. If we - * accidentally include the prefix, it will get fixed later. - * This is the only way to handle things like "Miguel de Icaza - * Amozorrutia" without dropping data and forcing the user - * to retype it. - */ - name->last = g_strdup (name->full + idx); - idxs->last_idx = idx; -} - -static char * -e_name_western_get_preceding_word (char *str, int idx) -{ - int word_len; - char *word; - char *p; - - p = str + idx; - - while (isspace ((unsigned char)*p) && p > str) - p --; - - while (! isspace ((unsigned char)*p) && p > str) - p --; - - if (isspace ((unsigned char)*p)) - p ++; - - word_len = (str + idx) - p; - word = g_malloc0 (word_len + 1); - if (word_len > 0) - strncpy (word, p, word_len); - - return word; -} - -static char * -e_name_western_get_suffix_at_str_end (char *str) -{ - char *suffix; - char *p; - - /* - * Walk backwards till we reach the beginning of the - * (potentially-comma-separated) list of suffixes. - */ - p = str + strlen (str); - while (1) { - char *nextp; - char *word; - - word = e_name_western_get_preceding_word (str, p - str); - nextp = p - strlen (word) - 1; - - e_name_western_cleanup_string (& word); - - if (e_name_western_word_is_suffix (word)) { - p = nextp; - g_free (word); - } else { - g_free (word); - break; - } - } - - if (p == (str + strlen (str))) - return NULL; - - suffix = g_strdup (p); - e_name_western_cleanup_string (& suffix); - - if (strlen (suffix) == 0) { - g_free (suffix); - return NULL; - } - - return suffix; -} - -static void -e_name_western_extract_suffix (ENameWestern *name, ENameWesternIdxs *idxs) -{ - - name->suffix = e_name_western_get_suffix_at_str_end (name->full); - - if (name->suffix == NULL) - return; - - idxs->suffix_idx = strlen (name->full) - strlen (name->suffix); -} - -static gboolean -e_name_western_detect_backwards (ENameWestern *name, ENameWesternIdxs *idxs) -{ - char *comma; - char *word; - - comma = strchr (name->full, ','); - - if (comma == NULL) - return FALSE; - - /* - * If there's a comma, we need to detect whether it's - * separating the last name from the first or just separating - * suffixes. So we grab the word which comes before the - * comma and check if it's a suffix. - */ - word = e_name_western_get_preceding_word (name->full, comma - name->full); - - if (e_name_western_word_is_suffix (word)) { - g_free (word); - return FALSE; - } - - g_free (word); - return TRUE; -} - -static void -e_name_western_reorder_asshole (ENameWestern *name, ENameWesternIdxs *idxs) -{ - char *prefix; - char *last; - char *suffix; - char *firstmidnick; - char *newfull; - - char *comma; - char *p; - - if (! e_name_western_detect_backwards (name, idxs)) - return; - - /* - * Convert - * <Prefix> <Last name>, <First name> <Middle[+nick] name> <Suffix> - * to - * <Prefix> <First name> <Middle[+nick] name> <Last name> <Suffix> - */ - - /* - * Grab the prefix from the beginning. - */ - prefix = e_name_western_get_prefix_at_str (name->full); - - /* - * Everything from the end of the prefix to the comma is the - * last name. - */ - comma = strchr (name->full, ','); - if (comma == NULL) - return; - - p = name->full + (prefix == NULL ? 0 : strlen (prefix)); - - while (isspace ((unsigned char)*p) && *p != '\0') - p ++; - - last = g_malloc0 (comma - p + 1); - strncpy (last, p, comma - p); - - /* - * Get the suffix off the end. - */ - suffix = e_name_western_get_suffix_at_str_end (name->full); - - /* - * Firstmidnick is everything from the comma to the beginning - * of the suffix. - */ - p = comma + 1; - - while (isspace ((unsigned char)*p) && *p != '\0') - p ++; - - if (suffix != NULL) { - char *q; - - /* - * Point q at the beginning of the suffix. - */ - q = name->full + strlen (name->full) - strlen (suffix) - 1; - - /* - * Walk backwards until we hit the space which - * separates the suffix from firstmidnick. - */ - while (! isspace ((unsigned char)*q) && q > comma) - q --; - - if ((q - p + 1) > 0) { - firstmidnick = g_malloc0 (q - p + 1); - strncpy (firstmidnick, p, q - p); - } else - firstmidnick = NULL; - } else { - firstmidnick = g_strdup (p); - } - - /* - * Create our new reordered version of the name. - */ -#define NULLSTR(a) ((a) == NULL ? "" : (a)) - newfull = g_strdup_printf ("%s %s %s %s", NULLSTR (prefix), NULLSTR (firstmidnick), - NULLSTR (last), NULLSTR (suffix)); - g_strstrip (newfull); - g_free (name->full); - name->full = newfull; - - - g_free (prefix); - g_free (firstmidnick); - g_free (last); - g_free (suffix); -} - -static void -e_name_western_zap_nil (char **str, int *idx) -{ - if (*str == NULL) - return; - - if (strlen (*str) != 0) - return; - - *idx = -1; - g_free (*str); - *str = NULL; -} - -#define FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION \ - char *last_start = NULL; \ - if (name->last) \ - last_start = strchr (name->last, ' '); \ - if (last_start) { \ - char *new_last, *new_first; \ - \ - new_last = g_strdup (last_start + 1); \ - *last_start = '\0'; \ - \ - idxs->last_idx += (last_start - name->last) + 1; \ - \ - new_first = g_strdup_printf ("%s %s %s", name->first, name->middle, name->last); \ - \ - g_free (name->first); \ - g_free (name->middle); \ - g_free (name->last); \ - \ - name->first = new_first; \ - name->middle = NULL; \ - name->last = new_last; \ - \ - idxs->middle_idx = -1; \ - } else { \ - char *new_first; \ - \ - new_first = g_strdup_printf ("%s %s %s", name->first, name->middle, name->last); \ - \ - g_free (name->first); \ - g_free (name->middle); \ - g_free (name->last); \ - \ - name->first = new_first; \ - name->middle = NULL; \ - name->last = NULL; \ - idxs->middle_idx = -1; \ - idxs->last_idx = -1; \ - } - -#define CHECK_MIDDLE_NAME_FOR_CONJUNCTION(conj) \ - if (idxs->middle_idx != -1 && !strcmp (name->middle, conj)) { \ - FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION \ - } - -#define CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE(conj) \ - if (idxs->middle_idx != -1 && !strcasecmp (name->middle, conj)) { \ - FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION \ - } - -static void -e_name_western_fixup (ENameWestern *name, ENameWesternIdxs *idxs) -{ - /* - * The middle and last names cannot be the same. - */ - if (idxs->middle_idx != -1 && idxs->middle_idx == idxs->last_idx) { - idxs->middle_idx = -1; - g_free (name->middle); - name->middle = NULL; - } - - /* - * If we have a middle name and no last name, then we mistook - * the last name for the middle name. - */ - if (idxs->last_idx == -1 && idxs->middle_idx != -1) { - idxs->last_idx = idxs->middle_idx; - name->last = name->middle; - name->middle = NULL; - idxs->middle_idx = -1; - } - - /* - * Check to see if we accidentally included the suffix in the - * last name. - */ - if (idxs->suffix_idx != -1 && idxs->last_idx != -1 && - idxs->suffix_idx < (idxs->last_idx + strlen (name->last))) { - char *sfx; - - sfx = name->last + (idxs->suffix_idx - idxs->last_idx); - if (sfx != NULL) { - char *newlast; - char *p; - - p = sfx - 1; - while (isspace ((unsigned char)*p) && p > name->last) - p --; - p ++; - - newlast = g_malloc0 (p - name->last + 1); - strncpy (newlast, name->last, p - name->last); - g_free (name->last); - name->last = newlast; - } - } - - /* - * If we have a prefix and a first name, but no last name, - * then we need to assign the first name to the last name. - * This way we get things like "Mr Friedman" correctly. - */ - if (idxs->first_idx != -1 && idxs->prefix_idx != -1 && - idxs->last_idx == -1) { - name->last = name->first; - idxs->last_idx = idxs->first_idx; - idxs->first_idx = -1; - name->first = NULL; - } - - if (idxs->middle_idx != -1) { - CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("&"); - CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("*"); - CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("|"); - CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("^"); - CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("&&"); - CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("||"); - CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("+"); - CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("-"); - CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("and"); - CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("or"); - CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("plus"); - - /* Spanish */ - CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("y"); - - /* German */ - CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("und"); - - /* Italian */ - CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("e"); - - /* Czech */ - CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("a"); - - /* Finnish */ - CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("ja"); - - /* French */ - CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("et"); - - /* Russian */ - CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("\xd0\x98"); /* u+0418 */ - CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("\xd0\xb8"); /* u+0438 */ - } - - /* - * Remove stray spaces and commas (although there don't seem - * to be any in the test cases, they might show up later). - */ - e_name_western_cleanup_string (& name->prefix); - e_name_western_cleanup_string (& name->first); - e_name_western_cleanup_string (& name->middle); - e_name_western_cleanup_string (& name->nick); - e_name_western_cleanup_string (& name->last); - e_name_western_cleanup_string (& name->suffix); - - /* - * Make zero-length strings just NULL. - */ - e_name_western_zap_nil (& name->prefix, & idxs->prefix_idx); - e_name_western_zap_nil (& name->first, & idxs->first_idx); - e_name_western_zap_nil (& name->middle, & idxs->middle_idx); - e_name_western_zap_nil (& name->nick, & idxs->nick_idx); - e_name_western_zap_nil (& name->last, & idxs->last_idx); - e_name_western_zap_nil (& name->suffix, & idxs->suffix_idx); -} - -/** - * e_name_western_western_parse_fullname: - * @full_name: A string containing a Western name. - * - * Parses @full_name and returns an #ENameWestern object filled with - * the component parts of the name. - */ -ENameWestern * -e_name_western_parse (const char *full_name) -{ - ENameWesternIdxs *idxs; - ENameWestern *wname; - - wname = g_new0 (ENameWestern, 1); - - wname->full = g_strdup (full_name); - - idxs = g_new0 (ENameWesternIdxs, 1); - - idxs->prefix_idx = -1; - idxs->first_idx = -1; - idxs->middle_idx = -1; - idxs->nick_idx = -1; - idxs->last_idx = -1; - idxs->suffix_idx = -1; - - /* - * An extremely simple algorithm. - * - * The goal here is to get it right 95% of the time for - * Western names. - * - * First we check to see if this is an ass-backwards name - * ("Prefix Last, First Middle Suffix"). These names really - * suck (imagine "Dr von Johnson, Albert Roderick Jr"), so - * we reorder them first and then parse them. - * - * Next, we grab the most obvious assignments for the various - * parts of the name. Once this is done, we check for stupid - * errors and fix them up. - */ - e_name_western_reorder_asshole (wname, idxs); - - e_name_western_extract_prefix (wname, idxs); - e_name_western_extract_first (wname, idxs); - e_name_western_extract_nickname (wname, idxs); - e_name_western_extract_middle (wname, idxs); - e_name_western_extract_last (wname, idxs); - e_name_western_extract_suffix (wname, idxs); - - e_name_western_fixup (wname, idxs); - - g_free (idxs); - - return wname; -} - -/** - * e_name_western_free: - * @name: An ENameWestern object which needs to be freed. - * - * Deep-frees @name - */ -void -e_name_western_free (ENameWestern *w) -{ - - g_free (w->prefix); - g_free (w->first); - g_free (w->middle); - g_free (w->nick); - g_free (w->last); - g_free (w->suffix); - - g_free (w->full); - - g_free (w); -} diff --git a/e-util/ename/e-name-western.h b/e-util/ename/e-name-western.h deleted file mode 100644 index fa5bac494c..0000000000 --- a/e-util/ename/e-name-western.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef __E_NAME_WESTERN_H__ -#define __E_NAME_WESTERN_H__ - -typedef struct { - - /* Public */ - char *prefix; - char *first; - char *middle; - char *nick; - char *last; - char *suffix; - - /* Private */ - char *full; -} ENameWestern; - -ENameWestern *e_name_western_parse (const char *full_name); -void e_name_western_free (ENameWestern *w); - -#endif /* ! __E_NAME_WESTERN_H__ */ diff --git a/e-util/ename/test-ename-western-gtk.c b/e-util/ename/test-ename-western-gtk.c deleted file mode 100644 index 8ae0ef8770..0000000000 --- a/e-util/ename/test-ename-western-gtk.c +++ /dev/null @@ -1,157 +0,0 @@ -#include <gtk/gtkmain.h> -#include <gtk/gtktable.h> -#include <libgnomeui/gnome-app.h> -#include <libgnomeui/gnome-init.h> -#include <gal/widgets/e-unicode.h> -#include "e-name-western.h" - -ENameWestern *name; -GtkWidget *full; -GtkWidget *prefix; -GtkWidget *first; -GtkWidget *middle; -GtkWidget *nick; -GtkWidget *last; -GtkWidget *suffix; - -static void -fill_entries (void) -{ - -#define SET(a,b) (e_utf8_gtk_entry_set_text (GTK_ENTRY (a), (b) == NULL ? "" : (b))) - SET(prefix, name->prefix); - SET(first, name->first); - SET(middle, name->middle); - SET(nick, name->nick); - SET(last, name->last); - SET(suffix, name->suffix); -} - -static void -full_changed_cb (GtkEntry *fulle) -{ - gchar *str; - - e_name_western_free (name); - str = e_utf8_gtk_entry_get_text (fulle); - name = e_name_western_parse (str); - fill_entries (); - - g_free (str); -} - -static void -create_window (void) -{ - GtkWidget *app; - GtkTable *table; - - GtkWidget *prefix_label; - GtkWidget *first_label; - GtkWidget *middle_label; - GtkWidget *nick_label; - GtkWidget *last_label; - GtkWidget *suffix_label; - - app = gnome_app_new ("test", "Evolution Western Name Parser"); - - table = GTK_TABLE (gtk_table_new (3, 6, FALSE)); - - full = gtk_entry_new (); - prefix = gtk_entry_new (); - first = gtk_entry_new (); - middle = gtk_entry_new (); - nick = gtk_entry_new (); - last = gtk_entry_new (); - suffix = gtk_entry_new (); - - gtk_widget_set_usize (prefix, 100, 0); - gtk_widget_set_usize (first, 100, 0); - gtk_widget_set_usize (middle, 100, 0); - gtk_widget_set_usize (nick, 100, 0); - gtk_widget_set_usize (last, 100, 0); - gtk_widget_set_usize (suffix, 100, 0); - - gtk_table_attach (table, full, 0, 6, 0, 1, - GTK_EXPAND | GTK_FILL, 0, - 0, 0); - - gtk_table_attach (table, prefix, 0, 1, 1, 2, - GTK_EXPAND | GTK_FILL | GTK_SHRINK, 0, - 0, 0); - - gtk_table_attach (table, first, 1, 2, 1, 2, - GTK_EXPAND | GTK_FILL | GTK_SHRINK, 0, - 0, 0); - - gtk_table_attach (table, middle, 2, 3, 1, 2, - GTK_EXPAND | GTK_FILL | GTK_SHRINK, 0, - 0, 0); - - gtk_table_attach (table, nick, 3, 4, 1, 2, - GTK_EXPAND | GTK_FILL | GTK_SHRINK, 0, - 0, 0); - - gtk_table_attach (table, last, 4, 5, 1, 2, - GTK_EXPAND | GTK_FILL | GTK_SHRINK, 0, - 0, 0); - - gtk_table_attach (table, suffix, 5, 6, 1, 2, - GTK_EXPAND | GTK_FILL | GTK_SHRINK, 0, - 0, 0); - - prefix_label = gtk_label_new ("Prefix"); - first_label = gtk_label_new ("First"); - middle_label = gtk_label_new ("Middle"); - nick_label = gtk_label_new ("Nick"); - last_label = gtk_label_new ("Last"); - suffix_label = gtk_label_new ("Suffix"); - - gtk_table_attach (table, prefix_label, 0, 1, 2, 3, - GTK_SHRINK, 0, - 0, 0); - - gtk_table_attach (table, first_label, 1, 2, 2, 3, - GTK_SHRINK, 0, - 0, 0); - - gtk_table_attach (table, middle_label, 2, 3, 2, 3, - GTK_SHRINK, 0, - 0, 0); - - gtk_table_attach (table, nick_label, 3, 4, 2, 3, - GTK_SHRINK, 0, - 0, 0); - - gtk_table_attach (table, last_label, 4, 5, 2, 3, - GTK_SHRINK, 0, - 0, 0); - - gtk_table_attach (table, suffix_label, 5, 6, 2, 3, - GTK_SHRINK, 0, - 0, 0); - - gnome_app_set_contents (GNOME_APP (app), GTK_WIDGET (table)); - - gtk_widget_show_all (app); - - gtk_entry_set_text (GTK_ENTRY (full), - "The Honorable Doctor van Jacobsen, Albert Roderick \"The Clenched Fist\" Jr, MD, PhD, Esquire"); - - name = e_name_western_parse ("The Honorable Doctor van Jacobsen, Albert Roderick \"The Clenched Fist\" Jr, MD, PhD, Esquire"); - fill_entries (); - - gtk_signal_connect (GTK_OBJECT (full), "changed", full_changed_cb, NULL); -} - -int -main (int argc, char **argv) -{ - gnome_init ("Test EName", "Test EName", argc, argv); - - create_window (); - - gtk_main (); - - return 0; -} diff --git a/e-util/ename/test-ename-western.c b/e-util/ename/test-ename-western.c deleted file mode 100644 index 09847b5b4b..0000000000 --- a/e-util/ename/test-ename-western.c +++ /dev/null @@ -1,71 +0,0 @@ -#include <ctype.h> -#include <stdio.h> -#include <glib.h> -#include <gtk/gtkmain.h> -#include <ename/e-name-western.h> - -static void -do_name (char *n) -{ - ENameWestern *wname; - - wname = e_name_western_parse (n); - - printf ("Full Name: [%s]\n", n); - - printf ("Prefix: [%s]\n", wname->prefix); - printf ("First: [%s]\n", wname->first); - printf ("Middle: [%s]\n", wname->middle); - printf ("Nick: [%s]\n", wname->nick); - printf ("Last: [%s]\n", wname->last); - printf ("Suffix: [%s]\n", wname->suffix); - - printf ("\n"); - - e_name_western_free (wname); -} - -int -main (int argc, char **argv) -{ - if (argc == 2) { - while (! feof (stdin)) { - char s[256]; - - if (fgets (s, sizeof (s), stdin) == NULL) - return 0; - - g_strstrip (s); - - do_name (s); - } - - return 0; - } - - do_name ("Nat"); - do_name ("Karl Anders Carlsson"); - do_name ("Miguel de Icaza Amozorrutia"); - do_name ("The Honorable Doctor de Icaza, Miguel \"Sparky\" Junior, PhD, MD"); - do_name ("Nat Friedman MD, Phd"); - do_name ("Nat Friedman PhD"); - do_name ("Friedman, Nat"); - do_name ("Miguel de Icaza Esquire"); - do_name ("Dr Miguel \"Sparky\" de Icaza"); - do_name ("Robert H.B. Netzer"); - do_name ("W. Richard Stevens"); - do_name ("Nat Friedman"); - do_name ("N. Friedman"); - do_name ("Miguel de Icaza"); - do_name ("Drew Johnson"); - do_name ("President Bill \"Slick Willy\" Clinton"); - do_name ("The Honorable Mark J. Einstein Jr"); - do_name ("Friedman, Nat"); - do_name ("de Icaza, Miguel"); - do_name ("Mr de Icaza, Miguel"); - do_name ("Smith, John Jr"); - do_name ("Nick Glennie-Smith"); - do_name ("Dr von Johnson, Albert Roderick Jr"); - - return 0; -} |