aboutsummaryrefslogblamecommitdiffstats
path: root/e-util/ename/e-name-western.c
blob: b6802c4333b169bdb6e60a3db1ce954c4347cb24 (plain) (tree)
1
2
3
4
5
6
7
8
9
                                                                           





                                                               

                                  
  
                                      

























                                          
                                                                  
                              
                                         













                                          
                                                                   
                 

                                                                   
 
                                                 

                              

                                                                                   



                                                                           

                                                                              

                                                                             

                          







                                                                   
                       

                         



                                        
                                  


                                                      



                                                                                     
 

                                                                             



                              
                                            



















                                                                 





                                                                                    
































                                                                                          


                                                                            























                                                          

                                                                     





























































                                                                          

                                                                             


































                                                                             
                     






                                                                
                                                                     

                            
 
                                           
                            




                                                             


                                                              
                
                            




                                                 
                              


                                         

                                                                           
                


                                                                      
 
                                    





                                                                     
                                                                                    





















                                                              
                                               





                                                                            
                   
                        
                     




                                               
                                                                             
            
                                                                           
 

                                               
 
                          

                       
                                      



                                            






                                                                            
 

                                          
                       

                                          
 
                                                
 































                                                                            
                   



























                                                                          

                                
                                        

                                                                           
 
                          

                       
                                                                                  













                                                                      

                                           










                                                      

                                                                  
 

                                                                    
 

                                                    
























                                                                        
                                          

                                      
                              
                 
                                                 




























                                                                          













                                                                            
                                                    



















































                                                                                  
                                                    




                                                                

                                                                     












                                                                     
                                     
 

                                                                     






                                                          

                                                                       




                                                              

                                                                              








































                                                                                           









                                                                         
                                                                         































                                                                         











                                                                                 



































                                                                           




                                                                                         




















                                                                      















                                                                
                            










                                                               


                                                              




                                                                            

































                                                                    





                                                                                 




























































                                                                     

                         


                   
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * A simple Western name parser.
 *
 * <Nat> Jamie, do you know anything about name parsing?
 * <jwz> Are you going down that rat hole?  Bring a flashlight.
 *
 * Authors:
 *   Nat Friedman <nat@ximian.com>
 *
 * Copyright 1999 - 2001, Ximian, Inc.
 */

#include <ctype.h>
#include <string.h>
#include <glib.h>
 
#include <ename/e-name-western.h>
#include <ename/e-name-western-tables.h>

typedef struct {
    int prefix_idx;
    int first_idx;
    int middle_idx;
    int nick_idx;
    int last_idx;
    int suffix_idx;
} ENameWesternIdxs;

static int
e_name_western_str_count_words (char *str)
{
    int word_count;
    char *p;

    word_count = 0;

    for (p = str; p != NULL; p = g_utf8_strchr (p, -1, ' ')) {
        word_count ++;
        p = g_utf8_next_char (p);
    }

    return word_count;
}

static void
e_name_western_cleanup_string (char **str)
{
    char *newstr;
    char *p;

    if (*str == NULL)
        return;

    /* skip any spaces and commas at the start of the string */
    p = *str;
    while (g_unichar_isspace (g_utf8_get_char(p)) || *p == ',')
        p = g_utf8_next_char (p);

    /* make the copy we're going to return */
    newstr = g_strdup (p);

    if ( strlen(newstr) > 0) {
        /* now search from the back, skipping over any spaces and commas */
        p = newstr + strlen (newstr);
        p = g_utf8_prev_char (p);
        while (g_unichar_isspace (g_utf8_get_char(p)) || *p == ',')
            p = g_utf8_prev_char (p);
        /* advance p to after the character that caused us to exit the
           previous loop, and end the string. */
        if ((! g_unichar_isspace (g_utf8_get_char (p))) && *p != ',')
            p = g_utf8_next_char (p);
        *p = '\0';
    }

    g_free (*str);
    *str = newstr;
}

static char *
e_name_western_get_words_at_idx (char *str, int idx, int num_words)
{
    GString *words;
    char *p;
    int   word_count;

    /*
     * Walk to the end of the words.
     */
    words = g_string_new ("");
    word_count = 0;
    p = str + idx;
    while (word_count < num_words && *p != '\0') {
        while (! g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0') {
            words = g_string_append_unichar (words, g_utf8_get_char (p));
            p = g_utf8_next_char (p);
        }

        while (g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0')
            p = g_utf8_next_char (p);

        word_count ++;
    }

    return g_string_free (words, FALSE);
}

/*
 * What the fuck is wrong with glib's MAX macro.
 */ 
static int
e_name_western_max (const int a, const int b)
{
    if (a > b)
        return a;

    return b;
}

static gboolean
e_name_western_word_is_suffix (char *word)
{
    int i;

    for (i = 0; e_name_western_sfx_table [i] != NULL; i ++) {
        int length = strlen (e_name_western_sfx_table [i]);
        if (!g_strcasecmp (word, e_name_western_sfx_table [i]) || 
            ( !g_strncasecmp (word, e_name_western_sfx_table [i], length) &&
              strlen(word) == length + 1 &&
              word[length] == '.' ))
            return TRUE;
    }

    return FALSE;
}

static char *
e_name_western_get_one_prefix_at_str (char *str)
{
    char *word;
    int   i;

    /*
     * Check for prefixes from our table.
     */
    for (i = 0; e_name_western_pfx_table [i] != NULL; i ++) {
        int pfx_words;
        char *words;

        pfx_words = e_name_western_str_count_words (e_name_western_pfx_table [i]);
        words = e_name_western_get_words_at_idx (str, 0, pfx_words);

        if (! g_strcasecmp (words, e_name_western_pfx_table [i]))
            return words;

        g_free (words);
    }

    /*
     * Check for prefixes we don't know about.  These are always a
     * sequence of more than one letters followed by a period.
     */
    word = e_name_western_get_words_at_idx (str, 0, 1);

    if (g_utf8_strlen (word, -1) > 2 && 
        g_unichar_isalpha (g_utf8_get_char (word)) &&
        g_unichar_isalpha (g_utf8_get_char (g_utf8_next_char (word))) &&
        word [strlen (word) - 1] == '.')
        return word;

    g_free (word);

    return NULL;
}

static char *
e_name_western_get_prefix_at_str (char *str)
{
    char *pfx;
    char *pfx1;
    char *pfx2;
    char *p;

    /* Get the first prefix. */
    pfx1 = e_name_western_get_one_prefix_at_str (str);

    if (pfx1 == NULL)
        return NULL;

    /* Check for a second prefix. */
    p = str + strlen (pfx1);
    while (g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0')
        p = g_utf8_next_char (p);

    pfx2 = e_name_western_get_one_prefix_at_str (p);

    if (pfx2 != NULL) {
        int pfx_len;

        pfx_len = (p + strlen (pfx2)) - str;
        pfx = g_malloc0 (pfx_len + 1);
        strncpy (pfx, str, pfx_len);
    } else {
        pfx = g_strdup (pfx1);
    }

    g_free (pfx1);
    g_free (pfx2);

    return pfx;
}

static void
e_name_western_extract_prefix (ENameWestern *name, ENameWesternIdxs *idxs)
{
    char *pfx;

    pfx = e_name_western_get_prefix_at_str (name->full);

    if (pfx == NULL)
        return;

    idxs->prefix_idx = 0;
    name->prefix     = pfx;
}

static gboolean
e_name_western_is_complex_last_beginning (char *word)
{
    int i;

    for (i = 0; e_name_western_complex_last_table [i] != NULL; i ++) {

        if (! g_strcasecmp (
            word, e_name_western_complex_last_table [i]))
            return TRUE;
    }

    return FALSE;
}

static void
e_name_western_extract_first (ENameWestern *name, ENameWesternIdxs *idxs)
{
    /*
     * If there's a prefix, then the first name is right after it.
     */
    if (idxs->prefix_idx != -1) {
        int   first_idx;
        char *p;

        first_idx = idxs->prefix_idx + strlen (name->prefix);

        /* Skip past white space. */
        p = name->full + first_idx;
        while (g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0')
            p = g_utf8_next_char (p);

        if (*p == '\0')
            return;

        idxs->first_idx = p - name->full;
        name->first = e_name_western_get_words_at_idx (
            name->full, idxs->first_idx, 1);

    } else {

        /*
         * Otherwise, the first name is probably the first string.
         */
        idxs->first_idx = 0;
        name->first = e_name_western_get_words_at_idx (
            name->full, idxs->first_idx, 1);
    }

    /*
     * Check that we didn't just assign the beginning of a
     * compound last name to the first name.
     */
    if (name->first != NULL) {
        if (e_name_western_is_complex_last_beginning (name->first)) {
            g_free (name->first);
            name->first = NULL;
            idxs->first_idx = -1;
        }
    }
}

static void
e_name_western_extract_middle (ENameWestern *name, ENameWesternIdxs *idxs)
{
    char *word;
    char *middle;

    /*
     * Middle names can only exist if you have a first name.
     */
    if (idxs->first_idx == -1)
        return;

    middle = name->full + idxs->first_idx + strlen (name->first);
    if (*middle == '\0')
        return;

    middle = g_utf8_next_char (middle);
    if (*middle == '\0')
        return;
    
    /*
     * Search for the first space (or the terminating \0)
     */
    while (g_unichar_isspace (g_utf8_get_char (middle)) &&
           *middle != '\0')
        middle = g_utf8_next_char (middle);
        
    if (*middle == '\0')
        return;

    /*
     * Skip past the nickname, if it's there.
     */
    if (*middle == '\"') {
        if (idxs->nick_idx == -1)
            return;

        middle = name->full + idxs->nick_idx + strlen (name->nick);
        middle = g_utf8_next_char (middle);
        
        while (g_unichar_isspace (g_utf8_get_char (middle)) &&
               *middle != '\0')
            middle = g_utf8_next_char (middle);

        if (*middle == '\0')
            return;
    }

    /*
     * Make sure this isn't the beginning of a complex last name.
     */
    word = e_name_western_get_words_at_idx (name->full, middle - name->full, 1);
    if (e_name_western_is_complex_last_beginning (word)) {
        g_free (word);
        return;
    }

    /*
     * Make sure this isn't a suffix.
     */
    e_name_western_cleanup_string (& word);
    if (e_name_western_word_is_suffix (word)) {
        g_free (word);
        return;
    }

    /*
     * Make sure we didn't just grab a cute nickname.
     */
    if (word [0] == '\"') {
        g_free (word);
        return;
    }
    
    idxs->middle_idx = middle - name->full;
    name->middle = word;
}

static void
e_name_western_extract_nickname (ENameWestern *name, ENameWesternIdxs *idxs)
{
    char *nick;
    int   start_idx;
    GString *str;

    if (idxs->first_idx == -1)
        return;

    if (idxs->middle_idx > idxs->first_idx)
        nick = name->full + idxs->middle_idx + strlen (name->middle);
    else
        nick = name->full + idxs->first_idx + strlen (name->first);

    while (*nick != '\"' && *nick != '\0')
        nick = g_utf8_next_char (nick);

    if (*nick != '\"')
        return;

    start_idx = nick - name->full;

    /*
     * Advance to the next double quote.
     */
    str = g_string_new ("\"");
    nick = g_utf8_next_char (nick);

    while (*nick != '\"' && *nick != '\0') {
        str = g_string_append_unichar (str, g_utf8_get_char (nick));
        nick = g_utf8_next_char (nick);
    }

    if (*nick == '\0') {
        g_string_free (str, TRUE);
        return;
    }
    str = g_string_append (str, "\"");

    name->nick = g_string_free (str, FALSE);

    idxs->nick_idx = start_idx;
}

static int
e_name_western_last_get_max_idx (ENameWestern *name, ENameWesternIdxs *idxs)
{
    int max_idx = -1;

    if (name->prefix != NULL)
        max_idx = e_name_western_max (
            max_idx, idxs->prefix_idx + strlen (name->prefix));

    if (name->first != NULL)
        max_idx = e_name_western_max (
            max_idx, idxs->first_idx + strlen (name->first));

    if (name->middle != NULL)
        max_idx = e_name_western_max (
            max_idx, idxs->middle_idx + strlen (name->middle));

    if (name->nick != NULL)
        max_idx = e_name_western_max (
            max_idx, idxs->nick_idx + strlen (name->nick));

    return max_idx;
}

static void
e_name_western_extract_last (ENameWestern *name, ENameWesternIdxs *idxs)
{
    char *word;
    int   idx = -1;
    char *last;

    idx = e_name_western_last_get_max_idx (name, idxs);

    /*
     * In the case where there is no preceding name element, the
     * name is either just a first name ("Nat", "John"), is a
     * single-element name ("Cher", which we treat as a first
     * name), or is just a last name.  The only time we can
     * differentiate a last name alone from a single-element name
     * or a first name alone is if it's a complex last name ("de
     * Icaza", "van Josephsen").  So if there is no preceding name
     * element, we check to see whether or not the first part of
     * the name is the beginning of a complex name.  If it is,
     * we subsume the entire string.  If we accidentally subsume
     * the suffix, this will get fixed in the fixup routine.
     */
    if (idx == -1) {
        word = e_name_western_get_words_at_idx (name->full, 0, 1);
        if (! e_name_western_is_complex_last_beginning (word)) {
            g_free (word);
            return;
        }

        name->last     = g_strdup (name->full);
        idxs->last_idx = 0;
        return;
    }

    last = name->full + idx;

    /* Skip past the white space. */
    while (g_unichar_isspace (g_utf8_get_char (last)) && *last != '\0')
        last = g_utf8_next_char (last);

    if (*last == '\0')
        return;

    word = e_name_western_get_words_at_idx (name->full, last - name->full, 1);
    e_name_western_cleanup_string (& word);
    if (e_name_western_word_is_suffix (word)) {
        g_free (word);
        return;
    }
    g_free (word);

    /*
     * Subsume the rest of the string into the last name.  If we
     * accidentally include the prefix, it will get fixed later.
     * This is the only way to handle things like "Miguel de Icaza
     * Amozorrutia" without dropping data and forcing the user
     * to retype it.
     */
    name->last = g_strdup (last);
    idxs->last_idx = last - name->full;
}

static char *
e_name_western_get_preceding_word (char *str, int idx)
{
    int   word_len;
    char *word;
    char *p;

    p = str + idx;

    while (g_unichar_isspace (g_utf8_get_char (p)) && p > str)
        p = g_utf8_prev_char (p);

    while (! g_unichar_isspace (g_utf8_get_char (p)) && p > str)
        p = g_utf8_prev_char (p);

    if (g_unichar_isspace (g_utf8_get_char (p)))
        p = g_utf8_next_char (p);

    word_len = (str + idx) - p;
    word = g_malloc0 (word_len + 1);
    if (word_len > 0)
        strncpy (word, p, word_len);

    return word;
}

static char *
e_name_western_get_suffix_at_str_end (char *str)
{
    char *suffix;
    char *p;

    /*
     * Walk backwards till we reach the beginning of the
     * (potentially-comma-separated) list of suffixes.
     */
    p = str + strlen (str);
    while (1) {
        char *nextp;
        char *word;

        word = e_name_western_get_preceding_word (str, p - str);
        nextp = p - strlen (word);
        if (nextp == str) {
            g_free (word);
            break;
        }
        nextp = g_utf8_prev_char (nextp);
        
        e_name_western_cleanup_string (& word);

        if (e_name_western_word_is_suffix (word)) {
            p = nextp;
            g_free (word);
        } else {
            g_free (word);
            break;
        }
    }

    if (p == (str + strlen (str)))
        return NULL;

    suffix = g_strdup (p);
    e_name_western_cleanup_string (& suffix);

    if (strlen (suffix) == 0) {
        g_free (suffix);
        return NULL;
    }

    return suffix;
}

static void
e_name_western_extract_suffix (ENameWestern *name, ENameWesternIdxs *idxs)
{
    name->suffix = e_name_western_get_suffix_at_str_end (name->full);

    if (name->suffix == NULL)
        return;

    idxs->suffix_idx = strlen (name->full) - strlen (name->suffix);
}

static gboolean
e_name_western_detect_backwards (ENameWestern *name, ENameWesternIdxs *idxs)
{
    char *comma;
    char *word;

    comma = g_utf8_strchr (name->full, -1, ',');

    if (comma == NULL)
        return FALSE;

    /*
     * If there's a comma, we need to detect whether it's
     * separating the last name from the first or just separating
     * suffixes.  So we grab the word which comes before the
     * comma and check if it's a suffix.
     */
    word = e_name_western_get_preceding_word (name->full, comma - name->full);

    if (e_name_western_word_is_suffix (word)) {
        g_free (word);
        return FALSE;
    }

    g_free (word);
    return TRUE;
}

static void
e_name_western_reorder_asshole (ENameWestern *name, ENameWesternIdxs *idxs)
{
    char *prefix;
    char *last;
    char *suffix;
    char *firstmidnick;
    char *newfull;

    char *comma;
    char *p;

    if (! e_name_western_detect_backwards (name, idxs))
        return;

    /*
     * Convert
     *    <Prefix> <Last name>, <First name> <Middle[+nick] name> <Suffix>
     * to
     *    <Prefix> <First name> <Middle[+nick] name> <Last name> <Suffix>
     */
    
    /*
     * Grab the prefix from the beginning.
     */
    prefix = e_name_western_get_prefix_at_str (name->full);

    /*
     * Everything from the end of the prefix to the comma is the
     * last name.
     */
    comma = g_utf8_strchr (name->full, -1, ',');
    if (comma == NULL)
        return;

    p = name->full + (prefix == NULL ? 0 : strlen (prefix));

    while (g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0')
        p = g_utf8_next_char (p);

    last = g_malloc0 (comma - p + 1);
    strncpy (last, p, comma - p);

    /*
     * Get the suffix off the end.
     */
    suffix = e_name_western_get_suffix_at_str_end (name->full);

    /*
     * Firstmidnick is everything from the comma to the beginning
     * of the suffix.
     */
    p = g_utf8_next_char (comma);

    while (g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0')
        p = g_utf8_next_char (p);

    if (suffix != NULL) {
        char *q;

        /*
         * Point q at the beginning of the suffix.
         */
        q = name->full + strlen (name->full) - strlen (suffix);
        q = g_utf8_prev_char (q);

        /*
         * Walk backwards until we hit the space which
         * separates the suffix from firstmidnick.
         */
        while (! g_unichar_isspace (g_utf8_get_char (q)) && q > comma)
            q = g_utf8_prev_char (q);

        if ((q - p + 1) > 0) {
            firstmidnick = g_malloc0 (q - p + 1);
            strncpy (firstmidnick, p, q - p);
        } else
            firstmidnick = NULL;
    } else {
        firstmidnick = g_strdup (p);
    }

    /*
     * Create our new reordered version of the name.
     */
#define NULLSTR(a) ((a) == NULL ? "" : (a))
    newfull = g_strdup_printf ("%s %s %s %s", NULLSTR (prefix), NULLSTR (firstmidnick),
                   NULLSTR (last), NULLSTR (suffix));
    g_strstrip (newfull);
    g_free (name->full);
    name->full = newfull;


    g_free (prefix);
    g_free (firstmidnick);
    g_free (last);
    g_free (suffix);
}

static void
e_name_western_zap_nil (char **str, int *idx)
{
    if (*str == NULL)
        return;

    if (strlen (*str) != 0)
        return;

    *idx = -1;
    g_free (*str);
    *str = NULL;
}

#define FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION            \
    char *last_start = NULL;                    \
    if (name->last)                         \
        last_start = g_utf8_strchr (name->last, -1, ' ');   \
    if (last_start) {                       \
        char *new_last, *new_first;             \
                                    \
        new_last = g_strdup (g_utf8_next_char (last_start));    \
        *last_start = '\0';                 \
                                    \
        idxs->last_idx += (last_start - name->last) + 1;    \
                                    \
        new_first = g_strdup_printf ("%s %s %s",        \
                         name->first,       \
                         name->middle,      \
                         name->last);       \
                                    \
        g_free (name->first);                   \
        g_free (name->middle);                  \
        g_free (name->last);                    \
                                    \
        name->first = new_first;                \
        name->middle = NULL;                    \
        name->last = new_last;                  \
                                    \
        idxs->middle_idx = -1;                  \
    } else {                            \
        char *new_first;                    \
                                    \
        new_first = g_strdup_printf ("%s %s %s",        \
                         name->first,       \
                         name->middle,      \
                         name->last);       \
                                    \
        g_free (name->first);                   \
        g_free (name->middle);                  \
        g_free (name->last);                    \
                                    \
        name->first = new_first;                \
        name->middle = NULL;                    \
        name->last = NULL;                  \
        idxs->middle_idx = -1;                  \
        idxs->last_idx = -1;                    \
    }

#define CHECK_MIDDLE_NAME_FOR_CONJUNCTION(conj) \
    if (idxs->middle_idx != -1 && !strcmp (name->middle, conj)) {   \
        FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION    \
    }

#define CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE(conj) \
    if (idxs->middle_idx != -1 && !strcasecmp (name->middle, conj)) {   \
        FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION    \
    }

static void
e_name_western_fixup (ENameWestern *name, ENameWesternIdxs *idxs)
{
    /*
     * The middle and last names cannot be the same.
     */
    if (idxs->middle_idx != -1 && idxs->middle_idx == idxs->last_idx) {
        idxs->middle_idx = -1;
        g_free (name->middle);
        name->middle = NULL;
    }

    /*
     * If we have a middle name and no last name, then we mistook
     * the last name for the middle name.
     */
    if (idxs->last_idx == -1 && idxs->middle_idx != -1) {
        idxs->last_idx   = idxs->middle_idx;
        name->last       = name->middle;
        name->middle     = NULL;
        idxs->middle_idx = -1;
    }

    /*
     * Check to see if we accidentally included the suffix in the
     * last name.
     */
    if (idxs->suffix_idx != -1 && idxs->last_idx != -1 &&
        idxs->suffix_idx < (idxs->last_idx + strlen (name->last))) {
        char *sfx;

        sfx = name->last + (idxs->suffix_idx - idxs->last_idx);
        if (sfx != NULL) {
            char *newlast;
            char *p;

            p = sfx;
            p = g_utf8_prev_char (p);
            while (g_unichar_isspace (g_utf8_get_char (p)) && p > name->last)
                p = g_utf8_prev_char (p);
            p = g_utf8_next_char (p);

            newlast = g_malloc0 (p - name->last + 1);
            strncpy (newlast, name->last, p - name->last);
            g_free (name->last);
            name->last = newlast;
        }
    }

    /*
     * If we have a prefix and a first name, but no last name,
     * then we need to assign the first name to the last name.
     * This way we get things like "Mr Friedman" correctly.
     */
    if (idxs->first_idx != -1 && idxs->prefix_idx != -1 &&
        idxs->last_idx == -1) {
        name->last      = name->first;
        idxs->last_idx  = idxs->first_idx;
        idxs->first_idx = -1;
        name->first     = NULL;
    }

    if (idxs->middle_idx != -1) {
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("&");
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("*");
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("|");
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("^");
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("&&");
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("||");
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("+");
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("-");
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("and");
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("or");
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("plus");

        /* Spanish */
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("y");

        /* German */
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("und");

        /* Italian */
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("e");

        /* Czech */
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("a");

        /* Finnish */
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("ja");

        /* French */
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("et");

        /* Russian */
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("\xd0\x98"); /* u+0418 */
        CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("\xd0\xb8"); /* u+0438 */
    }

    /*
     * Remove stray spaces and commas (although there don't seem
     * to be any in the test cases, they might show up later).
     */
    e_name_western_cleanup_string (& name->prefix);
    e_name_western_cleanup_string (& name->first);
    e_name_western_cleanup_string (& name->middle);
    e_name_western_cleanup_string (& name->nick);
    e_name_western_cleanup_string (& name->last);
    e_name_western_cleanup_string (& name->suffix);

    /*
     * Make zero-length strings just NULL.
     */
    e_name_western_zap_nil (& name->prefix, & idxs->prefix_idx);
    e_name_western_zap_nil (& name->first,  & idxs->first_idx);
    e_name_western_zap_nil (& name->middle, & idxs->middle_idx);
    e_name_western_zap_nil (& name->nick,   & idxs->nick_idx);
    e_name_western_zap_nil (& name->last,   & idxs->last_idx);
    e_name_western_zap_nil (& name->suffix, & idxs->suffix_idx);
}

/**
 * e_name_western_western_parse_fullname:
 * @full_name: A string containing a Western name.
 *
 * Parses @full_name and returns an #ENameWestern object filled with
 * the component parts of the name.
 */
ENameWestern *
e_name_western_parse (const char *full_name)
{
    ENameWesternIdxs *idxs;
    ENameWestern *wname;
    char *end;

    if (!g_utf8_validate (full_name, -1, (const char **)&end)) {
        g_warning ("e_name_western_parse passed invalid UTF-8 sequence");
        *end = '\0';
    }

    wname = g_new0 (ENameWestern, 1);

    wname->full = g_strdup (full_name);

    idxs = g_new0 (ENameWesternIdxs, 1);

    idxs->prefix_idx = -1;
    idxs->first_idx  = -1;
    idxs->middle_idx = -1;
    idxs->nick_idx   = -1;
    idxs->last_idx   = -1;
    idxs->suffix_idx = -1;
    
    /*
     * An extremely simple algorithm.
     *
     * The goal here is to get it right 95% of the time for
     * Western names.
     *
     * First we check to see if this is an ass-backwards name
     * ("Prefix Last, First Middle Suffix").  These names really
     * suck (imagine "Dr von Johnson, Albert Roderick Jr"), so
     * we reorder them first and then parse them.
     *
     * Next, we grab the most obvious assignments for the various
     * parts of the name.  Once this is done, we check for stupid
     * errors and fix them up.
     */
    e_name_western_reorder_asshole  (wname, idxs);

    e_name_western_extract_prefix   (wname, idxs);
    e_name_western_extract_first    (wname, idxs);
    e_name_western_extract_nickname (wname, idxs);
    e_name_western_extract_middle   (wname, idxs);
    e_name_western_extract_last     (wname, idxs);
    e_name_western_extract_suffix   (wname, idxs);

    e_name_western_fixup            (wname, idxs);

    g_free (idxs);

    return wname;
}

/**
 * e_name_western_free:
 * @name: An ENameWestern object which needs to be freed.
 *
 * Deep-frees @name
 */
void
e_name_western_free (ENameWestern *w)
{

    g_free (w->prefix);
    g_free (w->first);
    g_free (w->middle);
    g_free (w->nick);
    g_free (w->last);
    g_free (w->suffix);
    
    g_free (w->full);

    g_free (w);
}