/* --------------------------------------------------
An address parser, yielding fields as per RFC 2426.
Author:
Jesse Pavel (jpavel@helixcode.com)
Copyright 2000, Helix Code, Inc.
--------------------------------------------------
*/
#include <ctype.h>
#include <string.h>
#include <glib.h>
#ifdef E_ADDRESS_WESTERN_TEST
#include "e-address-western.h"
#else
#include <ename/e-address-western.h>
#include <e-util/e-util.h>
#endif
/* Keywords (matched case-insensitively) that mark where the extended
   part of an address, such as an apartment or suite number, begins within
   a street line.  The list is NULL-terminated.  Both the strings and the
   table itself are read-only. */
static const char *const extended_keywords[] = {
	"apt", "apartment", "suite", NULL
};
/* Case-insensitive substring search.
   Returns a pointer to the first position in `haystack' at which `needle'
   matches when compared with g_strncasecmp, or NULL if there is no such
   position.  An empty needle matches at the start of the haystack.
   A NULL haystack or needle trips g_return_val_if_fail and yields NULL. */
static const gchar *
e_address_western_strstrcase (const gchar *haystack, const gchar *needle)
{
	const gchar *cursor;
	guint needle_len;

	g_return_val_if_fail (haystack != NULL, NULL);
	g_return_val_if_fail (needle != NULL, NULL);

	needle_len = strlen (needle);

	/* A needle longer than the haystack can never match. */
	if (needle_len > strlen (haystack))
		return NULL;
	if (needle_len == 0)
		return haystack;

	/* Slide a window of needle_len characters along the haystack; stop
	   once the last character of the window would be the terminator. */
	cursor = haystack;
	while (cursor[needle_len - 1] != '\0') {
		if (g_strncasecmp (cursor, needle, needle_len) == 0)
			return cursor;
		cursor++;
	}

	return NULL;
}
/* Report whether `line' is blank.
   A line is blank when it is NULL, empty, or made up solely of characters
   for which isspace() is true.  Returns TRUE for a blank line, FALSE
   otherwise. */
static gboolean
e_address_western_is_line_blank (gchar *line)
{
	gint cntr;

	/* A NULL line is treated as blank rather than dereferenced. */
	if (line == NULL)
		return TRUE;

	for (cntr = 0; line[cntr] != '\0'; cntr++) {
		/* The <ctype.h> functions require a value representable as
		   unsigned char; plain char may be negative for non-ASCII
		   bytes. */
		if (!isspace ((unsigned char) line[cntr]))
			return FALSE;
	}

	return TRUE;
}
/* In the array of lines, `lines', erase the entry at index `line_num' and
   move every following entry, up to (but not including) index `num_lines',
   up one position.  The slot vacated at the end (index num_lines - 1) is
   set to NULL.  `num_lines' is the number of valid entries before the
   call; the function does not update any count itself.
   A `line_num' outside [0, num_lines) is ignored. */
static void
e_address_western_shift_line (gchar *lines[], gint line_num, gint num_lines)
{
	gint cntr;

	if (line_num < 0 || line_num >= num_lines)
		return;

	/* Stop one short of the end so that lines[num_lines], which is not a
	   valid entry, is never read. */
	for (cntr = line_num; cntr < num_lines - 1; cntr++)
		lines[cntr] = lines[cntr + 1];

	lines[num_lines - 1] = NULL;
}
/* Strip every blank line out of `lines'.
   `*linecntr' holds the number of entries on entry and is reduced by one
   for each blank line removed.  The remaining lines keep their relative
   order. */
static void
e_address_western_remove_blank_lines (gchar *lines[], gint *linecntr)
{
	gint pos = 0;

	while (pos < *linecntr) {
		if (!e_address_western_is_line_blank (lines[pos])) {
			pos++;
			continue;
		}

		/* Drop the blank line; the following lines slide up, so the
		   same position must be examined again on the next pass. */
		e_address_western_shift_line (lines, pos, *linecntr);
		*linecntr -= 1;
	}
}
/* Decide whether `line' looks like a PO Box address.
   The first alphanumeric character must be `p' and the next alphanumeric
   character must be `o' (both case-insensitive); punctuation and spaces
   between them are skipped.  After the `o', a whitespace character must
   appear before any further alphanumeric character.  So "PO Box 12" and
   "P.O. Box 12" qualify, while "Portland" does not.
   Returns TRUE if the line qualifies, FALSE otherwise. */
static gboolean
e_address_western_is_po_box (gchar *line)
{
	/* In which phase of processing are we? */
	enum State { FIRSTCHAR, SECONDCHAR, WHITESPACE } state = FIRSTCHAR;
	gint cntr;

	for (cntr = 0; line[cntr] != '\0'; cntr++) {
		/* <ctype.h> functions need an unsigned char value; plain char
		   may be negative for non-ASCII bytes. */
		const unsigned char ch = (unsigned char) line[cntr];

		switch (state) {
		case FIRSTCHAR:
			if (isalnum (ch)) {
				if (tolower (ch) != 'p')
					return FALSE;
				state = SECONDCHAR;
			}
			break;
		case SECONDCHAR:
			if (isalnum (ch)) {
				if (tolower (ch) != 'o')
					return FALSE;
				state = WHITESPACE;
			}
			break;
		case WHITESPACE:
			if (isspace (ch))
				return TRUE;
			if (isalnum (ch))
				return FALSE;
			break;
		}
	}

	/* Ran out of characters before the pattern was completed. */
	return FALSE;
}
/* A line of the form <town, region postal-code> is recognised as follows:
     - it contains a comma;
     - the first non-whitespace character after the first comma is a
       letter (the start of the region);
     - the last whitespace-delimited word of the line begins with a digit
       (the postal code) and starts after the start of the region.
   The last condition guarantees that the region and the postal code are
   separate words that both lie after the comma, which is what the
   locality/region/postal-code extractors rely on.
   Returns TRUE if `line' matches, FALSE otherwise. */
static gboolean
e_address_western_is_postal (gchar *line)
{
	const gchar *comma;
	gint region_start;
	gint cntr;

	comma = strchr (line, ',');
	if (comma == NULL)
		return FALSE;	/* No comma. */

	/* Ensure that the first character after the comma (ignoring
	   whitespace) is a letter. */
	region_start = (gint) (comma - line) + 1;
	while (isspace ((unsigned char) line[region_start]))
		region_start++;
	if (!isalpha ((unsigned char) line[region_start]))
		return FALSE;

	/* Walk backwards over trailing whitespace, then over the last word,
	   so that cntr ends on the whitespace character just before the
	   last word (or at -1 if the line has no such whitespace). */
	cntr = (gint) strlen (line) - 1;
	while (cntr >= 0 && isspace ((unsigned char) line[cntr]))
		cntr--;
	while (cntr >= 0 && !isspace ((unsigned char) line[cntr]))
		cntr--;

	/* The separator must come after the start of the region; otherwise
	   the last word is not a distinct postal code following the
	   region. */
	if (cntr <= region_start)
		return FALSE;

	return isdigit ((unsigned char) line[cntr + 1]) ? TRUE : FALSE;
}
/* Return a newly allocated copy of everything from the beginning of the
   line up to the end of the first run of printable, non-space characters
   that follows the first digit (e.g. "PO Box 123" out of
   "PO Box 123 Anytown").  If the line contains no digit at all, the whole
   line is copied.  The result comes from g_strndup. */
static gchar *
e_address_western_extract_po_box (gchar *line)
{
	gint index = 0;

	/* Find the first digit, but never run past the terminator. */
	while (line[index] != '\0' && !isdigit ((unsigned char) line[index]))
		index++;

	/* Consume the rest of the word that contains it. */
	while (isgraph ((unsigned char) line[index]))
		index++;

	return g_strndup (line, index);
}
/* The locality is whatever precedes the first comma of the line (or the
   whole line when there is no comma).  Returns a newly allocated copy made
   with g_strndup, or NULL when that span is empty. */
static gchar *
e_address_western_extract_locality (gchar *line)
{
	const gint comma_pos = strcspn (line, ",");

	return (comma_pos != 0) ? g_strndup (line, comma_pos) : NULL;
}
/* Whatever resides between the first comma and the start of the postal
   code (the last word of the line) is deemed to be the region; surrounding
   whitespace is trimmed.
   Returns a newly allocated string made with g_strndup, or NULL when the
   line has no comma or when nothing is left between the comma and the
   last word. */
static gchar *
e_address_western_extract_region (gchar *line)
{
	gint start, end;

	start = strcspn (line, ",");
	if (line[start] != ',')
		return NULL;	/* No comma, hence no region. */

	/* Skip the comma and any whitespace that follows it. */
	start++;
	while (isspace ((unsigned char) line[start]))
		start++;

	/* Work backwards from the end of the line, never crossing `start':
	   trailing whitespace, then the postal code, then the whitespace
	   that separates it from the region. */
	end = (gint) strlen (line) - 1;
	while (end >= start && isspace ((unsigned char) line[end]))
		end--;
	while (end >= start && !isspace ((unsigned char) line[end]))
		end--;
	while (end >= start && isspace ((unsigned char) line[end]))
		end--;

	if (end < start)
		return NULL;	/* Nothing between comma and postal code. */

	/* `end' is the index of the last character of the region. */
	return g_strndup (line + start, end - start + 1);
}
/* The postal code is the last whitespace-delimited word of the line.
   Returns a newly allocated copy of it made with g_strndup, or NULL when
   the line is empty or consists only of whitespace. */
static gchar *
e_address_western_extract_postal_code (gchar *line)
{
	gint start, end;

	/* `end' is one past the last non-whitespace character. */
	end = (gint) strlen (line);
	while (end > 0 && isspace ((unsigned char) line[end - 1]))
		end--;

	/* `start' is the first character of the last word; stop at the
	   beginning of the line if the word is the only one. */
	start = end;
	while (start > 0 && !isspace ((unsigned char) line[start - 1]))
		start--;

	if (start == end)
		return NULL;

	/* Between start and end lies the string. */
	return g_strndup (line + start, end - start);
}
/* Split a street line into its street and extended parts.
   The keywords in extended_keywords are tried in order; the first one
   found anywhere in `line' (ignoring case) marks the split point.
   `*street' receives a newly allocated copy of the text before the
   keyword and `*extended' a copy of the text from the keyword onwards.
   When no keyword is present, `*street' is a copy of the whole line and
   `*extended' is set to NULL. */
static void
e_address_western_extract_street (gchar *line, gchar **street, gchar **extended)
{
	const gchar *keyword_pos = NULL;
	gint kw = 0;

	while (keyword_pos == NULL && extended_keywords[kw] != NULL) {
		keyword_pos = e_address_western_strstrcase (line, extended_keywords[kw]);
		kw++;
	}

	if (keyword_pos == NULL) {
		*street = g_strdup (line);
		*extended = NULL;
		return;
	}

	*street = g_strndup (line, (keyword_pos - line));
	*extended = g_strdup (keyword_pos);
}
EAddressWestern *
e_address_western_parse (const gchar *in_address)
{
gchar **lines;
gint linecntr, lineindex;
gchar *address;
gint cntr;
gboolean found_po_box, found_postal;
EAddressWestern *eaw;
#if 0
gint start, end; /* To be used to classify address lines. */
#endif
if (in_address == NULL)
return NULL;
eaw = (EAddressWestern *)g_malloc (sizeof(EAddressWestern));
eaw->po_box = NULL;
eaw->extended = NULL;
eaw->street = NULL;
eaw->locality = NULL;
eaw->region = NULL;
eaw->postal_code = NULL;
eaw->country = NULL;
address = g_strdup (in_address);
/* The first thing I'll do is divide the multiline input string
into lines. */
/* ... count the lines. */
linecntr = 1;
lineindex = 0;
while (address[lineindex] != '\0') {
if (address[lineindex] == '\n')
linecntr++;
lineindex++;
}
/* ... tally them. */
lines = (gchar **)g_malloc (sizeof(gchar *) * (linecntr+3));
lineindex = 0;
lines[0] = &address[0];
linecntr = 1;
while (address[lineindex] != '\0') {
if (address[lineindex] == '\n') {
lines[linecntr] = &address[lineindex + 1];
linecntr++;
}
lineindex++;
}
/* Convert the newlines at the end of each line (except the last,
because it is already NULL terminated) to NULLs. */
for (cntr = 0; cntr < (linecntr - 1); cntr++) {
*(strchr (lines[cntr], '\n')) = '\0';
}
e_address_western_remove_blank_lines (lines, &linecntr);
/* Let's just test these functions. */
found_po_box = FALSE;
found_postal = FALSE;
for (cntr = 0; cntr < linecntr; cntr++) {
if (e_address_western_is_po_box (lines[cntr])) {
if (eaw->po_box == NULL)
eaw->po_box = e_address_western_extract_po_box (lines[cntr]);
found_po_box = TRUE;
}
else if (e_address_western_is_postal (lines[cntr])) {
if (eaw->locality == NULL)
eaw->locality = e_address_western_extract_locality (lines[cntr]);
if (eaw->region == NULL)
eaw->region = e_address_western_extract_region (lines[cntr]);
if (eaw->postal_code == NULL)
eaw->postal_code = e_address_western_extract_postal_code (lines[cntr]);
found_postal = TRUE;
}
else {
if (found_postal) {
if (eaw->country == NULL)
eaw->country = g_strdup (lines[cntr]);
else {
gchar *temp;
temp = g_strconcat (eaw->country, "\n", lines[cntr]);
g_free (eaw->country);
eaw->country = temp;
}
}
else {
if (eaw->street == NULL) {
e_address_western_extract_street (lines[cntr], &eaw->street,
&eaw->extended );
}
else {
if (eaw->extended == NULL) {
eaw->extended = g_strdup (lines[cntr]);
}
else {
gchar *temp;
temp = g_strconcat (eaw->extended, "\n", lines[cntr]);
g_free (eaw->extended);
eaw->extended = temp;
}
}
}
}
}
g_free (lines);
g_free (address);
return eaw;
}
/* Release an EAddressWestern together with every string it holds.
   Passing NULL is allowed and does nothing. */
void
e_address_western_free (EAddressWestern *eaw)
{
	if (eaw == NULL)
		return;

	/* g_free accepts NULL, so unset fields need no guard. */
	g_free (eaw->po_box);
	g_free (eaw->extended);
	g_free (eaw->street);
	g_free (eaw->locality);
	g_free (eaw->region);
	g_free (eaw->postal_code);
	g_free (eaw->country);

	g_free (eaw);
}