diff options
author | Christophe Fergeau <teuf@users.sourceforge.net> | 2005-02-09 02:49:04 +0800 |
---|---|---|
committer | Sivaiah Nallagatla <siva@src.gnome.org> | 2005-02-09 02:49:04 +0800 |
commit | bd22689900a6b68ba6a19b203bc64ce10ff32901 (patch) | |
tree | e49f8f4cf50bdaca505830cc8194ba7beeef7dc3 /addressbook/importers | |
parent | e5cf0530c890fa3afae9042093de30fc2b480eb8 (diff) | |
download | gsoc2013-evolution-bd22689900a6b68ba6a19b203bc64ce10ff32901.tar gsoc2013-evolution-bd22689900a6b68ba6a19b203bc64ce10ff32901.tar.gz gsoc2013-evolution-bd22689900a6b68ba6a19b203bc64ce10ff32901.tar.bz2 gsoc2013-evolution-bd22689900a6b68ba6a19b203bc64ce10ff32901.tar.lz gsoc2013-evolution-bd22689900a6b68ba6a19b203bc64ce10ff32901.tar.xz gsoc2013-evolution-bd22689900a6b68ba6a19b203bc64ce10ff32901.tar.zst gsoc2013-evolution-bd22689900a6b68ba6a19b203bc64ce10ff32901.zip |
New functions which peek at the contents of the file and guess the encoding
2005-02-08 Christophe Fergeau <teuf@users.sourceforge.net>
* importers/evolution-vcard-importer.c (has_bom)
(fix_utf16_endianness) (utf16_to_utf8)
(guess_vcard_encoding): New functions which peek at
the contents of the file to guess its encoding and
convert UTF-16 strings to UTF-8.
(load_file_fn) : check the encoding of the file
and convert UTF-16 and locale encoding to UTF-8
Fixes #54825
svn path=/trunk/; revision=28750
Diffstat (limited to 'addressbook/importers')
-rw-r--r-- | addressbook/importers/evolution-vcard-importer.c | 139 |
1 files changed, 124 insertions, 15 deletions
diff --git a/addressbook/importers/evolution-vcard-importer.c b/addressbook/importers/evolution-vcard-importer.c index 8179179558..8a141466eb 100644 --- a/addressbook/importers/evolution-vcard-importer.c +++ b/addressbook/importers/evolution-vcard-importer.c @@ -267,35 +267,127 @@ static char *supported_extensions[3] = { NULL }; -/* Actually check the contents of this file */ +#define BOM (gunichar2)0xFEFF +#define ANTIBOM (gunichar2)0xFFFE + static gboolean -check_file_is_vcard (const char *filename) +has_bom (const gunichar2 *utf16) +{ + + if ((utf16 == NULL) || (*utf16 == '\0')) { + return FALSE; + } + + return ((*utf16 == BOM) || (*utf16 == ANTIBOM)); +} + +static void +fix_utf16_endianness (gunichar2 *utf16) +{ + gunichar2 *it; + + + if ((utf16 == NULL) || (*utf16 == '\0')) { + return; + } + + if (*utf16 != ANTIBOM) { + return; + } + + for (it = utf16; *it != '\0'; it++) { + *it = GUINT16_SWAP_LE_BE (*it); + } +} + +/* Converts an UTF-16 string to an UTF-8 string removing the BOM character + * WARNING: this may modify the utf16 argument if the function detects the + * string isn't using the local endianness + */ +static gchar * +utf16_to_utf8 (gunichar2 *utf16) +{ + + if (utf16 == NULL) { + return NULL; + } + + fix_utf16_endianness (utf16); + + if (*utf16 == BOM) { + utf16++; + } + + return g_utf16_to_utf8 (utf16, -1, NULL, NULL, NULL); +} + + +enum _VCardEncoding { + VCARD_ENCODING_NONE, + VCARD_ENCODING_UTF8, + VCARD_ENCODING_UTF16, + VCARD_ENCODING_LOCALE +}; + +typedef enum _VCardEncoding VCardEncoding; + + +/* Actually check the contents of this file */ +static VCardEncoding +guess_vcard_encoding (const char *filename) { FILE *handle; char line[4096]; - gboolean result; + char *line_utf8; + VCardEncoding encoding = VCARD_ENCODING_NONE; handle = fopen (filename, "r"); if (handle == NULL) { g_print ("\n"); - return FALSE; + return VCARD_ENCODING_NONE; } fgets (line, 4096, handle); if (line == NULL) { fclose (handle); g_print ("\n"); - return FALSE; + return 
VCARD_ENCODING_NONE; } - - if (g_ascii_strncasecmp (line, "BEGIN:VCARD", 11) == 0) { - result = TRUE; + fclose (handle); + + if (has_bom ((gunichar2*)line)) { + gunichar2 *utf16 = (gunichar2*)line; + /* Check for a BOM to try to detect UTF-16 encoded vcards + * (MacOSX address book creates such vcards for example) + */ + line_utf8 = utf16_to_utf8 (utf16); + if (line_utf8 == NULL) { + return VCARD_ENCODING_NONE; + } + encoding = VCARD_ENCODING_UTF16; + } else if (g_utf8_validate (line, -1, NULL)) { + line_utf8 = g_strdup (line); + encoding = VCARD_ENCODING_UTF8; } else { - result = FALSE; + line_utf8 = g_locale_to_utf8 (line, -1, NULL, NULL, NULL); + if (line_utf8 == NULL) { + return VCARD_ENCODING_NONE; + } + encoding = VCARD_ENCODING_LOCALE; } - fclose (handle); - return result; + if (g_ascii_strncasecmp (line_utf8, "BEGIN:VCARD", 11) != 0) { + encoding = VCARD_ENCODING_NONE; + } + + g_free (line_utf8); + return encoding; +} + +static gboolean +check_file_is_vcard (const char *filename) +{ + return guess_vcard_encoding (filename) != VCARD_ENCODING_NONE; } static void @@ -354,8 +446,9 @@ support_format_fn (EvolutionImporter *importer, return check_file_is_vcard (filename); } for (i = 0; supported_extensions[i] != NULL; i++) { - if (g_ascii_strcasecmp (supported_extensions[i], ext) == 0) + if (g_ascii_strcasecmp (supported_extensions[i], ext) == 0) { return check_file_is_vcard (filename); + } } return FALSE; @@ -386,8 +479,10 @@ load_file_fn (EvolutionImporter *importer, { VCardImporter *gci; char *contents; - - if (check_file_is_vcard (filename) == FALSE) { + VCardEncoding encoding; + + encoding = guess_vcard_encoding (filename); + if (encoding == VCARD_ENCODING_NONE) { return FALSE; } @@ -408,7 +503,21 @@ load_file_fn (EvolutionImporter *importer, if (!g_file_get_contents (filename, &contents, NULL, NULL)) { g_message (G_STRLOC ":Couldn't read file."); return FALSE; - } + } + + if (encoding == VCARD_ENCODING_UTF16) { + gchar *tmp; + gunichar2 *contents_utf16 = 
(gunichar2*)contents; + tmp = utf16_to_utf8 (contents_utf16); + g_free (contents); + contents = tmp; + } else if (encoding == VCARD_ENCODING_LOCALE) { + gchar *tmp; + tmp = g_locale_to_utf8 (contents, -1, NULL, NULL, NULL); + g_free (contents); + contents = tmp; + } + gci->contactlist = eab_contact_list_from_string (contents); g_free (contents); |