/*
* Copyright (C) 2003 Marco Pesenti Gritti
* Copyright (C) 2003 Christian Persch
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* $Id$
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "ephy-encodings.h"
#include "ephy-string.h"
#include "ephy-node-db.h"
#include "ephy-file-helpers.h"
#include "eel-gconf-extensions.h"
#include "ephy-debug.h"
#include <glib/gi18n.h>
#include <string.h>
/* Expands to the EphyEncodingsPrivate area of an EphyEncodings instance
 * (the private size is registered in ephy_encodings_class_init). */
#define EPHY_ENCODINGS_GET_PRIVATE(object)(G_TYPE_INSTANCE_GET_PRIVATE ((object), EPHY_TYPE_ENCODINGS, EphyEncodingsPrivate))
/* Per-instance private state of an EphyEncodings object; populated in
 * ephy_encodings_init() and released in ephy_encodings_finalize(). */
struct EphyEncodingsPrivate
{
/* node database in which all nodes below are created */
EphyNodeDb *db;
/* parent of every entry node, encodings and autodetectors alike */
EphyNode *root;
/* parent of the entry nodes that are not autodetectors */
EphyNode *encodings;
/* parent of the autodetector entry nodes */
EphyNode *detectors;
/* maps an encoding code string to the EphyNode created for it */
GHashTable *hash;
/* heap-allocated codes of the recently used encodings, newest first */
GSList *recent;
};
/**
* Table of all encodings and autodetectors known to EphyEncodings.
*
* Titles are only marked for translation here (N_()); they are translated
* with _() when ephy_encodings_init() fills the node database. Underscores
* in titles are removed by ephy_string_elide_underscores() for the elided
* title (presumably they mark menu mnemonics -- confirm against the UI code).
*
* NOTE: if you add, remove or change encodings, please also update the
* schema in epiphany.schemas.in
*/
static const
struct
{
/* translatable display title */
char *title;
/* encoding code; used as the lookup key in the code -> node hash table */
char *code;
/* bit mask of the language groups this entry belongs to */
EphyLanguageGroup groups;
/* TRUE for autodetector entries, FALSE for plain encodings */
gboolean is_autodetector;
}
encoding_entries [] =
{
{ N_("Arabic (_IBM-864)"), "IBM864", LG_ARABIC, FALSE },
{ N_("Arabic (ISO-_8859-6)"), "ISO-8859-6", LG_ARABIC, FALSE },
{ N_("Arabic (_MacArabic)"), "x-mac-arabic", LG_ARABIC, FALSE },
{ N_("Arabic (_Windows-1256)"), "windows-1256", LG_ARABIC, FALSE },
{ N_("Baltic (_ISO-8859-13)"), "ISO-8859-13", LG_BALTIC, FALSE },
{ N_("Baltic (I_SO-8859-4)"), "ISO-8859-4", LG_BALTIC, FALSE },
{ N_("Baltic (_Windows-1257)"), "windows-1257", LG_BALTIC, FALSE },
{ N_("_Armenian (ARMSCII-8)"), "armscii-8", LG_CAUCASIAN, FALSE },
{ N_("_Georgian (GEOSTD8)"), "geostd8", LG_CAUCASIAN, FALSE },
{ N_("Central European (_IBM-852)"), "IBM852", LG_C_EUROPEAN, FALSE },
{ N_("Central European (I_SO-8859-2)"), "ISO-8859-2", LG_C_EUROPEAN, FALSE },
{ N_("Central European (_MacCE)"), "x-mac-ce", LG_C_EUROPEAN, FALSE },
{ N_("Central European (_Windows-1250)"), "windows-1250", LG_C_EUROPEAN, FALSE },
{ N_("Chinese Simplified (_GB18030)"), "gb18030", LG_CHINESE_SIMP, FALSE },
{ N_("Chinese Simplified (G_B2312)"), "GB2312", LG_CHINESE_SIMP, FALSE },
{ N_("Chinese Simplified (GB_K)"), "x-gbk", LG_CHINESE_SIMP, FALSE },
{ N_("Chinese Simplified (_HZ)"), "HZ-GB-2312", LG_CHINESE_SIMP, FALSE },
{ N_("Chinese Simplified (_ISO-2022-CN)"), "ISO-2022-CN", LG_CHINESE_SIMP, FALSE },
{ N_("Chinese Traditional (Big_5)"), "Big5", LG_CHINESE_TRAD, FALSE },
{ N_("Chinese Traditional (Big5-HK_SCS)"), "Big5-HKSCS", LG_CHINESE_TRAD, FALSE },
{ N_("Chinese Traditional (_EUC-TW)"), "x-euc-tw", LG_CHINESE_TRAD, FALSE },
{ N_("Cyrillic (_IBM-855)"), "IBM855", LG_CYRILLIC, FALSE },
{ N_("Cyrillic (I_SO-8859-5)"), "ISO-8859-5", LG_CYRILLIC, FALSE },
{ N_("Cyrillic (IS_O-IR-111)"), "ISO-IR-111", LG_CYRILLIC, FALSE },
{ N_("Cyrillic (_KOI8-R)"), "KOI8-R", LG_CYRILLIC, FALSE },
{ N_("Cyrillic (_MacCyrillic)"), "x-mac-cyrillic", LG_CYRILLIC, FALSE },
{ N_("Cyrillic (_Windows-1251)"), "windows-1251", LG_CYRILLIC, FALSE },
{ N_("Cyrillic/_Russian (IBM-866)"), "IBM866", LG_CYRILLIC, FALSE },
{ N_("Greek (_ISO-8859-7)"), "ISO-8859-7", LG_GREEK, FALSE },
{ N_("Greek (_MacGreek)"), "x-mac-greek", LG_GREEK, FALSE },
{ N_("Greek (_Windows-1253)"), "windows-1253", LG_GREEK, FALSE },
{ N_("Gujarati (_MacGujarati)"), "x-mac-gujarati", LG_INDIAN, FALSE },
{ N_("Gurmukhi (Mac_Gurmukhi)"), "x-mac-gurmukhi", LG_INDIAN, FALSE },
{ N_("Hindi (Mac_Devanagari)"), "x-mac-devanagari", LG_INDIAN, FALSE },
{ N_("Hebrew (_IBM-862)"), "IBM862", LG_HEBREW, FALSE },
{ N_("Hebrew (IS_O-8859-8-I)"), "ISO-8859-8-I", LG_HEBREW, FALSE },
{ N_("Hebrew (_MacHebrew)"), "x-mac-hebrew", LG_HEBREW, FALSE },
{ N_("Hebrew (_Windows-1255)"), "windows-1255", LG_HEBREW, FALSE },
{ N_("_Visual Hebrew (ISO-8859-8)"), "ISO-8859-8", LG_HEBREW, FALSE },
{ N_("Japanese (_EUC-JP)"), "EUC-JP", LG_JAPANESE, FALSE },
{ N_("Japanese (_ISO-2022-JP)"), "ISO-2022-JP", LG_JAPANESE, FALSE },
{ N_("Japanese (_Shift-JIS)"), "Shift_JIS", LG_JAPANESE, FALSE },
{ N_("Korean (_EUC-KR)"), "EUC-KR", LG_KOREAN, FALSE },
{ N_("Korean (_ISO-2022-KR)"), "ISO-2022-KR", LG_KOREAN, FALSE },
{ N_("Korean (_JOHAB)"), "x-johab", LG_KOREAN, FALSE },
{ N_("Korean (_UHC)"), "x-windows-949", LG_KOREAN, FALSE },
{ N_("_Celtic (ISO-8859-14)"), "ISO-8859-14", LG_NORDIC, FALSE },
{ N_("_Icelandic (MacIcelandic)"), "x-mac-icelandic", LG_NORDIC, FALSE },
{ N_("_Nordic (ISO-8859-10)"), "ISO-8859-10", LG_NORDIC, FALSE },
{ N_("_Persian (MacFarsi)"), "x-mac-farsi", LG_PERSIAN, FALSE },
{ N_("Croatian (Mac_Croatian)"), "x-mac-croatian", LG_SE_EUROPEAN, FALSE },
{ N_("_Romanian (MacRomanian)"), "x-mac-romanian", LG_SE_EUROPEAN, FALSE },
{ N_("R_omanian (ISO-8859-16)"), "ISO-8859-16", LG_SE_EUROPEAN, FALSE },
{ N_("South _European (ISO-8859-3)"), "ISO-8859-3", LG_SE_EUROPEAN, FALSE },
{ N_("Thai (TIS-_620)"), "TIS-620", LG_THAI, FALSE },
/* the next two entries are only compiled in when MOZILLA_SNAPSHOT >= 10 */
#if MOZILLA_SNAPSHOT >= 10
{ N_("Thai (IS_O-8859-11)"), "iso-8859-11", LG_THAI, FALSE },
{ N_("_Thai (Windows-874)"), "windows-874", LG_THAI, FALSE },
#endif
{ N_("Turkish (_IBM-857)"), "IBM857", LG_TURKISH, FALSE },
{ N_("Turkish (I_SO-8859-9)"), "ISO-8859-9", LG_TURKISH, FALSE },
{ N_("Turkish (_MacTurkish)"), "x-mac-turkish", LG_TURKISH, FALSE },
{ N_("Turkish (_Windows-1254)"), "windows-1254", LG_TURKISH, FALSE },
{ N_("Unicode (UTF-_8)"), "UTF-8", LG_UNICODE, FALSE },
{ N_("Unicode (UTF-_7)"), "UTF-7", LG_UNICODE, FALSE },
{ N_("Cyrillic/Ukrainian (_KOI8-U)"), "KOI8-U", LG_UKRAINIAN, FALSE },
{ N_("Cyrillic/Ukrainian (Mac_Ukrainian)"), "x-mac-ukrainian", LG_UKRAINIAN, FALSE },
{ N_("Vietnamese (_TCVN)"), "x-viet-tcvn5712", LG_VIETNAMESE, FALSE },
{ N_("Vietnamese (_VISCII)"), "VISCII", LG_VIETNAMESE, FALSE },
{ N_("Vietnamese (V_PS)"), "x-viet-vps", LG_VIETNAMESE, FALSE },
{ N_("Vietnamese (_Windows-1258)"), "windows-1258", LG_VIETNAMESE, FALSE },
{ N_("Western (_IBM-850)"), "IBM850", LG_WESTERN, FALSE },
{ N_("Western (_ISO-8859-1)"), "ISO-8859-1", LG_WESTERN, FALSE },
{ N_("Western (IS_O-8859-15)"), "ISO-8859-15", LG_WESTERN, FALSE },
{ N_("Western (_MacRoman)"), "x-mac-roman", LG_WESTERN, FALSE },
{ N_("Western (_Windows-1252)"), "windows-1252", LG_WESTERN, FALSE },
/* autodetector entries (is_autodetector == TRUE) start here; the entry
 * with the empty code is the "Off" choice */
{ N_("Off"), "", LG_NONE, TRUE },
{ N_("Chinese"), "zh_parallel_state_machine", LG_CHINESE_TRAD | LG_CHINESE_SIMP, TRUE },
{ N_("Chinese Simplified"), "zhcn_parallel_state_machine", LG_CHINESE_SIMP, TRUE },
{ N_("Chinese Traditional"), "zhtw_parallel_state_machine", LG_CHINESE_TRAD, TRUE },
{ N_("East Asian"), "cjk_parallel_state_machine", LG_CHINESE_TRAD | LG_CHINESE_SIMP | LG_JAPANESE | LG_KOREAN, TRUE },
{ N_("Japanese"), "ja_parallel_state_machine", LG_JAPANESE, TRUE },
{ N_("Korean"), "ko_parallel_state_machine", LG_KOREAN, TRUE },
{ N_("Russian"), "ruprob", LG_CYRILLIC | LG_UKRAINIAN, TRUE },
{ N_("Universal"), "universal_charset_detector", LG_ALL, TRUE },
{ N_("Ukrainian"), "ukprob", LG_UKRAINIAN, TRUE }
};
/* Number of rows in encoding_entries; loop bound when filling the node db. */
static const guint n_encoding_entries = G_N_ELEMENTS (encoding_entries);
/* Fixed node ids passed to ephy_node_new_with_id() for the three container
 * nodes created in ephy_encodings_init(). */
enum
{
/* id of the root node that parents every entry */
ALL_NODE_ID = 2,
/* id of the node that parents the plain encodings */
ENCODINGS_NODE_ID = 3,
/* id of the node that parents the autodetectors */
DETECTORS_NODE_ID = 5
};
/* GConf key under which the list of recently used encoding codes is
 * read and written through the eel_gconf_*_string_list() helpers. */
#define RECENT_KEY "/apps/epiphany/general/recent_encodings"
/* Maximum number of codes kept in the recently-used list. */
#define RECENT_MAX 4
/* Forward declarations: both are referenced from the GTypeInfo in
 * ephy_encodings_get_type() before their definitions appear. */
static void ephy_encodings_class_init (EphyEncodingsClass *klass);
static void ephy_encodings_init (EphyEncodings *ma);
/* Parent class, set in class_init; used to chain up in finalize. */
static GObjectClass *parent_class = NULL;
/**
 * ephy_encodings_get_type:
 *
 * Registers the "EphyEncodings" type as a static subtype of G_TYPE_OBJECT
 * the first time it is called and caches the resulting id; every later
 * call just hands back the cached id.
 *
 * Returns: the GType of EphyEncodings
 */
GType
ephy_encodings_get_type (void)
{
	static GType type_id = 0;
	static const GTypeInfo type_info =
	{
		sizeof (EphyEncodingsClass),			/* class_size */
		NULL,						/* base_init */
		NULL,						/* base_finalize */
		(GClassInitFunc) ephy_encodings_class_init,	/* class_init */
		NULL,						/* class_finalize */
		NULL,						/* class_data */
		sizeof (EphyEncodings),				/* instance_size */
		0,						/* n_preallocs */
		(GInstanceInitFunc) ephy_encodings_init		/* instance_init */
	};

	/* already registered: nothing left to do */
	if (type_id != 0)
	{
		return type_id;
	}

	type_id = g_type_register_static (G_TYPE_OBJECT,
					  "EphyEncodings",
					  &type_info, 0);

	return type_id;
}
/*
 * GObject finalize handler: tears down the lookup table, drops the
 * references held on the three container nodes, frees the recently-used
 * code strings together with their list, releases the node database and
 * finally chains up to the parent class.
 */
static void
ephy_encodings_finalize (GObject *object)
{
	EphyEncodingsPrivate *priv = EPHY_ENCODINGS (object)->priv;
	GSList *item;

	g_hash_table_destroy (priv->hash);

	ephy_node_unref (priv->encodings);
	ephy_node_unref (priv->detectors);
	ephy_node_unref (priv->root);

	/* the list owns its strings */
	for (item = priv->recent; item != NULL; item = item->next)
	{
		g_free (item->data);
	}
	g_slist_free (priv->recent);

	g_object_unref (priv->db);

	LOG ("EphyEncodings finalised")

	G_OBJECT_CLASS (parent_class)->finalize (object);
}
/*
 * Class initialiser: remembers the parent class for chaining up, installs
 * the finalize handler and reserves room for the private instance data.
 */
static void
ephy_encodings_class_init (EphyEncodingsClass *klass)
{
	GObjectClass *gobject_class;

	parent_class = g_type_class_peek_parent (klass);

	gobject_class = G_OBJECT_CLASS (klass);
	gobject_class->finalize = ephy_encodings_finalize;

	g_type_class_add_private (gobject_class, sizeof (EphyEncodingsPrivate));
}
/**
 * ephy_encodings_get_node:
 * @encodings: an #EphyEncodings
 * @code: the encoding code to look up (must not be NULL)
 *
 * Looks @code up in the code -> node hash table.
 *
 * Returns: the node registered for @code, or NULL if @code is unknown
 * or either argument is invalid. The node is not referenced for the caller.
 */
EphyNode *
ephy_encodings_get_node (EphyEncodings *encodings,
			 const char *code)
{
	g_return_val_if_fail (EPHY_IS_ENCODINGS (encodings), NULL);
	/* the table hashes with g_str_hash, which dereferences its key */
	g_return_val_if_fail (code != NULL, NULL);

	return g_hash_table_lookup (encodings->priv->hash, code);
}
/**
 * ephy_encodings_get_encodings:
 * @encodings: an #EphyEncodings
 * @group_mask: bit mask of language groups to select
 *
 * Collects every non-autodetector encoding node whose language-groups
 * property shares at least one bit with @group_mask.
 *
 * Returns: a newly allocated list of the matching nodes, in reverse child
 * order (nodes are prepended), or NULL if nothing matches or @encodings is
 * invalid. The caller frees the list cells; the nodes are not referenced.
 */
GList *
ephy_encodings_get_encodings (EphyEncodings *encodings,
			      EphyLanguageGroup group_mask)
{
	GList *list = NULL;
	GPtrArray *children;
	guint i;

	/* same instance check as the sibling accessors */
	g_return_val_if_fail (EPHY_IS_ENCODINGS (encodings), NULL);

	children = ephy_node_get_children (encodings->priv->encodings);

	for (i = 0; i < children->len; i++)
	{
		EphyNode *kid;
		EphyLanguageGroup group;

		kid = g_ptr_array_index (children, i);
		group = ephy_node_get_property_int
			(kid, EPHY_NODE_ENCODING_PROP_LANGUAGE_GROUPS);

		if ((group & group_mask) != 0)
		{
			list = g_list_prepend (list, kid);
		}
	}

	return list;
}
/**
 * ephy_encodings_get_detectors:
 * @encodings: an #EphyEncodings
 *
 * Returns: the container node whose children are the autodetector
 * entries, or NULL if @encodings is not an #EphyEncodings
 */
EphyNode *
ephy_encodings_get_detectors (EphyEncodings *encodings)
{
	EphyNode *detectors_node;

	g_return_val_if_fail (EPHY_IS_ENCODINGS (encodings), NULL);

	detectors_node = encodings->priv->detectors;

	return detectors_node;
}
/**
 * ephy_encodings_get_all:
 * @encodings: an #EphyEncodings
 *
 * Returns: the container node whose children are the non-autodetector
 * encoding entries, or NULL if @encodings is not an #EphyEncodings
 */
EphyNode *
ephy_encodings_get_all (EphyEncodings *encodings)
{
	EphyNode *encodings_node;

	g_return_val_if_fail (EPHY_IS_ENCODINGS (encodings), NULL);

	encodings_node = encodings->priv->encodings;

	return encodings_node;
}
/**
 * ephy_encodings_add_recent:
 * @encodings: an #EphyEncodings
 * @code: code of a known encoding (must have a node in @encodings)
 *
 * Moves @code to the front of the recently-used list, adding it if it
 * was not present, trims the list to RECENT_MAX entries and writes the
 * result to the RECENT_KEY GConf key.
 */
void
ephy_encodings_add_recent (EphyEncodings *encodings,
			   const char *code)
{
	EphyEncodingsPrivate *priv;
	GSList *element;
	char *copy;

	g_return_if_fail (EPHY_IS_ENCODINGS (encodings));
	g_return_if_fail (code != NULL);
	g_return_if_fail (ephy_encodings_get_node (encodings, code) != NULL);

	priv = encodings->priv;

	/* duplicate before touching the list: code may point at the very
	 * string owned by the element that is about to be freed */
	copy = g_strdup (code);

	/* keep the list elements unique */
	element = g_slist_find_custom (priv->recent, code,
				       (GCompareFunc) strcmp);
	if (element != NULL)
	{
		/* the list owns its strings; free the string and the cell
		 * (g_slist_remove_link would only unlink and leak both) */
		g_free (element->data);
		priv->recent = g_slist_delete_link (priv->recent, element);
	}

	/* add the new code upfront */
	priv->recent = g_slist_prepend (priv->recent, copy);

	/* truncate the list if necessary; it's at most 1 element too much */
	if (g_slist_length (priv->recent) > RECENT_MAX)
	{
		GSList *tail;

		tail = g_slist_last (priv->recent);
		g_free (tail->data);
		priv->recent = g_slist_delete_link (priv->recent, tail);
	}

	/* persist the list */
	eel_gconf_set_string_list (RECENT_KEY, priv->recent);
}
/**
 * ephy_encodings_get_recent:
 * @encodings: an #EphyEncodings
 *
 * Resolves every code in the recently-used list to its node.
 *
 * Returns: a newly allocated list of nodes in the reverse order of the
 * recently-used list (nodes are prepended while walking it), or NULL if
 * the list is empty, @encodings is invalid, or a code unexpectedly has no
 * node. The caller frees the list cells; the nodes are not referenced.
 */
GList *
ephy_encodings_get_recent (EphyEncodings *encodings)
{
	GSList *l;
	GList *list = NULL;

	/* same instance check as the sibling accessors */
	g_return_val_if_fail (EPHY_IS_ENCODINGS (encodings), NULL);

	for (l = encodings->priv->recent; l != NULL; l = l->next)
	{
		EphyNode *node;

		node = ephy_encodings_get_node (encodings, (char *) l->data);
		if (!EPHY_IS_NODE (node))
		{
			/* don't leak the partially built result */
			g_list_free (list);
			g_return_val_if_reached (NULL);
		}

		list = g_list_prepend (list, node);
	}

	return list;
}
/*
 * Instance initialiser.
 *
 * Creates the node database and the three container nodes, then creates
 * one node per row of encoding_entries, registers it in the code -> node
 * hash table, files it under the root node and under either the detectors
 * or the encodings node, and sets its title, collation key, elided title,
 * encoding code, language groups and is-autodetector properties.
 *
 * Finally loads the recently-used list from the RECENT_KEY GConf key,
 * keeping at most RECENT_MAX distinct codes that name a known encoding.
 */
static void
ephy_encodings_init (EphyEncodings *encodings)
{
	EphyEncodingsPrivate *priv;
	EphyNodeDb *db;
	GSList *list, *l;
	guint i;

	priv = encodings->priv = EPHY_ENCODINGS_GET_PRIVATE (encodings);

	LOG ("EphyEncodings initialising")

	db = ephy_node_db_new ("EncodingsDB");
	priv->db = db;

	/* keys are the static code strings of encoding_entries, so the
	 * table needs no key/value destroy functions */
	priv->hash = g_hash_table_new (g_str_hash, g_str_equal);

	priv->root = ephy_node_new_with_id (db, ALL_NODE_ID);
	priv->encodings = ephy_node_new_with_id (db, ENCODINGS_NODE_ID);
	priv->detectors = ephy_node_new_with_id (db, DETECTORS_NODE_ID);

	/* these references are dropped again in finalize */
	ephy_node_ref (priv->root);
	ephy_node_ref (priv->encodings);
	ephy_node_ref (priv->detectors);

	/* now fill the db */
	for (i = 0; i < n_encoding_entries; i++)
	{
		EphyNode *node;
		char *elided, *normalised;
		GValue value = { 0, };

		node = ephy_node_new (db);
		ephy_node_add_child (priv->root, node);
		g_hash_table_insert (priv->hash, encoding_entries[i].code, node);

		if (encoding_entries[i].is_autodetector)
		{
			ephy_node_add_child (priv->detectors, node);
		}
		else
		{
			ephy_node_add_child (priv->encodings, node);
		}

		/* translated title, underscores included */
		g_value_init (&value, G_TYPE_STRING);
		g_value_set_string (&value, _(encoding_entries[i].title));
		ephy_node_set_property (node, EPHY_NODE_ENCODING_PROP_TITLE, &value);
		g_value_unset (&value);

		/* collation key, built from the underscore-free title */
		elided = ephy_string_elide_underscores (_(encoding_entries[i].title));
		normalised = g_utf8_normalize (elided, -1, G_NORMALIZE_DEFAULT);

		g_value_init (&value, G_TYPE_STRING);
		g_value_take_string (&value, g_utf8_collate_key (normalised, -1));
		ephy_node_set_property (node, EPHY_NODE_ENCODING_PROP_COLLATION_KEY, &value);
		g_value_unset (&value);
		g_free (normalised);

		/* the GValue takes ownership of elided, so it is not freed here */
		g_value_init (&value, G_TYPE_STRING);
		g_value_take_string (&value, elided);
		ephy_node_set_property (node, EPHY_NODE_ENCODING_PROP_TITLE_ELIDED, &value);
		g_value_unset (&value);

		g_value_init (&value, G_TYPE_STRING);
		g_value_set_string (&value, encoding_entries[i].code);
		ephy_node_set_property (node, EPHY_NODE_ENCODING_PROP_ENCODING, &value);
		g_value_unset (&value);

		g_value_init (&value, G_TYPE_INT);
		g_value_set_int (&value, encoding_entries[i].groups);
		ephy_node_set_property (node, EPHY_NODE_ENCODING_PROP_LANGUAGE_GROUPS, &value);
		g_value_unset (&value);

		g_value_init (&value, G_TYPE_BOOLEAN);
		g_value_set_boolean (&value, encoding_entries[i].is_autodetector);
		ephy_node_set_property (node, EPHY_NODE_ENCODING_PROP_IS_AUTODETECTOR, &value);
		g_value_unset (&value);
	}

	/* get the list of recently used encodings */
	list = eel_gconf_get_string_list (RECENT_KEY);

	/* make sure the list has no duplicates (GtkUIManager goes
	 * crazy otherwise), and only valid entries
	 */
	priv->recent = NULL;
	for (l = list; l != NULL; l = l->next)
	{
		/* compare string contents: every element is a separately
		 * allocated string, so a pointer comparison (g_slist_find)
		 * would never detect a duplicate */
		if (l->data != NULL
		    && g_slist_find_custom (priv->recent, l->data,
					    (GCompareFunc) strcmp) == NULL
		    && g_slist_length (priv->recent) < RECENT_MAX
		    && ephy_encodings_get_node (encodings, l->data) != NULL)
		{
			/* ownership of the string moves to priv->recent */
			priv->recent = g_slist_prepend (priv->recent, l->data);
		}
		else
		{
			g_free (l->data);
		}
	}
	/* entries were prepended; restore the stored order */
	priv->recent = g_slist_reverse (priv->recent);

	/* only the cells: the strings were either moved or freed above */
	g_slist_free (list);
}
/**
 * ephy_encoding_info_free:
 * @info: an #EphyEncodingInfo, or NULL
 *
 * Frees the six encoding strings held by @info and then @info itself.
 * Passing NULL is allowed and does nothing.
 */
void
ephy_encoding_info_free (EphyEncodingInfo *info)
{
	if (info == NULL)
	{
		return;
	}

	g_free (info->encoding);
	g_free (info->hint_encoding);
	g_free (info->parent_encoding);
	g_free (info->prev_doc_encoding);
	g_free (info->forced_encoding);
	g_free (info->default_encoding);

	g_free (info);
}
/**
 * ephy_encoding_info_is_automatic:
 * @info: an #EphyEncodingInfo
 *
 * Returns: TRUE when the encoding source ranks below
 * EMBED_ENCODING_PARENT_FORCED and no forced encoding is set (the
 * forced_encoding string is NULL or empty); FALSE otherwise, including
 * when @info is NULL.
 */
gboolean
ephy_encoding_info_is_automatic (EphyEncodingInfo *info)
{
	g_return_val_if_fail (info != NULL, FALSE);

	/* a forced source rules out automatic selection outright */
	if (!(info->encoding_source < EMBED_ENCODING_PARENT_FORCED))
	{
		return FALSE;
	}

	return (info->forced_encoding == NULL
		|| info->forced_encoding[0] == '\0');
}
/**
 * ephy_encodings_new:
 *
 * Returns: a newly constructed #EphyEncodings instance, created with
 * g_object_new() and no construction properties
 */
EphyEncodings *
ephy_encodings_new (void)
{
	EphyEncodings *instance;

	instance = g_object_new (EPHY_TYPE_ENCODINGS, NULL);

	return instance;
}