aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Marco Pesenti Gritti <marco@it.gnome.org> 2003-06-14 02:05:27 +0800
committer Marco Pesenti Gritti <mpeseng@src.gnome.org> 2003-06-14 02:05:27 +0800
commit 63c30bbc992945dcae374ef87716234fa53cd184 (patch)
tree 572bce53db37c3e26fab0acabe92fed2670dec8a /src
parent ab152b500ba9248f625db68601dc293bfd1fa3cb (diff)
downloadgsoc2013-epiphany-63c30bbc992945dcae374ef87716234fa53cd184.tar
gsoc2013-epiphany-63c30bbc992945dcae374ef87716234fa53cd184.tar.gz
gsoc2013-epiphany-63c30bbc992945dcae374ef87716234fa53cd184.tar.bz2
gsoc2013-epiphany-63c30bbc992945dcae374ef87716234fa53cd184.tar.lz
gsoc2013-epiphany-63c30bbc992945dcae374ef87716234fa53cd184.tar.xz
gsoc2013-epiphany-63c30bbc992945dcae374ef87716234fa53cd184.tar.zst
gsoc2013-epiphany-63c30bbc992945dcae374ef87716234fa53cd184.zip
Do not use libxml to parse mozilla bookmarks. Use the galeon parser
2003-06-13 Marco Pesenti Gritti <marco@it.gnome.org> * src/bookmarks/ephy-bookmarks-import.c: (gul_general_read_line_from_file), (gul_string_ascii_strcasestr), (ns_get_bookmark_item), (ns_parse_bookmark_item), (ephy_bookmarks_import_mozilla): Do not use libxml to parse mozilla bookmarks. Use the galeon parser instead, ugly but works.
Diffstat (limited to 'src')
-rw-r--r-- src/bookmarks/ephy-bookmarks-import.c 270
1 files changed, 221 insertions, 49 deletions
diff --git a/src/bookmarks/ephy-bookmarks-import.c b/src/bookmarks/ephy-bookmarks-import.c
index f574654ae..778186039 100644
--- a/src/bookmarks/ephy-bookmarks-import.c
+++ b/src/bookmarks/ephy-bookmarks-import.c
@@ -24,6 +24,19 @@
#include "ephy-bookmarks-import.h"
#include "ephy-debug.h"
+/**
+ * NSItemType: netscape bookmark item type
+ *
+ * Classification of a single line of a netscape/mozilla bookmark file, as
+ * returned by ns_get_bookmark_item().
+ */
+typedef enum
+{
+ /* Line holds an <A HREF="..."> bookmark entry. */
+ NS_SITE,
+ /* NOTE(review): never returned by the parser visible in this file. */
+ NS_NOTES,
+ /* Line holds a <DT><H3 ...> folder heading. */
+ NS_FOLDER,
+ /* Line holds a </DL> tag, which closes the current folder. */
+ NS_FOLDER_END,
+ /* NOTE(review): never returned by the parser visible in this file. */
+ NS_SEPARATOR,
+ /* Line matched none of the recognised patterns or was malformed. */
+ NS_UNKNOWN
+} NSItemType;
+
typedef struct _XbelInfo
{
char *title;
@@ -71,42 +84,6 @@ ephy_bookmarks_import (EphyBookmarks *bookmarks,
}
static void
-mozilla_parse_bookmarks (EphyBookmarks *bookmarks,
- htmlNodePtr node,
- char **keyword)
-{
- htmlNodePtr child = node;
-
- while (child != NULL)
- {
- if (xmlStrEqual (child->name, "h3"))
- {
- *keyword = xmlNodeGetContent (child);
- }
- else if (xmlStrEqual (child->name, "a"))
- {
- xmlChar *title, *url;
- EphyNode *bmk;
-
- title = xmlNodeGetContent (child);
- url = xmlGetProp (child, "href");
- bmk = ephy_bookmarks_add (bookmarks,
- title,
- url);
- set_folder (bookmarks, bmk, *keyword);
- xmlFree (title);
- xmlFree (url);
- }
-
- mozilla_parse_bookmarks (bookmarks,
- child->children,
- keyword);
- child = child->next;
- }
-}
-
-
-static void
xbel_parse_single_bookmark (EphyBookmarks *bookmarks,
xmlNodePtr node, XbelInfo *xbel)
{
@@ -221,25 +198,220 @@ xbel_parse_bookmarks (EphyBookmarks *bookmarks,
}
}
+/**
+ * gul_general_read_line_from_file:
+ * @f: stream opened for reading
+ *
+ * Reads one line of arbitrary length from @f, in chunks of up to 255 bytes.
+ *
+ * Reading stops as soon as fgets() fails, so a read error ends the line
+ * instead of looping forever, and a final line without a trailing newline
+ * is returned exactly once rather than with its last chunk duplicated.
+ *
+ * Returns: a newly allocated string that the caller must g_free(). It
+ * includes the trailing '\n' when one was read and is empty when nothing
+ * could be read (end of file or read error).
+ */
+static gchar *
+gul_general_read_line_from_file (FILE *f)
+{
+	GString *line = g_string_new (NULL);
+	gchar buf[256];
+
+	while (fgets (buf, sizeof (buf), f) != NULL)
+	{
+		g_string_append (line, buf);
+
+		/* A newline in the chunk means the line is complete. */
+		if (strchr (buf, '\n') != NULL)
+			break;
+	}
+
+	/* Hand the character data over to the caller, drop the wrapper. */
+	return g_string_free (line, FALSE);
+}
+
+/**
+ * gul_string_ascii_strcasestr:
+ * @a: string to search in
+ * @b: string to search for
+ *
+ * Locates the first occurrence of @b in @a, ignoring the case of ASCII
+ * letters.
+ *
+ * g_ascii_strdown() returns a newly allocated lowered copy and leaves its
+ * argument untouched, so the copies it returns are the ones that must be
+ * searched and freed.
+ *
+ * Returns: a pointer into @a at the start of the match, or %NULL when @b
+ * does not occur in @a (or when either argument is %NULL).
+ */
+static const gchar *
+gul_string_ascii_strcasestr (const gchar *a, const gchar *b)
+{
+	gchar *down_a;
+	gchar *down_b;
+	const gchar *ptr;
+	const gchar *result = NULL;
+
+	g_return_val_if_fail (a != NULL, NULL);
+	g_return_val_if_fail (b != NULL, NULL);
+
+	/* lower-cased copies of both strings */
+	down_a = g_ascii_strdown (a, -1);
+	down_b = g_ascii_strdown (b, -1);
+
+	/*
+	 * ASCII lowering never changes the byte length, so an offset into
+	 * down_a is also a valid offset into a. Compute it before freeing.
+	 */
+	ptr = strstr (down_a, down_b);
+	if (ptr != NULL)
+	{
+		result = a + (ptr - down_a);
+	}
+
+	g_free (down_a);
+	g_free (down_b);
+
+	return result;
+}
+
+/**
+ * ns_get_bookmark_item:
+ * @f: open mozilla/netscape bookmark file
+ * @name: receives the bookmark title or folder name
+ * @url: receives the bookmark address
+ *
+ * Reads the next line of @f and classifies it.
+ *
+ * The line buffer is released on every path. Missing delimiters (opening
+ * or closing quote of the HREF value, the end of the tag, the closing
+ * </A> or </H3>) are checked before any pointer arithmetic is done on the
+ * search results.
+ *
+ * Returns: %NS_SITE with @url and @name filled in, %NS_FOLDER with @name
+ * filled in, %NS_FOLDER_END for a line containing </DL>, or %NS_UNKNOWN
+ * for anything else, including malformed bookmark or folder lines.
+ */
+static NSItemType
+ns_get_bookmark_item (FILE *f, GString *name, GString *url)
+{
+	NSItemType type = NS_UNKNOWN;
+	char *line;
+	const char *found;
+	const char *end;
+
+	line = gul_general_read_line_from_file (f);
+
+	if ((found = gul_string_ascii_strcasestr (line, "<A HREF=")) != NULL)
+	{ /* declare site? */
+		/* found[8] is the character right after '='; it must open the value. */
+		if (found[8] != '"')
+			goto out;
+		found += 9;
+
+		end = strchr (found, '"');
+		if (end == NULL)
+			goto out;
+		g_string_assign (url, found);	/* url=URL" ADD_DATE ... */
+		g_string_truncate (url, end - found);
+
+		/* The title starts after the last attribute's closing quote. */
+		found = strstr (end, "\">");
+		if (found == NULL)
+			goto out;
+		g_string_assign (name, found + 2);
+
+		/* Without a closing </A> the rest of the line is the title. */
+		end = gul_string_ascii_strcasestr (name->str, "</A>");
+		if (end != NULL)
+			g_string_truncate (name, end - name->str);
+
+		type = NS_SITE;
+	}
+	else if ((found = gul_string_ascii_strcasestr (line, "<DT><H3")) != NULL)
+	{ /* declare folder? */
+		found = strchr (found + 7, '>');
+		if (found == NULL)
+			goto out;
+		g_string_assign (name, found + 1);
+
+		/* Without a closing </H3> the rest of the line is the name. */
+		end = gul_string_ascii_strcasestr (name->str, "</H3>");
+		if (end != NULL)
+			g_string_truncate (name, end - name->str);
+
+		type = NS_FOLDER;
+	}
+	else if (gul_string_ascii_strcasestr (line, "</DL>") != NULL)
+	{ /* end folder? */
+		type = NS_FOLDER_END;
+	}
+
+out:
+	g_free (line);
+	return type;
+}
+
+/**
+ * ns_parse_bookmark_item:
+ * @string: raw bookmark title as found in the bookmark file
+ *
+ * Replaces HTML character references in @string by the characters they
+ * stand for. Handled are &amp;, &lt;, &gt; and &quot; (matched without
+ * regard to ASCII case) and decimal references of the form &#D;, which
+ * are appended UTF-8 encoded. Hexadecimal references (&#xH;) are not
+ * supported.
+ * More info : http://www.w3.org/TR/html4/charset.html#h-5.3.2
+ *
+ * An '&' that does not start a supported reference is copied unchanged,
+ * together with the text after it. The input is walked with one cursor up
+ * to its terminating NUL, so nothing past the end of @string is read.
+ *
+ * Patch courtesy of Almer S. Tigelaar <almer1@dds.nl>
+ *
+ * Returns: a newly allocated string that the caller must g_free(), or
+ * %NULL when @string or its character data is %NULL.
+ */
+static char *
+ns_parse_bookmark_item (GString *string)
+{
+	static const struct
+	{
+		const char *entity;
+		gsize len;
+		char ch;
+	} named[] = {
+		{ "&amp;", 5, '&' },
+		{ "&lt;", 4, '<' },
+		{ "&gt;", 4, '>' },
+		{ "&quot;", 6, '"' }
+	};
+	const char *p;
+	GString *result;
+
+	g_return_val_if_fail (string != NULL, NULL);
+	g_return_val_if_fail (string->str != NULL, NULL);
+
+	/* Allocate only after the checks so a failed check leaks nothing. */
+	result = g_string_new (NULL);
+
+	p = string->str;
+	while (*p != '\0')
+	{
+		gboolean replaced = FALSE;
+		guint i;
+
+		if (*p != '&')
+		{
+			g_string_append_c (result, *p);
+			p++;
+			continue;
+		}
+
+		for (i = 0; i < G_N_ELEMENTS (named); i++)
+		{
+			if (g_ascii_strncasecmp (p, named[i].entity,
+						 named[i].len) == 0)
+			{
+				g_string_append_c (result, named[i].ch);
+				p += named[i].len;
+				replaced = TRUE;
+				break;
+			}
+		}
+		if (replaced)
+			continue;
+
+		/* Decimal character reference: &#D; */
+		if (p[1] == '#' && g_ascii_isdigit (p[2]))
+		{
+			char *end;
+			unsigned long val = strtoul (p + 2, &end, 10);
+
+			/* Range check first so the narrowing cast is lossless. */
+			if (val != 0 && val <= 0x10FFFF &&
+			    g_unichar_validate ((gunichar) val))
+			{
+				g_string_append_unichar (result, (gunichar) val);
+				/* The terminating ';' belongs to the reference. */
+				p = (*end == ';') ? end + 1 : end;
+				continue;
+			}
+		}
+
+		/* Not a reference we understand: keep the '&' literally. */
+		g_string_append_c (result, '&');
+		p++;
+	}
+
+	return g_string_free (result, FALSE);
+}
+
gboolean
ephy_bookmarks_import_mozilla (EphyBookmarks *bookmarks,
const char *filename)
{
- htmlDocPtr doc;
- htmlNodePtr child;
- char *keyword = NULL;
-
- if (g_file_test (filename, G_FILE_TEST_EXISTS) == FALSE)
+ FILE *bf; /* bookmark file */
+ GString *name = g_string_new (NULL);
+ gchar *parsedname;
+ GString *url = g_string_new (NULL);
+ char *current_folder = NULL;
+ EphyNode *bmk;
+
+ if (!(bf = fopen (filename, "r"))) {
+ g_warning ("Failed to open file: %s\n", filename);
return FALSE;
+ }
- doc = htmlParseFile (filename, "UTF-8");
- g_assert (doc != NULL);
-
- child = doc->children;
- mozilla_parse_bookmarks (bookmarks, child, &keyword);
-
- g_free (keyword);
- xmlFreeDoc (doc);
+ while (!feof (bf)) {
+ NSItemType t;
+ t = ns_get_bookmark_item (bf, name, url);
+ switch (t)
+ {
+ case NS_FOLDER:
+ g_free (current_folder);
+ current_folder = g_strdup (name->str);
+ break;
+ case NS_SITE:
+ parsedname = ns_parse_bookmark_item (name);
+ bmk = ephy_bookmarks_add (bookmarks,
+ parsedname,
+ url->str);
+ set_folder (bookmarks, bmk, current_folder);
+ break;
+ default:
+ break;
+ }
+ }
+ fclose (bf);
+ g_string_free (name, TRUE);
+ g_string_free (url, TRUE);
ephy_bookmarks_save (bookmarks);