diff options
author | Marco Pesenti Gritti <marco@it.gnome.org> | 2003-06-14 02:05:27 +0800 |
---|---|---|
committer | Marco Pesenti Gritti <mpeseng@src.gnome.org> | 2003-06-14 02:05:27 +0800 |
commit | 63c30bbc992945dcae374ef87716234fa53cd184 (patch) | |
tree | 572bce53db37c3e26fab0acabe92fed2670dec8a /src | |
parent | ab152b500ba9248f625db68601dc293bfd1fa3cb (diff) | |
download | gsoc2013-epiphany-63c30bbc992945dcae374ef87716234fa53cd184.tar gsoc2013-epiphany-63c30bbc992945dcae374ef87716234fa53cd184.tar.gz gsoc2013-epiphany-63c30bbc992945dcae374ef87716234fa53cd184.tar.bz2 gsoc2013-epiphany-63c30bbc992945dcae374ef87716234fa53cd184.tar.lz gsoc2013-epiphany-63c30bbc992945dcae374ef87716234fa53cd184.tar.xz gsoc2013-epiphany-63c30bbc992945dcae374ef87716234fa53cd184.tar.zst gsoc2013-epiphany-63c30bbc992945dcae374ef87716234fa53cd184.zip |
Do not use libxml to parse mozilla bookmarks. Use the galeon parser instead.
2003-06-13 Marco Pesenti Gritti <marco@it.gnome.org>
* src/bookmarks/ephy-bookmarks-import.c:
(gul_general_read_line_from_file), (gul_string_ascii_strcasestr),
(ns_get_bookmark_item), (ns_parse_bookmark_item),
(ephy_bookmarks_import_mozilla):
Do not use libxml to parse mozilla bookmarks.
Use the galeon parser instead, ugly but works.
Diffstat (limited to 'src')
-rw-r--r-- | src/bookmarks/ephy-bookmarks-import.c | 270 |
1 files changed, 221 insertions, 49 deletions
diff --git a/src/bookmarks/ephy-bookmarks-import.c b/src/bookmarks/ephy-bookmarks-import.c index f574654ae..778186039 100644 --- a/src/bookmarks/ephy-bookmarks-import.c +++ b/src/bookmarks/ephy-bookmarks-import.c @@ -24,6 +24,19 @@ #include "ephy-bookmarks-import.h" #include "ephy-debug.h" +/** + * NSItemType: netscape bookmark item type + */ +typedef enum +{ + NS_SITE, + NS_NOTES, + NS_FOLDER, + NS_FOLDER_END, + NS_SEPARATOR, + NS_UNKNOWN +} NSItemType; + typedef struct _XbelInfo { char *title; @@ -71,42 +84,6 @@ ephy_bookmarks_import (EphyBookmarks *bookmarks, } static void -mozilla_parse_bookmarks (EphyBookmarks *bookmarks, - htmlNodePtr node, - char **keyword) -{ - htmlNodePtr child = node; - - while (child != NULL) - { - if (xmlStrEqual (child->name, "h3")) - { - *keyword = xmlNodeGetContent (child); - } - else if (xmlStrEqual (child->name, "a")) - { - xmlChar *title, *url; - EphyNode *bmk; - - title = xmlNodeGetContent (child); - url = xmlGetProp (child, "href"); - bmk = ephy_bookmarks_add (bookmarks, - title, - url); - set_folder (bookmarks, bmk, *keyword); - xmlFree (title); - xmlFree (url); - } - - mozilla_parse_bookmarks (bookmarks, - child->children, - keyword); - child = child->next; - } -} - - -static void xbel_parse_single_bookmark (EphyBookmarks *bookmarks, xmlNodePtr node, XbelInfo *xbel) { @@ -221,25 +198,220 @@ xbel_parse_bookmarks (EphyBookmarks *bookmarks, } } +static gchar * +gul_general_read_line_from_file (FILE *f) +{ + gchar *line = g_strdup (""); + gchar *t; + gchar *buf = g_new0 (gchar, 256); + while ( ! 
( strchr (buf, '\n') || feof (f) ) ) { + fgets(buf, 256, f); + t = line; + line = g_strconcat (line, buf, NULL); + g_free (t); + } + g_free (buf); + return line; +} + +static const gchar * +gul_string_ascii_strcasestr (const gchar *a, const gchar *b) +{ + gchar *down_a; + gchar *down_b; + gchar *ptr; + + /* copy and lower case the strings */ + down_a = g_strdup (a); + down_b = g_strdup (b); + g_ascii_strdown (down_a, -1); + g_ascii_strdown (down_b, -1); + + /* compare */ + ptr = strstr (down_a, down_b); + + /* free allocated strings */ + g_free (down_a); + g_free (down_b); + + /* return result of comparison */ + return ptr == NULL ? NULL : (a + (ptr - down_a)); +} + +/** + * Parses a line of a mozilla/netscape bookmark file. File must be open. + */ +/* this has been tested fairly well */ +static NSItemType +ns_get_bookmark_item (FILE *f, GString *name, GString *url) +{ + char *line = NULL; + char *found; + + line = gul_general_read_line_from_file (f); + + if ((found = (char *) gul_string_ascii_strcasestr (line, "<A HREF="))) + { /* declare site? */ + g_string_assign (url, found+9); /* url=URL+ ADD_DATE ... */ + g_string_truncate (url, strstr(url->str, "\"")-url->str); + found = (char *) strstr (found+9+url->len, "\">"); + if (!found) + { + g_free (line); + return NS_UNKNOWN; + } + g_string_assign (name, found+2); + g_string_truncate (name, gul_string_ascii_strcasestr (name->str, + "</A>")-name->str); + g_free (line); + return NS_SITE; + } + else if ((found = (char *) gul_string_ascii_strcasestr (line, "<DT><H3"))) + { /* declare folder? */ + found = (char *) strstr(found+7, ">"); + if (!found) return NS_UNKNOWN; + g_string_assign (name, found+1); + g_string_truncate (name, gul_string_ascii_strcasestr (name->str, + "</H3>") - name->str); + g_free (line); + return NS_FOLDER; + } + else if ((found = (char *) gul_string_ascii_strcasestr (line, "</DL>"))) + { /* end folder? 
*/ + g_free (line); + return NS_FOLDER_END; + } + + g_free (line); + return NS_UNKNOWN; +} + +/** + * This function replaces some weird elements + * like &amp; &le;, etc.. + * More info : http://www.w3.org/TR/html4/charset.html#h-5.3.2 + * NOTE : We don't support &#D or &#xH. + * Patch courtesy of Almer S. Tigelaar <almer1@dds.nl> + */ +static char * +ns_parse_bookmark_item (GString *string) +{ + char *iterator, *temp; + int cnt = 0; + GString *result = g_string_new (NULL); + + g_return_val_if_fail (string != NULL, NULL); + g_return_val_if_fail (string->str != NULL, NULL); + + iterator = string->str; + + for (cnt = 0, iterator = string->str; + cnt <= (int)(strlen (string->str)); + cnt++, iterator++) { + if (*iterator == '&') { + int jump = 0; + int i; + + if (g_ascii_strncasecmp (iterator, "&amp;", 5) == 0) + { + g_string_append_c (result, '&'); + jump = 5; + } + else if (g_ascii_strncasecmp (iterator, "&lt;", 4) == 0) + { + g_string_append_c (result, '<'); + jump = 4; + } + else if (g_ascii_strncasecmp (iterator, "&gt;", 4) == 0) + { + g_string_append_c (result, '>'); + jump = 4; + } + else if (g_ascii_strncasecmp (iterator, "&quot;", 6) == 0) + { + g_string_append_c (result, '\"'); + jump = 6; + } + else + { + /* It must be some numeric thing now */ + + iterator++; + + if (iterator && *iterator == '#') { + int val; + char *num, *tmp; + + iterator++; + + val = atoi (iterator); + + tmp = g_strdup_printf ("%d", val); + jump = strlen (tmp); + g_free (tmp); + + num = g_strdup_printf ("%c", (char) val); + g_string_append (result, num); + g_free (num); + } + } + + for (i = jump - 1; i > 0; i--) + { + iterator++; + if (iterator == NULL) + break; + } + } + else + { + g_string_append_c (result, *iterator); + } + } + temp = result->str; + g_string_free (result, FALSE); + return temp; +} + gboolean ephy_bookmarks_import_mozilla (EphyBookmarks *bookmarks, const char *filename) { - htmlDocPtr doc; - htmlNodePtr child; - char *keyword = NULL; - - if (g_file_test (filename, G_FILE_TEST_EXISTS) == 
FALSE) + FILE *bf; /* bookmark file */ + GString *name = g_string_new (NULL); + gchar *parsedname; + GString *url = g_string_new (NULL); + char *current_folder = NULL; + EphyNode *bmk; + + if (!(bf = fopen (filename, "r"))) { + g_warning ("Failed to open file: %s\n", filename); return FALSE; + } - doc = htmlParseFile (filename, "UTF-8"); - g_assert (doc != NULL); - - child = doc->children; - mozilla_parse_bookmarks (bookmarks, child, &keyword); - - g_free (keyword); - xmlFreeDoc (doc); + while (!feof (bf)) { + NSItemType t; + t = ns_get_bookmark_item (bf, name, url); + switch (t) + { + case NS_FOLDER: + g_free (current_folder); + current_folder = g_strdup (name->str); + break; + case NS_SITE: + parsedname = ns_parse_bookmark_item (name); + bmk = ephy_bookmarks_add (bookmarks, + parsedname, + url->str); + set_folder (bookmarks, bmk, current_folder); + break; + default: + break; + } + } + fclose (bf); + g_string_free (name, TRUE); + g_string_free (url, TRUE); ephy_bookmarks_save (bookmarks); |