diff options
author | Diego Escalante Urrelo <diegoe@src.gnome.org> | 2008-05-24 20:18:28 +0800 |
---|---|---|
committer | Diego Escalante Urrelo <diegoe@src.gnome.org> | 2008-05-24 20:18:28 +0800 |
commit | 489a5a9563f723aefe67f9f4dfcf98199991939a (patch) | |
tree | 1e2083ba9bd985e44c45cee0a1b2c709c02eb836 | |
parent | 7d9ba33c392c5d349262cdad0a00c96ca700b741 (diff) | |
download | gsoc2013-epiphany-489a5a9563f723aefe67f9f4dfcf98199991939a.tar gsoc2013-epiphany-489a5a9563f723aefe67f9f4dfcf98199991939a.tar.gz gsoc2013-epiphany-489a5a9563f723aefe67f9f4dfcf98199991939a.tar.bz2 gsoc2013-epiphany-489a5a9563f723aefe67f9f4dfcf98199991939a.tar.lz gsoc2013-epiphany-489a5a9563f723aefe67f9f4dfcf98199991939a.tar.xz gsoc2013-epiphany-489a5a9563f723aefe67f9f4dfcf98199991939a.tar.zst gsoc2013-epiphany-489a5a9563f723aefe67f9f4dfcf98199991939a.zip |
Rewrite the bookmark HTML importer, it now uses GRegex, this is bug #533986.
Testing in trunk before committing to 2.22 branch.
svn path=/trunk/; revision=8249
-rw-r--r-- | src/bookmarks/ephy-bookmarks-import.c | 130 |
1 files changed, 74 insertions, 56 deletions
diff --git a/src/bookmarks/ephy-bookmarks-import.c b/src/bookmarks/ephy-bookmarks-import.c index 3e7096679..412223a6f 100644 --- a/src/bookmarks/ephy-bookmarks-import.c +++ b/src/bookmarks/ephy-bookmarks-import.c @@ -524,30 +524,6 @@ gul_general_read_line_from_file (FILE *f) return line; } -static const gchar * -gul_string_ascii_strcasestr (const gchar *a, const gchar *b) -{ - gchar *down_a; - gchar *down_b; - gchar *ptr; - - /* copy and lower case the strings */ - down_a = g_strdup (a); - down_b = g_strdup (b); - g_ascii_strdown (down_a, -1); - g_ascii_strdown (down_b, -1); - - /* compare */ - ptr = strstr (down_a, down_b); - - /* free allocated strings */ - g_free (down_a); - g_free (down_b); - - /* return result of comparison */ - return ptr == NULL ? NULL : (a + (ptr - down_a)); -} - /** * Parses a line of a mozilla/netscape bookmark file. File must be open. */ @@ -556,44 +532,86 @@ static NSItemType ns_get_bookmark_item (FILE *f, GString *name, GString *url) { char *line = NULL; - char *found; + GRegex *regex; + GMatchInfo *match_info; + int ret = NS_UNKNOWN; + char *match_url = NULL; + char *match_name = NULL; line = gul_general_read_line_from_file (f); - - if ((found = (char *) gul_string_ascii_strcasestr (line, "<A HREF="))) - { /* declare site? */ - g_string_assign (url, found+9); /* url=URL+ ADD_DATE ... */ - g_string_truncate (url, strstr(url->str, "\"")-url->str); - found = (char *) strstr (found+9+url->len, "\">"); - if (!found) - { - g_free (line); - return NS_UNKNOWN; - } - g_string_assign (name, found+2); - g_string_truncate (name, gul_string_ascii_strcasestr (name->str, - "</A>")-name->str); - g_free (line); - return NS_SITE; + + /* + * Regex parsing of the html file: + * 1. check if it's a bookmark, or a folder, or the end of a folder, + * note that only ONE of this things is going to happen + * 2. assign to the GStrings + * 3. return the ret val to tell our caller what we found, by default + * we don't know (NS_UNKWOWN). + */ + + /* check if it's a bookmark */ + regex = g_regex_new ("<a href=\"(?P<url>\\w.*)\".*>(?P<name>\\w.*)</a>", + G_REGEX_CASELESS, G_REGEX_MATCH_NOTEMPTY, NULL); + g_regex_match (regex, line, 0, &match_info); + + if (g_match_info_matches (match_info)) + { + match_url = g_match_info_fetch_named (match_info, "url"); + match_name = g_match_info_fetch_named (match_info, "name"); + ret = NS_SITE; + goto end; } - else if ((found = (char *) gul_string_ascii_strcasestr (line, "<DT><H3"))) - { /* declare folder? */ - found = (char *) strstr(found+7, ">"); - if (!found) return NS_UNKNOWN; - g_string_assign (name, found+1); - g_string_truncate (name, gul_string_ascii_strcasestr (name->str, - "</H3>") - name->str); - g_free (line); - return NS_FOLDER; + g_match_info_free (match_info); + g_regex_unref (regex); + + /* check if it's a folder start */ + regex = g_regex_new ("<h3.*>(?P<name>\\w.*)</h3>", + G_REGEX_CASELESS, G_REGEX_MATCH_NOTEMPTY, NULL); + + g_regex_match (regex, line, 0, &match_info); + if (g_match_info_matches (match_info)) + { + match_name = g_match_info_fetch_named (match_info, "name"); + ret = NS_FOLDER; + goto end; } - else if ((found = (char *) gul_string_ascii_strcasestr (line, "</DL>"))) - { /* end folder? */ - g_free (line); - return NS_FOLDER_END; + g_match_info_free (match_info); + g_regex_unref (regex); + + /* check if it's a folder end */ + regex = g_regex_new ("</dl>", + G_REGEX_CASELESS, G_REGEX_MATCH_NOTEMPTY, NULL); + + g_regex_match (regex, line, 0, &match_info); + if (g_match_info_matches (match_info)) + { + ret = NS_FOLDER_END; + goto end; } + + /* now let's use the collected stuff */ + end: + /* Due to the goto we'll always have an unfreed @match_info and + * @regex. Note that this two free/unrefs correspond to the last + * if() block too. + */ + g_match_info_free (match_info); + g_regex_unref (regex); - g_free (line); - return NS_UNKNOWN; + if (match_name) + { + g_string_assign (name, match_name); + g_free (match_name); + } + + if (match_url) + { + g_string_assign (url, match_url); + g_free (match_url); + } + + g_free (line); + return ret; } /** |