Rewrite the bookmark HTML importer, it now uses GRegex, this is bug #533986.

svn path=/branches/gnome-2-24/; revision=8358
author: Diego Escalante Urrelo <diegoe@src.gnome.org> 2008-07-25 04:42:01 +0800
committer: Diego Escalante Urrelo <diegoe@src.gnome.org> 2008-07-25 04:42:01 +0800
commit: 4b20227170765a0cc0ccebe51978a831d327f340 (patch)
tree: 00c5db37d2d5cd6244890090087cd1027e7161d3
parent: db3a1b5b080036efcee8477ff474d1452862e321 (diff)
download: gsoc2013-epiphany-4b20227170765a0cc0ccebe51978a831d327f340.tar
gsoc2013-epiphany-4b20227170765a0cc0ccebe51978a831d327f340.tar.gz
gsoc2013-epiphany-4b20227170765a0cc0ccebe51978a831d327f340.tar.bz2
gsoc2013-epiphany-4b20227170765a0cc0ccebe51978a831d327f340.tar.lz
gsoc2013-epiphany-4b20227170765a0cc0ccebe51978a831d327f340.tar.xz
gsoc2013-epiphany-4b20227170765a0cc0ccebe51978a831d327f340.tar.zst
gsoc2013-epiphany-4b20227170765a0cc0ccebe51978a831d327f340.zip
1 files changed, 74 insertions, 56 deletions
diff --git a/src/bookmarks/ephy-bookmarks-import.c b/src/bookmarks/ephy-bookmarks-import.c
index 3e7096679..412223a6f 100644
--- a/src/bookmarks/ephy-bookmarks-import.c
+++ b/src/bookmarks/ephy-bookmarks-import.c
@@ -524,30 +524,6 @@ gul_general_read_line_from_file (FILE *f)
 	return line;
 }
 
-static const gchar *
-gul_string_ascii_strcasestr (const gchar *a, const gchar *b)
-{
-	gchar *down_a;
-	gchar *down_b;
-	gchar *ptr;
-
-	/* copy and lower case the strings */
-	down_a = g_strdup (a);
-	down_b = g_strdup (b);
-	g_ascii_strdown (down_a, -1);
-	g_ascii_strdown (down_b, -1);
-
-	/* compare */
-	ptr = strstr (down_a, down_b);
-
-	/* free allocated strings */
-	g_free (down_a);
-	g_free (down_b);
-
-	/* return result of comparison */
-	return ptr == NULL ? NULL : (a + (ptr - down_a));
-}
-
 /**
  * Parses a line of a mozilla/netscape bookmark file. File must be open.
  */
@@ -556,44 +532,86 @@ static NSItemType
 ns_get_bookmark_item (FILE *f, GString *name, GString *url)
 {
 	char *line = NULL;
-	char *found;
+	GRegex *regex;
+	GMatchInfo *match_info;
+	int ret = NS_UNKNOWN;
+	char *match_url = NULL;
+	char *match_name = NULL;
 
 	line = gul_general_read_line_from_file (f);
-
-	if ((found = (char *) gul_string_ascii_strcasestr (line, "<A HREF=")))
-	{  /* declare site? */
-		g_string_assign (url, found+9);  /* url=URL+ ADD_DATE ... */
-		g_string_truncate (url, strstr(url->str, "\"")-url->str);
-		found = (char *) strstr (found+9+url->len, "\">");
-		if (!found)
-		{
-			g_free (line);
-			return NS_UNKNOWN;
-		}
-		g_string_assign (name, found+2);
-		g_string_truncate (name, gul_string_ascii_strcasestr (name->str,
-						       "</A>")-name->str);
-		g_free (line);
-		return NS_SITE;
+	
+	/*
+	 * Regex parsing of the html file:
+	 * 1. check if it's a bookmark, or a folder, or the end of a folder,
+	 * note that only ONE of this things is going to happen
+	 * 2. assign to the GStrings
+	 * 3. return the ret val to tell our caller what we found, by default 
+	 * we don't know (NS_UNKWOWN).
+	 */
+	
+	/* check if it's a bookmark */
+	regex = g_regex_new ("<a href=\"(?P<url>\\w.*)\".*>(?P<name>\\w.*)</a>", 
+				G_REGEX_CASELESS, G_REGEX_MATCH_NOTEMPTY, NULL);
+	g_regex_match (regex, line, 0, &match_info);
+	
+	if (g_match_info_matches (match_info))
+	{
+		match_url = g_match_info_fetch_named (match_info, "url");
+		match_name = g_match_info_fetch_named (match_info, "name");
+		ret = NS_SITE;
+		goto end;
 	}
-	else if ((found = (char *) gul_string_ascii_strcasestr (line, "<DT><H3")))
-	{ /* declare folder? */
-		found = (char *) strstr(found+7, ">");
-		if (!found) return NS_UNKNOWN;
-		g_string_assign (name, found+1);
-		g_string_truncate (name, gul_string_ascii_strcasestr (name->str,
-				   "</H3>") - name->str);
-		g_free (line);
-		return NS_FOLDER;
+	g_match_info_free (match_info);
+	g_regex_unref (regex);
+	
+	/* check if it's a folder start */
+	regex = g_regex_new ("<h3.*>(?P<name>\\w.*)</h3>", 
+				G_REGEX_CASELESS, G_REGEX_MATCH_NOTEMPTY, NULL);
+
+	g_regex_match (regex, line, 0, &match_info);
+	if (g_match_info_matches (match_info))
+	{
+		match_name = g_match_info_fetch_named (match_info, "name");
+		ret = NS_FOLDER;
+		goto end;
 	}
-	else if ((found = (char *) gul_string_ascii_strcasestr (line, "</DL>")))
-	{     /* end folder? */
-		g_free (line);
-		return NS_FOLDER_END;
+	g_match_info_free (match_info);
+	g_regex_unref (regex);
+	
+	/* check if it's a folder end */
+	regex = g_regex_new ("</dl>", 
+				G_REGEX_CASELESS, G_REGEX_MATCH_NOTEMPTY, NULL);
+
+	g_regex_match (regex, line, 0, &match_info);
+	if (g_match_info_matches (match_info))
+	{
+		ret = NS_FOLDER_END;
+		goto end;
 	}
+	
+	/* now let's use the collected stuff */
+	end:
+		/* Due to the goto we'll always have an unfreed @match_info and 
+		 * @regex. Note that this two free/unrefs correspond to the last 
+		 * if() block too.
+		 */
+        g_match_info_free (match_info);
+		g_regex_unref (regex);
 
-	g_free (line);
-	return NS_UNKNOWN;
+		if (match_name)
+		{
+			g_string_assign (name, match_name);
+			g_free (match_name);
+		}
+
+		if (match_url)
+		{
+			g_string_assign (url, match_url);
+			g_free (match_url);
+		}
+
+		g_free (line);
+		return ret;
 }
 
 /**
author	Diego Escalante Urrelo <diegoe@src.gnome.org>	2008-07-25 04:42:01 +0800
committer	Diego Escalante Urrelo <diegoe@src.gnome.org>	2008-07-25 04:42:01 +0800
commit	4b20227170765a0cc0ccebe51978a831d327f340 (patch)
tree	00c5db37d2d5cd6244890090087cd1027e7161d3
parent	db3a1b5b080036efcee8477ff474d1452862e321 (diff)
download	gsoc2013-epiphany-4b20227170765a0cc0ccebe51978a831d327f340.tar gsoc2013-epiphany-4b20227170765a0cc0ccebe51978a831d327f340.tar.gz gsoc2013-epiphany-4b20227170765a0cc0ccebe51978a831d327f340.tar.bz2 gsoc2013-epiphany-4b20227170765a0cc0ccebe51978a831d327f340.tar.lz gsoc2013-epiphany-4b20227170765a0cc0ccebe51978a831d327f340.tar.xz gsoc2013-epiphany-4b20227170765a0cc0ccebe51978a831d327f340.tar.zst gsoc2013-epiphany-4b20227170765a0cc0ccebe51978a831d327f340.zip