From bebba4972777e1dde7e7087cb27dc75b83a3c38a Mon Sep 17 00:00:00 2001 From: Xavier Claessens Date: Mon, 1 Dec 2008 10:21:55 +0000 Subject: Use GRegex instead of custom code and use a new regex to detect URIs svn path=/trunk/; revision=1932 --- libempathy/empathy-utils.c | 126 --------------------------------------------- libempathy/empathy-utils.h | 19 +------ 2 files changed, 2 insertions(+), 143 deletions(-) (limited to 'libempathy') diff --git a/libempathy/empathy-utils.c b/libempathy/empathy-utils.c index b5bdb9ff7..671595e68 100644 --- a/libempathy/empathy-utils.c +++ b/libempathy/empathy-utils.c @@ -28,7 +28,6 @@ #include #include #include -#include #include @@ -44,8 +43,6 @@ #define DEBUG_FLAG EMPATHY_DEBUG_OTHER #include "empathy-debug.h" -static void regex_init (void); - gchar * empathy_substring (const gchar *str, gint start, @@ -54,129 +51,6 @@ empathy_substring (const gchar *str, return g_strndup (str + start, end - start); } -/* - * Regular Expression code to match urls. - */ -#define APTCHARS "-A-Za-z0-9,-." -#define USERCHARS "-A-Za-z0-9" -#define PASSCHARS "-A-Za-z0-9,?;.:/!%$^*&~\"#'" -#define HOSTCHARS "-A-Za-z0-9_" -#define PATHCHARS "-A-Za-z0-9_$.+!*(),;:@&=?/~#%" -#define SCHEME "(news:|telnet:|nntp:|file:/|https?:|ftps?:|webcal:)" -#define USER "[" USERCHARS "]+(:["PASSCHARS "]+)?" -#define URLPATH "/[" PATHCHARS "]*[^]'.}>) \t\r\n,\\\"]" - -static regex_t dingus[EMPATHY_REGEX_ALL]; - -static void -regex_init (void) -{ - static gboolean inited = FALSE; - const gchar *expression; - gint i; - - if (inited) { - return; - } - - for (i = 0; i < EMPATHY_REGEX_ALL; i++) { - switch (i) { - case EMPATHY_REGEX_AS_IS: - expression = - SCHEME "//(" USER "@)?[" HOSTCHARS ".]+" - "(:[0-9]+)?(" URLPATH ")?"; - break; - case EMPATHY_REGEX_BROWSER: - expression = - "(www|ftp)[" HOSTCHARS "]*\\.[" HOSTCHARS ".]+" - "(:[0-9]+)?(" URLPATH ")?"; - break; - case EMPATHY_REGEX_APT: - expression = - "apt://[" APTCHARS "]*"; - break; - case EMPATHY_REGEX_EMAIL: - expression = - "(mailto:)?[a-z0-9][a-z0-9._-]*@[a-z0-9]" - "[a-z0-9-]*(\\.[a-z0-9][a-z0-9-]*)+"; - break; - case EMPATHY_REGEX_OTHER: - expression = - "news:[-A-Z\\^_a-z{|}~!\"#$%&'()*+,./0-9;:=?`]+" - "@[" HOSTCHARS ".]+(:[0-9]+)?"; - break; - default: - /* Silence the compiler. */ - expression = NULL; - continue; - } - - memset (&dingus[i], 0, sizeof (regex_t)); - regcomp (&dingus[i], expression, REG_EXTENDED | REG_ICASE); - } - - inited = TRUE; -} - -gint -empathy_regex_match (EmpathyRegExType type, - const gchar *msg, - GArray *start, - GArray *end) -{ - regmatch_t matches[1]; - gint ret = 0; - gint num_matches = 0; - gint offset = 0; - gint i; - - g_return_val_if_fail (type >= 0 || type <= EMPATHY_REGEX_ALL, 0); - - regex_init (); - - while (!ret && type != EMPATHY_REGEX_ALL) { - ret = regexec (&dingus[type], msg + offset, 1, matches, 0); - if (ret == 0) { - gint s; - - num_matches++; - - s = matches[0].rm_so + offset; - offset = matches[0].rm_eo + offset; - - g_array_append_val (start, s); - g_array_append_val (end, offset); - } - } - - if (type != EMPATHY_REGEX_ALL) { - DEBUG ("Found %d matches for regex type:%d", num_matches, type); - return num_matches; - } - - /* If EMPATHY_REGEX_ALL then we run ALL regex's on the string. */ - for (i = 0; i < EMPATHY_REGEX_ALL; i++, ret = 0) { - while (!ret) { - ret = regexec (&dingus[i], msg + offset, 1, matches, 0); - if (ret == 0) { - gint s; - - num_matches++; - - s = matches[0].rm_so + offset; - offset = matches[0].rm_eo + offset; - - g_array_append_val (start, s); - g_array_append_val (end, offset); - } - } - } - - DEBUG ("Found %d matches for ALL regex types", num_matches); - - return num_matches; -} - gint empathy_strcasecmp (const gchar *s1, const gchar *s2) diff --git a/libempathy/empathy-utils.h b/libempathy/empathy-utils.h index a320c6246..51ddd231f 100644 --- a/libempathy/empathy-utils.h +++ b/libempathy/empathy-utils.h @@ -37,30 +37,15 @@ #include "empathy-contact.h" -G_BEGIN_DECLS - #define EMPATHY_GET_PRIV(obj,type) ((type##Priv*) ((type*)obj)->priv) #define G_STR_EMPTY(x) ((x) == NULL || (x)[0] == '\0') -typedef enum { - EMPATHY_REGEX_AS_IS, - EMPATHY_REGEX_BROWSER, - EMPATHY_REGEX_APT, - EMPATHY_REGEX_EMAIL, - EMPATHY_REGEX_OTHER, - EMPATHY_REGEX_ALL, -} EmpathyRegExType; +G_BEGIN_DECLS -/* Regular expressions */ +/* Strings */ gchar * empathy_substring (const gchar *str, gint start, gint end); -gint empathy_regex_match (EmpathyRegExType type, - const gchar *msg, - GArray *start, - GArray *end); - -/* Strings */ gint empathy_strcasecmp (const gchar *s1, const gchar *s2); gint empathy_strncasecmp (const gchar *s1, -- cgit v1.2.3