aboutsummaryrefslogtreecommitdiffstats
path: root/camel
diff options
context:
space:
mode:
Diffstat (limited to 'camel')
-rw-r--r--camel/url-util.c402
-rw-r--r--camel/url-util.h17
2 files changed, 150 insertions, 269 deletions
diff --git a/camel/url-util.c b/camel/url-util.c
index f4b769aad8..d1ea09c6c9 100644
--- a/camel/url-util.c
+++ b/camel/url-util.c
@@ -1,11 +1,9 @@
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* url-util.c : utility functions to parse URLs */
+
/*
- * This code is adapted form gzillaurl.c (http://www.gzilla.com)
- * Copyright (C) Raph Levien <raph@acm.org>
- *
- * Modifications by Bertrand Guiheneuf <Bertrand.Guiheneuf@inria.fr>
+ * Copyright (C) 1999 Bertrand Guiheneuf <Bertrand.Guiheneuf@inria.fr>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -25,7 +23,14 @@
-
+/*
+ Here we deal with URLs following the general scheme:
+ protocol://user:password@host:port/name
+ where name is a path-like string (i.e. dir1/dir2/....)
+ See RFC 1738 for the complete description of
+ Uniform Resource Locators
+
+ Bertrand. */
#include <ctype.h> /* for isalpha */
@@ -35,292 +40,183 @@
-/**
- * g_url_is_absolute:
- * @url:
- *
- *
- *
- * Return value:
- **/
-gboolean
-g_url_is_absolute (const char *url)
-{
- gint i;
-
- for (i = 0; url[i] != '\0'; i++) {
- if (url[i] == ':')
- return TRUE;
- else if (!isalpha (url[i]))
- return FALSE;
- }
- return FALSE;
-}
-/**
- * g_url_match_method:
- * @url:
- * @method:
- *
- *
- *
- * Return value: TRUE if the method matches
- **/
-gboolean
-g_url_match_method (const char *url, const char *method)
+static gboolean
+find_protocol(GString *url, GString **protocol, guint *position, gboolean *error)
{
- gint i;
-
- for (i = 0; method[i] != '\0'; i++)
- if (url[i] != method[i]) return FALSE;
- return (url[i] == ':');
-}
-
-
-
-/**
- * g_url_add_slash:
- * @url:
- * @size_url:
- *
- * Add the trailing slash if necessary. Return FALSE if there isn't room
- *
- * Return value:
- **/
-gboolean
-g_url_add_slash (char *url, gint size_url)
-{
- char hostname[256];
- gint port;
- char *tail;
+ guint i;
+ gchar *str_url;
+ gint len_url;
+ gchar *str_protocol;
+
+ str_url = url->str;
+ len_url = url->len;
+
+ *protocol = NULL;
+ *error = FALSE;
+ i=*position;
+
+ /* find a ':' */
+ while ( (i<len_url) && (str_url[i] != ':') ) i++;
- if (g_url_match_method (url, "http") ||
- g_url_match_method (url, "ftp")) {
- tail = g_url_parse (url, hostname, sizeof(hostname), &port);
- if (tail == NULL)
+ if (i==len_url) return FALSE;
+ i++;
+
+ /* check if it is followed by a "//" */
+ if ((i<len_url) && (str_url[i++] == '/'))
+ if ((i<len_url) && (str_url[i++] == '/'))
+ {
+
+ str_protocol = g_strndup(str_url, i-3);
+ *protocol = g_string_new(str_protocol);
+ *position=i;
return TRUE;
- if (tail[0] == '\0') {
- if (strlen (url) + 1 == size_url)
- return FALSE;
- tail[0] = '/';
- tail[1] = '\0';
}
- }
- return TRUE;
+
+ return FALSE;
}
-/**
- * g_url_relative:
- * @base_url:
- * @relative_url:
- * @new_url:
- * @size_new_url:
- *
- *
- *
- * Return value:
- **/
-gboolean
-g_url_relative (const char *base_url,
- const char *relative_url,
- char *new_url,
- gint size_new_url)
+static gboolean
+find_user(GString *url, GString **user, guint *position, gboolean *error)
{
- gint i, j, k;
- gint num_dotdot;
+ guint i;
+ guint at_pos;
- if (base_url == NULL || g_url_is_absolute (relative_url)) {
- if (strlen (relative_url) >= size_new_url)
- return FALSE;
- strcpy (new_url, relative_url);
- return g_url_add_slash (new_url, size_new_url);
- }
+ gchar *str_url;
+ gint len_url;
+ gchar *str_user;
- /* Assure that we have enough room for at least the base URL. */
- if (strlen (base_url) >= size_new_url)
- return FALSE;
+ str_url = url->str;
+ len_url = url->len;
+
+ *user = NULL;
+ i=*position;
- /* Copy http://hostname:port/ from base_url to new_url */
- i = 0;
- if (g_url_match_method (base_url, "http") ||
- g_url_match_method (base_url, "ftp")) {
- while (base_url[i] != '\0' && base_url[i] != ':')
- new_url[i] = base_url[i++];
- if (base_url[i] != '\0')
- new_url[i] = base_url[i++];
- if (base_url[i] != '\0')
- new_url[i] = base_url[i++];
- if (base_url[i] != '\0')
- new_url[i] = base_url[i++];
- while (base_url[i] != '\0' && base_url[i] != '/')
- new_url[i] = base_url[i++];
- } else {
- while (base_url[i] != '\0' && base_url[i] != ':')
- new_url[i] = base_url[i++];
- if (base_url[i] != '\0')
- new_url[i] = base_url[i++];
- }
-
- if (relative_url[0] == '/') {
- if (i + strlen (relative_url) >= size_new_url)
- return FALSE;
- strcpy (new_url + i, relative_url);
- return g_url_add_slash (new_url, size_new_url);
- }
-
- /* At this point, i points to the first slash following the hostname
- (and port) in base_url. */
-
- /* Now, figure how many ..'s to follow. */
- num_dotdot = 0;
- j = 0;
- while (relative_url[j] != '\0') {
- if (relative_url[j] == '.' &&
- relative_url[j + 1] == '/') {
- j += 2;
- } else if (relative_url[j] == '.' &&
- relative_url[j + 1] == '.' &&
- relative_url[j + 2] == '/') {
- j += 3;
- num_dotdot++;
- } else {
- break;
- }
- }
-
- /* Find num_dotdot+1 slashes back from the end, point k there. */
-
- for (k = strlen (base_url); k > i && num_dotdot >= 0; k--)
- if (base_url[k - 1] == '/')
- num_dotdot--;
-
- if (k + 1 + strlen (relative_url) - j >= size_new_url)
- return FALSE;
-
- while (i < k)
- new_url[i] = base_url[i++];
- if (relative_url[0] == '#')
- while (base_url[i] != '\0')
- new_url[i] = base_url[i++];
- else if (base_url[i] == '/' || base_url[i] == '\0')
- new_url[i++] = '/';
- strcpy (new_url + i, relative_url + j);
- return g_url_add_slash (new_url, size_new_url);
-}
-
-
+ /* find a '@' */
+ while ((i<len_url) && (str_url[i] != '@')) i++;
+
+ if (i==len_url) return FALSE;
+ at_pos = i;
+ i = *position;
+ /* find a ':' */
+ while ( (i<at_pos) && (str_url[i] != ':') ) i++;
-/* Parse the url, packing the hostname and port into the arguments, and
- returning the suffix. Return NULL in case of failure. */
+ /* now if i has not been incremented at all, there is no user */
+ if (i == *position) return FALSE;
+
+ str_user = g_strndup(str_url+ *position, i - *position);
+ *user = g_string_new(str_user);
+ if (i<at_pos) *position=i+1; /* there was a ':', skip it */
+ else *position=i;
+
+ return TRUE;
-/**
- * g_url_parse:
- * @url:
- * @hostname:
- * @hostname_size:
- * @port:
- *
- *
- *
- * Return value:
- **/
-char *
-g_url_parse (char *url,
- char *hostname,
- gint hostname_size,
- int *port)
-{
- gint i, j;
- for (i = 0; url[i] != '\0' && url[i] != ':'; i++);
- if (url[i] != ':' || url[i + 1] != '/' || url[i + 2] != '/') return NULL;
- i += 3;
- for (j = i; url[j] != '\0' && url[j] != ':' && url[j] != '/'; j++);
- if (j - i >= hostname_size) return NULL;
- memcpy (hostname, url + i, j - i);
- hostname[j - i] = '\0';
- if (url[j] == ':') {
- *port = atoi (url + j + 1);
- for (j++; url[j] != '\0' && url[j] != '/'; j++);
- }
- return url + j;
+
}
+static gboolean
+find_passwd(GString *url, GString **passwd, guint *position, gboolean *error)
+{
+ guint i;
+
+ gchar *str_url;
+ gint len_url;
+ gchar *str_passwd;
+
+ str_url = url->str;
+ len_url = url->len;
+
+ *passwd = NULL;
+ i=*position;
+
+ /* find a '@' */
+ while ((i<len_url) && (str_url[i] != '@')) i++;
+
+ if (i==len_url) return FALSE;
+ /*i has not been incremented at all, there is no passwd */
+ if (i == *position) {
+ *position = i+1;
+ return FALSE;
+ }
+
+ str_passwd = g_strndup(str_url+ *position, i - *position);
+ *passwd = g_string_new(str_passwd);
+ *position=i+1; /* skip it the '@' */
+
+ return TRUE;
-
-#ifndef UNIT_TEST
-/* Parse "http://a/b#c" into "http://a/b" and "#c" (storing both as
- newly allocated strings into *p_head and *p_tail, respectively.
-
- Note: this routine allocates new strings for the subcomponents, so
- that there's no arbitrary restriction on sizes. That's the way I want
- all the URL functions to work eventually.
-*/
-void
-g_url_parse_hash (char **p_head, char **p_tail, const char *url)
-{
- gint i;
- /* todo: I haven't checked this for standards compliance. What's it
- supposed to do when there are two hashes? */
- for (i = 0; url[i] != '\0' && url[i] != '#'; i++);
- *p_tail = g_strdup (url + i);
- *p_head = g_new (char, i + 1);
- memcpy (*p_head, url, i);
- (*p_head)[i] = '\0';
}
-#endif
-
-#ifdef UNIT_TEST
-/* Unit test as follows:
-
- gcc -g -I/usr/local/include/gtk -DUNIT_TEST camelurl.c -o camelurl
- ./camelurl base_url relative_url
-
+/* To test this file:
+ gcc -o test_url_util `glib-config --cflags` -DTEST_URL_UTIL url-util.c `glib-config --libs`
+ ./test_url_util URL
*/
+#ifdef TEST_URL_UTIL
int
-main (int argc, char **argv)
+main (int argc, char **argv)
{
- char buf[80];
- char hostname[80];
- char *tail;
- int port;
+
+ GString *url;
+ GString *protocol;
+ GString *user;
+ GString *passwd;
+ guint position=0;
+ gboolean error;
+ gboolean found;
+ guint i;
+
+ url = g_string_new(argv[1]);
+ printf("URL to test : %s\n\n", url->str);
+
+ /* Try to find the protocol */
+ found = find_protocol(url, &protocol, &position, &error);
+ if (found) {
+ printf("protocol found : %s\n", protocol->str);
+ } else printf("protocol not found in URL\n\n");
+ printf("posistion of the next item:\n");
+ printf("%s\n", url->str);
+ for(i=0; i<position; i++) printf(" ");
+ printf("^\n");
+
+ /* Try to find the user name */
+ found = find_user(url, &user, &position, &error);
+ if (found) {
+ printf("name found : %s\n", user->str);
+ } else printf("user name not found in URL\n");
+ printf("posistion of the next item:\n");
+ printf("%s\n", url->str);
+ for(i=0; i<position; i++) printf(" ");
+ printf("^\n");
+
+ /* Try to find the password */
+ found = find_passwd(url, &passwd, &position, &error);
+ if (found) {
+ printf("passwd found : %s\n", passwd->str);
+ printf("\n");
+ } else printf("passwd not found in URL\n");
+ printf("posistion of the next item:\n");
+ printf("%s\n", url->str);
+ for(i=0; i<position; i++) printf(" ");
+ printf("^\n");
+
- if (argc == 3) {
- if (g_url_relative (argv[1], argv[2], buf, sizeof(buf))) {
- printf ("%s\n", buf);
- port = 80;
- tail = g_url_parse (buf, hostname, sizeof (hostname), &port);
- if (tail != NULL) {
- printf ("hostname = %s, port = %d, tail = %s\n", hostname, port, tail);
- }
- } else {
- printf ("buffer overflow!\n");
- }
- } else {
- printf ("Usage: %s base_url relative_url\n", argv[0]);
- }
return 0;
}
-#endif
-
-
-
-
-
+#endif /* TEST_URL_UTIL */
diff --git a/camel/url-util.h b/camel/url-util.h
index acb279422a..2c9218ea78 100644
--- a/camel/url-util.h
+++ b/camel/url-util.h
@@ -2,10 +2,7 @@
/* url-util.h : utility functions to parse URLs */
/*
- * This code is adapted form gzillaurl.h (http://www.gzilla.com)
- * Copyright (C) Raph Levien <raph@acm.org>
- *
- * Modifications by Bertrand Guiheneuf <Bertrand.Guiheneuf@inria.fr>
+ * Copyright (C) Bertrand Guiheneuf <Bertrand.Guiheneuf@inria.fr>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -35,18 +32,6 @@ extern "C" {
#endif /* __cplusplus */
-gboolean g_url_is_absolute (const char *url);
-gboolean g_url_match_method (const char *url, const char *method);
-gboolean g_url_relative (const char *base_url,
- const char *relative_url,
- char *new_url,
- gint size_new_url);
-char *g_url_parse (char *url,
- char *hostname,
- gint hostname_size,
- int *port);
-void g_url_parse_hash (char **p_head, char **p_tail, const char *url);
-
#ifdef __cplusplus
}
#endif /* __cplusplus */