From f30be523d2cd3e066d110c4036bf1db5401f69bb Mon Sep 17 00:00:00 2001
From: bertrand
Date: Fri, 23 Apr 1999 15:58:42 +0000
Subject: Utility functions to parse URLs. Stolen shamelessly from gzilla

1999-04-23  bertrand

	* camel/url-util.[ch]: Utility functions to parse URLs.
	Stolen shamelessly from gzilla (www.gzilla.com),
	written by Raph Levien.

	* camel/Makefile.am: added url-util.[ch] compilation.

svn path=/trunk/; revision=871
---
 camel/url-util.c | 326 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 326 insertions(+)
 create mode 100644 camel/url-util.c

diff --git a/camel/url-util.c b/camel/url-util.c
new file mode 100644
index 0000000000..f4b769aad8
--- /dev/null
+++ b/camel/url-util.c
@@ -0,0 +1,326 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/* url-util.c : utility functions to parse URLs */
+
+/*
+ * This code is adapted from gzillaurl.c (http://www.gzilla.com)
+ * Copyright (C) Raph Levien
+ *
+ * Modifications by Bertrand Guiheneuf
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ */
+
+#include <ctype.h>	/* for isalpha */
+#include <stdlib.h>	/* for atoi */
+#include <string.h>	/* for strlen, strcpy, memcpy */
+#include <stdio.h>	/* for printf in the unit test */
+
+#include "url-util.h"
+
+/**
+ * g_url_is_absolute:
+ * @url: a URL string.
+ *
+ * Checks whether @url begins with a scheme, i.e. a run of
+ * alphabetic characters followed by a colon.
+ *
+ * Return value: TRUE if @url is absolute, FALSE otherwise.
+ **/
+gboolean
+g_url_is_absolute (const char *url)
+{
+	gint i;
+
+	for (i = 0; url[i] != '\0'; i++) {
+		if (url[i] == ':')
+			return TRUE;
+		else if (!isalpha (url[i]))
+			return FALSE;
+	}
+	return FALSE;
+}
+
+/**
+ * g_url_match_method:
+ * @url: a URL string.
+ * @method: a method (scheme) name, e.g. "http".
+ *
+ * Checks whether @url begins with @method followed by a colon.
+ *
+ * Return value: TRUE if the method matches.
+ **/
+gboolean
+g_url_match_method (const char *url, const char *method)
+{
+	gint i;
+
+	for (i = 0; method[i] != '\0'; i++)
+		if (url[i] != method[i])
+			return FALSE;
+	return (url[i] == ':');
+}
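+/* A minimal sketch of how the two predicates above behave (this block
+ * is not part of the original gzilla code; it is an illustrative
+ * helper, compiled only with the unit test so the library build is
+ * unchanged; call it from main () by hand when experimenting). */
+#ifdef UNIT_TEST
+void
+demo_predicates (void)
+{
+	/* letters up to a ':' form a scheme => absolute */
+	printf ("%d\n", g_url_is_absolute ("http://www.gnome.org/"));	/* 1 */
+	/* '.' appears before any ':' => relative */
+	printf ("%d\n", g_url_is_absolute ("../index.html"));		/* 0 */
+	/* the method only matches when followed by ':' */
+	printf ("%d\n", g_url_match_method ("ftp://host/file", "ftp"));	/* 1 */
+	printf ("%d\n", g_url_match_method ("ftpx://host/", "ftp"));	/* 0 */
+}
+#endif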
+/**
+ * g_url_add_slash:
+ * @url: the URL, modified in place.
+ * @size_url: the size of the @url buffer.
+ *
+ * Adds the trailing slash after the hostname of an http or ftp URL
+ * if it is missing.
+ *
+ * Return value: FALSE if there isn't room in the buffer, TRUE otherwise.
+ **/
+gboolean
+g_url_add_slash (char *url, gint size_url)
+{
+	char hostname[256];
+	gint port;
+	char *tail;
+
+	if (g_url_match_method (url, "http") ||
+	    g_url_match_method (url, "ftp")) {
+		tail = g_url_parse (url, hostname, sizeof (hostname), &port);
+		if (tail == NULL)
+			return TRUE;
+		if (tail[0] == '\0') {
+			if (strlen (url) + 1 == size_url)
+				return FALSE;
+			tail[0] = '/';
+			tail[1] = '\0';
+		}
+	}
+	return TRUE;
+}
+
+/**
+ * g_url_relative:
+ * @base_url: the base URL, or NULL.
+ * @relative_url: the URL to resolve against @base_url.
+ * @new_url: buffer receiving the resolved URL.
+ * @size_new_url: the size of the @new_url buffer.
+ *
+ * Resolves @relative_url against @base_url, handling "./" and "../"
+ * prefixes and "#" fragments, and stores the result in @new_url.
+ *
+ * Return value: FALSE on buffer overflow, TRUE otherwise.
+ **/
+gboolean
+g_url_relative (const char *base_url,
+		const char *relative_url,
+		char *new_url,
+		gint size_new_url)
+{
+	gint i, j, k;
+	gint num_dotdot;
+
+	if (base_url == NULL || g_url_is_absolute (relative_url)) {
+		if (strlen (relative_url) >= size_new_url)
+			return FALSE;
+		strcpy (new_url, relative_url);
+		return g_url_add_slash (new_url, size_new_url);
+	}
+
+	/* Ensure that we have enough room for at least the base URL. */
+	if (strlen (base_url) >= size_new_url)
+		return FALSE;
+
+	/* Copy http://hostname:port/ from base_url to new_url. */
+	i = 0;
+	if (g_url_match_method (base_url, "http") ||
+	    g_url_match_method (base_url, "ftp")) {
+		/* copy the method name */
+		while (base_url[i] != '\0' && base_url[i] != ':') {
+			new_url[i] = base_url[i];
+			i++;
+		}
+		/* copy "://" */
+		for (j = 0; j < 3 && base_url[i] != '\0'; j++) {
+			new_url[i] = base_url[i];
+			i++;
+		}
+		/* copy the hostname (and port) */
+		while (base_url[i] != '\0' && base_url[i] != '/') {
+			new_url[i] = base_url[i];
+			i++;
+		}
+	} else {
+		while (base_url[i] != '\0' && base_url[i] != ':') {
+			new_url[i] = base_url[i];
+			i++;
+		}
+		if (base_url[i] != '\0') {
+			new_url[i] = base_url[i];
+			i++;
+		}
+	}
+
+	if (relative_url[0] == '/') {
+		if (i + strlen (relative_url) >= size_new_url)
+			return FALSE;
+		strcpy (new_url + i, relative_url);
+		return g_url_add_slash (new_url, size_new_url);
+	}
+
+	/* At this point, i points to the first slash following the
+	   hostname (and port) in base_url. */
+
+	/* Now, figure out how many ..'s to follow. */
+	num_dotdot = 0;
+	j = 0;
+	while (relative_url[j] != '\0') {
+		if (relative_url[j] == '.' &&
+		    relative_url[j + 1] == '/') {
+			j += 2;
+		} else if (relative_url[j] == '.' &&
+			   relative_url[j + 1] == '.' &&
+			   relative_url[j + 2] == '/') {
+			j += 3;
+			num_dotdot++;
+		} else {
+			break;
+		}
+	}
+
+	/* Find num_dotdot+1 slashes back from the end, point k there. */
+	for (k = strlen (base_url); k > i && num_dotdot >= 0; k--)
+		if (base_url[k - 1] == '/')
+			num_dotdot--;
+
+	if (k + 1 + strlen (relative_url) - j >= size_new_url)
+		return FALSE;
+
+	while (i < k) {
+		new_url[i] = base_url[i];
+		i++;
+	}
+	if (relative_url[0] == '#') {
+		while (base_url[i] != '\0') {
+			new_url[i] = base_url[i];
+			i++;
+		}
+	} else if (base_url[i] == '/' || base_url[i] == '\0') {
+		new_url[i++] = '/';
+	}
+	strcpy (new_url + i, relative_url + j);
+	return g_url_add_slash (new_url, size_new_url);
+}
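+/* A minimal sketch of g_url_relative resolution (not part of the
+ * original gzilla code; an illustrative helper compiled only with
+ * the unit test). */
+#ifdef UNIT_TEST
+void
+demo_relative (void)
+{
+	char buf[80];
+
+	/* "../" strips one path component from the base */
+	if (g_url_relative ("http://host/a/b/c.html", "../d.html",
+			    buf, sizeof (buf)))
+		printf ("%s\n", buf);	/* http://host/a/d.html */
+
+	/* an absolute relative_url simply replaces the base */
+	if (g_url_relative ("http://host/a/", "ftp://other/x",
+			    buf, sizeof (buf)))
+		printf ("%s\n", buf);	/* ftp://other/x */
+}
+#endif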
+/**
+ * g_url_parse:
+ * @url: the URL to parse.
+ * @hostname: buffer receiving the hostname.
+ * @hostname_size: the size of the @hostname buffer.
+ * @port: receives the port number, if one is present.
+ *
+ * Parses @url, packing the hostname and port into the arguments,
+ * and returning the suffix.
+ *
+ * Return value: the suffix of @url, or NULL in case of failure.
+ **/
+char *
+g_url_parse (char *url,
+	     char *hostname,
+	     gint hostname_size,
+	     int *port)
+{
+	gint i, j;
+
+	for (i = 0; url[i] != '\0' && url[i] != ':'; i++);
+	if (url[i] != ':' || url[i + 1] != '/' || url[i + 2] != '/')
+		return NULL;
+	i += 3;
+	for (j = i; url[j] != '\0' && url[j] != ':' && url[j] != '/'; j++);
+	if (j - i >= hostname_size)
+		return NULL;
+	memcpy (hostname, url + i, j - i);
+	hostname[j - i] = '\0';
+	if (url[j] == ':') {
+		*port = atoi (url + j + 1);
+		for (j++; url[j] != '\0' && url[j] != '/'; j++);
+	}
+	return url + j;
+}
+
+#ifndef UNIT_TEST
+/* Parse "http://a/b#c" into "http://a/b" and "#c" (storing both as
+   newly allocated strings into *p_head and *p_tail, respectively).
+
+   Note: this routine allocates new strings for the subcomponents, so
+   that there's no arbitrary restriction on sizes. That's the way I want
+   all the URL functions to work eventually.
+*/
+void
+g_url_parse_hash (char **p_head, char **p_tail, const char *url)
+{
+	gint i;
+
+	/* todo: I haven't checked this for standards compliance. What's it
+	   supposed to do when there are two hashes? */
+
+	for (i = 0; url[i] != '\0' && url[i] != '#'; i++);
+	*p_tail = g_strdup (url + i);
+	*p_head = g_new (char, i + 1);
+	memcpy (*p_head, url, i);
+	(*p_head)[i] = '\0';
+}
+#endif
+
+#ifdef UNIT_TEST
+/* Unit test as follows:
+
+   gcc -g -I/usr/local/include/gtk -DUNIT_TEST url-util.c -o url-util
+   ./url-util base_url relative_url
+*/
+int
+main (int argc, char **argv)
+{
+	char buf[80];
+	char hostname[80];
+	char *tail;
+	int port;
+
+	if (argc == 3) {
+		if (g_url_relative (argv[1], argv[2], buf, sizeof (buf))) {
+			printf ("%s\n", buf);
+			port = 80;
+			tail = g_url_parse (buf, hostname, sizeof (hostname), &port);
+			if (tail != NULL) {
+				printf ("hostname = %s, port = %d, tail = %s\n",
+					hostname, port, tail);
+			}
+		} else {
+			printf ("buffer overflow!\n");
+		}
+	} else {
+		printf ("Usage: %s base_url relative_url\n", argv[0]);
+	}
+	return 0;
+}
+#endif
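+/* A minimal sketch of g_url_parse_hash splitting a fragment off a URL.
+ * This block is not part of the original gzilla code; it is an
+ * illustrative helper that assumes a glib build (g_strdup/g_free), so
+ * like g_url_parse_hash itself it is excluded from the stand-alone
+ * unit test. */
+#ifndef UNIT_TEST
+void
+demo_parse_hash (void)
+{
+	char *head, *tail;
+
+	g_url_parse_hash (&head, &tail, "http://a/b#c");
+	printf ("head = %s, tail = %s\n", head, tail);	/* head = http://a/b, tail = #c */
+	g_free (head);
+	g_free (tail);
+}
+#endif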