/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ /* url-util.c : utility functions to parse URLs */ /* * This code is adapted form gzillaurl.c (http://www.gzilla.com) * Copyright (C) Raph Levien * * Modifications by Bertrand Guiheneuf * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA */ #include /* for isalpha */ #include /* for atoi */ #include "url-util.h" /** * g_url_is_absolute: * @url: * * * * Return value: **/ gboolean g_url_is_absolute (const char *url) { gint i; for (i = 0; url[i] != '\0'; i++) { if (url[i] == ':') return TRUE; else if (!isalpha (url[i])) return FALSE; } return FALSE; } /** * g_url_match_method: * @url: * @method: * * * * Return value: TRUE if the method matches **/ gboolean g_url_match_method (const char *url, const char *method) { gint i; for (i = 0; method[i] != '\0'; i++) if (url[i] != method[i]) return FALSE; return (url[i] == ':'); } /** * g_url_add_slash: * @url: * @size_url: * * Add the trailing slash if necessary. Return FALSE if there isn't room * * Return value: **/ gboolean g_url_add_slash (char *url, gint size_url) { char hostname[256]; gint port; char *tail; if (g_url_match_method (url, "http") || g_url_match_method (url, "ftp")) { tail = g_url_parse (url, hostname, sizeof(hostname), &port); if (tail == NULL) return TRUE; if (tail[0] == '\0') { if (strlen (url) + 1 == size_url) return FALSE; tail[0] = '/'; tail[1] = '\0'; } } return TRUE; } /** * g_url_relative: * @base_url: * @relative_url: * @new_url: * @size_new_url: * * * * Return value: **/ gboolean g_url_relative (const char *base_url, const char *relative_url, char *new_url, gint size_new_url) { gint i, j, k; gint num_dotdot; if (base_url == NULL || g_url_is_absolute (relative_url)) { if (strlen (relative_url) >= size_new_url) return FALSE; strcpy (new_url, relative_url); return g_url_add_slash (new_url, size_new_url); } /* Assure that we have enough room for at least the base URL. */ if (strlen (base_url) >= size_new_url) return FALSE; /* Copy http://hostname:port/ from base_url to new_url */ i = 0; if (g_url_match_method (base_url, "http") || g_url_match_method (base_url, "ftp")) { while (base_url[i] != '\0' && base_url[i] != ':') new_url[i] = base_url[i++]; if (base_url[i] != '\0') new_url[i] = base_url[i++]; if (base_url[i] != '\0') new_url[i] = base_url[i++]; if (base_url[i] != '\0') new_url[i] = base_url[i++]; while (base_url[i] != '\0' && base_url[i] != '/') new_url[i] = base_url[i++]; } else { while (base_url[i] != '\0' && base_url[i] != ':') new_url[i] = base_url[i++]; if (base_url[i] != '\0') new_url[i] = base_url[i++]; } if (relative_url[0] == '/') { if (i + strlen (relative_url) >= size_new_url) return FALSE; strcpy (new_url + i, relative_url); return g_url_add_slash (new_url, size_new_url); } /* At this point, i points to the first slash following the hostname (and port) in base_url. */ /* Now, figure how many ..'s to follow. */ num_dotdot = 0; j = 0; while (relative_url[j] != '\0') { if (relative_url[j] == '.' && relative_url[j + 1] == '/') { j += 2; } else if (relative_url[j] == '.' && relative_url[j + 1] == '.' && relative_url[j + 2] == '/') { j += 3; num_dotdot++; } else { break; } } /* Find num_dotdot+1 slashes back from the end, point k there. */ for (k = strlen (base_url); k > i && num_dotdot >= 0; k--) if (base_url[k - 1] == '/') num_dotdot--; if (k + 1 + strlen (relative_url) - j >= size_new_url) return FALSE; while (i < k) new_url[i] = base_url[i++]; if (relative_url[0] == '#') while (base_url[i] != '\0') new_url[i] = base_url[i++]; else if (base_url[i] == '/' || base_url[i] == '\0') new_url[i++] = '/'; strcpy (new_url + i, relative_url + j); return g_url_add_slash (new_url, size_new_url); } /* Parse the url, packing the hostname and port into the arguments, and returning the suffix. Return NULL in case of failure. */ /** * g_url_parse: * @url: * @hostname: * @hostname_size: * @port: * * * * Return value: **/ char * g_url_parse (char *url, char *hostname, gint hostname_size, int *port) { gint i, j; for (i = 0; url[i] != '\0' && url[i] != ':'; i++); if (url[i] != ':' || url[i + 1] != '/' || url[i + 2] != '/') return NULL; i += 3; for (j = i; url[j] != '\0' && url[j] != ':' && url[j] != '/'; j++); if (j - i >= hostname_size) return NULL; memcpy (hostname, url + i, j - i); hostname[j - i] = '\0'; if (url[j] == ':') { *port = atoi (url + j + 1); for (j++; url[j] != '\0' && url[j] != '/'; j++); } return url + j; } #ifndef UNIT_TEST /* Parse "http://a/b#c" into "http://a/b" and "#c" (storing both as newly allocated strings into *p_head and *p_tail, respectively. Note: this routine allocates new strings for the subcomponents, so that there's no arbitrary restriction on sizes. That's the way I want all the URL functions to work eventually. */ void g_url_parse_hash (char **p_head, char **p_tail, const char *url) { gint i; /* todo: I haven't checked this for standards compliance. What's it supposed to do when there are two hashes? */ for (i = 0; url[i] != '\0' && url[i] != '#'; i++); *p_tail = g_strdup (url + i); *p_head = g_new (char, i + 1); memcpy (*p_head, url, i); (*p_head)[i] = '\0'; } #endif #ifdef UNIT_TEST /* Unit test as follows: gcc -g -I/usr/local/include/gtk -DUNIT_TEST camelurl.c -o camelurl ./camelurl base_url relative_url */ int main (int argc, char **argv) { char buf[80]; char hostname[80]; char *tail; int port; if (argc == 3) { if (g_url_relative (argv[1], argv[2], buf, sizeof(buf))) { printf ("%s\n", buf); port = 80; tail = g_url_parse (buf, hostname, sizeof (hostname), &port); if (tail != NULL) { printf ("hostname = %s, port = %d, tail = %s\n", hostname, port, tail); } } else { printf ("buffer overflow!\n"); } } else { printf ("Usage: %s base_url relative_url\n", argv[0]); } return 0; } #endif