/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* url-util.c : utility functions to parse URLs */

/*
 * Author :
 *  Bertrand Guiheneuf <bertrand@helixcode.com>
 *
 * Copyright 1999, 2000 HelixCode (http://www.helixcode.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */

/*
 * Here we deal with URL following the general scheme:
 *   protocol://user:password@host:port/name
 * where name is a path-like string (ie dir1/dir2/....)
 * See rfc1738 for the complete description of
 * Uniform Resource Locators
 *
 *     Bertrand.
 */

/* XXX TODO:  recover the words between #'s or ?'s after the path */

#include <config.h>
#include <string.h>
#include "url-util.h"

/* General item finder.
 *
 * It begins the search at position *@position in @url, returns TRUE
 * when the item is found (a newly allocated copy is stored in *@item,
 * otherwise *@item is set to NULL), and sets *@position after the item.
 */
typedef gboolean find_item_func (const gchar *url,
				 gchar **item,
				 guint *position,
				 gboolean *error);

/* Describes one parsing step (protocol, then user, ...). */
typedef struct {
	const char *item_name;     /* item name : for debug only */
	gchar **item_value;        /* where the item value will go */
	find_item_func *find_func; /* item finder */
} FindStepStruct;

static gboolean _find_protocol (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean _find_user     (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean _find_passwd   (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean _find_host     (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean _find_port     (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean _find_path     (const gchar *url, gchar **item, guint *position, gboolean *error);

/* Return the index of the first occurrence of @c in @url within
 * [@start, @end), or @end when @c does not occur in that range
 * (or when the range is empty). */
static guint
scan_for_char (const gchar *url, guint start, guint end, gchar c)
{
	guint i;

	for (i = start; i < end; i++) {
		if (url[i] == c)
			return i;
	}
	return end;
}

#define NB_STEP_URL 6

/**
 * g_url_new: create an Gurl object from a string
 *
 * @url_string: The string containing the URL to scan
 *
 * This routine takes a gchar and parses it as an
 * URL of the form:
 *   protocol://user:password@host:port/path
 * There is no test on the values. For example,
 * "port" can be a string, not only a number!
 * The Gurl structure fields are filled with
 * the scan results. When a member of the
 * general URL can not be found, the corresponding
 * Gurl member is NULL.
 * Fields filled in the Gurl structure are allocated
 * and url_string is not modified.
 *
 * Return value: a newly allocated Gurl structure containing the URL
 * items, to be released with g_url_free(), or NULL when @url_string
 * is NULL.
 **/
Gurl *
g_url_new (const gchar *url_string)
{
	Gurl *g_url;
	guint position = 0;
	gboolean error = FALSE;
	guint i;

	g_return_val_if_fail (url_string != NULL, NULL);

	/* zero-filled so that no field is ever left uninitialized */
	g_url = g_new0 (Gurl, 1);

	{
		/* the steps must run in this order: each one resumes
		 * where the previous one left @position */
		FindStepStruct step[NB_STEP_URL] = {
			{ "protocol", &(g_url->protocol), _find_protocol },
			{ "user",     &(g_url->user),     _find_user },
			{ "password", &(g_url->passwd),   _find_passwd },
			{ "host",     &(g_url->host),     _find_host },
			{ "port",     &(g_url->port),     _find_port },
			{ "path",     &(g_url->path),     _find_path }
		};

		/* a missing item is not an error: its field stays NULL */
		for (i = 0; i < NB_STEP_URL; i++)
			step[i].find_func (url_string, step[i].item_value,
					   &position, &error);
	}

	return g_url;
}

/**
 * g_url_free: release a Gurl object
 *
 * @url: the Gurl to free; must not be NULL
 *
 * Frees the six string fields of @url and then @url itself.
 **/
void
g_url_free (Gurl *url)
{
	g_assert (url);

	g_free (url->protocol);
	g_free (url->user);
	g_free (url->passwd);
	g_free (url->host);
	g_free (url->port);
	g_free (url->path);

	g_free (url);
}

/**** PARSING FUNCTIONS ****/

/* So, yes, I must admit there would have been more elegant
   ways to do this, but it works, and quite well :) */

/* Find "protocol://" starting at *@position.
 *
 * On success *@item receives the text before the ':' and *@position
 * is moved just past the "//". Otherwise *@position is left untouched.
 * *@error (when @error is not NULL) is reset to FALSE. */
static gboolean
_find_protocol (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint colon;
	guint len_url;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	*item = NULL;
	if (error)
		*error = FALSE;

	len_url = (guint) strlen (url);
	start = *position;

	/* find a ':' */
	colon = scan_for_char (url, start, len_url, ':');

	/* there must be room for the two characters following the ':' */
	if (colon >= len_url || len_url - colon < 3)
		return FALSE;

	/* check if it is followed by a "//" */
	if (url[colon + 1] != '/' || url[colon + 2] != '/')
		return FALSE;

	*item = g_strndup (url + start, colon - start);
	*position = colon + 3;
	return TRUE;
}

/* Find the user name, i.e. what precedes ':' or '@' in "user:password@".
 *
 * The '@' is only looked for before the first '/': rfc1738 requires
 * '/', '@' and ':' to be encoded inside the user and password, so an
 * '@' found after a '/' belongs to the path, not to the login part.
 *
 * On success *@position is left on the '@', or just after the ':' when
 * a password follows. When the user is empty, the ':' or '@' found at
 * *@position is skipped. */
static gboolean
_find_user (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint len_url;
	guint authority_end;
	guint at_pos;
	guint colon;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	*item = NULL;

	len_url = (guint) strlen (url);
	start = *position;

	/* find a '@' inside "user:password@host:port" only */
	authority_end = scan_for_char (url, start, len_url, '/');
	at_pos = scan_for_char (url, start, authority_end, '@');
	if (at_pos >= authority_end)
		return FALSE;

	/* find a ':' before the '@' */
	colon = scan_for_char (url, start, at_pos, ':');

	/* nothing before the separator: there is no user */
	if (colon == start) {
		(*position)++;
		return FALSE;
	}

	*item = g_strndup (url + start, colon - start);
	if (colon < at_pos)
		*position = colon + 1; /* there was a ':', skip it */
	else
		*position = colon;     /* stay on the '@' */

	return TRUE;
}

/* Find the password, i.e. what lies between *@position and the '@'.
 *
 * As for the user, the '@' is only looked for before the first '/'.
 * Whenever a '@' is found, *@position is moved just after it, whether
 * or not a password was extracted. */
static gboolean
_find_passwd (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint len_url;
	guint authority_end;
	guint at_pos;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	*item = NULL;

	len_url = (guint) strlen (url);
	start = *position;

	/* find a '@' inside "user:password@host:port" only */
	authority_end = scan_for_char (url, start, len_url, '/');
	at_pos = scan_for_char (url, start, authority_end, '@');
	if (at_pos >= authority_end)
		return FALSE;

	/* the '@' comes first: there is no passwd, just skip the '@' */
	if (at_pos == start) {
		*position = at_pos + 1;
		return FALSE;
	}

	*item = g_strndup (url + start, at_pos - start);
	*position = at_pos + 1; /* skip the '@' */

	return TRUE;
}

/* Find the host name, i.e. what precedes the first ':' or '/'.
 *
 * On success *@position is left on the '/' (or the end of the string),
 * or just after the ':' when a port follows. */
static gboolean
_find_host (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint len_url;
	guint slash_pos;
	guint colon;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	*item = NULL;

	len_url = (guint) strlen (url);
	start = *position;

	/* find a '/' */
	slash_pos = scan_for_char (url, start, len_url, '/');

	/* find a ':' before it */
	colon = scan_for_char (url, start, slash_pos, ':');

	/* nothing before the separator: there is no host */
	if (colon == start) {
		/* if we have not met / or \0, we have : and must skip it */
		if (colon < slash_pos)
			(*position)++;
		return FALSE;
	}

	*item = g_strndup (url + start, colon - start);
	if (colon < slash_pos)
		*position = colon + 1; /* there was a ':', skip it */
	else
		*position = colon;

	return TRUE;
}

/* Find the port, i.e. what precedes the first ':' or '/'.
 *
 * The value is not checked: it may be any string. On success
 * *@position is left on the character that ended the port. */
static gboolean
_find_port (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint len_url;
	guint slash_pos;
	guint colon;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	*item = NULL;

	len_url = (guint) strlen (url);
	start = *position;

	/* find a '/' */
	slash_pos = scan_for_char (url, start, len_url, '/');

	/* find a ':' before it */
	colon = scan_for_char (url, start, slash_pos, ':');

	/* nothing before the separator: there is no port */
	if (colon == start)
		return FALSE;

	*item = g_strndup (url + start, colon - start);
	*position = colon;
	return TRUE;
}

/* Find the path, i.e. everything up to the first '#' or '?'
 * (or the end of the string).
 *
 * On success *@position is left on the '#' or '?' (or the end of the
 * string). */
static gboolean
_find_path (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint end;
	guint len_url;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	*item = NULL;

	len_url = (guint) strlen (url);
	start = *position;

	/* find a '#' or a '?' */
	end = start;
	while ((end < len_url) && (url[end] != '#') && (url[end] != '?'))
		end++;

	/* nothing before the separator: there is no path */
	if (end == start)
		return FALSE;

	*item = g_strndup (url + start, end - start);
	*position = end;
	return TRUE;
}

/**** TEST ROUTINE - NOT COMPILED BY DEFAULT ****/

/* to test this file :
   gcc -o test_url_util `glib-config --cflags` -I.. -DTEST_URL_UTIL url-util.c `glib-config --libs`
   ./test_url_util URL
*/
#ifdef TEST_URL_UTIL

#include <stdio.h>

#define NB_STEP_TEST 6

/* Run every parsing step on argv[1], printing what each step found
 * and where the next step will resume. */
int
main (int argc, char **argv)
{
	gchar *url;
	gchar *protocol;
	gchar *user;
	gchar *passwd;
	gchar *host;
	gchar *port;
	gchar *path;
	guint position = 0;
	gboolean error = FALSE;
	gboolean found;
	guint i;
	guint i_pos;

	FindStepStruct test_step[NB_STEP_TEST] = {
		{ "protocol", &protocol, _find_protocol },
		{ "user",     &user,     _find_user },
		{ "password", &passwd,   _find_passwd },
		{ "host",     &host,     _find_host },
		{ "port",     &port,     _find_port },
		{ "path",     &path,     _find_path }
	};

	if (argc < 2) {
		fprintf (stderr, "usage: %s URL\n", argv[0] ? argv[0] : "test_url_util");
		return 1;
	}

	url = argv[1];
	printf ("URL to test : %s\n\n", url);

	for (i = 0; i < NB_STEP_TEST; i++) {
		found = test_step[i].find_func (url, test_step[i].item_value,
						&position, &error);
		if (found) {
			printf ("\t\t\t\t** %s found : %s\n",
				test_step[i].item_name,
				*(test_step[i].item_value));
		} else
			printf ("** %s not found in URL\n", test_step[i].item_name);

		printf ("next item position:\n");
		printf ("%s\n", url);
		for (i_pos = 0; i_pos < position; i_pos++)
			printf (" ");
		printf ("^\n");

		/* every finder stores either NULL or an allocated copy */
		g_free (*(test_step[i].item_value));
	}

	return 0;
}

#endif /* TEST_URL_UTIL */