/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* url-util.c : utility functions to parse URLs */

/*
 * Copyright (C) 1999 Bertrand Guiheneuf <Bertrand.Guiheneuf@inria.fr>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */

/*
 * Here we deal with URLs following the general scheme:
 *
 *   protocol://user:password@host:port/name
 *
 * where name is a path-like string (ie dir1/dir2/....)
 * See rfc1738 for the complete description of
 * Uniform Resource Locators.
 *
 *   Bertrand.
 */

/* XXX TODO: recover the words between #'s or ?'s after the path */

#include <config.h>
#include <string.h>
#include "url-util.h"

/*
 * General item finder.
 *
 * A finder begins its search at index *@position in @url.  It returns
 * TRUE when the item is found, stores a newly allocated copy of it in
 * *@item (NULL when not found) and moves *@position after the item.
 */
typedef gboolean find_item_func (const gchar *url,
				 gchar **item,
				 guint *position,
				 gboolean *error);

/* used to find one item (protocol, then user ....) */
typedef struct {
	const char *item_name;      /* item name : for debug only */
	gchar **item_value;         /* where the item value will go */
	find_item_func *find_func;  /* item finder */
} FindStepStruct;

static gboolean find_protocol (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean find_user     (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean find_passwd   (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean find_host     (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean find_port     (const gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean find_path     (const gchar *url, gchar **item, guint *position, gboolean *error);

/*
 * Scan @url from index @start and stop at the first character equal to
 * @c, or at index @end if @c does not occur before it.  Returns the
 * index where the scan stopped; when @start >= @end nothing is scanned
 * and @start is returned unchanged.
 */
static guint
scan_for (const gchar *url, guint start, guint end, gchar c)
{
	guint i = start;

	while ((i < end) && (url[i] != c))
		i++;

	return i;
}

/**
 * g_url_new: create a Gurl object from a string
 *
 * @url_string: The string containing the URL to scan
 *
 * This routine takes a gchar and parses it as a
 * URL of the form:
 *   protocol://user:password@host:port/path
 * There is no test on the values. For example,
 * "port" can be a string, not only a number!
 * The Gurl structure fields are filled with
 * the scan results. When a member of the
 * general URL can not be found, the corresponding
 * Gurl member is NULL.
 * The Gurl structure is allocated with g_new0() and the
 * fields filled in it are allocated with g_strndup();
 * url_string is not modified.
 *
 * Return value: a Gurl structure containing the URL items.
 **/
Gurl *
g_url_new (gchar *url_string)
{
	Gurl *g_url;
	guint position = 0;
	gboolean error = FALSE;
	guint i;

	/* zero-filled, so any member not covered by the steps below is 0/NULL */
	g_url = g_new0 (Gurl, 1);

#define NB_STEP_URL 6
	{
		FindStepStruct step[NB_STEP_URL] = {
			{ "protocol", &(g_url->protocol), find_protocol},
			{ "user",     &(g_url->user),     find_user},
			{ "password", &(g_url->passwd),   find_passwd},
			{ "host",     &(g_url->host),     find_host},
			{ "port",     &(g_url->port),     find_port},
			{ "path",     &(g_url->path),     find_path}
		};

		/* Each finder resumes where the previous one left @position;
		 * a finder that fails simply leaves its item NULL. */
		for (i = 0; i < NB_STEP_URL; i++)
			step[i].find_func (url_string, step[i].item_value,
					   &position, &error);
	}

	return g_url;
}

/*
 * So, yes, I must admit there would have been more elegant
 * ways to do this, but it works, and quite well :)
 */

/*
 * find_protocol: look for "<protocol>://" starting at *@position.
 *
 * Only the first ':' found is examined: it must be immediately followed
 * by "//".  On success *@item holds the text before the ':' and
 * *@position points just after the "//".  On failure *@position is left
 * unchanged.  *@error is always reset to FALSE.
 */
static gboolean
find_protocol (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint len_url;
	guint colon;

	g_assert (url);
	g_assert (item);
	g_assert (position);
	g_assert (error);

	len_url = (guint) strlen (url);
	*item = NULL;
	*error = FALSE;
	start = *position;

	/* find a ':' */
	colon = scan_for (url, start, len_url, ':');
	if (colon >= len_url)
		return FALSE;

	/* check if it is followed by a "//" */
	if ((colon + 2 < len_url)
	    && (url[colon + 1] == '/')
	    && (url[colon + 2] == '/')) {
		/* copy relative to the start position, not to the URL start */
		*item = g_strndup (url + start, colon - start);
		*position = colon + 3;
		return TRUE;
	}

	return FALSE;
}

/*
 * find_user: look for the user name of a "user[:password]@" prefix.
 *
 * The '@' is only searched up to the first '/' after *@position:
 * rfc1738 requires '/' to be encoded inside the user and password
 * fields, so an '@' located in the path must not be mistaken for the
 * user-info delimiter.
 *
 * On success *@position is moved after the ':' that ends the user name
 * or, when there is no password, left on the '@' (find_passwd() steps
 * over it).  When there is an '@' but the user name is empty, the ':'
 * or '@' found at *@position is skipped.  Without any '@', *@position
 * is left unchanged.  @error is not written.
 */
static gboolean
find_user (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint len_url;
	guint authority_end;
	guint at_pos;
	guint i;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	len_url = (guint) strlen (url);
	*item = NULL;
	start = *position;

	/* find a '@', but not beyond the first '/' */
	authority_end = scan_for (url, start, len_url, '/');
	at_pos = scan_for (url, start, authority_end, '@');
	if (at_pos >= authority_end)
		return FALSE;

	/* find a ':' before the '@' */
	i = scan_for (url, start, at_pos, ':');

	/* now if i has not been incremented at all, there is no user */
	if (i == start) {
		*position = start + 1;
		return FALSE;
	}

	*item = g_strndup (url + start, i - start);
	if (i < at_pos)
		*position = i + 1; /* there was a ':', skip it */
	else
		*position = i;

	return TRUE;
}

/*
 * find_passwd: look for the password, i.e. the text between *@position
 * and the next '@'.
 *
 * As in find_user(), the '@' is only searched up to the first '/'.
 * Whenever an '@' is found *@position is moved just after it, whether
 * or not the password is empty.  Without any '@', *@position is left
 * unchanged.  @error is not written.
 */
static gboolean
find_passwd (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint len_url;
	guint authority_end;
	guint at_pos;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	len_url = (guint) strlen (url);
	*item = NULL;
	start = *position;

	/* find a '@', but not beyond the first '/' */
	authority_end = scan_for (url, start, len_url, '/');
	at_pos = scan_for (url, start, authority_end, '@');
	if (at_pos >= authority_end)
		return FALSE;

	/* the '@' is right at the start position: there is no passwd */
	if (at_pos == start) {
		*position = at_pos + 1;
		return FALSE;
	}

	*item = g_strndup (url + start, at_pos - start);
	*position = at_pos + 1; /* skip the '@' */

	return TRUE;
}

/*
 * find_host: look for the host name, i.e. the text between *@position
 * and the next ':' or '/' (or the end of the string).
 *
 * On success *@position is moved after the ':' that introduces the port
 * or, when there is none, left on the '/' (or on the end of the
 * string).  When the host is empty, only a ':' is skipped: a '/' is the
 * first character of the path and must stay available to find_path(),
 * and the end of the string must not be stepped over.
 * @error is not written.
 */
static gboolean
find_host (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint len_url;
	guint slash_pos;
	guint i;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	len_url = (guint) strlen (url);
	*item = NULL;
	start = *position;

	/* find a '/' */
	slash_pos = scan_for (url, start, len_url, '/');

	/* find a ':' before it */
	i = scan_for (url, start, slash_pos, ':');

	/* at this point if i has not been incremented at all,
	   there is no host */
	if (i == start) {
		if (i < slash_pos)
			*position = start + 1; /* url[start] is ':', skip it */
		return FALSE;
	}

	*item = g_strndup (url + start, i - start);
	if (i < slash_pos)
		*position = i + 1; /* there was a ':', skip it */
	else
		*position = i;

	return TRUE;
}

/*
 * find_port: look for the port, i.e. the text between *@position and
 * the next ':' or '/' (or the end of the string).
 *
 * The port is copied as a string; it is not checked to be a number.
 * On success *@position is moved to the character that ended the port.
 * On failure *@position is left unchanged.  @error is not written.
 */
static gboolean
find_port (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint len_url;
	guint slash_pos;
	guint i;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	len_url = (guint) strlen (url);
	*item = NULL;
	start = *position;

	/* find a '/' */
	slash_pos = scan_for (url, start, len_url, '/');

	/* find a ':' before it */
	i = scan_for (url, start, slash_pos, ':');

	/* at this point if i has not been incremented at all,
	   there is no port */
	if (i == start)
		return FALSE;

	*item = g_strndup (url + start, i - start);
	*position = i;

	return TRUE;
}

/*
 * find_path: look for the path, i.e. the text between *@position and
 * the next '#' or '?' (or the end of the string).
 *
 * On success *@position is moved to the character that ended the path.
 * On failure *@position is left unchanged.  @error is not written.
 */
static gboolean
find_path (const gchar *url, gchar **item, guint *position, gboolean *error)
{
	guint start;
	guint len_url;
	guint i;

	g_assert (url);
	g_assert (item);
	g_assert (position);

	len_url = (guint) strlen (url);
	*item = NULL;
	start = *position;

	/* find a '#' or a '?' */
	i = start;
	while ((i < len_url) && (url[i] != '#') && (url[i] != '?'))
		i++;

	/* i has not been incremented at all, there is no path */
	if (i == start)
		return FALSE;

	*item = g_strndup (url + start, i - start);
	*position = i;

	return TRUE;
}

/* to test this file :
   gcc -o test_url_util `glib-config --cflags` -I.. -DTEST_URL_UTIL url-util.c `glib-config --libs`
   ./test_url_util URL
*/
#ifdef TEST_URL_UTIL

#include <stdio.h>

/*
 * Test driver: runs the six finders in order on the URL given as first
 * command line argument and, after each step, prints the item found (if
 * any) and a '^' marker under the position the next finder starts from.
 */
int
main (int argc, char **argv)
{
	gchar *url;
	gchar *protocol = NULL;
	gchar *user = NULL;
	gchar *passwd = NULL;
	gchar *host = NULL;
	gchar *port = NULL;
	gchar *path = NULL;
	guint position = 0;
	gboolean error = FALSE;
	gboolean found;
	guint i;
	guint i_pos;

#define NB_STEP_TEST 6
	FindStepStruct test_step[NB_STEP_TEST] = {
		{ "protocol", &protocol, find_protocol},
		{ "user",     &user,     find_user},
		{ "password", &passwd,   find_passwd},
		{ "host",     &host,     find_host},
		{ "port",     &port,     find_port},
		{ "path",     &path,     find_path}
	};

	/* without this check argv[1] is NULL and the finders would abort */
	if (argc < 2) {
		fprintf (stderr, "usage: test_url_util URL\n");
		return 1;
	}

	url = argv[1];
	printf ("URL to test : %s\n\n", url);

	for (i = 0; i < NB_STEP_TEST; i++) {
		found = test_step[i].find_func (url,
						test_step[i].item_value,
						&position,
						&error);
		if (found) {
			printf ("\t\t\t\t** %s found : %s\n",
				test_step[i].item_name,
				*(test_step[i].item_value));
		} else
			printf ("** %s not found in URL\n",
				test_step[i].item_name);

		printf ("next item position:\n");
		printf ("%s\n", url);
		for (i_pos = 0; i_pos < position; i_pos++)
			printf (" ");
		printf ("^\n");
	}

	return 0;
}

#endif /* TEST_URL_UTIL */