aboutsummaryrefslogblamecommitdiffstats
path: root/camel/camel-url.c
blob: 75b7ac985e4c4ad16f1798d6e438f25a32d122fc (plain) (tree)
1
2
3
4
5
6
7
8
9







                                                                           
                                        


















                                                                      
                   


                  












                                                                  
                                                                                                                                     










                                                                    


                                                                    






                                                               
                











                                                                              













                                                                                             

                                             
                                   
                                                         
                                                     

                                          











                                                                 
                                          
                                                                            
                                                       
                        








                                                                       
                                                         





                                                                      
                                             





















                                                                                      
                                             


                                                  
                                             





                                     
                                     






                                                         


                                                            

                      
                      
                                                                  
                          
                                                                         
                                       
                                                                    
                      
                                                                



                                                                  
                      
                                                                 
 
                                                                     
                                              








                                                         
                                     







                                                  















                               

 











                                                                         





















                                                                            






                                                                   
   

                             















                                                                          

           
                               









                                       




                                      
                        
        



                    
                                






                                     




                                    




                                                  
        


                                                          



                                                  
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* camel-url.c : utility functions to parse URLs */


/* 
 * Authors:
 *  Bertrand Guiheneuf <bertrand@helixcode.com>
 *  Dan Winship <danw@helixcode.com>
 *  Tiago Antào <tiagoantao@bigfoot.com>
 *
 * Copyright 1999, 2000 Helix Code, Inc. (http://www.helixcode.com)
 *
 * This program is free software; you can redistribute it and/or 
 * modify it under the terms of the GNU General Public License as 
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */

#include <config.h>

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "camel-url.h"
#include "camel-exception.h"

/**
 * camel_url_new: create a CamelURL object from a string
 * @url_string: The string containing the URL to scan
 * @ex: exception that is set (CAMEL_EXCEPTION_SERVICE_URL_INVALID)
 * when the string cannot be parsed
 * 
 * This routine takes a string and parses it as a URL of the form:
 *
 *   protocol://user;AUTH=mech:password@host:port/path
 *
 * The protocol, followed by a ":" is required. If it is followed by
 * "//", there must be an "authority" containing at least a host,
 * which ends at the end of the string or at the next "/". If there
 * is an "@" in the authority, there must be a username before it,
 * and the host comes after it. The authmech, password, and port are
 * optional, and the punctuation that precedes them is omitted if
 * they are. Everything after the authority (or everything after the
 * protocol if there was no authority) is the path. We consider the
 * "/" between the authority and the path to be part of the path,
 * although this is incorrect according to RFC 1738.
 *
 * The port, if present, must be numeric.
 *
 * The protocol is lower-cased; user, authmech, password, host and
 * path are %-decoded.
 * 
 * If nothing but the protocol (and the ":") is present, the "empty"
 * flag will be set on the returned URL.
 *
 * Return value: a newly allocated CamelURL structure containing the
 * URL items (release it with camel_url_free()), or %NULL with @ex
 * set if the string has no protocol, an invalid protocol, or a
 * non-numeric port.
 **/
CamelURL *
camel_url_new (const char *url_string, CamelException *ex)
{
    /* url_string is advanced while parsing; keep the start so that
     * every error message can quote the complete URL.
     */
    const char *full_url = url_string;
    CamelURL *url;
    char *semi, *colon, *at, *slash;
    char *p;

    /* Find protocol: initial substring until ":" */
    colon = strchr (url_string, ':');
    if (!colon) {
        camel_exception_setv (ex, CAMEL_EXCEPTION_SERVICE_URL_INVALID,
                      "URL string `%s' contains no protocol",
                      full_url);
        return NULL;
    }

    /* g_new0 zeroes the structure, so every field not filled in
     * below stays NULL / 0 / FALSE.
     */
    url = g_new0 (CamelURL, 1);
    url->protocol = g_strndup (url_string, colon - url_string);
    g_strdown (url->protocol);

    /* Check protocol: only lowercase letters, '-', '+' and '.' */
    p = url->protocol;
    while (*p) {
        if (!((*p >= 'a' && *p <= 'z') ||
              (*p == '-') || (*p == '+') || (*p == '.'))) {
            camel_exception_setv (ex, CAMEL_EXCEPTION_SERVICE_URL_INVALID,
                          "URL string `%s' contains an invalid protocol",
                          full_url);
            /* Do not leak the half-built URL. */
            camel_url_free (url);
            return NULL;
        }
        p++;
    }

    /* No "//" after the colon: there is no authority, so whatever
     * follows the colon is the path.
     */
    if (strncmp (colon, "://", 3) != 0) {
        if (*(colon + 1)) {
            url->path = g_strdup (colon + 1);
            camel_url_decode (url->path);
        } else
            url->empty = TRUE;
        return url;
    }

    url_string = colon + 3;

    /* If there is an @ sign in the authority, look for user,
     * authmech, and password before it.
     */
    slash = strchr (url_string, '/');
    at = strchr (url_string, '@');
    if (at && (!slash || at < slash)) {
        colon = strchr (url_string, ':');
        if (colon && colon < at) {
            url->passwd = g_strndup (colon + 1, at - colon - 1);
            camel_url_decode (url->passwd);
        } else {
            url->passwd = NULL;
            /* No password: the user/authmech part runs up to "@". */
            colon = at;
        }

        /* colon is never NULL here: it marks the end of the
         * user/authmech part (either the ":" or the "@").
         */
        semi = strchr (url_string, ';');
        if (semi && semi < colon &&
            !strncasecmp (semi, ";auth=", 6)) {
            url->authmech = g_strndup (semi + 6,
                             colon - semi - 6);
            camel_url_decode (url->authmech);
        } else {
            url->authmech = NULL;
            /* No authmech: the user name runs up to colon. */
            semi = colon;
        }

        url->user = g_strndup (url_string, semi - url_string);
        camel_url_decode (url->user);
        url_string = at + 1;
    } else
        url->user = url->passwd = url->authmech = NULL;

    /* Find host and port. */
    slash = strchr (url_string, '/');
    colon = strchr (url_string, ':');
    /* A ":" that appears after the first "/" belongs to the path. */
    if (colon && slash && colon > slash)
        colon = NULL;

    if (colon) {
        url->host = g_strndup (url_string, colon - url_string);
        camel_url_decode (url->host);
        /* strtoul leaves colon pointing just past the digits. */
        url->port = strtoul (colon + 1, &colon, 10);
        if (*colon && colon != slash) {
            camel_exception_setv (ex, CAMEL_EXCEPTION_SERVICE_URL_INVALID,
                          "Port number in URL `%s' is non-"
                          "numeric", full_url);
            camel_url_free (url);
            return NULL;
        }
    } else if (slash) {
        url->host = g_strndup (url_string, slash - url_string);
        camel_url_decode (url->host);
        url->port = 0;
    } else {
        url->host = g_strdup (url_string);
        camel_url_decode (url->host);
        url->port = 0;
    }

    /* An authority with nothing after it gets the root path. */
    if (!slash)
        slash = "/";
    url->path = g_strdup (slash);
    camel_url_decode (url->path);

    return url;
}

/**
 * camel_url_to_string:
 * @url: the URL to serialize
 * @show_passwd: whether the password may appear in the result
 *
 * Rebuilds the textual form of @url. User, authmech, password and
 * host are %-encoded together with the punctuation that would
 * otherwise be taken for a delimiter; the path is encoded without
 * escaping the "unsafe" set. A port of 0 is left out.
 *
 * Return value: the URL text as built by g_strdup_printf(); the
 * caller owns it.
 **/
char *
camel_url_to_string (CamelURL *url, gboolean show_passwd)
{
    char portbuf[20];
    char *enc_user, *enc_mech, *enc_pass, *enc_host, *enc_path;
    char *text;

    /* Encode each component that is present; absent ones stay NULL. */
    enc_user = url->user ?
        camel_url_encode (url->user, TRUE, ":;@/") : NULL;
    enc_mech = url->authmech ?
        camel_url_encode (url->authmech, TRUE, ":@/") : NULL;
    enc_pass = (show_passwd && url->passwd) ?
        camel_url_encode (url->passwd, TRUE, "@/") : NULL;
    enc_host = url->host ?
        camel_url_encode (url->host, TRUE, ":/") : NULL;
    enc_path = url->path ?
        camel_url_encode (url->path, FALSE, NULL) : NULL;

    /* An empty port buffer means "no port" in the format below. */
    portbuf[0] = '\0';
    if (url->port)
        g_snprintf (portbuf, sizeof (portbuf), "%d", url->port);

    text = g_strdup_printf ("%s:%s%s%s%s%s%s%s%s%s%s%s",
                url->protocol,
                enc_host ? "//" : "",
                enc_user ? enc_user : "",
                enc_mech ? ";auth=" : "",
                enc_mech ? enc_mech : "",
                enc_pass ? ":" : "",
                enc_pass ? enc_pass : "",
                enc_user ? "@" : "",
                enc_host ? enc_host : "",
                portbuf[0] ? ":" : "",
                portbuf,
                enc_path ? enc_path : "");

    g_free (enc_path);
    g_free (enc_host);
    g_free (enc_pass);
    g_free (enc_mech);
    g_free (enc_user);

    return text;
}

/**
 * camel_url_free:
 * @url: the URL to release; must not be %NULL (asserted)
 *
 * Frees every string held by @url and then the structure itself.
 **/
void
camel_url_free (CamelURL *url)
{
    g_assert (url);

    /* The members are independent allocations; order is irrelevant. */
    g_free (url->path);
    g_free (url->host);
    g_free (url->passwd);
    g_free (url->authmech);
    g_free (url->user);
    g_free (url->protocol);

    g_free (url);
}


/**
 * camel_url_encode:
 * @part: a URL part
 * @escape_unsafe: whether or not to %-escape "unsafe" characters.
 * ("%#<>{}|\^~[]`)
 * @escape_extra: additional characters to escape, or %NULL for none.
 *
 * Produces a %-encoded copy of @part. Bytes at or below the space
 * character and bytes at or above 127 are always escaped; the
 * "unsafe" set and @escape_extra are escaped on request. Escapes are
 * written as "%" followed by two uppercase hex digits.
 *
 * Return value: the escaped text in allocated memory, which the
 * caller must free when it is done.
 **/
char *
camel_url_encode (char *part, gboolean escape_unsafe, char *escape_extra)
{
    static const char hex_digits[] = "0123456789ABCDEF";
    char *encoded, *out;
    guchar ch;

    /* Every input byte may expand to "%XX", plus the terminator. */
    encoded = g_malloc (3 * strlen (part) + 1);
    out = encoded;

    for (; *part; part++) {
        gboolean must_escape;

        ch = (guchar) *part;
        must_escape = (ch >= 127) || (ch <= ' ');
        if (!must_escape && escape_unsafe)
            must_escape = strchr ("\"%#<>{}|\\^~[]`", *part) != NULL;
        if (!must_escape && escape_extra)
            must_escape = strchr (escape_extra, *part) != NULL;

        if (must_escape) {
            *out++ = '%';
            *out++ = hex_digits[ch >> 4];
            *out++ = hex_digits[ch & 0x0F];
        } else
            *out++ = *part;
    }
    *out = '\0';

    return encoded;
}

#define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10)

/**
 * camel_url_decode:
 * @part: a URL part
 *
 * %-decodes the passed-in URL *in place*. The decoded version is
 * never longer than the encoded version, so there does not need to
 * be any additional space at the end of the string.
 */
void
camel_url_decode (char *part)
{
    guchar *s, *d;

    s = d = (guchar *)part;
    while (*s) {
        if (*s == '%') {
            if (isxdigit (s[1]) && isxdigit (s[2])) {
                *d++ = HEXVAL (s[1]) * 16 + HEXVAL (s[2]);
                s += 3;
            } else
                *d++ = *s++;
        } else
            *d++ = *s++;
    }
    *d = '\0';
}

/* XORs the g_str_hash() of @s into *@hash; a NULL @s leaves it alone. */
static void
add_hash (guint *hash, char *s)
{
    if (s == NULL)
        return;

    *hash = *hash ^ g_str_hash (s);
}

/**
 * camel_url_hash:
 * @v: a CamelURL, passed as an untyped pointer
 *
 * Combines the port with the string hashes of protocol, user,
 * authmech, host and path by XOR. The password does not take part.
 * NOTE(review): the untyped signature looks intended for use as a
 * hash-table callback alongside camel_url_equal() -- confirm
 * against callers.
 *
 * Return value: the combined hash.
 **/
guint camel_url_hash (const void *v)
{
    const CamelURL *url = v;
    guint result;

    /* XOR is order-independent, so seeding with the port is the
     * same as folding it in last.
     */
    result = url->port;
    add_hash (&result, url->path);
    add_hash (&result, url->host);
    add_hash (&result, url->authmech);
    add_hash (&result, url->user);
    add_hash (&result, url->protocol);

    return result;
}

/* NULL-tolerant string comparison: TRUE when both are NULL or both
 * hold the same text, FALSE otherwise.
 */
static int
check_equal (char *s1, char *s2)
{
    if (s1 == NULL && s2 == NULL)
        return TRUE;

    /* Exactly one of them is NULL. */
    if (s1 == NULL || s2 == NULL)
        return FALSE;

    return !strcmp (s1, s2);
}

int camel_url_equal(const void *v, const void *v2)
{
    const CamelURL *u1 = v, *u2 = v2;
    
    return check_equal(u1->protocol, u2->protocol)
        && check_equal(u1->user, u2->user)
        && check_equal(u1->authmech, u2->authmech)
        && check_equal(u1->host, u2->host)
        && check_equal(u1->path, u2->path)
        && u1->port == u2->port;
}