aboutsummaryrefslogblamecommitdiffstats
path: root/camel/camel-url.c
blob: 7b1c31db5c65d429df69460dd0ed60959b363bb4 (plain) (tree)
1
2
3
4
5
6
7
8
9


                                                                           

           
                                 
                                      
  
                                                    

                                                                 
                                                                    
                                                        











                                                                      
                    
                   
      


                  

                   
 
                      
                         
                            
                             
                         
 


                                                                            

                                                                                           
   


                           
  
                                        
  
                                   

          
                                                                

                      

                                                                      
 
                                   
















                                                                  
                    


                                                                       
                                              
                                   
         
 
                                  
                           
 
































                                                                              
 
                                         
                                                 


                                                                               
                        


                                                                               

                 









                                                                      
                 

                               
 













                                                                         
                                                                                 

                                                                 
                                                                        




                                                                             
                                              
                         
                 





                                                                     

         












































































                                                                               



                   










                                                                            
                        





                                      
                                                          




                                                                   


                                                                              







                            
                                          


                                                                        
      
                                                  
 
                     
                            
        


                                                       
        
                                      
        
                          

                                                                   


                                            









                                                                                
                 
                                                          
                              
                                                                       


                                                                               
        

                                                          
                                                            

                                                                     

                                                           

                            

                                                              
         
        

                                   
        
                             

 



                                                               


                                                                   
                            

                                                    
         







                   


                              












                                                
 
 
 





                                                         

 










                                            


                         


                                                                        
 
                                                                                               

 





                                                        
































                                                                              
 


                    

                                                                




                                                                    
                                                             
 

                      
 



                                                     
 
                       

 






                                                                   
   

                             
 
                             
 






                                                                    
                      

                                  
 
 
 

                              



                              







                                                  
                        
        



                    
                                






                                     




                                    

 

                                              

                                         
 


                                                          

                                                  
                                                    

                                        
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* camel-url.c : utility functions to parse URLs */

/* 
 * Authors:
 *  Dan Winship <danw@ximian.com>
 *  Jeffrey Stedfast <fejj@ximian.com>
 *
 * Copyright 1999-2001 Ximian, Inc. (www.ximian.com)
 *
 * This program is free software; you can redistribute it and/or 
 * modify it under the terms of version 2 of the GNU General Public 
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "camel-url.h"
#include "string-utils.h"
#include "camel-exception.h"
#include "camel-mime-utils.h"
#include "camel-object.h"

static void copy_param (GQuark key_id, gpointer data, gpointer user_data);
static void output_param (GQuark key_id, gpointer data, gpointer user_data);

static void append_url_encoded (GString *str, const char *in, const char *extra_enc_chars);

/**
 * camel_url_new_with_base:
 * @base: a base URL
 * @url_string: the URL
 *
 * Parses @url_string relative to @base.
 *
 * Return value: a parsed CamelURL.
 **/
CamelURL *
camel_url_new_with_base (CamelURL *base, const char *url_string)
{
    CamelURL *url;
    const char *end, *hash, *colon, *semi, *at, *slash, *question;
    const char *p;

    url = g_new0 (CamelURL, 1);

    /* See RFC1808 for details. IF YOU CHANGE ANYTHING IN THIS
     * FUNCTION, RUN tests/misc/url AFTERWARDS.
     */

    /* Find fragment. */
    end = hash = strchr (url_string, '#');
    if (hash && hash[1]) {
        url->fragment = g_strdup (hash + 1);
        camel_url_decode (url->fragment);
    } else
        end = url_string + strlen (url_string);

    /* Find protocol: initial [a-z+.-]* substring until ":" */
    p = url_string;
    while (p < end && (isalnum ((unsigned char)*p) ||
               *p == '.' || *p == '+' || *p == '-'))
        p++;

    if (p > url_string && *p == ':') {
        url->protocol = g_strndup (url_string, p - url_string);
        camel_strdown (url->protocol);
        url_string = p + 1;
    }

    if (!*url_string && !base)
        return url;

    /* Check for authority */
    if (strncmp (url_string, "//", 2) == 0) {
        url_string += 2;

        slash = url_string + strcspn (url_string, "/#");
        at = strchr (url_string, '@');
        if (at && at < slash) {
            colon = strchr (url_string, ':');
            if (colon && colon < at) {
                url->passwd = g_strndup (colon + 1,
                             at - colon - 1);
                camel_url_decode (url->passwd);
            } else {
                url->passwd = NULL;
                colon = at;
            }

            semi = strchr(url_string, ';');
            if (semi && semi < colon &&
                !strncasecmp (semi, ";auth=", 6)) {
                url->authmech = g_strndup (semi + 6,
                               colon - semi - 6);
                camel_url_decode (url->authmech);
            } else {
                url->authmech = NULL;
                semi = colon;
            }

            url->user = g_strndup (url_string, semi - url_string);
            camel_url_decode (url->user);
            url_string = at + 1;
        } else
            url->user = url->passwd = url->authmech = NULL;

        /* Find host and port. */
        colon = strchr (url_string, ':');
        if (colon && colon < slash) {
            url->host = g_strndup (url_string, colon - url_string);
            url->port = strtoul (colon + 1, NULL, 10);
        } else {
            url->host = g_strndup (url_string, slash - url_string);
            camel_url_decode (url->host);
            url->port = 0;
        }

        url_string = slash;
    }

    /* Find query */
    question = memchr (url_string, '?', end - url_string);
    if (question) {
        if (question[1]) {
            url->query = g_strndup (question + 1,
                        end - (question + 1));
            camel_url_decode (url->query);
        }
        end = question;
    }

    /* Find parameters */
    semi = memchr (url_string, ';', end - url_string);
    if (semi) {
        if (semi[1]) {
            const char *cur, *p, *eq;
            char *name, *value;

            for (cur = semi + 1; cur < end; cur = p + 1) {
                p = memchr (cur, ';', end - cur);
                if (!p)
                    p = end;
                eq = memchr (cur, '=', p - cur);
                if (eq) {
                    name = g_strndup (cur, eq - cur);
                    value = g_strndup (eq + 1, p - (eq + 1));
                    camel_url_decode (value);
                } else {
                    name = g_strndup (cur, p - cur);
                    value = g_strdup ("");
                }
                camel_url_decode (name);
                g_datalist_set_data_full (&url->params, name,
                              value, g_free);
                g_free (name);
            }
        }
        end = semi;
    }

    if (end != url_string) {
        url->path = g_strndup (url_string, end - url_string);
        camel_url_decode (url->path);
    }

    /* Apply base URL. Again, this is spelled out in RFC 1808. */
    if (base && !url->protocol && url->host)
        url->protocol = g_strdup (base->protocol);
    else if (base && !url->protocol) {
        if (!url->user && !url->authmech && !url->passwd &&
            !url->host && !url->port && !url->path &&
            !url->params && !url->query && !url->fragment)
            url->fragment = g_strdup (base->fragment);

        url->protocol = g_strdup (base->protocol);
        url->user = g_strdup (base->user);
        url->authmech = g_strdup (base->authmech);
        url->passwd = g_strdup (base->passwd);
        url->host = g_strdup (base->host);
        url->port = base->port;

        if (!url->path) {
            url->path = g_strdup (base->path);
            if (!url->params) {
                g_datalist_foreach (&base->params, copy_param,
                            &url->params);
                if (!url->query)
                    url->query = g_strdup (base->query);
            }
        } else if (*url->path != '/') {
            char *newpath, *last, *p, *q;

            last = strrchr (base->path, '/');
            if (last) {
                newpath = g_strdup_printf ("%.*s/%s",
                               last - base->path,
                               base->path,
                               url->path);
            } else
                newpath = g_strdup_printf ("/%s", url->path);

            /* Remove "./" where "." is a complete segment. */
            for (p = newpath + 1; *p; ) {
                if (*(p - 1) == '/' &&
                    *p == '.' && *(p + 1) == '/')
                    memmove (p, p + 2, strlen (p + 2) + 1);
                else
                    p++;
            }
            /* Remove "." at end. */
            if (p > newpath + 2 &&
                *(p - 1) == '.' && *(p - 2) == '/')
                *(p - 1) = '\0';
            /* Remove "<segment>/../" where <segment> != ".." */
            for (p = newpath + 1; *p; ) {
                if (!strncmp (p, "../", 3)) {
                    p += 3;
                    continue;
                }
                q = strchr (p + 1, '/');
                if (!q)
                    break;
                if (strncmp (q, "/../", 4) != 0) {
                    p = q + 1;
                    continue;
                }
                memmove (p, q + 4, strlen (q + 4) + 1);
                p = newpath + 1;
            }
            /* Remove "<segment>/.." at end */
            q = strrchr (newpath, '/');
            if (q && !strcmp (q, "/..")) {
                p = q - 1;
                while (p > newpath && *p != '/')
                    p--;
                if (strncmp (p, "/../", 4) != 0)
                    *(p + 1) = 0;
            }
            g_free (url->path);
            url->path = newpath;
        }
    }

    return url;
}

static void
copy_param (GQuark key_id, gpointer data, gpointer user_data)
{
    GData **copy = user_data;

    g_datalist_id_set_data_full (copy, key_id, g_strdup (data), g_free);
}

/**
 * camel_url_new:
 * @url_string: a URL
 * @ex: a CamelException
 *
 * Parses an absolute URL.
 *
 * Return value: a CamelURL, or %NULL.
 **/
CamelURL *
camel_url_new (const char *url_string, CamelException *ex)
{
    CamelURL *url = camel_url_new_with_base (NULL, url_string);

    if (!url->protocol) {
        camel_url_free (url);
        camel_exception_setv (ex, CAMEL_EXCEPTION_SERVICE_URL_INVALID,
                      _("Could not parse URL `%s'"),
                      url_string);
        return NULL;
    }
    return url;
}

/**
 * camel_url_to_string:
 * @url: a CamelURL
 * @flags: additional translation options.
 *
 * Return value: a string representing @url, which the caller must free.
 **/
char *
camel_url_to_string (CamelURL *url, guint32 flags)
{
    GString *str;
    char *return_result;
    
    /* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
     * tests/misc/url AFTERWARD.
     */
    
    str = g_string_sized_new (20);
    
    if (url->protocol)
        g_string_append_printf (str, "%s:", url->protocol);
    
    if (url->host) {
        g_string_append (str, "//");
        if (url->user) {
            append_url_encoded (str, url->user, ":;@/");
            if (url->authmech && *url->authmech) {
                g_string_append (str, ";auth=");
                append_url_encoded (str, url->authmech, ":@/");
            }
            if (url->passwd && !(flags & CAMEL_URL_HIDE_PASSWORD)) {
                g_string_append_c (str, ':');
                append_url_encoded (str, url->passwd, "@/");
            }
            g_string_append_c (str, '@');
        }
        append_url_encoded (str, url->host, ":/");
        if (url->port)
            g_string_append_printf (str, ":%d", url->port);
        if (!url->path && (url->params || url->query || url->fragment))
            g_string_append_c (str, '/');
    }
    
    if (url->path)
        append_url_encoded (str, url->path, ";?");
    if (url->params && !(flags & CAMEL_URL_HIDE_PARAMS))
        g_datalist_foreach (&url->params, output_param, str);
    if (url->query) {
        g_string_append_c (str, '?');
        append_url_encoded (str, url->query, NULL);
    }
    if (url->fragment) {
        g_string_append_c (str, '#');
        append_url_encoded (str, url->fragment, NULL);
    }
    
    return_result = str->str;
    g_string_free (str, FALSE);
    
    return return_result;
}

static void
output_param (GQuark key_id, gpointer data, gpointer user_data)
{
    GString *str = user_data;

    g_string_append_c (str, ';');
    append_url_encoded (str, g_quark_to_string (key_id), "?=");
    if (*(char *)data) {
        g_string_append_c (str, '=');
        append_url_encoded (str, data, "?");
    }
}

/**
 * camel_url_free:
 * @url: a CamelURL
 *
 * Frees @url
 **/
void
camel_url_free (CamelURL *url)
{
    if (url) {
        g_free (url->protocol);
        g_free (url->user);
        g_free (url->authmech);
        g_free (url->passwd);
        g_free (url->host);
        g_free (url->path);
        g_datalist_clear (&url->params);
        g_free (url->query);
        g_free (url->fragment);
        
        g_free (url);
    }
}


#define DEFINE_CAMEL_URL_SET(part)          \
void                            \
camel_url_set_##part (CamelURL *url, const char *part)  \
{                           \
    g_free (url->part);             \
    url->part = g_strdup (part);            \
}

DEFINE_CAMEL_URL_SET (protocol)
DEFINE_CAMEL_URL_SET (user)
DEFINE_CAMEL_URL_SET (authmech)
DEFINE_CAMEL_URL_SET (passwd)
DEFINE_CAMEL_URL_SET (host)
DEFINE_CAMEL_URL_SET (path)
DEFINE_CAMEL_URL_SET (query)
DEFINE_CAMEL_URL_SET (fragment)

void
camel_url_set_port (CamelURL *url, int port)
{
    url->port = port;
}

void
camel_url_set_param (CamelURL *url, const char *name, const char *value)
{
    g_datalist_set_data_full (&url->params, name, value ? g_strdup (value) : NULL, g_free);
}

const char *
camel_url_get_param (CamelURL *url, const char *name)
{
    return g_datalist_get_data (&url->params, name);
}

/* From RFC 2396 2.4.3, the characters that should always be encoded */
static const char url_encoded_char[] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x00 - 0x0f */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x10 - 0x1f */
    1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  ' ' - '/'  */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,  /*  '0' - '?'  */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  '@' - 'O'  */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,  /*  'P' - '_'  */
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  '`' - 'o'  */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,  /*  'p' - 0x7f */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};

static void
append_url_encoded (GString *str, const char *in, const char *extra_enc_chars)
{
    const unsigned char *s = (const unsigned char *)in;

    while (*s) {
        if (url_encoded_char[*s] ||
            (extra_enc_chars && strchr (extra_enc_chars, *s)))
            g_string_append_printf (str, "%%%02x", (int)*s++);
        else
            g_string_append_c (str, *s++);
    }
}

/**
 * camel_url_encode:
 * @part: a URL part
 * @escape_extra: additional characters beyond " \"%#<>{}|\^[]`"
 * to escape (or %NULL)
 *
 * This %-encodes the given URL part and returns the escaped version
 * in allocated memory, which the caller must free when it is done.
 **/
char *
camel_url_encode (const char *part, const char *escape_extra)
{
    GString *str;
    char *encoded;

    str = g_string_new (NULL);
    append_url_encoded (str, part, escape_extra);
    encoded = str->str;
    g_string_free (str, FALSE);

    return encoded;
}

/**
 * camel_url_decode:
 * @part: a URL part
 *
 * %-decodes the passed-in URL *in place*. The decoded version is
 * never longer than the encoded version, so there does not need to
 * be any additional space at the end of the string.
 */
void
camel_url_decode (char *part)
{
    unsigned char *s, *d;

#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)

    s = d = (unsigned char *)part;
    do {
        if (*s == '%' && s[1] && s[2]) {
            *d++ = (XDIGIT (s[1]) << 4) + XDIGIT (s[2]);
            s += 2;
        } else
            *d++ = *s;
    } while (*s++);
}


guint
camel_url_hash (const void *v)
{
    const CamelURL *u = v;
    guint hash = 0;

#define ADD_HASH(s) if (s) hash ^= g_str_hash (s);

    ADD_HASH (u->protocol);
    ADD_HASH (u->user);
    ADD_HASH (u->authmech);
    ADD_HASH (u->host);
    ADD_HASH (u->path);
    ADD_HASH (u->query);
    hash ^= u->port;
    
    return hash;
}

static int
check_equal (char *s1, char *s2)
{
    if (s1 == NULL) {
        if (s2 == NULL)
            return TRUE;
        else
            return FALSE;
    }
    
    if (s2 == NULL)
        return FALSE;

    return strcmp (s1, s2) == 0;
}

int
camel_url_equal(const void *v, const void *v2)
{
    const CamelURL *u1 = v, *u2 = v2;

    return check_equal(u1->protocol, u2->protocol)
        && check_equal(u1->user, u2->user)
        && check_equal(u1->authmech, u2->authmech)
        && check_equal(u1->host, u2->host)
        && check_equal(u1->path, u2->path)
        && check_equal(u1->query, u2->query)
        && u1->port == u2->port;
}