aboutsummaryrefslogblamecommitdiffstats
path: root/camel/url-util.c
blob: cf2425fdc358b0657b67d96ba9c3f7d919955a89 (plain) (tree)
1
2
3
4
5
6


                                                                           
 
   
                                                                      


















                                                                      







                                                       

                                                                   
 
                   

                     


                                                                          
                                                                                            
 
                                                     
                
                                                                   
                                                                   
                                                    

                 





                                                                                           



   

                                                 

                                                     
                                                 







                                                  

                                                    
   
                                                           
    
                                   


                    





                        






                              
        























                                                                         
 
 

                                                            

 
                
                                                                         
 
 
                
                     





                               
        
                     



                        
                                                     
        



                                               

                                                     
                 
                                                     
                                    
                                    
                 

                     




 
               
                                                                     
 

                     
                     





                                
                     
                    
        
                        
                                                   



                                     
 
                        
                                                    
 
                                                                        



                              
        
                                                         


                                                                   
                        

 
               
                                                                       
 
                        

                          





                               
                     

                    
                        
                                                   







                                                                  
                                                           


                                            




               
                                                                     


                        
                     
        




                                


                     
                        
                                                   




                        
                                                       







                                                               
                                                           





                                                                      
 
               
                                                                     


                        
                     
        




                                

                     
        
                        
                                                   




                        
                                                       
 

                                                                 

                                         
                                                         

                      
 

 
               
                                                                     

                
                     
        




                               




                        
                                                                      



                                                                
                                                          




                    

 



 
                       
                                                                                                    
                      
  
                    
 

 
    
                            
 
 






                        



                         










                                                        

                                            
                                        



                                                                         


                                                            
                                                           

                                                                                  
                                    

                                                                  
                
         
         
 
 
                          
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* url-util.c : utility functions to parse URLs */


/* 
 * Copyright (C) 1999 Bertrand Guiheneuf <Bertrand.Guiheneuf@inria.fr>
 *
 * This program is free software; you can redistribute it and/or 
 * modify it under the terms of the GNU General Public License as 
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */



/* 
   Here we deal with URL following the general scheme:
   protocol://user:password@host:port/name
   where name is a path-like string (ie dir1/dir2/....)
   See RFC 1738 for the complete description of
   Uniform Resource Locators
   
     Bertrand. */
/*
  XXX TODO: recover  the words between #'s or ?'s after the path */

#include <config.h>

#include <stdio.h>
#include <string.h>

#include "url-util.h"

/* general item finder */
/* It begins the search at position @position in @url, returns TRUE when
   the item is found, and sets @position after the item.
   On success *@item receives a newly allocated copy of the item; when
   the item is not found *@item is set to NULL.
   Among the finders in this file only find_protocol writes to @error
   (it resets it to FALSE); the others leave it untouched. */
typedef gboolean find_item_func(gchar *url, gchar **item, guint *position, gboolean *error);

/* Describes one parsing step (protocol, then user, ...).
   The steps are stored in an array and run in order, all of them
   sharing the same position cursor into the URL. */
typedef struct {
    char *item_name;           /* item name : for debug output only */
    gchar **item_value;      /* where the finder stores the item value (NULL if not found) */
    find_item_func *find_func; /* the finder to call for this step */
} FindStepStruct;

/* The item finders, declared in the order in which they are applied to
   a URL of the form protocol://user:password@host:port/path.
   They all share the find_item_func signature. */
static gboolean find_protocol (gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean find_user (gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean find_passwd (gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean find_host (gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean find_port (gchar *url, gchar **item, guint *position, gboolean *error);
static gboolean find_path (gchar *url, gchar **item, guint *position, gboolean *error);



/**
 * g_url_new: create a Gurl object from a string
 *
 * @url_string: The string containing the URL to scan. Must not be
 * NULL (the item finders assert on it).
 * 
 * This routine takes a string and parses it as a
 * URL of the form:
 * protocol://user:password@host:port/path
 * There is no test on the values. For example,
 * "port" can be any string, not only a number!
 * The Gurl structure fields are filled with
 * the scan results. When a member of the
 * general URL can not be found, the corresponding
 * Gurl member is NULL.
 * Fields filled in the Gurl structure are newly allocated
 * copies and url_string is not modified.
 * 
 * Return value: a newly allocated Gurl structure containing the URL items.
 **/
Gurl *g_url_new (gchar* url_string)
{
    Gurl *g_url;
    guint position = 0;
    gboolean error = FALSE;
    guint i;

    /* zero-filled, so that a member which no finder writes to
       does not hold an indeterminate value */
    g_url = g_new0 (Gurl, 1);

#define NB_STEP_URL  6
    {
        /* the steps must stay in URL order: they all advance the
           same position cursor */
        FindStepStruct step[NB_STEP_URL] = {
            { "protocol", &(g_url->protocol), find_protocol},
            { "user", &(g_url->user), find_user},
            { "password", &(g_url->passwd), find_passwd},
            { "host", &(g_url->host), find_host},
            { "port", &(g_url->port), find_port},
            { "path", &(g_url->path), find_path}
        };

        for (i = 0; i < NB_STEP_URL; i++) {
            /* the result is deliberately ignored: a missing item is
               not an error, its Gurl member simply stays NULL */
            step[i].find_func (url_string,
                               step[i].item_value,
                               &position,
                               &error);
        }
    }

    return g_url;
}





/** So, yes, I must admit there would have been more elegant
    ways to do this, but it works, and quite well :)  */


/*
 * find_protocol: extract the protocol (scheme) of the URL.
 *
 * Looks for the first ':' at or after *position and checks that it
 * starts the "://" separator.
 *
 * On success *item receives a newly allocated copy of the text found
 * between *position and the ':', *position is moved just after the
 * "://" and TRUE is returned.
 * Otherwise *item is NULL, *position is left untouched and FALSE is
 * returned.
 * *error is always reset to FALSE.
 */
static gboolean 
find_protocol(gchar *url, gchar **item, guint *position, gboolean *error)
{
    guint start;
    guint colon;
    guint len_url;

    g_assert (url);
    g_assert (item);
    g_assert (position);
    g_assert (error);

    len_url = strlen (url);

    *item = NULL;
    *error = FALSE;
    start = *position;

    /* find a ':' */
    colon = start;
    while ((colon < len_url) && (url[colon] != ':'))
        colon++;

    /* ">=" and not "==": never look at the string when the caller
       handed us a position that is already past its end */
    if (colon >= len_url)
        return FALSE;

    /* check that it is followed by a "//"; the comparison stops at
       the terminating NUL, so a short string is safe */
    if (strncmp (url + colon, "://", 3) != 0)
        return FALSE;

    /* copy from the starting position, not from the start of the
       string, so that a non-zero *position is honoured */
    *item = g_strndup (url + start, colon - start);
    *position = colon + 3;
    return TRUE;
}




/*
 * find_user: extract the user name of the URL.
 *
 * A user name is only present when an '@' appears in the
 * "user:password@host:port" part of the URL, that is before the first
 * '/' found from *position. An '@' located in the path is not a user
 * delimiter (RFC 1738 requires '@', ':' and '/' to be encoded inside
 * the user and password fields).
 *
 * On success *item receives a newly allocated copy of the user name
 * and *position is moved after it: after the ':' when a password
 * field follows, on the '@' otherwise (find_passwd consumes the '@').
 * When the user name is empty, *item is NULL, FALSE is returned and
 * only a leading ':' is skipped.
 * When there is no '@' at all, *position is left untouched.
 * error is not used.
 */
static gboolean
find_user(gchar *url, gchar **item, guint *position, gboolean *error)
{
    guint start;
    guint user_end;
    guint at_pos;
    guint len_url;

    g_assert (url);
    g_assert (item);
    g_assert (position);

    len_url = strlen (url);
    *item = NULL;
    start = *position;

    /* find a '@', without leaving the part that precedes the path */
    at_pos = start;
    while ((at_pos < len_url) && (url[at_pos] != '@') && (url[at_pos] != '/'))
        at_pos++;

    if ((at_pos >= len_url) || (url[at_pos] != '@'))
        return FALSE;

    /* find a ':' before the '@' */
    user_end = start;
    while ((user_end < at_pos) && (url[user_end] != ':'))
        user_end++;

    /* now if user_end has not moved at all, there is no user */
    if (user_end == start) {
        /* skip the ':' so that find_passwd starts on the password;
           a '@' is left in place, find_passwd knows how to skip it */
        if (user_end < at_pos)
            *position = user_end + 1;
        return FALSE;
    }

    *item = g_strndup (url + start, user_end - start);
    if (user_end < at_pos)
        *position = user_end + 1; /* there was a ':', skip it */
    else
        *position = user_end;

    return TRUE;
}

/*
 * find_passwd: extract the password of the URL.
 *
 * The password is the text found between *position and the next '@',
 * provided that this '@' comes before the first '/': an '@' located
 * in the path does not delimit a password.
 *
 * On success *item receives a newly allocated copy of the password,
 * *position is moved just after the '@' and TRUE is returned.
 * When *position is already on the '@' there is no password: *item is
 * NULL, the '@' is skipped and FALSE is returned.
 * When there is no '@' at all, *position is left untouched.
 * error is not used.
 */
static gboolean
find_passwd(gchar *url, gchar **item, guint *position, gboolean *error)
{
    guint start;
    guint at_pos;
    guint len_url;

    g_assert (url);
    g_assert (item);
    g_assert (position);

    len_url = strlen (url);
    *item = NULL;
    start = *position;

    /* find a '@', without leaving the part that precedes the path */
    at_pos = start;
    while ((at_pos < len_url) && (url[at_pos] != '@') && (url[at_pos] != '/'))
        at_pos++;

    if ((at_pos >= len_url) || (url[at_pos] != '@'))
        return FALSE;

    /* at_pos has not moved at all, there is no passwd */
    if (at_pos == start) {
        *position = at_pos + 1;
        return FALSE;
    }

    *item = g_strndup (url + start, at_pos - start);
    *position = at_pos + 1; /* skip the '@' */

    return TRUE;
}



/*
 * find_host: extract the host name of the URL.
 *
 * The host is the text found between *position and the first ':' or
 * '/' (or the end of the string).
 *
 * On success *item receives a newly allocated copy of the host and
 * *position is moved after it: after the ':' when a port follows, on
 * the '/' (or the end of the string) otherwise.
 * When the host is empty, *item is NULL and FALSE is returned; a ':'
 * introducing the port is skipped, but a '/' is left in place so that
 * it stays part of the path, and *position never moves past the end
 * of the string.
 * error is not used.
 */
static gboolean
find_host(gchar *url, gchar **item, guint *position, gboolean *error)
{
    guint start;
    guint host_end;
    guint slash_pos;
    guint len_url;

    g_assert (url);
    g_assert (item);
    g_assert (position);

    len_url = strlen (url);
    *item = NULL;
    start = *position;

    /* find a '/' */
    slash_pos = start;
    while ((slash_pos < len_url) && (url[slash_pos] != '/'))
        slash_pos++;

    /* find a ':' before it */
    host_end = start;
    while ((host_end < slash_pos) && (url[host_end] != ':'))
        host_end++;

    /* at this point if host_end has not moved at all,
       there is no host */
    if (host_end == start) {
        /* host_end < slash_pos means the scan stopped on a ':';
           only that delimiter may be skipped */
        if (host_end < slash_pos)
            *position = host_end + 1;
        return FALSE;
    }

    *item = g_strndup (url + start, host_end - start);
    if (host_end < slash_pos)
        *position = host_end + 1; /* there was a ':', skip it */
    else
        *position = host_end;

    return TRUE;
}


/*
 * find_port: extract the port of the URL.
 *
 * The port is the text found between *position and the first ':' or
 * '/' (or the end of the string). No check is made on its content.
 *
 * On success *item receives a newly allocated copy of the port,
 * *position is moved to the character following it and TRUE is
 * returned. When the port is empty, *item is NULL, *position is left
 * untouched and FALSE is returned.
 * error is not used.
 */
static gboolean
find_port(gchar *url, gchar **item, guint *position, gboolean *error)
{
    guint begin;
    guint limit;
    guint cursor;
    gint url_length;

    g_assert (url);
    g_assert (item);
    g_assert (position);

    url_length = strlen (url);
    *item = NULL;
    begin = *position;

    /* the port can not extend past the first '/' ... */
    for (limit = begin; (limit < url_length) && (url[limit] != '/'); limit++)
        ;

    /* ... nor past a ':' */
    for (cursor = begin; (cursor < limit) && (url[cursor] != ':'); cursor++)
        ;

    /* nothing between the starting point and the delimiter: no port */
    if (cursor == begin)
        return FALSE;

    *item = g_strndup (url + begin, cursor - begin);
    *position = cursor;

    return TRUE;
}


/*
 * find_path: extract the path of the URL.
 *
 * The path is the text found between *position and the first '#' or
 * '?' (or the end of the string).
 *
 * On success *item receives a newly allocated copy of the path,
 * *position is moved to the character following it and TRUE is
 * returned. When the path is empty, *item is NULL, *position is left
 * untouched and FALSE is returned.
 * error is not used.
 */
static gboolean
find_path(gchar *url, gchar **item, guint *position, gboolean *error)
{
    guint start;
    guint stop;
    gint total;

    g_assert (url);
    g_assert (item);
    g_assert (position);

    total = strlen (url);
    *item = NULL;
    start = *position;

    /* walk up to the first '#' or '?' */
    stop = start;
    while (stop < total) {
        if ((url[stop] == '#') || (url[stop] == '?'))
            break;
        stop++;
    }

    /* stop has not moved at all, there is no path */
    if (stop == start)
        return FALSE;

    *item = g_strndup (url + start, stop - start);
    *position = stop;

    return TRUE;
}






/* to test this file :
   gcc -o test_url_util `glib-config --cflags`  -I.. -DTEST_URL_UTIL url-util.c `glib-config --libs`
   ./test_url_util URL
*/
#ifdef TEST_URL_UTIL



int 
main (int argc, char **argv)
{

    gchar *url;
    gchar *protocol;
    gchar *user;
    gchar *passwd;
    gchar *host;
    gchar *port;
    gchar *path;
    guint position=0;
    gboolean error;
    gboolean found;
    guint i;
    guint i_pos;

#define NB_STEP_TEST  6
    FindStepStruct test_step[NB_STEP_TEST] = {
        { "protocol", &protocol, find_protocol},
        { "user", &user, find_user},
        { "password", &passwd, find_passwd},
        { "host", &host, find_host},
        { "port", &port, find_port},
        { "path", &path, find_path}
    };
    url = argv[1];
    printf("URL to test : %s\n\n", url);
    for (i=0; i<NB_STEP_TEST; i++) {
        found = test_step[i].find_func (url, 
                        test_step[i].item_value, 
                        &position, 
                        &error);
        if (found) {
            printf("\t\t\t\t** %s found : %s\n",
                   test_step[i].item_name,
                   *(test_step[i].item_value));
        } else printf("** %s not found in URL\n", test_step[i].item_name);
        printf("next item position:\n");
        printf("%s\n", url);
        for(i_pos=0; i_pos<position; i_pos++) printf(" ");
        printf("^\n");
        
    }
     
}

#endif /* TEST_URL_UTIL */