From fa02e427520a3d30c5f3d65dc58690a969f25097 Mon Sep 17 00:00:00 2001 From: NotZed Date: Fri, 21 Apr 2000 17:39:06 +0000 Subject: Removed. 2000-04-20 NotZed * providers/mbox/camel-mbox-utils.[ch]: Removed. * providers/mbox/camel-mbox-parser.[ch]: Removed. Removed references to it. svn path=/trunk/; revision=2547 --- camel/providers/mbox/camel-mbox-parser.c | 865 ------------------------------- 1 file changed, 865 deletions(-) delete mode 100644 camel/providers/mbox/camel-mbox-parser.c (limited to 'camel/providers/mbox/camel-mbox-parser.c') diff --git a/camel/providers/mbox/camel-mbox-parser.c b/camel/providers/mbox/camel-mbox-parser.c deleted file mode 100644 index b9091278fb..0000000000 --- a/camel/providers/mbox/camel-mbox-parser.c +++ /dev/null @@ -1,865 +0,0 @@ -/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ -/* camel-mbox-parser.c : mbox folder parser */ - -/* - * - * Author : Bertrand Guiheneuf - * - * Copyright (C) 1999 Helix Code . - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - */ - - -#include -#include "camel-mbox-parser.h" -#include "camel-mbox-utils.h" -#include "camel-exception.h" -#include -#include -#include -#include -#include -#include - - - -#define MBOX_PARSER_BUF_SIZE 10000 - -#define MBOX_PARSER_FROM_KW "from:" -#define MBOX_PARSER_FROM_KW_SZ 5 - -#define MBOX_PARSER_DATE_KW "date:" -#define MBOX_PARSER_DATE_KW_SZ 5 - -#define MBOX_PARSER_SUBJECT_KW "subject:" -#define MBOX_PARSER_SUBJECT_KW_SZ 8 - -#define MBOX_PARSER_TO_KW "to:" -#define MBOX_PARSER_TO_KW_SZ 3 - -#define MBOX_PARSER_X_EVOLUTION_KW "x-evolution:" -#define MBOX_PARSER_X_EVOLUTION_KW_SZ 12 - -/* the maximum lentgh of all the previous keywords */ -#define MBOX_PARSER_MAX_KW_SIZE 12 - - -#define MBOX_PARSER_SUMMARY_SIZE 150 - - - - - - -typedef struct { - - int fd; /* file descriptor of the mbox file */ - glong real_position; /* real position in the file */ - - - gchar *message_delimiter; /* message delimiter string */ - guint message_delimiter_length; - - guint message_summary_size; /* how many characters from the begining of the - mail to put into the message summary */ - - GArray *preparsed_messages; /* array of MessagePreParsingInfo */ - CamelMboxParserMessageInfo current_message_info; /* used to store curent info */ - gboolean is_pending_message; /* is there some message information pending ? */ - - /* buffer info */ - gchar *buffer; /* temporary buffer */ - guint left_chunk_size; /* size of the left chunk in the temp buffer */ - guint last_position; /* last position that can be compared to a keyword */ - guint current_position; /* current position in the temp buffer */ - - /* other */ - GString *tmp_string; /* temporary string to fill the headers in */ - - - -} CamelMboxPreParser; - - -/* clear a preparsing info structure */ -static void -clear_message_info (CamelMboxParserMessageInfo *preparsing_info) -{ - - preparsing_info->message_position = 0; - preparsing_info->size = 0; - preparsing_info->from = NULL; - preparsing_info->date = NULL; - preparsing_info->subject = NULL; - preparsing_info->priority = NULL; - preparsing_info->references = NULL; - preparsing_info->body_summary = NULL; - preparsing_info->end_of_headers_offset = 0; - - preparsing_info->x_evolution_offset = 0; - preparsing_info->status = 0; - preparsing_info->uid = 0; -} - - - -/** - * new_parser: create a new parser object - * @fd: file descriptor opened on the mbox file - * @message_delimiter: the string that announce the start of a new message. - * - * Create a new parser object. This object is the place where are - * stored all the information concerning the parsing process. - * - * Return value: The newly created parser object. - **/ -static CamelMboxPreParser * -new_parser (int fd, - const gchar *message_delimiter) -{ - CamelMboxPreParser *parser; - - parser = g_new0 (CamelMboxPreParser, 1); - - parser->fd = fd; - parser->buffer = g_new (gchar, MBOX_PARSER_BUF_SIZE); - parser->current_position = 0; - parser->message_delimiter = g_strdup (message_delimiter); - parser->message_delimiter_length = strlen (message_delimiter); - parser->real_position = 0; - parser->preparsed_messages = g_array_new (FALSE, FALSE, sizeof (CamelMboxParserMessageInfo)); - parser->message_summary_size = MBOX_PARSER_SUMMARY_SIZE; - - parser->left_chunk_size = MAX (parser->message_delimiter_length, MBOX_PARSER_MAX_KW_SIZE); - - parser->tmp_string = g_string_sized_new (1000); - - return parser; -} - - - -/** - * parser_free: free the parser object - * @parser: the parser objet to free. - * - * it is important to notice that all structures allocated - * in new_parser () are freed ** EXCEPT ** the message - * information array, i.e. the preparsed_messages - * field. - **/ -static void -parser_free (CamelMboxPreParser *parser) -{ - g_free (parser->buffer); - g_free (parser->message_delimiter); - g_string_free (parser->tmp_string, TRUE); - g_free (parser); -} - - - - -/* ** handle exceptions here */ -/** - * initialize_buffer: read the first chunk of data in the buffer - * @parser: parser object to fill - * @first_position: position to start the read at - * - * read the first chunk of data from the mbox file. - * - **/ -static void -initialize_buffer (CamelMboxPreParser *parser, - glong first_position) -{ - gint seek_res; - gint buf_nb_read; - - g_assert (parser); - - /* set the search start position */ - seek_res = lseek (parser->fd, first_position, SEEK_SET); - //if (seek_res == (off_t)-1) goto io_error; - - - /* the first part of the buffer is filled with newlines, - but the next time a chunk of buffer is read, it will - be filled with the last bytes of the previous chunk. - This allows simple g_strcasecmp to test for the presence of - the keyword */ - memset (parser->buffer, '\n', parser->left_chunk_size); - do { - buf_nb_read = read (parser->fd, parser->buffer + parser->left_chunk_size, - MBOX_PARSER_BUF_SIZE - parser->left_chunk_size); - } while ((buf_nb_read == -1) && (errno == EINTR)); - /* ** check for an error here */ - - if (buf_nb_read < MBOX_PARSER_BUF_SIZE - parser->left_chunk_size) { - /* fill the end of the buffer with 0\ */ - memset (parser->buffer + buf_nb_read + parser->left_chunk_size, '\0', - MIN (parser->left_chunk_size, MBOX_PARSER_BUF_SIZE - buf_nb_read - parser->left_chunk_size)); - }; - - parser->last_position = MIN (buf_nb_read + parser->left_chunk_size + 1, - MBOX_PARSER_BUF_SIZE - parser->left_chunk_size); - parser->current_position = parser->left_chunk_size; -} - - - - -/** - * read_next_buffer_chunk: read the next chunk of data in the mbox file - * @parser: parser object - * - * read the next chunk of data in the mbox file. - * Routine copies the last part of the buffer at - * the begining are concatenate the read data to - * it. This allows strcmp of keywords in the buffer, - * until the last postion. That means you can - * do a strcmp (buffer, keyword) for any of the - * keyword defined at the begining of this file. - * - **/ -static void -read_next_buffer_chunk (CamelMboxPreParser *parser) -{ - gint buf_nb_read; - - g_assert (parser); - - /* read the next chunk of data in the folder file : */ - /* - first, copy the last bytes from the previous - chunk at the begining of the new one. */ - memcpy (parser->buffer, - parser->buffer + MBOX_PARSER_BUF_SIZE - parser->left_chunk_size, - parser->left_chunk_size); - - /* - then read the next chunk on disk */ - do { - buf_nb_read = read (parser->fd, - parser->buffer + parser->left_chunk_size, - MBOX_PARSER_BUF_SIZE - parser->left_chunk_size); - } while ((buf_nb_read == -1) && (errno == EINTR)); - /* ** check for an error here */ - - if (buf_nb_read < MBOX_PARSER_BUF_SIZE - parser->left_chunk_size) { - /* fill the end of the buffer with 0\ */ - memset (parser->buffer + buf_nb_read + parser->left_chunk_size, '\0', - MIN (parser->left_chunk_size, MBOX_PARSER_BUF_SIZE - buf_nb_read - parser->left_chunk_size)); - }; - - parser->last_position = MIN (buf_nb_read + parser->left_chunk_size + 1, - MBOX_PARSER_BUF_SIZE - parser->left_chunk_size); - - parser->current_position = 0; -} - - - -/** - * goto_next_char: go one postion forward in the buffer - * @parser: parser object - * - * goto one position forward in the buffer. If necessary, - * read the next chunk of data in the file. - * - **/ -static void -goto_next_char (CamelMboxPreParser *parser) -{ - - if (parser->current_position < parser->last_position - 1) - parser->current_position++; - else - read_next_buffer_chunk (parser); - - parser->real_position++; - -} - - - - - - - -/** - * advance_n_chars: go n positions forward in the buffer. - * @parser: parser object - * @n: number of characters to advance. - * - **/ -static void -advance_n_chars (CamelMboxPreParser *parser, guint n) -{ - - gint position_to_the_end; - - position_to_the_end = parser->last_position - parser->current_position; - - if (n < position_to_the_end) - parser->current_position += n; - else { - read_next_buffer_chunk (parser); - parser->current_position = n - position_to_the_end; - } - - parser->real_position += n; -} - - - - - - -/* called when the buffer has detected the begining of - a new message. This routine is supposed to simply - store the previous message information and - clean the temporary structure used to store - the informations */ - - -/** - * new_message_detected: routine to call when a new message has been detected - * @parser: parser object. - * - * this routine must be called when the keyword determining the - * begining of a new message has been detected. It pushes the - * information fetched for the last message into the message information - * array. Also, it gets the parser to the end of the line. - **/ -static void -new_message_detected (CamelMboxPreParser *parser) -{ - - gchar c; - - /* if we were filling a message information - save it in the message information array */ - if (parser->is_pending_message) { - parser->current_message_info.size = - parser->real_position - parser->current_message_info.message_position; - g_array_append_vals (parser->preparsed_messages, (gchar *)parser + - G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info), 1); - } - - clear_message_info ( &(parser->current_message_info)); - - /* go to the end of the line */ - do { - - c = parser->buffer[parser->current_position]; - goto_next_char (parser); - - } while (c != '\n'); - - /* save message position in the message information structure */ - (parser->current_message_info).message_position = parser->real_position; - - parser->is_pending_message = TRUE; -} - - - - - - - -/** - * read_header: read the header content contained after the current position. - * @parser: the parser object. - * @header_content: a pointer on a (char *) variable to feed with the obtained header string. - * - * This routine must be called when the parser has detected a header - * and it wants the header content to be stored. The parser current position - * must EXACTLY be located at the begining of the header content line. - * For example, if the file contains the line : - * from:Bertrand Guiheneuf - * - * When this routine is called, the parser must be located - * on the "B" of "Bertrand". - * - * When this routine returns, the parser is located just - * after the "\n" at the end of the header content. - * - **/ -static void -read_header (CamelMboxPreParser *parser, gchar **header_content) -{ - gboolean space = FALSE; - gboolean newline = FALSE; - gboolean header_end = FALSE; - gchar *buffer; - gchar c; - - g_assert (parser); - - /* reset the header buffer string */ - parser->tmp_string = g_string_truncate (parser->tmp_string, 0); - - buffer = parser->buffer; - - /* read the current character */ - c = buffer[parser->current_position]; - - while (! ((c == '\0') || header_end )) { - - if (space) { - if (c == ' ' && c == '\t') - goto next_char; - else - space = FALSE; - } - - if (newline) { - if (c == ' ' && c == '\t') { - - space = TRUE; - newline = FALSE; - goto next_char; - } else { - - header_end = TRUE; - continue; - } - } - - if (c == '\n') { - newline = TRUE; - goto next_char; - } - - /* feed the header content */ - parser->tmp_string = g_string_append_c (parser->tmp_string, c); - - next_char: /* read next char in the buffer */ - goto_next_char (parser); - /* read the current character */ - c = buffer[parser->current_position]; - } - - - /* FIXME: this can cause a memory leak, for duplicated headers? */ - - /* copy the buffer in the preparsing information structure */ - *header_content = g_strndup (parser->tmp_string->str, parser->tmp_string->len); -} - - - - - - - -/** - * read_message_begining: read the first characters of a message body - * @parser: parser object - * @message_summary: a pointer on a (gchar *) variable where the obtained string will be stored. - * - * Read the first lines of a message. When calling this routine, the - * parser must be located at the begining of the message body. - * - * Return value: if the parsing inside this routine last read a newline, then %TRUE is returned, otherwise %FALSE is returned - **/ -static gboolean -read_message_begining (CamelMboxPreParser *parser, gchar **message_summary) -{ - guint nb_read = 0; - gchar *buffer; - gboolean new_message = FALSE; - guint nb_line = 0; - g_assert (parser); - - /* reset the header buffer string */ - parser->tmp_string = g_string_truncate (parser->tmp_string, 0); - - buffer = parser->buffer; - /* the message should not be filled character by - character but there is no g_string_n_append - function, so for the moment, this is a lazy - implementation */ - while (! (buffer[parser->current_position] != '\0') && - (nb_line <2) && (nb_readmessage_summary_size) && - (!new_message)) { - - - /* test if we are not at the end of the message */ - if (buffer[parser->current_position] == '\n') { - - nb_line++; - goto_next_char (parser); - if ((buffer[parser->current_position] == '\0') || - (g_strncasecmp (parser->buffer + parser->current_position, - parser->message_delimiter, - parser->message_delimiter_length) == 0)) { - new_message = TRUE; - continue; - } else { - /* we're not at the end, so let's just add the cr to the summary */ - parser->tmp_string = g_string_append_c (parser->tmp_string, - '\n'); - nb_read++; - continue; - } - - - } - - parser->tmp_string = g_string_append_c (parser->tmp_string, - buffer[parser->current_position]); - nb_read++; - goto_next_char (parser); - } - - *message_summary = g_strndup (parser->tmp_string->str, parser->tmp_string->len); - - return new_message; -} - - - - - - - - - - -/** - * camel_mbox_parse_file: read an mbox file and parse it. - * @fd: file descriptor opened on the mbox file. - * @message_delimiter: character string delimiting the beginig of a - * new message - * @start_position: position in the file where to start the parsing. - * @file_size: on output, the size in bytes of the file - * @next_uid: on output, the next uid available for use - * @get_message_summary: should the parser retrieve the begining of - * the messages - * @status_callback: function to call peridically to indicate the - * progress of the parser - * @status_interval: floating value between 0 and 1 indicate how often - * to call @status_callback. - * @user_data: user data that will be passed to the callback function - * - * This routine parses an mbox file and retreives both the message - * starting positions and some of the informations contained in the - * message. Those informations are mainly some RFC822 headers values - * but also (optionally) the first characters of the mail body. The - * @get_message_summary parameter allows to enable or disable this - * option. - * - * - * Return value: An array of CamelMboxParserMessageInfo containing the - * informations on each message parsed in the file - **/ -GArray * -camel_mbox_parse_file (int fd, - const gchar *message_delimiter, - glong start_position, - guint32 *file_size, - guint32 *next_uid, - gboolean get_message_summary, - camel_mbox_preparser_status_callback *status_callback, - double status_interval, - gpointer user_data) -{ - CamelMboxPreParser *parser; - gboolean is_parsing_a_message = FALSE; - gchar c; - struct stat stat_buf; - gint fstat_result; - glong total_file_size; - int last_status = 0; - int real_interval; - gboolean newline; - GArray *return_value; - gchar *x_ev_header_content; - guint32 next_available_uid = 1; - - - g_assert (next_uid); - - /* get file size */ - fstat_result = fstat (fd, &stat_buf); - if (fstat_result == -1) { - g_warning ("Manage exception here \n"); - } - - total_file_size = stat_buf.st_size; - real_interval = status_interval * total_file_size; - - - /* create the parser */ - parser = new_parser (fd, message_delimiter); - - /* initialize the temporary char buffer */ - initialize_buffer (parser, start_position); - - /* the first line is indeed at the begining of a new line ... */ - newline = TRUE; - - while (parser->buffer[parser->current_position] != '\0') { - - /* read the current character */ - if (!newline) { - c = parser->buffer[parser->current_position]; - newline = (c == '\n'); - goto_next_char (parser); - } - - if (newline) { - - /* check if we reached a status milestone */ - if ( status_callback && ((parser->real_position - last_status) > real_interval)) { - last_status += real_interval; - status_callback ((double)last_status / (double)total_file_size, - user_data); - } - - /* is the next part a message delimiter ? */ - if (strncmp (parser->buffer + parser->current_position, - parser->message_delimiter, - parser->message_delimiter_length) == 0) { - - is_parsing_a_message = TRUE; - new_message_detected (parser); - newline = TRUE; - continue; - } - - - if (is_parsing_a_message) { - /* we could find the headers in a clever way, like - storing them in a list of pair - [keyword, offset_in_CamelMboxParserMessageInfo] - I am too busy for now. Contribution welcome */ - - /* is the next part a "from" header ? */ - if (g_strncasecmp (parser->buffer + parser->current_position, - MBOX_PARSER_FROM_KW, - MBOX_PARSER_FROM_KW_SZ) == 0) { - - advance_n_chars (parser, MBOX_PARSER_FROM_KW_SZ); - read_header (parser, (gchar **) ((gchar *)parser + - G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info) + - G_STRUCT_OFFSET (CamelMboxParserMessageInfo, from))); - - newline = TRUE; - continue; - } - - /* is the next part a "Date" header ? */ - if (g_strncasecmp (parser->buffer + parser->current_position, - MBOX_PARSER_DATE_KW, - MBOX_PARSER_DATE_KW_SZ) == 0) { - - advance_n_chars (parser, MBOX_PARSER_DATE_KW_SZ); - read_header (parser, (gchar **) ((gchar *)parser + - G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info) + - G_STRUCT_OFFSET (CamelMboxParserMessageInfo, date))); - - newline = TRUE; - continue; - } - - - /* is the next part a "Subject" header ? */ - if (g_strncasecmp (parser->buffer + parser->current_position, - MBOX_PARSER_SUBJECT_KW, - MBOX_PARSER_SUBJECT_KW_SZ) == 0) { - - advance_n_chars (parser, MBOX_PARSER_SUBJECT_KW_SZ); - read_header (parser, (gchar **) ((gchar *)parser + - G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info) + - G_STRUCT_OFFSET (CamelMboxParserMessageInfo, subject))); - - newline = TRUE; - continue; - } - - - /* is the next part a "To" header ? */ - if (g_strncasecmp (parser->buffer + parser->current_position, - MBOX_PARSER_TO_KW, - MBOX_PARSER_TO_KW_SZ) == 0) { - - advance_n_chars (parser, MBOX_PARSER_TO_KW_SZ); - read_header (parser, (gchar **) ((gchar *)parser + - G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info) + - G_STRUCT_OFFSET (CamelMboxParserMessageInfo, to))); - - newline = TRUE; - continue; - } - - - /* is the next part a "X-evolution" header ? */ - if (g_strncasecmp (parser->buffer + parser->current_position, - MBOX_PARSER_X_EVOLUTION_KW, - MBOX_PARSER_X_EVOLUTION_KW_SZ) == 0) { - - /* in the case of the evolution private field, we store - the field position as well as its length because - we will have to change them */ - parser->current_message_info.x_evolution_offset = parser->real_position - - parser->current_message_info.message_position; - advance_n_chars (parser, MBOX_PARSER_X_EVOLUTION_KW_SZ); - - /* read the header */ - read_header (parser, &x_ev_header_content); - - /* parse it and put the result in the uid and status fields */ - camel_mbox_xev_parse_header_content (x_ev_header_content, - (guint32 *) ((gchar *)parser + - G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info) + - G_STRUCT_OFFSET (CamelMboxParserMessageInfo, uid)), - (guchar *) ((gchar *)parser + - G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info) + - G_STRUCT_OFFSET (CamelMboxParserMessageInfo, status))); - g_free (x_ev_header_content); - next_available_uid = MAX (next_available_uid, parser->current_message_info.uid + 1); - - newline = TRUE; - continue; - } - - - - - /* is it an empty line ? */ - if (parser->buffer[parser->current_position] == '\n') { - - parser->current_message_info.end_of_headers_offset = - parser->real_position - parser->current_message_info.message_position; - - goto_next_char (parser); - if (get_message_summary) - newline = read_message_begining (parser, (gchar **) ((gchar *)parser + - G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info) + - G_STRUCT_OFFSET (CamelMboxParserMessageInfo, body_summary))); - - is_parsing_a_message = FALSE; - continue; - } - } - newline = FALSE; - } - - } - - /* if there is a pending message information put it in the array */ - if (parser->is_pending_message) { - parser->current_message_info.size = - parser->real_position - parser->current_message_info.message_position; - g_array_append_vals (parser->preparsed_messages, (gchar *)parser + - G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info), 1); - } - - return_value = parser->preparsed_messages; - *file_size = parser->real_position; - *next_uid = next_available_uid; - /* free the parser */ - parser_free (parser); - - return return_value; -} - - - - - - - - - - -#ifdef MBOX_PARSER_TEST -/* to build the test : - - gcc -O3 -I/opt/gnome/lib/glib/include `glib-config --cflags` -o test_parser -DMBOX_PARSER_TEST -I ../.. -I ../../.. -I /usr/lib/glib/include camel-mbox-parser.c `glib-config --libs` -lm - - - */ - - -#include - -static void -status (double done, gpointer user_data) -{ - printf ("%d %% done\n", (int)floor (done * 100)); -} -int -main (int argc, char **argv) -{ - int test_file_fd; - int i; - int file_size; - int next_uid; - GArray *message_positions; - CamelMboxParserMessageInfo *message_info; - gchar tmp_buffer[50]; - - tmp_buffer[49] = '\0'; - - if (argc<2) { - printf("usage: %s mbox\n", argv[0]); - return 1; - } - - test_file_fd = open (argv[1], O_RDONLY); - message_positions = camel_mbox_parse_file (test_file_fd, - "From ", - 0, - &file_size, - &next_uid, - TRUE, - status, - 0.05, - NULL); - - printf ("Found %d messages \n", message_positions->len); - - - for (i=0; ilen; i++) { - - message_info = ((CamelMboxParserMessageInfo *)(message_positions->data)) + i; - printf ("\n\n** Message %d : \n", i); - printf ("Size : %d\n", message_info->size); - printf ("From: %s\n", message_info->from); - printf ("Date: %s\n", message_info->date); - printf ("Subject: %s\n", message_info->subject); - printf ("Summary: %s\n", message_info->body_summary) ; - - - lseek (test_file_fd, message_info->message_position, SEEK_SET); - read (test_file_fd, tmp_buffer, 49); - printf ("File content at position %d : \n===\n%s\n===\n", message_info->message_position, tmp_buffer); - - } - - - - return 0; -} - - - - -#endif /* MBOX_PARSER_TEST */ -- cgit v1.2.3