/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ /* camel-mbox-parser.c : mbox folder parser */ /* * * Copyright (C) 1999 Bertrand Guiheneuf . * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA */ #include #include "camel-mbox-parser.h" #include "camel-log.h" #include "camel-exception.h" #include #include #include #include #include #include #define MBOX_PARSER_BUF_SIZE 1000 #define MBOX_PARSER_FROM_KW "from:" #define MBOX_PARSER_FROM_KW_SZ 5 #define MBOX_PARSER_DATE_KW "date:" #define MBOX_PARSER_DATE_KW_SZ 5 #define MBOX_PARSER_X_EVOLUTION_KW "x-evolution:" #define MBOX_PARSER_X_EVOLUTION_KW_SZ 12 /* the maximum lentgh of all the previous keywords */ #define MBOX_PARSER_MAX_KW_SIZE 12 #define MBOX_PARSER_SUMMARY_SIZE 100 typedef struct { int fd; /* file descriptor of the mbox file */ guint real_position; /* real position in the file */ gchar *message_delimiter; /* message delimiter string */ guint message_delimiter_length; guint message_summary_size; /* how many characters from the begining of the mail to put into the message summary */ GArray *preparsed_messages; /* array of MessagePreParsingInfo */ CamelMboxParserMessageInfo current_message_info; /* used to store curent info */ gboolean is_pending_message; /* is there some message information pending ? */ /* buffer info */ gchar *buffer; /* temporary buffer */ guint left_chunk_size; /* size of the left chunk in the temp buffer */ guint last_position; /* last position that can be compared to a keyword */ guint current_position; /* current position in the temp buffer */ gboolean eof; /* did we read the entire file */ /* other */ GString *tmp_string; /* temporary string to fill the headers in */ } CamelMboxPreParser; /* clear a preparsing info structure */ static void clear_message_info (CamelMboxParserMessageInfo *preparsing_info) { preparsing_info->message_position = 0; preparsing_info->from = NULL; preparsing_info->date = NULL; preparsing_info->subject = NULL; preparsing_info->status = NULL; preparsing_info->priority = NULL; preparsing_info->references = NULL; } static CamelMboxPreParser * new_parser (int fd, const gchar *message_delimiter) { CamelMboxPreParser *parser; parser = g_new0 (CamelMboxPreParser, 1); parser->fd = fd; parser->buffer = g_new (gchar, MBOX_PARSER_BUF_SIZE); parser->current_position = 0; parser->message_delimiter = g_strdup (message_delimiter); parser->message_delimiter_length = strlen (message_delimiter); parser->real_position = 0; parser->preparsed_messages = g_array_new (FALSE, FALSE, sizeof (CamelMboxParserMessageInfo)); parser->message_summary_size = MBOX_PARSER_SUMMARY_SIZE; parser->left_chunk_size = MAX (parser->message_delimiter_length, MBOX_PARSER_MAX_KW_SIZE); parser->eof = FALSE; parser->tmp_string = g_string_sized_new (1000); return parser; } /* ** handle exceptions here */ /* read the first chunk of data in the buffer */ static void initialize_buffer (CamelMboxPreParser *parser, guint first_position) { gint seek_res; gint buf_nb_read; g_assert (parser); /* set the search start position */ seek_res = lseek (parser->fd, first_position, SEEK_SET); //if (seek_res == (off_t)-1) goto io_error; /* the first part of the buffer is filled with newlines, but the next time a chunk of buffer is read, it will be filled with the last bytes of the previous chunk. This allows simple g_strcasecmp to test for the presence of the keyword */ memset (parser->buffer, '\n', parser->left_chunk_size); do { buf_nb_read = read (parser->fd, parser->buffer + parser->left_chunk_size, MBOX_PARSER_BUF_SIZE - parser->left_chunk_size); } while ((buf_nb_read == -1) && (errno == EINTR)); /* ** check for an error here */ parser->last_position = buf_nb_read - parser->left_chunk_size; if (buf_nb_read < (MBOX_PARSER_BUF_SIZE - parser->left_chunk_size)) parser->eof =TRUE; parser->current_position = 0; } /* read next data in the mbox file */ static void read_next_buffer_chunk (CamelMboxPreParser *parser) { gint buf_nb_read; g_assert (parser); /* read the next chunk of data in the folder file : */ /* - first, copy the last bytes from the previous chunk at the begining of the new one. */ memcpy (parser->buffer, parser->buffer + MBOX_PARSER_BUF_SIZE - parser->left_chunk_size, parser->left_chunk_size); /* - then read the next chunk on disk */ do { buf_nb_read = read (parser->fd, parser->buffer + parser->left_chunk_size, MBOX_PARSER_BUF_SIZE - parser->left_chunk_size); } while ((buf_nb_read == -1) && (errno == EINTR)); /* ** check for an error here */ parser->last_position = buf_nb_read - parser->left_chunk_size; if (buf_nb_read < (MBOX_PARSER_BUF_SIZE - parser->left_chunk_size)) parser->eof =TRUE; parser->current_position = 0; } /* read next char in the buffer */ static void goto_next_char (CamelMboxPreParser *parser) { if (parser->current_position < parser->last_position) parser->current_position++; else read_next_buffer_chunk (parser); parser->real_position++; } static void new_message_detected (CamelMboxPreParser *parser) { /* if we were filling a message information save it in the message information array */ if (parser->is_pending_message) { g_array_append_vals (parser->preparsed_messages, (gchar *)parser + G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info), 1); } clear_message_info ( &(parser->current_message_info)); (parser->current_message_info).message_position = parser->real_position; parser->is_pending_message = TRUE; } /* read a header value and put it in the string pointer to by header_content */ static void read_header (CamelMboxPreParser *parser, gchar **header_content) { gboolean space = FALSE; gboolean newline = FALSE; gboolean header_end = FALSE; gchar *buffer; gchar c; g_assert (parser); /* reset the header buffer string */ parser->tmp_string = g_string_truncate (parser->tmp_string, 0); buffer = parser->buffer; while (! (parser->eof || header_end) ) { /* read the current character */ c = buffer[parser->current_position]; if (space) { if (c == ' ' && c == '\t') goto next_char; else space = FALSE; } if (newline) { if (c == ' ' && c == '\t') { space = TRUE; newline = FALSE; goto next_char; } else { header_end = TRUE; continue; } } if (c == '\n') { newline = TRUE; goto next_char; } /* feed the header content */ parser->tmp_string = g_string_append_c (parser->tmp_string, c); next_char: /* read next char in the buffer */ goto_next_char (parser); } /* copy the buffer in the preparsing information structure */ *header_content = g_strndup (parser->tmp_string->str, parser->tmp_string->len); } /* read the begining of the message and put it in the message summary field */ static void read_message_begining (CamelMboxPreParser *parser, gchar **message_summary) { guint nb_read = 0; gchar *buffer; g_assert (parser); /* reset the header buffer string */ parser->tmp_string = g_string_truncate (parser->tmp_string, 0); buffer = parser->buffer; /* the message should not be filled character by character but there is no g_string_n_append function, so for the moment, this is a lazy implementation */ while (! (parser->eof) && nb_readmessage_summary_size) { parser->tmp_string = g_string_append_c (parser->tmp_string, buffer[parser->current_position]); nb_read++; goto_next_char (parser); } *message_summary = g_strndup (parser->tmp_string->str, parser->tmp_string->len); } GArray * camel_mbox_parse_file (int fd, guint start_position, const gchar *message_delimiter) { CamelMboxPreParser *parser; gboolean is_parsing_a_message = FALSE; gchar c; /* create the parser */ parser = new_parser (fd, message_delimiter); /* initialize the temporary char buffer */ initialize_buffer (parser, start_position); while (!parser->eof) { /* read the current character */ c = parser->buffer[parser->current_position]; goto_next_char (parser); if (c == '\n') { /* is the next part a message delimiter ? */ if (g_strncasecmp (parser->buffer + parser->current_position, parser->message_delimiter, parser->message_delimiter_length) == 0) { is_parsing_a_message = TRUE; new_message_detected (parser); goto_next_char (parser); continue; } if (is_parsing_a_message) { /* is the next part a "from" header ? */ if (g_strncasecmp (parser->buffer + parser->current_position, MBOX_PARSER_FROM_KW, MBOX_PARSER_FROM_KW_SZ) == 0) { parser->current_position += MBOX_PARSER_FROM_KW_SZ; read_header (parser, (gchar **) ((gchar *)parser + G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info) + G_STRUCT_OFFSET (CamelMboxParserMessageInfo, from))); continue; } /* is it an empty line ? */ if (parser->buffer[parser->current_position] == '\n') { goto_next_char (parser); read_message_begining (parser, (gchar **) ((gchar *)parser + G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info) + G_STRUCT_OFFSET (CamelMboxParserMessageInfo, body_summary))); is_parsing_a_message = FALSE; } } } } /* if there is a pending message information put it in the array */ if (parser->is_pending_message) { g_array_append_vals (parser->preparsed_messages, (gchar *)parser + G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info), 1); } /* free the parser */ /* ** FIXME : FREE THE PARSER */ return parser->preparsed_messages; } #ifdef MBOX_PARSER_TEST /* to build the test : gcc -o test_parser -DMBOX_PARSER_TEST -I ../.. -I ../../.. \ -I /usr/lib/glib/include camel-mbox-parser.c \ -lglib ../../.libs/libcamel.a */ int main (int argc, char **argv) { int test_file_fd; int i; GArray *message_positions; CamelMboxParserMessageInfo *message_info; test_file_fd = open (argv[1], O_RDONLY); message_positions = camel_mbox_parse_file (test_file_fd, 0, "From "); printf ("Found %d messages \n", message_positions->len); #if 0 for (i=0; ilen; i++) { //message_info = g_array_index(message_positions, CamelMboxParserMessageInfo, i); message_info = ((CamelMboxParserMessageInfo *)(message_positions->data)) + i; printf ("\n\n** Message %d : \n", i); printf ("\t From: %s\n", message_info->from) ; printf ("\t Summary: %s\n", message_info->body_summary) ; } #endif } #endif /* MBOX_PARSER_TEST */