From c6ff08354321779cfa5995ee768b6f17a32847c5 Mon Sep 17 00:00:00 2001
From: bertrand
Date: Wed, 12 Jan 2000 02:09:50 +0000
Subject: Backup of the first clean and working mbox file parser. It both find
 the

2000-01-11 bertrand

Backup of the first clean and working mbox file parser. It both find
the message and pre-parse the message, that is, retrieve some key
headers, and the first lines of the body.

svn path=/trunk/; revision=1556
---
Reviewer notes on this copy of the patch (not part of the commit message):

 * Line breaks and indentation were lost in the copy this text was restored
   from.  They have been laid out again from the +/- markers; the position
   of blank context lines and all leading whitespace are a best guess, so
   the patch may need whitespace tolerance (or manual help) to apply.
 * The targets of the angle-bracket #include lines had been stripped.  They
   were refilled from what the code calls (lseek/read, printf, errno,
   memset/strlen, open, GLib types) -- TODO confirm names and order against
   the repository.
 * Two comparisons had lost their "<parser->" / "<message_positions->" part
   (read_message_begining loop condition, test main loop); restored.
 * read_header(): the whitespace tests were `c == ' ' && c == '\t'`, which
   can never be true, so folded header lines were never followed.  Changed
   to `||`.
 * NOTE(review): camel-mbox-parser.h still declares
   camel_mbox_find_message_positions() while camel-mbox-parser.c now defines
   camel_mbox_parse_file(); left untouched here.
 * NOTE(review): the results of lseek()/read() are still unchecked (see the
   "** check for an error here" markers) and the parser is never freed.

 camel/providers/mbox/Makefile.am         |   6 +-
 camel/providers/mbox/camel-mbox-folder.c |   7 +-
 camel/providers/mbox/camel-mbox-parser.c | 449 ++++++++++++++++++++++++++++---
 camel/providers/mbox/camel-mbox-parser.h |  21 ++
 4 files changed, 442 insertions(+), 41 deletions(-)

(limited to 'camel')

diff --git a/camel/providers/mbox/Makefile.am b/camel/providers/mbox/Makefile.am
index b614a63036..3943cd7a8c 100644
--- a/camel/providers/mbox/Makefile.am
+++ b/camel/providers/mbox/Makefile.am
@@ -13,11 +13,13 @@ INCLUDES = -I.. -I$(srcdir)/.. -I$(includedir) \
 
 libcamelmbox_la_SOURCES = \
 	camel-mbox-folder.c \
-	camel-mbox-store.c
+	camel-mbox-store.c \
+	camel-mbox-parser.c
 
 libcamelmboxinclude_HEADERS = \
 	camel-mbox-folder.h \
-	camel-mbox-store.h
+	camel-mbox-store.h \
+	camel-mbox-parser.h
 
 libcamelmbox_la_LDFLAGS = -version-info 0:0:0 -rpath $(libdir)
 
diff --git a/camel/providers/mbox/camel-mbox-folder.c b/camel/providers/mbox/camel-mbox-folder.c
index 966218fcc6..674f56c193 100644
--- a/camel/providers/mbox/camel-mbox-folder.c
+++ b/camel/providers/mbox/camel-mbox-folder.c
@@ -195,7 +195,6 @@ _open (CamelFolder *folder, CamelFolderOpenMode mode, CamelException *ex)
 {
 	CamelMboxFolder *mbox_folder = CAMEL_MBOX_FOLDER (folder);
 	struct dirent *dir_entry;
-	DIR *dir_handle;
 
 
 	if (folder->open_state == FOLDER_OPEN) {
@@ -763,6 +762,7 @@ _list_subfolders (CamelFolder *folder, CamelException *ex)
 
 
 
+#if 0
 
 static CamelMimeMessage *
 _get_message_by_number (CamelFolder *folder, gint number, CamelException *ex)
@@ -803,9 +803,14 @@ _get_message_by_number (CamelFolder *folder, gint number, CamelException *ex)
 #warning Set flags and all this stuff here
 		}
 		g_free (message_file_name);
+
 	} else
 		CAMEL_LOG_FULL_DEBUG ("CanelMhFolder::get_message message number = %d, not found\n", number);
 
 
 	return message;
 }
+
+#endif
+
+
diff --git a/camel/providers/mbox/camel-mbox-parser.c b/camel/providers/mbox/camel-mbox-parser.c
index a13032a391..e5c18e70d8 100644
--- a/camel/providers/mbox/camel-mbox-parser.c
+++ b/camel/providers/mbox/camel-mbox-parser.c
@@ -26,71 +26,444 @@
 #include "camel-mbox-parser.h"
 #include "camel-log.h"
 #include "camel-exception.h"
-
 #include <sys/types.h>
 #include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+
+
+
+#define MBOX_PARSER_BUF_SIZE 1000
+
+#define MBOX_PARSER_FROM_KW "from:"
+#define MBOX_PARSER_FROM_KW_SZ 5
+
+#define MBOX_PARSER_DATE_KW "date:"
+#define MBOX_PARSER_DATE_KW_SZ 5
+
+#define MBOX_PARSER_X_EVOLUTION_KW "x-evolution:"
+#define MBOX_PARSER_X_EVOLUTION_KW_SZ 12
+
+/* the maximum length of all the previous keywords */
+#define MBOX_PARSER_MAX_KW_SIZE 12
+
+
+#define MBOX_PARSER_SUMMARY_SIZE 100
 
 
 
-GList *
-camel_mbox_find_message_positions (int fd, gint first_position, CamelException *ex)
+
+
+typedef struct {
+
+	int fd;                        /* file descriptor of the mbox file */
+	guint real_position;           /* real position in the file */
+
+
+	gchar *message_delimiter;      /* message delimiter string */
+	guint message_delimiter_length;
+
+	guint message_summary_size;    /* how many characters from the beginning of the
+					  mail to put into the message summary */
+
+	GArray *preparsed_messages;    /* array of CamelMboxParserMessageInfo */
+	CamelMboxParserMessageInfo current_message_info; /* used to store current info */
+	gboolean is_pending_message;   /* is there some message information pending ? */
+
+	/* buffer info */
+	gchar *buffer;                 /* temporary buffer */
+	guint left_chunk_size;         /* size of the left chunk in the temp buffer */
+	guint last_position;           /* last position that can be compared to a keyword */
+	guint current_position;        /* current position in the temp buffer */
+	gboolean eof;                  /* did we read the entire file */
+
+	/* other */
+	GString *tmp_string;           /* temporary string to fill the headers in */
+
+
+
+} CamelMboxPreParser;
+
+
+/* clear a preparsing info structure */
+static void
+clear_message_info (CamelMboxParserMessageInfo *preparsing_info)
 {
-#define MBOX_PARSER_BUF_SIZE 1000
+	preparsing_info->message_position = 0;
+	preparsing_info->from = NULL;
+	preparsing_info->date = NULL;
+	preparsing_info->subject = NULL;
+	preparsing_info->status = NULL;
+	preparsing_info->priority = NULL;
+	preparsing_info->references = NULL;
+}
 
-	off_t seek_res;
-	GList *message_positions = NULL;
-	char buffer[MBOX_PARSER_BUF_SIZE];
-	ssize_t buf_nb_read;
 
-	/* set the initial position */
-	seek_res = lseek (fd, first_position, SEEK_SET);
-	if (seek_res == (off_t)-1) goto io_error;
+static CamelMboxPreParser *
+new_parser (int fd,
+	    const gchar *message_delimiter)
+{
+
+	CamelMboxPreParser *parser;
 
-	/* populate the buffer and initialize the search proc */
-	buf_nb_read = read (fd, buffer, MBOX_PARSER_BUF_SIZE);
+	parser = g_new0 (CamelMboxPreParser, 1);
 
-	while (buf_nb_read>0) {
-		current_pos = 0;
+	parser->fd = fd;
+	parser->buffer = g_new (gchar, MBOX_PARSER_BUF_SIZE);
+	parser->current_position = 0;
+	parser->message_delimiter = g_strdup (message_delimiter);
+	parser->message_delimiter_length = strlen (message_delimiter);
+	parser->real_position = 0;
+	parser->preparsed_messages = g_array_new (FALSE, FALSE, sizeof (CamelMboxParserMessageInfo));
+	parser->message_summary_size = MBOX_PARSER_SUMMARY_SIZE;
+
+	parser->left_chunk_size = MAX (parser->message_delimiter_length, MBOX_PARSER_MAX_KW_SIZE);
+	parser->eof = FALSE;
+
+	parser->tmp_string = g_string_sized_new (1000);
+
+	return parser;
+}
+
+
+
+/* ** handle exceptions here */
+/* read the first chunk of data in the buffer */
+static void
+initialize_buffer (CamelMboxPreParser *parser,
+		   guint first_position)
+{
+	gint seek_res;
+	gint buf_nb_read;
+
+	g_assert (parser);
+
+	/* set the search start position */
+	seek_res = lseek (parser->fd, first_position, SEEK_SET);
+	//if (seek_res == (off_t)-1) goto io_error;
+
+
+	/* the first part of the buffer is filled with newlines,
+	   but the next time a chunk of buffer is read, it will
+	   be filled with the last bytes of the previous chunk.
+	   This allows simple g_strcasecmp to test for the presence of
+	   the keyword */
+	memset (parser->buffer, '\n', parser->left_chunk_size);
+	do {
+		buf_nb_read = read (parser->fd, parser->buffer + parser->left_chunk_size,
+				    MBOX_PARSER_BUF_SIZE - parser->left_chunk_size);
+	} while ((buf_nb_read == -1) && (errno == EINTR));
+	/* ** check for an error here */
+
+	parser->last_position = buf_nb_read - parser->left_chunk_size;
+	if (buf_nb_read < (MBOX_PARSER_BUF_SIZE - parser->left_chunk_size))
+		parser->eof =TRUE;
+
+	parser->current_position = 0;
+}
+
+
+
+
+/* read next data in the mbox file */
+static void
+read_next_buffer_chunk (CamelMboxPreParser *parser)
+{
+	gint buf_nb_read;
+
+
+	g_assert (parser);
+
+	/* read the next chunk of data in the folder file : */
+	/* - first, copy the last bytes from the previous
+	   chunk at the beginning of the new one. */
+	memcpy (parser->buffer,
+		parser->buffer + MBOX_PARSER_BUF_SIZE - parser->left_chunk_size,
+		parser->left_chunk_size);
+
+	/* - then read the next chunk on disk */
+	do {
+		buf_nb_read = read (parser->fd,
+				    parser->buffer + parser->left_chunk_size,
+				    MBOX_PARSER_BUF_SIZE - parser->left_chunk_size);
+	} while ((buf_nb_read == -1) && (errno == EINTR));
+	/* ** check for an error here */
+
+
+	parser->last_position = buf_nb_read - parser->left_chunk_size;
+	if (buf_nb_read < (MBOX_PARSER_BUF_SIZE - parser->left_chunk_size))
+		parser->eof =TRUE;
+
+	parser->current_position = 0;
+
+}
+
+
+
+/* read next char in the buffer */
+static void
+goto_next_char (CamelMboxPreParser *parser)
+{
+	if (parser->current_position < parser->last_position)
+		parser->current_position++;
+	else
+		read_next_buffer_chunk (parser);
+
+	parser->real_position++;
+}
+
+
+
+
+static void
+new_message_detected (CamelMboxPreParser *parser)
+{
+	/* if we were filling a message information
+	   save it in the message information array */
+
+	if (parser->is_pending_message) {
+		g_array_append_vals (parser->preparsed_messages, (gchar *)parser +
+				     G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info), 1);
+	}
+
+	clear_message_info ( &(parser->current_message_info));
+
+	(parser->current_message_info).message_position = parser->real_position;
+
+	parser->is_pending_message = TRUE;
+}
+
+
+
+
+/* read a header value and put it in the string pointed
+   to by header_content */
+static void
+read_header (CamelMboxPreParser *parser, gchar **header_content)
+{
+	gboolean space = FALSE;
+	gboolean newline = FALSE;
+	gboolean header_end = FALSE;
+	gchar *buffer;
+	gchar c;
+
+
+	g_assert (parser);
+
+	/* reset the header buffer string */
+	parser->tmp_string = g_string_truncate (parser->tmp_string, 0);
+
+	buffer = parser->buffer;
+
+	while (! (parser->eof || header_end) ) {
+		/* read the current character */
+		c = buffer[parser->current_position];
+		if (space) {
+			if (c == ' ' || c == '\t')
+				goto next_char;
+			else
+				space = FALSE;
+		}
+
+		if (newline) {
+			if (c == ' ' || c == '\t') {
+
+				space = TRUE;
+				newline = FALSE;
+				goto next_char;
+			} else {
+
+				header_end = TRUE;
+				continue;
+			}
+		}
+
+		if (c == '\n') {
+			newline = TRUE;
+			goto next_char;
+		}
+
+		/* feed the header content */
+		parser->tmp_string = g_string_append_c (parser->tmp_string, c);
 
 
 
-		/* read the next chunk of data in the folder file */
-		buf_nb_read = read (fd, buffer, MBOX_PARSER_BUF_SIZE);
+	next_char: /* read next char in the buffer */
+		goto_next_char (parser);
 	}
+
+	/* copy the buffer in the preparsing information structure */
+	*header_content = g_strndup (parser->tmp_string->str, parser->tmp_string->len);
+}
+
+
+/* read the beginning of the message and put it in the message
+   summary field
+
+*/
+static void
+read_message_begining (CamelMboxPreParser *parser, gchar **message_summary)
+{
+	guint nb_read = 0;
+	gchar *buffer;
+	g_assert (parser);
 
 
 
-
+	/* reset the header buffer string */
+	parser->tmp_string = g_string_truncate (parser->tmp_string, 0);
+
+	buffer = parser->buffer;
+	/* the message should not be filled character by
+	   character but there is no g_string_n_append
+	   function, so for the moment, this is a lazy
+	   implementation */
+	while (! (parser->eof) && nb_read<parser->message_summary_size) {
+
+		parser->tmp_string = g_string_append_c (parser->tmp_string,
+							buffer[parser->current_position]);
+		nb_read++;
+		goto_next_char (parser);
+	}
+
+	*message_summary = g_strndup (parser->tmp_string->str, parser->tmp_string->len);
+}
+
+
+
+
+
+
+
+GArray *
+camel_mbox_parse_file (int fd, guint start_position, const gchar *message_delimiter)
+{
+	CamelMboxPreParser *parser;
+	gboolean is_parsing_a_message = FALSE;
+	gchar c;
 
 
 
-	/* io exception handling */
-	io_error :
 
-	switch errno {
-	case EACCES :
+
+	/* create the parser */
+	parser = new_parser (fd, message_delimiter);
+
+	/* initialize the temporary char buffer */
+	initialize_buffer (parser, start_position);
+
+	while (!parser->eof) {
+
+		/* read the current character */
+		c = parser->buffer[parser->current_position];
+		goto_next_char (parser);
 
 
-		camel_exception_setv (ex,
-				      CAMEL_EXCEPTION_FOLDER_INSUFFICIENT_PERMISSION,
-				      "Unable to list the directory. Full Error text is : %s ",
-				      strerror (errno));
-		break;
+		if (c == '\n') {
 
-	case ENOENT :
-	case ENOTDIR :
-		camel_exception_setv (ex,
-				      CAMEL_EXCEPTION_FOLDER_INVALID_PATH,
-				      "Invalid mbox folder path. Full Error text is : %s ",
-				      strerror (errno));
-		break;
+			/* is the next part a message delimiter ? */
+			if (g_strncasecmp (parser->buffer + parser->current_position,
+					   parser->message_delimiter,
+					   parser->message_delimiter_length) == 0) {
+
+				is_parsing_a_message = TRUE;
+				new_message_detected (parser);
+				goto_next_char (parser);
+				continue;
+			}
 
 
-	default :
-		camel_exception_set (ex,
-				     CAMEL_EXCEPTION_SYSTEM,
-				     "Unable to delete the mbox folder.");
+			if (is_parsing_a_message) {
+
+				/* is the next part a "from" header ? */
+				if (g_strncasecmp (parser->buffer + parser->current_position,
+						   MBOX_PARSER_FROM_KW,
+						   MBOX_PARSER_FROM_KW_SZ) == 0) {
+
+					parser->current_position += MBOX_PARSER_FROM_KW_SZ;
+					read_header (parser, (gchar **) ((gchar *)parser +
+									 G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info) +
+									 G_STRUCT_OFFSET (CamelMboxParserMessageInfo, from)));
+					continue;
+				}
+
+				/* is it an empty line ? */
+				if (parser->buffer[parser->current_position] == '\n') {
+
+					goto_next_char (parser);
+					read_message_begining (parser, (gchar **) ((gchar *)parser +
+										   G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info) +
+										   G_STRUCT_OFFSET (CamelMboxParserMessageInfo, body_summary)));
+					is_parsing_a_message = FALSE;
+				}
+
+			}
 	}
+
+	}
+
+	/* if there is a pending message information put it in the array */
+	if (parser->is_pending_message) {
+		g_array_append_vals (parser->preparsed_messages, (gchar *)parser +
+				     G_STRUCT_OFFSET (CamelMboxPreParser, current_message_info), 1);
+	}
+
+	/* free the parser */
+	/* ** FIXME : FREE THE PARSER */
+
+	return parser->preparsed_messages;
+
+}
+
+
+
+
+
+
+
+
+
+#ifdef MBOX_PARSER_TEST
+/* to build the test :
+   gcc -o test_parser -DMBOX_PARSER_TEST -I ../.. -I ../../.. \
+   -I /usr/lib/glib/include camel-mbox-parser.c \
+   -lglib ../../.libs/libcamel.a
+
+
+ */
+
+int
+main (int argc, char **argv)
+{
+	int test_file_fd;
+	int i;
+	GArray *message_positions;
+	CamelMboxParserMessageInfo *message_info;
+
+
+	test_file_fd = open (argv[1], O_RDONLY);
+	message_positions = camel_mbox_parse_file (test_file_fd,
+						   0,
+						   "From ");
+
+	printf ("Found %d messages \n", message_positions->len);
+
+#if 0
+	for (i=0; i<message_positions->len; i++) {
+		//message_info = g_array_index(message_positions, CamelMboxParserMessageInfo, i);
+		message_info = ((CamelMboxParserMessageInfo *)(message_positions->data)) + i;
+		printf ("\n\n** Message %d : \n", i);
+		printf ("\t From: %s\n", message_info->from) ;
+		printf ("\t Summary: %s\n", message_info->body_summary) ;
+	}
+#endif
 }
+
+
+
+
+#endif /* MBOX_PARSER_TEST */
 
 
diff --git a/camel/providers/mbox/camel-mbox-parser.h b/camel/providers/mbox/camel-mbox-parser.h
index 19b7a42dc8..994e5d85e3 100644
--- a/camel/providers/mbox/camel-mbox-parser.h
+++ b/camel/providers/mbox/camel-mbox-parser.h
@@ -21,5 +21,26 @@
  * USA
  */
 
+#include <glib.h>
+#include "camel-log.h"
+#include "camel-exception.h"
 
+typedef struct {
+
+	guint message_position;
+	gchar *from;
+	gchar *date;
+	gchar *subject;
+	gchar *status;
+	gchar *priority;
+	gchar *references;
+	gchar *body_summary;
+
+} CamelMboxParserMessageInfo;
+
+
+GArray * camel_mbox_find_message_positions (int fd,
+					    const gchar *message_delimiter,
+					    gint first_position,
+					    CamelException *ex);
 
-- 
cgit v1.2.3