/* * Copyright 2000 HelixCode (http://www.helixcode.com). * * Author : * Michael Zucchi * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA */ #include #include #include #include #include #include #include "camel/camel-mime-message.h" #include "camel/camel-mime-part.h" #include "camel/camel-stream.h" #include "camel/camel-stream-fs.h" #include "camel/camel.h" #include "camel-mbox-folder.h" #define HAVE_IBEX #ifdef HAVE_IBEX #include "ibex.h" #endif #define p(x) /* parse debug */ #define r(x) /* run debug */ #define d(x) /* general debug */ /* This is not yet complete. The following s-exp's are supported: list = (and list*) perform an intersection of a number of lists, and return that. bool = (and bool*) perform a boolean AND of boolean values. list = (or list*) perform a union of a number of lists, returning the new list. bool = (or bool*) perform a boolean OR of boolean values. Comparison operators: bool = (lt int int) bool = (gt int int) bool = (eq int int) bool = (lt string string) bool = (gt string string) bool = (eq string string) Perform a comparision of 2 integers, or 2 string values. Matching operators: list = (contains string) Returns a list of all messages containing the string in their body. list = (match-all bool-expr) Returns a list of all messages for which the bool expression is true. The bool-expr is evaluated for each message in turn. It is more efficient not to perform body-content comparisons inside a match-all operator. int = (date-sent) Returns a time_t of the date-sent of the message. bool = (header-contains string1 string2) Returns true if the current message (inside a match-all operator) has a header 'string1', which contains 'string2' */ static GScannerConfig scanner_config = { ( " \t\r\n" ) /* cset_skip_characters */, ( G_CSET_a_2_z "_" G_CSET_A_2_Z ) /* cset_identifier_first */, ( G_CSET_a_2_z "_0123456789-" G_CSET_A_2_Z G_CSET_LATINS G_CSET_LATINC ) /* cset_identifier_nth */, ( "#\n" ) /* cpair_comment_single */, FALSE /* case_sensitive */, TRUE /* skip_comment_multi */, TRUE /* skip_comment_single */, TRUE /* scan_comment_multi */, TRUE /* scan_identifier */, FALSE /* scan_identifier_1char */, FALSE /* scan_identifier_NULL */, TRUE /* scan_symbols */, FALSE /* scan_binary */, TRUE /* scan_octal */, TRUE /* scan_float */, TRUE /* scan_hex */, FALSE /* scan_hex_dollar */, TRUE /* scan_string_sq */, TRUE /* scan_string_dq */, TRUE /* numbers_2_int */, FALSE /* int_2_float */, FALSE /* identifier_2_string */, TRUE /* char_2_token */, FALSE /* symbol_2_token */, FALSE /* scope_0_fallback */, }; enum _searchtermtype_t { SEARCH_AND, SEARCH_OR, SEARCH_LT, SEARCH_GT, SEARCH_EQ, SEARCH_CONTAINS, SEARCH_DATESENT, SEARCH_STRING, SEARCH_INT, SEARCH_FUNC, }; struct _searchterm { enum _searchtermtype_t type; union { char *string; int number; struct { struct _searchterm_symbol *sym; struct _searchterm **terms; int termcount; } func; } value; }; struct _searchcontext { int whatever; CamelFolder *folder; #ifdef HAVE_IBEX ibex *index; /* index of content for this folder */ #endif CamelFolderSummary *summary; const GArray *message_info; CamelMessageInfo *message_current; /* when performing a (match operation */ }; enum _searchresulttype_t { RESULT_ARRAY_PTR=0, RESULT_INT, RESULT_STRING, RESULT_BOOL, RESULT_UNDEFINED }; struct _searchresult { enum _searchresulttype_t type; union { GPtrArray *ptrarray; int number; char *string; int bool; } value; }; /* function callbacks */ static struct _searchresult *search_contains(struct _searchcontext *ctx, struct _searchterm *t); static struct _searchresult *search_matches(struct _searchcontext *ctx, struct _searchterm *t); static struct _searchresult *search_date_sent(struct _searchcontext *ctx, struct _searchterm *t); static struct _searchresult *header_contains(struct _searchcontext *ctx, struct _searchterm *t); struct _searchterm_symbol { char *name; int type; int argtype; struct _searchresult * (*func)(struct _searchcontext *ctx, struct _searchterm *t); } symbols[] = { { "and", SEARCH_AND, 0, NULL }, { "or", SEARCH_OR, 0, NULL }, { "lt", SEARCH_LT, 1, NULL }, { "gt", SEARCH_GT, 1, NULL }, { "eq", SEARCH_EQ, 1, NULL }, { "contains", SEARCH_FUNC, 1, search_contains }, { "match-all", SEARCH_FUNC, 1, search_matches }, { "date-sent", SEARCH_FUNC, 1, search_date_sent }, { "header-contains", SEARCH_FUNC, 1, header_contains }, }; static struct _searchterm * parse_list(GScanner *gs, int gotbrace); static struct _searchterm * parse_value(GScanner *gs); static struct _searchresult *term_eval(struct _searchcontext *ctx, struct _searchterm *t); static void parse_dump_term(struct _searchterm *t, int depth); /* can you tell, i dont like glib? */ struct _glib_sux_donkeys { int count; GPtrArray *uids; }; /* ok, store any values that are in all sets */ static void g_lib_sux_htand(char *key, int value, struct _glib_sux_donkeys *fuckup) { if (value == fuckup->count) { g_ptr_array_add(fuckup->uids, key); } } /* or, store all unique values */ static void g_lib_sux_htor(char *key, int value, struct _glib_sux_donkeys *fuckup) { g_ptr_array_add(fuckup->uids, key); } static struct _searchresult * result_new(int type) { struct _searchresult *r = g_malloc0(sizeof(*r)); r->type = type; return r; } static void result_free(struct _searchresult *t) { switch(t->type) { case RESULT_ARRAY_PTR: g_ptr_array_free(t->value.ptrarray, TRUE); break; case RESULT_BOOL: case RESULT_INT: break; case RESULT_STRING: g_free(t->value.string); break; case RESULT_UNDEFINED: break; } g_free(t); } static struct _searchresult *search_contains(struct _searchcontext *ctx, struct _searchterm *t) { struct _searchresult *r, *r1; r = result_new(RESULT_UNDEFINED); if (t->value.func.termcount>0) { if (t->value.func.termcount!=1) { printf("warning, only looking for first string in contains clause\n"); } r1 = term_eval(ctx, t->value.func.terms[0]); if (r1->type == RESULT_STRING) { if (ctx->message_current) { int truth = FALSE; #ifdef HAVE_IBEX int i; GPtrArray *array; if (ctx->index) { array = ibex_find(ctx->index, r1->value.string); for (i=0;ilen;i++) { if (!strcmp(g_ptr_array_index(array, i), ctx->message_current->uid)) { truth = TRUE; break; } } g_ptr_array_free(array, TRUE); } #endif r->type = RESULT_BOOL; r->value.bool = truth; } else { r->type = RESULT_ARRAY_PTR; #ifdef HAVE_IBEX if (ctx->index) { /* blah, this should probably copy the index strings? */ r->value.ptrarray = ibex_find(ctx->index, r1->value.string); } else { r->value.ptrarray = g_ptr_array_new(); } #endif } } else { printf("you can't search for a contents of a non-string, fool\n"); } result_free(r1); } return r; } /* run a sub-tree of commands which match on header fields etc */ static struct _searchresult *search_matches(struct _searchcontext *ctx, struct _searchterm *t) { int i; struct _searchresult *r, *r1; if (t->value.func.termcount == 1) { r = result_new(RESULT_ARRAY_PTR); r->value.ptrarray = g_ptr_array_new(); for (i=0;imessage_info->len;i++) { ctx->message_current = &g_array_index(ctx->message_info, CamelMessageInfo, i); r1 = term_eval(ctx, t->value.func.terms[0]); if (r1->type == RESULT_BOOL) { if (r1->value.bool) { r(printf("adding message %s\n", ctx->message_current->uid)); g_ptr_array_add(r->value.ptrarray, ctx->message_current->uid); } } else { printf("invalid syntax, matches require a single bool result\n"); } result_free(r1); } ctx->message_current = NULL; } else { r = result_new(RESULT_UNDEFINED); printf("invalid syntax, matches only allows a single bool arg\n"); } return r; } /* these variable-getting things could be put into 1 function */ static struct _searchresult *search_date_sent(struct _searchcontext *ctx, struct _searchterm *t) { struct _searchresult *r; if (ctx->message_current) { r = result_new(RESULT_INT); r->value.number = time(0); /* r->value.number = ctx->current_message->date_sent;*/ } else { r = result_new(RESULT_UNDEFINED); } return r; } /* header contains - can only be used inside a match-all construct */ /* all headers should be inside a lookup table, so this can search all header types */ static struct _searchresult *header_contains(struct _searchcontext *ctx, struct _searchterm *t) { struct _searchresult *r; r(printf("executing header-contains\n")); /* are we inside a match-all? */ if (ctx->message_current && t->value.func.termcount == 2) { char *header, *substring; int truth = FALSE; struct _searchresult *r1, *r2; r1 = term_eval(ctx, t->value.func.terms[0]); r2 = term_eval(ctx, t->value.func.terms[1]); if (r1->type == RESULT_STRING && r2->type == RESULT_STRING) { header = r1->value.string; substring = r2->value.string; if (!strcasecmp(header, "subject")) { r(printf("comparing subject: %s\n", ctx->message_current->subject)); if (ctx->message_current->subject) truth = (strstr(ctx->message_current->subject, substring)) != NULL; else printf("Warning: no subject line in message?\n"); } r(printf("header-contains %s %s = %s\n", header, substring, truth?"TRUE":"FALSE")); } result_free(r1); result_free(r2); r = result_new(RESULT_BOOL); r->value.number = truth; } else { r = result_new(RESULT_UNDEFINED); } return r; } static struct _searchresult * term_eval(struct _searchcontext *ctx, struct _searchterm *t) { struct _searchresult *r, *r1, *r2; int i; r(printf("eval term :\n")); r(parse_dump_term(t, 0)); r = g_malloc0(sizeof(*r)); r->type = RESULT_UNDEFINED; switch (t->type) { case SEARCH_AND: { GHashTable *ht = g_hash_table_new(g_str_hash, g_str_equal); struct _glib_sux_donkeys lambdafoo; int type=-1; int bool = TRUE; r(printf("( and\n")); for (i=0;bool && ivalue.func.termcount;i++) { r1 = term_eval(ctx, t->value.func.terms[i]); if (type == -1) type = r1->type; if (type != r1->type) { printf("invalid types in and operation, all types must be the same\n"); } else if ( r1->type == RESULT_ARRAY_PTR ) { char **a1; int l1, j; a1 = (char **)r1->value.ptrarray->pdata; l1 = r1->value.ptrarray->len; for (j=0;itype == RESULT_BOOL ) { bool &= r1->value.bool; } result_free(r1); } if (type == RESULT_ARRAY_PTR) { lambdafoo.count = t->value.func.termcount; lambdafoo.uids = g_ptr_array_new(); g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htand, &lambdafoo); r->type = RESULT_ARRAY_PTR; r->value.ptrarray = lambdafoo.uids; } else if (type == RESULT_BOOL) { r->type = RESULT_BOOL; r->value.bool = bool; } g_hash_table_destroy(ht); break; } case SEARCH_OR: { GHashTable *ht = g_hash_table_new(g_str_hash, g_str_equal); struct _glib_sux_donkeys lambdafoo; int type = -1; int bool = FALSE; r(printf("(or \n")); for (i=0;!bool && ivalue.func.termcount;i++) { r1 = term_eval(ctx, t->value.func.terms[i]); if (type == -1) type = r1->type; if (r1->type != type) { printf("wrong types in or operation\n"); } else if (r1->type == RESULT_ARRAY_PTR) { char **a1; int l1, j; a1 = (char **)r1->value.ptrarray->pdata; l1 = r1->value.ptrarray->len; for (j=0;itype == RESULT_BOOL) { bool |= r1->value.bool; } result_free(r1); } if (type == RESULT_ARRAY_PTR) { lambdafoo.count = t->value.func.termcount; lambdafoo.uids = g_ptr_array_new(); g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htor, &lambdafoo); r->type = RESULT_ARRAY_PTR; r->value.ptrarray = lambdafoo.uids; } else if (type == RESULT_BOOL) { r->type = RESULT_BOOL; r->value.bool = bool; } g_hash_table_destroy(ht); break; } case SEARCH_LT: r(printf("(lt \n")); if (t->value.func.termcount == 2) { r1 = term_eval(ctx, t->value.func.terms[0]); r2 = term_eval(ctx, t->value.func.terms[1]); if (r1->type != r2->type) { printf("error, invalid types in compare\n"); } else if (r1->type == RESULT_INT) { r->type = RESULT_BOOL; r->value.bool = r1->value.number < r2->value.number; } else if (r1->type == RESULT_STRING) { r->type = RESULT_BOOL; r->value.bool = strcmp(r1->value.string, r2->value.string) < 0; } } break; case SEARCH_GT: r(printf("(gt \n")); if (t->value.func.termcount == 2) { r1 = term_eval(ctx, t->value.func.terms[0]); r2 = term_eval(ctx, t->value.func.terms[1]); if (r1->type != r2->type) { printf("error, invalid types in compare\n"); } else if (r1->type == RESULT_INT) { r->type = RESULT_BOOL; r->value.bool = r1->value.number > r2->value.number; } else if (r1->type == RESULT_STRING) { r->type = RESULT_BOOL; r->value.bool = strcmp(r1->value.string, r2->value.string) > 0; } } break; case SEARCH_STRING: r(printf(" (string \"%s\")\n", t->value.string)); r->type = RESULT_STRING; /* erk, this shoul;dn't need to strdup this ... */ r->value.string = g_strdup(t->value.string); break; case SEARCH_INT: r(printf(" (int %d)\n", t->value.number)); r->type = RESULT_INT; r->value.number = t->value.number; break; case SEARCH_FUNC: g_free(r); /* <---- FIXME: ICK !! */ r(printf("function '%s'\n", t->value.func.sym->name)); return t->value.func.sym->func(ctx, t); default: printf("Warning: Unknown type encountered in parse tree: %d\n", t->type); r->type = RESULT_UNDEFINED; } return r; } static void parse_dump_term(struct _searchterm *t, int depth) { int dumpvals = FALSE; int i; if (t==NULL) { printf("null term??\n"); return; } for (i=0;itype) { case SEARCH_AND: printf("(and \n"); dumpvals = 1; break; case SEARCH_OR: printf("(or \n"); dumpvals = 1; break; case SEARCH_LT: printf("(lt \n"); dumpvals = 1; break; case SEARCH_GT: printf("(gt \n"); dumpvals = 1; break; case SEARCH_STRING: printf(" \"%s\"", t->value.string); break; case SEARCH_INT: printf(" %d", t->value.number); break; case SEARCH_FUNC: printf(" (function %s", t->value.func.sym->name); dumpvals = 1; break; default: printf("unknown type: %d\n", t->type); } if (dumpvals) { /*printf(" [%d] ", t->value.func.termcount);*/ for (i=0;ivalue.func.termcount;i++) { parse_dump_term(t->value.func.terms[i], depth+1); } for (i=0;itype = type; return s; } static void parse_term_free(struct _searchterm *t) { int i; if (t==NULL) { return; } switch (t->type) { case SEARCH_AND: case SEARCH_OR: case SEARCH_LT: case SEARCH_GT: case SEARCH_FUNC: for (i=0;ivalue.func.termcount;i++) { parse_term_free(t->value.func.terms[i]); } g_free(t->value.func.terms); break; case SEARCH_STRING: g_free(t->value.string); break; case SEARCH_INT: break; default: printf("parse_term_free: unknown type: %d\n", t->type); } g_free(t); } static struct _searchterm ** parse_lists(GScanner *gs, int *len) { int token; struct _searchterm **terms; int i=0; p(printf("parsing lists\n")); terms = g_malloc0(20*sizeof(*terms)); while ( (token = g_scanner_peek_next_token(gs)) != G_TOKEN_EOF && token != ')') { terms[i]=parse_list(gs, FALSE); i++; } if (len) *len = i; p(printf("found %d subterms\n", i)); p(printf("done parsing lists, token= %d %c\n", token, token)); return terms; } static struct _searchterm ** parse_values(GScanner *gs, int *len) { int token; struct _searchterm **terms; int i=0; p(printf("parsing values\n")); terms = g_malloc0(20*sizeof(*terms)); while ( (token = g_scanner_peek_next_token(gs)) != G_TOKEN_EOF && token != ')') { terms[i]=parse_value(gs); i++; } p(printf("found %d subterms\n", i)); *len = i; p(printf("dont parsing values\n")); return terms; } static struct _searchterm * parse_value(GScanner *gs) { int token; struct _searchterm *t = NULL; p(printf("parsing value\n")); token = g_scanner_get_next_token(gs); switch(token) { case G_TOKEN_LEFT_PAREN: p(printf("got brace, its a list!\n")); return parse_list(gs, TRUE); case G_TOKEN_STRING: p(printf("got string\n")); t = parse_new_term(SEARCH_STRING); t->value.string = g_strdup(g_scanner_cur_value(gs).v_string); break; case G_TOKEN_INT: t = parse_new_term(SEARCH_INT); t->value.number = g_scanner_cur_value(gs).v_int; p(printf("got int\n")); break; default: printf("Innvalid token trying to parse a list of values\n"); } p(printf("done parsing value\n")); return t; } /* FIXME: this needs some robustification */ static struct _searchterm * parse_list(GScanner *gs, int gotbrace) { int token; struct _searchterm *t = NULL; p(printf("parsing list\n")); if (gotbrace) token = '('; else token = g_scanner_get_next_token(gs); if (token =='(') { token = g_scanner_get_next_token(gs); if (token == G_TOKEN_SYMBOL) { struct _searchterm_symbol *s; s = g_scanner_cur_value(gs).v_symbol; p(printf("got funciton: %s\n", s->name)); t = parse_new_term(s->type); t->value.func.sym = s; p(printf("created new list %p\n", t)); switch(s->argtype) { case 0: /* it MUST be a list of lists */ t->value.func.terms = parse_lists(gs, &t->value.func.termcount); break; case 1: t->value.func.terms = parse_values(gs, &t->value.func.termcount); break; default: printf("Error, internal error parsing symbols\n"); } } else { printf("unknown sequence encountered, type = %d\n", token); } token = g_scanner_get_next_token(gs); if (token != ')') { printf("Error, expected ')' not found\n"); } } else { printf("Error, list term without opening (\n"); } p(printf("returning list %p\n", t)); return t; } GList * camel_mbox_folder_search_by_expression(CamelFolder *folder, char *expression, CamelException *ex) { GScanner *gs; int i; struct _searchterm *t; struct _searchcontext *ctx; struct _searchresult *r; GList *matches = NULL; gs = g_scanner_new(&scanner_config); for(i=0;ifolder = folder; ctx->summary = camel_folder_get_summary(folder, ex); ctx->message_info = camel_folder_summary_get_message_info_list(ctx->summary); #ifdef HAVE_IBEX ctx->index = ibex_open(CAMEL_MBOX_FOLDER(folder)->index_file_path, FALSE); if (!ctx->index) { perror("Cannot open index file, body searches will be ignored\n"); } #endif r = term_eval(ctx, t); /* now create a folder summary to return?? */ if (r && r->type == RESULT_ARRAY_PTR) { d(printf("got result ...\n")); for (i=0;ivalue.ptrarray->len;i++) { d(printf("adding match: %s\n", (char *)g_ptr_array_index(r->value.ptrarray, i))); matches = g_list_prepend(matches, g_strdup(g_ptr_array_index(r->value.ptrarray, i))); } result_free(r); } if (ctx->index) ibex_close(ctx->index); gtk_object_unref((GtkObject *)ctx->summary); g_free(ctx); parse_term_free(t); } else { printf("Warning, Could not parse expression!\n %s\n", expression); } g_scanner_destroy(gs); return matches; }