From 2723c5fba64556af7aadf82916a765cbecb0bda5 Mon Sep 17 00:00:00 2001
From: Jeffrey Stedfast <fejj@ximian.com>
Date: Wed, 4 Apr 2001 02:00:25 +0000
Subject: Added tests/ back in

2001-04-03  Jeffrey Stedfast  <fejj@ximian.com>

	* Makefile.am: Added tests/ back in

	* camel-mime-utils.c (header_decode_text): Ignore whitespace
	between encoded words (there were a few cases where it didn't
	before).
	(header_encode_string): Preserve whitespace between words that are
	to be encoded by encoding them too.
	(header_encode_phrase): Same here but with phrases.

svn path=/trunk/; revision=9158
---
 camel/camel-mime-utils.c | 337 +++++++++++++++++++++++++++++------------------
 1 file changed, 207 insertions(+), 130 deletions(-)

(limited to 'camel/camel-mime-utils.c')

diff --git a/camel/camel-mime-utils.c b/camel/camel-mime-utils.c
index 71600e44bf..ed6589819f 100644
--- a/camel/camel-mime-utils.c
+++ b/camel/camel-mime-utils.c
@@ -1022,65 +1022,69 @@ static char *
 header_decode_text (const char *in, int inlen)
 {
 	GString *out;
-	char *inptr, *inend, *start;
+	char *inptr, *inend, *start, *word_start;
 	char *decoded;
-	unsigned char lastc = 0;
-	int wasdword = FALSE;
+	gboolean wasdword = FALSE;
+	gboolean wasspace = FALSE;
 	
 	out = g_string_new ("");
 	start = inptr = (char *) in;
 	inend = inptr + inlen;
 	
+	word_start = NULL;
 	while (inptr && inptr < inend) {
 		unsigned char c = *inptr++;
 		
-		if (is_lwsp (c)) {
+		if (is_lwsp (c) && !wasspace) {
 			char *word, *dword;
-			guint len;
 			
-			len = inptr - start - 1;
-			word = start;
+			if (word_start)
+				word = word_start;
+			else
+				word = start;
 			
-			dword = rfc2047_decode_word (word, len);
+			dword = rfc2047_decode_word (word, inptr - word - 1);
 			
 			if (dword) {
-				if (!wasdword && lastc)
-					g_string_append_c (out, lastc);
-					
+				if (!wasdword && word_start)
+					g_string_append_len (out, start, word_start - start);
+				
 				g_string_append (out, dword);
 				g_free (dword);
-				lastc = c;
 				wasdword = TRUE;
 			} else {
-				if (lastc)
-					g_string_append_c (out, lastc);
-				out = append_latin1 (out, word, len);
-				lastc = c;
+				out = append_latin1 (out, start, inptr - start - 1);
 				wasdword = FALSE;
 			}
 			
-			start = inptr;
+			start = inptr - 1;
+			word_start = NULL;
+			wasspace = TRUE;
+		} else if (!is_lwsp (c)) {
+			wasspace = FALSE;
+			if (!word_start)
+				word_start = inptr - 1;
 		}
 	}
 	
 	if (inptr - start) {
 		char *word, *dword;
-		guint len;
 		
-		len = inptr - start;
-		word = start;
+		if (word_start)
+			word = word_start;
+		else
+			word = start;
 		
-		dword = rfc2047_decode_word (word, len);
+		dword = rfc2047_decode_word (word, inptr - word);
 		
 		if (dword) {
-			if (!wasdword && lastc)
-				g_string_append_c (out, lastc);
+			if (!wasdword && word_start)
+				g_string_append_len (out, start, word_start - start);
+			
 			g_string_append (out, dword);
 			g_free (dword);
 		} else {
-			if (lastc)
-				g_string_append_c (out, lastc);
-			out = append_latin1 (out, word, len);
+			out = append_latin1 (out, start, inptr - start);
 		}
 	}
 	
@@ -1243,82 +1247,125 @@ rfc2047_encode_word(GString *outstring, const char *in, int len, const char *typ
 
 /* TODO: Should this worry about quotes?? */
 char *
-header_encode_string(const unsigned char *in)
+header_encode_string (const unsigned char *in)
 {
-	GString *out;
-	const unsigned char *inptr = in, *start;
+	const unsigned char *inptr = in, *start, *word;
+	gboolean last_was_encoded = FALSE;
+	gboolean last_was_space = FALSE;
 	int encoding;
+	GString *out;
 	char *outstr;
-
+	
 	if (in == NULL)
 		return NULL;
-
+	
 	/* do a quick us-ascii check (the common case?) */
 	while (*inptr) {
 		if (*inptr > 127)
 			break;
 		inptr++;
 	}
-	if (*inptr == 0)
-		return g_strdup(in);
-
+	if (*inptr == '\0')
+		return g_strdup (in);
+	
 	/* This gets each word out of the input, and checks to see what charset
 	   can be used to encode it. */
 	/* TODO: Work out when to merge subsequent words, or across word-parts */
-	out = g_string_new("");
+	out = g_string_new ("");
 	inptr = in;
 	encoding = 0;
+	word = NULL;
 	start = inptr;
 	while (inptr && *inptr) {
 		unicode_char_t c;
 		const char *newinptr;
-		newinptr = unicode_get_utf8(inptr, &c);
+		
+		newinptr = unicode_get_utf8 (inptr, &c);
 		if (newinptr == NULL) {
-			w(g_warning("Invalid UTF-8 sequence encountered (pos %d, char '%c'): %s", (inptr-in), inptr[0], in));
+			w(g_warning ("Invalid UTF-8 sequence encountered (pos %d, char '%c'): %s",
+				     (inptr-in), inptr[0], in));
 			inptr++;
 			continue;
 		}
-		inptr = newinptr;
-		if (unicode_isspace(c)) {
+		
+		if (unicode_isspace (c) && !last_was_space) {
 			/* we've reached the end of a 'word' */
+			if (word && !(last_was_encoded && encoding)) {
+				g_string_append_len (out, start, word - start);
+				start = word;
+			}
+			
 			switch (encoding) {
 			case 0:
-				out = g_string_append_len(out, start, inptr-start);
+				out = g_string_append_len (out, start, inptr - start); /* not 'word': it may still be NULL */
+				last_was_encoded = FALSE;
 				break;
 			case 1:
-				rfc2047_encode_word(out, start, inptr-start-1, "ISO-8859-1", IS_ESAFE);
-				out = g_string_append_c (out, c);
+				if (last_was_encoded)
+					g_string_append_c (out, ' ');
+				
+				rfc2047_encode_word (out, start, inptr - start, "ISO-8859-1", IS_ESAFE);
+				last_was_encoded = TRUE;
 				break;
 			case 2:
-				rfc2047_encode_word(out, start, inptr-start-1,
-						    camel_charset_best(start, inptr-start-1), IS_ESAFE);
-				out = g_string_append_c(out, c);
+				if (last_was_encoded)
+					g_string_append_c (out, ' ');
+				
+				rfc2047_encode_word (out, start, inptr - start,
+						     camel_charset_best (start, inptr - start), IS_ESAFE);
+				last_was_encoded = TRUE;
 				break;
 			}
+			
+			last_was_space = TRUE;
 			start = inptr;
+			word = NULL;
 			encoding = 0;
 		} else if (c > 127 && c < 256) {
-			encoding = MAX(encoding, 1);
+			encoding = MAX (encoding, 1);
+			last_was_space = FALSE;
 		} else if (c >= 256) {
-			encoding = MAX(encoding, 2);
+			encoding = MAX (encoding, 2);
+			last_was_space = FALSE;
+		} else if (!unicode_isspace (c)) {
+			last_was_space = FALSE;
 		}
+		
+		if (!unicode_isspace (c) && !word)
+			word = inptr;
+		
+		inptr = newinptr;
 	}
-	if (inptr-start) {
+	
+	if (inptr - start) {
+		if (word && !(last_was_encoded && encoding)) {
+			g_string_append_len (out, start, word - start);
+			start = word;
+		}
+		
 		switch (encoding) {
 		case 0:
-			out = g_string_append_len(out, start, inptr-start);
+			out = g_string_append_len (out, start, inptr - start);
 			break;
 		case 1:
-			rfc2047_encode_word(out, start, inptr-start, "ISO-8859-1", IS_ESAFE);
+			if (last_was_encoded)
+				g_string_append_c (out, ' ');
+			
+			rfc2047_encode_word (out, start, inptr - start, "ISO-8859-1", IS_ESAFE);
 			break;
 		case 2:
-			rfc2047_encode_word(out, start, inptr-start,
-					    camel_charset_best(start, inptr-start-1), IS_ESAFE);
+			if (last_was_encoded)
+				g_string_append_c (out, ' ');
+			
+			rfc2047_encode_word (out, start, inptr - start,
+					     camel_charset_best (start, inptr - start), IS_ESAFE); /* same span as is encoded */
 			break;
 		}
 	}
+	
 	outstr = out->str;
-	g_string_free(out, FALSE);
+	g_string_free (out, FALSE);
+	
 	return outstr;
 }
 
@@ -1359,110 +1406,102 @@ struct _phrase_word {
    merge common word types
    clean up
 */
-/* encodes a phrase sequence (different quoting/encoding rules to strings) */
-char *
-header_encode_phrase(const unsigned char *in)
+
+static GList *
+header_encode_phrase_get_words (const unsigned char *in)
 {
-	GString *out;
 	const unsigned char *inptr = in, *start, *last;
-	int encoding;
-	char *outstr;
-	struct _phrase_word *word, *next;
+	struct _phrase_word *word;
 	enum _phrase_word_t type;
-	GList *words = NULL, *wordl, *nextl;
-	int count;
-
-	if (in == NULL)
-		return NULL;
-
-	out = g_string_new("");
-
-#if 0
-	{
-		int i;
-
-		printf("encoding phrase: %s\n", in);
-		for (i=0;in[i];i++) {
-			printf(" %02x", in[i]);
-			if (((i) & 15) == 15)
-				printf("\n");
-		}
-		printf("\n");
-	}
-#endif
-
+	int encoding, count = 0;
+	GList *words = NULL;
+	
 	/* break the input into words */
 	type = WORD_ATOM;
-	count = 0;
 	last = inptr;
 	start = inptr;
 	encoding = 0;
 	while (inptr && *inptr) {
 		unicode_char_t c;
 		const char *newinptr;
-		newinptr = unicode_get_utf8(inptr, &c);
+		
+		newinptr = unicode_get_utf8 (inptr, &c);
 		if (newinptr == NULL) {
-			w(g_warning("Invalid UTF-8 sequence encountered (pos %d, char '%c'): %s", (inptr-in), inptr[0], in));
+			w(g_warning ("Invalid UTF-8 sequence encountered (pos %d, char '%c'): %s",
+				     (inptr - in), inptr[0], in));
 			inptr++;
 			continue;
 		}
+		
 		inptr = newinptr;
-		/* save this word out, multiple whitespace is not explicitly counted (?) */
-		if (unicode_isspace(c)) {
+		if (unicode_isspace (c)) {
 			if (count > 0) {
-				word = g_malloc0(sizeof(*word));
+				word = g_new0 (struct _phrase_word, 1);
 				word->start = start;
 				word->end = last;
 				word->type = type;
 				word->encoding = encoding;
-				words = g_list_append(words, word);
+				words = g_list_append (words, word);
 				count = 0;
 			}
+			
 			start = inptr;
 			type = WORD_ATOM;
 			encoding = 0;
 		} else {
 			count++;
-			if (c<128) {
-				if (!is_atom(c))
-					type = MAX(type, WORD_QSTRING);
-			} else if (c>127 && c < 256) {
+			if (c < 128) {
+				if (!is_atom (c))
+					type = MAX (type, WORD_QSTRING);
+			} else if (c > 127 && c < 256) {
 				type = WORD_2047;
-				encoding = MAX(encoding, 1);
-			} else if (c >=256) {
+				encoding = MAX (encoding, 1);
+			} else if (c >= 256) {
 				type = WORD_2047;
-				encoding = MAX(encoding, 2);
+				encoding = MAX (encoding, 2);
 			}
 		}
+		
 		last = inptr;
 	}
+	
 	if (count > 0) {
-		word = g_malloc0(sizeof(*word));
+		word = g_new0 (struct _phrase_word, 1);
 		word->start = start;
 		word->end = last;
 		word->type = type;
 		word->encoding = encoding;
-		words = g_list_append(words, word);
+		words = g_list_append (words, word);
 	}
+	
+	return words;
+}
 
-	/* now scan the list, checking for words of similar types that can be merged */
+static void
+header_encode_phrase_merge_words (GList **wordsp)
+{
+	GList *wordl, *nextl, *words = *wordsp;
+	struct _phrase_word *word, *next;
+	
+	/* scan the list, checking for words of similar types that can be merged */
 	wordl = words;
 	while (wordl) {
 		word = wordl->data;
 		/* leave atoms as atoms (unless they're surrounded by quoted words??) */
 		if (word->type != WORD_ATOM) {
-			nextl = g_list_next(wordl);
+			nextl = g_list_next (wordl);
 			while (nextl) {
 				next = nextl->data;
 				/* merge nodes of the same type AND we are not creating too long a string */
 				if (word->type == next->type) {
 					if (next->end - word->start < CAMEL_FOLD_PREENCODED) {
 						word->end = next->end;
-						words = g_list_remove_link(words, nextl);
-						g_free(next);
-						nextl = g_list_next(wordl);
+						words = g_list_remove_link (words, nextl);
+						g_free (next);
+						nextl = g_list_next (wordl);
 					} else {
-						/* if it is going to be too long, make sure we include the separating whitespace */
+						/* if it is going to be too long, make sure we include the
+						   separating whitespace */
 						word->end = next->start;
 						break;
 					}
@@ -1471,51 +1510,89 @@ header_encode_phrase(const unsigned char *in)
 				}
 			}
 		}
-		wordl = g_list_next(wordl);
+		wordl = g_list_next (wordl);
 	}
+	
+	*wordsp = words;
+}
 
+/* encodes a phrase sequence (different quoting/encoding rules to strings) */
+char *
+header_encode_phrase (const unsigned char *in)
+{
+	struct _phrase_word *word = NULL, *last_word = NULL;
+	GList *words, *wordl;
+	GString *out;
+	char *outstr;
+	
+	if (in == NULL)
+		return NULL;
+	
+	words = header_encode_phrase_get_words (in);
+	if (!words)
+		return NULL;
+	
+	header_encode_phrase_merge_words (&words);
+	
+	out = g_string_new ("");
+	
 	/* output words now with spaces between them */
 	wordl = words;
 	while (wordl) {
+		const char *start;
+		int len;
+		
 		word = wordl->data;
+		
+		/* append correct number of spaces between words */
+		if (last_word && !(last_word->type == WORD_2047 && word->type == WORD_2047)) {
+			/* one or both of the words are not encoded so we write the spaces out untouched */
+			len = word->start - last_word->end;
+			out = g_string_append_len (out, last_word->end, len);
+		}
+		
 		switch (word->type) {
 		case WORD_ATOM:
-			out = g_string_append_len(out, word->start, word->end-word->start);
+			out = g_string_append_len (out, word->start, word->end - word->start);
 			break;
 		case WORD_QSTRING:
-			quote_word(out, TRUE, word->start, word->end-word->start);
+			quote_word (out, TRUE, word->start, word->end - word->start);
 			break;
 		case WORD_2047:
+			if (last_word && last_word->type == WORD_2047) {
+				/* include the whitespace chars between these 2 words in the
+                                   resulting rfc2047 encoded word. */
+				len = word->end - last_word->end;
+				start = last_word->end;
+				
+				/* encoded words need to be separated by linear whitespace */
+				g_string_append_c (out, ' ');
+			} else {
+				len = word->end - word->start;
+				start = word->start;
+			}
+			
 			if (word->encoding == 1)
-				rfc2047_encode_word(out, word->start, word->end-word->start, "ISO-8859-1", IS_PSAFE);
+				rfc2047_encode_word (out, start, len, "ISO-8859-1", IS_PSAFE);
 			else
-				rfc2047_encode_word(out, word->start, word->end-word->start,
-						    camel_charset_best(word->start, word->end-word->start), IS_PSAFE);
+				rfc2047_encode_word (out, start, len,
+						     camel_charset_best (start, len), IS_PSAFE);
 			break;
 		}
-
-		/* copy across the right number of spaces between words */
-		nextl = g_list_next(wordl);
-		if (nextl) {
-			int i;
-			next = nextl->data;
-			/* if they are adjacent, it means we already had the spaces encoded internally,
-			   so now we just need to output 1 space */
-			i=next->start-word->end;
-			if (i==0)
-				i=1;
-			for (;i>0;i--)
-				out = g_string_append_c(out, ' ');
-		}
-
-		g_free(word);
-		wordl = g_list_next(wordl);
+		
+		g_free (last_word);
+		wordl = g_list_next (wordl);
+		
+		last_word = word;
 	}
+	
 	/* and we no longer need the list */
-	g_list_free(words);
-
+	g_free (word);
+	g_list_free (words);
+	
 	outstr = out->str;
-	g_string_free(out, FALSE);
+	g_string_free (out, FALSE);
+	
 	return outstr;
 }
 
-- 
cgit v1.2.3