summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarl Worth <cworth@cworth.org>2009-10-19 16:38:44 -0700
committerCarl Worth <cworth@cworth.org>2009-10-19 16:38:44 -0700
commit371091139abb00e0eb6b17cd19311e2a30fd7470 (patch)
tree8e7c33e71057657100061ec18eab0701ff891fe2
parent45f0d7bcab72ff6b8375804d3e518f961bd9a96f (diff)
Rework message parsing to use getline rather than mmap.
The line-based parsing can be a bit awkward when wanting to peek ahead, (say, for folded header values), but it's so convenient to be able to trust that a string terminator exists on every line so it cleans up the code considerably.
-rw-r--r--message.c220
-rw-r--r--notmuch-private.h2
2 files changed, 104 insertions, 118 deletions
diff --git a/message.c b/message.c
index 97df4b2..03583c8 100644
--- a/message.c
+++ b/message.c
@@ -24,19 +24,25 @@
#include <glib.h> /* GHashTable */
+typedef struct {
+ char *str;
+ size_t size;
+ size_t len;
+} header_value_closure_t;
+
struct _notmuch_message {
- /* File objects */
- int fd;
- void *map;
+ /* File object */
+ FILE *file;
/* Header storage */
int restrict_headers;
GHashTable *headers;
/* Parsing state */
- char *start;
- size_t size;
- const char *next_line;
+ char *line;
+ size_t line_size;
+ header_value_closure_t value;
+
int parsing_started;
int parsing_finished;
};
@@ -66,20 +72,11 @@ notmuch_message_t *
notmuch_message_open (const char *filename)
{
notmuch_message_t *message;
- struct stat st;
message = xcalloc (1, sizeof (notmuch_message_t));
- message->fd = open (filename, O_RDONLY);
- if (message->fd < 0)
- goto FAIL;
-
- if (fstat (message->fd, &st) < 0)
- goto FAIL;
-
- message->map = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE,
- message->fd, 0);
- if (message->map == MAP_FAILED)
+ message->file = fopen (filename, "r");
+ if (message->file == NULL)
goto FAIL;
message->headers = g_hash_table_new_full (strcase_hash,
@@ -87,9 +84,6 @@ notmuch_message_open (const char *filename)
free,
free);
- message->start = (char *) message->map;
- message->size = st.st_size;
- message->next_line = message->start;
message->parsing_started = 0;
message->parsing_finished = 0;
@@ -111,10 +105,8 @@ notmuch_message_close (notmuch_message_t *message)
if (message->headers)
g_hash_table_unref (message->headers);
- if (message->map)
- munmap (message->map, message->size);
- if (message->fd)
- close (message->fd);
+ if (message->file)
+ fclose (message->file);
free (message);
}
@@ -151,80 +143,44 @@ notmuch_message_restrict_headers (notmuch_message_t *message, ...)
notmuch_message_restrict_headersv (message, va_headers);
}
-/* With our mmapped file, we don't get the benefit of terminated
- * strings, so we can't use things like strchr(). We don't even know
- * if there's a newline at the end of the file so we also have to be
- * careful of that. Basically, every time we advance a pointer while
- * parsing we must ensure we don't go beyond our buffer.
- */
-#define WITHIN(s) (((s) - message->start) < (message->size -1))
-
-/* In each of the macros below, "without overrunning the buffer" means
- * that the macro will never dereference a character beyond the end of
- * the buffer. However, all of the macros may return a pointer
- * pointing to the first character beyond the buffer. So callers
- * should test with WITHIN before dereferencing the result. */
-
-/* Advance 'ptr' until pointing at a non-space character in the same
- * line, (without overrunning the buffer) */
-#define SKIP_SPACE_IN_LINE(ptr) \
- while (WITHIN (ptr) && (*(ptr) == ' ' || *(ptr) == '\t')) \
- (ptr)++;
-
-/* Advance 'ptr' until pointing at a non-space character, (without
- * overrunning the buffer) */
-#define SKIP_SPACE(ptr) \
- while (WITHIN (ptr) && isspace(*(ptr))) \
- (ptr)++;
-
-/* Advance 'ptr' to the first occurrence of 'c' within the same
- * line, (without overrunning the buffer). */
-#define ADVANCE_TO(ptr, c) \
- while (WITHIN (ptr) && *(ptr) != '\n' && \
- *(ptr) != (c)) \
- { \
- (ptr)++; \
+void
+copy_header_unfolding (header_value_closure_t *value,
+ const char *chunk)
+{
+ char *last;
+
+ if (chunk == NULL)
+ return;
+
+ while (*chunk == ' ' || *chunk == '\t')
+ chunk++;
+
+ if (value->len + 1 + strlen (chunk) + 1 > value->size) {
+ int new_size = value->size;
+ if (value->size == 0)
+ new_size = strlen (chunk) + 1;
+ else
+ while (value->len + 1 + strlen (chunk) + 1 > new_size)
+ new_size *= 2;
+ value->str = xrealloc (value->str, new_size);
+ value->size = new_size;
}
-/* Advance 'ptr' to the beginning of the next line not starting with
- * an initial tab character, (without overruning the buffer). */
-#define ADVANCE_TO_NEXT_HEADER_LINE(ptr) \
- do { \
- ADVANCE_TO ((ptr), '\n'); \
- if (WITHIN (ptr)) \
- (ptr)++; \
- } while (WITHIN (ptr) && \
- (*(ptr) == '\t' || *(ptr) == ' '));
-
-char *
-copy_header_value (const char *start, const char *end)
-{
- const char *s;
- char *result, *r;
- int was_newline = 0;
-
- result = xmalloc (end - start + 1);
-
- s = start;
- r = result;
-
- while (s < end) {
- if (*s == '\n') {
- was_newline = 1;
- } else {
- if (*s == '\t' && was_newline)
- *r = ' ';
- else
- *r = *s;
- r++;
- was_newline = 0;
- }
- s++;
+ last = value->str + value->len;
+ if (value->len) {
+ *last = ' ';
+ last++;
+ value->len++;
}
- *r = '\0';
+ strcpy (last, chunk);
+ value->len += strlen (chunk);
- return result;
+ last = value->str + value->len - 1;
+ if (*last == '\n') {
+ *last = '\0';
+ value->len--;
+ }
}
const char *
@@ -232,8 +188,8 @@ notmuch_message_get_header (notmuch_message_t *message,
const char *header_desired)
{
int contains;
- const char *s, *colon;
char *header, *value;
+ const char *s, *colon;
int match;
message->parsing_started = 1;
@@ -247,54 +203,82 @@ notmuch_message_get_header (notmuch_message_t *message,
if (message->parsing_finished)
return NULL;
- while (1) {
- s = message->next_line;
+#define NEXT_HEADER_LINE(closure) \
+ do { \
+ ssize_t bytes_read = getline (&message->line, \
+ &message->line_size, \
+ message->file); \
+ if (bytes_read == -1) { \
+ message->parsing_finished = 1; \
+ break; \
+ } \
+ if (*message->line == '\n') { \
+ message->parsing_finished = 1; \
+ break; \
+ } \
+ if (closure && \
+ (*message->line == ' ' || *message->line == '\t')) \
+ { \
+ copy_header_unfolding ((closure), message->line); \
+ } \
+ } while (*message->line == ' ' || *message->line == '\t');
+
+ if (message->line == NULL)
+ NEXT_HEADER_LINE (NULL);
- if (*s == '\n') {
- message->parsing_finished = 1;
- return NULL;
- }
+ while (1) {
- if (*s == '\t') {
- fprintf (stderr, "Warning: Unexpected continued value\n");
- ADVANCE_TO_NEXT_HEADER_LINE (message->next_line);
- continue;
- }
+ if (message->parsing_finished)
+ break;
- colon = s;
- ADVANCE_TO (colon, ':');
+ colon = strchr (message->line, ':');
- if (! WITHIN (colon) || *colon == '\n') {
- fprintf (stderr, "Warning: Unexpected non-header line: %s\n", s);
- ADVANCE_TO_NEXT_HEADER_LINE (message->next_line);
+ if (colon == NULL) {
+ fprintf (stderr, "Warning: Unexpected non-header line: %s\n",
+ message->line);
+ NEXT_HEADER_LINE (NULL);
continue;
}
- header = xstrndup (s, colon - s);
+ header = xstrndup (message->line, colon - message->line);
if (message->restrict_headers &&
! g_hash_table_lookup_extended (message->headers,
header, NULL, NULL))
{
free (header);
- message->next_line = colon;
- ADVANCE_TO_NEXT_HEADER_LINE (message->next_line);
+ NEXT_HEADER_LINE (NULL);
continue;
}
s = colon + 1;
- SKIP_SPACE_IN_LINE (s);
+ while (*s == ' ' || *s == '\t')
+ s++;
- message->next_line = s;
- ADVANCE_TO_NEXT_HEADER_LINE (message->next_line);
+ message->value.len = 0;
+ copy_header_unfolding (&message->value, s);
- value = copy_header_value (s, message->next_line);
+ NEXT_HEADER_LINE (&message->value);
match = (strcasecmp (header, header_desired) == 0);
- g_hash_table_insert (message->headers, header, value);
+ g_hash_table_insert (message->headers, header,
+ xstrdup (message->value.str));
if (match)
return value;
}
+
+ if (message->line)
+ free (message->line);
+ message->line = NULL;
+
+ if (message->value.size) {
+ free (message->value.str);
+ message->value.str = NULL;
+ message->value.size = 0;
+ message->value.len = 0;
+ }
+
+ return NULL;
}
diff --git a/notmuch-private.h b/notmuch-private.h
index b7d27e9..449aff7 100644
--- a/notmuch-private.h
+++ b/notmuch-private.h
@@ -23,6 +23,8 @@
#include "notmuch.h"
+#define _GNU_SOURCE /* For getline */
+
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>