summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--alot/mail/envelope.py71
1 files changed, 51 insertions, 20 deletions
diff --git a/alot/mail/envelope.py b/alot/mail/envelope.py
index bd214568..aa2a66e2 100644
--- a/alot/mail/envelope.py
+++ b/alot/mail/envelope.py
@@ -408,24 +408,57 @@ class Envelope:
if reset:
self.headers.clear()
- headerEndPos = 0
- if not only_body:
- # go through multiline, utf-8 encoded headers
- # locally, lines are separated by a simple LF, not CRLF
- # we decode the edited text ourselves here as
- # email.message_from_file can't deal with raw utf8 header values
- headerRe = re.compile(r'^(?P<k>.+?):(?P<v>(.|\n[ \t\r\f\v])+)$',
- re.MULTILINE)
- for header in headerRe.finditer(raw):
- if header.start() > headerEndPos + 1:
- break # switched to body
-
- key = header.group('k')
- # simple unfolding as decribed in
- # https://tools.ietf.org/html/rfc2822#section-2.2.3
- unfoldedValue = header.group('v').replace('\n', '')
- self.add(key, unfoldedValue.strip())
- headerEndPos = header.end()
+ if only_body:
+ self.body = raw
+ else:
+ # Split the raw string into headers and body.
+ # The string should be in "pseudo-email" format: a sequence of
+ # (possibly folded) headers, followed by an empty line, followed by
+ # the body. Since it may come from the user's text editor, we try to
+ # be lenient in parsing it.
+ #
+ # The differences from the real email format are:
+ # - line breaks do not have to be CRLF as per the RFC, but can be
+ # whatever the user's text editor wrote;
+ # - all text is a plain Unicode string, with no email encoding
+ # applied;
+ # - Attach headers are interpreted as instructions for us to attach
+ # the specified files.
+ # Since the email package cannot parse this (FIXME: might actually
+ # be possible with a custom policy - check this), we do it manually
+ # ourselves.
+
+ # Use bytes.splitlines(), so that only ASCII CR, LF, or CRLF are
+ # considered. str.splitlines() would also split on various Unicode
+ # line breaks, which we probably want to preserve.
+ lines = list(map(lambda l: l.decode('utf-8'),
+ raw.encode('utf-8').splitlines()))
+
+ # list of the last seen [header name, header value], for unfolding
+ prev_header = None
+ for i, l in enumerate(lines):
+ if l and l[0] in ' \t' and prev_header:
+ # continuation of a folded header
+ prev_header[1] += l
+ elif re.match('[!-9;-~]+:', l):
+ # beginning of a new header
+ # as per RFC5322 2.2, header names are ASCII chars 33-126
+ # except colon
+ if prev_header:
+ self.add(*prev_header)
+
+ prev_header = l.split(':', maxsplit = 1)
+ else:
+ # anything else is assumed to start the body
+
+ # skip the empty line separating headers from the body,
+ # if present
+ idx = i if (len(l) > 0 or i == 0) else i + 1
+ self.body = '\n'.join(lines[idx:])
+ break
+
+ if prev_header:
+ self.add(*prev_header)
# interpret 'Attach' pseudo header
if 'Attach' in self:
@@ -439,8 +472,6 @@ class Envelope:
self.attach_file(path)
del self['Attach']
- self.body = raw[headerEndPos:].strip()
-
_MAILTO_PREFIX = 'mailto:'
_MAILTO_SAFE_HEADERS = (HDR.SUBJECT, HDR.CC, HDR.KEYWORDS)