diff options
field | value | date |
---|---|---|
author | pacien <pacien.trangirard@pacien.net> | 2019-11-15 17:00:07 +0100 |
committer | Patrick Totzke <patricktotzke@gmail.com> | 2019-11-23 08:36:14 +0000 |
commit | 45829f7956716638fe77989a50eb87d3ea34cace (patch) | |
tree | 31a2f974db95daa8fac629baa9ca5a19baf58294 | |
parent | f57505160af4304b5f99a83beb2316a12dcc90aa (diff) | |
envelope: correctly handle folded headers
-rw-r--r-- | alot/db/envelope.py | 56 | ||||
-rw-r--r-- | tests/db/test_envelope.py | 35 |
2 files changed, 46 insertions, 45 deletions
```diff
diff --git a/alot/db/envelope.py b/alot/db/envelope.py
index 291ee849..1852bae1 100644
--- a/alot/db/envelope.py
+++ b/alot/db/envelope.py
@@ -291,50 +291,42 @@ class Envelope:
 
         return outer_msg
 
-    def parse_template(self, tmp, reset=False, only_body=False):
+    def parse_template(self, raw, reset=False, only_body=False):
         """parses a template or user edited string to fills this envelope.
 
-        :param tmp: the string to parse.
-        :type tmp: str
+        :param raw: the string to parse.
+        :type raw: str
         :param reset: remove previous envelope content
         :type reset: bool
+        :param only_body: do not parse headers
+        :type only_body: bool
         """
-        logging.debug('GoT: """\n%s\n"""', tmp)
+        logging.debug('GoT: """\n%s\n"""', raw)
 
         if self.sent_time:
             self.modified_since_sent = True
 
-        if only_body:
-            self.body = tmp
-        else:
-            m = re.match(r'(?P<h>([a-zA-Z0-9_-]+:.+\n)*)\n?(?P<b>(\s*.*)*)',
-                         tmp)
-            assert m
-
-            d = m.groupdict()
-            headertext = d['h']
-            self.body = d['b']
-
-            # remove existing content
-            if reset:
-                self.headers = {}
+        if reset:
+            self.headers = {}
 
+        headerEndPos = 0
+        if not only_body:
             # go through multiline, utf-8 encoded headers
+            # locally, lines are separated by a simple LF, not CRLF
             # we decode the edited text ourselves here as
             # email.message_from_file can't deal with raw utf8 header values
-            key = value = None
-            for line in headertext.splitlines():
-                if re.match('[a-zA-Z0-9_-]+:', line):  # new k/v pair
-                    if key and value:  # save old one from stack
-                        self.add(key, value)  # save
-                    key, value = line.strip().split(':', 1)  # parse new pair
-                    # strip spaces, otherwise we end up having " foo" as value
-                    # of "Subject: foo"
-                    value = value.strip()
-                elif key and value:  # append new line without key prefix
-                    value += line
-            if key and value:  # save last one if present
-                self.add(key, value)
+            headerRe = re.compile(r'^(?P<k>.+):(?P<v>(.|\n[ \t\r\f\v])+)$',
+                                  re.MULTILINE)
+            for header in headerRe.finditer(raw):
+                if header.start() > headerEndPos + 1:
+                    break  # switched to body
+
+                key = header.group('k')
+                # simple unfolding as decribed in
+                # https://tools.ietf.org/html/rfc2822#section-2.2.3
+                unfoldedValue = header.group('v').replace('\n', '')
+                self.add(key, unfoldedValue.strip())
+                headerEndPos = header.end()
 
             # interpret 'Attach' pseudo header
             if 'Attach' in self:
@@ -347,3 +339,5 @@ class Envelope:
                 for path in to_attach:
                     self.attach(path)
                 del self['Attach']
+
+        self.body = raw[headerEndPos:].strip()
diff --git a/tests/db/test_envelope.py b/tests/db/test_envelope.py
index be318b0d..b7612f37 100644
--- a/tests/db/test_envelope.py
+++ b/tests/db/test_envelope.py
@@ -28,20 +28,6 @@ SETTINGS = {
 }
 
 
-def email_to_dict(mail):
-    """Consumes an email, and returns a dict of headers and 'Body'."""
-    split = mail.splitlines()
-    final = {}
-    for line in split:
-        if line.strip():
-            try:
-                k, v = line.split(':')
-                final[k.strip()] = v.strip()
-            except ValueError:
-                final['Body'] = line.strip()
-    return final
-
-
 class TestEnvelope(unittest.TestCase):
 
     def assertEmailEqual(self, first, second):
@@ -100,3 +86,24 @@ class TestEnvelope(unittest.TestCase):
             e.attach(f.name)
 
         self._test_mail(e)
+
+    @mock.patch('alot.db.envelope.settings', SETTINGS)
+    def test_parse_template(self):
+        """Tests multi-line header and body parsing"""
+        raw = (
+            'From: foo@example.com\n'
+            'To: bar@example.com,\n'
+            ' baz@example.com\n'
+            'Subject: Test email\n'
+            '\n'
+            'Some body content: which is not a header.\n'
+        )
+        envlp = envelope.Envelope()
+        envlp.parse_template(raw)
+        self.assertDictEqual(envlp.headers, {
+            'From': ['foo@example.com'],
+            'To': ['bar@example.com, baz@example.com'],
+            'Subject': ['Test email']
+        })
+        self.assertEqual(envlp.body,
+                         'Some body content: which is not a header.')
```