From 3b46df146398736b870eee2668de8388762ad1ba Mon Sep 17 00:00:00 2001 From: Dylan Baker Date: Tue, 10 Apr 2018 11:08:09 -0700 Subject: db/utils: attempt to unwrap Content-Transfer-Encodings manually The builtin for this assumes that the content inside of a Content-Transfer-Encoding must be ascii. That is pretty silly, since the reason to use something like base64 is to transfer something that is not ascii, like utf-8. This attempts to handle the base64 and quoted-printable cases manually. --- alot/db/utils.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/alot/db/utils.py b/alot/db/utils.py index 5303320c..7b2b2979 100644 --- a/alot/db/utils.py +++ b/alot/db/utils.py @@ -16,6 +16,8 @@ import re import logging import mailcap import io +import base64 +import quopri from .. import crypto from .. import helper @@ -354,19 +356,21 @@ def extract_body(mail, types=None, field_key='copiousoutput'): continue enc = part.get_content_charset() or 'ascii' - raw_payload = part.get_payload(decode=True) - try: - raw_payload = raw_payload.decode(enc) - except UnicodeDecodeError: - # If the message is not formatted ascii then get_payload with - # decode=True will convert to raw-unicode-escape. if the encoding - # that the message specifies doesn't work try this. It might be - # better to handle the base64 and quoted-printable oursevles - # instead of having to clean up like this. - raw_payload = raw_payload.decode('raw-unicode-escape') + cte = str(part.get('content-transfer-encoding', '7bit')).lower() + payload = part.get_payload() + if cte not in ['7bit', '8bit']: + if cte == 'quoted-printable': + raw_payload = quopri.decodestring(payload.encode('ascii')) + elif cte == 'base64': + raw_payload = base64.b64decode(payload) + else: + raise Exception('Unknown Content-Transfer-Encoding {}'.format(cte)) + # message.get_payload(decode=True) also handles a number of unicode + # encodings. maybe those are useful? 
+ payload = raw_payload.decode(enc) if ctype == 'text/plain': - body_parts.append(string_sanitize(raw_payload)) + body_parts.append(string_sanitize(payload)) else: # get mime handler _, entry = settings.mailcap_find_match(ctype, key=field_key) @@ -383,13 +387,12 @@ def extract_body(mail, types=None, field_key='copiousoutput'): nametemplate = entry.get('nametemplate', '%s') prefix, suffix = parse_mailcap_nametemplate(nametemplate) with tempfile.NamedTemporaryFile( - delete=False, prefix=prefix, - suffix=suffix) \ - as tmpfile: - tmpfile.write(raw_payload.encode(enc)) + 'wt', delete=False, prefix=prefix, suffix=suffix, + encoding=enc) as tmpfile: + tmpfile.write(payload) tempfile_name = tmpfile.name else: - stdin = raw_payload + stdin = payload # read parameter, create handler command parms = tuple('='.join(p) for p in part.get_params()) -- cgit v1.2.3