summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Dylan Baker <dylan@pnwbakers.com> 2018-04-10 11:08:09 -0700
committer: Dylan Baker <dylan@pnwbakers.com> 2018-04-10 11:08:09 -0700
commit: 3b46df146398736b870eee2668de8388762ad1ba (patch)
tree: c4c160f891cb28ba68f030bf91dcb5a91b33d547
parent: 1a164a386c555a9924e7f3402126c47094b02f8b (diff)
db/utils: attempt to unwrap Content-Transfer-Encodings manually
The builtin decoder (`get_payload(decode=True)`) assumes that content wrapped in a Content-Transfer-Encoding must be ASCII. That is pretty silly, since the reason to use something like base64 is to transfer something that is not ASCII, such as UTF-8. This commit attempts to handle the base64 and quoted-printable cases manually.
-rw-r--r-- alot/db/utils.py 35
1 files changed, 19 insertions, 16 deletions
diff --git a/alot/db/utils.py b/alot/db/utils.py
index 5303320c..7b2b2979 100644
--- a/alot/db/utils.py
+++ b/alot/db/utils.py
@@ -16,6 +16,8 @@ import re
import logging
import mailcap
import io
+import base64
+import quopri
from .. import crypto
from .. import helper
@@ -354,19 +356,21 @@ def extract_body(mail, types=None, field_key='copiousoutput'):
continue
enc = part.get_content_charset() or 'ascii'
- raw_payload = part.get_payload(decode=True)
- try:
- raw_payload = raw_payload.decode(enc)
- except UnicodeDecodeError:
- # If the message is not formatted ascii then get_payload with
- # decode=True will convert to raw-unicode-escape. if the encoding
- # that the message specifies doesn't work try this. It might be
- # better to handle the base64 and quoted-printable oursevles
- # instead of having to clean up like this.
- raw_payload = raw_payload.decode('raw-unicode-escape')
+ cte = str(part.get('content-transfer-encoding', '7bit')).lower()
+ payload = part.get_payload()
+ if cte not in ['7bit', '8bit']:
+ if cte == 'quoted-printable':
+ raw_payload = quopri.decodestring(payload.encode('ascii'))
+ elif cte == 'base64':
+ raw_payload = base64.b64decode(payload)
+ else:
+ raise Exception('Unknown Content-Transfer-Encoding {}'.format(cte))
+ # message.get_payload(decode=True) also handles a number of unicode
+ # encodings. Maybe those are useful?
+ payload = raw_payload.decode(enc)
if ctype == 'text/plain':
- body_parts.append(string_sanitize(raw_payload))
+ body_parts.append(string_sanitize(payload))
else:
# get mime handler
_, entry = settings.mailcap_find_match(ctype, key=field_key)
@@ -383,13 +387,12 @@ def extract_body(mail, types=None, field_key='copiousoutput'):
nametemplate = entry.get('nametemplate', '%s')
prefix, suffix = parse_mailcap_nametemplate(nametemplate)
with tempfile.NamedTemporaryFile(
- delete=False, prefix=prefix,
- suffix=suffix) \
- as tmpfile:
- tmpfile.write(raw_payload.encode(enc))
+ 'wt', delete=False, prefix=prefix, suffix=suffix,
+ encoding=enc) as tmpfile:
+ tmpfile.write(payload)
tempfile_name = tmpfile.name
else:
- stdin = raw_payload
+ stdin = payload
# read parameter, create handler command
parms = tuple('='.join(p) for p in part.get_params())