diff options
author | Dylan Baker <dylan@pnwbakers.com> | 2018-04-10 12:02:40 -0700 |
---|---|---|
committer | Dylan Baker <dylan@pnwbakers.com> | 2018-04-10 12:02:40 -0700 |
commit | 777823f414aab5dfa130174dc7c80cda8036d13f (patch) | |
tree | 693ade2764a00623be5e3e3c6186366482005dff /alot | |
parent | 3b46df146398736b870eee2668de8388762ad1ba (diff) |
db/utils: correctly handle 8bit encodings.
Because Python's mail library will use raw-unicode-escape for anything that
isn't ASCII, we need to encode back into the original raw bytes, then
decode into the proper encoding.
Diffstat (limited to 'alot')
-rw-r--r-- | alot/db/utils.py | 7 |
1 files changed, 6 insertions, 1 deletions
diff --git a/alot/db/utils.py b/alot/db/utils.py index 7b2b2979..c49019ce 100644 --- a/alot/db/utils.py +++ b/alot/db/utils.py @@ -358,11 +358,16 @@ def extract_body(mail, types=None, field_key='copiousoutput'): enc = part.get_content_charset() or 'ascii' cte = str(part.get('content-transfer-encoding', '7bit')).lower() payload = part.get_payload() - if cte not in ['7bit', '8bit']: + if cte != '7bit': if cte == 'quoted-printable': raw_payload = quopri.decodestring(payload.encode('ascii')) elif cte == 'base64': raw_payload = base64.b64decode(payload) + elif cte == '8bit': + # Python's mail library will decode 8bit as raw-unicode-escape, + # so we need to encode that back to bytes so we can decode it + # using the correct encoding. + raw_payload = payload.encode('raw-unicode-escape') else: raise Exception('Unknown Content-Transfer-Encoding {}'.format(cte)) # message.get_payload(decode=True) also handles a number of unicode |