summary | refs | log | tree | commit | diff
path: root/alot/db
diff options
context:
space:
mode:
author Anton Khirnov <anton@khirnov.net> 2018-11-29 20:05:57 +0100
committer Anton Khirnov <anton@khirnov.net> 2020-02-03 13:00:02 +0100
commit 11c5995ff803850d41eb93e5bfe7488722b11892 (patch)
tree ffca85c217316c434829e7c4c381e6c85974de4c /alot/db
parent c96eb5766496a050cea2f350bf5ef24b066888cd (diff)
Let python decode the message transfer encoding.
Diffstat (limited to 'alot/db')
-rw-r--r-- alot/db/utils.py 57
1 files changed, 9 insertions, 48 deletions
diff --git a/alot/db/utils.py b/alot/db/utils.py
index 97406e3a..86c4718b 100644
--- a/alot/db/utils.py
+++ b/alot/db/utils.py
@@ -398,64 +398,25 @@ def remove_cte(part, as_string=False):
:returns: The mail with any Content-Transfer-Encoding removed
:rtype: Union[str, bytes]
"""
- enc = part.get_content_charset() or 'ascii'
- cte = str(part.get('content-transfer-encoding', '7bit')).lower().strip()
- payload = part.get_payload()
- sp = '' # string variant of return value
- bp = b'' # bytestring variant
-
- logging.debug('Content-Transfer-Encoding: "{}"'.format(cte))
- if cte not in ['quoted-printable', 'base64', '7bit', '8bit', 'binary']:
- logging.info('Unknown Content-Transfer-Encoding: "{}"'.format(cte))
-
- # switch through all sensible cases
- # starting with those where payload is already a str
- if '7bit' in cte or 'binary' in cte:
- logging.debug('assuming Content-Transfer-Encoding: 7bit')
- sp = payload
- if as_string:
- return sp
- bp = payload.encode('utf-8')
- return bp
-
- # the remaining cases need decoding and define only bt;
- # decoding into a str is done at the end if requested
- elif '8bit' in cte:
- logging.debug('assuming Content-Transfer-Encoding: 8bit')
- # Python's mail library may decode 8bit as raw-unicode-escape, so
- # we need to encode that back to bytes so we can decode it using
- # the correct encoding, or it might not, in which case assume that
- # the str representation we got is correct.
- bp = payload.encode('raw-unicode-escape')
-
- elif 'quoted-printable' in cte:
- logging.debug('assuming Content-Transfer-Encoding: quoted-printable')
- bp = quopri.decodestring(payload.encode('ascii'))
-
- elif 'base64' in cte:
- logging.debug('assuming Content-Transfer-Encoding: base64')
- bp = base64.b64decode(payload)
-
- else:
- logging.debug('failed to interpret Content-Transfer-Encoding: '
- '"{}"'.format(cte))
-
- # by now, bp is defined, sp is not.
+ payload = part.get_payload(decode = True)
if as_string:
+ enc = part.get_content_charset('ascii')
+ if enc.startswith('windows-'):
+ enc = enc.replace('windows-', 'cp', 1)
+
try:
- sp = bp.decode(enc)
+ payload = payload.decode(enc, errors = 'backslashreplace')
except LookupError:
# enc is unknown;
# fall back to guessing the correct encoding using libmagic
- sp = helper.try_decode(bp)
+ payload = helper.try_decode(payload)
except UnicodeDecodeError as emsg:
# the mail contains chars that are not enc-encoded.
# libmagic works better than just ignoring those
logging.debug('Decoding failure: {}'.format(emsg))
- sp = helper.try_decode(bp)
- return sp
- return bp
+ payload = helper.try_decode(payload)
+ return payload
MISSING_HTML_MSG = ("This message contains a text/html part that was not "
"rendered due to a missing mailcap entry. "