diff options
author | Dylan Baker <dylan@pnwbakers.com> | 2018-03-06 15:32:50 -0800 |
---|---|---|
committer | Dylan Baker <dylan@pnwbakers.com> | 2018-03-06 16:41:51 -0800 |
commit | 0b5f8bf143e76be7ac989e8c8ec6e4a7dc08a78a (patch) | |
tree | 4e91c271764d7ae99a17bf26bd124825182190bc /alot/helper.py | |
parent | 637a1b44186612dc1119454eb1a2cffe20c0da40 (diff) |
require chardet
magic struggles to decode a number of encodings; in particular, it
struggles with windows-1252, which Exchange *loves* to silently
re-encode mail in, without updating the charset of the payload. Chardet
successfully guesses these oddball encodings much more often.
Diffstat (limited to 'alot/helper.py')
-rw-r--r-- | alot/helper.py | 30 |
1 files changed, 6 insertions, 24 deletions
diff --git a/alot/helper.py b/alot/helper.py index ba41986e..612e5dba 100644 --- a/alot/helper.py +++ b/alot/helper.py @@ -25,6 +25,7 @@ from email.mime.image import MIMEImage from email.mime.text import MIMEText from email.mime.multipart import MIMEMultipart +import chardet import urwid import magic from twisted.internet import reactor @@ -386,34 +387,15 @@ def guess_mimetype(blob): def guess_encoding(blob): - """ - uses file magic to determine the encoding of the given data blob. + """Use chardet to guess the encoding of a given data blob - :param blob: file content as read by file.read() - :type blob: data + :param blob: A blob of bytes + :type blob: bytes :returns: encoding :rtype: str """ - # this is a bit of a hack to support different versions of python magic. - # Hopefully at some point this will no longer be necessary - # - # the version with open() is the bindings shipped with the file source from - # http://darwinsys.com/file/ - this is what is used by the python-magic - # package on Debian/Ubuntu. However it is not available on pypi/via pip. - # - # the version with from_buffer() is available at - # https://github.com/ahupp/python-magic and directly installable via pip. - # - # for more detail see https://github.com/pazz/alot/pull/588 - if hasattr(magic, 'open'): - m = magic.open(magic.MAGIC_MIME_ENCODING) - m.load() - return m.buffer(blob) - elif hasattr(magic, 'from_buffer'): - m = magic.Magic(mime_encoding=True) - return m.from_buffer(blob) - else: - raise Exception('Unknown magic API') + info = chardet.detect(blob) + return info['encoding'] def try_decode(blob): |