summaryrefslogtreecommitdiff
path: root/alot/helper.py
diff options
context:
space:
mode:
authorDylan Baker <dylan@pnwbakers.com>2018-03-06 15:32:50 -0800
committerDylan Baker <dylan@pnwbakers.com>2018-03-06 16:41:51 -0800
commit0b5f8bf143e76be7ac989e8c8ec6e4a7dc08a78a (patch)
tree4e91c271764d7ae99a17bf26bd124825182190bc /alot/helper.py
parent637a1b44186612dc1119454eb1a2cffe20c0da40 (diff)
require chardet
magic struggles to decode a number of encodings; in particular, it struggles with windows-1252, which Exchange *loves* to silently re-encode mail in without updating the charset of the payload. Chardet successfully guesses these oddball encodings much more often.
Diffstat (limited to 'alot/helper.py')
-rw-r--r--alot/helper.py30
1 files changed, 6 insertions, 24 deletions
diff --git a/alot/helper.py b/alot/helper.py
index ba41986e..612e5dba 100644
--- a/alot/helper.py
+++ b/alot/helper.py
@@ -25,6 +25,7 @@ from email.mime.image import MIMEImage
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
+import chardet
import urwid
import magic
from twisted.internet import reactor
@@ -386,34 +387,15 @@ def guess_mimetype(blob):
def guess_encoding(blob):
- """
- uses file magic to determine the encoding of the given data blob.
+ """Use chardet to guess the encoding of a given data blob
- :param blob: file content as read by file.read()
- :type blob: data
+ :param blob: A blob of bytes
+ :type blob: bytes
:returns: encoding
:rtype: str
"""
- # this is a bit of a hack to support different versions of python magic.
- # Hopefully at some point this will no longer be necessary
- #
- # the version with open() is the bindings shipped with the file source from
- # http://darwinsys.com/file/ - this is what is used by the python-magic
- # package on Debian/Ubuntu. However it is not available on pypi/via pip.
- #
- # the version with from_buffer() is available at
- # https://github.com/ahupp/python-magic and directly installable via pip.
- #
- # for more detail see https://github.com/pazz/alot/pull/588
- if hasattr(magic, 'open'):
- m = magic.open(magic.MAGIC_MIME_ENCODING)
- m.load()
- return m.buffer(blob)
- elif hasattr(magic, 'from_buffer'):
- m = magic.Magic(mime_encoding=True)
- return m.from_buffer(blob)
- else:
- raise Exception('Unknown magic API')
+ info = chardet.detect(blob)
+ return info['encoding']
def try_decode(blob):