summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dylan Baker <dylan@pnwbakers.com>, 2018-03-12 11:03:11 -0700
committer: Dylan Baker <dylan@pnwbakers.com>, 2018-03-12 11:03:11 -0700
commit: 10b46df578f08f54f879d561ccc7d061569fa7b4 (patch)
tree: d4e70e730211828084822a20b07c097d12cadeb0
parent: 02279e2abf5a28a4f983a720d3a33ed94a4e117b (diff)
db/utils: decoded_headers will be passed str not bytes
I had made the assumption early on that this would get bytes, but when I added `assert isinstance(header, bytes)`, alot would crash on startup; changing `bytes` to `str` fixed that. I noticed this when trying to fix the warning generated in the logging call.
-rw-r--r-- alot/db/utils.py | 20
-rw-r--r-- tests/db/utils_test.py | 14
2 files changed, 12 insertions, 22 deletions
diff --git a/alot/db/utils.py b/alot/db/utils.py
index 4f533306..5303320c 100644
--- a/alot/db/utils.py
+++ b/alot/db/utils.py
@@ -421,30 +421,20 @@ def decode_header(header, normalize=False):
This turns it into a single unicode string
:param header: the header value
- :type header: bytes
+ :type header: str
:param normalize: replace trailing spaces after newlines
:type normalize: bool
:rtype: str
"""
- # FIXME: this is just hacked until it works, mostly
-
- # If the value isn't ascii as RFC2822 prescribes,
- # we just return the unicode bytestring as is
- value = string_decode(header) # convert to unicode
- try:
- value = value.encode('ascii')
- except UnicodeEncodeError:
- return value
-
# some mailers send out incorrectly escaped headers
# and double quote the escaped realname part again. remove those
# RFC: 2047
- regex = br'"(=\?.+?\?.+?\?[^ ?]+\?=)"'
- value = re.sub(regex, br'\1', value)
- logging.debug(b"unquoted header: |%s|", value)
+ regex = r'"(=\?.+?\?.+?\?[^ ?]+\?=)"'
+ value = re.sub(regex, r'\1', header)
+ logging.debug("unquoted header: |%s|", value)
# otherwise we interpret RFC2822 encoding escape sequences
- valuelist = email.header.decode_header(value.decode('ascii'))
+ valuelist = email.header.decode_header(value)
decoded_list = []
for v, enc in valuelist:
v = string_decode(v, enc)
diff --git a/tests/db/utils_test.py b/tests/db/utils_test.py
index e3d3596a..b1187fe3 100644
--- a/tests/db/utils_test.py
+++ b/tests/db/utils_test.py
@@ -248,7 +248,7 @@ class TestDecodeHeader(unittest.TestCase):
output = b'=?' + encoding.encode('ascii') + b'?Q?'
for byte in string:
output += b'=' + codecs.encode(bytes([byte]), 'hex').upper()
- return output + b'?='
+ return (output + b'?=').decode('ascii')
@staticmethod
def _base64(unicode_string, encoding):
@@ -263,7 +263,7 @@ class TestDecodeHeader(unittest.TestCase):
"""
string = unicode_string.encode(encoding)
b64 = base64.encodebytes(string).strip()
- return b'=?' + encoding.encode('utf-8') + b'?B?' + b64 + b'?='
+ return (b'=?' + encoding.encode('utf-8') + b'?B?' + b64 + b'?=').decode('ascii')
def _test(self, teststring, expected):
@@ -306,17 +306,17 @@ class TestDecodeHeader(unittest.TestCase):
def test_quoted_words_can_be_interrupted(self):
part = u'ÄÖÜäöü'
- text = self._base64(part, 'utf-8') + b' and ' + \
+ text = self._base64(part, 'utf-8') + ' and ' + \
self._quote(part, 'utf-8')
expected = u'ÄÖÜäöü and ÄÖÜäöü'
self._test(text, expected)
def test_different_encodings_can_be_mixed(self):
part = u'ÄÖÜäöü'
- text = b'utf-8: ' + self._base64(part, 'utf-8') + \
- b' again: ' + self._quote(part, 'utf-8') + \
- b' latin1: ' + self._base64(part, 'iso-8859-1') + \
- b' and ' + self._quote(part, 'iso-8859-1')
+ text = 'utf-8: ' + self._base64(part, 'utf-8') + \
+ ' again: ' + self._quote(part, 'utf-8') + \
+ ' latin1: ' + self._base64(part, 'iso-8859-1') + \
+ ' and ' + self._quote(part, 'iso-8859-1')
expected = u'utf-8: ÄÖÜäöü again: ÄÖÜäöü latin1: ÄÖÜäöü and ÄÖÜäöü'
self._test(text, expected)