diff options
author | Dylan Baker <dylan@pnwbakers.com> | 2018-03-12 11:03:11 -0700 |
---|---|---|
committer | Dylan Baker <dylan@pnwbakers.com> | 2018-03-12 11:03:11 -0700 |
commit | 10b46df578f08f54f879d561ccc7d061569fa7b4 (patch) | |
tree | d4e70e730211828084822a20b07c097d12cadeb0 | |
parent | 02279e2abf5a28a4f983a720d3a33ed94a4e117b (diff) |
db/utils: decode_header will be passed str, not bytes
I had assumed early on that this function would receive bytes, but when I
added `assert isinstance(header, bytes)`, alot crashed on startup;
changing `bytes` to `str` fixed that. I noticed this while trying to fix
the warning generated by the logging call.
-rw-r--r-- | alot/db/utils.py | 20 | ||||
-rw-r--r-- | tests/db/utils_test.py | 14 |
2 files changed, 12 insertions, 22 deletions
diff --git a/alot/db/utils.py b/alot/db/utils.py index 4f533306..5303320c 100644 --- a/alot/db/utils.py +++ b/alot/db/utils.py @@ -421,30 +421,20 @@ def decode_header(header, normalize=False): This turns it into a single unicode string :param header: the header value - :type header: bytes + :type header: str :param normalize: replace trailing spaces after newlines :type normalize: bool :rtype: str """ - # FIXME: this is just hacked until it works, mostly - - # If the value isn't ascii as RFC2822 prescribes, - # we just return the unicode bytestring as is - value = string_decode(header) # convert to unicode - try: - value = value.encode('ascii') - except UnicodeEncodeError: - return value - # some mailers send out incorrectly escaped headers # and double quote the escaped realname part again. remove those # RFC: 2047 - regex = br'"(=\?.+?\?.+?\?[^ ?]+\?=)"' - value = re.sub(regex, br'\1', value) - logging.debug(b"unquoted header: |%s|", value) + regex = r'"(=\?.+?\?.+?\?[^ ?]+\?=)"' + value = re.sub(regex, r'\1', header) + logging.debug("unquoted header: |%s|", value) # otherwise we interpret RFC2822 encoding escape sequences - valuelist = email.header.decode_header(value.decode('ascii')) + valuelist = email.header.decode_header(value) decoded_list = [] for v, enc in valuelist: v = string_decode(v, enc) diff --git a/tests/db/utils_test.py b/tests/db/utils_test.py index e3d3596a..b1187fe3 100644 --- a/tests/db/utils_test.py +++ b/tests/db/utils_test.py @@ -248,7 +248,7 @@ class TestDecodeHeader(unittest.TestCase): output = b'=?' + encoding.encode('ascii') + b'?Q?' for byte in string: output += b'=' + codecs.encode(bytes([byte]), 'hex').upper() - return output + b'?=' + return (output + b'?=').decode('ascii') @staticmethod def _base64(unicode_string, encoding): @@ -263,7 +263,7 @@ class TestDecodeHeader(unittest.TestCase): """ string = unicode_string.encode(encoding) b64 = base64.encodebytes(string).strip() - return b'=?' + encoding.encode('utf-8') + b'?B?' 
+ b64 + b'?=' + return (b'=?' + encoding.encode('utf-8') + b'?B?' + b64 + b'?=').decode('ascii') def _test(self, teststring, expected): @@ -306,17 +306,17 @@ class TestDecodeHeader(unittest.TestCase): def test_quoted_words_can_be_interrupted(self): part = u'ÄÖÜäöü' - text = self._base64(part, 'utf-8') + b' and ' + \ + text = self._base64(part, 'utf-8') + ' and ' + \ self._quote(part, 'utf-8') expected = u'ÄÖÜäöü and ÄÖÜäöü' self._test(text, expected) def test_different_encodings_can_be_mixed(self): part = u'ÄÖÜäöü' - text = b'utf-8: ' + self._base64(part, 'utf-8') + \ - b' again: ' + self._quote(part, 'utf-8') + \ - b' latin1: ' + self._base64(part, 'iso-8859-1') + \ - b' and ' + self._quote(part, 'iso-8859-1') + text = 'utf-8: ' + self._base64(part, 'utf-8') + \ + ' again: ' + self._quote(part, 'utf-8') + \ + ' latin1: ' + self._base64(part, 'iso-8859-1') + \ + ' and ' + self._quote(part, 'iso-8859-1') expected = u'utf-8: ÄÖÜäöü again: ÄÖÜäöü latin1: ÄÖÜäöü and ÄÖÜäöü' self._test(text, expected) |