diff options
Diffstat (limited to 'alot/db/utils.py')
-rw-r--r-- | alot/db/utils.py | 78 |
1 files changed, 49 insertions, 29 deletions
diff --git a/alot/db/utils.py b/alot/db/utils.py index 14e6ebc9..20665a03 100644 --- a/alot/db/utils.py +++ b/alot/db/utils.py @@ -15,7 +15,7 @@ import tempfile import re import logging import mailcap -from io import BytesIO +import io from .. import crypto from .. import helper @@ -44,12 +44,11 @@ def add_signature_headers(mail, sigs, error_msg): :param error_msg: `str` containing an error message, the empty string indicating no error ''' - sig_from = u'' + sig_from = '' sig_known = True uid_trusted = False - if isinstance(error_msg, str): - error_msg = error_msg.decode('utf-8') + assert isinstance(error_msg, (str, bool)) if not sigs: error_msg = error_msg or u'no signature found' @@ -58,22 +57,22 @@ def add_signature_headers(mail, sigs, error_msg): key = crypto.get_key(sigs[0].fpr) for uid in key.uids: if crypto.check_uid_validity(key, uid.email): - sig_from = uid.uid.decode('utf-8') + sig_from = uid.uid uid_trusted = True break else: # No trusted uid found, since we did not break from the loop. 
- sig_from = key.uids[0].uid.decode('utf-8') + sig_from = key.uids[0].uid except GPGProblem: - sig_from = sigs[0].fpr.decode('utf-8') + sig_from = sigs[0].fpr sig_known = False if error_msg: - msg = u'Invalid: {}'.format(error_msg) + msg = 'Invalid: {}'.format(error_msg) elif uid_trusted: - msg = u'Valid: {}'.format(sig_from) + msg = 'Valid: {}'.format(sig_from) else: - msg = u'Untrusted: {}'.format(sig_from) + msg = 'Untrusted: {}'.format(sig_from) mail.add_header(X_SIGNATURE_VALID_HEADER, 'False' if (error_msg or not sig_known) else 'True') @@ -133,10 +132,11 @@ def _handle_signatures(original, message, params): if not malformed: try: sigs = crypto.verify_detached( - helper.email_as_string(message.get_payload(0)), - message.get_payload(1).get_payload()) + helper.email_as_bytes(message.get_payload(0)), + message.get_payload(1).get_payload().encode('ascii')) + # XXX: I think ascii is the right thing to use for the pgp signature except GPGProblem as e: - malformed = unicode(e) + malformed = str(e) add_signature_headers(original, sigs, malformed) @@ -170,15 +170,17 @@ def _handle_encrypted(original, message): malformed = u'expected Content-Type: {0}, got: {1}'.format(want, ct) if not malformed: + # This should be safe because PGP uses US-ASCII characters only + payload = message.get_payload(1).get_payload().encode('ascii') try: - sigs, d = crypto.decrypt_verify(message.get_payload(1).get_payload()) + sigs, d = crypto.decrypt_verify(payload) except GPGProblem as e: # signature verification failures end up here too if the combined # method is used, currently this prevents the interpretation of the # recovered plain text mail. maybe that's a feature. - malformed = unicode(e) + malformed = str(e) else: - n = message_from_string(d) + n = message_from_bytes(d) # add the decrypted message to message. note that n contains all # the attachments, no need to walk over n here. 
@@ -208,7 +210,7 @@ def _handle_encrypted(original, message): if malformed: msg = u'Malformed OpenPGP message: {0}'.format(malformed) - content = email.message_from_string(msg.encode('utf-8')) + content = email.message_from_string(msg) content.set_charset('utf-8') original.attach(content) @@ -265,14 +267,21 @@ def message_from_file(handle): def message_from_string(s): '''Reads a mail from the given string. This is the equivalent of :func:`email.message_from_string` which does nothing but to wrap - the given string in a BytesIO object and to call + the given string in a StringIO object and to call :func:`email.message_from_file`. Please refer to the documentation of :func:`message_from_file` for details. ''' - return message_from_file(BytesIO(s)) + return message_from_file(io.StringIO(s)) + + +def message_from_bytes(bytestring): + """Read mail from given bytes string. Works like message_from_string, but + for bytes. + """ + return message_from_file(io.StringIO(helper.try_decode(bytestring))) def extract_headers(mail, headers=None): @@ -344,8 +353,17 @@ def extract_body(mail, types=None, field_key='copiousoutput'): enc = part.get_content_charset() or 'ascii' raw_payload = part.get_payload(decode=True) + try: + raw_payload = raw_payload.decode(enc) + except UnicodeDecodeError: + # If the message is not formatted ascii then get_payload with + # decode=True will convert to raw-unicode-escape. If the encoding + # that the message specifies doesn't work, try this. It might be + # better to handle the base64 and quoted-printable ourselves + # instead of having to clean up like this. 
+ raw_payload = raw_payload.decode('raw-unicode-escape') + if ctype == 'text/plain': - raw_payload = string_decode(raw_payload, enc) body_parts.append(string_sanitize(raw_payload)) else: # get mime handler @@ -363,9 +381,10 @@ def extract_body(mail, types=None, field_key='copiousoutput'): nametemplate = entry.get('nametemplate', '%s') prefix, suffix = parse_mailcap_nametemplate(nametemplate) with tempfile.NamedTemporaryFile( - delete=False, prefix=prefix, suffix=suffix) \ + delete=False, prefix=prefix, + suffix=suffix) \ as tmpfile: - tmpfile.write(raw_payload) + tmpfile.write(raw_payload.encode(enc)) tempfile_name = tmpfile.name else: stdin = raw_payload @@ -400,11 +419,12 @@ def decode_header(header, normalize=False): This turns it into a single unicode string :param header: the header value - :type header: str + :type header: bytes :param normalize: replace trailing spaces after newlines :type normalize: bool - :rtype: unicode + :rtype: str """ + # FIXME: this is just hacked until it works, mostly # If the value isn't ascii as RFC2822 prescribes, # we just return the unicode bytestring as is @@ -417,17 +437,17 @@ def decode_header(header, normalize=False): # some mailers send out incorrectly escaped headers # and double quote the escaped realname part again. remove those # RFC: 2047 - regex = r'"(=\?.+?\?.+?\?[^ ?]+\?=)"' - value = re.sub(regex, r'\1', value) - logging.debug("unquoted header: |%s|", value) + regex = br'"(=\?.+?\?.+?\?[^ ?]+\?=)"' + value = re.sub(regex, br'\1', value) + logging.debug(b"unquoted header: |%s|", value) # otherwise we interpret RFC2822 encoding escape sequences - valuelist = email.header.decode_header(value) + valuelist = email.header.decode_header(value.decode('ascii')) decoded_list = [] for v, enc in valuelist: v = string_decode(v, enc) decoded_list.append(string_sanitize(v)) - value = u' '.join(decoded_list) + value = ''.join(decoded_list) if normalize: value = re.sub(r'\n\s+', r' ', value) return value |