diff options
author | Anton Khirnov <anton@khirnov.net> | 2020-03-05 08:05:29 +0100 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2020-03-05 09:59:02 +0100 |
commit | 46bb3a3efe8614478559a9594880367a8a419e53 (patch) | |
tree | e77a9acaee41a830778ca24546f66d2a8f53455f /alot/db/message.py | |
parent | 618a235fa30a6a83aebd1f4bda526deb9e9b630a (diff) |
db/utils: move message body extraction code into db/message
It is only called from there, so there is no reason to keep it
elsewhere.
Diffstat (limited to 'alot/db/message.py')
-rw-r--r-- | alot/db/message.py | 135 |
1 files changed, 134 insertions, 1 deletions
# Definitions added to alot/db/message.py by the diff shown on this page
# (diff --git a/alot/db/message.py b/alot/db/message.py,
# index 566a848b..6523f18b, mode 100644).
#
# First hunk (@@ -4,17 +4,21 @@), import section of the module:
#   unchanged: import email; import email.charset as charset;
#              import email.policy; from datetime import datetime;
#              from notmuch import NullPointerError; from . import utils;
#              from .attachment import Attachment; from .. import crypto;
#              from .. import helper; from ..errors import GPGProblem;
#              from ..settings.const import settings;
#              charset.add_charset('utf-8', charset.QP, charset.QP, 'utf-8')
#   added:     import mailcap; import tempfile;
#              from ..helper import parse_mailcap_nametemplate;
#              from ..helper import split_commandstring;
#              from ..helper import string_sanitize
#   removed:   from .utils import extract_body
#
# Second hunk (@@ -251,6 +255,135 @@): the definitions below are inserted
# after the end of `_decrypted_message_from_bytes` (its final `return enc`)
# and before `class _MessageHeaders` (whose body continues past this view).
#
# The code relies on the project names `settings`, `helper`,
# `parse_mailcap_nametemplate`, `split_commandstring` and `string_sanitize`
# from the import section listed above.

# `logging` and `os` are used below but are not among the imports visible in
# the diff, so they are imported explicitly here.
import logging
import os
import tempfile


def render_part(part, field_key='copiousoutput'):
    """
    Render a non-multipart email part into displayable plaintext by piping its
    payload through an external command.

    The command is taken from the ``view`` field of the mailcap entry that
    ``settings.mailcap_find_match`` returns for this part's content type.
    If the command contains ``%s`` the payload is written to a temporary
    file (named according to the entry's ``nametemplate``) whose path is
    substituted into the command; otherwise the payload is fed to the
    command on stdin.

    :param part: the part to render
    :type part: :class:`email.message.EmailMessage`
    :param str field_key: mailcap field that a matching entry must carry
    :returns: the first element of the tuple returned by ``helper.call_cmd``
              (presumably the handler's stdout -- confirm against helper),
              or None if there is no matching entry or that element is empty
    """
    # Only this function needs mailcap; note that the module is no longer
    # part of the standard library from Python 3.13 on (PEP 594).
    import mailcap

    ctype = part.get_content_type()
    raw_payload = remove_cte(part)

    # get mime handler
    _, entry = settings.mailcap_find_match(ctype, key=field_key)
    if entry is None:
        return None

    handler_raw_commandstring = entry['view']
    tempfile_name = None
    stdin = None
    try:
        # in case the mailcap defined command contains no '%s',
        # we pipe the file's content to the handling command via stdin
        if '%s' in handler_raw_commandstring:
            # open tempfile, respect mailcap's nametemplate
            nametemplate = entry.get('nametemplate', '%s')
            prefix, suffix = parse_mailcap_nametemplate(nametemplate)
            with tempfile.NamedTemporaryFile(
                    delete=False, prefix=prefix, suffix=suffix) as tmpfile:
                # remember the name first, so that the file is cleaned up
                # even if writing the payload fails
                tempfile_name = tmpfile.name
                tmpfile.write(raw_payload)
        else:
            stdin = raw_payload

        # read parameters; get_params() returns None when the part has no
        # Content-Type header
        parms = tuple('='.join(p) for p in (part.get_params() or ()))

        # create and call external command
        cmd = mailcap.subst(handler_raw_commandstring, ctype,
                            filename=tempfile_name, plist=parms)
        logging.debug('command: %s', cmd)
        logging.debug('parms: %s', str(parms))
        cmdlist = split_commandstring(cmd)
        stdout, _, _ = helper.call_cmd(cmdlist, stdin=stdin)
    finally:
        # remove the tempfile even when the handler call raised
        if tempfile_name:
            os.unlink(tempfile_name)

    return stdout or None


def remove_cte(part, as_string=False):
    """Interpret a MIME part according to its Content-Transfer-Encoding.

    This returns the payload of `part` as string or bytestring for display, or
    to be passed to an external program. In the raw file the payload may be
    encoded, e.g. in base64, quoted-printable, 7bit, or 8bit. Decoding of the
    transfer encoding is delegated to ``part.get_payload(decode=True)``.

    When `as_string` is true, the bytes are decoded using the part's declared
    charset (``ascii`` if none is declared; ``windows-N`` is mapped to
    ``cpN``). If that charset is unknown, or decoding fails, the decision is
    handed to ``helper.try_decode``.

    .. note:: All this may be deprecated in favour of
       `email.contentmanager.raw_data_manager` (v3.6+)

    :param email.message.EmailMessage part: The part to decode
    :param bool as_string: If true return a str, otherwise return bytes
    :returns: The payload with any Content-Transfer-Encoding removed. For a
              part without a decodable payload (``get_payload`` returns None,
              e.g. a multipart container) this is ``""`` when `as_string` is
              true and None otherwise.
    :rtype: Union[str, bytes, None]
    """
    payload = part.get_payload(decode=True)
    if not as_string:
        return payload

    if payload is None:
        # nothing to decode; still honour the promise to return a str
        return ""

    enc = part.get_content_charset('ascii')
    if enc.startswith('windows-'):
        enc = enc.replace('windows-', 'cp', 1)

    try:
        return payload.decode(enc, errors='backslashreplace')
    except LookupError:
        # enc is unknown;
        # fall back to guessing the correct encoding using libmagic
        return helper.try_decode(payload)
    except UnicodeDecodeError as emsg:
        # the mail contains chars that are not enc-encoded.
        # libmagic works better than just ignoring those
        logging.debug('Decoding failure: %s', emsg)
        return helper.try_decode(payload)


MISSING_HTML_MSG = ("This message contains a text/html part that was not "
                    "rendered due to a missing mailcap entry. "
                    "Please refer to item 5 in our FAQ: "
                    "http://alot.rtfd.io/en/latest/faq.html")


def extract_body(mail):
    """Returns a string view of a Message.

    This consults :ref:`prefer_plaintext <prefer-plaintext>`
    to determine if a "text/plain" alternative is preferred over a "text/html"
    part.

    A "text/plain" body is decoded directly; any other body part is passed
    to :func:`render_part`. If rendering yields nothing, a "text/html" part
    results in :data:`MISSING_HTML_MSG` and any other type in an empty
    string.

    :param mail: the mail to use
    :type mail: :class:`email.message.EmailMessage`
    :returns: The text of the part chosen for display
    :rtype: str
    """
    if settings.get('prefer_plaintext'):
        preferencelist = ('plain', 'html')
    else:
        preferencelist = ('html', 'plain')

    body_part = mail.get_body(preferencelist)
    if body_part is None:  # no part matching preferencelist was found
        return ""

    ctype = body_part.get_content_type()
    if ctype == 'text/plain':
        return string_sanitize(remove_cte(body_part, as_string=True))

    rendered_payload = render_part(body_part)
    if rendered_payload:  # handler had output
        return string_sanitize(rendered_payload)
    if ctype == 'text/html':
        return MISSING_HTML_MSG
    return ""

# (The diff's trailing context continues with `class _MessageHeaders:` /
# `_msg = None`; that class is outside this view and is not reproduced.)