diff options
author | Anton Khirnov <anton@khirnov.net> | 2020-03-05 08:05:29 +0100 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2020-03-05 09:59:02 +0100 |
commit | 46bb3a3efe8614478559a9594880367a8a419e53 (patch) | |
tree | e77a9acaee41a830778ca24546f66d2a8f53455f /alot/db | |
parent | 618a235fa30a6a83aebd1f4bda526deb9e9b630a (diff) |
db/utils: move message body extraction code into db/message
It is only called from there, so there is no reason to keep it
elsewhere.
Diffstat (limited to 'alot/db')
-rw-r--r-- | alot/db/message.py | 135 | ||||
-rw-r--r-- | alot/db/utils.py | 138 |
2 files changed, 134 insertions, 139 deletions
diff --git a/alot/db/message.py b/alot/db/message.py index 566a848b..6523f18b 100644 --- a/alot/db/message.py +++ b/alot/db/message.py @@ -4,17 +4,21 @@ import email import email.charset as charset import email.policy +import mailcap +import tempfile from datetime import datetime from notmuch import NullPointerError from . import utils -from .utils import extract_body from .attachment import Attachment from .. import crypto from .. import helper from ..errors import GPGProblem +from ..helper import parse_mailcap_nametemplate +from ..helper import split_commandstring +from ..helper import string_sanitize from ..settings.const import settings charset.add_charset('utf-8', charset.QP, charset.QP, 'utf-8') @@ -251,6 +255,135 @@ def _decrypted_message_from_bytes(bytestring, session_keys = None): return enc +def render_part(part, field_key='copiousoutput'): + """ + renders a non-multipart email part into displayable plaintext by piping its + payload through an external script. The handler itself is determined by + the mailcap entry for this part's ctype. + """ + ctype = part.get_content_type() + raw_payload = remove_cte(part) + rendered_payload = None + # get mime handler + _, entry = settings.mailcap_find_match(ctype, key=field_key) + if entry is not None: + tempfile_name = None + stdin = None + handler_raw_commandstring = entry['view'] + # in case the mailcap defined command contains no '%s', + # we pipe the files content to the handling command via stdin + if '%s' in handler_raw_commandstring: + # open tempfile, respect mailcaps nametemplate + nametemplate = entry.get('nametemplate', '%s') + prefix, suffix = parse_mailcap_nametemplate(nametemplate) + with tempfile.NamedTemporaryFile( + delete=False, prefix=prefix, suffix=suffix) \ + as tmpfile: + tmpfile.write(raw_payload) + tempfile_name = tmpfile.name + else: + stdin = raw_payload + + # read parameter, create handler command + parms = tuple('='.join(p) for p in part.get_params()) + + # create and call external command + cmd = mailcap.subst(entry['view'], ctype, + filename=tempfile_name, plist=parms) + logging.debug('command: %s', cmd) + logging.debug('parms: %s', str(parms)) + cmdlist = split_commandstring(cmd) + # call handler + stdout, _, _ = helper.call_cmd(cmdlist, stdin=stdin) + if stdout: + rendered_payload = stdout + + # remove tempfile + if tempfile_name: + os.unlink(tempfile_name) + + return rendered_payload + +def remove_cte(part, as_string=False): + """Interpret MIME-part according to it's Content-Transfer-Encodings. + + This returns the payload of `part` as string or bytestring for display, or + to be passed to an external program. In the raw file the payload may be + encoded, e.g. in base64, quoted-printable, 7bit, or 8bit. This method will + look for one of the above Content-Transfer-Encoding header and interpret + the payload accordingly. + + Incorrect header values (common in spam messages) will be interpreted as + lenient as possible and will result in INFO-level debug messages. + + ..Note:: All this may be depricated in favour of + `email.contentmanager.raw_data_manager` (v3.6+) + + :param email.message.EmailMessage part: The part to decode + :param bool as_string: If true return a str, otherwise return bytes + :returns: The mail with any Content-Transfer-Encoding removed + :rtype: Union[str, bytes] + """ + payload = part.get_payload(decode = True) + if as_string: + enc = part.get_content_charset('ascii') + if enc.startswith('windows-'): + enc = enc.replace('windows-', 'cp', 1) + + try: + payload = payload.decode(enc, errors = 'backslashreplace') + except LookupError: + # enc is unknown; + # fall back to guessing the correct encoding using libmagic + payload = helper.try_decode(payload) + except UnicodeDecodeError as emsg: + # the mail contains chars that are not enc-encoded. + # libmagic works better than just ignoring those + logging.debug('Decoding failure: {}'.format(emsg)) + payload = helper.try_decode(payload) + + return payload + +MISSING_HTML_MSG = ("This message contains a text/html part that was not " + "rendered due to a missing mailcap entry. " + "Please refer to item 5 in our FAQ: " + "http://alot.rtfd.io/en/latest/faq.html") + +def extract_body(mail): + """Returns a string view of a Message. + + This consults :ref:`prefer_plaintext <prefer-plaintext>` + to determine if a "text/plain" alternative is preferred over a "text/html" + part. + + :param mail: the mail to use + :type mail: :class:`email.message.EmailMessage` + :returns: The combined text of any parts to be used + :rtype: str + """ + + if settings.get('prefer_plaintext'): + preferencelist = ('plain', 'html') + else: + preferencelist = ('html', 'plain') + + body_part = mail.get_body(preferencelist) + if body_part is None: # if no part matching preferredlist was found + return "" + + displaystring = "" + + if body_part.get_content_type() == 'text/plain': + displaystring = string_sanitize(remove_cte(body_part, as_string=True)) + else: + rendered_payload = render_part(body_part) + if rendered_payload: # handler had output + displaystring = string_sanitize(rendered_payload) + else: + if body_part.get_content_type() == 'text/html': + displaystring = MISSING_HTML_MSG + return displaystring + class _MessageHeaders: _msg = None diff --git a/alot/db/utils.py b/alot/db/utils.py index 020f51d0..0a7d1f59 100644 --- a/alot/db/utils.py +++ b/alot/db/utils.py @@ -8,150 +8,12 @@ import email import email.charset as charset import email.policy import email.utils -import tempfile import logging -import mailcap -from .. import helper from ..settings.const import settings -from ..helper import string_sanitize -from ..helper import parse_mailcap_nametemplate -from ..helper import split_commandstring charset.add_charset('utf-8', charset.QP, charset.QP, 'utf-8') -def render_part(part, field_key='copiousoutput'): - """ - renders a non-multipart email part into displayable plaintext by piping its - payload through an external script. The handler itself is determined by - the mailcap entry for this part's ctype. - """ - ctype = part.get_content_type() - raw_payload = remove_cte(part) - rendered_payload = None - # get mime handler - _, entry = settings.mailcap_find_match(ctype, key=field_key) - if entry is not None: - tempfile_name = None - stdin = None - handler_raw_commandstring = entry['view'] - # in case the mailcap defined command contains no '%s', - # we pipe the files content to the handling command via stdin - if '%s' in handler_raw_commandstring: - # open tempfile, respect mailcaps nametemplate - nametemplate = entry.get('nametemplate', '%s') - prefix, suffix = parse_mailcap_nametemplate(nametemplate) - with tempfile.NamedTemporaryFile( - delete=False, prefix=prefix, suffix=suffix) \ - as tmpfile: - tmpfile.write(raw_payload) - tempfile_name = tmpfile.name - else: - stdin = raw_payload - - # read parameter, create handler command - parms = tuple('='.join(p) for p in part.get_params()) - - # create and call external command - cmd = mailcap.subst(entry['view'], ctype, - filename=tempfile_name, plist=parms) - logging.debug('command: %s', cmd) - logging.debug('parms: %s', str(parms)) - cmdlist = split_commandstring(cmd) - # call handler - stdout, _, _ = helper.call_cmd(cmdlist, stdin=stdin) - if stdout: - rendered_payload = stdout - - # remove tempfile - if tempfile_name: - os.unlink(tempfile_name) - - return rendered_payload - - -def remove_cte(part, as_string=False): - """Interpret MIME-part according to it's Content-Transfer-Encodings. - - This returns the payload of `part` as string or bytestring for display, or - to be passed to an external program. In the raw file the payload may be - encoded, e.g. in base64, quoted-printable, 7bit, or 8bit. This method will - look for one of the above Content-Transfer-Encoding header and interpret - the payload accordingly. - - Incorrect header values (common in spam messages) will be interpreted as - lenient as possible and will result in INFO-level debug messages. - - ..Note:: All this may be depricated in favour of - `email.contentmanager.raw_data_manager` (v3.6+) - - :param email.message.EmailMessage part: The part to decode - :param bool as_string: If true return a str, otherwise return bytes - :returns: The mail with any Content-Transfer-Encoding removed - :rtype: Union[str, bytes] - """ - payload = part.get_payload(decode = True) - if as_string: - enc = part.get_content_charset('ascii') - if enc.startswith('windows-'): - enc = enc.replace('windows-', 'cp', 1) - - try: - payload = payload.decode(enc, errors = 'backslashreplace') - except LookupError: - # enc is unknown; - # fall back to guessing the correct encoding using libmagic - payload = helper.try_decode(payload) - except UnicodeDecodeError as emsg: - # the mail contains chars that are not enc-encoded. - # libmagic works better than just ignoring those - logging.debug('Decoding failure: {}'.format(emsg)) - payload = helper.try_decode(payload) - - return payload - -MISSING_HTML_MSG = ("This message contains a text/html part that was not " - "rendered due to a missing mailcap entry. " - "Please refer to item 5 in our FAQ: " - "http://alot.rtfd.io/en/latest/faq.html") - - -def extract_body(mail): - """Returns a string view of a Message. - - This consults :ref:`prefer_plaintext <prefer-plaintext>` - to determine if a "text/plain" alternative is preferred over a "text/html" - part. - - :param mail: the mail to use - :type mail: :class:`email.message.EmailMessage` - :returns: The combined text of any parts to be used - :rtype: str - """ - - if settings.get('prefer_plaintext'): - preferencelist = ('plain', 'html') - else: - preferencelist = ('html', 'plain') - - body_part = mail.get_body(preferencelist) - if body_part is None: # if no part matching preferredlist was found - return "" - - displaystring = "" - - if body_part.get_content_type() == 'text/plain': - displaystring = string_sanitize(remove_cte(body_part, as_string=True)) - else: - rendered_payload = render_part(body_part) - if rendered_payload: # handler had output - displaystring = string_sanitize(rendered_payload) - else: - if body_part.get_content_type() == 'text/html': - displaystring = MISSING_HTML_MSG - return displaystring - - def formataddr(pair): """ this is the inverse of email.utils.parseaddr: other than email.utils.formataddr, this |