summaryrefslogtreecommitdiff
path: root/alot/db/message.py
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2020-03-05 08:05:29 +0100
committerAnton Khirnov <anton@khirnov.net>2020-03-05 09:59:02 +0100
commit46bb3a3efe8614478559a9594880367a8a419e53 (patch)
treee77a9acaee41a830778ca24546f66d2a8f53455f /alot/db/message.py
parent618a235fa30a6a83aebd1f4bda526deb9e9b630a (diff)
db/utils: move message body extraction code into db/message
It is only called from there, so there is no reason to keep it elsewhere.
Diffstat (limited to 'alot/db/message.py')
-rw-r--r--alot/db/message.py135
1 files changed, 134 insertions, 1 deletions
diff --git a/alot/db/message.py b/alot/db/message.py
index 566a848b..6523f18b 100644
--- a/alot/db/message.py
+++ b/alot/db/message.py
@@ -4,17 +4,21 @@
import email
import email.charset as charset
import email.policy
+import mailcap
+import tempfile
from datetime import datetime
from notmuch import NullPointerError
from . import utils
-from .utils import extract_body
from .attachment import Attachment
from .. import crypto
from .. import helper
from ..errors import GPGProblem
+from ..helper import parse_mailcap_nametemplate
+from ..helper import split_commandstring
+from ..helper import string_sanitize
from ..settings.const import settings
charset.add_charset('utf-8', charset.QP, charset.QP, 'utf-8')
@@ -251,6 +255,135 @@ def _decrypted_message_from_bytes(bytestring, session_keys = None):
return enc
def render_part(part, field_key='copiousoutput'):
    """Render a non-multipart email part into displayable text.

    The payload of `part` is handed to an external program and that program's
    standard output is returned. The program is taken from the `view` field of
    the mailcap entry that matches the part's content type.

    If the handler command contains a ``%s`` placeholder, the payload is
    written to a temporary file (named according to the entry's
    `nametemplate`, if any) whose path is substituted for the placeholder;
    otherwise the payload is piped to the handler via stdin.

    :param part: the (non-multipart) MIME part to render
    :type part: :class:`email.message.EmailMessage`
    :param str field_key: passed as `key` to `settings.mailcap_find_match`
        when looking up the handler
    :returns: the handler's stdout as returned by `helper.call_cmd`, or None
        if the part has no payload, no mailcap entry matched, or the handler
        produced no output
    """
    # Imported locally: neither module shows up in the file-level imports
    # visible alongside this function.
    import logging
    import os

    ctype = part.get_content_type()
    raw_payload = remove_cte(part)
    if raw_payload is None:
        # nothing to feed to a handler (e.g. a multipart container)
        return None

    # get mime handler
    _, entry = settings.mailcap_find_match(ctype, key=field_key)
    if entry is None:
        return None

    handler_raw_commandstring = entry['view']
    tempfile_name = None
    stdin = None
    try:
        if '%s' in handler_raw_commandstring:
            # the handler wants a file name: write the payload to a
            # tempfile, respecting the mailcap entry's nametemplate
            nametemplate = entry.get('nametemplate', '%s')
            prefix, suffix = parse_mailcap_nametemplate(nametemplate)
            with tempfile.NamedTemporaryFile(
                    delete=False, prefix=prefix, suffix=suffix) as tmpfile:
                # remember the name first so that the file gets removed
                # below even if writing fails
                tempfile_name = tmpfile.name
                tmpfile.write(raw_payload)
        else:
            # no '%s' in the command: pipe the payload in via stdin
            stdin = raw_payload

        # read parameters; get_params() returns None when the part has no
        # Content-Type header at all
        parms = tuple('='.join(p) for p in (part.get_params() or ()))

        # create and call external command
        cmd = mailcap.subst(handler_raw_commandstring, ctype,
                            filename=tempfile_name, plist=parms)
        logging.debug('command: %s', cmd)
        logging.debug('parms: %s', str(parms))
        cmdlist = split_commandstring(cmd)
        stdout, _, _ = helper.call_cmd(cmdlist, stdin=stdin)
    finally:
        # delete=False above means nobody else cleans this up, so do it
        # here, also when building or running the command raised
        if tempfile_name:
            os.unlink(tempfile_name)

    return stdout or None
+
def remove_cte(part, as_string=False):
    """Interpret a MIME part according to its Content-Transfer-Encoding.

    This returns the payload of `part` as string or bytestring for display, or
    to be passed to an external program. In the raw file the payload may be
    encoded, e.g. in base64, quoted-printable, 7bit, or 8bit; that encoding is
    undone here.

    If `as_string` is set, the resulting bytes are additionally decoded to
    text using the charset declared by the part (ASCII if none is declared).
    Bytes that are invalid in that charset are kept as backslash escapes. If
    the declared charset is unknown or cannot be used for decoding, the
    failure is logged at debug level and decoding is delegated to
    `helper.try_decode`.

    .. note:: All this may be deprecated in favour of
       `email.contentmanager.raw_data_manager` (v3.6+)

    :param email.message.EmailMessage part: The part to decode
    :param bool as_string: If true return a str, otherwise return bytes
    :returns: The payload with any Content-Transfer-Encoding removed. For a
        part without a decodable payload (e.g. a multipart container) this is
        an empty string if `as_string` is set and None otherwise.
    :rtype: Union[str, bytes, None]
    """
    # Imported locally: `logging` does not show up in the file-level imports
    # visible alongside this function.
    import logging

    payload = part.get_payload(decode=True)
    if not as_string:
        return payload
    if payload is None:
        # get_payload(decode=True) yields None for multipart containers
        return ''

    enc = part.get_content_charset('ascii')
    if enc.startswith('windows-'):
        # map e.g. 'windows-1252' to the 'cp1252' spelling
        enc = enc.replace('windows-', 'cp', 1)

    try:
        return payload.decode(enc, errors='backslashreplace')
    except (LookupError, ValueError) as emsg:
        # LookupError: enc is unknown or not a text encoding.
        # ValueError (includes UnicodeError): enc could not be used, e.g. a
        # codec that does not accept the 'backslashreplace' handler.
        # Either way fall back to helper.try_decode (described as guessing
        # the encoding via libmagic in the original comments).
        logging.debug('Decoding failure: %s', emsg)
        return helper.try_decode(payload)
+
# Shown in place of the body when a text/html part could not be rendered.
MISSING_HTML_MSG = (
    "This message contains a text/html part that was not "
    "rendered due to a missing mailcap entry. "
    "Please refer to item 5 in our FAQ: "
    "http://alot.rtfd.io/en/latest/faq.html"
)


def extract_body(mail):
    """Return the text to display as the body of a message.

    The :ref:`prefer_plaintext <prefer-plaintext>` setting decides whether a
    "text/plain" alternative is looked for before a "text/html" one or the
    other way round. A "text/plain" part is decoded directly; any other part
    is passed through :func:`render_part`. If that yields no output for a
    "text/html" part, :data:`MISSING_HTML_MSG` is returned instead.

    :param mail: the mail to use
    :type mail: :class:`email.message.EmailMessage`
    :returns: the text of the selected body part, or an empty string if there
        is no suitable part or nothing could be rendered
    :rtype: str
    """
    plain_first = settings.get('prefer_plaintext')
    order = ('plain', 'html') if plain_first else ('html', 'plain')

    chosen = mail.get_body(order)
    if chosen is None:  # no part matching the preference list was found
        return ""

    if chosen.get_content_type() == 'text/plain':
        return string_sanitize(remove_cte(chosen, as_string=True))

    output = render_part(chosen)
    if output:  # handler had output
        return string_sanitize(output)

    if chosen.get_content_type() == 'text/html':
        return MISSING_HTML_MSG
    return ""
+
class _MessageHeaders:
_msg = None