refactor: header utilities into db.utils

author: Patrick Totzke <patricktotzke@gmail.com> 2012-03-11 18:49:58 +0000
committer: Patrick Totzke <patricktotzke@gmail.com> 2012-03-11 18:49:58 +0000
commit: c5d5fc97d47921b6a004349f09007727a3a629cd (patch)
tree: 2cc0af41f03cb2cdc71e435876c351ec88cc2882
parent: 99b2952adb9caff367a2a9b12dd848be1ec63e34 (diff)
5 files changed, 163 insertions, 6 deletions
diff --git a/alot/commands/thread.py b/alot/commands/thread.py
index b8cfd4e4..1d6419a5 100644
--- a/alot/commands/thread.py
+++ b/alot/commands/thread.py
@@ -14,10 +14,10 @@ from alot.commands.globals import ComposeCommand
 from alot.commands.globals import RefreshCommand
 from alot import widgets
 from alot import completion
-from alot.db.message import decode_header
-from alot.db.message import encode_header
-from alot.db.message import extract_headers
-from alot.db.message import extract_body
+from alot.db.utils import decode_header
+from alot.db.utils import encode_header
+from alot.db.utils import extract_headers
+from alot.db.utils import extract_body
 from alot.db.envelope import Envelope
 from alot.db.attachment import Attachment
 
diff --git a/alot/db/envelope.py b/alot/db/envelope.py
index 6572b507..6b176016 100644
--- a/alot/db/envelope.py
+++ b/alot/db/envelope.py
@@ -12,7 +12,7 @@ import alot.helper as helper
 from alot.settings import settings
 
 from attachment import Attachment
-from message import encode_header
+from utils import encode_header
 
 
 class Envelope(object):
diff --git a/alot/db/message.py b/alot/db/message.py
index 754d0f18..d0a361c9 100644
--- a/alot/db/message.py
+++ b/alot/db/message.py
@@ -7,6 +7,7 @@ from notmuch import NullPointerError
 import alot.helper as helper
 from alot.settings import settings
 
+from utils import extract_headers, extract_body
 from attachment import Attachment
 
 class Message(object):
diff --git a/alot/db/utils.py b/alot/db/utils.py
new file mode 100644
index 00000000..0b034da0
--- /dev/null
+++ b/alot/db/utils.py
@@ -0,0 +1,155 @@
+import os
+import email
+import tempfile
+import re
+import shlex
+from email.header import Header
+import email.charset as charset
+charset.add_charset('utf-8', charset.QP, charset.QP, 'utf-8')
+from email.iterators import typed_subpart_iterator
+
+import alot.helper as helper
+from alot.settings import settings
+from alot.helper import string_sanitize
+from alot.helper import string_decode
+
+
+
+def extract_headers(mail, headers=None):
+    headertext = u''
+    if headers == None:
+        headers = mail.keys()
+    for key in headers:
+        value = u''
+        if key in mail:
+            value = decode_header(mail.get(key, ''))
+        headertext += '%s: %s\n' % (key, value)
+    return headertext
+
+
+def extract_body(mail, types=None):
+    """
+    returns a body text string for given mail.
+    If types is `None`, 'text/*' is used:
+    In case mail has a 'text/html' part, it is preferred over
+    'text/plain' parts.
+
+    :param mail: the mail to use
+    :type mail: :class:`email.Message`
+    :param types: mime content types to use for body string
+    :type types: list of str
+    """
+    html = list(typed_subpart_iterator(mail, 'text', 'html'))
+
+    # if no specific types are given, we favor text/html over text/plain
+    drop_plaintext = False
+    if html and not types:
+        drop_plaintext = True
+
+    body_parts = []
+    for part in mail.walk():
+        ctype = part.get_content_type()
+
+        if types is not None:
+            if ctype not in types:
+                continue
+        cd = part.get('Content-Disposition', '')
+        if cd.startswith('attachment'):
+            continue
+
+        enc = part.get_content_charset() or 'ascii'
+        raw_payload = part.get_payload(decode=True)
+        if part.get_content_maintype() == 'text':
+            raw_payload = string_decode(raw_payload, enc)
+        if ctype == 'text/plain' and not drop_plaintext:
+            body_parts.append(string_sanitize(raw_payload))
+        else:
+            #get mime handler
+            handler = settings.get_mime_handler(ctype, key='view',
+                                                interactive=False)
+            if handler:
+                #open tempfile. Not all handlers accept stuff from stdin
+                tmpfile = tempfile.NamedTemporaryFile(delete=False,
+                                                      suffix='.html')
+                #write payload to tmpfile
+                if part.get_content_maintype() == 'text':
+                    tmpfile.write(raw_payload.encode('utf8'))
+                else:
+                    tmpfile.write(raw_payload)
+                tmpfile.close()
+                #create and call external command
+                cmd = handler % tmpfile.name
+                cmdlist = shlex.split(cmd.encode('utf-8', errors='ignore'))
+                rendered_payload, errmsg, retval = helper.call_cmd(cmdlist)
+                #remove tempfile
+                os.unlink(tmpfile.name)
+                if rendered_payload:  # handler had output
+                    body_parts.append(string_sanitize(rendered_payload))
+                elif part.get_content_maintype() == 'text':
+                    body_parts.append(string_sanitize(raw_payload))
+                # else drop
+    return '\n\n'.join(body_parts)
+
+
+def decode_header(header, normalize=False):
+    """
+    decode a header value to a unicode string
+
+    values are usually a mixture of different substrings
+    encoded in quoted printable using different encodings.
+    This turns it into a single unicode string
+
+    :param header: the header value
+    :type header: str
+    :param normalize: replace newlines plus following whitespace by a space
+    :type normalize: bool
+    :rtype: unicode
+    """
+
+    # If the value isn't ascii as RFC2822 prescribes,
+    # we just return the decoded unicode string as is
+    value = string_decode(header)  # convert to unicode
+    try:
+        value = value.encode('ascii')
+    except UnicodeEncodeError:
+        return value
+
+    # otherwise we interpret RFC2822 encoding escape sequences
+    valuelist = email.header.decode_header(header)
+    decoded_list = []
+    for v, enc in valuelist:
+        v = string_decode(v, enc)
+        decoded_list.append(string_sanitize(v))
+    value = u' '.join(decoded_list)
+    if normalize:
+        value = re.sub(r'\n\s+', r' ', value)
+    return value
+
+
+def encode_header(key, value):
+    """
+    encodes a unicode string as a valid header value
+
+    :param key: the header field this value will be stored in
+    :type key: str
+    :param value: the value to be encoded
+    :type value: unicode
+    """
+    # handle list of "realname <email>" entries separately
+    if key.lower() in ['from', 'to', 'cc', 'bcc']:
+        rawentries = value.split(',')
+        encodedentries = []
+        for entry in rawentries:
+            m = re.search('\s*(.*)\s+<(.*\@.*\.\w*)>\s*$', entry)
+            if m:  # If a realname part is contained
+                name, address = m.groups()
+                # try to encode as ascii, if that fails, revert to utf-8
+                # name must be a unicode string here
+                namepart = Header(name)
+                # append address part encoded as ascii
+                entry = '%s <%s>' % (namepart.encode(), address)
+            encodedentries.append(entry)
+        value = Header(', '.join(encodedentries))
+    else:
+        value = Header(value)
+    return value
diff --git a/alot/widgets.py b/alot/widgets.py
index c81f685c..fec069af 100644
--- a/alot/widgets.py
+++ b/alot/widgets.py
@@ -9,6 +9,7 @@ from alot.helper import string_decode
 import alot.db.message as message
 from alot.db.attachment import Attachment
 import time
+from alot.db.utils import decode_header
 
 
 class DialogBox(urwid.WidgetWrap):
@@ -472,7 +473,7 @@ class MessageWidget(urwid.WidgetWrap):
         for key in self._displayed_headers:
             if key in mail:
                 for value in mail.get_all(key):
-                    dvalue = message.decode_header(value, normalize=norm)
+                    dvalue = decode_header(value, normalize=norm)
                     lines.append((key, dvalue))
 
         cols = [HeadersList(lines)]
author	Patrick Totzke <patricktotzke@gmail.com>	2012-03-11 18:49:58 +0000
committer	Patrick Totzke <patricktotzke@gmail.com>	2012-03-11 18:49:58 +0000
commit	c5d5fc97d47921b6a004349f09007727a3a629cd (patch)
tree	2cc0af41f03cb2cdc71e435876c351ec88cc2882
parent	99b2952adb9caff367a2a9b12dd848be1ec63e34 (diff)