diff options
author | Patrick Totzke <patricktotzke@gmail.com> | 2012-03-11 18:49:58 +0000 |
---|---|---|
committer | Patrick Totzke <patricktotzke@gmail.com> | 2012-03-11 18:49:58 +0000 |
commit | c5d5fc97d47921b6a004349f09007727a3a629cd (patch) | |
tree | 2cc0af41f03cb2cdc71e435876c351ec88cc2882 | |
parent | 99b2952adb9caff367a2a9b12dd848be1ec63e34 (diff) |
refactor: header utilities into db.utils
-rw-r--r-- | alot/commands/thread.py | 8 | ||||
-rw-r--r-- | alot/db/envelope.py | 2 | ||||
-rw-r--r-- | alot/db/message.py | 1 | ||||
-rw-r--r-- | alot/db/utils.py | 155 | ||||
-rw-r--r-- | alot/widgets.py | 3 |
5 files changed, 163 insertions, 6 deletions
diff --git a/alot/commands/thread.py b/alot/commands/thread.py index b8cfd4e4..1d6419a5 100644 --- a/alot/commands/thread.py +++ b/alot/commands/thread.py @@ -14,10 +14,10 @@ from alot.commands.globals import ComposeCommand from alot.commands.globals import RefreshCommand from alot import widgets from alot import completion -from alot.db.message import decode_header -from alot.db.message import encode_header -from alot.db.message import extract_headers -from alot.db.message import extract_body +from alot.db.utils import decode_header +from alot.db.utils import encode_header +from alot.db.utils import extract_headers +from alot.db.utils import extract_body from alot.db.envelope import Envelope from alot.db.attachment import Attachment diff --git a/alot/db/envelope.py b/alot/db/envelope.py index 6572b507..6b176016 100644 --- a/alot/db/envelope.py +++ b/alot/db/envelope.py @@ -12,7 +12,7 @@ import alot.helper as helper from alot.settings import settings from attachment import Attachment -from message import encode_header +from utils import encode_header class Envelope(object): diff --git a/alot/db/message.py b/alot/db/message.py index 754d0f18..d0a361c9 100644 --- a/alot/db/message.py +++ b/alot/db/message.py @@ -7,6 +7,7 @@ from notmuch import NullPointerError import alot.helper as helper from alot.settings import settings +from utils import extract_headers, extract_body from attachment import Attachment class Message(object): diff --git a/alot/db/utils.py b/alot/db/utils.py new file mode 100644 index 00000000..0b034da0 --- /dev/null +++ b/alot/db/utils.py @@ -0,0 +1,155 @@ +import os +import email +import tempfile +import re +import shlex +from email.header import Header +import email.charset as charset +charset.add_charset('utf-8', charset.QP, charset.QP, 'utf-8') +from email.iterators import typed_subpart_iterator + +import alot.helper as helper +from alot.settings import settings +from alot.helper import string_sanitize +from alot.helper import string_decode + + + +def extract_headers(mail, headers=None): + headertext = u'' + if headers == None: + headers = mail.keys() + for key in headers: + value = u'' + if key in mail: + value = decode_header(mail.get(key, '')) + headertext += '%s: %s\n' % (key, value) + return headertext + + +def extract_body(mail, types=None): + """ + returns a body text string for given mail. + If types is `None`, 'text/*' is used: + In case mail has a 'text/html' part, it is prefered over + 'text/plain' parts. + + :param mail: the mail to use + :type mail: :class:`email.Message` + :param types: mime content types to use for body string + :type types: list of str + """ + html = list(typed_subpart_iterator(mail, 'text', 'html')) + + # if no specific types are given, we favor text/html over text/plain + drop_plaintext = False + if html and not types: + drop_plaintext = True + + body_parts = [] + for part in mail.walk(): + ctype = part.get_content_type() + + if types is not None: + if ctype not in types: + continue + cd = part.get('Content-Disposition', '') + if cd.startswith('attachment'): + continue + + enc = part.get_content_charset() or 'ascii' + raw_payload = part.get_payload(decode=True) + if part.get_content_maintype() == 'text': + raw_payload = string_decode(raw_payload, enc) + if ctype == 'text/plain' and not drop_plaintext: + body_parts.append(string_sanitize(raw_payload)) + else: + #get mime handler + handler = settings.get_mime_handler(ctype, key='view', + interactive=False) + if handler: + #open tempfile. Not all handlers accept stuff from stdin + tmpfile = tempfile.NamedTemporaryFile(delete=False, + suffix='.html') + #write payload to tmpfile + if part.get_content_maintype() == 'text': + tmpfile.write(raw_payload.encode('utf8')) + else: + tmpfile.write(raw_payload) + tmpfile.close() + #create and call external command + cmd = handler % tmpfile.name + cmdlist = shlex.split(cmd.encode('utf-8', errors='ignore')) + rendered_payload, errmsg, retval = helper.call_cmd(cmdlist) + #remove tempfile + os.unlink(tmpfile.name) + if rendered_payload: # handler had output + body_parts.append(string_sanitize(rendered_payload)) + elif part.get_content_maintype() == 'text': + body_parts.append(string_sanitize(raw_payload)) + # else drop + return '\n\n'.join(body_parts) + + +def decode_header(header, normalize=False): + """ + decode a header value to a unicode string + + values are usually a mixture of different substrings + encoded in quoted printable using diffetrent encodings. + This turns it into a single unicode string + + :param header: the header value + :type header: str + :param normalize: replace trailing spaces after newlines + :type normalize: bool + :rtype: unicode + """ + + # If the value isn't ascii as RFC2822 prescribes, + # we just return the unicode bytestring as is + value = string_decode(header) # convert to unicode + try: + value = value.encode('ascii') + except UnicodeEncodeError: + return value + + # otherwise we interpret RFC2822 encoding escape sequences + valuelist = email.header.decode_header(header) + decoded_list = [] + for v, enc in valuelist: + v = string_decode(v, enc) + decoded_list.append(string_sanitize(v)) + value = u' '.join(decoded_list) + if normalize: + value = re.sub(r'\n\s+', r' ', value) + return value + + +def encode_header(key, value): + """ + encodes a unicode string as a valid header value + + :param key: the header field this value will be stored in + :type key: str + :param value: the value to be encoded + :type value: unicode + """ + # handle list of "realname <email>" entries separately + if key.lower() in ['from', 'to', 'cc', 'bcc']: + rawentries = value.split(',') + encodedentries = [] + for entry in rawentries: + m = re.search('\s*(.*)\s+<(.*\@.*\.\w*)>\s*$', entry) + if m: # If a realname part is contained + name, address = m.groups() + # try to encode as ascii, if that fails, revert to utf-8 + # name must be a unicode string here + namepart = Header(name) + # append address part encoded as ascii + entry = '%s <%s>' % (namepart.encode(), address) + encodedentries.append(entry) + value = Header(', '.join(encodedentries)) + else: + value = Header(value) + return value diff --git a/alot/widgets.py b/alot/widgets.py index c81f685c..fec069af 100644 --- a/alot/widgets.py +++ b/alot/widgets.py @@ -9,6 +9,7 @@ from alot.helper import string_decode import alot.db.message as message from alot.db.attachment import Attachment import time +from alot.db.utils import decode_header class DialogBox(urwid.WidgetWrap): @@ -472,7 +473,7 @@ class MessageWidget(urwid.WidgetWrap): for key in self._displayed_headers: if key in mail: for value in mail.get_all(key): - dvalue = message.decode_header(value, normalize=norm) + dvalue = decode_header(value, normalize=norm) lines.append((key, dvalue)) cols = [HeadersList(lines)] |