diff options
Diffstat (limited to 'alot/db/message.py')
-rw-r--r-- | alot/db/message.py | 664 |
1 files changed, 664 insertions, 0 deletions
"""
Mail wrappers used by alot's database layer.

Defines :class:`Message` (a notmuch message plus its parsed mail file),
:class:`Attachment` (a single non-multipart mime part),
:class:`Envelope` (a not yet sent, editable mail) and module-level helpers
that extract body text and de-/encode header values.
"""
import os
import email
import tempfile
import re
import shlex
from datetime import datetime
from email.header import Header
import email.charset as charset
charset.add_charset('utf-8', charset.QP, charset.QP, 'utf-8')
from email.iterators import typed_subpart_iterator
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from notmuch import NullPointerError

from alot import __version__
import logging
import alot.helper as helper
from alot.settings import settings
from alot.helper import string_sanitize
from alot.helper import string_decode


class Message(object):
    """
    a persistent notmuch message object.
    It uses a :class:`~alot.db.DBManager` for cached manipulation
    and lazy lookups.
    """
    def __init__(self, dbman, msg, thread=None):
        """
        :param dbman: db manager that is used for further lookups
        :type dbman: alot.db.DBManager
        :param msg: the wrapped message
        :type msg: notmuch.database.Message
        :param thread: this messages thread (will be looked up later if `None`)
        :type thread: :class:`~alot.db.Thread` or `None`
        """
        self._dbman = dbman
        self._id = msg.get_message_id()
        self._thread_id = msg.get_thread_id()
        self._thread = thread
        casts_date = lambda: datetime.fromtimestamp(msg.get_date())
        self._datetime = helper.safely_get(casts_date,
                                           ValueError, None)
        self._filename = msg.get_filename()
        self._from = helper.safely_get(lambda: msg.get_header('From'),
                                       NullPointerError)
        self._email = None  # will be read upon first use
        self._attachments = None  # will be read upon first use
        self._tags = set(msg.get_tags())

    def __str__(self):
        """prettyprint the message"""
        aname, aaddress = self.get_author()
        if not aname:
            aname = aaddress
        return "%s (%s)" % (aname, self.get_datestring())

    def __hash__(self):
        """needed for sets of Messages"""
        return hash(self._id)

    def __cmp__(self, other):
        """needed for Message comparison"""
        return cmp(self.get_message_id(), other.get_message_id())

    def get_email(self):
        """
        returns :class:`email.Message` for this message.

        The mail file is parsed on first use and cached afterwards. If the
        file cannot be opened, a placeholder mail that carries a warning
        is cached and returned instead.
        """
        # compare against None: a parsed mail without any header has
        # length 0 and would otherwise be re-read on every call
        if self._email is None:
            path = self.get_filename()
            try:
                with open(path) as f_mail:
                    self._email = email.message_from_file(f_mail)
            except IOError:
                warning = "Subject: Caution!\n"\
                          "Message file is no longer accessible:\n%s" % path
                self._email = email.message_from_string(warning)
        return self._email

    def get_date(self):
        """returns Date header value as :class:`~datetime.datetime`"""
        return self._datetime

    def get_filename(self):
        """returns absolute path of message files location"""
        return self._filename

    def get_message_id(self):
        """returns messages id (str)"""
        return self._id

    def get_thread_id(self):
        """returns id (str) of the thread this message belongs to"""
        return self._thread_id

    def get_message_parts(self):
        """returns a list of all body parts of this message"""
        # TODO really needed? email iterators can do this
        return [part for part in self.get_email().walk()
                if not part.is_multipart()]

    def get_tags(self):
        """returns tags attached to this message as sorted list of strings"""
        return sorted(self._tags)

    def get_thread(self):
        """returns the :class:`~alot.db.Thread` this msg belongs to"""
        if not self._thread:
            self._thread = self._dbman.get_thread(self._thread_id)
        return self._thread

    def has_replies(self):
        """returns true if this message has at least one reply"""
        return (len(self.get_replies()) > 0)

    def get_replies(self):
        """returns replies to this message as list of :class:`Message`"""
        t = self.get_thread()
        return t.get_replies_to(self)

    def get_datestring(self):
        """
        returns reformatted datestring for this message.

        It uses the format specified by `timestamp_format` in
        the general section of the config and falls back to
        :func:`alot.helper.pretty_datetime` if that is unset.
        Returns `None` if the message has no usable date.
        """
        if self._datetime is None:
            return None
        formatstring = settings.get('timestamp_format')
        if formatstring is None:
            res = helper.pretty_datetime(self._datetime)
        else:
            res = self._datetime.strftime(formatstring)
        return res

    def get_author(self):
        """
        returns realname and address of this messages author

        :rtype: (str,str)
        """
        return email.Utils.parseaddr(self._from)

    def get_headers_string(self, headers):
        """
        returns subset of this messages headers as human-readable format:
        all header values are decoded, the resulting string has
        one line "KEY: VALUE" for each requested header; headers that are
        not present in the mail are listed with an empty value.

        :param headers: headers to extract
        :type headers: list of str
        """
        # this used to call the non-existent `get_mail`
        return extract_headers(self.get_email(), headers)

    def add_tags(self, tags, afterwards=None, remove_rest=False):
        """
        adds tags to message

        .. note::

            This only adds the requested operation to this objects
            :class:`DBManager's <alot.db.DBManager>` write queue.
            You need to call :meth:`~alot.db.DBManager.flush` to write out.

        :param tags: a list of tags to be added
        :type tags: list of str
        :param afterwards: callback that gets called after successful
                           application of this tagging operation
        :type afterwards: callable
        :param remove_rest: remove all other tags
        :type remove_rest: bool
        """
        def myafterwards():
            if remove_rest:
                self._tags = set(tags)
            else:
                self._tags = self._tags.union(tags)
            if callable(afterwards):
                afterwards()

        self._dbman.tag('id:' + self._id, tags, afterwards=myafterwards,
                        remove_rest=remove_rest)
        # the new tags are added to the local set right away, the callback
        # above adjusts it once more when the operation was applied
        self._tags = self._tags.union(tags)

    def remove_tags(self, tags, afterwards=None):
        """remove tags from message

        .. note::

            This only adds the requested operation to this objects
            :class:`DBManager's <alot.db.DBManager>` write queue.
            You need to call :meth:`~alot.db.DBManager.flush` to actually
            write out.

        :param tags: a list of tags to be removed
        :type tags: list of str
        :param afterwards: callback that gets called after successful
                           application of this tagging operation
        :type afterwards: callable
        """
        def myafterwards():
            self._tags = self._tags.difference(tags)
            if callable(afterwards):
                afterwards()

        self._dbman.untag('id:' + self._id, tags, myafterwards)

    def get_attachments(self):
        """
        returns messages attachments

        Derived from the leaves of the email mime tree
        that are not part of :rfc:`2015` syntax for encrypted/signed mails
        and either have :mailheader:`Content-Disposition` `attachment`
        or have :mailheader:`Content-Disposition` `inline` but specify
        a filename (as parameter to `Content-Disposition`).

        :rtype: list of :class:`Attachment`
        """
        # compare against None so that an empty result is cached as well
        if self._attachments is None:
            self._attachments = []
            for part in self.get_message_parts():
                cd = part.get('Content-Disposition', '')
                filename = part.get_filename()
                ct = part.get_content_type()
                # replace underspecified mime description by a better guess
                if ct in ['octet/stream', 'application/octet-stream']:
                    content = part.get_payload(decode=True)
                    ct = helper.guess_mimetype(content)

                if cd.startswith('attachment'):
                    if ct not in ['application/pgp-encrypted',
                                  'application/pgp-signature']:
                        self._attachments.append(Attachment(part))
                elif cd.startswith('inline'):
                    if filename is not None and ct != 'application/pgp':
                        self._attachments.append(Attachment(part))
        return self._attachments

    def accumulate_body(self):
        """
        returns bodystring extracted from this mail
        """
        #TODO: don't hardcode which part is considered body but allow toggle
        #      commands and a config default setting

        return extract_body(self.get_email())

    def get_text_content(self):
        """returns bodystring built from this mails 'text/plain' parts only"""
        return extract_body(self.get_email(), types=['text/plain'])

    def matches(self, querystring):
        """tests if this messages is in the resultset for `querystring`"""
        searchfor = querystring + ' AND id:' + self._id
        return self._dbman.count_messages(searchfor) > 0


def extract_headers(mail, headers=None):
    """
    returns a string with one line "KEY: VALUE" per requested header.

    Values are decoded using :func:`decode_header`; a requested header
    that is missing from `mail` is listed with an empty value.

    :param mail: the mail to read headers from
    :type mail: :class:`email.Message`
    :param headers: headers to extract, defaults to all keys of `mail`
    :type headers: list of str or `None`
    :rtype: unicode
    """
    if headers is None:
        headers = mail.keys()
    lines = []
    for key in headers:
        value = u''
        if key in mail:
            value = decode_header(mail.get(key, ''))
        lines.append('%s: %s\n' % (key, value))
    return u''.join(lines)


def extract_body(mail, types=None):
    """
    returns a body text string for given mail.
    If types is `None`, 'text/*' is used:
    In case mail has a 'text/html' part, it is preferred over
    'text/plain' parts.

    Parts other than plain text are written to a temporary file and
    rendered by the external 'view' mime handler from the settings, if
    one is configured for their content type.

    :param mail: the mail to use
    :type mail: :class:`email.Message`
    :param types: mime content types to use for body string
    :type types: list of str
    """
    html = list(typed_subpart_iterator(mail, 'text', 'html'))

    # if no specific types are given, we favor text/html over text/plain
    drop_plaintext = False
    if html and not types:
        drop_plaintext = True

    body_parts = []
    for part in mail.walk():
        ctype = part.get_content_type()

        if types is not None:
            if ctype not in types:
                continue
        cd = part.get('Content-Disposition', '')
        if cd.startswith('attachment'):
            continue

        enc = part.get_content_charset() or 'ascii'
        raw_payload = part.get_payload(decode=True)
        is_text = part.get_content_maintype() == 'text'
        if is_text:
            raw_payload = string_decode(raw_payload, enc)
        if ctype == 'text/plain' and not drop_plaintext:
            body_parts.append(string_sanitize(raw_payload))
        else:
            #get mime handler
            handler = settings.get_mime_handler(ctype, key='view',
                                                interactive=False)
            if handler:
                #open tempfile. Not all handlers accept stuff from stdin
                tmpfile = tempfile.NamedTemporaryFile(delete=False,
                                                      suffix='.html')
                try:
                    #write payload to tmpfile
                    with tmpfile:
                        if is_text:
                            tmpfile.write(raw_payload.encode('utf8'))
                        else:
                            tmpfile.write(raw_payload)
                    #create and call external command
                    cmd = handler % tmpfile.name
                    cmdlist = shlex.split(cmd.encode('utf-8',
                                                     errors='ignore'))
                    rendered_payload, errmsg, retval = helper.call_cmd(
                        cmdlist)
                finally:
                    #remove tempfile, also if writing or the handler failed
                    os.unlink(tmpfile.name)
                if rendered_payload:  # handler had output
                    body_parts.append(string_sanitize(rendered_payload))
                elif is_text:
                    body_parts.append(string_sanitize(raw_payload))
                # else drop
    return '\n\n'.join(body_parts)


def decode_header(header, normalize=False):
    """
    decode a header value to a unicode string

    values are usually a mixture of different substrings
    encoded in quoted printable using different encodings.
    This turns it into a single unicode string

    :param header: the header value
    :type header: str
    :param normalize: replace trailing spaces after newlines
    :type normalize: bool
    :rtype: unicode
    """

    # If the value isn't ascii as RFC2822 prescribes,
    # we just return the unicode bytestring as is
    value = string_decode(header)  # convert to unicode
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        return value

    # otherwise we interpret RFC2822 encoding escape sequences
    valuelist = email.header.decode_header(header)
    decoded_list = [string_sanitize(string_decode(v, enc))
                    for v, enc in valuelist]
    value = u' '.join(decoded_list)
    if normalize:
        value = re.sub(r'\n\s+', r' ', value)
    return value


def encode_header(key, value):
    """
    encodes a unicode string as a valid header value

    :param key: the header field this value will be stored in
    :type key: str
    :param value: the value to be encoded
    :type value: unicode
    :rtype: :class:`email.header.Header`
    """
    # handle list of "realname <email>" entries separately
    if key.lower() in ['from', 'to', 'cc', 'bcc']:
        rawentries = value.split(',')
        encodedentries = []
        for entry in rawentries:
            m = re.search('\s*(.*)\s+<(.*\@.*\.\w*)>\s*$', entry)
            if m:  # If a realname part is contained
                name, address = m.groups()
                # try to encode as ascii, if that fails, revert to utf-8
                # name must be a unicode string here
                namepart = Header(name)
                # append address part encoded as ascii
                entry = '%s <%s>' % (namepart.encode(), address)
            encodedentries.append(entry)
        value = Header(', '.join(encodedentries))
    else:
        value = Header(value)
    return value


class Attachment(object):
    """represents a mail attachment"""

    def __init__(self, emailpart):
        """
        :param emailpart: a non-multipart email that is the attachment
        :type emailpart: :class:`email.message.Message`
        """
        self.part = emailpart

    def __str__(self):
        desc = '%s:%s (%s)' % (self.get_content_type(),
                               self.get_filename(),
                               helper.humanize_size(self.get_size()))
        return string_decode(desc)

    def get_filename(self):
        """
        return name of attached file.
        If the content-disposition header contains no file name,
        this returns `None`
        """
        fname = self.part.get_filename()
        # don't hand a missing filename to decode_header
        if fname:
            extracted_name = decode_header(fname)
            if extracted_name:
                return os.path.basename(extracted_name)
        return None

    def get_content_type(self):
        """mime type of the attachment part"""
        ctype = self.part.get_content_type()
        # replace underspecified mime description by a better guess
        if ctype in ['octet/stream', 'application/octet-stream']:
            ctype = helper.guess_mimetype(self.get_data())
        return ctype

    def get_size(self):
        """returns length of this parts payload as stored in the mail"""
        return len(self.part.get_payload())

    def save(self, path):
        """
        save the attachment to disk. Uses :meth:`get_filename` in case path
        is a directory; if the attachment has no filename, a new temporary
        file is created in that directory.

        :param path: file or directory to write to
        :type path: str
        :returns: name of the file that was written
        :raises: IOError if `path` is invalid
        """
        filename = self.get_filename()
        path = os.path.expanduser(path)
        # the payload is a raw data blob, so write in binary mode
        if os.path.isdir(path):
            if filename:
                basename = os.path.basename(filename)
                outfile = open(os.path.join(path, basename), "wb")
            else:
                outfile = tempfile.NamedTemporaryFile(delete=False, dir=path)
        else:
            outfile = open(path, "wb")  # throws IOErrors for invalid path
        try:
            outfile.write(self.get_data())
        finally:
            outfile.close()
        return outfile.name

    def get_data(self):
        """return data blob from wrapped file"""
        return self.part.get_payload(decode=True)

    def get_mime_representation(self):
        """returns mime part that constitutes this attachment"""
        return self.part


class Envelope(object):
    """a message that is not yet sent and still editable"""
    def __init__(self, template=None, bodytext=u'', headers=None,
                 attachments=None, sign=False, encrypt=False):
        """
        :param template: if not None, the envelope will be initialised by
                         :meth:`parsing <parse_template>` this string before
                         setting any other values given to this constructor.
        :type template: str
        :param bodytext: text used as body part
        :type bodytext: unicode
        :param headers: unencoded header values
        :type headers: dict (str -> list of unicode) or `None`
        :param attachments: file attachments to include
        :type attachments: list of :class:`Attachment` or `None`
        :param sign: stored as `self.sign`
        :type sign: bool
        :param encrypt: stored as `self.encrypt`
        :type encrypt: bool
        """
        assert isinstance(bodytext, unicode)
        self.headers = {}
        self.body = None
        # parse_template reads these, so they have to exist before a
        # template is parsed
        self.sent_time = None
        self.modified_since_sent = False
        logging.debug('TEMPLATE: %s' % template)
        if template:
            self.parse_template(template)
            logging.debug('PARSED TEMPLATE: %s' % template)
            logging.debug('BODY: %s' % self.body)
        if self.body is None:
            self.body = bodytext
        if headers:
            self.headers.update(headers)
        self.attachments = list(attachments) if attachments else []
        self.sign = sign
        self.encrypt = encrypt
        self.sent_time = None
        self.modified_since_sent = False

    def __str__(self):
        return "Envelope (%s)\n%s" % (self.headers, self.body)

    def __setitem__(self, name, val):
        """setter for header values. this allows adding header like so:

        >>> envelope['Subject'] = u'sm\xf8rebr\xf8d'
        """
        # NOTE(review): the value is stored as given, whereas get() takes
        # the first element and construct_mail() iterates over the stored
        # value, i.e. both treat it as list of values -- confirm what
        # callers pass in here
        self.headers[name] = val

        if self.sent_time:
            self.modified_since_sent = True

    def __getitem__(self, name):
        """getter for header values.
        :raises: KeyError if undefined
        """
        return self.headers[name]

    def __delitem__(self, name):
        del(self.headers[name])

        if self.sent_time:
            self.modified_since_sent = True

    def __contains__(self, name):
        return self.headers.__contains__(name)

    def get(self, key, fallback=None):
        """secure getter for header values that allows specifying a `fallback`
        return string (defaults to None). This returns the first matching value
        and doesn't raise KeyErrors"""
        if key in self.headers:
            value = self.headers[key][0]
        else:
            value = fallback
        return value

    def get_all(self, key, fallback=None):
        """
        returns all header values for given key

        :param key: the header to look up
        :type key: str
        :param fallback: returned if the header is not set; defaults to a
                         new empty list
        """
        if key in self.headers:
            return self.headers[key]
        # hand out a fresh list instead of a shared default object
        if fallback is None:
            return []
        return fallback

    def add(self, key, value):
        """add header value"""
        if key not in self.headers:
            self.headers[key] = []
        self.headers[key].append(value)

        if self.sent_time:
            self.modified_since_sent = True

    def attach(self, attachment, filename=None, ctype=None):
        """
        attach a file

        :param attachment: File to attach, given as :class:`Attachment` object
                           or path to a file.
        :type attachment: :class:`Attachment` or str
        :param filename: filename to use in content-disposition.
                         Will be ignored if `path` matches multiple files
        :param ctype: force content-type to be used for this attachment
        :type ctype: str
        :raises: TypeError if `attachment` is neither of the above
        """

        if isinstance(attachment, Attachment):
            self.attachments.append(attachment)
        elif isinstance(attachment, basestring):
            path = os.path.expanduser(attachment)
            part = helper.mimewrap(path, filename, ctype)
            self.attachments.append(Attachment(part))
        else:
            raise TypeError('attach accepts an Attachment or str')

        if self.sent_time:
            self.modified_since_sent = True

    def construct_mail(self):
        """
        compiles the information contained in this envelope into a
        :class:`email.Message`.
        """
        # build body text part
        textpart = MIMEText(self.body.encode('utf-8'), 'plain', 'utf-8')

        # wrap it in a multipart container if necessary
        if self.attachments or self.sign or self.encrypt:
            msg = MIMEMultipart()
            msg.attach(textpart)
        else:
            msg = textpart

        headers = self.headers.copy()
        # add Message-ID
        if 'Message-ID' not in headers:
            headers['Message-ID'] = [email.Utils.make_msgid()]

        if 'User-Agent' in headers:
            uastring_format = headers['User-Agent'][0]
        else:
            uastring_format = settings.get('user_agent').strip()
        uastring = uastring_format.format(version=__version__)
        if uastring:
            headers['User-Agent'] = [uastring]

        # copy headers from envelope to mail
        for k, vlist in headers.items():
            for v in vlist:
                msg[k] = encode_header(k, v)

        # add attachments
        for a in self.attachments:
            msg.attach(a.get_mime_representation())

        return msg

    def parse_template(self, tmp, reset=False, only_body=False):
        """parses a template or user edited string to fill this envelope.

        :param tmp: the string to parse.
        :type tmp: str
        :param reset: remove previous envelope content
        :type reset: bool
        :param only_body: use `tmp` as body text without looking for headers
        :type only_body: bool
        """
        logging.debug('GoT: """\n%s\n"""' % tmp)

        if self.sent_time:
            self.modified_since_sent = True

        if only_body:
            self.body = tmp
        else:
            m = re.match('(?P<h>([a-zA-Z0-9_-]+:.+\n)*)\n?(?P<b>(\s*.*)*)',
                         tmp)
            assert m

            d = m.groupdict()
            headertext = d['h']
            self.body = d['b']

            # remove existing content
            if reset:
                self.headers = {}

            # go through multiline, utf-8 encoded headers
            # we decode the edited text ourselves here as
            # email.message_from_file can't deal with raw utf8 header values
            key = value = None
            for line in headertext.splitlines():
                if re.match('[a-zA-Z0-9_-]+:', line):  # new k/v pair
                    if key and value:  # save old one from stack
                        self.add(key, value)  # save
                    # NOTE(review): value keeps whatever whitespace followed
                    # the colon ("Subject: foo" yields " foo")
                    key, value = line.strip().split(':', 1)  # parse new pair
                elif key and value:  # append new line without key prefix
                    value += line
            if key and value:  # save last one if present
                self.add(key, value)