"""Miscellaneous helpers: small utilities, web scraping via a search engine,
HTML entity decoding and a convenience push button."""
from PyQt4 import QtCore, QtGui
from htmlentitydefs import name2codepoint as n2cp
import re
import urllib2
import httplib
import cookielib
import socket
import unicodedata
import htmlentitydefs

import format
import log

# Default timeout (in seconds) for sockets created after this point.
socket.setdefaulttimeout(8)

appIcon = QtGui.QIcon('gfx/icon.png')
eventLoop = QtCore.QEventLoop()

# NOTE(review): the link and <br> patterns below were reconstructed. In the
# damaged source the link pattern had lost its '<a href="(.*?)".*?>' prefix
# and the <br> pattern was an empty string (which inserts a newline between
# every character). Confirm against the upstream file.
_LINK_RE = re.compile(r'<a href="(.*?)".*?>.*?<\/a>')
_BR_RE = re.compile(r'<br.*?>', re.IGNORECASE)
_TAG_RE = re.compile(r'<[^>]*?>')
_CHARSET_RE = re.compile(r'charset=["\']?([\w-]+)')
_ENTITY_RE = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")

# Charset assumed when a page does not declare one (or declares a bad one).
_FALLBACK_CHARSET = 'iso-8859-1'


def doEvents():
    """Make some time for necessary events."""
    eventLoop.processEvents(QtCore.QEventLoop.AllEvents)


def sec2min(secs):
    """Converts seconds to min:sec, e.g. 65 -> '1:05'."""
    minutes = int(secs / 60)
    seconds = secs % 60
    if seconds < 10:
        # zero-pad so that the seconds part has at least two digits
        seconds = '0' + str(seconds)
    return str(minutes) + ':' + str(seconds)


def numeric_compare(x, y):
    """Three-way comparison: 1 if x>y, 0 if x==y, -1 otherwise."""
    if x > y:
        return 1
    elif x == y:
        return 0
    return -1


def unique(seq):
    """Retrieve list of unique elements, keeping first-occurrence order.

    Membership is tested against a list (not a set), so unhashable
    elements are supported.
    """
    seen = []
    for item in seq:
        if item not in seen:
            seen.append(item)
    return seen


def toAscii(ustr):
    """Return $ustr as a plain ASCII byte string.

    Byte strings are returned untouched. Unicode strings are NFKD-normalised
    and every character that still has no ASCII representation is dropped.
    """
    if isinstance(ustr, str):
        return ustr
    return unicodedata.normalize('NFKD', ustr).encode('ascii', 'ignore')


def _readURL(opener, target):
    """Open $target with $opener, return the body and always close the response."""
    response = opener.open(target)
    try:
        return response.read()
    finally:
        response.close()


def _decodePage(raw, charset):
    """Decode the byte string $raw with $charset, falling back to iso-8859-1."""
    try:
        return unicode(raw, charset)
    except (LookupError, UnicodeDecodeError):
        # unknown or wrong charset declared by the page; iso-8859-1 can
        # decode any byte sequence, so this never fails
        log.debug(" Decoding as %s failed. Assuming %s"%(charset, _FALLBACK_CHARSET))
        return unicode(raw, _FALLBACK_CHARSET, 'replace')


def fetch(SE, sites, song=None, xtra_tags=None, stripHTML=True):
    """Returns None when nothing found, or [site,source-url].

    SE        -- format string for the search-engine URL; it is compiled with
                 format.compile and filled with format.params(song, xtra_tags)
    sites     -- mapping of site name -> regular expression. The first group
                 of the expression must capture the wanted content.
    song      -- passed through to format.params
    xtra_tags -- passed through to format.params (defaults to an empty dict)
    stripHTML -- when True, <br> tags become newlines, all other tags are
                 removed and HTML entities are decoded

    The result is [content, url] where content is a unicode string and url
    is the page it was taken from. Errors while requesting the search-engine
    page itself are not caught here; errors on the individual sites are
    logged and the next site is tried.
    """
    if xtra_tags is None:
        xtra_tags = {}

    # compose the search-engine URL
    f = format.compile(SE)
    SE_url = toAscii(f(format.params(song, xtra_tags)))
    SE_url = SE_url.replace(' ', '+')

    # fetch the page from the search-engine with the results
    request = urllib2.Request(SE_url)
    request.add_header('User-Agent', 'montypc')
    results = _readURL(urllib2.build_opener(), request)

    # look for urls on the search page!
    urls = _LINK_RE.findall(results)
    log.debug("all urls")
    for url in urls:
        log.debug(" %s"%(url))

    # look for urls which are defined in $sites.
    # The first matching URL is taken
    log.debug("Checking %i URLs on %s"%(len(sites), SE_url))
    for site in sites:
        # check if on the results-page there is a link to $site
        finalURL = None
        for url in urls:
            if site in url:
                log.debug(" Found site %s in results: %s"%(site, url))
                finalURL = url
                break
        if not finalURL:
            log.debug(" Site %s not found on results-page"%(site))
            continue

        # open the url; a fresh cookie jar per site
        cj = cookielib.CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        log.debug(" Reading URL %s"%(finalURL))
        try:
            page = _readURL(opener, finalURL)
            regex = re.compile(sites[site], re.IGNORECASE|re.MULTILINE|re.DOTALL)
            match = regex.search(page)
        except Exception:
            # deliberately broad: any failure on one site (network, HTTP,
            # invalid regex) must not prevent trying the remaining sites
            log.debug(" Failed to open site %s"%(finalURL))
            continue

        if not match:
            log.debug(" Regex for %s (%s) failed"%(site, finalURL))
            continue

        # we assume the content we want is in the first group
        log.debug(" Regex succeeded!")
        declared = _CHARSET_RE.search(page)
        if declared:
            charset = declared.group(1)
            log.debug(" charset=%s"%(charset))
        else:
            charset = _FALLBACK_CHARSET
            log.debug(" charset not found. Assuming %s"%(charset))

        content = match.group(1).replace('\r', '')  # drop ^M aka \r
        content = _decodePage(content, charset)
        if stripHTML:  # do we want HTML?
            content = _BR_RE.sub('\n', content)  # replace <br />'s with newline
            content = content.replace('\n\n', '\n')
            content = _TAG_RE.sub('', content)  # strip all other HTML
            content = decodeHTMLEntities(content)  # convert HTML entities
        content = content.strip()
        log.debug("Succeeded fetching.")
        return [content, finalURL]

    log.debug("Failed fetching.")
    return None


def substEntity(match):
    """Return the character for one matched HTML entity.

    $match comes from the entity pattern: group(1) is '#' for numeric
    references, group(2) is the number or the entity name. Decimal (&#65;)
    and hexadecimal (&#x41;) references are supported. Anything that cannot
    be resolved is returned unchanged.
    """
    ent = match.group(2)
    if match.group(1) == "#":
        try:
            if ent[:1] in ('x', 'X'):
                return unichr(int(ent[1:], 16))
            return unichr(int(ent))
        except (ValueError, OverflowError):
            # not a number, or a code point unichr cannot represent
            return match.group()
    cp = n2cp.get(ent)
    if cp:
        return unichr(cp)
    return match.group()


def decodeHTMLEntities(string):
    """Replace the HTML entities in $string with their unicode counterpart."""
    return _ENTITY_RE.sub(substEntity, string)


class Button(QtGui.QPushButton):
    """A simple Button class which calls $onClick when clicked."""
    # edge length passed to QSize when an icon file is added
    iconSize = 32

    def __init__(self, caption, onClick=None, iconPath=None, iconOnly=False, parent=None):
        """Create the button.

        caption  -- tooltip, and also the button text unless an icon is
                    given together with $iconOnly
        onClick  -- optional callable connected to the clicked(bool) signal
        iconPath -- optional path of the icon file
        iconOnly -- when True and $iconPath is given, no text is shown
        parent   -- passed to QPushButton
        """
        QtGui.QPushButton.__init__(self, parent)

        if onClick:
            self.connect(self, QtCore.SIGNAL('clicked(bool)'), onClick)
        if iconPath:
            self.changeIcon(iconPath)

        if not (iconPath and iconOnly):
            QtGui.QPushButton.setText(self, caption)

        self.setToolTip(caption)

    def setText(self, caption):
        """Update the tooltip, and the visible text when the button has no icon."""
        self.setToolTip(caption)
        if self.icon().isNull():
            # call the base class explicitly: self.setText would recurse
            QtGui.QPushButton.setText(self, caption)

    def changeIcon(self, iconPath):
        """Replace the button's icon with the file at $iconPath."""
        icon = QtGui.QIcon()
        icon.addFile(iconPath, QtCore.QSize(self.iconSize, self.iconSize))
        self.setIcon(icon)