diff options
author | jerous <jerous@gmail.com> | 2008-09-21 20:51:13 +0200 |
---|---|---|
committer | jerous <jerous@gmail.com> | 2008-09-21 20:51:13 +0200 |
commit | df6c9d328749ae9188952ac718f5071085ee63c9 (patch) | |
tree | 96dc72b2f1045933f0309970fb2d3e85ccc80f99 /misc.py | |
parent | 6f0cb719695c0a420f1f0da1cac77293544b7b78 (diff) |
bugfix: fetching results (used by lyrics and tabs) now looks through all sites, instead of quitting at first (possibly failing) hit
Diffstat (limited to 'misc.py')
-rw-r--r-- | misc.py | 60 |
1 files changed, 38 insertions, 22 deletions
@@ -4,6 +4,7 @@ import urllib2, httplib, cookielib import socket import format +import log socket.setdefaulttimeout(8) @@ -35,10 +36,10 @@ def unique(seq): def fetch(SE, sites, song=None, xtra_tags={}): """Returns None when nothing found, or [site,source-url].""" f=format.compile(SE) - url=f(format.params(song, xtra_tags)) - url=url.replace(' ', '+') + SE_url=f(format.params(song, xtra_tags)) + SE_url=SE_url.replace(' ', '+') - request=urllib2.Request(url) + request=urllib2.Request(SE_url) request.add_header('User-Agent', 'montypc') opener=urllib2.build_opener() data=opener.open(request).read() @@ -50,31 +51,46 @@ def fetch(SE, sites, song=None, xtra_tags={}): # look for predefined urls, which are good lyrics-sites # we assume they are in order of importance; the first one matching # is taken - finalURL=None finalRegex=None - for url in urls: - if finalURL: - break - for site in sites: + log.debug("Checking %i URLs on %s"%(len(sites), SE_url)) + for site in sites: + finalURL=None + for url in urls: if url.find(site)>=0: + log.debug(" Found site %s in results: %s"%(site, url)) finalURL=url finalRegex=sites[site] break - match=None - if finalURL: - cj = cookielib.CookieJar() - opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) - r = opener.open(finalURL) - data=r.read() - regex=re.compile(finalRegex, re.IGNORECASE|re.MULTILINE|re.DOTALL) - match=regex.search(data) - if match: - data=match.group(1) - data=data.replace('<br>', '<br />') - data=data.replace('<br />', '<br />') - data=data.strip() - return [data,finalURL] + match=None + if finalURL: + cj = cookielib.CookieJar() + opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) + log.debug(" Reading URL %s"%(finalURL)) + try: + r = opener.open(finalURL) + data=r.read() + regex=re.compile(finalRegex, re.IGNORECASE|re.MULTILINE|re.DOTALL) + match=regex.search(data) + except Exception, e: + log.debug(" Failed to open site %s"%(finalURL)) + continue + + if match: + log.debug(" Regex succeeded!") + data=match.group(1) + data=data.replace('<br>', '<br />') + data=data.replace('<br />', '<br />') + data=data.strip() + log.debug("Succeeded fetching.") + return [data,finalURL] + else: + log.debug(" Regex for %s failed"%(site, (" (%s)"%(finalURL) if finalURL else ""))) + else: + log.debug(" Site %s not found on results-page"%(site)) + + + log.debug("Failed fetching.") return None class Button(QtGui.QPushButton): |