diff options
Diffstat (limited to 'nephilim/plugins/Lyrics.py')
-rw-r--r-- | nephilim/plugins/Lyrics.py | 50 |
1 files changed, 18 insertions, 32 deletions
diff --git a/nephilim/plugins/Lyrics.py b/nephilim/plugins/Lyrics.py index af54f0a..d558165 100644 --- a/nephilim/plugins/Lyrics.py +++ b/nephilim/plugins/Lyrics.py @@ -19,6 +19,7 @@ from PyQt4 import QtGui, QtCore, QtNetwork from PyQt4.QtCore import QVariant import os +import re from lxml import etree from ..plugin import Plugin @@ -166,6 +167,7 @@ class Lyrics(Plugin): if not artist: self.logger.info('Didn\'t find artist in %s artist search results.'%self.name) return self.finish() + self.logger.info('Found artist: %s'%artist) url = QtCore.QUrl(self.__apiaddress) url.setQueryItems([('action', 'lyrics'), ('func', 'getSong'), ('artist', artist), @@ -176,50 +178,34 @@ class Lyrics(Plugin): def __handle_search_res(self): url = None - xml = QtCore.QXmlStreamReader(self.rep) - while not xml.atEnd(): - token = xml.readNext() - if token == QtCore.QXmlStreamReader.StartElement: - if xml.name() == 'url': - text = xml.readElementText() - if text and not 'action=edit' in text: - url = QtCore.QUrl() # the url is already percent-encoded - url.setEncodedUrl(text) - if xml.hasError(): - self.logger.error('Error parsing seach results: %s'%xml.errorString()) + + # the page is borked utf-8 as of nov 2009, qxmlstreamreader chokes + # on it => use regexps + match = re.search('<url>(.*)</url>', str(self.rep.readAll()).decode('utf-8', 'replace'), + re.DOTALL|re.IGNORECASE) + if match and not 'action=edit' in match.group(1): + url = QtCore.QUrl() # the url is already percent-encoded + url.setEncodedUrl(match.group(1)) if not url: self.logger.info('Didn\'t find the song on Lyricwiki.') return self.finish() - self.logger.info('Found Lyricwiki song URL: %s.'%url) + self.logger.info('Found Lyricwiki song URL: %s.'%url.toString()) - # XXX temporary hack to work around lyricwiki.org -> lyrics.wikia.org transition - if not url.path().startswith('/lyrics'): - url.setPath('/lyrics%s'%url.path()) req = QtNetwork.QNetworkRequest(url) self.rep = self.nam.get(req) self.rep.finished.connect(self.__handle_lyrics) self.rep.error.connect(self.handle_error) def __handle_lyrics(self): + # the page isn't valid xml, so use regexps lyrics = '' - xml = QtCore.QXmlStreamReader(self.rep) - while not xml.atEnd(): - token = xml.readNext() - if token == QtCore.QXmlStreamReader.StartElement: - if xml.name() == 'div' and xml.attributes().value('class') == 'lyricbox': - while not xml.atEnd(): - token = xml.readNext() - if token == QtCore.QXmlStreamReader.EndElement and xml.name() == 'div': - break - elif token == QtCore.QXmlStreamReader.StartElement and xml.name() == 'br': - lyrics += '\n' - elif token == QtCore.QXmlStreamReader.Characters: - lyrics += xml.text() - if xml.hasError(): - self.logger.warning('Error parsing lyrics: %s'%xml.errorString()) - - self.finish(lyrics) + for it in re.finditer('<div class=\'lyricbox\'>(?:<div.*?>.*?</div>)?(.*?)(?:<div.*?>.*?</div>)?</div>', + str(self.rep.readAll()).decode('utf-8'), re.DOTALL): + gr = re.sub('<br />', '\n', it.group(1)) + gr = re.sub(re.compile('<.*>', re.DOTALL), '', gr) + lyrics += gr + '\n' + self.finish(common.decode_htmlentities(lyrics)) class FetchAnimelyrics(common.MetadataFetcher): name = 'Animelyrics' |