summary | refs | log | tree | commit | diff
path: root/nephilim/plugins/Lyrics.py
diff options
context:
space:
mode:
Diffstat (limited to 'nephilim/plugins/Lyrics.py')
-rw-r--r-- nephilim/plugins/Lyrics.py 50
1 files changed, 18 insertions, 32 deletions
diff --git a/nephilim/plugins/Lyrics.py b/nephilim/plugins/Lyrics.py
index af54f0a..d558165 100644
--- a/nephilim/plugins/Lyrics.py
+++ b/nephilim/plugins/Lyrics.py
@@ -19,6 +19,7 @@ from PyQt4 import QtGui, QtCore, QtNetwork
from PyQt4.QtCore import QVariant
import os
+import re
from lxml import etree
from ..plugin import Plugin
@@ -166,6 +167,7 @@ class Lyrics(Plugin):
if not artist:
self.logger.info('Didn\'t find artist in %s artist search results.'%self.name)
return self.finish()
+ self.logger.info('Found artist: %s'%artist)
url = QtCore.QUrl(self.__apiaddress)
url.setQueryItems([('action', 'lyrics'), ('func', 'getSong'), ('artist', artist),
@@ -176,50 +178,34 @@ class Lyrics(Plugin):
def __handle_search_res(self):
url = None
- xml = QtCore.QXmlStreamReader(self.rep)
- while not xml.atEnd():
- token = xml.readNext()
- if token == QtCore.QXmlStreamReader.StartElement:
- if xml.name() == 'url':
- text = xml.readElementText()
- if text and not 'action=edit' in text:
- url = QtCore.QUrl() # the url is already percent-encoded
- url.setEncodedUrl(text)
- if xml.hasError():
- self.logger.error('Error parsing seach results: %s'%xml.errorString())
+
+ # The page is malformed UTF-8 as of Nov 2009 and QXmlStreamReader chokes
+ # on it, so extract the URL with a regexp instead.
+ match = re.search('<url>(.*)</url>', str(self.rep.readAll()).decode('utf-8', 'replace'),
+ re.DOTALL|re.IGNORECASE)
+ if match and not 'action=edit' in match.group(1):
+ url = QtCore.QUrl() # the url is already percent-encoded
+ url.setEncodedUrl(match.group(1))
if not url:
self.logger.info('Didn\'t find the song on Lyricwiki.')
return self.finish()
- self.logger.info('Found Lyricwiki song URL: %s.'%url)
+ self.logger.info('Found Lyricwiki song URL: %s.'%url.toString())
- # XXX temporary hack to work around lyricwiki.org -> lyrics.wikia.org transition
- if not url.path().startswith('/lyrics'):
- url.setPath('/lyrics%s'%url.path())
req = QtNetwork.QNetworkRequest(url)
self.rep = self.nam.get(req)
self.rep.finished.connect(self.__handle_lyrics)
self.rep.error.connect(self.handle_error)
def __handle_lyrics(self):
+ # the page isn't valid xml, so use regexps
lyrics = ''
- xml = QtCore.QXmlStreamReader(self.rep)
- while not xml.atEnd():
- token = xml.readNext()
- if token == QtCore.QXmlStreamReader.StartElement:
- if xml.name() == 'div' and xml.attributes().value('class') == 'lyricbox':
- while not xml.atEnd():
- token = xml.readNext()
- if token == QtCore.QXmlStreamReader.EndElement and xml.name() == 'div':
- break
- elif token == QtCore.QXmlStreamReader.StartElement and xml.name() == 'br':
- lyrics += '\n'
- elif token == QtCore.QXmlStreamReader.Characters:
- lyrics += xml.text()
- if xml.hasError():
- self.logger.warning('Error parsing lyrics: %s'%xml.errorString())
-
- self.finish(lyrics)
+ for it in re.finditer('<div class=\'lyricbox\'>(?:<div.*?>.*?</div>)?(.*?)(?:<div.*?>.*?</div>)?</div>',
+ str(self.rep.readAll()).decode('utf-8'), re.DOTALL):
+ gr = re.sub('<br />', '\n', it.group(1))
+ gr = re.sub(re.compile('<.*>', re.DOTALL), '', gr)
+ lyrics += gr + '\n'
+ self.finish(common.decode_htmlentities(lyrics))
class FetchAnimelyrics(common.MetadataFetcher):
name = 'Animelyrics'