1 file changed, 18 insertions, 32 deletions
diff --git a/nephilim/plugins/Lyrics.py b/nephilim/plugins/Lyrics.py
index af54f0a..d558165 100644
--- a/nephilim/plugins/Lyrics.py
+++ b/nephilim/plugins/Lyrics.py
@@ -19,6 +19,7 @@ from PyQt4        import QtGui, QtCore, QtNetwork
 from PyQt4.QtCore import QVariant
 
 import os
+import re
 from   lxml import etree
 
 from ..plugin import Plugin
@@ -166,6 +167,7 @@ class Lyrics(Plugin):
             if not artist:
                 self.logger.info('Didn\'t find artist in %s artist search results.'%self.name)
                 return self.finish()
+            self.logger.info('Found artist: %s'%artist)
 
             url = QtCore.QUrl(self.__apiaddress)
             url.setQueryItems([('action', 'lyrics'), ('func', 'getSong'), ('artist', artist),
@@ -176,50 +178,34 @@ class Lyrics(Plugin):
 
         def __handle_search_res(self):
             url = None
-            xml = QtCore.QXmlStreamReader(self.rep)
-            while not xml.atEnd():
-                token = xml.readNext()
-                if token == QtCore.QXmlStreamReader.StartElement:
-                    if xml.name() == 'url':
-                        text = xml.readElementText()
-                        if text and not 'action=edit' in text:
-                            url = QtCore.QUrl() # the url is already percent-encoded
-                            url.setEncodedUrl(text)
-            if xml.hasError():
-                self.logger.error('Error parsing seach results: %s'%xml.errorString())
+
+            # the page is malformed UTF-8 as of Nov 2009 and QXmlStreamReader chokes
+            # on it => use a regexp instead
+            match = re.search('<url>(.*)</url>', str(self.rep.readAll()).decode('utf-8', 'replace'),
+                              re.DOTALL|re.IGNORECASE)
+            if match and not 'action=edit' in match.group(1):
+                url = QtCore.QUrl() # the url is already percent-encoded
+                url.setEncodedUrl(match.group(1))
 
             if not url:
                 self.logger.info('Didn\'t find the song on Lyricwiki.')
                 return self.finish()
-            self.logger.info('Found Lyricwiki song URL: %s.'%url)
+            self.logger.info('Found Lyricwiki song URL: %s.'%url.toString())
 
-            # XXX temporary hack to work around lyricwiki.org -> lyrics.wikia.org transition
-            if not url.path().startswith('/lyrics'):
-                url.setPath('/lyrics%s'%url.path())
             req = QtNetwork.QNetworkRequest(url)
             self.rep = self.nam.get(req)
             self.rep.finished.connect(self.__handle_lyrics)
             self.rep.error.connect(self.handle_error)
 
         def __handle_lyrics(self):
+            # the page isn't valid xml, so use regexps
             lyrics = ''
-            xml    = QtCore.QXmlStreamReader(self.rep)
-            while not xml.atEnd():
-                token = xml.readNext()
-                if token == QtCore.QXmlStreamReader.StartElement:
-                    if xml.name() == 'div' and xml.attributes().value('class') == 'lyricbox':
-                        while not xml.atEnd():
-                            token = xml.readNext()
-                            if token == QtCore.QXmlStreamReader.EndElement and xml.name() == 'div':
-                                break
-                            elif token == QtCore.QXmlStreamReader.StartElement and xml.name() == 'br':
-                                lyrics += '\n'
-                            elif token == QtCore.QXmlStreamReader.Characters:
-                                lyrics += xml.text()
-            if xml.hasError():
-                self.logger.warning('Error parsing lyrics: %s'%xml.errorString())
-
-            self.finish(lyrics)
+            for it in re.finditer('<div class=\'lyricbox\'>(?:<div.*?>.*?</div>)?(.*?)(?:<div.*?>.*?</div>)?</div>',
+                                  str(self.rep.readAll()).decode('utf-8', 'replace'), re.DOTALL):
+                gr = re.sub('<br />', '\n', it.group(1))
+                gr = re.sub('<[^>]*>', '', gr) # strip tags one at a time; a greedy '<.*>' would also eat the text between tags
+                lyrics += gr + '\n'
+            self.finish(common.decode_htmlentities(lyrics))
 
     class FetchAnimelyrics(common.MetadataFetcher):
         name = 'Animelyrics'