summary | refs | log | tree | commit | diff
path: root/nephilim/plugins/Lyrics.py
diff options
context:
space:
mode:
author Anton Khirnov <wyskas@gmail.com> 2009-08-11 07:27:55 +0200
committer Anton Khirnov <wyskas@gmail.com> 2009-08-11 07:27:55 +0200
commit 1b3a7dfad027698ee3218595e7465843add0f3eb (patch)
tree 0e5de0b79686d1923cb042d8c29146218f8e4111 /nephilim/plugins/Lyrics.py
parent 777f5f30bbbb0f79cea29e6705247d3f102fb458 (diff)
Lyrics: use lxml for parsing animelyrics pages.
Diffstat (limited to 'nephilim/plugins/Lyrics.py')
-rw-r--r-- nephilim/plugins/Lyrics.py 20
1 files changed, 14 insertions, 6 deletions
diff --git a/nephilim/plugins/Lyrics.py b/nephilim/plugins/Lyrics.py
index 864b130..8b57f6e 100644
--- a/nephilim/plugins/Lyrics.py
+++ b/nephilim/plugins/Lyrics.py
@@ -175,14 +175,22 @@ class Lyrics(Plugin):
't':'performer'})
try:
#get url for lyrics
- page = urllib.urlopen(url).read()
- url = re.search('<a href="(.*?)".*?%s'%song.title(), page, re.IGNORECASE).group(1)
+ self.logger.info('Searching Animelyrics: %s.'%url)
+ tree = etree.HTML(urllib.urlopen(url).read())
+ url = None
+ for elem in tree.iterfind('.//a'):
+ if ('href' in elem.attrib) and elem.text and (song.title() in elem.text):
+ url = 'http://www.animelyrics.com/%s'%elem.get('href')
+ print url
+ if not url:
+ return None
#get lyrics
- url = 'http://www.animelyrics.com%s'%url
- page = urllib.urlopen(url).read()
+ self.logger.info('Found song URL: %s.'%url)
+ tree = etree.HTML(urllib.urlopen(url).read())
ret = ''
- for match in re.finditer('<pre class=lyrics>(.*?)</pre>', page, re.IGNORECASE|re.DOTALL):
- ret += '%s\n\n'%match.group(1)
+ for elem in tree.iterfind('.//pre'):
+ if elem.get('class') == 'lyrics':
+ ret += '%s\n\n'%etree.tostring(elem, method = 'text', encoding = 'utf-8')
return ret
except socket.error, e:
self.logger.error('Error downloading lyrics from Animelyrics: %s.'%e)