diff options
author | Anton Khirnov <wyskas@gmail.com> | 2009-11-20 07:58:24 +0100 |
---|---|---|
committer | Anton Khirnov <wyskas@gmail.com> | 2009-11-21 07:03:16 +0100 |
commit | d3b9598cb75aee28b10d46f7ed58607d738bc98e (patch) | |
tree | 9caeabb60ea6c2123c836a93470254f0dd5d9f7f /nephilim/common.py | |
parent | 965494cf20b170b9916c2444b037250c7ae4764b (diff) |
Lyrics: fix lyricwiki. yet again.
they really should stop doing this.
switched to using regexps, because QXmlStreamReader chokes on their
invalid pages and i don't want to introduce more dependencies.
Diffstat (limited to 'nephilim/common.py')
-rw-r--r-- | nephilim/common.py | 21 |
1 files changed, 20 insertions, 1 deletions
```diff
diff --git a/nephilim/common.py b/nephilim/common.py
index 2136f29..334ced9 100644
--- a/nephilim/common.py
+++ b/nephilim/common.py
@@ -21,6 +21,7 @@ import socket
 import logging
 import os
 import re
+from htmlentitydefs import name2codepoint as n2cp
 
 socket.setdefaulttimeout(8)
 
@@ -86,6 +87,24 @@ def generate_metadata_path(song, dir_tag, file_tag):
 
     return dirname, filepath
 
+def substitute_entity(match):
+    ent = match.group(3)
+    if match.group(1) == "#":
+        if match.group(2) == '':
+            return unichr(int(ent))
+        elif match.group(2) == 'x':
+            return unichr(int('0x'+ent, 16))
+    else:
+        cp = n2cp.get(ent)
+        if cp:
+            return unichr(cp)
+        else:
+            return match.group()
+
+def decode_htmlentities(string):
+    entity_re = re.compile(r'&(#?)(x?)(\w+);')
+    return entity_re.subn(substitute_entity, string)[0]
+
 class MetadataFetcher(QtCore.QObject):
     """A basic class for metadata fetchers. Provides a fetch(song) function,
     emits a finished(song, metadata) signal when done; lyrics is either a Python
@@ -115,7 +134,7 @@ class MetadataFetcher(QtCore.QObject):
             self.abort()
         self.song = song
 
-        self.logger.info('Searching %s: %s.'%(self. name, url))
+        self.logger.info('Searching %s: %s.'%(self. name, url.toString()))
         self.rep = self.nam.get(QtNetwork.QNetworkRequest(url))
         self.rep.error.connect(self.handle_error)
 
```