summaryrefslogtreecommitdiff
path: root/misc.py
blob: b85cab201136e73a3b4923f3a240779252a5dd1c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
from PyQt4 import QtCore, QtGui
from htmlentitydefs import name2codepoint as n2cp
import re
import urllib2, httplib, cookielib
import socket
import unicodedata
import htmlentitydefs

import format
import log

# Default timeout (in seconds) for every socket created after this point,
# which includes the connections opened through urllib2 in fetch().
socket.setdefaulttimeout(8)

# Icon object shared by the application; the path is relative
# (presumably to the working directory the program is started from).
appIcon=QtGui.QIcon('gfx/icon.png')

# module-wide event loop object, used only by doEvents()
eventLoop=QtCore.QEventLoop()
def doEvents():
    """Let the shared event loop process events (all event types)."""
    flags=QtCore.QEventLoop.AllEvents
    eventLoop.processEvents(flags)

def sec2min(secs):
    """Converts seconds to min:sec.

    The seconds part is always written with two digits ('0:05', '12:30').
    Fractional input is truncated to whole seconds before formatting, so
    65.0 gives '1:05' rather than '1:05.0'.
    """
    minutes, seconds=divmod(int(secs), 60)
    return '%d:%02d'%(minutes, seconds)

def numeric_compare(x, y):
    """Three-way compare: 1 when x>y, 0 when x==y, -1 in every other case."""
    if not x>y:
        return 0 if x==y else -1
    return 1
def unique(seq):
    """Retrieve list of unique elements.

    The order of first occurrence is kept.  Elements are compared with ==
    (list membership), so unhashable elements such as lists are fine too.
    """
    seen=[]
    for item in seq:
        if item not in seen:
            seen.append(item)
    return seen

def toAscii(ustr):
    """Return ustr reduced to plain ASCII.

    A value that already is a str (or an instance of a str subclass) is
    returned untouched.  Anything else is NFKD-normalised and then encoded
    to ASCII, silently dropping every character that has no ASCII
    representation.
    """
    if isinstance(ustr, str):
        return ustr
    return unicodedata.normalize('NFKD', ustr).encode('ascii', 'ignore')

def fetch(SE, sites, song=None, xtra_tags=None, stripHTML=True):
    """Look something up through a search engine and scrape it from a site.

    SE is a format string (compiled with format.compile) which, filled in
    with the parameters built from song and xtra_tags, gives the URL of the
    search-engine results page.  sites maps a site identifier (a substring
    looked for in the links on the results page) to a regular expression
    whose first group captures the wanted content on that site's page.
    When stripHTML is true, <br> tags are turned into newlines, all other
    tags are removed and HTML entities are decoded.

    Returns None when nothing found, or [content, source-url].

    Errors while fetching the search-engine page itself are not caught.
    A failure while reading or matching a candidate site is logged and that
    site is skipped.
    """
    if xtra_tags is None:
        # a fresh dict per call instead of one shared mutable default
        xtra_tags={}

    # compose the search-engine URL
    f=format.compile(SE)
    SE_url=toAscii(f(format.params(song, xtra_tags)))
    SE_url=SE_url.replace(' ', '+')

    # fetch the page from the search-engine with the results
    request=urllib2.Request(SE_url)
    request.add_header('User-Agent', 'montypc')
    response=urllib2.build_opener().open(request)
    try:
        data=response.read()
    finally:
        # release the connection even when reading fails
        response.close()

    # look for urls on the search page!
    urls=re.findall(r'<a href="(.*?)".*?>.*?<\/a>', data)
    log.debug("all urls")
    for url in urls:
        log.debug("  %s"%(url))

    # look for urls which are defined in $sites.
    # The first matching URL is taken
    log.debug("Checking %i URLs on %s"%(len(sites), SE_url))
    # loop over all sites which may have what we're interested in
    for site in sites:
        # check if on the results-page there is a link to $site
        finalURL=None
        for url in urls:
            if site in url:
                log.debug("  Found site %s in results: %s"%(site, url))
                finalURL=url
                break

        if not finalURL:
            log.debug("  Site %s not found on results-page"%(site))
            continue

        finalRegex=sites[site]
        # open the url with an opener that keeps cookies for this request
        cj=cookielib.CookieJar()
        opener=urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        log.debug("  Reading URL %s"%(finalURL))
        try:
            # read the page
            r=opener.open(finalURL)
            try:
                data=r.read()
            finally:
                r.close()
            # perform the regular expression
            regex=re.compile(finalRegex, re.IGNORECASE|re.MULTILINE|re.DOTALL)
            match=regex.search(data)
        except Exception:
            # best effort: any failure here just means "try the next site"
            log.debug("  Failed to open site %s"%(finalURL))
            continue

        if not match:
            log.debug("  Regex for %s (%s) failed"%(site, finalURL))
            continue

        # if the regex matches, then we arrive here
        # we assume the content we want is in the first group
        log.debug("  Regex succeeded!")
        charsetMatch=re.search(r'charset=["\']?([\w-]+)', data)
        if charsetMatch:
            charset=charsetMatch.group(1)
            log.debug("  charset=%s"%(charset))
        else:
            charset='iso-8859-1'
            log.debug("  charset not found. Assuming %s"%(charset))

        data=match.group(1)
        data=data.replace('\r', '')     # drop ^M aka \r
        try:
            # 'replace' keeps a page with a few bad bytes usable
            data=unicode(data, charset, 'replace')
        except LookupError:
            # the page announced a codec Python does not know
            log.debug("  unknown charset %s. Assuming iso-8859-1"%(charset))
            data=unicode(data, 'iso-8859-1', 'replace')

        if stripHTML:
            # do we want HTML?
            data=re.sub('<br.*?>', '\n', data)  # replace <br />'s with newline
            data=data.replace('\n\n', '\n')
            data=re.sub('<[^>]*?>', '', data)   # strip all other HTML
            data=decodeHTMLEntities(data)       # convert HTML entities
        data=data.strip()
        log.debug("Succeeded fetching.")
        return [data,finalURL]

    log.debug("Failed fetching.")
    return None

def substEntity(match):
    """Return the replacement text for one matched HTML entity.

    match is expected to come from the pattern used by decodeHTMLEntities:
    group 1 is '#' for a numeric character reference and empty for a named
    entity, group 2 is the number or the name.  Decimal (&#233;) and
    hexadecimal (&#xE9;) references are decoded; named entities are looked
    up in name2codepoint.  Anything that cannot be decoded (unknown name,
    malformed number, code point out of range) is returned unchanged
    instead of raising.
    """
    ent = match.group(2)
    if match.group(1) == "#":
        try:
            if ent[:1] in ('x', 'X'):
                cp = int(ent[1:], 16)
            else:
                cp = int(ent)
            return unichr(cp)
        except (ValueError, OverflowError):
            # not a number, or not a code point unichr can represent
            return match.group()

    cp = n2cp.get(ent)
    if cp:
        return unichr(cp)
    return match.group()

def decodeHTMLEntities(string):
    # replace entities with their UTF-counterpart
    entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});")
    return entity_re.subn(substEntity, string)[0]

        
class Button(QtGui.QPushButton):
    """A simple Button class which calls $onClick when clicked."""
    # width and height used for the size passed to QIcon.addFile in changeIcon()
    iconSize=32

    def __init__(self, caption, onClick=None, iconPath=None, iconOnly=False, parent=None):
        """Create the button.

        caption is used as tooltip and, unless the button has an icon and
        iconOnly is true, also as the visible text.  onClick, when given,
        is connected to the clicked(bool) signal.  iconPath, when given,
        is loaded through changeIcon().
        """
        QtGui.QPushButton.__init__(self, parent)

        if onClick:
            self.connect(self, QtCore.SIGNAL('clicked(bool)'), onClick)
        if iconPath:
            self.changeIcon(iconPath)

        if not(iconPath and iconOnly):
            QtGui.QPushButton.setText(self, caption)

        self.setToolTip(caption)

    def setText(self, caption):
        """Update the tooltip, and the visible text when there is no icon."""
        self.setToolTip(caption)
        # icon() always returns a QIcon object, never None, so test isNull();
        # call the base class explicitly, self.setText would recurse forever
        if self.icon().isNull():
            QtGui.QPushButton.setText(self, caption)

    def changeIcon(self, iconPath):
        """Replace the button's icon with the image file at iconPath."""
        icon=QtGui.QIcon()
        icon.addFile(iconPath, QtCore.QSize(self.iconSize, self.iconSize))
        self.setIcon(icon)