# HG changeset patch # User Fabien Ninoles # Date 1390437479 18000 # Node ID d7e256c9aec937333e55927fb769b478212c9a13 # Parent e3a2bb2bae8d25bece00cdd5dce8acb1b4dcdc58 Add torrents.log. Add support for new Stream section. Add more logging. diff -r e3a2bb2bae8d -r d7e256c9aec9 update-hib.py --- a/update-hib.py Sun Sep 22 22:07:04 2013 -0400 +++ b/update-hib.py Wed Jan 22 19:37:59 2014 -0500 @@ -55,6 +55,7 @@ yield s self.attrs = set(chain.from_iterable(cleanup(attr) for attr in ids)) urls = button.a.attrs + logging.debug("URLS are %r", urls) self.torrent = urls["data-bt"] self.web = urls["data-web"] details = soup.find(class_="dldetails").find(class_="dlsize") @@ -83,6 +84,7 @@ self.others = [] self.addchilds(soup) def addchilds(self, soup): + logging.debug("Parsing soup for downloads %s", self.id) for child in soup.children: if type(child) is not bs4.element.Tag: continue @@ -90,7 +92,11 @@ if [True for attr in classes if attr in ("arc-toggle", "downloads")]: self.addchilds(child) elif "download" in classes: - self.elements.append(Download(self.id, child)) + desc = child.find(class_="flexbtn").span.string + if desc == "Stream": + logging.info("Ignoring Stream URLs for %s", self.id) + else: + self.elements.append(Download(self.id, child)) elif [True for attr in classes if attr in ("clearfix","label")]: pass else: @@ -117,13 +123,14 @@ self.title = "unknown" self.downloads = [] self.others = [] - for child in soup.children: + for child in soup.children: if type(child) is not bs4.element.Tag: continue classes = child["class"] if "class" in child.attrs else [] if "gameinfo" in classes: self.title = child.find(class_="title").a.string.strip() elif "downloads" in classes: + logging.debug("Collecting downloadables for %s", self.title) self.downloads.append(Downloads(child)) elif [True for attr in classes if attr in ["icn", "clearfix"]]: pass @@ -166,6 +173,8 @@ return 1 if "website" in dl.attrs: return -1 + if "AAC" in dl.attrs: + return 1 raise Exception("Unknown audio type: %r" % (dl.attrs)) if dl.dltype in ("mac","windows"): return -1 @@ -205,27 +214,40 @@ logging.debug("Empty scores list: %r", scores) return [] +class tee: + def __init__(self, main, *other): + self.main = main + self.other = other + def write(self, s): + self.main.write(s) + for o in self.other: + o.write(s) + def main(fn): selector = FileSelector() downloads = [] - for game in parseGamesFromFile(fn): - for dls in game.downloads: - scores = list(selector(dls)) - choosen = selectHighestScore(scores) - for score, dl in scores: - print("[%s] %2d | %-20s | %-15s | %-10s | %-25s | %s " % ( - "*" if dl in choosen else " ", - score, - game.title, - dls.id, - dls.date, - ", ".join(sorted(dl.attrs)), - dl.torrent)) - if dl in choosen: - downloads.append(dl) - if not scores: - print("No download for",dls.id) - print("-" * 80) + import sys + with open("torrents.log", "w") as l: + for game in parseGamesFromFile(fn): + logging.info("Parsing game %s (%d downloads)", game.title, len(game.downloads)) + for dls in game.downloads: + scores = list(selector(dls)) + choosen = selectHighestScore(scores) + for score, dl in scores: + print("[%s] %2d | %-30s | %-15s | %-30s | %-15s | %s " % ( + "*" if dl in choosen else " ", + score, + game.title, + dls.id, + dl.date, + ", ".join(sorted(dl.attrs)), + dl.torrent), + file=l) + if dl in choosen: + downloads.append(dl) + if not scores: + print("No download for %s" % (dls.id), file=l) + print("-" * 80, file=l) import urllib.request import urllib.parse @@ -243,6 +265,7 @@ with opener.open(dl.torrent) as u: with open(fn,"wb") as f: f.write(u.read()) + logging.info("%s saved.", os.path.realpath(fn)) except: logging.exception("Error with download %r", dl) else: