update-hib.py
changeset 11 dc1b075c538a
parent 10 d7e256c9aec9
child 12 9d5880ecdb82
equal deleted inserted replaced
10:d7e256c9aec9 11:dc1b075c538a
    54                 for s in self.subst.get(attr,(attr,)):
    54                 for s in self.subst.get(attr,(attr,)):
    55                     yield s
    55                     yield s
    56         self.attrs = set(chain.from_iterable(cleanup(attr) for attr in ids))
    56         self.attrs = set(chain.from_iterable(cleanup(attr) for attr in ids))
    57         urls = button.a.attrs
    57         urls = button.a.attrs
    58         logging.debug("URLS are %r", urls)
    58         logging.debug("URLS are %r", urls)
    59         self.torrent = urls["data-bt"]
    59         self.torrent = urls["data-bt"] if "data-bt" in urls.keys() else None
    60         self.web = urls["data-web"]
    60         self.web = urls["data-web"]
    61         details = soup.find(class_="dldetails").find(class_="dlsize")
    61         details = soup.find(class_="dldetails").find(class_="dlsize")
    62         size = details.find(class_="mbs")
    62         size = details.find(class_="mbs")
    63         md5 = details.find(class_="dlmd5")
    63         md5 = details.find(class_="dlmd5")
    64         date = details.find(class_="dldate")
    64         date = details.find(class_="dldate")
    77     def __repr__(self):
    77     def __repr__(self):
    78         return self.format()
    78         return self.format()
    79 
    79 
    80 class Downloads:
    80 class Downloads:
    81     def __init__(self, soup):
    81     def __init__(self, soup):
    82         self.id = [class_ for class_ in soup["class"] if class_ != "downloads"][0]
    82         self.id = [class_ for class_ in soup["class"] if class_ not in ("downloads","js-platform")][0]
    83         self.elements = []
    83         self.elements = []
    84         self.others = []
    84         self.others = []
    85         self.addchilds(soup)
    85         self.addchilds(soup)
    86     def addchilds(self, soup):
    86     def addchilds(self, soup):
    87         logging.debug("Parsing soup for downloads %s", self.id)
    87         logging.debug("Parsing soup for downloads %s", self.id)
    89             if type(child) is not bs4.element.Tag:
    89             if type(child) is not bs4.element.Tag:
    90                 continue
    90                 continue
    91             classes = child["class"] if "class" in child.attrs else []
    91             classes = child["class"] if "class" in child.attrs else []
    92             if [True for attr in classes if attr in ("arc-toggle", "downloads")]:
    92             if [True for attr in classes if attr in ("arc-toggle", "downloads")]:
    93                 self.addchilds(child)
    93                 self.addchilds(child)
    94             elif "download" in classes:
    94             elif "download-buttons" in classes:
    95                 desc = child.find(class_="flexbtn").span.string
    95                 for subchild in child.children:
    96                 if desc == "Stream":
    96                     if type(subchild) is not bs4.element.Tag:
    97                     logging.info("Ignoring Stream URLs for %s", self.id)
    97                         continue
    98                 else:
    98                     btn = subchild.find(class_="flexbtn")
    99                     self.elements.append(Download(self.id, child))
    99                     if not btn:
       
   100                         continue
       
   101                     desc = btn.span.string
       
   102                     if desc == "Stream":
       
   103                         logging.info("Ignoring Stream URLs for %s", self.id)
       
   104                     else:
       
   105                         self.elements.append(Download(self.id, subchild))
   100             elif [True for attr in classes if attr in ("clearfix","label")]:
   106             elif [True for attr in classes if attr in ("clearfix","label")]:
   101                 pass
   107                 pass
   102             else:
   108             else:
   103                 self.others.append(child)
   109                 self.others.append(child)
   104     def __iter__(self):
   110     def __iter__(self):
   225 
   231 
   226 def main(fn):
   232 def main(fn):
   227     selector = FileSelector()
   233     selector = FileSelector()
   228     downloads = []
   234     downloads = []
   229     import sys
   235     import sys
       
   236     import os
       
   237     import urllib.parse
   230     with open("torrents.log", "w") as l:
   238     with open("torrents.log", "w") as l:
   231         for game in parseGamesFromFile(fn):
   239         for game in parseGamesFromFile(fn):
   232             logging.info("Parsing game %s (%d downloads)", game.title, len(game.downloads))
   240             logging.info("Parsing game %s (%d downloads)", game.title, len(game.downloads))
   233             for dls in game.downloads:
   241             for dls in game.downloads:
   234                 scores = list(selector(dls))
   242                 scores = list(selector(dls))
   235                 choosen = selectHighestScore(scores)
   243                 choosen = selectHighestScore(scores)
   236                 for score, dl in scores:
   244                 for score, dl in scores:
   237                     print("[%s] %2d | %-30s | %-15s | %-30s | %-15s | %s " % (
   245                     print("[%s] %2d | %-30s | %-15s | %-30s | %-15s | %s <%s>" % (
   238                             "*" if dl in choosen else " ",
   246                             "*" if dl in choosen else " ",
   239                             score,
   247                             score,
   240                             game.title,
   248                             game.title,
   241                             dls.id,
   249                             dls.id,
   242                             dl.date,
   250                             dl.date,
   243                             ", ".join(sorted(dl.attrs)),
   251                             ", ".join(sorted(dl.attrs)),
       
   252 			    os.path.basename(urllib.parse.urlsplit(dl.torrent).path),
   244                             dl.torrent),
   253                             dl.torrent),
   245                           file=l)
   254                           file=l)
   246                     if dl in choosen:
   255                     if dl in choosen:
   247                         downloads.append(dl)
   256                         downloads.append(dl)
   248                 if not scores:
   257                 if not scores:
   249                     print("No download for %s" % (dls.id), file=l)
   258                     print("No download for %s" % (dls.id), file=l)
   250                 print("-" * 80, file=l)
   259                 print("-" * 80, file=l)
   251 
   260 
   252     import urllib.request
   261     import urllib.request
   253     import urllib.parse
       
   254     import os
       
   255     urlfile = open('http-download.sh','w')
   262     urlfile = open('http-download.sh','w')
   256     opener = urllib.request.build_opener()
   263     opener = urllib.request.build_opener()
   257     for dl in (dl for dl in downloads):
   264     for dl in (dl for dl in downloads):
   258         if dl.torrent:
   265         if dl.torrent:
   259             try:
   266             try: