update-hib.py
changeset 10 d7e256c9aec9
parent 9 e3a2bb2bae8d
child 11 dc1b075c538a
equal deleted inserted replaced
9:e3a2bb2bae8d 10:d7e256c9aec9
    53             if attr not in ("Download","small",""):
    53             if attr not in ("Download","small",""):
    54                 for s in self.subst.get(attr,(attr,)):
    54                 for s in self.subst.get(attr,(attr,)):
    55                     yield s
    55                     yield s
    56         self.attrs = set(chain.from_iterable(cleanup(attr) for attr in ids))
    56         self.attrs = set(chain.from_iterable(cleanup(attr) for attr in ids))
    57         urls = button.a.attrs
    57         urls = button.a.attrs
       
    58         logging.debug("URLS are %r", urls)
    58         self.torrent = urls["data-bt"]
    59         self.torrent = urls["data-bt"]
    59         self.web = urls["data-web"]
    60         self.web = urls["data-web"]
    60         details = soup.find(class_="dldetails").find(class_="dlsize")
    61         details = soup.find(class_="dldetails").find(class_="dlsize")
    61         size = details.find(class_="mbs")
    62         size = details.find(class_="mbs")
    62         md5 = details.find(class_="dlmd5")
    63         md5 = details.find(class_="dlmd5")
    81         self.id = [class_ for class_ in soup["class"] if class_ != "downloads"][0]
    82         self.id = [class_ for class_ in soup["class"] if class_ != "downloads"][0]
    82         self.elements = []
    83         self.elements = []
    83         self.others = []
    84         self.others = []
    84         self.addchilds(soup)
    85         self.addchilds(soup)
    85     def addchilds(self, soup):
    86     def addchilds(self, soup):
       
    87         logging.debug("Parsing soup for downloads %s", self.id)
    86         for child in soup.children:
    88         for child in soup.children:
    87             if type(child) is not bs4.element.Tag:
    89             if type(child) is not bs4.element.Tag:
    88                 continue
    90                 continue
    89             classes = child["class"] if "class" in child.attrs else []
    91             classes = child["class"] if "class" in child.attrs else []
    90             if [True for attr in classes if attr in ("arc-toggle", "downloads")]:
    92             if [True for attr in classes if attr in ("arc-toggle", "downloads")]:
    91                 self.addchilds(child)
    93                 self.addchilds(child)
    92             elif "download" in classes:
    94             elif "download" in classes:
    93                 self.elements.append(Download(self.id, child))
    95                 desc = child.find(class_="flexbtn").span.string
       
    96                 if desc == "Stream":
       
    97                     logging.info("Ignoring Stream URLs for %s", self.id)
       
    98                 else:
       
    99                     self.elements.append(Download(self.id, child))
    94             elif [True for attr in classes if attr in ("clearfix","label")]:
   100             elif [True for attr in classes if attr in ("clearfix","label")]:
    95                 pass
   101                 pass
    96             else:
   102             else:
    97                 self.others.append(child)
   103                 self.others.append(child)
    98     def __iter__(self):
   104     def __iter__(self):
   115 class Game:
   121 class Game:
   116     def __init__(self, soup):
   122     def __init__(self, soup):
   117         self.title = "unknown"
   123         self.title = "unknown"
   118         self.downloads = []
   124         self.downloads = []
   119         self.others = []
   125         self.others = []
   120         for child in soup.children:            
   126         for child in soup.children:
   121             if type(child) is not bs4.element.Tag:
   127             if type(child) is not bs4.element.Tag:
   122                 continue
   128                 continue
   123             classes = child["class"] if "class" in child.attrs else []
   129             classes = child["class"] if "class" in child.attrs else []
   124             if "gameinfo" in classes:
   130             if "gameinfo" in classes:
   125                 self.title = child.find(class_="title").a.string.strip()
   131                 self.title = child.find(class_="title").a.string.strip()
   126             elif "downloads" in classes:
   132             elif "downloads" in classes:
       
   133                 logging.debug("Collecting downloadables for %s", self.title)
   127                 self.downloads.append(Downloads(child))
   134                 self.downloads.append(Downloads(child))
   128             elif [True for attr in classes if attr in ["icn", "clearfix"]]:
   135             elif [True for attr in classes if attr in ["icn", "clearfix"]]:
   129                 pass
   136                 pass
   130             else:
   137             else:
   131                 self.others.append(child)
   138                 self.others.append(child)
   164                 return 1
   171                 return 1
   165             if "MP3" in dl.attrs:
   172             if "MP3" in dl.attrs:
   166                 return 1
   173                 return 1
   167             if "website" in dl.attrs:
   174             if "website" in dl.attrs:
   168                 return -1
   175                 return -1
       
   176             if "AAC" in dl.attrs:
       
   177                 return 1
   169             raise Exception("Unknown audio type: %r" % (dl.attrs))
   178             raise Exception("Unknown audio type: %r" % (dl.attrs))
   170         if dl.dltype in ("mac","windows"):
   179         if dl.dltype in ("mac","windows"):
   171             return -1
   180             return -1
   172         if dl.dltype == "linux":
   181         if dl.dltype == "linux":
   173             score = 1
   182             score = 1
   203         else:
   212         else:
   204             return []
   213             return []
   205     logging.debug("Empty scores list: %r", scores)
   214     logging.debug("Empty scores list: %r", scores)
   206     return []
   215     return []
   207 
   216 
       
   217 class tee:
       
   218     def __init__(self, main, *other):
       
   219         self.main = main
       
   220         self.other = other
       
   221     def write(self, s):
       
   222         self.main.write(s)
       
   223         for o in self.other:
       
   224             o.write(s)
       
   225 
   208 def main(fn):
   226 def main(fn):
   209     selector = FileSelector()
   227     selector = FileSelector()
   210     downloads = []
   228     downloads = []
   211     for game in parseGamesFromFile(fn):
   229     import sys
   212         for dls in game.downloads:
   230     with open("torrents.log", "w") as l:
   213             scores = list(selector(dls))
   231         for game in parseGamesFromFile(fn):
   214             choosen = selectHighestScore(scores)
   232             logging.info("Parsing game %s (%d downloads)", game.title, len(game.downloads))
   215             for score, dl in scores:
   233             for dls in game.downloads:
   216                 print("[%s] %2d | %-20s | %-15s | %-10s | %-25s | %s " % (
   234                 scores = list(selector(dls))
   217                         "*" if dl in choosen else " ",
   235                 choosen = selectHighestScore(scores)
   218                         score,
   236                 for score, dl in scores:
   219                         game.title,
   237                     print("[%s] %2d | %-30s | %-15s | %-30s | %-15s | %s " % (
   220                         dls.id,
   238                             "*" if dl in choosen else " ",
   221                         dls.date,
   239                             score,
   222                         ", ".join(sorted(dl.attrs)),
   240                             game.title,
   223                         dl.torrent))
   241                             dls.id,
   224                 if dl in choosen:
   242                             dl.date,
   225                     downloads.append(dl)
   243                             ", ".join(sorted(dl.attrs)),
   226             if not scores:
   244                             dl.torrent),
   227                 print("No download for",dls.id)
   245                           file=l)
   228             print("-" * 80)
   246                     if dl in choosen:
       
   247                         downloads.append(dl)
       
   248                 if not scores:
       
   249                     print("No download for %s" % (dls.id), file=l)
       
   250                 print("-" * 80, file=l)
   229 
   251 
   230     import urllib.request
   252     import urllib.request
   231     import urllib.parse
   253     import urllib.parse
   232     import os
   254     import os
   233     urlfile = open('http-download.sh','w')
   255     urlfile = open('http-download.sh','w')
   241                 else:
   263                 else:
   242                     logging.info("Saving %s as %s", dl.torrent, fn)
   264                     logging.info("Saving %s as %s", dl.torrent, fn)
   243                     with opener.open(dl.torrent) as u:
   265                     with opener.open(dl.torrent) as u:
   244                         with open(fn,"wb") as f:
   266                         with open(fn,"wb") as f:
   245                             f.write(u.read())
   267                             f.write(u.read())
       
   268                     logging.info("%s saved.", os.path.realpath(fn))
   246             except:
   269             except:
   247                 logging.exception("Error with download %r", dl)
   270                 logging.exception("Error with download %r", dl)
   248         else:
   271         else:
   249             logging.info("No torrent, url is %s", dl.web)
   272             logging.info("No torrent, url is %s", dl.web)
   250             fn = os.path.basename(urllib.parse.urlsplit(dl.web).path)
   273             fn = os.path.basename(urllib.parse.urlsplit(dl.web).path)