diff --git a/src/comics/comics/evilinc.py b/src/comics/comics/evilinc.py index b6811f28..50cfa93b 100644 --- a/src/comics/comics/evilinc.py +++ b/src/comics/comics/evilinc.py @@ -11,20 +11,17 @@ class ComicData(ComicDataBase): class Crawler(CrawlerBase): - history_capable_date = "2005-05-30" - schedule = "Mo,Tu,We,Th,Fr" + history_capable_days = 35 + schedule = "Tu,Th" time_zone = "America/New_York" def crawl(self, pub_date): - page_url = "https://www.evil-inc.com/%s/?post_type=comic" % ( - pub_date.strftime("%Y/%m/%d") - ) - - page = self.parse_page(page_url) - - url = page.src("img.attachment-large.wp-post-image") - if not url: - return - url = url.replace("?fit=1024%2C1024", "") - title = page.text(".post-title") - return CrawlerImage(url, title) + feed = self.parse_feed("https://www.evil-inc.com/comic/feed/") + for entry in feed.for_date(pub_date): + title = entry.title + page = self.parse_page(entry.link) + img = page.root.xpath('//div[@id="unspliced-comic"]/picture/img') + if img is None: + continue + url = img[0].attrib["src"] + return CrawlerImage(url, title) diff --git a/src/comics/comics/hjalmarbt.py b/src/comics/comics/hjalmarbt.py index 387139d5..fe3193c4 100644 --- a/src/comics/comics/hjalmarbt.py +++ b/src/comics/comics/hjalmarbt.py @@ -7,6 +7,7 @@ class ComicData(ComicDataBase): language = "no" url = "https://www.bt.no/kultur/tegneserier/" rights = "Nils Axle Kanten" + active = False class Crawler(CrawlerBase): diff --git a/src/comics/comics/lunchdn.py b/src/comics/comics/lunchdn.py index 93e3355c..2ff9ac36 100644 --- a/src/comics/comics/lunchdn.py +++ b/src/comics/comics/lunchdn.py @@ -6,7 +6,7 @@ class ComicData(ComicDataBase): name = "Lunch (dn.no)" language = "no" url = "https://www.dn.no/topic/Lunch/" - active = True + active = False rights = "Børge Lund" diff --git a/src/comics/comics/lunche24.py b/src/comics/comics/lunche24.py index 874e85dc..62b432b3 100644 --- a/src/comics/comics/lunche24.py +++ b/src/comics/comics/lunche24.py @@ -12,7 +12,7 @@ class ComicData(ComicDataBase): class Crawler(CrawlerBase): history_capable_date = "2024-05-02" - schedule = "Mo,Tu,We,Th,Fr,Sa" + schedule = "Mo,Tu,We,Th,Fr,Su" time_zone = "Europe/Oslo" def crawl(self, pub_date): diff --git a/src/comics/comics/lunchtu.py b/src/comics/comics/lunchtu.py index 3ed20400..edc70ceb 100644 --- a/src/comics/comics/lunchtu.py +++ b/src/comics/comics/lunchtu.py @@ -20,6 +20,6 @@ class Crawler(CrawlerBase): def crawl(self, pub_date): url = ( - "https://www.tu.no/?module=TekComics&service=image&id=lunch&key=%s" + "https://www.tu.no/api/widgets/comics?name=lunch&date=%s" ) % pub_date.strftime("%Y-%m-%d") return CrawlerImage(url) diff --git a/src/comics/comics/nemibt.py b/src/comics/comics/nemibt.py index 63db9b1c..ea6b5a4d 100644 --- a/src/comics/comics/nemibt.py +++ b/src/comics/comics/nemibt.py @@ -8,6 +8,7 @@ class ComicData(ComicDataBase): url = "https://www.bt.no/kultur/tegneserier/" start_date = "1997-01-01" rights = "Lise Myhre" + active = False class Crawler(CrawlerBase): diff --git a/src/comics/comics/notinventedhere.py b/src/comics/comics/notinventedhere.py index 9180b6c9..aabe5dc5 100644 --- a/src/comics/comics/notinventedhere.py +++ b/src/comics/comics/notinventedhere.py @@ -5,8 +5,9 @@ class ComicData(ComicDataBase): name = "Not Invented Here" language = "en" - url = "http://notinventedhe.re/" + url = "https://www.notinventedhere.com/" start_date = "2009-09-21" + end_date = "2015-12-31" rights = "Bill Barnes and Paul Southworth" active = False @@ -16,5 +17,8 @@ class Crawler(CrawlerBase): time_zone = "America/Los_Angeles" def crawl(self, pub_date): - url = "http://thiswas.notinventedhe.re/on/%s" % pub_date.strftime("%Y-%m-%d") + url = ( + "https://s3.amazonaws.com/thiswas.notinventedhe.re/on/%s" + % pub_date.strftime("%Y-%m-%d") + ) return CrawlerImage(url) diff --git a/src/comics/comics/perrybiblefellowship.py b/src/comics/comics/perrybiblefellowship.py index 463e9b1f..52557e10 100644 --- a/src/comics/comics/perrybiblefellowship.py +++ b/src/comics/comics/perrybiblefellowship.py @@ -5,7 +5,7 @@ class ComicData(ComicDataBase): name = "The Perry Bible Fellowship" language = "en" - url = "http://www.pbfcomics.com/" + url = "https://www.pbfcomics.com/" start_date = "2001-01-01" rights = "Nicholas Gurewitch" @@ -18,5 +18,8 @@ def crawl(self, pub_date): feed = self.parse_feed("https://pbfcomics.com/feed/") for entry in feed.for_date(pub_date): page = self.parse_page(entry.link) - urls = [url for url in page.srcs("div#comic img") if url.startswith("http")] + urls = [ + img.attrib["data-src"] + for img in page.root.findall('.//div[@id="comic"]/img') + ] return [CrawlerImage(url, entry.title) for url in urls] diff --git a/src/comics/comics/pondusbt.py b/src/comics/comics/pondusbt.py index 6ea1ad55..256fffbe 100644 --- a/src/comics/comics/pondusbt.py +++ b/src/comics/comics/pondusbt.py @@ -5,7 +5,7 @@ class ComicData(PondusData): name = "Pondus (bt.no)" url = "https://www.bt.no/kultur/tegneserier/" - active = True + active = False class Crawler(CrawlerBase): diff --git a/src/comics/comics/vgcats.py b/src/comics/comics/vgcats.py index 97ec618c..c645bd6d 100644 --- a/src/comics/comics/vgcats.py +++ b/src/comics/comics/vgcats.py @@ -1,6 +1,3 @@ -import datetime - -from comics.aggregator.crawler import CrawlerBase, CrawlerImage from comics.core.comic_data import ComicDataBase @@ -11,20 +8,3 @@ class ComicData(ComicDataBase): start_date = "2001-09-09" rights = "Scott Ramsoomair" active = False - - -class Crawler(CrawlerBase): - history_capable_date = "2001-09-09" - time_zone = "America/New_York" - - # Without User-Agent set, the server returns empty responses - headers = {"User-Agent": "Mozilla/4.0"} - - def crawl(self, pub_date): - # FIXME: Seems like they are using gif images now and then - file_ext = "gif" if pub_date < datetime.date(2003, 5, 1) else "jpg" - url = "https://vgcats.com/comics/images/{}.{}".format( - pub_date.strftime("%y%m%d"), - file_ext, - ) - return CrawlerImage(url)