diff --git a/src/comics/aggregator/crawler.py b/src/comics/aggregator/crawler.py
index da306d33..c87f8dff 100644
--- a/src/comics/aggregator/crawler.py
+++ b/src/comics/aggregator/crawler.py
@@ -222,38 +222,34 @@ def crawl_helper(
 class GoComicsComCrawlerBase(CrawlerBase):
     """Base comic crawler for all comics hosted at gocomics.com"""
 
-    # It doesn't want us getting comics because of a User-Agent check.
-    # Look! I'm a nice, normal Internet Explorer machine!
-    headers = {
-        "User-Agent": (
-            "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; "
-            "Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; "
-            ".NET CLR 3.0.4506.2152; .NET CLR 3.5.30729"
-        ),
-    }
-
     def crawl_helper(
         self,
         url_name: str,
         pub_date: dt.date,
     ) -> CrawlerResult | None:
-        page_url = "https://www.gocomics.com/{}/{}".format(
-            url_name,
-            pub_date.strftime("%Y/%m/%d"),
+        """Look up the strip for ``url_name`` on ``pub_date`` via the JSON API.
+
+        Returns a ``CrawlerImage`` built from the ``url`` field of the first
+        entry in the response, or ``None`` when the response is empty.
+        ``response.raise_for_status()`` propagates HTTP error statuses.
+        """
+        api_url = (
+            "https://www.gocomics.com/api/service/v2/assets/recent/{}?date={}".format(
+                url_name,
+                pub_date.strftime("%Y/%m/%d"),
+            )
         )
-        page = self.parse_page(page_url)
-
-        url = page.content("meta[property='og:image']")
-        if not url:
-            return None
-
-        # If we request a date that doesn't exist
-        # we get redirected to todays comic
-        date_str = page.content("meta[property='og:title']")
-        if not date_str or f"{pub_date:%B %-d, %Y}" not in date_str:
-            return None
-
-        return CrawlerImage(url)
+        response = httpx.get(api_url)
+        response.raise_for_status()
+        data = response.json()
+        # NOTE(review): the HTML-based version this replaces also checked that
+        # the page it got back was for pub_date; confirm the API honours
+        # ?date= exactly, or compare the returned date here.
+        # An empty payload means there is no strip to return; report that as
+        # None instead of letting data[0] raise IndexError.
+        if not data:
+            return None
+        return CrawlerImage(data[0]["url"])
 
 
 class PondusNoCrawlerBase(CrawlerBase):
diff --git a/src/comics/comics/betty.py b/src/comics/comics/betty.py
index 9aa32011..7a7c8ce9 100644
--- a/src/comics/comics/betty.py
+++ b/src/comics/comics/betty.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "Betty"
     language = "en"
-    url = "http://www.gocomics.com/betty/"
+    url = "https://www.gocomics.com/betty"
     start_date = "1991-01-01"
     rights = "Delainey & Gerry Rasmussen"
 
diff --git a/src/comics/comics/calvinandhobbes.py b/src/comics/comics/calvinandhobbes.py
index c6dbb1ab..7d03be8a 100644
--- a/src/comics/comics/calvinandhobbes.py
+++ b/src/comics/comics/calvinandhobbes.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "Calvin and Hobbes"
     language = "en"
-    url = "http://www.gocomics.com/calvinandhobbes"
+    url = "https://www.gocomics.com/calvinandhobbes"
     start_date = "1985-11-18"
     end_date = "1995-12-31"
     rights = "Bill Watterson"
diff --git a/src/comics/comics/crabgrass.py b/src/comics/comics/crabgrass.py
new file mode 100644
index 00000000..674afb5b
--- /dev/null
+++ b/src/comics/comics/crabgrass.py
@@ -0,0 +1,18 @@
+from comics.aggregator.crawler import GoComicsComCrawlerBase
+from comics.core.comic_data import ComicDataBase
+
+
+class ComicData(ComicDataBase):
+    name = "Crabgrass"
+    language = "en"
+    url = "https://www.gocomics.com/crabgrass"
+    rights = "Tauhid Bondia"
+
+
+class Crawler(GoComicsComCrawlerBase):
+    history_capable_date = "2019-04-05"
+    schedule = "Mo,Tu,We,Th,Fr,Sa,Su"
+    time_zone = "America/New_York"
+
+    def crawl(self, pub_date):
+        return self.crawl_helper("crabgrass", pub_date)
diff --git a/src/comics/comics/fminus.py b/src/comics/comics/fminus.py
index 582d6c98..33b633ad 100644
--- a/src/comics/comics/fminus.py
+++ b/src/comics/comics/fminus.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "F Minus"
     language = "en"
-    url = "http://www.gocomics.com/fminus"
+    url = "https://www.gocomics.com/fminus"
     start_date = "1999-09-01"
     rights = "Tony Carrillo"
 
diff --git a/src/comics/comics/forbetterorforworse.py b/src/comics/comics/forbetterorforworse.py
index 3cb2ac20..10a39993 100644
--- a/src/comics/comics/forbetterorforworse.py
+++ b/src/comics/comics/forbetterorforworse.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "For Better or For Worse"
     language = "en"
-    url = "http://www.gocomics.com/forbetterorforworse"
+    url = "https://www.gocomics.com/forbetterorforworse"
     start_date = "1981-11-23"
     rights = "Lynn Johnston"
 
diff --git a/src/comics/comics/foxtrot.py b/src/comics/comics/foxtrot.py
index 7f46eb2b..d2cc3d5d 100644
--- a/src/comics/comics/foxtrot.py
+++ b/src/comics/comics/foxtrot.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "FoxTrot"
     language = "en"
-    url = "http://www.gocomics.com/foxtrot"
+    url = "https://www.gocomics.com/foxtrot"
     start_date = "1988-04-10"
     rights = "Bill Amend"
 
diff --git a/src/comics/comics/freerange.py b/src/comics/comics/freerange.py
new file mode 100644
index 00000000..d830fb1f
--- /dev/null
+++ b/src/comics/comics/freerange.py
@@ -0,0 +1,18 @@
+from comics.aggregator.crawler import GoComicsComCrawlerBase
+from comics.core.comic_data import ComicDataBase
+
+
+class ComicData(ComicDataBase):
+    name = "Free Range"
+    language = "en"
+    url = "https://www.gocomics.com/freerange"
+    rights = "Bill Whitehead"
+
+
+class Crawler(GoComicsComCrawlerBase):
+    history_capable_date = "2007-02-03"
+    schedule = "Mo,Tu,We,Th,Fr,Sa,Su"
+    time_zone = "America/New_York"
+
+    def crawl(self, pub_date):
+        return self.crawl_helper("freerange", pub_date)
diff --git a/src/comics/comics/garfield.py b/src/comics/comics/garfield.py
index 434f680e..3788a84f 100644
--- a/src/comics/comics/garfield.py
+++ b/src/comics/comics/garfield.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "Garfield"
     language = "en"
-    url = "https://www.gocomics.com/garfield/"
+    url = "https://www.gocomics.com/garfield"
     start_date = "1978-06-19"
     rights = "Jim Davis"
 
diff --git a/src/comics/comics/getfuzzy.py b/src/comics/comics/getfuzzy.py
index c2e7f042..ec043e54 100644
--- a/src/comics/comics/getfuzzy.py
+++ b/src/comics/comics/getfuzzy.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "Get Fuzzy"
     language = "en"
-    url = "http://www.gocomics.com/getfuzzy/"
+    url = "https://www.gocomics.com/getfuzzy"
     start_date = "1999-09-01"
     rights = "Darby Conley"
 
diff --git a/src/comics/comics/luann.py b/src/comics/comics/luann.py
new file mode 100644
index 00000000..2d31ada5
--- /dev/null
+++ b/src/comics/comics/luann.py
@@ -0,0 +1,18 @@
+from comics.aggregator.crawler import GoComicsComCrawlerBase
+from comics.core.comic_data import ComicDataBase
+
+
+class ComicData(ComicDataBase):
+    name = "Luann"
+    language = "en"
+    url = "https://www.gocomics.com/luann"
+    rights = "Greg Evans and Karen Evans"
+
+
+class Crawler(GoComicsComCrawlerBase):
+    history_capable_date = "1985-03-17"
+    schedule = "Mo,Tu,We,Th,Fr,Sa,Su"
+    time_zone = "America/New_York"
+
+    def crawl(self, pub_date):
+        return self.crawl_helper("luann", pub_date)
diff --git a/src/comics/comics/nonsequitur.py b/src/comics/comics/nonsequitur.py
index a893d532..6a99eb07 100644
--- a/src/comics/comics/nonsequitur.py
+++ b/src/comics/comics/nonsequitur.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "Non Sequitur"
     language = "en"
-    url = "http://www.gocomics.com/nonsequitur"
+    url = "https://www.gocomics.com/nonsequitur"
     start_date = "1992-02-16"
     rights = "Wiley Miller"
 
diff --git a/src/comics/comics/notinventedherego.py b/src/comics/comics/notinventedherego.py
new file mode 100644
index 00000000..57b7d1dd
--- /dev/null
+++ b/src/comics/comics/notinventedherego.py
@@ -0,0 +1,19 @@
+from comics.aggregator.crawler import GoComicsComCrawlerBase
+from comics.core.comic_data import ComicDataBase
+
+
+class ComicData(ComicDataBase):
+    name = "Not Invented Here (gocomics.com)"
+    language = "en"
+    url = "https://www.gocomics.com/not-invented-here"
+    start_date = "2009-09-21"
+    rights = "Bill Barnes and friends"
+
+
+class Crawler(GoComicsComCrawlerBase):
+    history_capable_date = "2015-12-28"
+    schedule = "Mo,Tu,We,Th"
+    time_zone = "America/Los_Angeles"
+
+    def crawl(self, pub_date):
+        return self.crawl_helper("not-invented-here", pub_date)
diff --git a/src/comics/comics/offthemark.py b/src/comics/comics/offthemark.py
new file mode 100644
index 00000000..a051be02
--- /dev/null
+++ b/src/comics/comics/offthemark.py
@@ -0,0 +1,19 @@
+from comics.aggregator.crawler import GoComicsComCrawlerBase
+from comics.core.comic_data import ComicDataBase
+
+
+class ComicData(ComicDataBase):
+    name = "Off the Mark"
+    language = "en"
+    url = "https://www.gocomics.com/offthemark"
+    start_date = "2002-09-02"
+    rights = "Mark Parisi"
+
+
+class Crawler(GoComicsComCrawlerBase):
+    history_capable_date = "2002-09-02"
+    schedule = "Mo,Tu,We,Th,Fr,Sa,Su"
+    time_zone = "America/New_York"
+
+    def crawl(self, pub_date):
+        return self.crawl_helper("offthemark", pub_date)
diff --git a/src/comics/comics/peanuts.py b/src/comics/comics/peanuts.py
index f5a903bc..2e264ea0 100644
--- a/src/comics/comics/peanuts.py
+++ b/src/comics/comics/peanuts.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "Peanuts"
     language = "en"
-    url = "http://www.gocomics.com/peanuts/"
+    url = "https://www.gocomics.com/peanuts"
     start_date = "1950-10-02"
     end_date = "2000-02-13"
     rights = "Charles M. Schulz"
diff --git a/src/comics/comics/pearlsbeforeswine.py b/src/comics/comics/pearlsbeforeswine.py
index b3738212..bb8f514b 100644
--- a/src/comics/comics/pearlsbeforeswine.py
+++ b/src/comics/comics/pearlsbeforeswine.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "Pearls Before Swine"
     language = "en"
-    url = "http://www.gocomics.com/pearlsbeforeswine/"
+    url = "https://www.gocomics.com/pearlsbeforeswine"
     start_date = "2001-12-30"
     rights = "Stephan Pastis"
 
diff --git a/src/comics/comics/pickles.py b/src/comics/comics/pickles.py
index f4638e57..adaab693 100644
--- a/src/comics/comics/pickles.py
+++ b/src/comics/comics/pickles.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "Pickles"
     language = "en"
-    url = "http://www.gocomics.com/pickles"
+    url = "https://www.gocomics.com/pickles"
     start_date = "2003-10-01"
     rights = "Brian Crane"
 
diff --git a/src/comics/comics/roseisrose.py b/src/comics/comics/roseisrose.py
index 1ad9d6fc..57911acf 100644
--- a/src/comics/comics/roseisrose.py
+++ b/src/comics/comics/roseisrose.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "Rose Is Rose"
     language = "en"
-    url = "http://www.gocomics.com/roseisrose/"
+    url = "https://www.gocomics.com/roseisrose"
     start_date = "1984-10-02"
     rights = "Pat Brady"
 
diff --git a/src/comics/comics/slagoon.py b/src/comics/comics/slagoon.py
index 81265dee..261953fb 100644
--- a/src/comics/comics/slagoon.py
+++ b/src/comics/comics/slagoon.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "Sherman's Lagoon"
     language = "en"
-    url = "http://shermanslagoon.com/"
+    url = "https://shermanslagoon.com"
     start_date = "1991-05-13"
     rights = "Jim Toomey"
 
diff --git a/src/comics/comics/tankmcnamara.py b/src/comics/comics/tankmcnamara.py
index 553ec31c..f46d1ebb 100644
--- a/src/comics/comics/tankmcnamara.py
+++ b/src/comics/comics/tankmcnamara.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "Tank McNamara"
     language = "en"
-    url = "http://www.gocomics.com/tankmcnamara"
+    url = "https://www.gocomics.com/tankmcnamara"
     start_date = "1998-01-01"
     rights = "Wiley Miller"
 
diff --git a/src/comics/comics/theboondocks.py b/src/comics/comics/theboondocks.py
index 6950804e..1eb447bd 100644
--- a/src/comics/comics/theboondocks.py
+++ b/src/comics/comics/theboondocks.py
@@ -5,7 +5,7 @@
 class ComicData(ComicDataBase):
     name = "The Boondocks"
     language = "en"
-    url = "http://www.gocomics.com/boondocks"
+    url = "https://www.gocomics.com/boondocks"
     start_date = "1999-04-19"
     rights = "Aaron McGruder"
 
diff --git a/src/comics/comics/wallacethebrave.py b/src/comics/comics/wallacethebrave.py
new file mode 100644
index 00000000..a394775e
--- /dev/null
+++ b/src/comics/comics/wallacethebrave.py
@@ -0,0 +1,18 @@
+from comics.aggregator.crawler import GoComicsComCrawlerBase
+from comics.core.comic_data import ComicDataBase
+
+
+class ComicData(ComicDataBase):
+    name = "Wallace the Brave"
+    language = "en"
+    url = "https://www.gocomics.com/wallace-the-brave"
+    rights = "Will Henry"
+
+
+class Crawler(GoComicsComCrawlerBase):
+    history_capable_date = "2015-06-29"
+    schedule = "Mo,Tu,We,Th,Fr,Sa,Su"
+    time_zone = "America/New_York"
+
+    def crawl(self, pub_date):
+        return self.crawl_helper("wallace-the-brave", pub_date)