From b602e5dd9414cfcf2d055374c5fa8ca06e479b3d Mon Sep 17 00:00:00 2001
From: ErnestaP
Date: Wed, 29 May 2024 15:36:18 +0200
Subject: [PATCH] hindawi: added curl headers for API call

---
 hepcrawl/spiders/hindawi_spider.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hepcrawl/spiders/hindawi_spider.py b/hepcrawl/spiders/hindawi_spider.py
index 6c2de27..ecc1a04 100644
--- a/hepcrawl/spiders/hindawi_spider.py
+++ b/hepcrawl/spiders/hindawi_spider.py
@@ -71,7 +71,10 @@ def __init__(self, source_file=None, *args, **kwargs):
     def start_requests(self):
         """Default starting point for scraping shall be the local XML file."""
         self.log('Harvest started.', logging.INFO)
-        yield Request(self.source_file)
+        headers = {
+            'User-Agent': 'curl/7.64.1'
+        }
+        yield Request(self.source_file, headers=headers)
 
     def parse_node(self, response, node):
         self.log('Parsing node...', logging.INFO)