Source code for kingfisher_scrapy.spiders.italy_appalti_pop

import scrapy

from kingfisher_scrapy.base_spiders import SimpleSpider
from kingfisher_scrapy.util import components, handle_http_error


class ItalyAppaltiPOP(SimpleSpider):
    """
    Domain
      AppaltiPOP
    Bulk download documentation
      https://www.appaltipop.it/it/download
    Swagger API documentation
      https://www.appaltipop.it/api/v1/
    """

    name = 'italy_appalti_pop'

    # SimpleSpider
    data_type = 'release_package'

    def start_requests(self):
        """Yield the single request for the buyers list; its response is handled by ``parse_list``."""
        buyers_request = scrapy.Request(
            'https://www.appaltipop.it/api/v1/buyers',
            callback=self.parse_list,
            meta={'file_name': 'buyers.json'},
        )
        yield buyers_request

    @handle_http_error
    def parse_list(self, response):
        """
        Yield one request per buyer listed in the buyers response.

        The response body is expected to be a JSON object shaped like::

            {
              "total": { ... },
              "max_score": ...,
              "hits": [ ... ]
            }

        Each entry of ``hits`` describes a buyer and the resources published for it.
        """
        hits = response.json()['hits']
        for hit in hits:
            resources = hit['_source']['appaltipop:releases/0/buyer/dataSource/resources']
            # Resources are ordered: the OCDS JSON comes first, an XLSX file second.
            ocds_resource = resources[0]
            # A relative path within the repository, e.g. 'data/IT-CF-01232710374/ocds.json'.
            path = ocds_resource['appaltipop:releases/0/buyer/resource/url']
            # NOTE(review): components(-2) presumably names the file after the last two path
            # components (buyer identifier and file name) - confirm against kingfisher_scrapy.util.
            yield self.build_request(
                f'https://raw.githubusercontent.com/ondata/appaltipop/master/{path}',
                formatter=components(-2),
            )