# Source code for kingfisher_scrapy.spiders.bulgaria

import orjson
import scrapy

from kingfisher_scrapy.base_spiders import CompressedFileSpider
from kingfisher_scrapy.util import components, join


class Bulgaria(CompressedFileSpider):
    """
    Domain
      Public Procurement Agency (PPA)
    Caveats
      Access to the publication is geo-restricted to Bulgarian IP addresses. Use a VPN to access it from outside.
    API documentation
      https://data.egov.bg/api-spetsifikatsiya?section=22&item=82
    """

    name = "bulgaria"

    # SimpleSpider
    data_type = "release_package"

    # Local
    base_url = "https://data.egov.bg"

    async def start(self):
        """Send the single POST request that lists the datasets to download."""
        # 502 is the Public Procurement Agency's organization ID.
        criteria = {"org_ids": [502], "formats": ["JSON"], "keywords": "OCDS"}
        payload = orjson.dumps({"criteria": criteria, "records_per_page": 100})

        yield scrapy.Request(
            f"{self.base_url}/api/listDatasets",
            method="POST",
            body=payload,
            callback=self.parse_list,
        )

    def parse_list(self, response):
        """
        Request a download token for each listed dataset.

        The response is expected to be an object whose "datasets" member is a list of dataset objects, e.g.:

        {"datasets": [{"uri": "27b344eb-da25-4974-ac37-2a4b8435702a", ...}, ...]}
        """
        listed = response.json()["datasets"]
        for entry in listed:
            token_url = f"{self.base_url}/dataset/{entry['uri']}/resources/download/json"
            yield scrapy.Request(token_url, callback=self.parse_item)

    def parse_item(self, response):
        """
        Request the ZIP download that the token in the response points to.

        The response contains a download token:

        {"uri": "some-download-token"}
        """
        token = response.json()["uri"]
        # /true returns non OCDS data. No parameter returns 404.
        archive_url = f"{self.base_url}/dataset/resources/download/zip/json/{token}/false"
        # The file name is built from the last two URL path components (the token and "false").
        file_name_formatter = join(components(-2, -1), extension="zip")

        yield self.build_request(archive_url, formatter=file_name_formatter)