Source code for kingfisher_scrapy.spiders.liberia_releases

import orjson
import scrapy

from kingfisher_scrapy.base_spiders import IndexSpider
from kingfisher_scrapy.util import components


[docs] class LiberiaReleases(IndexSpider): """ Domain Public Procurement and Concessions Commission (PPCC) Bulk download documentation https://eprocurement.ppcc.gov.lr/ocds/report/home.action#/record """ name = "liberia_releases" # BaseSpider skip_pluck = "Already covered (see code for details)" # liberia_records # SimpleSpider data_type = "release_package" # IndexSpider result_count_pointer = "/total" limit = 1000 # unverified use_page = True start_page = 1 formatter = None parse_list_callback = "parse_items" # Local url_prefix = "https://eprocurement.ppcc.gov.lr/ocds/record/" async def start(self): url, kwargs = self.url_builder(self.start_page, None, None) yield scrapy.Request(url, **kwargs, callback=self.parse_list) # IndexSpider def url_builder(self, value, data, response): # This endpoint is undocumented. return f"{self.url_prefix}searchRecords.action", { "method": "POST", "headers": {"Content-Type": "application/json"}, "body": orjson.dumps({"page": value, "pagesize": self.limit, "sortField": "ocid", "sortDir": "asc"}), "meta": {"file_name": f"page-{value}.json"}, } def parse_items(self, response): for item in response.json()["items"]: # This endpoint is undocumented. There is also a VERSIONED.action endpoint. yield self.build_request( f"{self.url_prefix}downloadRecord/{item['id']}/COMPILED.action", formatter=components(-2) )