Source code for kingfisher_scrapy.spiders.uganda_releases

from kingfisher_scrapy.base_spiders import PeriodicSpider
from kingfisher_scrapy.util import get_parameter_value, parameters, replace_parameters


class UgandaReleases(PeriodicSpider):
    """
    Domain
      Government Procurement Portal (GPP) - Public Procurement and Disposal of Public Assets Authority (PPDA)
    Spider arguments
      from_date
        Download only data from this year onward (YYYY format).
        If ``until_date`` is provided, defaults to '2019'.
        The year refers to the start of the fiscal year range, e.g. if ``from_date`` = '2019' then the fiscal year is
        '2019-2020'
      until_date
        Download only data until this year (YYYY format).
        If ``from_date`` is provided, defaults to the current year.
        The year refers to the start of the fiscal year range, e.g. if ``until_date`` = '2019' then the fiscal year is
        '2019-2020'
    Bulk download documentation
      https://gpp.ppda.go.ug/public/open-data/ocds/ocds-datasets
    """

    name = "uganda_releases"

    # https://gpp.ppda.go.ug/public/open-data/ocds/ocds-datasets builds its URLs with JavaScript, so the spider
    # increments the 'code' parameter until a request 404s. Consequently, an expected 404 (end of the sequence)
    # cannot be told apart from an unexpected one.
    handle_httpstatus_list = [404]
    # The server returns HTTP 403 if there are too many requests. (A delay of 1 is too short.)
    download_delay = 2

    # BaseSpider
    date_format = "year"
    default_from_date = "2019"

    # SimpleSpider
    data_type = "release_package"

    # PeriodicSpider
    # {0} and {1} are the start and end years of the fiscal year; 'code' starts at 1 and is incremented in parse().
    pattern = "https://cdn.ppda.go.ug/api/open-data/v2/ocds/download?fy={0}-{1}&format=json&code=1"
    formatter = staticmethod(parameters("fy", "code"))

    # PeriodicSpider
    def build_urls(self, date):
        # `date` is the fiscal year's start year; it must support `+ 1` (presumably an int - confirm in the
        # base class). The fiscal year ends in the following year.
        following_year = date + 1
        yield self.pattern.format(date, following_year)

    # SimpleSpider
    def parse(self, response):
        # A 404 response indicates the end of the 'code' sequence for this fiscal year: yield nothing.
        if response.status != 404:
            # Let the parent class handle the response itself.
            yield from super().parse(response)

            # Then request the same URL with the 'code' parameter incremented by one.
            current_url = response.request.url
            next_code = int(get_parameter_value(current_url, "code")) + 1
            yield self.build_request(
                replace_parameters(current_url, code=next_code),
                formatter=self.formatter,
            )