Source code for kingfisher_scrapy.spiders.uganda_releases

from kingfisher_scrapy.base_spiders import PeriodicSpider
from kingfisher_scrapy.util import get_parameter_value, parameters, replace_parameters



[docs]
class UgandaReleases(PeriodicSpider):
    """
    Domain
      Government Procurement Portal (GPP) - Public Procurement and Disposal of Public Assets Authority (PPDA)
    Spider arguments
      from_date
        Download only data from this year onward (YYYY format).
        If ``until_date`` is provided, defaults to '2019'.
        The year refers to the start of the fiscal year range, e.g. if ``from_date`` = '2019' then the fiscal year is
        '2019-2020'
      until_date
        Download only data until this year (YYYY format).
        If ``from_date`` is provided, defaults to the current year.
        The year refers to the start of the fiscal year range, e.g. if ``until_date`` = '2019' then the fiscal year is
        '2019-2020'
    Bulk download documentation
        https://gpp.ppda.go.ug/public/open-data/ocds/ocds-datasets
    """

    name = "uganda_releases"
    # The dataset page (https://gpp.ppda.go.ug/public/open-data/ocds/ocds-datasets) builds its download URLs with
    # JavaScript, so this spider walks the 'code' parameter upward until the server answers with a 404. The
    # consequence is that an expected 404 (end of the sequence) can't be told apart from an unexpected one.
    handle_httpstatus_list = [404]
    # Too many requests are answered with HTTP 403, so requests are spaced out. (A delay of 1 is too short.)
    download_delay = 2

    # BaseSpider
    date_format = "year"
    default_from_date = "2019"

    # SimpleSpider
    data_type = "release_package"

    # PeriodicSpider
    # {0} and {1} are filled in by build_urls() with a year and the year after it. 'code' starts at 1 and is
    # incremented by parse().
    pattern = "https://cdn.ppda.go.ug/api/open-data/v2/ocds/download?fy={0}-{1}&format=json&code=1"
    formatter = staticmethod(parameters("fy", "code"))

    # PeriodicSpider
    def build_urls(self, date):
        """
        Yield the first URL (``code=1``) of the fiscal year that starts in ``date``.

        ``date`` and ``date + 1`` are substituted into ``pattern`` as the two halves of the ``fy`` parameter.
        """
        following_year = date + 1
        yield self.pattern.format(date, following_year)

    # SimpleSpider
    def parse(self, response):
        """
        Handle one response in the 'code' sequence of a fiscal year.

        A 404 response ends the sequence and yields nothing. Any other response is passed to the parent class'
        ``parse``, after which the same URL is requested again with its 'code' parameter incremented by one.
        """
        # A 404 means there are no further 'code' values for this fiscal year, so stop following the sequence.
        if response.status == 404:
            return

        yield from super().parse(response)

        # Read the URL only after the parent's results have been yielded, then queue the next 'code' value.
        requested_url = response.request.url
        next_code = int(get_parameter_value(requested_url, "code")) + 1
        yield self.build_request(replace_parameters(requested_url, code=next_code), formatter=self.formatter)