Source code for kingfisher_scrapy.spiders.indonesia_opentender

from kingfisher_scrapy.base_spiders import CompressedFileSpider, PeriodicSpider
from kingfisher_scrapy.util import (
    MAX_DOWNLOAD_TIMEOUT,
    components,
    get_parameter_value,
    join,
    parameters,
)


class IndonesiaOpentender(CompressedFileSpider, PeriodicSpider):
    """
    Domain
      Opentender.net
    Spider arguments
      from_date
        Download only data from this year onward (YYYY format). Defaults to '2008'.
      until_date
        Download only data until this year (YYYY format). Defaults to the current year.
    Bulk download documentation
      https://v3.opentender.net/#/ocds
    """

    name = "indonesia_opentender"

    # These settings are to avoid request timeouts and incomplete JSON.
    custom_settings = {
        "CONCURRENT_REQUESTS": 1,
        "DOWNLOAD_FAIL_ON_DATALOSS": False,
        "DOWNLOAD_TIMEOUT": MAX_DOWNLOAD_TIMEOUT,
    }
    download_delay = 1

    # Local
    # Must be set before `pattern`, so we can't follow the standard order.
    url_prefix = "https://opentender.net/api/"

    # BaseSpider
    date_format = "year"
    default_from_date = "2008"
    validate_json = True  # https://github.com/open-contracting/kingfisher-collect/issues/964

    # SimpleSpider
    data_type = "release_package"

    # PeriodicSpider
    pattern = url_prefix + "master/lpse/?year={}&format=json"
    formatter = staticmethod(components(-1))
    start_callback = "parse_list"

    def parse_list(self, response):
        """
        Yield one export request per distinct, non-empty ``code`` in the listing response.

        The year is read back from the ``year`` query parameter of the request that
        produced ``response``, and is forwarded to the export URL together with the code.
        """
        year = get_parameter_value(response.request.url, "year")
        seen = set()

        for entry in response.json()["results"]:
            code = entry["code"]
            # There are duplicate codes; falsy codes are skipped as well.
            if not code or code in seen:
                continue
            seen.add(code)

            url = f"{self.url_prefix}tender/export-ocds-batch/?year={year}&lpse={code}"
            # Built per request, as in the listing-to-export flow: file name from the last
            # path component plus the "year" and "lpse" parameters, with a "zip" extension.
            file_name_formatter = join(components(-1), parameters("year", "lpse"), extension="zip")
            yield self.build_request(url, formatter=file_name_formatter)