Source code for kingfisher_scrapy.spiders.indonesia_opentender

from kingfisher_scrapy.base_spiders import CompressedFileSpider, PeriodicSpider
from kingfisher_scrapy.util import components, get_parameter_value, handle_http_error, join, parameters


class IndonesiaOpentender(CompressedFileSpider, PeriodicSpider):
    """
    Domain
      Opentender.net
    Spider arguments
      from_date
        Download only data from this year onward (YYYY format). Defaults to '2008'.
      until_date
        Download only data until this year (YYYY format). Defaults to the current year.
    Bulk download documentation
      https://v3.opentender.net/#/ocds
    """

    name = 'indonesia_opentender'

    # One request at a time, spaced out, with a very long timeout and data-loss
    # tolerance: these guard against request timeouts and incomplete JSON.
    custom_settings = {
        'CONCURRENT_REQUESTS': 1,
        'DOWNLOAD_FAIL_ON_DATALOSS': False,
    }
    download_delay = 1
    download_timeout = 99999

    # Declared out of the usual order because `pattern` below is built from it.
    url_prefix = 'https://opentender.net/api/'

    # BaseSpider
    date_format = 'year'
    default_from_date = '2008'
    validate_json = True

    # SimpleSpider
    data_type = 'release_package'

    # PeriodicSpider
    pattern = url_prefix + 'master/lpse?year={}&format=json'
    formatter = staticmethod(components(-1))
    start_requests_callback = 'parse_list'

    @handle_http_error
    def parse_list(self, response):
        """
        Yield one ZIP export request per distinct, non-empty ``code`` in the listing response.

        The year is read back from the ``year`` query parameter of the request that produced
        ``response``, and is reused in each export URL.
        """
        requested_year = get_parameter_value(response.request.url, 'year')
        handled = set()

        for entry in response.json()['data']:
            lpse = entry['code']
            # Skip empty codes, and codes already requested (the listing contains duplicates).
            if not lpse or lpse in handled:
                continue
            handled.add(lpse)

            export_url = f'{self.url_prefix}tender/export-ocds-batch?year={requested_year}&lpse={lpse}'
            # The file name combines the last path component with the year and lpse parameters.
            name_file = join(components(-1), parameters('year', 'lpse'), extension='zip')
            yield self.build_request(export_url, formatter=name_file)