# Source code for kingfisher_scrapy.spiders.chile_compra_bulk

import orjson

from kingfisher_scrapy.base_spiders import CompressedFileSpider, PeriodicSpider
from kingfisher_scrapy.items import File
from kingfisher_scrapy.util import MAX_DOWNLOAD_TIMEOUT, components

# curl -I https://ocds.blob.core.windows.net/ocds/202205.zip


class ChileCompraBulk(CompressedFileSpider, PeriodicSpider):
    """
    Domain
      ChileCompra
    Caveats
      This dataset was last updated by the publisher in 2022.
    Spider arguments
      from_date
        Download only data from this month onward (YYYY-MM format). Defaults to '2009-01'.
      until_date
        Download only data until this month (YYYY-MM format). Defaults to the current month.
    Bulk download documentation
      https://desarrolladores.mercadopublico.cl/OCDS/DescargaMasiva
    """

    name = "chile_compra_bulk"
    # NOTE(review): data-loss errors are tolerated and the timeout is raised to the
    # project maximum, presumably because the monthly archives are large - confirm.
    custom_settings = {
        "DOWNLOAD_FAIL_ON_DATALOSS": False,
        "DOWNLOAD_TIMEOUT": MAX_DOWNLOAD_TIMEOUT,
    }

    # BaseSpider
    skip_pluck = "Already covered (see code for details)"  # chile_compra_api_releases

    # SimpleSpider
    data_type = "record_package"

    # BaseSpider
    date_format = "year-month"
    default_from_date = "2009-01"

    # PeriodicSpider
    # One archive per month: the URL embeds the four-digit year followed by the two-digit month.
    pattern = "https://ocds.blob.core.windows.net/ocds/{0:%Y}{0:%m}.zip"
    formatter = staticmethod(components(-1))  # filename containing year-month

    # BaseSpider
    def build_file(self, *, file_name=None, url=None, data_type=None, data=None):
        """
        Build a ``File`` item from one archive member, skipping error payloads.

        Some files contain invalid record packages, like:
        {
            "status": 500,
            "detail": "error"
        }

        :param file_name: the name to set on the ``File`` item (also logged on error)
        :param url: the URL the data was requested from (also logged on error)
        :param data_type: the data type to set on the ``File`` item
        :param data: an object whose ``read()`` method returns the JSON content
        :returns: a ``File`` item holding the content that was read, or ``None`` if
          the content is an error payload (a "status" of 400 or greater)
        """
        data = data.read()
        package = orjson.loads(data)

        # A package without a "status" key is treated as successful (200).
        status = package.get("status", 200)
        if status >= 400:
            # Use .get() for "detail" so that an error payload that omits it is
            # still logged and skipped, rather than raising KeyError.
            self.logger.error(
                "status=%d message=%r request=<GET %s> file_name=%s",
                status,
                package.get("detail"),
                url,
                file_name,
            )
            return None

        return File(file_name=file_name, url=url, data_type=data_type, data=data)