# Source code for kingfisher_scrapy.spiders.ecuador_sercop_api

from kingfisher_scrapy.base_spiders import IndexSpider, PeriodicSpider
from kingfisher_scrapy.util import components, handle_http_error, parameters


class EcuadorSERCOPAPI(IndexSpider, PeriodicSpider):
    """
    Domain
      Servicio Nacional de Contratación Pública (SERCOP)
    Spider arguments
      from_date
        Download only data from this year onward (YYYY format). Defaults to '2015'.
      until_date
        Download only data until this year (YYYY format). Defaults to the current year.
    API documentation
      https://datosabiertos.compraspublicas.gob.ec/PLATAFORMA/datos-abiertos/api
    Bulk download documentation
      https://datosabiertos.compraspublicas.gob.ec/PLATAFORMA/datos-abiertos
    """

    name = 'ecuador_sercop_api'
    custom_settings = {
        # Reduce the number of concurrent requests to avoid multiple failures.
        'CONCURRENT_REQUESTS': 2,
        # Don't let Scrapy handle HTTP 429.
        'RETRY_HTTP_CODES': [],
    }

    # BaseSpider
    date_format = 'year'
    default_from_date = '2015'
    # NOTE(review): 429 is excluded from Scrapy's RETRY_HTTP_CODES above so the
    # base spider's own retry logic (max_attempts) handles rate limiting instead.
    max_attempts = 5
    retry_http_codes = [429]

    # SimpleSpider
    data_type = 'release_package'

    # Local
    url_prefix = 'https://datosabiertos.compraspublicas.gob.ec/PLATAFORMA/api/'

    # PeriodicSpider
    formatter = staticmethod(components(-1))
    pattern = f'{url_prefix}search_ocds?year={{0}}'
    start_requests_callback = 'parse_list'

    # IndexSpider
    page_count_pointer = '/pages'
    parse_list_callback = 'parse_page'

    @handle_http_error
    def parse_page(self, response):
        """
        Yield one request per OCID listed on a ``search_ocds`` results page.

        The file name for each request is derived from the ``ocid`` query-string
        parameter of the record URL.
        """
        for data in response.json()['data']:
            # Some ocids have a '/' character which cannot be in a file name.
            yield self.build_request(
                f'{self.url_prefix}record?ocid={data["ocid"]}',
                formatter=lambda url: parameters('ocid')(url).replace('/', '_'),
            )