# Source code for kingfisher_scrapy.spiders.peru_oece_bulk

import scrapy

from kingfisher_scrapy.base_spiders import CompressedFileSpider, IndexSpider
from kingfisher_scrapy.util import components


class PeruOECEBulk(CompressedFileSpider, IndexSpider):
    """
    Domain
      Organismo Especializado para las Contrataciones Públicas Eficientes (OECE)
    API documentation
      https://contratacionesabiertas.oece.gob.pe/api
    Bulk download documentation
      https://contratacionesabiertas.oece.gob.pe/descargas
    """

    name = "peru_oece_bulk"

    # SimpleSpider
    data_type = "record_package"

    # IndexSpider
    formatter = staticmethod(components(-1))
    page_count_pointer = "/pagination/num_pages"
    parse_list_callback = "parse_page"

    # Local
    # URL template for the paginated file listing; ``{0}`` is the page number.
    peru_base_url = "https://contratacionesabiertas.oece.gob.pe/api/v1/files?page={0}&paginateBy=10&format=json"

    async def start(self):
        """Yield the request for page 1 of the file listing, handled by ``parse_list``."""
        first_page_url = self.peru_base_url.format(1)
        yield scrapy.Request(first_page_url, callback=self.parse_list)

    # IndexSpider
    def url_builder(self, value, data, response):
        """Return the file-listing URL with ``value`` substituted as the page number."""
        return self.peru_base_url.format(value)

    def parse_page(self, response):
        """Yield one request per entry in the page's ``results``, targeting its JSON file URL."""
        results = response.json()["results"]
        for entry in results:
            # Some URLs still use the old domain.
            url = entry["files"]["json"].replace(".osce.", ".oece.")
            yield scrapy.Request(url, meta={"file_name": "all.zip"})