Source code for kingfisher_scrapy.spiders.peru_oece_bulk
import scrapy
from kingfisher_scrapy.base_spiders import CompressedFileSpider, IndexSpider
from kingfisher_scrapy.util import components
[docs]
class PeruOECEBulk(CompressedFileSpider, IndexSpider):
    """
    Domain
      Organismo Especializado para las Contrataciones Públicas Eficientes (OECE)
    API documentation
      https://contratacionesabiertas.oece.gob.pe/api
    Bulk download documentation
      https://contratacionesabiertas.oece.gob.pe/descargas
    """

    name = "peru_oece_bulk"

    # SimpleSpider
    data_type = "record_package"

    # IndexSpider
    formatter = staticmethod(components(-1))
    page_count_pointer = "/pagination/num_pages"
    parse_list_callback = "parse_page"

    # Local
    # Paginated listing of downloadable files; ``{0}`` is filled with the page number.
    peru_base_url = "https://contratacionesabiertas.oece.gob.pe/api/v1/files?page={0}&paginateBy=10&format=json"

    async def start(self):
        """Request the first page of the file listing, handled by ``parse_list``."""
        first_page_url = self.peru_base_url.format(1)
        yield scrapy.Request(first_page_url, callback=self.parse_list)

    # IndexSpider
    def url_builder(self, value, data, response):
        """Return the listing URL for page ``value`` (``data`` and ``response`` are unused)."""
        page_url = self.peru_base_url.format(value)
        return page_url

    def parse_page(self, response):
        """Yield one download request per entry in the listing's ``results``."""
        entries = response.json()["results"]
        for entry in entries:
            # Some URLs still use the old domain.
            download_url = entry["files"]["json"].replace(".osce.", ".oece.")
            yield scrapy.Request(download_url, meta={"file_name": "all.zip"})