Source code for kingfisher_scrapy.spiders.peru_osce_bulk
import scrapy
from kingfisher_scrapy.base_spiders import CompressedFileSpider, IndexSpider
from kingfisher_scrapy.util import components, handle_http_error
[docs]
class PeruOSCEBulk(CompressedFileSpider, IndexSpider):
"""
Domain
Organismo Supervisor de las Contrataciones del Estado (OSCE)
API documentation
https://contratacionesabiertas.osce.gob.pe/api
Bulk download documentation
https://contratacionesabiertas.osce.gob.pe/descargas
"""
name = 'peru_osce_bulk'
# SimpleSpider
data_type = 'record_package'
# IndexSpider
formatter = staticmethod(components(-1))
page_count_pointer = '/pagination/num_pages'
parse_list_callback = 'parse_page'
peru_base_url = 'https://contratacionesabiertas.osce.gob.pe/api/v1/files?page={0}&paginateBy=10&format=json'
def start_requests(self):
yield scrapy.Request(self.peru_base_url.format(1),
meta={'file_name': 'list.json'}, callback=self.parse_list)
def url_builder(self, value, data, response):
return self.peru_base_url.format(value)
@handle_http_error
def parse_page(self, response):
for item in response.json()['results']:
yield scrapy.Request((item['files']['json']), meta={'file_name': 'data.zip'})