Source code for kingfisher_scrapy.spiders.dominican_republic_api
from kingfisher_scrapy.base_spiders import LinksSpider, PeriodicSpider
from kingfisher_scrapy.util import components, handle_http_error
[docs]
class DominicanRepublicAPI(LinksSpider, PeriodicSpider):
    """
    Domain
      Dirección General de Contrataciones Públicas (DGCP)
    Spider arguments
      from_date
        Download only data from this date onward (YYYY-MM-DD format). Defaults to '2018-01-01'.
      until_date
        Download only data until this date (YYYY-MM-DD format). Defaults to today.
    API documentation
      https://api.dgcp.gob.do/api/docs
    """

    name = 'dominican_republic_api'

    custom_settings = {
        # One request at a time: concurrent requests were observed to cause multiple failures.
        'CONCURRENT_REQUESTS': 1,
    }

    # BaseSpider
    default_from_date = '2018-01-01'
    date_format = 'date'

    # SimpleSpider
    data_type = 'release_package'

    # LinksSpider
    # NOTE(review): this was annotated "year" originally; confirm which URL components
    # components(-2) selects for the date-range pattern below.
    formatter = staticmethod(components(-2))
    next_pointer = '/pagination/next'

    # Local
    base_url = 'https://api.dgcp.gob.do/api/'

    # PeriodicSpider
    # Positional fields 0 and 1 are each rendered with the %Y-%m-%d date format; the URL ends in "/1".
    pattern = base_url + 'date/{0:%Y-%m-%d}/{1:%Y-%m-%d}/1'

    @handle_http_error
    def parse(self, response):
        """
        Yield a request for every release listed in the response, then the next-page link.

        Each entry of the response's ``data`` array contributes one request to
        ``<base_url>release/<ocid>``, handled by the parent class's ``parse``.
        Whatever ``next_link`` returns for this response is yielded last.
        """
        listed_releases = response.json()['data']
        # Bind the parent implementation once; it is the callback for every release request.
        parent_parse = super().parse
        for item in listed_releases:
            release_url = f'{self.base_url}release/{item["ocid"]}'
            yield self.build_request(release_url, formatter=components(-1), callback=parent_parse)
        yield self.next_link(response)