Source code for kingfisher_scrapy.spiders.dominican_republic_api
import scrapy
from kingfisher_scrapy.base_spiders import IndexSpider
from kingfisher_scrapy.util import parameters, replace_path_separator
[docs]
class DominicanRepublicAPI(IndexSpider):
    """
    Domain
      Dirección General de Contrataciones Públicas (DGCP)
    Spider arguments
      from_date
        Download only data from this date onward (YYYY-MM-DD format). Defaults to '2015-01-01'.
      until_date
        Download only data until this date (YYYY-MM-DD format). Defaults to today.
    Swagger API documentation
      https://datosabiertos.dgcp.gob.do/api-dgcp/docs/index.html
    """

    name = "dominican_republic_api"

    # BaseSpider
    default_from_date = "2015-01-01"
    date_format = "date"
    date_required = True

    # SimpleSpider
    data_type = "release_package"

    # IndexSpider
    page_count_pointer = "/pages"
    # NOTE(review): given as a string; presumably resolved by IndexSpider to the `parse_page` method below.
    parse_list_callback = "parse_page"

    # Local
    dominican_republic_base_url = "https://datosabiertos.dgcp.gob.do/api-dgcp/v1/ocds/releases"

    async def start(self):
        """Yield the first request to the ``/all`` listing, bounded by ``from_date`` and ``until_date``."""
        # NOTE(review): `self.date_format` is handed straight to `strftime`, so it is presumably replaced with a
        # strftime pattern (rather than the "date" key set above) before this method runs - confirm in BaseSpider.
        listing_url = (
            f"{self.dominican_republic_base_url}/all?limit=1000&start_date={self.from_date.strftime(self.date_format)}"
            f"&end_date={self.until_date.strftime(self.date_format)}"
        )
        # The listing response is handled by `parse_list`, which is not defined in this class.
        yield scrapy.Request(listing_url, callback=self.parse_list)

    def parse_page(self, response):
        """Yield one ``build_request`` result per entry in the listing page's ``payload.content`` array."""
        entries = response.json()["payload"]["content"]

        # `content` is null if, for example, the page number is outside the result set.
        if not entries:
            return

        for entry in entries:
            # Each entry only needs to supply its `ocid`; the release data itself is requested by OCID.
            yield self.build_request(
                f"{self.dominican_republic_base_url}?ocid={entry['ocid']}",
                formatter=parameters("ocid", parser=replace_path_separator),
            )