Source code for kingfisher_scrapy.spiders.brazil_compras

import scrapy

from kingfisher_scrapy import util
from kingfisher_scrapy.base_spiders import LinksSpider
from kingfisher_scrapy.util import parameters


class BrazilCompras(LinksSpider):
    """
    Domain
      Portal de Compras do Governo Federal
    Spider arguments
      from_date
        Download only data from this date onward (YYYY-MM-DD format). Defaults to '2021-08-10'.
      until_date
        Download only data until this date (YYYY-MM-DD format). Defaults to today.
    API documentation
      https://dadosabertos.compras.gov.br/swagger-ui/index.html#/11%20-%20OCDS/releases
    """

    name = "brazil_compras"
    custom_settings = {
        # Reduce the number of concurrent requests to respect undocumented limit (100/min).
        "CONCURRENT_REQUESTS": 1,
    }

    # BaseSpider
    date_required = True
    # This is the first date for which there's data for any buyers.
    default_from_date = "2021-08-10"

    # SimpleSpider
    data_type = "release_package"

    # LinksSpider
    formatter = staticmethod(parameters("page", "releaseStartDate", "releaseEndDate", "buyerID"))

    async def start(self):
        """Request the buyer list (queried with ``statusOrgao=true``); its pages are handled by parse_buyer_list."""
        buyers_url = "https://dadosabertos.compras.gov.br/modulo-uasg/2_consultarOrgao?statusOrgao=true"
        yield scrapy.Request(buyers_url, callback=self.parse_buyer_list)

    def parse_buyer_list(self, response):
        """
        Request pages 2 through ``totalPaginas`` of the buyer list, then process the buyers in this response.

        The follow-up pages are requested by replacing the ``pagina`` query string parameter of this response's
        request URL, and are handled by parse_buyer_page.
        """
        last_page = response.json()["totalPaginas"]
        for page_number in range(2, last_page + 1):
            page_url = util.replace_parameters(response.request.url, pagina=page_number)
            yield scrapy.Request(page_url, callback=self.parse_buyer_page)

        # This response is itself a page of buyers, so process it like any other page.
        yield from self.parse_buyer_page(response)

    def parse_buyer_page(self, response):
        """
        Yield a releases request for each buyer in ``resultado`` and each date window.

        The date windows are the (start, end) pairs that ``util.date_range_by_interval`` produces for the spider's
        ``from_date`` and ``until_date`` with an interval of 28.
        """
        for buyer in response.json()["resultado"]:
            # The API errors if the difference between the month values is greater than 1; for example, January 1 to
            # February 28 succeeds, but January 31 to March 1 fails. To avoid errors, use the shortest month length.
            # "Erro ao efetuar a consulta Período inicial e final maior que 1 mês."
            windows = util.date_range_by_interval(self.from_date, self.until_date, 28)
            for window_start, window_end in windows:
                releases_url = (
                    "https://dadosabertos.compras.gov.br/modulo-ocds/1_releases"
                    f"?page=1&offSet=100&buyerID={buyer['cnpjCpfOrgao']}"
                    f"&releaseStartDate={window_start:%Y-%m-%d}"
                    f"&releaseEndDate={window_end:%Y-%m-%d}"
                )
                yield self.build_request(releases_url, formatter=self.formatter)

    # LinksSpider
    def parse(self, response):
        """Delegate to the parent class' ``parse`` only if the response's JSON has a ``releases`` key."""
        # If no releases were found, the API returns a release package with no releases array.
        if "releases" not in response.json():
            return
        yield from super().parse(response)