Source code for kingfisher_scrapy.spiders.peru_compras_api

import scrapy

from kingfisher_scrapy.spiders.peru_compras_base import PeruComprasBase
from kingfisher_scrapy.util import parameters


class PeruComprasAPI(PeruComprasBase):
    """
    Domain
      Peru Compras (contracts within framework agreements)
    Caveats
      The JSON data sometimes contains unescaped newline characters within strings.
    Spider arguments
      from_date
        Download only data from this date onward (YYYY-MM-DD format). Defaults to '2017-01-01'.
      until_date
        Download only data until this date (YYYY-MM-DD format). Defaults to today.
    """

    name = "peru_compras_api"

    # BaseSpider
    default_from_date = "2017-01-01"

    async def start(self):
        """Yield the initial request for the filters endpoint; its response is handled by ``parse_list``."""
        filters_url = f"{self.url_prefix}obtenerFiltros"
        yield scrapy.Request(filters_url, callback=self.parse_list)

    def parse_list(self, response):
        """
        Yield one ``DescargaJsonOCDS`` request per framework agreement ID listed in the filters response.

        Each request is limited to the spider's ``from_date``..``until_date`` range.
        """
        start_date = self.from_date.strftime(self.date_format)
        end_date = self.until_date.strftime(self.date_format)

        # The body is one long text made of several lists joined by "¯": list_1¯list_2¯list_3
        # Only the first list (the framework agreements) is needed to query the API.
        agreements, _, _ = response.text.partition("¯")

        # Entries in that list are separated by "¬" and look like id-type^description, for example:
        # 130-BIENES^IM-CE-2020-9 MATERIAL MÉDICO ¬128-BIENES^IM-CE-2020-8 DISPOSITIVO MÉDICO IN VITRO ¬
        for entry in agreements.split("¬"):
            # The ID is everything before the first "-".
            agreement_id = entry.partition("-")[0]
            if not agreement_id:
                # E.g. the empty entry that follows the trailing "¬".
                continue

            url = (
                f"{self.url_prefix}DescargaJsonOCDS"
                f"?pAcuerdo={agreement_id}&pFechaIni={start_date}&pFechaFin={end_date}"
            )
            yield self.build_request(url, formatter=parameters("pAcuerdo"))