Source code for kingfisher_scrapy.spiders.peru_compras_api
import scrapy
from kingfisher_scrapy.spiders.peru_compras_base import PeruComprasBase
from kingfisher_scrapy.util import parameters
[docs]
class PeruComprasAPI(PeruComprasBase):
    """
    Domain
      Peru Compras (contracts within framework agreements)
    Caveats
      The JSON data sometimes contains unescaped newline characters within strings.
    Spider arguments
      from_date
        Download only data from this date onward (YYYY-MM-DD format). Defaults to '2017-01-01'.
      until_date
        Download only data until this date (YYYY-MM-DD format). Defaults to today.
    """

    name = "peru_compras_api"

    # BaseSpider
    default_from_date = "2017-01-01"

    async def start(self):
        """Request the filters listing; its response is handled by ``parse_list``."""
        filters_url = f"{self.url_prefix}obtenerFiltros"
        yield scrapy.Request(filters_url, callback=self.parse_list)

    def parse_list(self, response):
        """
        Yield one download request per framework agreement found in the filters response.

        The requested date range is taken from ``self.from_date`` and ``self.until_date``,
        both rendered with ``self.date_format``.
        """
        date_from = self.from_date.strftime(self.date_format)
        date_until = self.until_date.strftime(self.date_format)

        # The response body is one long text made of several lists joined by "¯":
        #   list_1¯list_2¯list_3
        # Only the first list (the framework agreements) is needed for querying the API.
        agreements_text = response.text.partition("¯")[0]

        # Entries of that list are separated by "¬" and look like id-type^description, e.g.:
        #   130-BIENES^IM-CE-2020-9 MATERIAL MÉDICO ¬128-BIENES^IM-CE-2020-8 DISPOSITIVO MÉDICO IN VITRO ¬
        for entry in agreements_text.split("¬"):
            # The id is whatever precedes the first "-".
            agreement_id = entry.partition("-")[0]
            if not agreement_id:
                # Nothing to query for an empty id (e.g. the empty entry after a trailing "¬").
                continue

            query = f"?pAcuerdo={agreement_id}&pFechaIni={date_from}&pFechaFin={date_until}"
            yield self.build_request(
                f"{self.url_prefix}DescargaJsonOCDS" + query,
                formatter=parameters("pAcuerdo"),
            )