Source code for kingfisher_scrapy.spiders.peru_compras_bulk
import scrapy
from kingfisher_scrapy.spiders.peru_compras_base import PeruComprasBase
from kingfisher_scrapy.util import components
[docs]
class PeruComprasBulk(PeruComprasBase):
    """
    Domain
      Peru Compras (contracts within framework agreements)
    Caveats
      -  The JSON data sometimes contains unescaped newline characters within strings.
      -  The peru_compras spider contains more updated data.
    Spider arguments
      from_date
        Download only data from this month onward (YYYY-MM format). Defaults to '2021-08'.
      until_date
        Download only data until this month (YYYY-MM format). Defaults to the current month.
    """

    name = "peru_compras_bulk"

    # BaseSpider
    date_format = "year-month"
    default_from_date = "2021-08"

    async def start(self):
        """Yield the single POST request for the bulk download list; the response goes to ``parse_list``."""
        # Anio and Mes are sent empty; presumably the endpoint then lists every available file - TODO confirm.
        list_url = f"{self.url_prefix}getListaDescargaMasiva?Anio=&Mes="
        yield scrapy.Request(list_url, method="POST", callback=self.parse_list)

    def parse_list(self, response):
        """
        Yield one download request per listed file whose year-month lies within the requested date range.

        Each entry of the JSON response is read for its ``C_Anio``, ``CodMes`` and ``C_FileJson`` keys.
        """
        # NOTE(review): self.date_format is handed to strftime, so the base spider presumably replaces the
        # "year-month" label with a strftime pattern before this callback runs - confirm.
        earliest = self.from_date.strftime(self.date_format)
        latest = self.until_date.strftime(self.date_format)

        for entry in response.json():
            # The bounds are compared as plain strings against "<year>-<month>" built from the entry.
            period = f"{entry['C_Anio']}-{entry['CodMes']}"
            if not (earliest <= period <= latest):
                continue

            file_url = (
                f"https://saeusceprod01.blob.core.windows.net/contproveedor/DescargaMasiva/{entry['C_FileJson']}"
            )
            # components(-1) presumably derives the file name from the last URL path component - verify in util.
            yield self.build_request(file_url, formatter=components(-1))