Source code for kingfisher_scrapy.spiders.mexico_aguascalientes_plataforma_digital_estatal
import scrapy
from kingfisher_scrapy.base_spiders import SimpleSpider
[docs]
class MexicoAguascalientesPlataformaDigitalEstatal(SimpleSpider):
"""
Domain
Mexico Aguascalientes Plataforma Digital Estatal
Bulk download documentation
https://plataformadigitalestatal.org/Publica/contratacionesPublicas/index.html
"""
name = "mexico_aguascalientes_plataforma_digital_estatal"
# BaseSpider
root_path = "item"
# SimpleSpider
data_type = "release_package"
async def start(self):
yield scrapy.Request(
"https://plataformadigitalestatal.org/Publica/contratacionesPublicas/index.html", callback=self.parse_list
)
def parse_list(self, response):
for url in response.xpath("//a/@href").getall():
# The URL is currently a link to a Google Drive file.
# The other existing href are partials within the website, e.g. "#contrataciones".
if "http" in url:
yield scrapy.Request(url, meta={"file_name": "all.json"})