Source code for kingfisher_scrapy.spiders.canada_quebec

import datetime

import orjson

from kingfisher_scrapy.base_spiders import CKANSpider, SimpleSpider
from kingfisher_scrapy.exceptions import RetryableError
from kingfisher_scrapy.util import components


[docs] class CanadaQuebec(CKANSpider, SimpleSpider): """ Domain Secrétariat du Conseil du trésor Spider arguments from_date Download only data from this date onward (YYYY-MM-DD format). If ``until_date`` is provided, defaults to '2021-03-01'. until_date Download only data until this date (YYYY-MM-DD format). If ``from_date`` is provided, defaults to today. Bulk download documentation https://www.donneesquebec.ca/recherche/dataset/systeme-electronique-dappel-doffres-seao """ name = "canada_quebec" # BaseSpider default_from_date = "2021-03-01" # SimpleSpider data_type = "release_package" # CKANSpider ckan_api_url = "https://www.donneesquebec.ca" ckan_package_id = "d23b2e02-085d-43e5-9e6e-e1d558ebfdd5" # The same filename can be generated on different dates, but containing different releases, like: # https://www.donneesquebec.ca/recherche/dataset/d23b2e02-085d-43e5-9e6e-e1d558ebfdd5/resource/c6f8d624-b4e7-4a82-bae3-ca78f01bc017/download/hebdo_20251222_20251228.json # https://www.donneesquebec.ca/recherche/dataset/d23b2e02-085d-43e5-9e6e-e1d558ebfdd5/resource/552da290-239f-4512-8f5d-4e0329e5d72d/download/hebdo_20251222_20251228.json formatter = staticmethod(components(-3)) # CKANSpider def get_resource_date(self, resource): # Basename is like "hebdo_20210401_20210411" or "mensuel_20210301_20210331". return datetime.datetime.strptime(resource["url"].split("_")[-2], "%Y%m%d").replace( tzinfo=datetime.timezone.utc ) # SimpleSpider def parse(self, response): # Retry if the response is truncated (invalid JSON). # https://github.com/open-contracting/kingfisher-collect/issues/1250 try: response.json() except orjson.JSONDecodeError as e: raise RetryableError from e yield from super().parse(response)