Source code for kingfisher_scrapy.spiders.canada_montreal

import scrapy

from kingfisher_scrapy.base_spiders import IndexSpider
from kingfisher_scrapy.util import browser_user_agent


class CanadaMontreal(IndexSpider):
    """
    Domain
      Ville de Montréal (City of Montreal)
    API documentation
      http://donnees.ville.montreal.qc.ca/dataset/contrats-et-subventions-api
    """

    name = 'canada_montreal'

    # The publisher sits behind Cloudflare (its responses carry the CF-Cache-Status and CF-RAY headers).
    # Cloudflare answers with HTTP 520 when a request is sent with the default user agent, so a
    # browser-like user agent is used instead.
    user_agent = browser_user_agent

    # BaseSpider
    ocds_version = '1.0'

    # SimpleSpider
    data_type = 'release_package'

    # IndexSpider
    # NOTE(review): presumably the location of the total result count within the JSON response;
    # confirm against IndexSpider.
    result_count_pointer = '/meta/count'
    # Requesting more than 10000 fails with "Too many records requested. Set parameter LIMIT lower"
    limit = 10000

    def start_requests(self):
        """Yield the single initial request, for the first page of releases."""
        first_page_url = f'https://ville.montreal.qc.ca/vuesurlescontrats/api/releases.json?limit={self.limit}'
        # The response is handed to ``parse_list``, which is not defined on this class and so
        # must come from a base class.
        first_page_request = scrapy.Request(
            first_page_url,
            meta={'file_name': 'offset-0.json'},
            callback=self.parse_list,
        )
        yield first_page_request