Source code for kingfisher_scrapy.spiders.taiwan_aac

import scrapy

from kingfisher_scrapy.base_spiders import SimpleSpider
from kingfisher_scrapy.util import get_parameter_value, parameters, replace_parameters


[docs] class TaiwanAAC(SimpleSpider): """ Domain Agency Against Corruption, Ministry of Justice, ROC (Taiwan) Bulk download documentation https://gpip.aac.moj.gov.tw/cht/index.php?code=list&ids=6&page=1 """ name = "taiwan_aac" # SimpleSpider data_type = "release_package" async def start(self): yield scrapy.Request( "https://gpip.aac.moj.gov.tw/cht/index.php?code=list&ids=6&page=1", callback=self.parse_list ) def parse_list(self, response): for href in response.xpath('//a[contains(@title, "OCDS")]/@href').getall(): yield self.build_request(response.urljoin(href), formatter=parameters("ids")) # If the page is out of range, there are no list items. if response.xpath('//div[@class="main"]//li'): page = int(get_parameter_value(response.request.url, "page")) yield scrapy.Request(replace_parameters(response.request.url, page=page + 1), callback=self.parse_list)