Source code for kingfisher_scrapy.spiders.australia_new_south_wales

from kingfisher_scrapy.base_spiders import SimpleSpider
from kingfisher_scrapy.util import handle_http_error, parameters


class AustraliaNewSouthWales(SimpleSpider):
    """
    Domain
      New South Wales (NSW)
    API documentation
      https://github.com/NSW-eTendering/NSW-eTendering-API/blob/master/README.md
    """
    name = 'australia_new_south_wales'

    # SimpleSpider
    data_type = 'release_package'

    # Local
    url_prefix = 'https://www.tenders.nsw.gov.au/?event=public.api.'

    def start_requests(self):
        """
        Yield one search request per release type (planning, tender, contract), asking for 1000 results per page.

        The release type is carried in the request's meta so that ``parse_list`` knows which kind of list it received.
        """
        for stage in ('planning', 'tender', 'contract'):
            search_url = f'{self.url_prefix}{stage}.search&ResultsPerPage=1000'
            yield self.build_request(
                search_url,
                formatter=parameters('event'),
                meta={'release_type': stage},
                callback=self.parse_list
            )

    @handle_http_error
    def parse_list(self, response):
        """
        Handle one page of search results.

        If the page has releases and a ``links.next`` URL, yield a request for the next page first. Then, yield a
        "view" request for each release on the page (for contracts, one request per award of each release).
        """
        payload = response.json()
        stage = response.request.meta['release_type']
        releases = payload['releases']

        # Only paginate when the page is non-empty and "links" is an object with a "next" member.
        links = payload.get('links')
        if releases and isinstance(links, dict) and 'next' in links:
            yield self.build_request(
                links['next'],
                formatter=parameters('event', 'startRow'),
                meta={'release_type': stage},
                callback=self.parse_list
            )

        for release in releases:
            if stage == 'planning':
                yield self._view_request('planning', 'PlannedProcurementUUID', release['tender']['plannedProcurementUUID'])
            elif stage == 'tender':
                yield self._view_request('tender', 'RFTUUID', release['tender']['RFTUUID'])
            elif stage == 'contract':
                # A contract release is fetched award by award.
                for award in release['awards']:
                    yield self._view_request('contract', 'CNUUID', award['CNUUID'])

    def _view_request(self, endpoint, parameter, uuid):
        """
        Build the request for the ``<endpoint>.view`` API event, identified by the ``parameter`` query string
        parameter set to ``uuid``.
        """
        return self.build_request(
            f'{self.url_prefix}{endpoint}.view&{parameter}={uuid}',
            formatter=parameters('event', parameter)
        )