Source code for kingfisher_scrapy.spiders.australia_new_south_wales
from kingfisher_scrapy.base_spiders import SimpleSpider
from kingfisher_scrapy.util import handle_http_error, parameters
[docs]
class AustraliaNewSouthWales(SimpleSpider):
"""
Domain
New South Wales (NSW)
Spider arguments
from_date
Download only data from this date onward (YYYY-MM-DD format).
If ``until_date`` is provided, defaults to '2003-01-01'.
until_date
Download only data until this date (YYYY-MM-DD format).
If ``from_date`` is provided, defaults to today.
API documentation
https://github.com/NSW-eTendering/NSW-eTendering-API/wiki
"""
name = 'australia_new_south_wales'
# BaseSpider
date_format = 'date'
default_from_date = '2003-01-01'
# SimpleSpider
data_type = 'release_package'
# Local
url_prefix = 'https://www.tenders.nsw.gov.au/?event=public.api.'
format_string = f'{url_prefix}{{release_type}}.search&ResultsPerPage=1000'
def start_requests(self):
if self.from_date and self.until_date:
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
self.format_string += f'&publishedFrom={from_date}&publishedTo={until_date}'
for release_type in ('planning', 'tender', 'contract'):
yield self.build_request(
self.format_string.format(release_type=release_type),
formatter=parameters('event'),
meta={'release_type': release_type},
callback=self.parse_list
)
@handle_http_error
def parse_list(self, response):
data = response.json()
release_type = response.request.meta['release_type']
if data['releases'] and 'links' in data and isinstance(data['links'], dict) and 'next' in data['links']:
yield self.build_request(
data['links']['next'],
formatter=parameters('event', 'startRow'),
meta={'release_type': release_type},
callback=self.parse_list
)
for release in data['releases']:
if release_type == 'planning':
uuid = release['tender']['plannedProcurementUUID']
yield self.build_request(
f'{self.url_prefix}planning.view&PlannedProcurementUUID={uuid}',
formatter=parameters('event', 'PlannedProcurementUUID')
)
elif release_type == 'tender':
uuid = release['tender']['RFTUUID']
yield self.build_request(
f'{self.url_prefix}tender.view&RFTUUID={uuid}',
formatter=parameters('event', 'RFTUUID')
)
elif release_type == 'contract':
for award in release['awards']:
uuid = award['CNUUID']
yield self.build_request(
f'{self.url_prefix}contract.view&CNUUID={uuid}',
formatter=parameters('event', 'CNUUID')
)