Source code for kingfisher_scrapy.spiders.united_kingdom_fts
import datetime
import scrapy
from kingfisher_scrapy.base_spiders import LinksSpider
from kingfisher_scrapy.util import handle_http_error, parameters
[docs]
class UnitedKingdomFTS(LinksSpider):
    """
    Domain
      Find a Tender Service (FTS)
    Spider arguments
      from_date
        Download only data from this time onward (YYYY-MM-DDThh:mm:ss format).
        If ``until_date`` is provided, defaults to '2021-01-01T00:00:00'.
      until_date
        Download only data until this time (YYYY-MM-DDThh:mm:ss format).
        If ``from_date`` is provided, defaults to now.
    API documentation
      https://www.find-tender.service.gov.uk/apidocumentation/1.0/GET-ocdsReleasePackages
    """

    name = 'united_kingdom_fts'

    # BaseSpider
    # NOTE(review): start_requests() passes self.date_format to strftime(), so the base class presumably replaces
    # this key with a strftime pattern before crawling starts - confirm against BaseSpider.
    date_format = 'datetime'
    default_from_date = '2021-01-01T00:00:00'

    # SimpleSpider
    data_type = 'release_package'

    # LinksSpider
    # NOTE(review): presumably names each following page's file after the "cursor" query string parameter of its
    # URL - confirm against kingfisher_scrapy.util.parameters.
    formatter = staticmethod(parameters('cursor'))

    def start_requests(self):
        """
        Yield the first request to the OCDS release packages endpoint.

        If both ``from_date`` and ``until_date`` are set, they are sent as the ``updatedFrom`` and ``updatedTo``
        query string parameters. Otherwise, the endpoint is requested without any date filter. Either way, the
        response's file is named after the upper date bound: ``until_date`` if filtering, or else the current time
        in UTC.
        """
        endpoint = 'https://www.find-tender.service.gov.uk/api/1.0/ocdsReleasePackages'

        if not (self.from_date and self.until_date):
            # No date range: request the unfiltered endpoint, and use the current time for the file name.
            url = endpoint
            until_date = datetime.datetime.now(tz=datetime.timezone.utc).strftime(self.date_format)
        else:
            from_date = self.from_date.strftime(self.date_format)
            until_date = self.until_date.strftime(self.date_format)
            url = f'{endpoint}?updatedFrom={from_date}&updatedTo={until_date}'

        # The file is named after the upper date bound (original note: reverse chronological order).
        request_meta = {'file_name': f'{until_date}.json'}
        # Explicitly ask for a JSON response.
        request_headers = {'Accept': 'application/json'}

        yield scrapy.Request(url, meta=request_meta, headers=request_headers)

    @handle_http_error
    def parse(self, response):
        """
        Replace every ``1e9999`` byte sequence in the response body with ``9999999``, then delegate to the parent
        class's ``parse`` method.
        """
        # TODO: Temporary fix for https://github.com/open-contracting/kingfisher-process/issues/323.
        # Remove this method once the issue is closed in Kingfisher Process.
        patched_body = response.body.replace(b'1e9999', b'9999999')
        patched_response = response.replace(body=patched_body)

        yield from super().parse(patched_response)