Source code for kingfisher_scrapy.spiders.united_kingdom_fts

from kingfisher_scrapy.base_spiders import LinksSpider, PeriodicSpider
from kingfisher_scrapy.util import parameters


class UnitedKingdomFTS(LinksSpider, PeriodicSpider):
    """
    Domain
      Find a Tender Service (FTS)
    Spider arguments
      from_date
        Download only data from this time onward (YYYY-MM-DDThh:mm:ss format). Defaults to '2021-01-01T00:00:00'.
      until_date
        Download only data until this time (YYYY-MM-DDThh:mm:ss format). Defaults to now.
    API documentation
      https://www.find-tender.service.gov.uk/apidocumentation/1.0/GET-ocdsReleasePackages
    """

    name = "united_kingdom_fts"
    custom_settings = {
        # The API (using Amazon ELB) eventually responds with HTTP 429 "12 request limit in 2 minute exceeded".
        # Requests are therefore issued one at a time, with a delay between them.
        "CONCURRENT_REQUESTS": 1,
        "DOWNLOAD_DELAY": 10,
    }

    # BaseSpider
    date_format = "datetime"
    default_from_date = "2021-01-01T00:00:00"
    # https://www.find-tender.service.gov.uk/apidocumentation/1.0/GET-ocdsReleasePackages
    retry_http_codes = [503]

    # SimpleSpider
    data_type = "release_package"

    # LinksSpider
    formatter = staticmethod(parameters("updatedTo"))
    next_link_formatter = staticmethod(parameters("cursor"))

    # PeriodicSpider
    # Placeholder {0} is formatted into ``updatedFrom`` and {1} into ``updatedTo``.
    pattern = (
        "https://www.find-tender.service.gov.uk/api/1.0/ocdsReleasePackages?updatedFrom="
        "{0:%Y-%m-%dT%H:%M:%SZ}&updatedTo={1:%Y-%m-%dT%H:%M:%SZ}"
    )
    # The endpoint doesn't return all available releases with a longer `step` value.
    step = 0.25

    # LinksSpider
    def parse(self, response):
        """
        Rewrite two problematic byte sequences in the response body, then delegate to the parent ``parse``.

        :param response: the downloaded response; its ``body`` is bytes
        :returns: a generator of whatever the parent class's ``parse`` yields for the patched response
        """
        # TODO(james): Temporary fix. Remove this method once the issue is closed in Kingfisher Process.
        # https://github.com/open-contracting/kingfisher-process/issues/323
        substitutions = (
            (b"1e9999", b"9999999"),
            (b" 000,", b" 0,"),
        )

        # Apply the substitutions in order, each one operating on the result of the previous one.
        patched_body = response.body
        for needle, replacement in substitutions:
            patched_body = patched_body.replace(needle, replacement)

        patched_response = response.replace(body=patched_body)
        yield from super().parse(patched_response)