Source code for kingfisher_scrapy.base_spiders.links_spider

from jsonpointer import resolve_pointer

from kingfisher_scrapy.base_spiders import SimpleSpider
from kingfisher_scrapy.exceptions import MissingNextLinkError
from kingfisher_scrapy.util import handle_http_error


class LinksSpider(SimpleSpider):
    """
    Base class for spiders that collect data from an API that implements the `pagination
    <https://github.com/open-contracting-extensions/ocds_pagination_extension>`__ pattern.

    To write such a spider:

    #. Inherit from ``LinksSpider``
    #. Set a ``data_type`` class attribute to the data type of the API responses
    #. Set a ``formatter`` class attribute to set the file name like in
       :meth:`~kingfisher_scrapy.base_spiders.BaseSpider.build_request`
    #. Write a ``start_requests()`` method to request the first page of API results
    #. Optionally, set a ``next_pointer`` class attribute to the JSON Pointer for the next link
       (default "/links/next")

    If the API returns the number of total pages or results in the response, consider using ``IndexSpider`` instead.

    .. code-block:: python

        import scrapy

        from kingfisher_scrapy.base_spiders import LinksSpider

        class MySpider(LinksSpider):
            name = 'my_spider'

            # SimpleSpider
            data_type = 'release_package'

            # LinksSpider
            formatter = staticmethod(parameters('page'))

            def start_requests(self):
                yield scrapy.Request('https://example.com/api/packages.json', meta={'file_name': 'page-1.json'})
    """

    # JSON Pointer for the next link in a response; subclasses may override it.
    next_pointer = '/links/next'

    @handle_http_error
    def parse(self, response):
        """
        Yields everything the parent class's ``parse`` yields for the response, followed by the result of
        ``next_link`` for the same response.
        """
        # The parent's output is exhausted first; the follow-up is only computed afterwards.
        yield from super().parse(response)

        follow_up = self.next_link(response)
        yield follow_up