Source code for kingfisher_scrapy.base_spiders.links_spider

import orjson
from jsonpointer import resolve_pointer

from kingfisher_scrapy.base_spiders import SimpleSpider
from kingfisher_scrapy.exceptions import MissingNextLinkError


[docs] class LinksSpider(SimpleSpider): """ Collect data from an API that implements the `pagination <https://github.com/open-contracting-extensions/ocds_pagination_extension>`__ pattern. #. Inherit from ``LinksSpider`` #. Set a ``data_type`` class attribute to the data type of the API responses #. Set a ``formatter`` class attribute to set the file name like in :meth:`~kingfisher_scrapy.base_spiders.BaseSpider.build_request` #. Set a ``next_link_formatter`` class attribute if pagination URLs differ from start URLs #. Write a ``start()`` method to request the first page of API results #. Optionally, set a ``next_pointer`` class attribute to the JSON Pointer for the next link (default "/links/next") If the API returns the number of total pages or results in the response, consider using ``IndexSpider`` instead. .. code-block:: python import scrapy from kingfisher_scrapy.base_spiders import LinksSpider class MySpider(LinksSpider): name = 'my_spider' # SimpleSpider data_type = 'release_package' # LinksSpider formatter = staticmethod(parameters('page')) async def start(self): yield scrapy.Request('https://example.com/api/packages.json', meta={'file_name': 'page-1.json'}) """ next_pointer = "/links/next"
def parse(self, response):
    """
    Handle one page of API results.

    Everything produced by the parent class's ``parse`` for this response is yielded first. After that,
    ``next_link`` is called with the same response and its return value is yielded as the final element.

    :param response: the response for the current page
    """
    # Delegate the handling of the page itself to the parent implementation.
    yield from super().parse(response)

    # Only look up the following page once the parent generator is exhausted.
    # NOTE(review): ``next_link`` is defined outside this view; presumably it returns the request for the
    # next page of results - confirm what it returns on the last page.
    following = self.next_link(response)
    yield following