Source code for kingfisher_scrapy.spiders.rwanda_bulk
import datetime
import scrapy
from kingfisher_scrapy.base_spiders import CompressedFileSpider
from kingfisher_scrapy.util import MAX_DOWNLOAD_TIMEOUT, parameters
[docs]
class RwandaBulk(CompressedFileSpider):
    """
    Domain
      Rwanda Public Procurement Authority (RPPA)
    Spider arguments
      from_date
        Download only data from this month onward (YYYY-MM format).
        If ``until_date`` is provided, defaults to '2013-12'.
      until_date
        Download only data until this month (YYYY-MM format).
        If ``from_date`` is provided, defaults to the current month.
    Bulk download documentation
      https://ocds.umucyo.gov.rw/OpenData
    """

    name = "rwanda_bulk"
    custom_settings = {
        "DOWNLOAD_TIMEOUT": MAX_DOWNLOAD_TIMEOUT,
    }

    # BaseSpider
    date_format = "year-month"
    default_from_date = "2013-12"
    skip_pluck = "Already covered (see code for details)"  # rwanda_api

    # SimpleSpider
    data_type = "release_package"

    async def start(self):
        """Request the list of available datasets, to be handled by :meth:`parse_list`."""
        yield scrapy.Request(
            "https://ocds.umucyo.gov.rw/opendata/api/v1/ui/data_set/available_datasets",
            callback=self.parse_list,
        )

    def parse_list(self, response):
        """
        Yield one download request per monthly JSON archive listed in the response.

        Only entries ending in ``-json.zip`` are requested; every other entry is skipped. If both
        ``from_date`` and ``until_date`` are set, an archive is requested only if the first day of
        its month (in UTC) is within that inclusive range.

        The response looks like::

            {
              "status": 200,
              "returnCode": 7,
              "message": "Available datasets successfully retrieved",
              "datasets": {
                "2025": [
                  "01-January-csv.zip",
                  "01-January-json.zip",
                  "01-January-xlsx.zip",
                  "02-February-csv.zip",
                  "02-February-json.zip",
                  "02-February-xlsx.zip",
                  ...,
                  "flattened",
                  "json"
                ],
                ...
              }
            }
        """
        for year, filenames in response.json()["datasets"].items():
            for filename in filenames:
                if not filename.endswith("-json.zip"):
                    continue

                if self.from_date and self.until_date:
                    # The first two characters of the filename are the month number.
                    month_start = datetime.datetime(int(year), int(filename[:2]), 1, tzinfo=datetime.timezone.utc)
                    in_range = self.from_date <= month_start <= self.until_date
                    if not in_range:
                        continue

                url = (
                    "https://ocds.umucyo.gov.rw/opendata/api/v1/ui/data_set/download"
                    f"?year={year}&month_file={filename}"
                )
                yield self.build_request(url, formatter=parameters("year", "month_file"))