Source code for kingfisher_scrapy.extensions.sentry_logging
import sentry_sdk
from scrapy.exceptions import (
CannotResolveHostError,
DownloadConnectionRefusedError,
DownloadFailedError,
DownloadTimeoutError,
NotConfigured,
)
# This subclass of ConnectError isn't wrapped by Scrapy's wrap_twisted_exceptions().
# https://docs.twisted.org/en/stable/api/twisted.internet.error.html
from twisted.internet.error import TCPTimedOutError
# Unformatted log message templates that before_send() matches against ``LogRecord.msg`` (that is, before the
# arguments are merged in). Each entry is preceded by the name of the code that logs it.
IGNORE_MESSAGES = {
    # Logged by BaseSpider.log_error_from_response
    "status=%d message=%r request=%s file_name=%s",
    # Logged by RetryDataErrorMiddleware.process_spider_exception
    "Gave up retrying %(request)s (failed %(failures)d times): %(exception)s",
    # Logged by scrapy.downloadermiddlewares.retry.get_retry_request
    "Gave up retrying %(request)s (failed %(retry_times)d times): %(reason)s",
}
[docs]
def before_send(event, hint):
    """
    Filter out ERROR-level log messages about TCP, DNS and HTTP errors.

    This function is passed to ``sentry_sdk.init()`` as its ``before_send`` callback.

    An event is discarded if it originates from a log record whose level is ``ERROR`` and whose unformatted message
    is either:

    -  one of ``IGNORE_MESSAGES``, or
    -  Scrapy's short download error message, logged with an exception whose class is (a subclass of) one of the
       download/connection errors listed below.

    Any other event is returned unchanged, including events that don't originate from a log record, events from
    log records at other levels (like ``CRITICAL``) and events from log records whose message isn't a string.

    :param event: the Sentry event
    :param hint: the Sentry hint, which contains a ``log_record`` key if the event originates from a log record
    :returns: the event, unchanged, or ``None`` to discard it
    """
    if "log_record" not in hint:
        return event

    # https://docs.python.org/3/library/logging.html#logrecord-attributes
    record = hint["log_record"]

    # Allow CRITICAL messages.
    if record.levelname != "ERROR":
        return event

    message = record.msg

    # The logging module accepts any object as a message. A non-string message can't match any of the templates
    # below, and an unhashable one (like a dict or list) would raise TypeError on the set membership test.
    if not isinstance(message, str):
        return event

    if message in IGNORE_MESSAGES:
        return None

    # scrapy.logformatter.DOWNLOADERRORMSG_SHORT
    if message == "Error downloading %(request)s" and record.exc_info:
        # https://docs.python.org/3/library/sys.html#sys.exc_info
        exc_type = record.exc_info[0]
        # The exception type is None if the message was logged with exc_info=True while no exception was being
        # handled. issubclass() would raise TypeError on it.
        if isinstance(exc_type, type) and issubclass(
            exc_type,
            (
                CannotResolveHostError,
                DownloadConnectionRefusedError,
                DownloadFailedError,
                DownloadTimeoutError,
                TCPTimedOutError,
            ),
        ):
            return None

    return event
# https://stackoverflow.com/questions/25262765/handle-all-exception-in-scrapy-with-sentry
[docs]
class SentryLogging:
    """
    Reports exceptions and log records to Sentry. A log record whose level is ``ERROR`` or higher is captured as an
    event. Each event is passed to :func:`before_send` before it is sent.

    The Sentry DSN is read from the ``SENTRY_DSN`` setting.

    .. seealso::

       `Sentry documentation <https://docs.sentry.io/platforms/python/logging/>`__
    """

    def __init__(self, sentry_dsn):
        """
        Initialize the Sentry SDK.

        :param sentry_dsn: the DSN with which to initialize the Sentry SDK
        """
        sentry_sdk.init(sentry_dsn, before_send=before_send)

    @classmethod
    def from_crawler(cls, crawler):
        """
        Instantiate the extension using the crawler's ``SENTRY_DSN`` setting.

        :param crawler: the crawler, whose ``settings`` are read
        :returns: an instance of this class
        :raises scrapy.exceptions.NotConfigured: if the ``SENTRY_DSN`` setting is empty or not set
        """
        dsn = crawler.settings["SENTRY_DSN"]
        if dsn:
            return cls(dsn)

        raise NotConfigured("SENTRY_DSN is not set.")