From b421b58dad349dd324607935e5e78cffaf02760a Mon Sep 17 00:00:00 2001 From: sarayourfriend Date: Wed, 14 Aug 2024 17:28:55 +0100 Subject: [PATCH] Do not logger.error timeout errors in dead link checks (#4757) * Do not logger.error timeout errors in dead link checks * Fix comment Co-authored-by: Madison Swain-Bowden --------- Co-authored-by: Madison Swain-Bowden --- api/api/utils/check_dead_links/__init__.py | 11 +++++- api/test/unit/utils/test_check_dead_links.py | 39 +++++++++++++++----- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/api/api/utils/check_dead_links/__init__.py b/api/api/utils/check_dead_links/__init__.py index f54486f991..9b8d56dd34 100644 --- a/api/api/utils/check_dead_links/__init__.py +++ b/api/api/utils/check_dead_links/__init__.py @@ -71,7 +71,16 @@ async def _head( ) status = response.status except (aiohttp.ClientError, asyncio.TimeoutError) as exception: - logger.error("dead_link_validation_error", exc_info=True, e=exception) + if not isinstance(exception, asyncio.TimeoutError): + # only log non-timeout exceptions. Timeouts are more or less expected + # and we have means of visibility into them (e.g., querying for timings + # that exceed the timeout); they do _not_ need to be in the error log or go to Sentry + # The timeout exception class aiohttp uses is a subclass of `ClientError` and `asyncio.TimeoutError`, + # so we have to explicitly check that the error is _not_ an asyncio timeout error, not that it + # _is_ an aiohttp error. When it is a timeout error, it will be both `asyncio.TimeoutError` and + # `aiohttp.ClientError` because it's a subclass of both + logger.error("dead_link_validation_error", exc_info=True, exc=exception) + status = _ERROR_STATUS return url, status diff --git a/api/test/unit/utils/test_check_dead_links.py b/api/test/unit/utils/test_check_dead_links.py index 344bb94e9b..d89b6e7e61 100644 --- a/api/test/unit/utils/test_check_dead_links.py +++ b/api/test/unit/utils/test_check_dead_links.py @@ -2,9 +2,9 @@ from collections.abc import Callable from typing import Any +import aiohttp import pook import pytest -from aiohttp.client import ClientSession from elasticsearch_dsl.response import Hit from structlog.testing import capture_logs @@ -44,12 +44,6 @@ def test_sends_user_agent(): def test_handles_timeout(monkeypatch): - """ - Test that case where timeout occurs. - - Note: This test takes just over 3 seconds to run as it simulates network delay of - 3 seconds. - """ query_hash = "test_handles_timeout" results = _make_hits(1) start_slice = 0 @@ -57,14 +51,41 @@ def test_handles_timeout(monkeypatch): async def raise_timeout_error(*args, **kwargs): raise asyncio.TimeoutError() - monkeypatch.setattr(ClientSession, "_request", raise_timeout_error) - check_dead_links(query_hash, start_slice, results) + monkeypatch.setattr(aiohttp.ClientSession, "_request", raise_timeout_error) + with capture_logs() as logs: + check_dead_links(query_hash, start_slice, results) # `check_dead_links` directly modifies the results list # if the results are timing out then they're considered dead and discarded # so should not appear in the final list of results. assert len(results) == 0 + # A timeout should not get logged as an error + log_event = next((log for log in logs if log["log_level"] == "error"), None) + assert log_event is None + + +def test_handles_error(monkeypatch): + query_hash = "test_handles_timeout" + results = _make_hits(1) + start_slice = 0 + + async def raise_nontimeout_error(*args, **kwargs): + raise aiohttp.InvalidURL("https://invalid-url") + + monkeypatch.setattr(aiohttp.ClientSession, "_request", raise_nontimeout_error) + with capture_logs() as logs: + check_dead_links(query_hash, start_slice, results) + + # `check_dead_links` directly modifies the results list + # if the results are erroring out then they're considered dead and discarded + # so should not appear in the final list of results. + assert len(results) == 0 + + # A non-timeout exception should get logged as an error + log_event = next((log for log in logs if log["log_level"] == "error"), None) + assert log_event is not None + @pook.on @pytest.mark.parametrize("provider", ("thingiverse", "flickr"))