Skip to content

Commit

Permalink
Add request retries for 5XX errors on Science Museum & Brooklyn Museum (
Browse files Browse the repository at this point in the history
  • Loading branch information
AetherUnbound authored Aug 15, 2024
1 parent 05f8c55 commit 083c07f
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 0 deletions.
13 changes: 13 additions & 0 deletions catalog/dags/providers/provider_api_scripts/brooklyn_museum.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import logging

import backoff
import lxml.html as html
from airflow.models import Variable
from requests import HTTPError

from common.licenses import LicenseInfo, get_license_info
from common.loader import provider_details as prov
Expand All @@ -21,6 +23,17 @@ def __init__(self, *args, **kwargs):
self.api_key = Variable.get("API_KEY_BROOKLYN_MUSEUM")
self.headers = {"api_key": self.api_key}

# Brooklyn Museum's API tends to be flaky, so every request is retried with
# exponential backoff (for up to two minutes in total) when it fails with a
# transient gateway/availability error: 502, 503 or 504.
# See: https://github.com/WordPress/openverse/issues/4712
get_response_json = backoff.on_exception(
    backoff.expo,
    HTTPError,
    max_time=60 * 2,
    # Give up immediately unless the status is 502, 503 or 504. An HTTPError
    # can be raised without a response attached (``e.response`` is None); in
    # that case we also give up so the original error surfaces instead of an
    # AttributeError raised from inside this predicate.
    giveup=lambda e: (
        getattr(e.response, "status_code", None) not in {502, 503, 504}
    ),
)(ProviderDataIngester.get_response_json)

def get_next_query_params(self, prev_query_params: dict | None) -> dict:
if not prev_query_params:
return {
Expand Down
14 changes: 14 additions & 0 deletions catalog/dags/providers/provider_api_scripts/science_museum.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
import re
from datetime import date

import backoff
from requests import HTTPError

from common import slack
from common.licenses import LicenseInfo, get_license_info
from common.loader import provider_details as prov
Expand Down Expand Up @@ -44,6 +47,17 @@ def __init__(self, *args, **kwargs):
# easily detect when the page limit is reached
self.page_number = 0

# Science Museum's API tends to be flaky, so every request is retried with
# exponential backoff (for up to two minutes in total) when it fails with a
# transient gateway/availability error: 502, 503 or 504.
# See: https://github.com/WordPress/openverse/issues/4710
get_response_json = backoff.on_exception(
    backoff.expo,
    HTTPError,
    max_time=60 * 2,
    # Give up immediately unless the status is 502, 503 or 504. An HTTPError
    # can be raised without a response attached (``e.response`` is None); in
    # that case we also give up so the original error surfaces instead of an
    # AttributeError raised from inside this predicate.
    giveup=lambda e: (
        getattr(e.response, "status_code", None) not in {502, 503, 504}
    ),
)(ProviderDataIngester.get_response_json)

@staticmethod
def _get_year_ranges(final_year: int) -> list[tuple[int, int]]:
"""
Expand Down

0 comments on commit 083c07f

Please sign in to comment.