diff --git a/merino/providers/suggest/adm/backends/remotesettings.py b/merino/providers/suggest/adm/backends/remotesettings.py
index 203f5c69..826283da 100644
--- a/merino/providers/suggest/adm/backends/remotesettings.py
+++ b/merino/providers/suggest/adm/backends/remotesettings.py
@@ -1,6 +1,7 @@
 """A thin wrapper around the Remote Settings client."""
 
 import asyncio
+import time
 from asyncio import Task
 from typing import Any, Literal, cast
 from urllib.parse import urljoin
@@ -14,6 +15,7 @@
 from merino.providers.suggest.adm.backends.protocol import SuggestionContent
 from merino.utils.http_client import create_http_client
 from merino.utils.icon_processor import IconProcessor
+from merino.utils.metrics import get_metrics_client
 
 logger = logging.getLogger(__name__)
@@ -76,6 +78,7 @@ def __init__(
         )
 
         self.icon_processor = icon_processor
+        self.metrics_client = get_metrics_client()
 
     async def fetch(self) -> SuggestionContent:
         """Fetch suggestions, keywords, and icons from Remote Settings.
@@ -83,16 +86,22 @@
         Raises:
             RemoteSettingsError: Failed request to Remote Settings.
         """
+        fetch_start_time = time.time()
+        self.metrics_client.increment("remotesettings.fetch.calls")
+
         suggestions: dict[str, tuple[int, int]] = {}
         full_keywords: list[str] = []
         results: list[dict[str, Any]] = []
         icons: dict[str, str] = {}
         records: list[dict[str, Any]] = await self.get_records()
+        self.metrics_client.gauge("remotesettings.records.count", value=len(records))
+
         attachment_host: str = await self.get_attachment_host()
         rs_suggestions: list[KintoSuggestion] = await self.get_suggestions(
             attachment_host, records
         )
+        self.metrics_client.gauge("remotesettings.suggestions.count", value=len(rs_suggestions))
 
         fkw_index = 0
@@ -112,6 +121,9 @@
             results.append(suggestion.model_dump(exclude={"keywords", "full_keywords"}))
 
         icon_record = self.filter_records(record_type="icon", records=records)
+        self.metrics_client.gauge("remotesettings.icons.count", value=len(icon_record))
+
+        icon_processing_start = time.time()
 
         icon_data = []
         tasks = []
@@ -139,6 +151,16 @@
                 logger.error(f"Error processing icon {id}: {e}")
                 icons[id] = original_url
 
+        # Track total batch processing time for icons
+        icon_processing_duration = (time.time() - icon_processing_start) * 1000
+        self.metrics_client.timing(
+            "remotesettings.icon_batch_processing.time", value=icon_processing_duration
+        )
+
+        # Track total fetch time
+        total_duration = (time.time() - fetch_start_time) * 1000
+        self.metrics_client.timing("remotesettings.fetch.total_time", value=total_duration)
+
         return SuggestionContent(
             suggestions=suggestions,
             full_keywords=full_keywords,
diff --git a/merino/utils/icon_processor.py b/merino/utils/icon_processor.py
index 80f022d1..c50fa00c 100644
--- a/merino/utils/icon_processor.py
+++ b/merino/utils/icon_processor.py
@@ -2,12 +2,14 @@
 
 import hashlib
 import logging
+import time
 from typing import Dict, Optional
 
 from merino.configs import settings
 from merino.utils.gcs.gcs_uploader import GcsUploader
 from merino.utils.gcs.models import Image
 from merino.utils.http_client import create_http_client
+from merino.utils.metrics import get_metrics_client
 
 logger = logging.getLogger(__name__)
@@ -25,8 +27,17 @@ def __init__(self, gcs_project: str, gcs_bucket: str, cdn_hostname: str) -> None
         # Content hash cache: {content_hash: gcs_url}
         self.content_hash_cache = {}
 
+        # Metrics
+        self.metrics_client = get_metrics_client()
+
+        self.request_count = 0
+        self.processed_count = 0
+
     async def process_icon_url(self, url: str) -> str:
         """Process an external icon URL and return a GCS-hosted URL."""
+        start_time = time.time()
+        self.request_count += 1
+        self.metrics_client.increment("icon_processor.requests")
+
         # Skip URLs that are already from our CDN
         cdn_hostname = self.uploader.cdn_hostname
         if cdn_hostname and url.startswith(f"https://{cdn_hostname}"):
@@ -34,14 +45,20 @@
         try:
             # Download favicon
+            favicon_start = time.time()
             favicon_image = await self._download_favicon(url)
+            favicon_duration = (time.time() - favicon_start) * 1000
+            self.metrics_client.timing("icon_processor.download_time", value=favicon_duration)
+
             if not favicon_image:
                 logger.info(f"Failed to download favicon from {url}")
+                self.metrics_client.increment("icon_processor.download_failures")
                 return url
 
             # Check if the image is valid
             if not self._is_valid_image(favicon_image):
                 logger.info(f"Invalid image from {url}")
+                self.metrics_client.increment("icon_processor.invalid_images")
                 return url
 
             # Generate content hash
@@ -49,22 +66,37 @@
             # Check content hash cache - this avoids re-uploading identical content
             if content_hash in self.content_hash_cache:
+                self.metrics_client.increment("icon_processor.cache_hits")
                 return self.content_hash_cache[content_hash]
 
+            self.metrics_client.increment("icon_processor.cache_misses")
+
             # Generate destination path based on content hash
             destination = self._get_destination_path(favicon_image, content_hash)
 
             # GcsUploader already checks if the file exists before uploading
+            upload_start = time.time()
             gcs_url = self.uploader.upload_image(favicon_image, destination, forced_upload=False)
+            upload_duration = (time.time() - upload_start) * 1000
+            self.metrics_client.timing("icon_processor.upload_time", value=upload_duration)
 
             # Cache the result
             self.content_hash_cache[content_hash] = gcs_url
 
+            # Track successful processing
+            self.processed_count += 1
+            self.metrics_client.increment("icon_processor.processed")
+
             return gcs_url
 
         except Exception as e:
             logger.warning(f"Error processing icon {url}: {e}")
+            self.metrics_client.increment("icon_processor.errors")
            return url
+        finally:
+            # Record total processing time
+            total_duration = (time.time() - start_time) * 1000
+            self.metrics_client.timing("icon_processor.processing_time", value=total_duration)
 
     async def _download_favicon(self, url: str) -> Optional[Image]:
         """Download the favicon from the given URL.
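The patch assumes that `merino.utils.metrics.get_metrics_client()` returns a statsd-style client exposing `increment(name)`, `gauge(name, value=...)`, and `timing(name, value=...)`, with durations passed in milliseconds; that interface is exactly what the calls above exercise. A minimal in-memory stand-in under that assumption, useful for unit-testing the instrumented paths without a real metrics backend (the class name and storage layout here are illustrative, not part of Merino):

```python
# Hypothetical test double for the statsd-style client this patch assumes.
# Only the method shapes (increment/gauge/timing with a `value` kwarg) come
# from the diff above; everything else is illustrative.
from collections import defaultdict


class RecordingMetricsClient:
    """Records metric calls in memory so tests can assert on them."""

    def __init__(self) -> None:
        self.counters: dict[str, int] = defaultdict(int)
        self.gauges: dict[str, float] = {}
        self.timings: dict[str, list[float]] = defaultdict(list)

    def increment(self, name: str, value: int = 1) -> None:
        self.counters[name] += value

    def gauge(self, name: str, value: float) -> None:
        self.gauges[name] = value

    def timing(self, name: str, value: float) -> None:
        # The patched code passes durations in milliseconds.
        self.timings[name].append(value)
```

With `processor.metrics_client = RecordingMetricsClient()` swapped in (or `get_metrics_client` monkeypatched), a test can assert, for example, that `icon_processor.cache_hits` is incremented when the same content hash is processed twice.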