DISCO-3308 Adding metrics to icon processing
gruberb committed Mar 10, 2025
1 parent d5df50d commit 1519e47
Showing 2 changed files with 54 additions and 0 deletions.
22 changes: 22 additions & 0 deletions merino/providers/suggest/adm/backends/remotesettings.py
@@ -1,6 +1,7 @@
"""A thin wrapper around the Remote Settings client."""

import asyncio
import time
from asyncio import Task
from typing import Any, Literal, cast
from urllib.parse import urljoin
@@ -14,6 +15,7 @@
from merino.providers.suggest.adm.backends.protocol import SuggestionContent
from merino.utils.http_client import create_http_client
from merino.utils.icon_processor import IconProcessor
from merino.utils.metrics import get_metrics_client

logger = logging.getLogger(__name__)

@@ -76,23 +78,30 @@ def __init__(
        )

        self.icon_processor = icon_processor
        self.metrics_client = get_metrics_client()

    async def fetch(self) -> SuggestionContent:
        """Fetch suggestions, keywords, and icons from Remote Settings.
        Raises:
            RemoteSettingsError: Failed request to Remote Settings.
        """
        fetch_start_time = time.time()
        self.metrics_client.increment("remotesettings.fetch.calls")

        suggestions: dict[str, tuple[int, int]] = {}
        full_keywords: list[str] = []
        results: list[dict[str, Any]] = []
        icons: dict[str, str] = {}

        records: list[dict[str, Any]] = await self.get_records()
        self.metrics_client.gauge("remotesettings.records.count", value=len(records))

        attachment_host: str = await self.get_attachment_host()
        rs_suggestions: list[KintoSuggestion] = await self.get_suggestions(
            attachment_host, records
        )
        self.metrics_client.gauge("remotesettings.suggestions.count", value=len(rs_suggestions))

        fkw_index = 0
@@ -112,6 +121,9 @@ async def fetch(self) -> SuggestionContent:
            results.append(suggestion.model_dump(exclude={"keywords", "full_keywords"}))

        icon_record = self.filter_records(record_type="icon", records=records)
        self.metrics_client.gauge("remotesettings.icons.count", value=len(icon_record))

        icon_processing_start = time.time()

        icon_data = []
        tasks = []
@@ -139,6 +151,16 @@
                logger.error(f"Error processing icon {id}: {e}")
                icons[id] = original_url

        # Track total batch processing time for icons
        icon_processing_duration = (time.time() - icon_processing_start) * 1000
        self.metrics_client.timing(
            "remotesettings.icon_batch_processing.time", value=icon_processing_duration
        )

        # Track total fetch time
        total_duration = (time.time() - fetch_start_time) * 1000
        self.metrics_client.timing("remotesettings.fetch.total_time", value=total_duration)

        return SuggestionContent(
            suggestions=suggestions,
            full_keywords=full_keywords,
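Note: the new calls above assume a statsd-style metrics client exposing increment, gauge, and timing, with durations reported in milliseconds. A minimal sketch of that assumed interface, plus a small helper that captures the repeated time.time() pattern; both are illustrative and not part of this commit:

import time
from contextlib import contextmanager
from typing import Iterator, Protocol

class MetricsClient(Protocol):
    """Statsd-style interface the diff's calls rely on (assumed, not Merino's actual class)."""

    def increment(self, name: str, value: int = 1) -> None: ...
    def gauge(self, name: str, value: float) -> None: ...
    def timing(self, name: str, value: float) -> None: ...

@contextmanager
def timed(metrics: MetricsClient, name: str) -> Iterator[None]:
    """Emit a millisecond timing for the wrapped block, mirroring the pattern in fetch()."""
    start = time.time()
    try:
        yield
    finally:
        metrics.timing(name, value=(time.time() - start) * 1000)

With such a helper, the manual start/stop pairs in fetch() could be written as, e.g., "with timed(self.metrics_client, "remotesettings.fetch.total_time"): ..." — a hypothetical refactor, not what the commit does.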
32 changes: 32 additions & 0 deletions merino/utils/icon_processor.py
@@ -2,12 +2,14 @@

import hashlib
import logging
import time
from typing import Dict, Optional

from merino.configs import settings
from merino.utils.gcs.gcs_uploader import GcsUploader
from merino.utils.gcs.models import Image
from merino.utils.http_client import create_http_client
from merino.utils.metrics import get_metrics_client

logger = logging.getLogger(__name__)

@@ -25,46 +27,76 @@ def __init__(self, gcs_project: str, gcs_bucket: str, cdn_hostname: str) -> None
        # Content hash cache: {content_hash: gcs_url}
        self.content_hash_cache = {}

        # Metrics
        self.metrics_client = get_metrics_client()
        self.request_count = 0
        self.processed_count = 0

    async def process_icon_url(self, url: str) -> str:
        """Process an external icon URL and return a GCS-hosted URL."""
        start_time = time.time()
        self.request_count += 1
        self.metrics_client.increment("icon_processor.requests")

        # Skip URLs that are already from our CDN
        cdn_hostname = self.uploader.cdn_hostname
        if cdn_hostname and url.startswith(f"https://{cdn_hostname}"):
            return url

        try:
            # Download favicon
            favicon_start = time.time()
            favicon_image = await self._download_favicon(url)
            favicon_duration = (time.time() - favicon_start) * 1000
            self.metrics_client.timing("icon_processor.download_time", value=favicon_duration)

            if not favicon_image:
                logger.info(f"Failed to download favicon from {url}")
                self.metrics_client.increment("icon_processor.download_failures")
                return url

            # Check if the image is valid
            if not self._is_valid_image(favicon_image):
                logger.info(f"Invalid image from {url}")
                self.metrics_client.increment("icon_processor.invalid_images")
                return url

            # Generate content hash
            content_hash = hashlib.sha256(favicon_image.content).hexdigest()

            # Check content hash cache - this avoids re-uploading identical content
            if content_hash in self.content_hash_cache:
                self.metrics_client.increment("icon_processor.cache_hits")
                return self.content_hash_cache[content_hash]

            self.metrics_client.increment("icon_processor.cache_misses")

            # Generate destination path based on content hash
            destination = self._get_destination_path(favicon_image, content_hash)

            # GcsUploader already checks if the file exists before uploading
            upload_start = time.time()
            gcs_url = self.uploader.upload_image(favicon_image, destination, forced_upload=False)
            upload_duration = (time.time() - upload_start) * 1000
            self.metrics_client.timing("icon_processor.upload_time", value=upload_duration)

            # Cache the result
            self.content_hash_cache[content_hash] = gcs_url

            # Track successful processing
            self.processed_count += 1
            self.metrics_client.increment("icon_processor.processed")

            return gcs_url

        except Exception as e:
            logger.warning(f"Error processing icon {url}: {e}")
            self.metrics_client.increment("icon_processor.errors")
            return url
        finally:
            # Record total processing time
            total_duration = (time.time() - start_time) * 1000
            self.metrics_client.timing("icon_processor.processing_time", value=total_duration)

    async def _download_favicon(self, url: str) -> Optional[Image]:
        """Download the favicon from the given URL.
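One design note on the cache_hits/cache_misses counters above: the cache is keyed by a SHA-256 of the image bytes rather than by URL, so two sites serving byte-identical favicons resolve to one uploaded object. A standalone illustration of that keying (hypothetical names and URL scheme, not Merino code):

import hashlib

content_hash_cache: dict[str, str] = {}

def cached_url(content: bytes) -> str:
    """Return one stable CDN URL per distinct payload (URL scheme is made up)."""
    digest = hashlib.sha256(content).hexdigest()
    if digest in content_hash_cache:
        # This branch is what icon_processor.cache_hits counts.
        return content_hash_cache[digest]
    # First sighting: counted as icon_processor.cache_misses; upload happens once.
    url = f"https://cdn.example.com/favicons/{digest}.png"
    content_hash_cache[digest] = url
    return url

# Same bytes from two different origins resolve to one destination.
payload = b"fake-favicon-bytes"
assert cached_url(payload) == cached_url(payload)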
