Skip to content

Commit

Permalink
test: Add CodSpeed to follow lib performance
Browse files Browse the repository at this point in the history
  • Loading branch information
clemlesne committed Feb 9, 2025
1 parent 86ee9dd commit fb4149b
Show file tree
Hide file tree
Showing 17 changed files with 227 additions and 64 deletions.
15 changes: 13 additions & 2 deletions .github/workflows/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ on:
- feat/*
- hotfix/*
- main
# Allows CodSpeed to trigger backtest performance analysis in order to generate initial data
workflow_dispatch:

jobs:
init:
Expand Down Expand Up @@ -52,7 +54,8 @@ jobs:
# Run all test suites
step:
- static
- unit
- unit-simple
- unit-codspeed
# Run on all supported Python versions
python-version:
- "3.11"
Expand Down Expand Up @@ -103,9 +106,17 @@ jobs:
- name: Configure environment variables
run: echo "${{ secrets.DOTENV_UNIT_TESTS }}" > .env

- name: Run tests
- name: Run tests (simple)
if: ${{ !contains(matrix.step, 'codspeed') }}
run: make test-${{ matrix.step }} version_full=${{ needs.init.outputs.VERSION_FULL }}

- name: Run tests (CodSpeed)
if: ${{ contains(matrix.step, 'codspeed') }}
uses: CodSpeedHQ/[email protected]
with:
token: ${{ secrets.CODSPEED_TOKEN }}
run: make test-${{ matrix.step }} version_full=${{ needs.init.outputs.VERSION_FULL }}

- name: Upload artifacts
uses: actions/[email protected]
if: always()
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,6 @@ test-reports/
# Local .env
!.env.example
.env.*

# CodSpeed
.codspeed/
31 changes: 22 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ install-deps:
uv sync --extra dev

@echo "➡️ Installing Playwright dependencies..."
uv run playwright install chrome --with-deps
uv run playwright install chromium --with-deps --no-shell

upgrade:
@echo "➡️ Updating Git submodules..."
Expand All @@ -53,7 +53,7 @@ upgrade:

test:
$(MAKE) test-static
$(MAKE) test-unit
$(MAKE) test-unit-simple

test-static:
@echo "➡️ Test dependencies issues (deptry)..."
Expand All @@ -65,18 +65,31 @@ test-static:
@echo "➡️ Test types (Pyright)..."
uv run pyright

test-unit:
bash cicd/test-unit-ci.sh
test-unit-simple:
bash cicd/test-unit-ci.sh simple

test-unit-codspeed:
bash cicd/test-unit-ci.sh codspeed

test-static-server:
@echo "➡️ Starting local static server..."
python3 -m http.server -d ./tests/websites 8000

test-unit-run:
@echo "➡️ Unit tests (Pytest)..."
uv run pytest \
--junit-xml=test-reports/$(version_full).xml \
--log-file=test-reports/$(version_full).log \
test-unit-simple-run:
@echo "➡️ Unit tests with no extra (Pytest)..."
CI=true uv run pytest \
--junit-xml=test-reports/$(version_full)-simple.xml \
--log-file=test-reports/$(version_full)-simple.log \
--maxprocesses=4 \
-n=logical \
tests/*.py

test-unit-codspeed-run:
@echo "➡️ Unit tests with CodSpeed (Pytest)..."
CI=true uv run pytest \
--codspeed \
--junit-xml=test-reports/$(version_full)-codspeed.xml \
--log-file=test-reports/$(version_full)-codspeed.log \
--maxprocesses=4 \
-n=logical \
tests/*.py
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Web scraper made for AI and simplicity in mind. It runs as a CLI that can be par
[![GitHub project license](https://img.shields.io/github/license/clemlesne/scrape-it-now)](https://github.com/clemlesne/scrape-it-now/blob/main/LICENSE)
[![PyPI package version](https://img.shields.io/pypi/v/scrape-it-now)](https://pypi.org/project/scrape-it-now)
[![PyPI supported Python versions](https://img.shields.io/pypi/pyversions/scrape-it-now)](https://pypi.org/project/scrape-it-now)
[![CodSpeed report](https://img.shields.io/endpoint?url=https://codspeed.io/badge.json)](https://codspeed.io/clemlesne/scrape-it-now)

## Features

Expand Down
9 changes: 8 additions & 1 deletion cicd/test-unit-ci.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
#!/bin/bash

mode=$1

if [ -z "$mode" ]; then
echo "Error: mode is required." >&2
exit 1
fi

# Start the first command in the background
make test-static-server 1>/dev/null 2>&1 &

# Capture the PID of the background process
UNIT_RUN_PID=$!

# Run the second command
make test-unit-run
make test-unit-${mode}-run
exit_code=$?

# Once the second command exits, kill the first process
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ dev = [
"pyinstaller~=6.11", # Create standalone executable
"pyright~=1.1", # Static type checker
"pytest-asyncio~=0.23", # Pytest plugin for async tests
"pytest-codspeed~=3.2.0", # Pytest plugin for measuring code speed
"pytest-repeat~=0.9", # Pytest plugin for repeating tests
"pytest-xdist[psutil]~=3.6", # Pytest plugin for parallel testing
"pytest~=8.3", # Testing framework
Expand Down
9 changes: 9 additions & 0 deletions src/scrape_it_now/helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
from os import environ as env

from dotenv import find_dotenv, load_dotenv

# First, load the environment variables from the .env file
load_dotenv(
find_dotenv(
# Use the current working directory from where the command is run
usecwd=True,
)
)

# Detect if the code is running in a CI environment
# See: https://stackoverflow.com/a/75223617
IS_CI = env.get("CI", "").lower() == "true"
if IS_CI:
print("CI environment detected, be aware configuration may differ") # noqa: T201
2 changes: 2 additions & 0 deletions src/scrape_it_now/helpers/identity.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@

from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider

from scrape_it_now.helpers import IS_CI
from scrape_it_now.helpers.cache import lru_acache
from scrape_it_now.helpers.http import azure_transport


@lru_acache()
async def credential() -> DefaultAzureCredential:
return DefaultAzureCredential(
process_timeout=120 if IS_CI else 10, # 2 mins in CI, 10 secs in production
# Performance
transport=await azure_transport(),
)
Expand Down
17 changes: 12 additions & 5 deletions src/scrape_it_now/helpers/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,15 @@
)
from structlog.stdlib import PositionalArgumentsFormatter

from scrape_it_now.helpers import IS_CI


def enable_debug_logging() -> None:
configure(
wrapper_class=make_filtering_bound_logger(DEBUG),
)


configure_once(
cache_logger_on_first_use=True,
context_class=dict,
Expand All @@ -44,8 +53,6 @@
# Framework does not exactly expose Logger, but that's easier to work with
logger: Logger = structlog_get_logger("scrape-it-now")


def enable_debug_logging() -> None:
configure(
wrapper_class=make_filtering_bound_logger(DEBUG),
)
# Enable debug logging on CI
if IS_CI:
enable_debug_logging()
68 changes: 50 additions & 18 deletions src/scrape_it_now/persistence/azure_blob_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,12 @@ async def delete_container(
await self._client.delete_container()
# Wait for it to be deleted, API is eventually consistent
while True:
try:
properties = await self._client.get_container_properties()
if properties.deleted:
break
with suppress(ResourceNotFoundError):
await self._client.get_container_properties()
await asyncio.sleep(2)
continue
# Deleted
except ResourceNotFoundError:
break
break
logger.info('Deleted Blob Storage "%s"', self._config.name)

async def __aenter__(self) -> "AzureBlobStorage":
Expand All @@ -207,21 +205,55 @@ async def __aenter__(self) -> "AzureBlobStorage":
container=self._config.name,
)

# Create if it does not exist
with suppress(ResourceExistsError):
# Create
await self._client.create_container()
# Wait for it to be created, API is eventually consistent
while True:
with suppress(ResourceNotFoundError):
properties = await self._client.get_container_properties()
if not properties.deleted:
break
await asyncio.sleep(2)
logger.debug('Created Blob Storage "%s"', self._config.name)
await self._wait_for_creation()
await self._wait_for_ready()

# Return instance
return self

async def _wait_for_ready(self) -> None:
"""
Wait for the container to be ready.
API is not consistent, so we need to check if the resource is ready to be used.
"""
while True:
# Try using it
try:
# Upload and clean a test blob
await self.upload_blob(
blob="ping",
data=b"ping",
length=4,
overwrite=True,
)
await self._client.delete_blob("ping")
# If no exception, the container is ready
logger.debug('Blob Storage "%s" is ready', self._config.name)
break
# If exception, the container is not ready yet
except Exception:
logger.debug("Blob Storage not ready yet, retrying", exc_info=True)
await asyncio.sleep(2)

async def _wait_for_creation(self) -> None:
"""
Wait for the container to be created.
Loop indefinitely until the container is created. API is not consistent, so we need to check if the resource is created. Whether the container responds to upload/download operations is checked separately, by `_wait_for_ready`.
"""
# Start creation
with suppress(ResourceExistsError):
await self._client.create_container()

# Wait for it to be created, API is eventually consistent
while True:
with suppress(ResourceNotFoundError):
await self._client.get_container_properties()
logger.debug('Created Blob Storage "%s"', self._config.name)
# Created
break
await asyncio.sleep(2)

async def __aexit__(self, *exc: Any) -> None:
await self._service.close()
49 changes: 30 additions & 19 deletions src/scrape_it_now/persistence/azure_queue_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,35 +120,46 @@ async def create_queue(
await self._wait_for_ready()

async def _wait_for_ready(self) -> None:
"""
Wait for the queue to be ready.
Loop indefinitely until the queue responds to send/pull operations. API is not consistent, so we need to check if the resource is ready to be used.
"""
while True:
# Try using it
try:
# Send a test message
# Send and clean a test message
await self.send_message("ping")
# Try to consume the message(s)
async for message in self.receive_messages(
max_messages=1, visibility_timeout=1
):
await self.delete_message(message)
# If no exception, the queue is created
# If no exception, the queue is ready
logger.debug('Queue Storage "%s" is ready', self._config.name)
return
except Exception: # If exception, the queue is not created yet
logger.debug("Queue not created yet, retrying")
break
# If exception, the queue is not ready yet
except Exception:
logger.debug("Queue not ready yet, retrying", exc_info=True)
await asyncio.sleep(2)

async def _wait_for_creation(self) -> None:
# Create if it does not exist
"""
Wait for the queue to be created.
Loop indefinitely until the queue is created. API is not consistent, so we need to check if the resource is created.
"""
# Start creation
with suppress(ResourceExistsError):
# Create
await self._client.create_queue()
# Wait for it to be created, API is eventually consistent
while True:
with suppress(ResourceNotFoundError):
await self._client.get_queue_properties()
logger.debug('Created Queue Storage "%s"', self._config.name)
# Created
return
await asyncio.sleep(2)

# Wait for it to be created, API is eventually consistent
while True:
with suppress(ResourceNotFoundError):
await self._client.get_queue_properties()
logger.debug('Created Queue Storage "%s"', self._config.name)
# Created
break
await asyncio.sleep(2)

@retry(
reraise=True,
Expand All @@ -166,12 +177,12 @@ async def delete_queue(
await self._client.delete_queue()
# Wait for it to be deleted, API is eventually consistent
while True:
try:
with suppress(ResourceNotFoundError):
await self._client.get_queue_properties()
await asyncio.sleep(2)
continue
# Deleted
except ResourceNotFoundError:
break
break
logger.info('Deleted Queue Storage "%s"', self._config.name)

def _escape(self, value: str) -> str:
Expand Down
Loading

0 comments on commit fb4149b

Please sign in to comment.