test: Add CodSpeed to follow lib performance
clemlesne committed Feb 9, 2025
1 parent 4733803 commit e05299e
Showing 17 changed files with 290 additions and 88 deletions.
23 changes: 21 additions & 2 deletions .github/workflows/pipeline.yaml
@@ -15,6 +15,8 @@ on:
- feat/*
- hotfix/*
- main
# Allows CodSpeed to trigger backtest performance analysis in order to generate initial data
workflow_dispatch:

jobs:
init:
@@ -52,7 +54,8 @@ jobs:
# Run all test suites
step:
- static
- unit
- unit-simple
- unit-codspeed
# Run on all supported Python versions
python-version:
- "3.11"
@@ -103,9 +106,25 @@ jobs:
- name: Configure environment variables
run: echo "${{ secrets.DOTENV_UNIT_TESTS }}" > .env

- name: Run tests
- name: Run test servers (unit)
if: ${{ contains(matrix.step, 'unit') }}
run: make run-test-servers

- name: Run tests (simple)
if: ${{ !contains(matrix.step, 'codspeed') }}
run: make test-${{ matrix.step }} version_full=${{ needs.init.outputs.VERSION_FULL }}

- name: Run tests (CodSpeed)
if: ${{ contains(matrix.step, 'codspeed') }}
uses: CodSpeedHQ/[email protected]
with:
token: ${{ secrets.CODSPEED_TOKEN }}
run: make test-${{ matrix.step }} version_full=${{ needs.init.outputs.VERSION_FULL }}

- name: Kill test servers (unit)
if: ${{ contains(matrix.step, 'unit') }}
run: make kill-test-servers

- name: Upload artifacts
uses: actions/[email protected]
if: always()
6 changes: 6 additions & 0 deletions .gitignore
@@ -291,3 +291,9 @@ test-reports/
# Local .env
!.env.example
.env.*

# CodSpeed
.codspeed/

# Test servers PIDs
*.pid
39 changes: 29 additions & 10 deletions Makefile
@@ -53,7 +53,9 @@ upgrade:

test:
$(MAKE) test-static
$(MAKE) test-unit
$(MAKE) run-test-servers
$(MAKE) test-unit-simple
$(MAKE) kill-test-servers

test-static:
@echo "➡️ Test dependencies issues (deptry)..."
@@ -65,18 +67,35 @@ test-static:
@echo "➡️ Test types (Pyright)..."
uv run pyright

test-unit:
bash cicd/test-unit-ci.sh
run-test-servers:
@echo "➡️ Starting Chromium server..."
python3 -m cicd.run_chromium 1>/dev/null 2>&1 & echo "$$!" > .chromium.pid

test-static-server:
@echo "➡️ Starting local static server..."
python3 -m http.server -d ./tests/websites 8000
python3 -m http.server -d ./tests/websites 8000 1>/dev/null 2>&1 & echo "$$!" > .static_server.pid

test-unit-run:
@echo "➡️ Unit tests (Pytest)..."
uv run pytest \
--junit-xml=test-reports/$(version_full).xml \
--log-file=test-reports/$(version_full).log \
kill-test-servers:
@echo "➡️ Killing Chromium server..."
kill -s SIGKILL $(shell cat .chromium.pid)

@echo "➡️ Killing local static server..."
kill -s SIGKILL $(shell cat .static_server.pid)

test-unit-simple:
@echo "➡️ Unit tests with no extra (Pytest)..."
CI=true uv run pytest \
--junit-xml=test-reports/$(version_full)-simple.xml \
--log-file=test-reports/$(version_full)-simple.log \
--maxprocesses=4 \
-n=logical \
tests/*.py

test-unit-codspeed:
@echo "➡️ Unit tests with CodSpeed (Pytest)..."
CI=true uv run pytest \
--codspeed \
--junit-xml=test-reports/$(version_full)-codspeed.xml \
--log-file=test-reports/$(version_full)-codspeed.log \
--maxprocesses=4 \
-n=logical \
tests/*.py
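For reference, the `--codspeed` flag in the new `test-unit-codspeed` target instruments tests that are declared as benchmarks with pytest-codspeed. A minimal sketch of such a test, not part of this commit, using a hypothetical workload function:

import pytest


def build_payload() -> list[int]:
    # Hypothetical workload standing in for real library code
    return [i * i for i in range(10_000)]


@pytest.mark.benchmark
def test_build_payload_perf():
    # Under `--codspeed`, pytest-codspeed measures this test and reports the
    # result to CodSpeed; without the flag it runs as an ordinary unit test.
    assert len(build_payload()) == 10_000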
1 change: 1 addition & 0 deletions README.md
@@ -6,6 +6,7 @@ Web scraper made for AI and simplicity in mind. It runs as a CLI that can be par
[![GitHub project license](https://img.shields.io/github/license/clemlesne/scrape-it-now)](https://github.com/clemlesne/scrape-it-now/blob/main/LICENSE)
[![PyPI package version](https://img.shields.io/pypi/v/scrape-it-now)](https://pypi.org/project/scrape-it-now)
[![PyPI supported Python versions](https://img.shields.io/pypi/pyversions/scrape-it-now)](https://pypi.org/project/scrape-it-now)
[![CodSpeed report](https://img.shields.io/endpoint?url=https://codspeed.io/badge.json)](https://codspeed.io/clemlesne/scrape-it-now)

## Features

31 changes: 31 additions & 0 deletions cicd/run_chromium.py
@@ -0,0 +1,31 @@
import asyncio
from contextlib import suppress
from logging import getLogger

from playwright.async_api import async_playwright

CDP_PORT = 9222

logger = getLogger(__name__)


async def run():
async with async_playwright() as p:
# Launch Chromium in server mode with CDP enabled
await p.chromium.launch(
args=[
"--disable-gl-drawing-for-tests",
f"--remote-debugging-port={CDP_PORT}",
],
channel="chromium",
devtools=True,
)
logger.info(f"Chromium running with CDP enabled on localhost:{CDP_PORT}")

# Wait indefinitely (or until the browser is closed)
await asyncio.Future()


if __name__ == "__main__":
with suppress(KeyboardInterrupt):
asyncio.run(run())
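The script above only starts Chromium with CDP enabled on port 9222, presumably so the test suite can attach to that running instance instead of launching its own browser. A minimal sketch of such a connection, assuming the default ports from this commit (the page usage is purely illustrative):

import asyncio

from playwright.async_api import async_playwright


async def main():
    async with async_playwright() as p:
        # Attach to the Chromium instance started by `make run-test-servers`
        browser = await p.chromium.connect_over_cdp("http://localhost:9222")
        page = await browser.new_page()
        # The local static server from the Makefile serves test sites on port 8000
        await page.goto("http://localhost:8000")
        print(await page.title())
        await browser.close()


if __name__ == "__main__":
    asyncio.run(main())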
17 changes: 0 additions & 17 deletions cicd/test-unit-ci.sh

This file was deleted.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -72,6 +72,7 @@ dev = [
"pyinstaller~=6.11", # Create standalone executable
"pyright~=1.1", # Static type checker
"pytest-asyncio~=0.23", # Pytest plugin for async tests
"pytest-codspeed~=3.2.0", # Pytest plugin for measuring code speed
"pytest-repeat~=0.9", # Pytest plugin for repeating tests
"pytest-xdist[psutil]~=3.6", # Pytest plugin for parallel testing
"pytest~=8.3", # Testing framework
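Besides the `@pytest.mark.benchmark` marker, pytest-codspeed also exposes a `benchmark` fixture (compatible with pytest-benchmark) for timing a single callable inside a test. A short sketch, with a hypothetical `parse` helper:

def parse(raw: str) -> list[str]:
    # Hypothetical function standing in for real library code
    return raw.split(",")


def test_parse_perf(benchmark):
    # Only the call made through `benchmark` is measured, not the assertion below
    result = benchmark(parse, "a,b,c")
    assert result == ["a", "b", "c"]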
2 changes: 2 additions & 0 deletions src/scrape_it_now/helpers/identity.py
@@ -2,13 +2,15 @@

from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider

from scrape_it_now.helpers import IS_CI
from scrape_it_now.helpers.cache import lru_acache
from scrape_it_now.helpers.http import azure_transport


@lru_acache()
async def credential() -> DefaultAzureCredential:
return DefaultAzureCredential(
process_timeout=120 if IS_CI else 10, # 2 mins in CI, 10 secs in production
# Performance
transport=await azure_transport(),
)
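`IS_CI` is imported from `scrape_it_now.helpers`, whose definition is not shown in this diff. Given that the new Makefile targets export `CI=true`, it presumably reduces to reading that environment variable; a rough sketch of that assumption, not the actual helper:

from os import environ

# Assumed shape only: GitHub Actions and the new Makefile targets set CI=true
IS_CI = environ.get("CI", "").lower() in ("1", "true", "yes")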
68 changes: 50 additions & 18 deletions src/scrape_it_now/persistence/azure_blob_storage.py
@@ -182,14 +182,12 @@ async def delete_container(
await self._client.delete_container()
# Wait for it to be deleted, API is eventually consistent
while True:
try:
properties = await self._client.get_container_properties()
if properties.deleted:
break
with suppress(ResourceNotFoundError):
await self._client.get_container_properties()
await asyncio.sleep(2)
continue
# Deleted
except ResourceNotFoundError:
break
break
logger.info('Deleted Blob Storage "%s"', self._config.name)

async def __aenter__(self) -> "AzureBlobStorage":
@@ -207,21 +205,55 @@ async def __aenter__(self) -> "AzureBlobStorage":
container=self._config.name,
)

# Create if it does not exist
with suppress(ResourceExistsError):
# Create
await self._client.create_container()
# Wait for it to be created, API is eventually consistent
while True:
with suppress(ResourceNotFoundError):
properties = await self._client.get_container_properties()
if not properties.deleted:
break
await asyncio.sleep(2)
logger.debug('Created Blob Storage "%s"', self._config.name)
await self._wait_for_creation()
await self._wait_for_ready()

# Return instance
return self

async def _wait_for_ready(self) -> None:
"""
Wait for the container to be ready.
Loop indefinitely until the container responds to upload/download operations. API is not consistent, so we need to check if the resource is ready to be used.
"""
while True:
# Try using it
try:
# Upload and clean a test blob
await self.upload_blob(
blob="ping",
data=b"ping",
length=4,
overwrite=True,
)
await self._client.delete_blob("ping")
# If no exception, the container is ready
logger.debug('Blob Storage "%s" is ready', self._config.name)
break
# If exception, the container is not ready yet
except Exception:
logger.debug("Blob Storage not ready yet, retrying", exc_info=True)
await asyncio.sleep(2)

async def _wait_for_creation(self) -> None:
"""
Wait for the container to be created.
Loop indefinitely until the container is created. API is not consistent, so we need to check if the resource is created.
"""
# Start creation
with suppress(ResourceExistsError):
await self._client.create_container()

# Wait for it to be created, API is eventually consistent
while True:
with suppress(ResourceNotFoundError):
await self._client.get_container_properties()
logger.debug('Created Blob Storage "%s"', self._config.name)
# Created
break
await asyncio.sleep(2)

async def __aexit__(self, *exc: Any) -> None:
await self._service.close()
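The `_wait_for_creation` / `_wait_for_ready` pair added here and in the queue backend follows one pattern: probe the resource with a real operation and retry until the probe stops failing, because the Azure APIs are only eventually consistent. A generic sketch of that polling loop, independent of the Azure SDK:

import asyncio
from collections.abc import Awaitable, Callable
from logging import getLogger

logger = getLogger(__name__)


async def wait_until_ready(
    probe: Callable[[], Awaitable[None]],
    delay: float = 2,
) -> None:
    # Retry the probe until it succeeds; callers pass an operation such as
    # uploading a test blob or sending a test message
    while True:
        try:
            await probe()
            return
        except Exception:
            logger.debug("Resource not ready yet, retrying", exc_info=True)
            await asyncio.sleep(delay)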
49 changes: 30 additions & 19 deletions src/scrape_it_now/persistence/azure_queue_storage.py
@@ -120,35 +120,46 @@ async def create_queue(
await self._wait_for_ready()

async def _wait_for_ready(self) -> None:
"""
Wait for the queue to be ready.
Loop indefinitely until the queue responds to send/pull operations. API is not consistent, so we need to check if the resource is ready to be used.
"""
while True:
# Try using it
try:
# Send a test message
# Send and clean a test message
await self.send_message("ping")
# Try to consume the message(s)
async for message in self.receive_messages(
max_messages=1, visibility_timeout=1
):
await self.delete_message(message)
# If no exception, the queue is created
# If no exception, the queue is ready
logger.debug('Queue Storage "%s" is ready', self._config.name)
return
except Exception: # If exception, the queue is not created yet
logger.debug("Queue not created yet, retrying")
break
# If exception, the queue is not ready yet
except Exception:
logger.debug("Queue not ready yet, retrying", exc_info=True)
await asyncio.sleep(2)

async def _wait_for_creation(self) -> None:
# Create if it does not exist
"""
Wait for the queue to be created.
Loop indefinitely until the queue is created. API is not consistent, so we need to check if the resource is created.
"""
# Start creation
with suppress(ResourceExistsError):
# Create
await self._client.create_queue()
# Wait for it to be created, API is eventually consistent
while True:
with suppress(ResourceNotFoundError):
await self._client.get_queue_properties()
logger.debug('Created Queue Storage "%s"', self._config.name)
# Created
return
await asyncio.sleep(2)

# Wait for it to be created, API is eventually consistent
while True:
with suppress(ResourceNotFoundError):
await self._client.get_queue_properties()
logger.debug('Created Queue Storage "%s"', self._config.name)
# Created
break
await asyncio.sleep(2)

@retry(
reraise=True,
@@ -166,12 +177,12 @@ async def delete_queue(
await self._client.delete_queue()
# Wait for it to be deleted, API is eventually consistent
while True:
try:
with suppress(ResourceNotFoundError):
await self._client.get_queue_properties()
await asyncio.sleep(2)
continue
# Deleted
except ResourceNotFoundError:
break
break
logger.info('Deleted Queue Storage "%s"', self._config.name)

def _escape(self, value: str) -> str:
9 changes: 3 additions & 6 deletions src/scrape_it_now/persistence/local_disk.py
@@ -15,6 +15,7 @@
from aiofiles.os import makedirs, path, remove, rmdir
from pydantic import BaseModel, Field

from scrape_it_now.helpers import IS_CI
from scrape_it_now.helpers.logging import logger
from scrape_it_now.helpers.resources import file_lock, local_disk_cache_path
from scrape_it_now.models.message import Message
@@ -254,7 +255,6 @@ async def __aexit__(self, *exc: Any) -> None:
class QueueConfig(BaseModel):
name: str
table: str = "queue"
timeout: int = 30

async def db_path(self) -> str:
return await path.abspath(
@@ -398,10 +398,7 @@ async def create_queue(
await makedirs(dirname(file_path), exist_ok=True)

# Initialize the database
async with aiosqlite.connect(
database=file_path,
timeout=self._config.timeout, # Wait for 30 secs before giving up
) as connection:
async with self._use_connection() as connection:
# Enable WAL mode to allow multiple readers and one writer
await connection.execute(
"""
@@ -437,7 +434,7 @@ async def _use_connection(self) -> AsyncGenerator[aiosqlite.Connection, None]:
# Connect and return the connection
async with aiosqlite.connect(
database=await self._config.db_path(),
timeout=self._config.timeout, # Wait for 30 secs before giving up
timeout=2 * 60 if IS_CI else 30, # 2 mins in CI, 30 secs in production
) as connection:
yield connection
