Skip to content

Commit

Permalink
test: Add CodSpeed to follow lib performance
Browse files Browse the repository at this point in the history
  • Loading branch information
clemlesne committed Feb 9, 2025
1 parent 4733803 commit b07dba6
Show file tree
Hide file tree
Showing 17 changed files with 302 additions and 91 deletions.
23 changes: 21 additions & 2 deletions .github/workflows/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ on:
- feat/*
- hotfix/*
- main
# Allows CodSpeed to trigger backtest performance analysis in order to generate initial data
workflow_dispatch:

jobs:
init:
Expand Down Expand Up @@ -52,7 +54,8 @@ jobs:
# Run all test suites
step:
- static
- unit
- unit-simple
- unit-codspeed
# Run on all supported Python versions
python-version:
- "3.11"
Expand Down Expand Up @@ -103,9 +106,25 @@ jobs:
- name: Configure environment variables
run: echo "${{ secrets.DOTENV_UNIT_TESTS }}" > .env

- name: Run tests
- name: Run test servers (unit)
if: ${{ contains(matrix.step, 'unit') }}
run: make run-test-servers

- name: Run tests (simple)
if: ${{ !contains(matrix.step, 'codspeed') }}
run: make test-${{ matrix.step }} version_full=${{ needs.init.outputs.VERSION_FULL }}

- name: Run tests (CodSpeed)
if: ${{ contains(matrix.step, 'codspeed') }}
uses: CodSpeedHQ/[email protected]
with:
token: ${{ secrets.CODSPEED_TOKEN }}
run: make test-${{ matrix.step }} version_full=${{ needs.init.outputs.VERSION_FULL }}

- name: Kill test servers (unit)
if: ${{ contains(matrix.step, 'unit') }}
run: make kill-test-servers

- name: Upload artifacts
uses: actions/[email protected]
if: always()
Expand Down
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,9 @@ test-reports/
# Local .env
!.env.example
.env.*

# CodSpeed
.codspeed/

# Test servers PIDs
*.pid
45 changes: 32 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ install:

$(MAKE) install-deps

@echo "➡️ Installing Playwright dependencies..."
uv run playwright install chromium --with-deps --no-shell

# Sync the project's Python dependencies with uv, including the "dev" extra,
# then install the Chromium browser that Playwright drives (together with the
# dependencies requested by --with-deps).
install-deps:
	@echo "➡️ Syncing dependencies..."
	uv sync --extra dev

	@echo "➡️ Installing Playwright dependencies..."
	uv run playwright install chromium --with-deps --no-shell

upgrade:
@echo "➡️ Updating Git submodules..."
git submodule update --init --recursive
Expand All @@ -53,7 +53,9 @@ upgrade:

test:
$(MAKE) test-static
$(MAKE) test-unit
$(MAKE) run-test-servers
$(MAKE) test-unit-simple
$(MAKE) kill-test-servers

test-static:
@echo "➡️ Test dependencies issues (deptry)..."
Expand All @@ -65,18 +67,35 @@ test-static:
@echo "➡️ Test types (Pyright)..."
uv run pyright

test-unit:
bash cicd/test-unit-ci.sh
run-test-servers:
@echo "➡️ Starting Chromium server..."
python3 -m cicd.run_chromium 1>/dev/null 2>&1 & echo "$$!" > .chromium.pid

test-static-server:
@echo "➡️ Starting local static server..."
python3 -m http.server -d ./tests/websites 8000
python3 -m http.server -d ./tests/websites 8000 1>/dev/null 2>&1 & echo "$$!" > .static_server.pid

# Stop the background test servers whose PIDs were recorded in .chromium.pid
# and .static_server.pid (written by run-test-servers).
#
# The PID files are read by the recipe's own shell ($$(...)) at the moment each
# line runs, rather than by make's $(shell ...) when the recipe is expanded.
# The signal is spelled "KILL" (no "SIG" prefix), the form POSIX requires every
# sh `kill` to accept; make runs recipes with /bin/sh by default.
kill-test-servers:
	@echo "➡️ Killing Chromium server..."
	kill -s KILL "$$(cat .chromium.pid)"

	@echo "➡️ Killing local static server..."
	kill -s KILL "$$(cat .static_server.pid)"

# Run the pytest suite found in tests/*.py through uv, with CI=true set in the
# environment of the pytest process.
# A JUnit XML report and a log file are written under test-reports/, both named
# after $(version_full) with a "-simple" suffix. version_full is not defined in
# this target; the CI workflow passes it on the make command line.
# -n=logical and --maxprocesses=4 are parallel-run options (pytest-xdist is a
# dev dependency): worker count follows the logical CPUs, capped at 4.
test-unit-simple:
	@echo "➡️ Unit tests with no extra (Pytest)..."
	CI=true uv run pytest \
		--junit-xml=test-reports/$(version_full)-simple.xml \
		--log-file=test-reports/$(version_full)-simple.log \
		--maxprocesses=4 \
		-n=logical \
		tests/*.py

test-unit-run:
@echo "➡️ Unit tests (Pytest)..."
uv run pytest \
--junit-xml=test-reports/$(version_full).xml \
--log-file=test-reports/$(version_full).log \
test-unit-codspeed:
@echo "➡️ Unit tests with CodSpeed (Pytest)..."
CI=true uv run pytest \
--codspeed \
--junit-xml=test-reports/$(version_full)-codspeed.xml \
--log-file=test-reports/$(version_full)-codspeed.log \
--maxprocesses=4 \
-n=logical \
tests/*.py
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Web scraper made for AI and simplicity in mind. It runs as a CLI that can be par
[![GitHub project license](https://img.shields.io/github/license/clemlesne/scrape-it-now)](https://github.com/clemlesne/scrape-it-now/blob/main/LICENSE)
[![PyPI package version](https://img.shields.io/pypi/v/scrape-it-now)](https://pypi.org/project/scrape-it-now)
[![PyPI supported Python versions](https://img.shields.io/pypi/pyversions/scrape-it-now)](https://pypi.org/project/scrape-it-now)
[![CodSpeed report](https://img.shields.io/endpoint?url=https://codspeed.io/badge.json)](https://codspeed.io/clemlesne/scrape-it-now)

## Features

Expand Down
40 changes: 40 additions & 0 deletions cicd/run_chromium.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import asyncio
from contextlib import suppress
from os import environ as env

from playwright.async_api import async_playwright

from scrape_it_now.helpers.logging import logger
from scrape_it_now.helpers.resources import browsers_install_path
from scrape_it_now.scrape import install

CDP_PORT = 9222


async def run():
    """
    Start a Chromium instance exposing the Chrome DevTools Protocol (CDP).

    The browser is installed first if needed, then launched with remote
    debugging enabled on `CDP_PORT`. The coroutine never returns on its own:
    it keeps the Playwright context open until the process is stopped.
    """
    # Point Playwright to the application browsers directory
    # See: https://playwright.dev/docs/browsers#hermetic-install
    env["PLAYWRIGHT_BROWSERS_PATH"] = await browsers_install_path()

    # Note: System packages are not installed here, they must be installed manually
    await install(True)

    # Command line flags handed over to Chromium
    chromium_args = [
        "--disable-gl-drawing-for-tests",
        f"--remote-debugging-port={CDP_PORT}",
    ]

    async with async_playwright() as p:
        # Launch Chromium with CDP enabled
        await p.chromium.launch(
            args=chromium_args,
            channel="chromium",
            devtools=True,
        )
        logger.info(f"Chromium running with CDP enabled on localhost:{CDP_PORT}")

        # This future is never resolved, so the coroutine blocks here until the
        # process is interrupted or killed from the outside
        await asyncio.Future()


if __name__ == "__main__":
    # Run the server until interrupted; Ctrl+C exits quietly without a traceback
    try:
        asyncio.run(run())
    except KeyboardInterrupt:
        pass
17 changes: 0 additions & 17 deletions cicd/test-unit-ci.sh

This file was deleted.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ dev = [
"pyinstaller~=6.11", # Create standalone executable
"pyright~=1.1", # Static type checker
"pytest-asyncio~=0.23", # Pytest plugin for async tests
"pytest-codspeed~=3.2.0", # Pytest plugin for measuring code speed
"pytest-repeat~=0.9", # Pytest plugin for repeating tests
"pytest-xdist[psutil]~=3.6", # Pytest plugin for parallel testing
"pytest~=8.3", # Testing framework
Expand Down
2 changes: 2 additions & 0 deletions src/scrape_it_now/helpers/identity.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@

from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider

from scrape_it_now.helpers import IS_CI
from scrape_it_now.helpers.cache import lru_acache
from scrape_it_now.helpers.http import azure_transport


@lru_acache()
async def credential() -> DefaultAzureCredential:
    """
    Build the Azure credential used by the application.

    The result goes through `lru_acache` (presumably memoized, confirm against
    the cache helper).
    """
    # 2 mins in CI, 10 secs in production
    timeout_secs = 120 if IS_CI else 10
    # Performance
    transport = await azure_transport()
    return DefaultAzureCredential(
        process_timeout=timeout_secs,
        transport=transport,
    )
Expand Down
68 changes: 50 additions & 18 deletions src/scrape_it_now/persistence/azure_blob_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,12 @@ async def delete_container(
await self._client.delete_container()
# Wait for it to be deleted, API is eventually consistent
while True:
try:
properties = await self._client.get_container_properties()
if properties.deleted:
break
with suppress(ResourceNotFoundError):
await self._client.get_container_properties()
await asyncio.sleep(2)
continue
# Deleted
except ResourceNotFoundError:
break
break
logger.info('Deleted Blob Storage "%s"', self._config.name)

async def __aenter__(self) -> "AzureBlobStorage":
Expand All @@ -207,21 +205,55 @@ async def __aenter__(self) -> "AzureBlobStorage":
container=self._config.name,
)

# Create if it does not exist
with suppress(ResourceExistsError):
# Create
await self._client.create_container()
# Wait for it to be created, API is eventually consistent
while True:
with suppress(ResourceNotFoundError):
properties = await self._client.get_container_properties()
if not properties.deleted:
break
await asyncio.sleep(2)
logger.debug('Created Blob Storage "%s"', self._config.name)
await self._wait_for_creation()
await self._wait_for_ready()

# Return instance
return self

async def _wait_for_ready(self) -> None:
    """
    Wait for the container to be ready.

    Loop indefinitely until the container responds to upload and delete
    operations. The API is not consistent, so the resource is probed with a
    small test blob before being used. Any exception is treated as "not ready
    yet" and the probe is retried after 2 seconds.
    """
    probe_name = "ping"
    while True:
        try:
            # Write then remove the probe blob
            await self.upload_blob(
                blob=probe_name,
                data=b"ping",
                length=4,
                overwrite=True,
            )
            await self._client.delete_blob(probe_name)
            # Both operations succeeded, the container can be used
            logger.debug('Blob Storage "%s" is ready', self._config.name)
            return
        except Exception:
            # Probe failed, the container is not usable yet
            logger.debug("Blob Storage not ready yet, retrying", exc_info=True)
            await asyncio.sleep(2)

async def _wait_for_creation(self) -> None:
    """
    Wait for the container to be created.

    Request the creation (an already existing container is not an error), then
    loop indefinitely until the container properties can be fetched. The API is
    eventually consistent, so the resource is polled every 2 seconds.
    """
    # Start creation, ignore if it already exists
    try:
        await self._client.create_container()
    except ResourceExistsError:
        pass

    # Poll until the container is visible
    while True:
        try:
            await self._client.get_container_properties()
        except ResourceNotFoundError:
            # Not visible yet
            await asyncio.sleep(2)
            continue
        # Created
        logger.debug('Created Blob Storage "%s"', self._config.name)
        return

async def __aexit__(self, *exc: Any) -> None:
await self._service.close()
49 changes: 30 additions & 19 deletions src/scrape_it_now/persistence/azure_queue_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,35 +120,46 @@ async def create_queue(
await self._wait_for_ready()

async def _wait_for_ready(self) -> None:
"""
Wait for the queue to be ready.
Loop indefinitely until the queue responds to send/pull operations. The API is not consistent, so we need to check that the resource is ready to be used.
"""
while True:
# Try using it
try:
# Send a test message
# Send and clean a test message
await self.send_message("ping")
# Try to consume the message(s)
async for message in self.receive_messages(
max_messages=1, visibility_timeout=1
):
await self.delete_message(message)
# If no exception, the queue is created
# If no exception, the queue is ready
logger.debug('Queue Storage "%s" is ready', self._config.name)
return
except Exception: # If exception, the queue is not created yet
logger.debug("Queue not created yet, retrying")
break
# If exception, the queue is not ready yet
except Exception:
logger.debug("Queue not ready yet, retrying", exc_info=True)
await asyncio.sleep(2)

async def _wait_for_creation(self) -> None:
# Create if it does not exist
"""
Wait for the queue to be created.
Loop indefinitely until the queue is created. API is not consistent, so we need to check if the resource is created.
"""
# Start creation
with suppress(ResourceExistsError):
# Create
await self._client.create_queue()
# Wait for it to be created, API is eventually consistent
while True:
with suppress(ResourceNotFoundError):
await self._client.get_queue_properties()
logger.debug('Created Queue Storage "%s"', self._config.name)
# Created
return
await asyncio.sleep(2)

# Wait for it to be created, API is eventually consistent
while True:
with suppress(ResourceNotFoundError):
await self._client.get_queue_properties()
logger.debug('Created Queue Storage "%s"', self._config.name)
# Created
break
await asyncio.sleep(2)

@retry(
reraise=True,
Expand All @@ -166,12 +177,12 @@ async def delete_queue(
await self._client.delete_queue()
# Wait for it to be deleted, API is eventually consistent
while True:
try:
with suppress(ResourceNotFoundError):
await self._client.get_queue_properties()
await asyncio.sleep(2)
continue
# Deleted
except ResourceNotFoundError:
break
break
logger.info('Deleted Queue Storage "%s"', self._config.name)

def _escape(self, value: str) -> str:
Expand Down
Loading

0 comments on commit b07dba6

Please sign in to comment.