test: Add CodSpeed to follow lib performance

clemlesne · Feb 9, 2025 · fb4149b · fb4149b
1 parent 86ee9dd
commit fb4149b
Show file tree

Hide file tree

Showing 17 changed files with 227 additions and 64 deletions.
diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml
@@ -15,6 +15,8 @@ on:
       - feat/*
       - hotfix/*
       - main
+  # Allows CodSpeed to trigger backtest performance analysis in order to generate initial data
+  workflow_dispatch:
 
 jobs:
   init:
@@ -52,7 +54,8 @@ jobs:
         # Run all test suites
         step:
           - static
-          - unit
+          - unit-simple
+          - unit-codspeed
         # Run on all supported Python versions
         python-version:
           - "3.11"
@@ -103,9 +106,17 @@ jobs:
       - name: Configure environment variables
         run: echo "${{ secrets.DOTENV_UNIT_TESTS }}" > .env
 
-      - name: Run tests
+      - name: Run tests (simple)
+        if: ${{ !contains(matrix.step, 'codspeed') }}
         run: make test-${{ matrix.step }} version_full=${{ needs.init.outputs.VERSION_FULL }}
 
+      - name: Run tests (CodSpeed)
+        if: ${{ contains(matrix.step, 'codspeed') }}
+        uses: CodSpeedHQ/[email protected]
+        with:
+          token: ${{ secrets.CODSPEED_TOKEN }}
+          run: make test-${{ matrix.step }} version_full=${{ needs.init.outputs.VERSION_FULL }}
+
       - name: Upload artifacts
         uses: actions/[email protected]
         if: always()

diff --git a/.gitignore b/.gitignore
@@ -291,3 +291,6 @@ test-reports/
 # Local .env
 !.env.example
 .env.*
+
+# CodSpeed
+.codspeed/
diff --git a/Makefile b/Makefile
@@ -39,7 +39,7 @@ install-deps:
 	uv sync --extra dev
 
 	@echo "➡️ Installing Playwright dependencies..."
-	uv run playwright install chrome --with-deps
+	uv run playwright install chromium --with-deps --no-shell
 
 upgrade:
 	@echo "➡️ Updating Git submodules..."
@@ -53,7 +53,7 @@ upgrade:
 
 test:
 	$(MAKE) test-static
-	$(MAKE) test-unit
+	$(MAKE) test-unit-simple
 
 test-static:
 	@echo "➡️ Test dependencies issues (deptry)..."
@@ -65,18 +65,31 @@ test-static:
 	@echo "➡️ Test types (Pyright)..."
 	uv run pyright
 
-test-unit:
-	bash cicd/test-unit-ci.sh
+test-unit-simple:
+	bash cicd/test-unit-ci.sh simple
+
+test-unit-codspeed:
+	bash cicd/test-unit-ci.sh codspeed
 
 test-static-server:
 	@echo "➡️ Starting local static server..."
 	python3 -m http.server -d ./tests/websites 8000
 
-test-unit-run:
-	@echo "➡️ Unit tests (Pytest)..."
-	uv run pytest \
-		--junit-xml=test-reports/$(version_full).xml \
-		--log-file=test-reports/$(version_full).log \
+test-unit-simple-run:
+	@echo "➡️ Unit tests with no extra (Pytest)..."
+	CI=true uv run pytest \
+		--junit-xml=test-reports/$(version_full)-simple.xml \
+		--log-file=test-reports/$(version_full)-simple.log \
+		--maxprocesses=4 \
+		-n=logical \
+		tests/*.py
+
+test-unit-codspeed-run:
+	@echo "➡️ Unit tests with CodSpeed (Pytest)..."
+	CI=true uv run pytest \
+		--codspeed \
+		--junit-xml=test-reports/$(version_full)-codspeed.xml \
+		--log-file=test-reports/$(version_full)-codspeed.log \
 		--maxprocesses=4 \
 		-n=logical \
 		tests/*.py

diff --git a/README.md b/README.md
@@ -6,6 +6,7 @@ Web scraper made for AI and simplicity in mind. It runs as a CLI that can be par
 [![GitHub project license](https://img.shields.io/github/license/clemlesne/scrape-it-now)](https://github.com/clemlesne/scrape-it-now/blob/main/LICENSE)
 [![PyPI package version](https://img.shields.io/pypi/v/scrape-it-now)](https://pypi.org/project/scrape-it-now)
 [![PyPI supported Python versions](https://img.shields.io/pypi/pyversions/scrape-it-now)](https://pypi.org/project/scrape-it-now)
+[![CodSpeed report](https://img.shields.io/endpoint?url=https://codspeed.io/badge.json)](https://codspeed.io/clemlesne/scrape-it-now)
 
 ## Features
 

diff --git a/cicd/test-unit-ci.sh b/cicd/test-unit-ci.sh
@@ -1,13 +1,20 @@
 #!/bin/bash
 
+mode=$1
+
+if [ -z "$mode" ]; then
+    echo "Error: mode is required." >&2
+    exit 1
+fi
+
 # Start the first command in the background
 make test-static-server 1>/dev/null 2>&1 &
 
 # Capture the PID of the background process
 UNIT_RUN_PID=$!
 
 # Run the second command
-make test-unit-run
+make test-unit-${mode}-run
 exit_code=$?
 
 # Once the second command exits, kill the first process

diff --git a/pyproject.toml b/pyproject.toml
@@ -72,6 +72,7 @@ dev = [
   "pyinstaller~=6.11",         # Create standalone executable
   "pyright~=1.1",              # Static type checker
   "pytest-asyncio~=0.23",      # Pytest plugin for async tests
+  "pytest-codspeed~=3.2.0",    # Pytest plugin for measuring code speed
   "pytest-repeat~=0.9",        # Pytest plugin for repeating tests
   "pytest-xdist[psutil]~=3.6", # Pytest plugin for parallel testing
   "pytest~=8.3",               # Testing framework

diff --git a/src/scrape_it_now/helpers/__init__.py b/src/scrape_it_now/helpers/__init__.py
@@ -1,8 +1,17 @@
+from os import environ as env
+
 from dotenv import find_dotenv, load_dotenv
 
+# First, load the environment variables from the .env file
 load_dotenv(
     find_dotenv(
         # Use the current working directory from where the command is run
         usecwd=True,
     )
 )
+
+# Detect if the code is running in a CI environment
+# See: https://stackoverflow.com/a/75223617
+IS_CI = env.get("CI", "").lower() == "true"
+if IS_CI:
+    print("CI environment detected, be aware configuration may differ")  # noqa: T201
diff --git a/src/scrape_it_now/helpers/identity.py b/src/scrape_it_now/helpers/identity.py
@@ -2,13 +2,15 @@
 
 from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider
 
+from scrape_it_now.helpers import IS_CI
 from scrape_it_now.helpers.cache import lru_acache
 from scrape_it_now.helpers.http import azure_transport
 
 
 @lru_acache()
 async def credential() -> DefaultAzureCredential:
     return DefaultAzureCredential(
+        process_timeout=120 if IS_CI else 10,  # 2 mins in CI, 10 secs in production
         # Performance
         transport=await azure_transport(),
     )

diff --git a/src/scrape_it_now/helpers/logging.py b/src/scrape_it_now/helpers/logging.py
@@ -18,6 +18,15 @@
 )
 from structlog.stdlib import PositionalArgumentsFormatter
 
+from scrape_it_now.helpers import IS_CI
+
+
+def enable_debug_logging() -> None:
+    configure(
+        wrapper_class=make_filtering_bound_logger(DEBUG),
+    )
+
+
 configure_once(
     cache_logger_on_first_use=True,
     context_class=dict,
@@ -44,8 +53,6 @@
 # Framework does not exactly expose Logger, but that's easier to work with
 logger: Logger = structlog_get_logger("scrape-it-now")
 
-
-def enable_debug_logging() -> None:
-    configure(
-        wrapper_class=make_filtering_bound_logger(DEBUG),
-    )
+# Enable debug logging on CI
+if IS_CI:
+    enable_debug_logging()
diff --git a/src/scrape_it_now/persistence/azure_blob_storage.py b/src/scrape_it_now/persistence/azure_blob_storage.py
@@ -182,14 +182,12 @@ async def delete_container(
             await self._client.delete_container()
             # Wait for it to be deleted, API is eventually consistent
             while True:
-                try:
-                    properties = await self._client.get_container_properties()
-                    if properties.deleted:
-                        break
+                with suppress(ResourceNotFoundError):
+                    await self._client.get_container_properties()
                     await asyncio.sleep(2)
+                    continue
                 # Deleted
-                except ResourceNotFoundError:
-                    break
+                break
             logger.info('Deleted Blob Storage "%s"', self._config.name)
 
     async def __aenter__(self) -> "AzureBlobStorage":
@@ -207,21 +205,55 @@ async def __aenter__(self) -> "AzureBlobStorage":
             container=self._config.name,
         )
 
-        # Create if it does not exist
-        with suppress(ResourceExistsError):
-            # Create
-            await self._client.create_container()
-            # Wait for it to be created, API is eventually consistent
-            while True:
-                with suppress(ResourceNotFoundError):
-                    properties = await self._client.get_container_properties()
-                    if not properties.deleted:
-                        break
-                await asyncio.sleep(2)
-            logger.debug('Created Blob Storage "%s"', self._config.name)
+        await self._wait_for_creation()
+        await self._wait_for_ready()
 
         # Return instance
         return self
 
+    async def _wait_for_ready(self) -> None:
+        """
+        Wait for the container to be ready.
+
+        Loop indefinitely until the container responds to upload/delete operations. API is not consistent, so we need to check if the resource is ready to be used.
+        """
+        while True:
+            # Try using it
+            try:
+                # Upload and clean a test blob
+                await self.upload_blob(
+                    blob="ping",
+                    data=b"ping",
+                    length=4,
+                    overwrite=True,
+                )
+                await self._client.delete_blob("ping")
+                # If no exception, the container is ready
+                logger.debug('Blob Storage "%s" is ready', self._config.name)
+                break
+            # If exception, the container is not ready yet
+            except Exception:
+                logger.debug("Blob Storage not ready yet, retrying", exc_info=True)
+                await asyncio.sleep(2)
+
+    async def _wait_for_creation(self) -> None:
+        """
+        Wait for the container to be created.
+
+        Loop indefinitely until the container is created. API is not consistent, so we need to check if the resource is created.
+        """
+        # Start creation
+        with suppress(ResourceExistsError):
+            await self._client.create_container()
+
+        # Wait for it to be created, API is eventually consistent
+        while True:
+            with suppress(ResourceNotFoundError):
+                await self._client.get_container_properties()
+                logger.debug('Created Blob Storage "%s"', self._config.name)
+                # Created
+                break
+            await asyncio.sleep(2)
+
     async def __aexit__(self, *exc: Any) -> None:
         await self._service.close()
diff --git a/src/scrape_it_now/persistence/azure_queue_storage.py b/src/scrape_it_now/persistence/azure_queue_storage.py
@@ -120,35 +120,46 @@ async def create_queue(
         await self._wait_for_ready()
 
     async def _wait_for_ready(self) -> None:
+        """
+        Wait for the queue to be ready.
+
+        Loop indefinitely until the queue responds to send/pull operations. API is not consistent, so we need to check if the resource is ready to be used.
+        """
         while True:
+            # Try using it
             try:
-                # Send a test message
+                # Send and clean a test message
                 await self.send_message("ping")
-                # Try to consume the message(s)
                 async for message in self.receive_messages(
                     max_messages=1, visibility_timeout=1
                 ):
                     await self.delete_message(message)
-                # If no exception, the queue is created
+                # If no exception, the queue is ready
                 logger.debug('Queue Storage "%s" is ready', self._config.name)
-                return
-            except Exception:  # If exception, the queue is not created yet
-                logger.debug("Queue not created yet, retrying")
+                break
+            # If exception, the queue is not ready yet
+            except Exception:
+                logger.debug("Queue not ready yet, retrying", exc_info=True)
                 await asyncio.sleep(2)
 
     async def _wait_for_creation(self) -> None:
-        # Create if it does not exist
+        """
+        Wait for the queue to be created.
+
+        Loop indefinitely until the queue is created. API is not consistent, so we need to check if the resource is created.
+        """
+        # Start creation
         with suppress(ResourceExistsError):
-            # Create
             await self._client.create_queue()
-            # Wait for it to be created, API is eventually consistent
-            while True:
-                with suppress(ResourceNotFoundError):
-                    await self._client.get_queue_properties()
-                    logger.debug('Created Queue Storage "%s"', self._config.name)
-                    # Created
-                    return
-                await asyncio.sleep(2)
+
+        # Wait for it to be created, API is eventually consistent
+        while True:
+            with suppress(ResourceNotFoundError):
+                await self._client.get_queue_properties()
+                logger.debug('Created Queue Storage "%s"', self._config.name)
+                # Created
+                break
+            await asyncio.sleep(2)
 
     @retry(
         reraise=True,
@@ -166,12 +177,12 @@ async def delete_queue(
             await self._client.delete_queue()
             # Wait for it to be deleted, API is eventually consistent
             while True:
-                try:
+                with suppress(ResourceNotFoundError):
                     await self._client.get_queue_properties()
                     await asyncio.sleep(2)
+                    continue
                 # Deleted
-                except ResourceNotFoundError:
-                    break
+                break
             logger.info('Deleted Queue Storage "%s"', self._config.name)
 
     def _escape(self, value: str) -> str: