diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 11287e8..784b2b6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,11 +1,11 @@
 ---
 repos:
-  - repo: git://github.com/Lucas-C/pre-commit-hooks
+  - repo: https://github.com/Lucas-C/pre-commit-hooks
     rev: v1.1.10
     hooks:
       - id: remove-tabs

-  - repo: git://github.com/pre-commit/pre-commit-hooks
+  - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v3.4.0
     hooks:
       - id: trailing-whitespace
@@ -22,7 +22,7 @@ repos:
       - id: check-ast
       - id: debug-statements

-  - repo: git://github.com/pycqa/pydocstyle.git
+  - repo: https://github.com/pycqa/pydocstyle.git
     rev: 6.0.0
     hooks:
       - id: pydocstyle
diff --git a/README.rst b/README.rst
index 040a46e..ed6449c 100644
--- a/README.rst
+++ b/README.rst
@@ -10,3 +10,24 @@ Thoth Graph Metrics Exporter
    :alt: Quay - Build

 Periodic job that exports metrics out of the main database asynchronously.
+
+
+Run a single task
+=================
+
+You can run a single task by selecting its name from the allowed methods:
+
+.. list-table::
+   :widths: 25 25
+   :header-rows: 1
+
+   * - Task name
+     - Description
+   * - ``graph_corruption_check``
+     - Check if the database is corrupted.
+   * - ``graph_table_bloat_data_check``
+     - Check if the database tables are bloated.
+   * - ``graph_index_bloat_data_check``
+     - Check if the database indexes are bloated.
+   * - ``graph_database_dumps_check``
+     - Check if database dumps are correctly created.
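For reference, a usage sketch for the task selection described above — a hypothetical invocation, assuming ``PROMETHEUS_PUSHGATEWAY_URL`` and ``THOTH_DEPLOYMENT_NAME`` are exported in the environment, as the ``app.py`` changes below require::

    python3 app.py --task graph_database_dumps_check

Passing no ``--task`` runs all checks in sequence, since the option is declared with ``required=False``.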
diff --git a/app.py b/app.py
index e091f81..6ceb342 100755
--- a/app.py
+++ b/app.py
@@ -19,14 +19,14 @@
 import os
 import logging
-
-import click
+from datetime import datetime, timedelta
 from enum import Enum

+import click
 from prometheus_client import CollectorRegistry, Gauge, push_to_gateway

 from thoth.common import init_logging
-from thoth.storages import GraphDatabase
+from thoth.storages import GraphDatabase, GraphBackupStore
 from thoth.storages import __version__ as __storages_version__
 from thoth.common import __version__ as __common_version__

@@ -43,6 +43,8 @@
 THOTH_METRICS_PUSHGATEWAY_URL = os.environ["PROMETHEUS_PUSHGATEWAY_URL"]
 THOTH_DEPLOYMENT_NAME = os.environ["THOTH_DEPLOYMENT_NAME"]

+GRAPH_BACKUP_CHECK_DATE = int(os.getenv("THOTH_GRAPH_BACKUP_CHECK_DAYS", 7))
+

 class TaskEnum(Enum):
     """Class for the task to be run."""
@@ -50,6 +52,7 @@
     CORRUPTION_CHECK = "graph_corruption_check"
     TABLE_BLOAT_DATA = "graph_table_bloat_data_check"
     INDEX_BLOAT_DATA = "graph_index_bloat_data_check"
+    DATABASE_DUMPS = "graph_database_dumps_check"


 init_logging()
@@ -96,6 +99,38 @@
     registry=PROMETHEUS_REGISTRY,
 )

+# Expose number of dumps available
+graphdb_dump_count = Gauge(
+    "thoth_graphdb_dump_count",
+    "Number of pg dumps stored on Ceph.",
+    ["env"],
+    registry=PROMETHEUS_REGISTRY,
+)
+
+# Expose date of the last dump
+graphdb_last_dump = Gauge(
+    "thoth_graphdb_last_dump",
+    "Last dump date stored on Ceph.",
+    ["date", "env"],
+    registry=PROMETHEUS_REGISTRY,
+)
+
+# Check if dumps are not correctly cleaned
+graphdb_dump_not_cleaned = Gauge(
+    "thoth_graphdb_dump_not_cleaned",
+    "Check if the number of dumps on Ceph is higher than expected.",
+    ["env"],
+    registry=PROMETHEUS_REGISTRY,
+)
+
+# Check if the last expected dump is missing
+graphdb_dump_missed = Gauge(
+    "thoth_graphdb_dump_missed",
+    "Check if the last expected dump is missing.",
+    ["env"],
+    registry=PROMETHEUS_REGISTRY,
+)
+

 def _create_common_metrics():
     """Create common metrics to pushgateway."""
@@ -192,6 +227,41 @@ def _graph_index_bloat_data(graph: GraphDatabase):
         _LOGGER.info("thoth_graphdb_mb_index_bloat_data_table is empty")

+def _graph_database_dumps(adapter: GraphBackupStore) -> None:
+    pg_dumps = []
+    for pg_dump in adapter.get_document_listing():
+        pg_dumps.append(
+            datetime.strptime(pg_dump[len("pg_dump-") :], GraphBackupStore._BACKUP_FILE_DATETIME_FORMAT).date()
+        )
+
+    pg_dumps_number = len(pg_dumps)
+    graphdb_dump_count.labels(THOTH_DEPLOYMENT_NAME).set(pg_dumps_number)
+    _LOGGER.info(f"Number of database dumps available on Ceph is: {pg_dumps_number}")
+
+    pg_dumps_expected = GraphBackupStore.GRAPH_BACKUP_STORE_ROTATE
+    _LOGGER.info(f"Number of database dumps expected: {pg_dumps_expected}")
+
+    if pg_dumps_number > pg_dumps_expected:
+        graphdb_dump_not_cleaned.labels(THOTH_DEPLOYMENT_NAME).set(1)
+    else:
+        graphdb_dump_not_cleaned.labels(THOTH_DEPLOYMENT_NAME).set(0)
+
+    # Consider only the last uploaded pg dump
+    last_dump_date = max(pg_dumps)
+
+    _LOGGER.info(f"Last database dump was stored on: {last_dump_date}")
+    graphdb_last_dump.labels(last_dump_date, THOTH_DEPLOYMENT_NAME).inc()
+
+    last_expected_dump_date = datetime.utcnow().date() - timedelta(days=GRAPH_BACKUP_CHECK_DATE)
+
+    _LOGGER.info(f"Last expected database dump date is: {last_expected_dump_date}")
+
+    if last_dump_date < last_expected_dump_date:
+        graphdb_dump_missed.labels(THOTH_DEPLOYMENT_NAME).set(1)
+    else:
+        graphdb_dump_missed.labels(THOTH_DEPLOYMENT_NAME).set(0)
+
+
 @click.command()
 @click.option(
     "--task", "-t", type=click.Choice([entity.value for entity in TaskEnum], case_sensitive=False), required=False
 )
@@ -202,14 +272,17 @@ def main(task):

     _create_common_metrics()

-    graph = GraphDatabase()
-    graph.connect()
-
     if task:
         _LOGGER.info(f"{task} task starting...")
     else:
         _LOGGER.info("No specific task selected, all tasks will be run...")

+    graph = GraphDatabase()
+    graph.connect()
+
+    adapter = GraphBackupStore()
+    adapter.connect()
+
     if task == TaskEnum.CORRUPTION_CHECK.value or not task:
         _graph_corruption_check(graph=graph)

@@ -219,6 +292,9 @@
     if task == TaskEnum.INDEX_BLOAT_DATA.value or not task:
         _graph_index_bloat_data(graph=graph)

+    if task == TaskEnum.DATABASE_DUMPS.value or not task:
+        _graph_database_dumps(adapter=adapter)
+
     _send_metrics()
     _LOGGER.info("Graph metrics exporter finished.")
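For reference, the dump-freshness logic added in ``_graph_database_dumps`` can be exercised in isolation — a minimal standalone sketch; the ``pg_dump-`` prefix comes from the diff above, while the datetime format, rotation count, and example listing are made-up stand-ins for the ``GraphBackupStore`` internals defined in thoth-storages::

    from datetime import datetime, timedelta

    # Hypothetical stand-ins for GraphBackupStore constants; the real
    # values live in thoth-storages and may differ.
    BACKUP_FILE_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%f"
    GRAPH_BACKUP_STORE_ROTATE = 7

    # Made-up listing, shaped like what adapter.get_document_listing()
    # is iterated over in the diff above.
    listing = [
        "pg_dump-2021-05-10T02:00:00.000000",
        "pg_dump-2021-05-17T02:00:00.000000",
    ]

    # Strip the "pg_dump-" prefix and parse the remainder into a date,
    # mirroring _graph_database_dumps.
    dump_dates = [
        datetime.strptime(name[len("pg_dump-"):], BACKUP_FILE_DATETIME_FORMAT).date()
        for name in listing
    ]

    # More dumps than the rotation limit -> cleanup did not happen
    # (thoth_graphdb_dump_not_cleaned would be set to 1).
    dumps_not_cleaned = len(dump_dates) > GRAPH_BACKUP_STORE_ROTATE

    # Newest dump older than the check window -> a dump was missed
    # (thoth_graphdb_dump_missed would be set to 1).
    last_dump_date = max(dump_dates)
    check_window = datetime.utcnow().date() - timedelta(days=7)
    dump_missed = last_dump_date < check_window

    print(dumps_not_cleaned, last_dump_date, dump_missed)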