
Commit 051b70b

Merge pull request #69 from pacospace/introduce-pg-dump-metrics
Introduce pg dump metrics
2 parents: e2a6952 + e6e0833

3 files changed: +106 -9 lines changed

.pre-commit-config.yaml (+3 -3)
@@ -1,11 +1,11 @@
 ---
 repos:
-  - repo: git://github.com/Lucas-C/pre-commit-hooks
+  - repo: https://github.com/Lucas-C/pre-commit-hooks
     rev: v1.1.10
     hooks:
       - id: remove-tabs

-  - repo: git://github.com/pre-commit/pre-commit-hooks
+  - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v3.4.0
     hooks:
       - id: trailing-whitespace
@@ -22,7 +22,7 @@ repos:
       - id: check-ast
       - id: debug-statements

-  - repo: git://github.com/pycqa/pydocstyle.git
+  - repo: https://github.com/pycqa/pydocstyle.git
     rev: 6.0.0
     hooks:
       - id: pydocstyle

README.rst (+21)
@@ -10,3 +10,24 @@ Thoth Graph Metrics Exporter
    :alt: Quay - Build

 Periodic job that exports metrics out of the main database asynchronously.
+
+
+Run a single task
+=================
+
+You can run a single task by selecting its name from the allowed methods:
+
+.. list-table::
+   :widths: 25 25
+   :header-rows: 1
+
+   * - Task name
+     - Description
+   * - ``graph_corruption_check``
+     - Check if the database is corrupted.
+   * - ``graph_table_bloat_data_check``
+     - Check if the database tables are bloated.
+   * - ``graph_index_bloat_data_check``
+     - Check if the database index tables are bloated.
+   * - ``graph_database_dumps_check``
+     - Check if database dumps are correctly created.
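
The task names above correspond to the values accepted by the ``--task``/``-t`` click option added to ``app.py`` (see the diff below). As a rough illustration, a single task could also be driven from Python through click's test runner; this is a sketch only, assuming ``app.py`` is importable and that the environment the module reads at import time (``PROMETHEUS_PUSHGATEWAY_URL``, ``THOTH_DEPLOYMENT_NAME``, database credentials) is already configured:

# Illustrative sketch only; not part of this commit.
# Importing app executes its module-level setup (env var reads, metric
# registration), so the environment must already be configured.
from click.testing import CliRunner

from app import main  # the click command defined in app.py

runner = CliRunner()
result = runner.invoke(main, ["--task", "graph_database_dumps_check"])
print(result.exit_code, result.output)

With no ``--task`` given, all checks run in sequence, as the ``main`` function in the diff below shows.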

app.py (+82 -6)
@@ -19,14 +19,14 @@

 import os
 import logging
-
-import click
+from datetime import datetime, timedelta
 from enum import Enum

+import click
 from prometheus_client import CollectorRegistry, Gauge, push_to_gateway

 from thoth.common import init_logging
-from thoth.storages import GraphDatabase
+from thoth.storages import GraphDatabase, GraphBackupStore
 from thoth.storages import __version__ as __storages_version__
 from thoth.common import __version__ as __common_version__

@@ -43,13 +43,16 @@
 THOTH_METRICS_PUSHGATEWAY_URL = os.environ["PROMETHEUS_PUSHGATEWAY_URL"]
 THOTH_DEPLOYMENT_NAME = os.environ["THOTH_DEPLOYMENT_NAME"]

+GRAPH_BACKUP_CHECK_DATE = int(os.getenv("THOTH_GRAPH_BACKUP_CHECK_DAYS", 7))
+

 class TaskEnum(Enum):
     """Class for the task to be run."""

     CORRUPTION_CHECK = "graph_corruption_check"
     TABLE_BLOAT_DATA = "graph_table_bloat_data_check"
     INDEX_BLOAT_DATA = "graph_index_bloat_data_check"
+    DATABASE_DUMPS = "graph_database_dumps_check"


 init_logging()
@@ -96,6 +99,38 @@ class TaskEnum(Enum):
     registry=PROMETHEUS_REGISTRY,
 )

+# Expose number of dumps available
+graphdb_dump_count = Gauge(
+    "thoth_graphdb_dump_count",
+    "Number of pg dumps stored on Ceph.",
+    ["env"],
+    registry=PROMETHEUS_REGISTRY,
+)
+
+# Expose last dump
+graphdb_last_dump = Gauge(
+    "thoth_graphdb_last_dump",
+    "Last dump date stored on Ceph.",
+    ["date", "env"],
+    registry=PROMETHEUS_REGISTRY,
+)
+
+# Check if dumps are not correctly cleaned
+graphdb_dump_not_cleaned = Gauge(
+    "thoth_graphdb_dump_not_cleaned",
+    "Check if the number of dumps on Ceph is higher than expected.",
+    ["env"],
+    registry=PROMETHEUS_REGISTRY,
+)
+
+# Check if last expected dump is missing
+graphdb_dump_missed = Gauge(
+    "thoth_graphdb_dump_missed",
+    "Check if the last expected dump is missing.",
+    ["env"],
+    registry=PROMETHEUS_REGISTRY,
+)
+

 def _create_common_metrics():
     """Create common metrics to pushgateway."""
@@ -192,6 +227,41 @@ def _graph_index_bloat_data(graph: GraphDatabase):
     _LOGGER.info("thoth_graphdb_mb_index_bloat_data_table is empty")


+def _graph_database_dumps(adapter: GraphBackupStore) -> None:
+    pg_dumps = []
+    for pg_dump in adapter.get_document_listing():
+        pg_dumps.append(
+            datetime.strptime(pg_dump[len("pg_dump-") :], GraphBackupStore._BACKUP_FILE_DATETIME_FORMAT).date()
+        )
+
+    pg_dumps_number = len(pg_dumps)
+    graphdb_dump_count.labels(THOTH_DEPLOYMENT_NAME).set(pg_dumps_number)
+    _LOGGER.info(f"Number of database dumps available on Ceph is: {pg_dumps_number}")
+
+    pg_dumps_expected = GraphBackupStore.GRAPH_BACKUP_STORE_ROTATE
+    _LOGGER.info(f"Number of database dumps expected: {pg_dumps_expected}")
+
+    if pg_dumps_number > pg_dumps_expected:
+        graphdb_dump_not_cleaned.labels(THOTH_DEPLOYMENT_NAME).set(1)
+    else:
+        graphdb_dump_not_cleaned.labels(THOTH_DEPLOYMENT_NAME).set(0)
+
+    # Consider only last uploaded pg dump
+    last_dump_date = max(pg_dumps)
+
+    _LOGGER.info(f"Last database dump was stored on: {last_dump_date}")
+    graphdb_last_dump.labels(THOTH_DEPLOYMENT_NAME, last_dump_date).inc()
+
+    last_expected_dump_date = datetime.utcnow().date() - timedelta(days=GRAPH_BACKUP_CHECK_DATE)
+
+    _LOGGER.info(f"Last expected database dump date is: {last_expected_dump_date}")
+
+    if last_dump_date < last_expected_dump_date:
+        graphdb_dump_missed.labels(THOTH_DEPLOYMENT_NAME).set(1)
+    else:
+        graphdb_dump_missed.labels(THOTH_DEPLOYMENT_NAME).set(0)
+
+
 @click.command()
 @click.option(
     "--task", "-t", type=click.Choice([entity.value for entity in TaskEnum], case_sensitive=False), required=False
@@ -202,14 +272,17 @@ def main(task):

     _create_common_metrics()

-    graph = GraphDatabase()
-    graph.connect()
-
     if task:
         _LOGGER.info(f"{task} task starting...")
     else:
         _LOGGER.info("No specific task selected, all tasks will be run...")

+    graph = GraphDatabase()
+    graph.connect()
+
+    adapter = GraphBackupStore()
+    adapter.connect()
+
     if task == TaskEnum.CORRUPTION_CHECK.value or not task:
         _graph_corruption_check(graph=graph)

@@ -219,6 +292,9 @@ def main(task):
     if task == TaskEnum.INDEX_BLOAT_DATA.value or not task:
         _graph_index_bloat_data(graph=graph)

+    if task == TaskEnum.DATABASE_DUMPS.value or not task:
+        _graph_database_dumps(adapter=adapter)
+
     _send_metrics()
     _LOGGER.info("Graph metrics exporter finished.")
