import os
import logging
-
- import click
+ from datetime import datetime, timedelta
from enum import Enum

+ import click
from prometheus_client import CollectorRegistry, Gauge, push_to_gateway

from thoth.common import init_logging
- from thoth.storages import GraphDatabase
+ from thoth.storages import GraphDatabase, GraphBackupStore
from thoth.storages import __version__ as __storages_version__
from thoth.common import __version__ as __common_version__

THOTH_METRICS_PUSHGATEWAY_URL = os.environ["PROMETHEUS_PUSHGATEWAY_URL"]
THOTH_DEPLOYMENT_NAME = os.environ["THOTH_DEPLOYMENT_NAME"]

+ GRAPH_BACKUP_CHECK_DATE = int(os.getenv("THOTH_GRAPH_BACKUP_CHECK_DAYS", 7))
+

class TaskEnum(Enum):
    """Class for the task to be run."""

    CORRUPTION_CHECK = "graph_corruption_check"
    TABLE_BLOAT_DATA = "graph_table_bloat_data_check"
    INDEX_BLOAT_DATA = "graph_index_bloat_data_check"
+     DATABASE_DUMPS = "graph_database_dumps_check"


init_logging()
@@ -96,6 +99,38 @@ class TaskEnum(Enum):
    registry=PROMETHEUS_REGISTRY,
)

+ # Expose number of dumps available
+ graphdb_dump_count = Gauge(
+     "thoth_graphdb_dump_count",
+     "Number of pg dumps stored on Ceph.",
+     ["env"],
+     registry=PROMETHEUS_REGISTRY,
+ )
+
+ # Expose last dump
+ graphdb_last_dump = Gauge(
+     "thoth_graphdb_last_dump",
+     "Last dump date stored on Ceph.",
+     ["date", "env"],
+     registry=PROMETHEUS_REGISTRY,
+ )
+
+ # Check if dumps are not correctly cleaned
+ graphdb_dump_not_cleaned = Gauge(
+     "thoth_graphdb_dump_not_cleaned",
+     "Check if the number of dumps on Ceph is higher than expected.",
+     ["env"],
+     registry=PROMETHEUS_REGISTRY,
+ )
+
+ # Check if last expected dump is missing
+ graphdb_dump_missed = Gauge(
+     "thoth_graphdb_dump_missed",
+     "Check if the last expected dump is missing.",
+     ["env"],
+     registry=PROMETHEUS_REGISTRY,
+ )
+

def _create_common_metrics():
    """Create common metrics to pushgateway."""
@@ -192,6 +227,41 @@ def _graph_index_bloat_data(graph: GraphDatabase):
        _LOGGER.info("thoth_graphdb_mb_index_bloat_data_table is empty")


+ def _graph_database_dumps(adapter: GraphBackupStore) -> None:
+     pg_dumps = []
+     for pg_dump in adapter.get_document_listing():
+         pg_dumps.append(
+             datetime.strptime(pg_dump[len("pg_dump-") :], GraphBackupStore._BACKUP_FILE_DATETIME_FORMAT).date()
+         )
+
+     pg_dumps_number = len(pg_dumps)
+     graphdb_dump_count.labels(THOTH_DEPLOYMENT_NAME).set(pg_dumps_number)
+     _LOGGER.info(f"Number of database dumps available on Ceph is: {pg_dumps_number}")
+
+     pg_dumps_expected = GraphBackupStore.GRAPH_BACKUP_STORE_ROTATE
+     _LOGGER.info(f"Number of database dumps expected: {pg_dumps_expected}")
+
+     if pg_dumps_number > pg_dumps_expected:
+         graphdb_dump_not_cleaned.labels(THOTH_DEPLOYMENT_NAME).set(1)
+     else:
+         graphdb_dump_not_cleaned.labels(THOTH_DEPLOYMENT_NAME).set(0)
+
+     # Consider only last uploaded pg dump
+     last_dump_date = max(pg_dumps)
+
+     _LOGGER.info(f"Last database dump was stored on: {last_dump_date}")
+     graphdb_last_dump.labels(THOTH_DEPLOYMENT_NAME, last_dump_date).inc()
+
+     last_expected_dump_date = datetime.utcnow().date() - timedelta(days=GRAPH_BACKUP_CHECK_DATE)
+
+     _LOGGER.info(f"Last expected database dump date is: {last_expected_dump_date}")
+
+     if last_dump_date < last_expected_dump_date:
+         graphdb_dump_missed.labels(THOTH_DEPLOYMENT_NAME).set(1)
+     else:
+         graphdb_dump_missed.labels(THOTH_DEPLOYMENT_NAME).set(0)
+
+
@click.command()
@click.option(
    "--task", "-t", type=click.Choice([entity.value for entity in TaskEnum], case_sensitive=False), required=False
@@ -202,14 +272,17 @@ def main(task):
    _create_common_metrics()

-     graph = GraphDatabase()
-     graph.connect()
-
    if task:
        _LOGGER.info(f"{task} task starting...")
    else:
        _LOGGER.info("No specific task selected, all tasks will be run...")

+     graph = GraphDatabase()
+     graph.connect()
+
+     adapter = GraphBackupStore()
+     adapter.connect()
+
    if task == TaskEnum.CORRUPTION_CHECK.value or not task:
        _graph_corruption_check(graph=graph)
@@ -219,6 +292,9 @@ def main(task):
    if task == TaskEnum.INDEX_BLOAT_DATA.value or not task:
        _graph_index_bloat_data(graph=graph)

+     if task == TaskEnum.DATABASE_DUMPS.value or not task:
+         _graph_database_dumps(adapter=adapter)
+
    _send_metrics()
    _LOGGER.info("Graph metrics exporter finished.")
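
For reference, the freshness check introduced in _graph_database_dumps boils down to parsing a date out of each pg_dump object key and comparing the newest one against a cut-off. Below is a minimal, standalone sketch of that logic; the "pg_dump-%Y-%m-%d" key format and the sample listing are assumptions for illustration only, while the real code uses GraphBackupStore._BACKUP_FILE_DATETIME_FORMAT and the listing returned by the Ceph adapter.

from datetime import datetime, timedelta

CHECK_DAYS = 7  # mirrors the THOTH_GRAPH_BACKUP_CHECK_DAYS default used above

# Hypothetical object keys as they might be listed on Ceph.
listing = ["pg_dump-2021-03-01", "pg_dump-2021-03-08"]
dump_dates = [datetime.strptime(key[len("pg_dump-") :], "%Y-%m-%d").date() for key in listing]

last_dump_date = max(dump_dates)
last_expected_dump_date = datetime.utcnow().date() - timedelta(days=CHECK_DAYS)

# thoth_graphdb_dump_missed is set to 1 when the newest dump is older than expected, else 0.
print("dump missed:", int(last_dump_date < last_expected_dump_date))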