Skip to content

Commit 3a012b5

Browse files
mortzxzharmlesszxz
mort
authored and committed
Implemented new features - extract raid member disk name.
Modified smartctl.device param - now you can set it as sda, megaraid_disk_01, etc. Signed-off-by: Denys <[email protected]>
1 parent 84d8cc3 commit 3a012b5

File tree

4 files changed

+69
-30
lines changed

4 files changed

+69
-30
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
/.release
44
/.tarballs
55
debug/
6+
.idea/
67

78
Manifest
89
smartctl_exporter

main.go

+39-12
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ package main
1616
import (
1717
"net/http"
1818
"os"
19+
"strings"
1920
"sync"
2021
"time"
2122

@@ -32,11 +33,18 @@ import (
3233
webflag "github.com/prometheus/exporter-toolkit/web/kingpinflag"
3334
)
3435

36+
// Device describes one entry of the smartctl device scan.
//
// Name and Type are copied from the scan entry's "name" and "type" fields and
// are handed back to smartctl when the device is queried. Info_Name holds the
// short disk name extracted from the entry's "info_name" field; it is the value
// used for device filtering and in log messages.
37+
type Device struct {
38+
Name string `json:"name"`
39+
Info_Name string `json:"info_name"`
40+
Type string `json:"type"`
41+
}
42+
3543
// SMARTctlManagerCollector implements the Collector interface.
3644
type SMARTctlManagerCollector struct {
3745
CollectPeriod string
3846
CollectPeriodDuration time.Duration
39-
Devices []string
47+
Devices []Device
4048

4149
logger log.Logger
4250
mutex sync.Mutex
@@ -106,24 +114,43 @@ var (
106114
)
107115

108116
// scanDevices uses smartctl to gather the list of available devices.
//
// It walks the "devices" array of the JSON returned by readSMARTctlDevices,
// derives each entry's short disk name from its "info_name" field via
// extractDiskName, and skips entries that the device filter (built from
// smartctlDeviceExclude / smartctlDeviceInclude) reports as ignored. Every
// kept or ignored entry is logged at info level. The result is nil when no
// entry is kept.
109-
func scanDevices(logger log.Logger) []string {
117+
func scanDevices(logger log.Logger) []Device {
110118
filter := newDeviceFilter(*smartctlDeviceExclude, *smartctlDeviceInclude)
111119

112120
json := readSMARTctlDevices(logger)
113121
scanDevices := json.Get("devices").Array()
114-
var scanDeviceResult []string
122+
var scanDeviceResult []Device
115123
for _, d := range scanDevices {
116-
deviceName := d.Get("name").String()
124+
deviceName := extractDiskName(strings.TrimSpace(d.Get("info_name").String()))
117125
if filter.ignored(deviceName) {
118126
level.Info(logger).Log("msg", "Ignoring device", "name", deviceName)
119127
} else {
120128
level.Info(logger).Log("msg", "Found device", "name", deviceName)
121-
scanDeviceResult = append(scanDeviceResult, deviceName)
129+
device := Device{
130+
Name: d.Get("name").String(),
131+
Info_Name: deviceName,
132+
Type: d.Get("type").String(),
133+
}
134+
scanDeviceResult = append(scanDeviceResult, device)
122135
}
123136
}
124137
return scanDeviceResult
125138
}
126139

140+
func filterDevices(logger log.Logger, devices []Device, filters []string) []Device {
141+
var filtered []Device
142+
for _, d := range devices {
143+
for _, filter := range filters {
144+
level.Debug(logger).Log("msg", "filterDevices", "device", d.Info_Name, "filter", filter)
145+
if strings.Contains(d.Info_Name, filter) {
146+
filtered = append(filtered, d)
147+
break
148+
}
149+
}
150+
}
151+
return filtered
152+
}
153+
127154
func main() {
128155
metricsPath := kingpin.Flag(
129156
"web.telemetry-path", "Path under which to expose metrics",
@@ -140,21 +167,21 @@ func main() {
140167
level.Info(logger).Log("msg", "Starting smartctl_exporter", "version", version.Info())
141168
level.Info(logger).Log("msg", "Build context", "build_context", version.BuildContext())
142169

143-
var devices []string
170+
var devices []Device
171+
devices = scanDevices(logger)
172+
level.Info(logger).Log("msg", "Number of devices found", "count", len(devices))
144173
if len(*smartctlDevices) > 0 {
145-
devices = *smartctlDevices
146-
} else {
147-
level.Info(logger).Log("msg", "No devices specified, trying to load them automatically")
148-
devices = scanDevices(logger)
149-
level.Info(logger).Log("msg", "Number of devices found", "count", len(devices))
174+
level.Info(logger).Log("msg", "Devices specified", "devices", strings.Join(*smartctlDevices, ", "))
175+
devices = filterDevices(logger, devices, *smartctlDevices)
176+
level.Info(logger).Log("msg", "Devices filtered", "count", len(devices))
150177
}
151178

152179
collector := SMARTctlManagerCollector{
153180
Devices: devices,
154181
logger: logger,
155182
}
156183

157-
if *smartctlRescanInterval >= 1*time.Second && len(*smartctlDevices) == 0 {
184+
if *smartctlRescanInterval >= 1*time.Second {
158185
level.Info(logger).Log("msg", "Start background scan process")
159186
level.Info(logger).Log("msg", "Rescanning for devices every", "rescanInterval", *smartctlRescanInterval)
160187
go collector.RescanForDevices()

readjson.go

+17-17
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ func parseJSON(data string) gjson.Result {
4949
}
5050

5151
// Reading fake smartctl json
52-
func readFakeSMARTctl(logger log.Logger, device string) gjson.Result {
53-
s := strings.Split(device, "/")
52+
func readFakeSMARTctl(logger log.Logger, device Device) gjson.Result {
53+
s := strings.Split(device.Name, "/")
5454
filename := fmt.Sprintf("debug/%s.json", s[len(s)-1])
5555
level.Debug(logger).Log("msg", "Read fake S.M.A.R.T. data from json", "filename", filename)
5656
jsonFile, err := os.ReadFile(filename)
@@ -62,16 +62,16 @@ func readFakeSMARTctl(logger log.Logger, device string) gjson.Result {
6262
}
6363

6464
// readSMARTctl runs smartctl with JSON output for the given device and parses
// whatever the command printed.
//
// A command error is only logged as a warning; the captured output is still
// parsed. The returned bool is true only when both resultCodeIsOk (fed the
// JSON's "smartctl.exit_status" value) and jsonIsOk accept the result. The
// time spent is reported in a debug log entry.
65-
func readSMARTctl(logger log.Logger, device string) (gjson.Result, bool) {
65+
func readSMARTctl(logger log.Logger, device Device) (gjson.Result, bool) {
6666
start := time.Now()
67-
out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device).Output()
67+
out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device.Name, "-d", device.Type).Output()
6868
if err != nil {
69-
level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device)
69+
level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device.Info_Name)
7070
}
7171
json := parseJSON(string(out))
7272
rcOk := resultCodeIsOk(logger, device, json.Get("smartctl.exit_status").Int())
7373
jsonOk := jsonIsOk(logger, json)
74-
level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device, "duration", time.Since(start))
74+
level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device.Info_Name, "duration", time.Since(start))
7575
return json, rcOk && jsonOk
7676
}
7777

@@ -90,7 +90,7 @@ func readSMARTctlDevices(logger log.Logger) gjson.Result {
9090
}
9191

9292
// Select json source and parse
93-
func readData(logger log.Logger, device string) gjson.Result {
93+
func readData(logger log.Logger, device Device) gjson.Result {
9494
if *smartctlFakeData {
9595
return readFakeSMARTctl(logger, device)
9696
}
@@ -102,7 +102,7 @@ func readData(logger log.Logger, device string) gjson.Result {
102102
jsonCache.Store(device, JSONCache{JSON: json, LastCollect: time.Now()})
103103
j, found := jsonCache.Load(device)
104104
if !found {
105-
level.Warn(logger).Log("msg", "device not found", "device", device)
105+
level.Warn(logger).Log("msg", "device not found", "device", device.Info_Name)
106106
}
107107
return j.(JSONCache).JSON
108108
}
@@ -112,35 +112,35 @@ func readData(logger log.Logger, device string) gjson.Result {
112112
}
113113

114114
// Parse smartctl return code
115-
func resultCodeIsOk(logger log.Logger, device string, SMARTCtlResult int64) bool {
115+
func resultCodeIsOk(logger log.Logger, device Device, SMARTCtlResult int64) bool {
116116
result := true
117117
if SMARTCtlResult > 0 {
118118
b := SMARTCtlResult
119119
if (b & 1) != 0 {
120-
level.Error(logger).Log("msg", "Command line did not parse", "device", device)
120+
level.Error(logger).Log("msg", "Command line did not parse", "device", device.Info_Name)
121121
result = false
122122
}
123123
if (b & (1 << 1)) != 0 {
124-
level.Error(logger).Log("msg", "Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device)
124+
level.Error(logger).Log("msg", "Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device.Info_Name)
125125
result = false
126126
}
127127
if (b & (1 << 2)) != 0 {
128-
level.Warn(logger).Log("msg", "Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device)
128+
level.Warn(logger).Log("msg", "Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device.Info_Name)
129129
}
130130
if (b & (1 << 3)) != 0 {
131-
level.Warn(logger).Log("msg", "SMART status check returned 'DISK FAILING'", "device", device)
131+
level.Warn(logger).Log("msg", "SMART status check returned 'DISK FAILING'", "device", device.Info_Name)
132132
}
133133
if (b & (1 << 4)) != 0 {
134-
level.Warn(logger).Log("msg", "We found prefail Attributes <= threshold", "device", device)
134+
level.Warn(logger).Log("msg", "We found prefail Attributes <= threshold", "device", device.Info_Name)
135135
}
136136
if (b & (1 << 5)) != 0 {
137-
level.Warn(logger).Log("msg", "SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device)
137+
level.Warn(logger).Log("msg", "SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device.Info_Name)
138138
}
139139
if (b & (1 << 6)) != 0 {
140-
level.Warn(logger).Log("msg", "The device error log contains records of errors", "device", device)
140+
level.Warn(logger).Log("msg", "The device error log contains records of errors", "device", device.Info_Name)
141141
}
142142
if (b & (1 << 7)) != 0 {
143-
level.Warn(logger).Log("msg", "The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device)
143+
level.Warn(logger).Log("msg", "The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device.Info_Name)
144144
}
145145
}
146146
return result

smartctl.go

+12-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ package main
1515

1616
import (
1717
"fmt"
18+
"regexp"
1819
"strings"
1920

2021
"github.com/go-kit/log"
@@ -42,6 +43,16 @@ type SMARTctl struct {
4243
device SMARTDevice
4344
}
4445

46+
// diskNamePatterns lists the info_name layouts extractDiskName understands, in
// priority order. Every pattern has exactly one capturing group: the disk name.
// The patterns are compiled once at package level because extractDiskName is
// called for every scanned device and for every collected device.
var diskNamePatterns = []*regexp.Regexp{
	// "/dev/sda", "[megaraid_disk_01]" and "/dev/bus/0 [megaraid_disk_01]".
	regexp.MustCompile(`^(?:/dev/\S+/\S+\s\[|/dev/|\[)(?:\s\[|)(?P<disk>[a-z0-9_]+)(?:\].*|)$`),
	// "/dev/sda [megaraid_disk_00]": a plain device node followed by a
	// bracketed member name; the member name identifies the disk.
	regexp.MustCompile(`^/dev/[a-z0-9_]+\s\[([a-z0-9_]+)\]`),
	// "/dev/sda [SAT]": the bracket holds something that is not a disk name
	// (for example a protocol tag), so the device node name is used instead.
	regexp.MustCompile(`^/dev/([a-z0-9_]+)\s\[`),
}

// extractDiskName derives a short disk name from a smartctl "info_name" value.
//
// A bracketed RAID member name such as "megaraid_disk_01" wins when present;
// otherwise the device node name without the "/dev/" prefix is returned.
// Inputs matching none of the known layouts (including the empty string)
// yield "".
func extractDiskName(input string) string {
	for _, re := range diskNamePatterns {
		if m := re.FindStringSubmatch(input); m != nil {
			return m[1]
		}
	}
	return ""
}
55+
4556
// NewSMARTctl is smartctl constructor
4657
func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metric) SMARTctl {
4758
var model_name string
@@ -60,7 +71,7 @@ func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metr
6071
json: json,
6172
logger: logger,
6273
device: SMARTDevice{
63-
device: strings.TrimPrefix(strings.TrimSpace(json.Get("device.name").String()), "/dev/"),
74+
device: extractDiskName(strings.TrimSpace(json.Get("device.info_name").String())),
6475
serial: strings.TrimSpace(json.Get("serial_number").String()),
6576
family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")),
6677
model: strings.TrimSpace(model_name),

0 commit comments

Comments
 (0)