Skip to content

Commit cbc437f

Browse files
committed
Parsing smartctl error code; parsing resulting json for smartctl errors; docker moved to subfolder
1 parent 09cfdec commit cbc437f

10 files changed

+100
-21
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
vendor
22
bin
3+
debug
34
*.json
45

56
Manifest

Makefile

+5
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,8 @@ example:
3131
@echo '```' >> EXAMPLE.md
3232
@curl -s localhost:9633/metrics | grep smartctl >> EXAMPLE.md
3333
@echo '```' >> EXAMPLE.md
34+
35+
collect_fake_json:
36+
-mkdir debug
37+
-rm -f debug/*json
38+
sudo ./collect_fake_json.sh

collect_fake_json.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22

33
for device in $(smartctl --scan | awk '{ print $1}')
44
do
5-
smartctl --json --xall $device | jq > $(basename $device).json
5+
smartctl --json --xall $device | jq > debug/$(basename $device).json
66
done

Dockerfile docker/Dockerfile

File renamed without changes.

config.yaml docker/config.yaml

File renamed without changes.
File renamed without changes.

main.go

+7-4
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,13 @@ func (i SMARTctlManagerCollector) Describe(ch chan<- *prometheus.Desc) {
2626
func (i SMARTctlManagerCollector) Collect(ch chan<- prometheus.Metric) {
2727
info := NewSMARTctlInfo(ch)
2828
for _, device := range options.SMARTctl.Devices {
29-
json := readData(device)
30-
info.SetJSON(json)
31-
smart := NewSMARTctl(json, ch)
32-
smart.Collect()
29+
if json, err := readData(device); err == nil {
30+
info.SetJSON(json)
31+
smart := NewSMARTctl(json, ch)
32+
smart.Collect()
33+
} else {
34+
logger.Error(err.Error())
35+
}
3336
}
3437
info.Collect()
3538
}

readjson.go

+78-14
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package main
33
import (
44
"fmt"
55
"io/ioutil"
6+
"os"
67
"os/exec"
78
"strings"
89
"time"
@@ -35,7 +36,7 @@ func parseJSON(data string) gjson.Result {
3536
// Reading fake smartctl json
3637
func readFakeSMARTctl(device string) gjson.Result {
3738
splitted := strings.Split(device, "/")
38-
filename := fmt.Sprintf("%s.json", splitted[len(splitted)-1])
39+
filename := fmt.Sprintf("debug/%s.json", splitted[len(splitted)-1])
3940
logger.Verbose("Read fake S.M.A.R.T. data from json: %s", filename)
4041
jsonFile, err := ioutil.ReadFile(filename)
4142
if err != nil {
@@ -46,30 +47,93 @@ func readFakeSMARTctl(device string) gjson.Result {
4647
}
4748

4849
// Get json from smartctl and parse it
49-
func readSMARTctl(device string) gjson.Result {
50+
func readSMARTctl(device string) (gjson.Result, bool) {
5051
logger.Debug("Collecting S.M.A.R.T. counters, device: %s", device)
5152
out, err := exec.Command(options.SMARTctl.SMARTctlLocation, "--json", "--xall", device).Output()
5253
if err != nil {
5354
logger.Warning("S.M.A.R.T. output reading error: %s", err)
5455
}
55-
return parseJSON(string(out))
56+
json := parseJSON(string(out))
57+
rcOk := resultCodeIsOk(json.Get("smartctl.exit_status").Int())
58+
jsonOk := jsonIsOk(json)
59+
return json, rcOk && jsonOk
5660
}
5761

5862
// Select json source and parse
59-
func readData(device string) gjson.Result {
63+
func readData(device string) (gjson.Result, error) {
6064
if options.SMARTctl.FakeJSON {
61-
return readFakeSMARTctl(device)
65+
return readFakeSMARTctl(device), nil
6266
}
6367

64-
if value, ok := jsonCache[device]; ok {
65-
// logger.Debug("Cache exists")
66-
if time.Now().After(value.LastCollect.Add(options.SMARTctl.CollectPeriodDuration)) {
67-
// logger.Debug("Cache update")
68-
jsonCache[device] = JSONCache{JSON: readSMARTctl(device), LastCollect: time.Now()}
68+
if _, err := os.Stat(device); err == nil {
69+
cacheValue, cacheOk := jsonCache[device]
70+
timeToScan := false
71+
if cacheOk {
72+
timeToScan = time.Now().After(cacheValue.LastCollect.Add(options.SMARTctl.CollectPeriodDuration))
73+
} else {
74+
timeToScan = true
75+
}
76+
77+
if timeToScan {
78+
json, ok := readSMARTctl(device)
79+
if ok {
80+
jsonCache[device] = JSONCache{JSON: json, LastCollect: time.Now()}
81+
return jsonCache[device].JSON, nil
82+
}
83+
return gjson.Parse("{}"), fmt.Errorf("smartctl returned bad data for device %s", device)
84+
}
85+
return gjson.Parse("{}"), fmt.Errorf("Too early collect called for device %s", device)
86+
}
87+
return gjson.Parse("{}"), fmt.Errorf("Device %s unavialable", device)
88+
}
89+
90+
// Parse smartctl return code
91+
func resultCodeIsOk(SMARTCtlResult int64) bool {
92+
result := true
93+
if SMARTCtlResult > 0 {
94+
bits := fmt.Sprintf("%08b", SMARTCtlResult)
95+
// logger.Debug("Return code: %d: %s", SMARTCtlResult, bits)
96+
if bits[0] == '1' {
97+
logger.Error("Command line did not parse.")
98+
result = false
99+
}
100+
if bits[1] == '1' {
101+
logger.Error("Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode")
102+
result = false
103+
}
104+
if bits[2] == '1' {
105+
logger.Warning("Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure")
106+
}
107+
if bits[3] == '1' {
108+
logger.Warning("SMART status check returned 'DISK FAILING'.")
109+
}
110+
if bits[4] == '1' {
111+
logger.Warning("We found prefail Attributes <= threshold.")
112+
}
113+
if bits[5] == '1' {
114+
logger.Warning("SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past.")
115+
}
116+
if bits[6] == '1' {
117+
logger.Warning("The device error log contains records of errors.")
118+
}
119+
if bits[7] == '1' {
120+
logger.Warning("The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored.")
121+
}
122+
}
123+
return result
124+
}
125+
126+
// Check json
127+
func jsonIsOk(json gjson.Result) bool {
128+
messages := json.Get("smartctl.messages")
129+
// logger.Debug(messages.String())
130+
if messages.Exists() {
131+
for _, message := range messages.Array() {
132+
if message.Get("severity").String() == "error" {
133+
logger.Error(message.Get("string").String())
134+
return false
135+
}
69136
}
70-
} else {
71-
// logger.Debug("Cache not exists")
72-
jsonCache[device] = JSONCache{JSON: readSMARTctl(device), LastCollect: time.Now()}
73137
}
74-
return jsonCache[device].JSON
138+
return true
75139
}

smartctl.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,12 @@ func NewSMARTctl(json gjson.Result, ch chan<- prometheus.Metric) SMARTctl {
3434
family: strings.TrimSpace(smart.json.Get("model_family").String()),
3535
model: strings.TrimSpace(smart.json.Get("model_name").String()),
3636
}
37-
logger.Verbose("Collecting metrics from %s: %s, %s", smart.device.device, smart.device.family, smart.device.model)
3837
return smart
3938
}
4039

4140
// Collect metrics
4241
func (smart *SMARTctl) Collect() {
42+
logger.Verbose("Collecting metrics from %s: %s, %s", smart.device.device, smart.device.family, smart.device.model)
4343
smart.mineExitStatus()
4444
smart.mineDevice()
4545
smart.mineCapacity()

smartctl_exporter.yaml

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
smartctl_exporter:
22
bind_to: "[::1]:9633"
33
url_path: "/metrics"
4-
fake_json: yes
4+
fake_json: no
55
smartctl_location: /usr/sbin/smartctl
66
collect_not_more_than_period: 20s
77
devices:
@@ -11,3 +11,9 @@ smartctl_exporter:
1111
- /dev/sdd
1212
- /dev/sde
1313
- /dev/sdf
14+
- /dev/sdg
15+
- /dev/sdh
16+
- /dev/sdi
17+
- /dev/sdj
18+
- /dev/sdk
19+
- /dev/sdl

0 commit comments

Comments
 (0)