Skip to content

Commit

Permalink
Merge pull request #510 from oasisprotocol/ptrus/feature/oasis-health…
Browse files Browse the repository at this point in the history
…check

Healthcheck query publickey
  • Loading branch information
ptrus authored Jan 22, 2024
2 parents 1211f06 + 4fe0847 commit fce4470
Show file tree
Hide file tree
Showing 14 changed files with 131 additions and 51 deletions.
4 changes: 2 additions & 2 deletions gas/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ import (
)

var (
metricNodeMinPrice = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_oasis_web3_gateway_gas_orcale_node_min_price", Help: "Min gas price periodically queried from the node."})
metricComputedPrice = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_oasis_web3_gateway_gas_oracle_computed_price", Help: "Computed recommended gas price based on recent full blocks. -1 if none (no recent full blocks)."})
metricNodeMinPrice = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_web3_gateway_gas_orcale_node_min_price", Help: "Min gas price periodically queried from the node."})
metricComputedPrice = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_web3_gateway_gas_oracle_computed_price", Help: "Computed recommended gas price based on recent full blocks. -1 if none (no recent full blocks)."})
)

// Backend is the gas price oracle backend.
Expand Down
6 changes: 3 additions & 3 deletions indexer/backend_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,21 @@ const periodicMetricsInterval = 60 * time.Second
var (
metricCacheHits = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "oasis_oasis_web3_gateway_cache_hits",
Name: "oasis_web3_gateway_cache_hits",
Help: "Number of cache hits.",
},
[]string{"cache"},
)
metricCacheMisses = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "oasis_oasis_web3_gateway_cache_misses",
Name: "oasis_web3_gateway_cache_misses",
Help: "Number of cache misses.",
},
[]string{"cache"},
)
metricCacheHitRatio = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "oasis_oasis_web3_gateway_cache_hit_ratio",
Name: "oasis_web3_gateway_cache_hit_ratio",
Help: "Percent of Hits over all accesses (Hits + Misses).",
},
[]string{"cache"},
Expand Down
6 changes: 3 additions & 3 deletions indexer/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ const (
)

var (
metricBlockIndexed = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_oasis_web3_gateway_block_indexed", Help: "Indexed block heights."})
metricBlockPruned = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_oasis_web3_gateway_block_pruned", Help: "Pruned block heights."})
metricHealthy = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_oasis_web3_gateway_health", Help: "1 if gateway healthcheck is reporting as healthy, 0 otherwise."})
metricBlockIndexed = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_web3_gateway_block_indexed", Help: "Indexed block heights."})
metricBlockPruned = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_web3_gateway_block_pruned", Help: "Pruned block heights."})
metricHealthy = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_web3_gateway_indexer_health", Help: "1 if gateway indexer healthcheck is reporting as healthy, 0 otherwise."})
)

// ErrNotHealthy is the error returned if the gateway is unhealthy.
Expand Down
6 changes: 4 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,10 @@ func runRoot() error {
}
}

w3.RegisterAPIs(rpc.GetRPCAPIs(ctx, rc, archiveClient, backend, gasPriceOracle, cfg.Gateway, es))
w3.RegisterHealthChecks([]server.HealthCheck{indx})
apis, checks := rpc.GetRPCAPIs(ctx, rc, archiveClient, backend, gasPriceOracle, cfg.Gateway, es)
w3.RegisterAPIs(apis)
checks = append(checks, indx)
w3.RegisterHealthChecks(checks)

svr := server.Server{
Config: cfg,
Expand Down
43 changes: 20 additions & 23 deletions rpc/apis.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,74 +20,71 @@ import (
"github.com/oasisprotocol/oasis-web3-gateway/rpc/oasis"
"github.com/oasisprotocol/oasis-web3-gateway/rpc/txpool"
"github.com/oasisprotocol/oasis-web3-gateway/rpc/web3"
"github.com/oasisprotocol/oasis-web3-gateway/server"
)

// GetRPCAPIs returns the list of all APIs.
// GetRPCAPIs returns the list of enabled RPC APIs and accompanying health checks.
func GetRPCAPIs(
_ context.Context,
ctx context.Context,
client client.RuntimeClient,
archiveClient *archive.Client,
backend indexer.Backend,
gasPriceOracle gas.Backend,
config *conf.GatewayConfig,
eventSystem *eventFilters.EventSystem,
) []ethRpc.API {
) ([]ethRpc.API, []server.HealthCheck) {
var apis []ethRpc.API
var healthChecks []server.HealthCheck

// Web3 JSON-RPC Spec APIs - always enabled.
web3Service := web3.NewPublicAPI()
ethService := eth.NewPublicAPI(client, archiveClient, logging.GetLogger("eth_rpc"), config.ChainID, backend, gasPriceOracle, config.MethodLimits)
netService := net.NewPublicAPI(config.ChainID)
txpoolService := txpool.NewPublicAPI()
filtersService := filters.NewPublicAPI(client, logging.GetLogger("eth_filters"), backend, eventSystem)
oasisService := oasis.NewPublicAPI(client, logging.GetLogger("oasis"))

if config.Monitoring.Enabled() {
web3Service = web3.NewMetricsWrapper(web3Service)
netService = net.NewMetricsWrapper(netService)
ethService = ethmetrics.NewMetricsWrapper(ethService, logging.GetLogger("eth_rpc_metrics"), backend)
txpoolService = txpool.NewMetricsWrapper(txpoolService)
filtersService = filters.NewMetricsWrapper(filtersService)
oasisService = oasis.NewMetricsWrapper(oasisService)
}

apis = append(apis,
ethRpc.API{
Namespace: "web3",
Version: "1.0",
Service: web3Service,
Public: true,
},
ethRpc.API{
Namespace: "net",
Version: "1.0",
Service: netService,
Public: true,
},
ethRpc.API{
Namespace: "eth",
Version: "1.0",
Service: ethService,
Public: true,
},
ethRpc.API{
Namespace: "txpool",
Version: "1.0",
Service: txpoolService,
Public: true,
},
ethRpc.API{
Namespace: "eth",
Version: "1.0",
Service: filtersService,
Public: true,
},
ethRpc.API{
)

// Configure oasis_ APIs if enabled.
if config.ExposeOasisRPCs {
oasisService, oasisHealth := oasis.NewPublicAPI(ctx, client, logging.GetLogger("oasis"))
if config.Monitoring.Enabled() {
oasisService = oasis.NewMetricsWrapper(oasisService)
}

apis = append(apis, ethRpc.API{
Namespace: "oasis",
Version: "1.0",
Service: oasisService,
Public: config.ExposeOasisRPCs,
},
)
})
healthChecks = append(healthChecks, oasisHealth)
}

return apis
return apis, healthChecks
}
4 changes: 2 additions & 2 deletions rpc/eth/filters/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (
var (
durations = promauto.NewHistogramVec(
prometheus.HistogramOpts{
Name: "oasis_oasis_web3_gateway_subscription_seconds",
Name: "oasis_web3_gateway_subscription_seconds",
// Buckets ranging from 1 second to 24 hours.
Buckets: []float64{1, 10, 30, 60, 600, 1800, 3600, 7200, 21600, 86400},
Help: "Histogram for the eth subscription API subscriptions duration.",
Expand All @@ -23,7 +23,7 @@ var (
)
inflightSubs = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "oasis_oasis_web3_gateway_subscription_inflight",
Name: "oasis_web3_gateway_subscription_inflight",
Help: "Number of concurrent eth inflight subscriptions.",
},
[]string{"method_name"},
Expand Down
2 changes: 1 addition & 1 deletion rpc/eth/metrics/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (

var requestHeights = promauto.NewHistogramVec(
prometheus.HistogramOpts{
Name: "oasis_oasis_web3_gateway_api_request_heights",
Name: "oasis_web3_gateway_api_request_heights",
Buckets: []float64{0, 1, 2, 3, 5, 10, 50, 100, 500, 1000},
Help: "Histogram of eth API request heights (difference from the latest height).",
},
Expand Down
10 changes: 5 additions & 5 deletions rpc/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ import (
)

var (
durations = promauto.NewHistogramVec(prometheus.HistogramOpts{Name: "oasis_oasis_web3_gateway_api_seconds", Buckets: []float64{0.00001, 0.0001, .001, .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}, Help: "Histogram for the eth API requests duration."}, []string{"method_name"})
requests = promauto.NewCounterVec(prometheus.CounterOpts{Name: "oasis_oasis_web3_gateway_api_request", Help: "Counter for API requests."}, []string{"method_name"})
failures = promauto.NewCounterVec(prometheus.CounterOpts{Name: "oasis_oasis_web3_gateway_api_failure", Help: "Counter for API request failures."}, []string{"method_name"})
successes = promauto.NewCounterVec(prometheus.CounterOpts{Name: "oasis_oasis_web3_gateway_api_success", Help: "Counter for API successful requests."}, []string{"method_name"})
inflight = promauto.NewGaugeVec(prometheus.GaugeOpts{Name: "oasis_oasis_web3_gateway_api_inflight", Help: "Number of inflight API request."}, []string{"method_name"})
durations = promauto.NewHistogramVec(prometheus.HistogramOpts{Name: "oasis_web3_gateway_api_seconds", Buckets: []float64{0.00001, 0.0001, .001, .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}, Help: "Histogram for the eth API requests duration."}, []string{"method_name"})
requests = promauto.NewCounterVec(prometheus.CounterOpts{Name: "oasis_web3_gateway_api_request", Help: "Counter for API requests."}, []string{"method_name"})
failures = promauto.NewCounterVec(prometheus.CounterOpts{Name: "oasis_web3_gateway_api_failure", Help: "Counter for API request failures."}, []string{"method_name"})
successes = promauto.NewCounterVec(prometheus.CounterOpts{Name: "oasis_web3_gateway_api_success", Help: "Counter for API successful requests."}, []string{"method_name"})
inflight = promauto.NewGaugeVec(prometheus.GaugeOpts{Name: "oasis_web3_gateway_api_inflight", Help: "Number of inflight API request."}, []string{"method_name"})
)

// GetAPIMethodMetrics returns the method metrics for the specified API call.
Expand Down
17 changes: 11 additions & 6 deletions rpc/oasis/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ import (
"github.com/oasisprotocol/oasis-core/go/common/logging"
"github.com/oasisprotocol/oasis-sdk/client-sdk/go/client"
"github.com/oasisprotocol/oasis-sdk/client-sdk/go/modules/core"
"github.com/oasisprotocol/oasis-web3-gateway/server"
)

var ErrInternalError = errors.New("internal error")

// API is the net_ prefixed set of APIs in the Web3 JSON-RPC spec.
// API is the oasis_ prefixed set of APIs.
type API interface {
// CallDataPublicKey returns the calldata public key for the runtime with the provided ID.
CallDataPublicKey(ctx context.Context) (*CallDataPublicKey, error)
Expand All @@ -35,19 +36,23 @@ type CallDataPublicKey struct {

type publicAPI struct {
client client.RuntimeClient
Logger *logging.Logger
logger *logging.Logger
}

// NewPublicAPI creates an instance of the Web3 API.
// NewPublicAPI creates an instance of the oasis_ API and accompanying health check.
func NewPublicAPI(
ctx context.Context,
client client.RuntimeClient,
logger *logging.Logger,
) API {
return &publicAPI{client: client, Logger: logger}
) (API, server.HealthCheck) {
health := &healthChecker{ctx: ctx, client: client, logger: logger}
go health.run()

return &publicAPI{client: client, logger: logger}, health
}

func (api *publicAPI) CallDataPublicKey(ctx context.Context) (*CallDataPublicKey, error) {
logger := api.Logger.With("method", "oasis_callDataPublicKey")
logger := api.logger.With("method", "oasis_callDataPublicKey")
res, err := core.NewV1(api.client).CallDataPublicKey(ctx)
if err != nil {
logger.Error("failed to fetch public key", "err", err)
Expand Down
68 changes: 68 additions & 0 deletions rpc/oasis/health.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package oasis

import (
"context"
"fmt"
"sync/atomic"
"time"

"github.com/oasisprotocol/oasis-core/go/common/logging"
"github.com/oasisprotocol/oasis-sdk/client-sdk/go/client"
"github.com/oasisprotocol/oasis-sdk/client-sdk/go/modules/core"
)

const (
	// healthIterationTimeout bounds how long a single health probe
	// (one CallDataPublicKey query) is allowed to take.
	healthIterationTimeout = 15 * time.Second
	// healthCheckInterval is how long the health checker waits between
	// health probes.
	healthCheckInterval = 30 * time.Second
)

// healthChecker tracks whether the oasis_ RPC backend is reachable. Its run
// loop periodically queries the calldata public key and records the outcome,
// which Health then reports.
type healthChecker struct {
	// ctx is the parent context for every probe; once it is done the run
	// loop marks the checker unhealthy and exits.
	ctx context.Context
	// client is the runtime client used to issue the probe query.
	client client.RuntimeClient
	// logger receives probe failures and state-change debug messages.
	logger *logging.Logger

	// health is 1 after a successful probe and 0 otherwise (including before
	// the first probe). It is only read and written through sync/atomic.
	health uint32
}

// Health implements server.HealthCheck.
//
// It returns nil while the health flag is set and a descriptive error while
// the flag is zero.
func (h *healthChecker) Health() error {
	if healthy := atomic.LoadUint32(&h.health) != 0; healthy {
		return nil
	}
	return fmt.Errorf("oasis API not healthy")
}

// updateHealth atomically records the outcome of the latest probe: the health
// flag becomes 1 when healthy is true and 0 otherwise.
func (h *healthChecker) updateHealth(healthy bool) {
	var flag uint32
	if healthy {
		flag = 1
	}
	atomic.StoreUint32(&h.health, flag)
}

// run probes the oasis_ RPC backend until h.ctx is done, recording each
// outcome via updateHealth.
//
// The first probe is issued immediately so that a working backend is reported
// healthy right after startup instead of only after the first
// healthCheckInterval has elapsed. Subsequent probes are driven by a ticker.
// When h.ctx is done the checker is marked unhealthy and the loop exits.
func (h *healthChecker) run() {
	// Initial probe: the health flag starts at 0, so without this the gateway
	// would report unhealthy for a full interval after boot.
	h.checkOnce()

	ticker := time.NewTicker(healthCheckInterval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			h.checkOnce()
		case <-h.ctx.Done():
			h.updateHealth(false)
			h.logger.Debug("health checker stopping", "reason", h.ctx.Err())
			return
		}
	}
}

// checkOnce performs a single health probe: it queries the calldata public
// key with a healthIterationTimeout deadline and stores the result.
func (h *healthChecker) checkOnce() {
	ctx, cancel := context.WithTimeout(h.ctx, healthIterationTimeout)
	defer cancel()

	// Query public keys.
	if _, err := core.NewV1(h.client).CallDataPublicKey(ctx); err != nil {
		h.logger.Error("failed to fetch public key", "err", err)
		h.updateHealth(false)
		return
	}

	h.logger.Debug("oasis_ RPC healthy")
	h.updateHealth(true)
}
6 changes: 6 additions & 0 deletions server/json_rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,15 @@ import (

"github.com/ethereum/go-ethereum/rpc"
"github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/rs/cors"

"github.com/oasisprotocol/oasis-core/go/common/logging"
)

var metricHealthy = promauto.NewGauge(prometheus.GaugeOpts{Name: "oasis_web3_gateway_health", Help: "1 if gateway healthcheck is reporting as healthy, 0 otherwise."})

// httpConfig is the JSON-RPC/HTTP configuration.
type httpConfig struct {
Modules []string
Expand Down Expand Up @@ -77,9 +81,11 @@ func healthCheckHandler(healthChecks []HealthCheck) func(w http.ResponseWriter,
for _, h := range healthChecks {
if err := h.Health(); err != nil {
w.WriteHeader(http.StatusServiceUnavailable)
metricHealthy.Set(0)
return
}
}
metricHealthy.Set(1)
w.WriteHeader(http.StatusOK)
}
}
Expand Down
2 changes: 1 addition & 1 deletion storage/psql/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
"github.com/oasisprotocol/oasis-web3-gateway/storage"
)

var durations = promauto.NewHistogramVec(prometheus.HistogramOpts{Name: "oasis_oasis_web3_gateway_psql_query_seconds", Buckets: []float64{0.001, .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}, Help: "Histogram for the postgresql query duration."}, []string{"query"})
var durations = promauto.NewHistogramVec(prometheus.HistogramOpts{Name: "oasis_web3_gateway_psql_query_seconds", Buckets: []float64{0.001, .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}, Help: "Histogram for the postgresql query duration."}, []string{"query"})

func measureDuration(label string) func() {
timer := prometheus.NewTimer(durations.WithLabelValues(label))
Expand Down
2 changes: 1 addition & 1 deletion tests/rpc/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (

func TestHealthCheck(t *testing.T) {
// Ensure the initial health-check was done.
<-time.After(20 * time.Second)
<-time.After(50 * time.Second)

ctx, cancel := context.WithTimeout(context.Background(), OasisBlockTimeout)
defer cancel()
Expand Down
6 changes: 4 additions & 2 deletions tests/rpc/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,10 @@ func Setup() error {
return fmt.Errorf("setup: failed starting gas price oracle: %w", err)
}

w3.RegisterAPIs(rpc.GetRPCAPIs(context.Background(), rc, nil, backend, gasPriceOracle, tests.TestsConfig.Gateway, es))
w3.RegisterHealthChecks([]server.HealthCheck{indx})
apis, checks := rpc.GetRPCAPIs(ctx, rc, nil, backend, gasPriceOracle, tests.TestsConfig.Gateway, es)
w3.RegisterAPIs(apis)
checks = append(checks, indx)
w3.RegisterHealthChecks(checks)

if err = w3.Start(); err != nil {
w3.Close()
Expand Down

0 comments on commit fce4470

Please sign in to comment.