Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Publish latency and failure metrics for cluster state applier thread #563

Merged
merged 6 commits into from
Mar 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ public enum MetricName {
SHARD_STATS,
MASTER_PENDING,
MOUNTED_PARTITION_METRICS,
CLUSTER_APPLIER_SERVICE,
ADMISSION_CONTROL_METRICS,
SHARD_INDEXING_PRESSURE
SHARD_INDEXING_PRESSURE,
}

// we don't store node details as a metric on reader side database. We
Expand Down Expand Up @@ -823,6 +824,27 @@ public static class Constants {
}
}

/**
 * Metric values published for the cluster applier service: one constant for
 * latency and one for failure. The string form of each constant is the
 * matching entry in {@link Constants}.
 */
public enum ClusterApplierServiceStatsValue implements MetricValue {
    CLUSTER_APPLIER_SERVICE_LATENCY(Constants.CLUSTER_APPLIER_SERVICE_LATENCY),
    CLUSTER_APPLIER_SERVICE_FAILURE(Constants.CLUSTER_APPLIER_SERVICE_FAILURE);

    /** String form of this constant, as returned by {@link #toString()}. */
    private final String value;

    /**
     * @param metricName string form to associate with the constant
     */
    ClusterApplierServiceStatsValue(final String metricName) {
        value = metricName;
    }

    /**
     * @return the string supplied at construction, not the enum identifier
     */
    @Override
    public String toString() {
        return this.value;
    }

    /** String literals backing the enum constants of the enclosing type. */
    public static class Constants {
        public static final String CLUSTER_APPLIER_SERVICE_LATENCY = "ClusterApplierService_Latency";
        public static final String CLUSTER_APPLIER_SERVICE_FAILURE = "ClusterApplierService_Failure";
    }
}

public enum MasterThrottlingValue implements MetricValue {
/**
* Sum of total pending tasks throttled by master node.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public class PerformanceAnalyzerMetrics {
public static final String sShardQueryPath = "shardquery";
public static final String sMasterTaskPath = "master_task";
public static final String sFaultDetection = "fault_detection";
public static final String sClusterApplierService = "cluster_applier_service";
public static final String sHttpPath = "http";
public static final String sOSPath = "os_metrics";
public static final String sHeapPath = "heap_metrics";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,16 @@ public class MetricsModel {
new MetricAttributes(
MetricUnits.COUNT.toString(), AllMetrics.ShardStateDimension.values()));

// Register the cluster applier service latency metric, keyed by the enum
// constant's toString() value, with millisecond units and the values of
// EmptyDimension as its dimensions.
allMetricsInitializer.put(
AllMetrics.ClusterApplierServiceStatsValue.CLUSTER_APPLIER_SERVICE_LATENCY.toString(),
new MetricAttributes(
MetricUnits.MILLISECOND.toString(), EmptyDimension.values()));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we need to include the failure metric here as well?

// Register the cluster applier service failure metric, keyed by the enum
// constant's toString() value, with count units and the values of
// EmptyDimension as its dimensions.
allMetricsInitializer.put(
AllMetrics.ClusterApplierServiceStatsValue.CLUSTER_APPLIER_SERVICE_FAILURE.toString(),
new MetricAttributes(
MetricUnits.COUNT.toString(), EmptyDimension.values()));

allMetricsInitializer.put(
AdmissionControlValue.REJECTION_COUNT.toString(),
new MetricAttributes(MetricUnits.COUNT.toString(), AdmissionControlDimension.values())
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.metrics;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Metric;

/**
 * RCA framework {@link Metric} for the cluster applier service failure value.
 *
 * <p>The metric name is the enum constant's {@code toString()} value
 * ({@code "ClusterApplierService_Failure"}). The enum overrides
 * {@code toString()}, so {@code name()} would instead return the identifier
 * {@code "CLUSTER_APPLIER_SERVICE_FAILURE"}, which differs from the string
 * form by more than letter case.
 */
public class ClusterApplierService_Failure extends Metric {
    /**
     * @param evaluationIntervalSeconds evaluation interval, in seconds, forwarded to {@link Metric}
     */
    public ClusterApplierService_Failure(long evaluationIntervalSeconds) {
        // Use toString(), not name(): the two differ for this enum.
        super(
                AllMetrics.ClusterApplierServiceStatsValue.CLUSTER_APPLIER_SERVICE_FAILURE.toString(),
                evaluationIntervalSeconds);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.metrics;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Metric;

/**
 * RCA framework {@link Metric} for the cluster applier service latency value.
 *
 * <p>The metric name is the enum constant's {@code toString()} value
 * ({@code "ClusterApplierService_Latency"}). The enum overrides
 * {@code toString()}, so {@code name()} would instead return the identifier
 * {@code "CLUSTER_APPLIER_SERVICE_LATENCY"}, which differs from the string
 * form by more than letter case.
 */
public class ClusterApplierService_Latency extends Metric {
    /**
     * @param evaluationIntervalSeconds evaluation interval, in seconds, forwarded to {@link Metric}
     */
    public ClusterApplierService_Latency(long evaluationIntervalSeconds) {
        // Use toString(), not name(): the two differ for this enum.
        super(
                AllMetrics.ClusterApplierServiceStatsValue.CLUSTER_APPLIER_SERVICE_LATENCY.toString(),
                evaluationIntervalSeconds);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ public enum ExceptionsAndErrors implements MeasurementSet {

FAULT_DETECTION_COLLECTOR_ERROR("FaultDetectionMetricsCollector"),

CLUSTER_APPLIER_SERVICE_STATS_COLLECTOR_ERROR("ClusterApplierServiceStatsCollector"),

SHARD_INDEXING_PRESSURE_COLLECTOR_ERROR("ShardIndexingPressureMetricsCollector");

/** What we want to appear as the metric name. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@ public enum WriterMetrics implements MeasurementSet {
Statistics.MAX, Statistics.MIN, Statistics.MEAN, Statistics.COUNT, Statistics.SUM)),

FAULT_DETECTION_COLLECTOR_EXECUTION_TIME("FaultDetectionCollectorExecutionTime", "millis", Arrays.asList(
Statistics.MAX, Statistics.MIN, Statistics.MEAN, Statistics.COUNT, Statistics.SUM)),
Statistics.MAX, Statistics.MIN, Statistics.MEAN, Statistics.COUNT, Statistics.SUM)),

CLUSTER_APPLIER_SERVICE_STATS_COLLECTOR_EXECUTION_TIME("ClusterApplierServiceStatsCollectorExecutionTime",
"millis", Arrays.asList(Statistics.MAX, Statistics.MIN, Statistics.MEAN, Statistics.COUNT,
Statistics.SUM)),

SHARD_INDEXING_PRESSURE_COLLECTOR_EXECUTION_TIME("ShardIndexingPressureCollectorExecutionTime", "millis", Arrays.asList(
Statistics.MAX, Statistics.MIN, Statistics.MEAN, Statistics.COUNT, Statistics.SUM)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ private MetricPropertiesConfig() {
metricPathMap.put(MetricName.MASTER_PENDING, PerformanceAnalyzerMetrics.sPendingTasksPath);
metricPathMap.put(MetricName.MOUNTED_PARTITION_METRICS,
PerformanceAnalyzerMetrics.sMountedPartitionMetricsPath);
metricPathMap.put(MetricName.CLUSTER_APPLIER_SERVICE, PerformanceAnalyzerMetrics.sClusterApplierService);
metricPathMap.put(MetricName.ADMISSION_CONTROL_METRICS, PerformanceAnalyzerMetrics.sAdmissionControlMetricsPath);
metricPathMap.put(MetricName.SHARD_INDEXING_PRESSURE, PerformanceAnalyzerMetrics.sShardIndexingPressurePath);

Expand All @@ -187,6 +188,8 @@ private MetricPropertiesConfig() {
PerformanceAnalyzerMetrics.sPendingTasksPath, MetricName.MASTER_PENDING);
eventKeyToMetricNameMap.put(PerformanceAnalyzerMetrics.sMountedPartitionMetricsPath,
MetricName.MOUNTED_PARTITION_METRICS);
eventKeyToMetricNameMap.put(PerformanceAnalyzerMetrics.sClusterApplierService,
MetricName.CLUSTER_APPLIER_SERVICE);
eventKeyToMetricNameMap.put(PerformanceAnalyzerMetrics.sAdmissionControlMetricsPath, MetricName.ADMISSION_CONTROL_METRICS);
eventKeyToMetricNameMap.put(PerformanceAnalyzerMetrics.sShardIndexingPressurePath, MetricName.SHARD_INDEXING_PRESSURE);

Expand Down Expand Up @@ -256,6 +259,13 @@ private MetricPropertiesConfig() {
DevicePartitionValue.values(),
createFileHandler(metricPathMap.get(MetricName.MOUNTED_PARTITION_METRICS))
));
// Describe the CLUSTER_APPLIER_SERVICE metric: MetricProperties.EMPTY_DIMENSION
// as its dimensions, every ClusterApplierServiceStatsValue constant as its
// values, and a file handler created from the path stored for this metric in
// metricPathMap.
metricName2Property.put(
MetricName.CLUSTER_APPLIER_SERVICE,
new MetricProperties(
MetricProperties.EMPTY_DIMENSION,
AllMetrics.ClusterApplierServiceStatsValue.values(),
createFileHandler(
metricPathMap.get(MetricName.CLUSTER_APPLIER_SERVICE))));
metricName2Property.put(MetricName.ADMISSION_CONTROL_METRICS,
new MetricProperties(
AdmissionControlDimension.values(),
Expand Down
3 changes: 3 additions & 0 deletions src/test/resources/reader/1566413960000
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ $
^master_throttling_metrics
{"current_time":1566413936555}
{"Data_RetryingPendingTasksCount":0,"Master_ThrottledPendingTasksCount":33}
^cluster_applier_service
{"current_time":1566413936555}
{"ClusterApplierService_Latency":23,"ClusterApplierService_Failure":3}$
^shard_state_metrics
{"current_time":1566413936488}
{"IndexName":"pmc"}
Expand Down
3 changes: 3 additions & 0 deletions src/test/resources/reader/1566413965000
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ $
^master_throttling_metrics
{"current_time":1566413966544}
{"Data_RetryingPendingTasksCount":0,"Master_ThrottledPendingTasksCount":33}
^cluster_applier_service
{"current_time":1566413966544}
{"ClusterApplierService_Latency":23,"ClusterApplierService_Failure":3}$
^shard_state_metrics
{"current_time":1566413966493}
{"IndexName":"pmc"}
Expand Down
3 changes: 3 additions & 0 deletions src/test/resources/reader/1566413970000
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ $
^master_throttling_metrics
{"current_time":1566413996947}
{"Data_RetryingPendingTasksCount":0,"Master_ThrottledPendingTasksCount":33}
^cluster_applier_service
{"current_time":1566413996947}
{"ClusterApplierService_Latency":23,"ClusterApplierService_Failure":3}$
^shard_state_metrics
{"current_time":1566413996664}
{"IndexName":"pmc"}
Expand Down