From 81dcd301b85f30dc3dbf0a82e4f49d1619aff6ce Mon Sep 17 00:00:00 2001 From: Karthik Kumarguru Date: Fri, 16 Oct 2020 18:04:10 -0700 Subject: [PATCH 1/3] Add operational metrics to heap size increase policy --- .../performanceanalyzer/collectors/StatExceptionCode.java | 2 ++ .../deciders/jvm/sizing/HeapSizeIncreasePolicy.java | 3 +++ .../performanceanalyzer/rca/store/rca/OldGenRca.java | 5 +++++ .../rca/store/rca/jvmsizing/HighOldGenOccupancyRca.java | 4 ++++ 4 files changed, 14 insertions(+) diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/collectors/StatExceptionCode.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/collectors/StatExceptionCode.java index 2485a29f4..3712c1875 100644 --- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/collectors/StatExceptionCode.java +++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/collectors/StatExceptionCode.java @@ -45,6 +45,8 @@ public enum StatExceptionCode { RCA_SCHEDULER_THREAD_STOPPED("RcaSchedulerThreadStopped"), JVM_THREAD_ID_NO_LONGER_EXISTS("JVM_THREAD_ID_NO_LONGER_EXISTS"), ES_REQUEST_INTERCEPTOR_ERROR("ES_REQUEST_INTERCEPTOR_ERROR"), + INVALID_OLD_GEN_SIZE("InvalidOldGenSize"), + MISCONFIGURED_OLD_GEN_RCA("MisconfiguredOldGenRca"), OTHER("Other"); private final String value; diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/decisionmaker/deciders/jvm/sizing/HeapSizeIncreasePolicy.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/decisionmaker/deciders/jvm/sizing/HeapSizeIncreasePolicy.java index 74dec6532..7d0713098 100644 --- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/decisionmaker/deciders/jvm/sizing/HeapSizeIncreasePolicy.java +++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/decisionmaker/deciders/jvm/sizing/HeapSizeIncreasePolicy.java @@ -16,6 +16,7 @@ package 
com.amazon.opendistro.elasticsearch.performanceanalyzer.decisionmaker.deciders.jvm.sizing; import com.amazon.opendistro.elasticsearch.performanceanalyzer.AppContext; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.collectors.StatsCollector; import com.amazon.opendistro.elasticsearch.performanceanalyzer.decisionmaker.actions.Action; import com.amazon.opendistro.elasticsearch.performanceanalyzer.decisionmaker.actions.HeapSizeIncreaseAction; import com.amazon.opendistro.elasticsearch.performanceanalyzer.decisionmaker.deciders.AlarmMonitor; @@ -39,6 +40,7 @@ public class HeapSizeIncreasePolicy implements DecisionPolicy { + private static final String HEAP_SIZE_INCREASE_ACTION_RECOMMENDED = "RecommendHeapSizeIncrease"; private final LargeHeapClusterRca largeHeapClusterRca; private AppContext appContext; private RcaConf rcaConf; @@ -59,6 +61,7 @@ public List evaluate() { if (!heapSizeIncreaseClusterMonitor.isHealthy()) { Action heapSizeIncreaseAction = new HeapSizeIncreaseAction(appContext); if (heapSizeIncreaseAction.isActionable()) { + StatsCollector.instance().logMetric(HEAP_SIZE_INCREASE_ACTION_RECOMMENDED); actions.add(heapSizeIncreaseAction); } } diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/OldGenRca.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/OldGenRca.java index 296715d9d..dc2f6fdc8 100644 --- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/OldGenRca.java +++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/OldGenRca.java @@ -19,6 +19,8 @@ import static com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.GCType.TOT_FULL_GC; import static com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.HeapDimension.MEM_TYPE; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.collectors.StatExceptionCode; +import 
com.amazon.opendistro.elasticsearch.performanceanalyzer.collectors.StatsCollector; import com.amazon.opendistro.elasticsearch.performanceanalyzer.metricsdb.MetricsDB; import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Metric; import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Rca; @@ -50,6 +52,7 @@ public OldGenRca(long evaluationIntervalSeconds, Metric heapUsed, Metric heapMax protected double getMaxOldGenSizeOrDefault(final double defaultValue) { if (heap_Max == null) { + StatsCollector.instance().logException(StatExceptionCode.MISCONFIGURED_OLD_GEN_RCA); throw new IllegalStateException("RCA: " + this.name() + "was not configured in the graph to " + "take heap_Max as a metric. Please check the analysis graph!"); } @@ -75,6 +78,7 @@ protected double getMaxOldGenSizeOrDefault(final double defaultValue) { protected int getFullGcEventsOrDefault(final double defaultValue) { if (gc_event == null) { + StatsCollector.instance().logException(StatExceptionCode.MISCONFIGURED_OLD_GEN_RCA); throw new IllegalStateException("RCA: " + this.name() + "was not configured in the graph to " + "take gc_event as a metric. Please check the analysis graph!"); } @@ -100,6 +104,7 @@ protected int getFullGcEventsOrDefault(final double defaultValue) { protected double getOldGenUsedOrDefault(final double defaultValue) { if (heap_Used == null) { + StatsCollector.instance().logException(StatExceptionCode.MISCONFIGURED_OLD_GEN_RCA); throw new IllegalStateException("RCA: " + this.name() + "was not configured in the graph to " + "take heap_Used as a metric. 
Please check the analysis graph!"); } diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/jvmsizing/HighOldGenOccupancyRca.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/jvmsizing/HighOldGenOccupancyRca.java index 9737681d7..7729ee9ca 100644 --- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/jvmsizing/HighOldGenOccupancyRca.java +++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/jvmsizing/HighOldGenOccupancyRca.java @@ -15,6 +15,8 @@ package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.jvmsizing; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.collectors.StatExceptionCode; +import com.amazon.opendistro.elasticsearch.performanceanalyzer.collectors.StatsCollector; import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.configs.HighOldGenOccupancyRcaConfig; import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Metric; import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Resources.State; @@ -112,6 +114,8 @@ private void addToSlidingWindow() { if (maxOldGen == 0d) { LOG.info("Max Old Gen capacity cannot be 0. 
Skipping."); + StatsCollector.instance().logException(StatExceptionCode.INVALID_OLD_GEN_SIZE); + return; } this.oldGenUtilizationSlidingWindow.next(new SlidingWindowData(System.currentTimeMillis(), From ad5d1606c8ad66e211c31633f9c150cbad523fc9 Mon Sep 17 00:00:00 2001 From: Karthik Kumarguru Date: Fri, 16 Oct 2020 18:48:15 -0700 Subject: [PATCH 2/3] Added a TODO for the div by zero return case --- .../rca/store/rca/jvmsizing/HighOldGenOccupancyRca.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/jvmsizing/HighOldGenOccupancyRca.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/jvmsizing/HighOldGenOccupancyRca.java index 7729ee9ca..1c8ebd24c 100644 --- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/jvmsizing/HighOldGenOccupancyRca.java +++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/jvmsizing/HighOldGenOccupancyRca.java @@ -115,6 +115,8 @@ private void addToSlidingWindow() { if (maxOldGen == 0d) { LOG.info("Max Old Gen capacity cannot be 0. Skipping."); StatsCollector.instance().logException(StatExceptionCode.INVALID_OLD_GEN_SIZE); + // TODO: Unit tests for this RCA and OldGenReclamation are in the PR where we check + // the GC type before evaluating. 
return; } From 037b5f3232d69e97bf99ba296e6dd26370115213 Mon Sep 17 00:00:00 2001 From: Karthik Kumarguru Date: Tue, 20 Oct 2020 02:04:10 -0700 Subject: [PATCH 3/3] Split metric into three --- .../performanceanalyzer/collectors/StatExceptionCode.java | 4 +++- .../performanceanalyzer/rca/store/rca/OldGenRca.java | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/collectors/StatExceptionCode.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/collectors/StatExceptionCode.java index 3712c1875..d6da83675 100644 --- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/collectors/StatExceptionCode.java +++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/collectors/StatExceptionCode.java @@ -46,7 +46,9 @@ public enum StatExceptionCode { JVM_THREAD_ID_NO_LONGER_EXISTS("JVM_THREAD_ID_NO_LONGER_EXISTS"), ES_REQUEST_INTERCEPTOR_ERROR("ES_REQUEST_INTERCEPTOR_ERROR"), INVALID_OLD_GEN_SIZE("InvalidOldGenSize"), - MISCONFIGURED_OLD_GEN_RCA("MisconfiguredOldGenRca"), + MISCONFIGURED_OLD_GEN_RCA_HEAP_MAX_MISSING("MisconfiguredOldGenRcaHeapMaxMissing"), + MISCONFIGURED_OLD_GEN_RCA_HEAP_USED_MISSING("MisconfiguredOldGenRcaHeapUsedMissing"), + MISCONFIGURED_OLD_GEN_RCA_GC_EVENTS_MISSING("MisconfiguredOldGenRcaGcEventsMissing"), OTHER("Other"); private final String value; diff --git a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/OldGenRca.java b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/OldGenRca.java index dc2f6fdc8..aa32077ad 100644 --- a/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/OldGenRca.java +++ b/src/main/java/com/amazon/opendistro/elasticsearch/performanceanalyzer/rca/store/rca/OldGenRca.java @@ -52,7 +52,7 @@ public OldGenRca(long evaluationIntervalSeconds, Metric heapUsed, Metric heapMax protected 
double getMaxOldGenSizeOrDefault(final double defaultValue) { if (heap_Max == null) { - StatsCollector.instance().logException(StatExceptionCode.MISCONFIGURED_OLD_GEN_RCA); + StatsCollector.instance().logException(StatExceptionCode.MISCONFIGURED_OLD_GEN_RCA_HEAP_MAX_MISSING); throw new IllegalStateException("RCA: " + this.name() + "was not configured in the graph to " + "take heap_Max as a metric. Please check the analysis graph!"); } @@ -78,7 +78,7 @@ protected double getMaxOldGenSizeOrDefault(final double defaultValue) { protected int getFullGcEventsOrDefault(final double defaultValue) { if (gc_event == null) { - StatsCollector.instance().logException(StatExceptionCode.MISCONFIGURED_OLD_GEN_RCA); + StatsCollector.instance().logException(StatExceptionCode.MISCONFIGURED_OLD_GEN_RCA_GC_EVENTS_MISSING); throw new IllegalStateException("RCA: " + this.name() + "was not configured in the graph to " + "take gc_event as a metric. Please check the analysis graph!"); } @@ -104,7 +104,7 @@ protected int getFullGcEventsOrDefault(final double defaultValue) { protected double getOldGenUsedOrDefault(final double defaultValue) { if (heap_Used == null) { - StatsCollector.instance().logException(StatExceptionCode.MISCONFIGURED_OLD_GEN_RCA); + StatsCollector.instance().logException(StatExceptionCode.MISCONFIGURED_OLD_GEN_RCA_HEAP_USED_MISSING); throw new IllegalStateException("RCA: " + this.name() + "was not configured in the graph to " + "take heap_Used as a metric. Please check the analysis graph!"); }