Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Operational metrics for 128GB heap decider components #478

Merged
merged 3 commits into from
Oct 21, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ public enum StatExceptionCode {
RCA_SCHEDULER_THREAD_STOPPED("RcaSchedulerThreadStopped"),
JVM_THREAD_ID_NO_LONGER_EXISTS("JVM_THREAD_ID_NO_LONGER_EXISTS"),
ES_REQUEST_INTERCEPTOR_ERROR("ES_REQUEST_INTERCEPTOR_ERROR"),
INVALID_OLD_GEN_SIZE("InvalidOldGenSize"),
MISCONFIGURED_OLD_GEN_RCA("MisconfiguredOldGenRca"),
OTHER("Other");

private final String value;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package com.amazon.opendistro.elasticsearch.performanceanalyzer.decisionmaker.deciders.jvm.sizing;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.AppContext;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.collectors.StatsCollector;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.decisionmaker.actions.Action;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.decisionmaker.actions.HeapSizeIncreaseAction;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.decisionmaker.deciders.AlarmMonitor;
Expand All @@ -39,6 +40,7 @@

public class HeapSizeIncreasePolicy implements DecisionPolicy {

private static final String HEAP_SIZE_INCREASE_ACTION_RECOMMENDED = "RecommendHeapSizeIncrease";
private final LargeHeapClusterRca largeHeapClusterRca;
private AppContext appContext;
private RcaConf rcaConf;
Expand All @@ -59,6 +61,7 @@ public List<Action> evaluate() {
if (!heapSizeIncreaseClusterMonitor.isHealthy()) {
Action heapSizeIncreaseAction = new HeapSizeIncreaseAction(appContext);
if (heapSizeIncreaseAction.isActionable()) {
StatsCollector.instance().logMetric(HEAP_SIZE_INCREASE_ACTION_RECOMMENDED);
actions.add(heapSizeIncreaseAction);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import static com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.GCType.TOT_FULL_GC;
import static com.amazon.opendistro.elasticsearch.performanceanalyzer.metrics.AllMetrics.HeapDimension.MEM_TYPE;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.collectors.StatExceptionCode;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.collectors.StatsCollector;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.metricsdb.MetricsDB;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Metric;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Rca;
Expand Down Expand Up @@ -50,6 +52,7 @@ public OldGenRca(long evaluationIntervalSeconds, Metric heapUsed, Metric heapMax

protected double getMaxOldGenSizeOrDefault(final double defaultValue) {
if (heap_Max == null) {
StatsCollector.instance().logException(StatExceptionCode.MISCONFIGURED_OLD_GEN_RCA);
throw new IllegalStateException("RCA: " + this.name() + "was not configured in the graph to "
+ "take heap_Max as a metric. Please check the analysis graph!");
}
Expand All @@ -75,6 +78,7 @@ protected double getMaxOldGenSizeOrDefault(final double defaultValue) {

protected int getFullGcEventsOrDefault(final double defaultValue) {
if (gc_event == null) {
StatsCollector.instance().logException(StatExceptionCode.MISCONFIGURED_OLD_GEN_RCA);
throw new IllegalStateException("RCA: " + this.name() + "was not configured in the graph to "
+ "take gc_event as a metric. Please check the analysis graph!");
}
Expand All @@ -100,6 +104,7 @@ protected int getFullGcEventsOrDefault(final double defaultValue) {

protected double getOldGenUsedOrDefault(final double defaultValue) {
if (heap_Used == null) {
StatsCollector.instance().logException(StatExceptionCode.MISCONFIGURED_OLD_GEN_RCA);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Name this one something different so we can tell it apart from the one on line 81.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought one sparse metric was enough, but I have added separate ones anyway, just to avoid confusion.

throw new IllegalStateException("RCA: " + this.name() + "was not configured in the graph to "
+ "take heap_Used as a metric. Please check the analysis graph!");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.jvmsizing;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.collectors.StatExceptionCode;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.collectors.StatsCollector;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.configs.HighOldGenOccupancyRcaConfig;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Metric;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.api.Resources.State;
Expand Down Expand Up @@ -112,6 +114,10 @@ private void addToSlidingWindow() {

if (maxOldGen == 0d) {
LOG.info("Max Old Gen capacity cannot be 0. Skipping.");
StatsCollector.instance().logException(StatExceptionCode.INVALID_OLD_GEN_SIZE);
// TODO: Unit tests for this RCA and OldGenReclamation are in the PR where we check
// the GC type before evaluating.
return;
}

this.oldGenUtilizationSlidingWindow.next(new SlidingWindowData(System.currentTimeMillis(),
Expand Down