Skip to content

Commit

Permalink
Client Telemetry: Adds new public APIs (#4056)
Browse files Browse the repository at this point in the history
* Revert "[Internal] Client Telemetry: Refactors code for collectors (#4037)"

This reverts commit e2311a9.

* Revert "Revert "[Internal] Client Telemetry: Refactors code for collectors (#4037)""

This reverts commit f04234b.

* first draft

* initialize object

* null handle

* update contracts

* compilation charges

* fix tests

* public API changes

* add docs

* contract updated

* fixed tests

* by default switch off telemetry in sdk

* fix tests

* fix assertion

* incorporate review comments

* feature flag fix in script

* switch case

* add test

* fix tests

* fix test

* fixed run.sh

* minor changes

* code refactor

* changed default values and fix tests
  • Loading branch information
sourabh1007 authored Oct 4, 2023
1 parent b03df6b commit 72e96fa
Show file tree
Hide file tree
Showing 33 changed files with 501 additions and 287 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ public class BenchmarkConfig
[Option(Required = false, HelpText = "Disable core SDK logging")]
public bool DisableCoreSdkLogging { get; set; }

[Option(Required = false, HelpText = "Enable Distributed Tracing")]
public bool EnableDistributedTracing { get; set; }
[Option(Required = false, HelpText = "Disable Distributed Tracing feature from source")]
public bool DisableDistributedTracing { get; set; } = false;

[Option(Required = false, HelpText = "Client Telemetry Schedule in Seconds")]
public int TelemetryScheduleInSec { get; set; }
Expand Down Expand Up @@ -138,8 +138,8 @@ public class BenchmarkConfig
[Option(Required = false, HelpText = "Application Insights connection string")]
public string AppInsightsConnectionString { get; set; }

[Option(Required = false, HelpText = "Enable Client Telemetry Feature in SDK. Make sure you enable it from the portal also.")]
public bool EnableClientTelemetry { get; set; } = true;
[Option(Required = false, HelpText = "Disable Client Telemetry Feature in SDK. Make sure you enable it from the portal also.")]
public bool DisableClientTelemetry { get; set; } = false;

internal int GetTaskCount(int containerThroughput)
{
Expand Down Expand Up @@ -220,16 +220,18 @@ internal Microsoft.Azure.Cosmos.CosmosClient CreateCosmosClient(string accountKe
MaxRetryAttemptsOnRateLimitedRequests = 0,
MaxRequestsPerTcpConnection = this.MaxRequestsPerTcpConnection,
MaxTcpConnectionsPerEndpoint = this.MaxTcpConnectionsPerEndpoint,
EnableClientTelemetry = this.EnableClientTelemetry
CosmosClientTelemetryOptions = new Microsoft.Azure.Cosmos.CosmosClientTelemetryOptions()
{
DisableSendingMetricsToService = this.DisableClientTelemetry,
DisableDistributedTracing = this.DisableDistributedTracing
}
};

if (!string.IsNullOrWhiteSpace(this.ConsistencyLevel))
{
clientOptions.ConsistencyLevel = (Microsoft.Azure.Cosmos.ConsistencyLevel)Enum.Parse(typeof(Microsoft.Azure.Cosmos.ConsistencyLevel), this.ConsistencyLevel, ignoreCase: true);
}

clientOptions.IsDistributedTracingEnabled = this.EnableDistributedTracing;

return new Microsoft.Azure.Cosmos.CosmosClient(
this.EndPoint,
accountKey,
Expand Down
18 changes: 12 additions & 6 deletions Microsoft.Azure.Cosmos.Samples/Tools/Benchmark/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ then
exit -1
fi

if [ -z "$TELEMETRY_ENDPOINT" ]
then
echo "Missing TELEMETRY_ENDPOINT"
exit -1
fi

if [ -z "$INCLUDE_QUERY" ]
then
echo "Missing INCLUDE_QUERY"
Expand All @@ -41,26 +47,26 @@ echo $COMMIT_TIME
echo $BRANCH_NAME

# Client telemetry disabled ReadStreamExistsV3
dotnet run -c Release -- -n 2000000 -w ReadStreamExistsV3 --tcp 10 --pl $PL -e $ACCOUNT_ENDPOINT -k $ACCOUNT_KEY --disableClientTelemetry --disableDistributedTracing --enablelatencypercentiles --disablecoresdklogging --publishresults --resultspartitionkeyvalue $RESULTS_PK --commitid $COMMIT_ID --commitdate $COMMIT_DATE --committime $COMMIT_TIME --branchname $BRANCH_NAME --database testdb --partitionkeypath /pk --container testcol
dotnet run -c Release -- -n 2000000 -w ReadStreamExistsV3 --tcp 10 --pl $PL -e $ACCOUNT_ENDPOINT -k $ACCOUNT_KEY --enablelatencypercentiles --disablecoresdklogging --publishresults --resultspartitionkeyvalue $RESULTS_PK --commitid $COMMIT_ID --commitdate $COMMIT_DATE --committime $COMMIT_TIME --branchname $BRANCH_NAME --database testdb --container testcol --partitionkeypath /pk
sleep 10 #Wait

# Client telemetry enabled ReadStreamExistsV3. This is needed to see the impact of client telemetry.
dotnet run -c Release -- -n 2000000 -w ReadStreamExistsV3 --WorkloadName ReadStreamExistsV3WithTelemetry --telemetryScheduleInSec 60 --disableDistributedTracing --tcp 10 --pl $PL -e $ACCOUNT_ENDPOINT -k $ACCOUNT_KEY --enablelatencypercentiles --disablecoresdklogging --publishresults --resultspartitionkeyvalue $RESULTS_PK --commitid $COMMIT_ID --commitdate $COMMIT_DATE --committime $COMMIT_TIME --branchname $BRANCH_NAME --database testdb --partitionkeypath /pk --container testcol
dotnet run -c Release -- -n 2000000 -w ReadStreamExistsV3 --WorkloadName ReadStreamExistsV3WithTelemetry --enableTelemetry --telemetryScheduleInSec 60 --telemetryEndpoint $TELEMETRY_ENDPOINT --tcp 10 --pl $PL -e $ACCOUNT_ENDPOINT -k $ACCOUNT_KEY --enablelatencypercentiles --disablecoresdklogging --publishresults --resultspartitionkeyvalue $RESULTS_PK --commitid $COMMIT_ID --commitdate $COMMIT_DATE --committime $COMMIT_TIME --branchname $BRANCH_NAME --database testdb --container testcol --partitionkeypath /pk
sleep 10 #Wait

# Open telemetry enabled ReadStreamExistsV3. This is needed to see the impact of distributed tracing (without listener)
dotnet run -c Release -- -n 2000000 -w ReadStreamExistsV3 --WorkloadName ReadStreamExistsV3WithDistributedTracingWOListener --disableClientTelemetry --tcp 10 --pl $PL -e $ACCOUNT_ENDPOINT -k $ACCOUNT_KEY --enablelatencypercentiles --disablecoresdklogging --publishresults --resultspartitionkeyvalue $RESULTS_PK --commitid $COMMIT_ID --commitdate $COMMIT_DATE --committime $COMMIT_TIME --branchname $BRANCH_NAME --database testdb --partitionkeypath /pk --container testcol
dotnet run -c Release -- -n 2000000 -w ReadStreamExistsV3 --WorkloadName ReadStreamExistsV3WithDistributedTracingWOListener --enableDistributedTracing --tcp 10 --pl $PL -e $ACCOUNT_ENDPOINT -k $ACCOUNT_KEY --enablelatencypercentiles --disablecoresdklogging --publishresults --resultspartitionkeyvalue $RESULTS_PK --commitid $COMMIT_ID --commitdate $COMMIT_DATE --committime $COMMIT_TIME --branchname $BRANCH_NAME --database testdb --container testcol --partitionkeypath /pk
sleep 10 #Wait

#Point read operations
for WORKLOAD_NAME in ReadNotExistsV3 ReadTExistsV3 ReadStreamExistsWithDiagnosticsV3
do
dotnet run -c Release -- -n 2000000 -w $WORKLOAD_NAME --pl $PL --telemetryScheduleInSec 60 --tcp 10 -e $ACCOUNT_ENDPOINT -k $ACCOUNT_KEY --enablelatencypercentiles --disablecoresdklogging --publishresults --resultspartitionkeyvalue $RESULTS_PK --commitid $COMMIT_ID --commitdate $COMMIT_DATE --committime $COMMIT_TIME --branchname $BRANCH_NAME --database testdb --partitionkeypath /pk --container testcol
dotnet run -c Release -- -n 2000000 -w $WORKLOAD_NAME --pl $PL --enableTelemetry --telemetryScheduleInSec 60 --telemetryEndpoint $TELEMETRY_ENDPOINT --tcp 10 -e $ACCOUNT_ENDPOINT -k $ACCOUNT_KEY --enablelatencypercentiles --disablecoresdklogging --publishresults --resultspartitionkeyvalue $RESULTS_PK --commitid $COMMIT_ID --commitdate $COMMIT_DATE --committime $COMMIT_TIME --branchname $BRANCH_NAME --database testdb --container testcol --partitionkeypath /pk
sleep 10 #Wait
done

#Insert operation
dotnet run -c Release -- -n 2000000 -w InsertV3 --pl 30 --telemetryScheduleInSec 60 --tcp 1 -e $ACCOUNT_ENDPOINT -k $ACCOUNT_KEY --enablelatencypercentiles --disablecoresdklogging --publishresults --resultspartitionkeyvalue $RESULTS_PK --commitid $COMMIT_ID --commitdate $COMMIT_DATE --committime $COMMIT_TIME --branchname $BRANCH_NAME --database testdb --partitionkeypath /pk --container testcol
dotnet run -c Release -- -n 2000000 -w InsertV3 --pl 30 --enableTelemetry --telemetryScheduleInSec 60 --telemetryEndpoint $TELEMETRY_ENDPOINT --tcp 1 -e $ACCOUNT_ENDPOINT -k $ACCOUNT_KEY --enablelatencypercentiles --disablecoresdklogging --publishresults --resultspartitionkeyvalue $RESULTS_PK --commitid $COMMIT_ID --commitdate $COMMIT_DATE --committime $COMMIT_TIME --branchname $BRANCH_NAME --database testdb --container testcol --partitionkeypath /pk
sleep 45 #Wait

if [ "$INCLUDE_QUERY" = true ]
Expand All @@ -70,7 +76,7 @@ then
# pl is 16 because 18 was causing a small amount of throttling.
for WORKLOAD_NAME in ReadFeedStreamV3 QueryTSinglePkV3 QueryTSinglePkOrderByWithPaginationV3 QueryTSinglePkOrderByFullDrainV3 QueryTCrossPkV3 QueryTCrossPkOrderByWithPaginationV3 QueryTCrossPkOrderByFullDrainV3 QueryStreamSinglePkV3 QueryStreamSinglePkOrderByWithPaginationV3 QueryStreamSinglePkOrderByFullDrainV3 QueryStreamCrossPkV3 QueryStreamCrossPkOrderByWithPaginationV3 QueryStreamCrossPkOrderByFullDrainV3
do
dotnet run -c Release -- -n 200000 -w $WORKLOAD_NAME --pl 16 --telemetryScheduleInSec 60 --tcp 10 -e $ACCOUNT_ENDPOINT -k $ACCOUNT_KEY --enablelatencypercentiles --disablecoresdklogging --publishresults --resultspartitionkeyvalue $RESULTS_PK --commitid $COMMIT_ID --commitdate $COMMIT_DATE --committime $COMMIT_TIME --branchname $BRANCH_NAME --database testdb --partitionkeypath /pk --container testcol
dotnet run -c Release -- -n 200000 -w $WORKLOAD_NAME --pl 16 --enableTelemetry --telemetryScheduleInSec 60 --telemetryEndpoint $TELEMETRY_ENDPOINT --tcp 10 -e $ACCOUNT_ENDPOINT -k $ACCOUNT_KEY --enablelatencypercentiles --disablecoresdklogging --publishresults --resultspartitionkeyvalue $RESULTS_PK --commitid $COMMIT_ID --commitdate $COMMIT_DATE --committime $COMMIT_TIME --branchname $BRANCH_NAME --database testdb --container testcol --partitionkeypath /pk
sleep 10 #Wait
done
fi
14 changes: 11 additions & 3 deletions Microsoft.Azure.Cosmos.Samples/Tools/CTL/CTLConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ public string DiagnosticsThresholdDuration
[Option("ctl_reservoir_sample_size", Required = false, HelpText = "The reservoir sample size.")]
public int ReservoirSampleSize { get; set; } = 1028;

[Option("ctl_enable_client_telemetry", Required = false, HelpText = "Enable Client Telemetry Feature in SDK. Make sure you enable it from the portal also.")]
public bool EnableClientTelemetry { get; set; } = true;
[Option("ctl_disable_client_telemetry", Required = false, HelpText = "Disable Client Telemetry Feature in SDK. Make sure you enable it from the portal also.")]
public bool DisableClientTelemetry { get; set; } = false;

internal TimeSpan RunningTimeDurationAsTimespan { get; private set; } = TimeSpan.FromHours(10);
internal TimeSpan DiagnosticsThresholdDurationAsTimespan { get; private set; } = TimeSpan.FromSeconds(60);
Expand All @@ -130,19 +130,27 @@ internal CosmosClient CreateCosmosClient()
CosmosClientOptions clientOptions = new CosmosClientOptions()
{
ApplicationName = CTLConfig.UserAgentSuffix,
EnableClientTelemetry = this.EnableClientTelemetry
CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions()
{
DisableSendingMetricsToService = this.DisableClientTelemetry,
}
};

Console.WriteLine("ApplicationName = " + CTLConfig.UserAgentSuffix);
Console.WriteLine("DisableSendingMetricsToService = " + this.DisableClientTelemetry);

if (this.UseGatewayMode)
{
clientOptions.ConnectionMode = ConnectionMode.Gateway;
Console.WriteLine("ConnectionMode = " + ConnectionMode.Gateway);
}

if (!string.IsNullOrWhiteSpace(this.ConsistencyLevel))
{
if (Enum.TryParse(this.ConsistencyLevel, out ConsistencyLevel consistencyLevel))
{
clientOptions.ConsistencyLevel = consistencyLevel;
Console.WriteLine("ConsistencyLevel = " + consistencyLevel);
}
else
{
Expand Down
22 changes: 11 additions & 11 deletions Microsoft.Azure.Cosmos/src/ConnectionPolicy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ namespace Microsoft.Azure.Cosmos
using System.Net.Http;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using Microsoft.Azure.Cosmos.Telemetry;
using Microsoft.Azure.Documents;
using Microsoft.Azure.Documents.Client;

Expand Down Expand Up @@ -48,8 +47,9 @@ public ConnectionPolicy()
this.MaxConnectionLimit = defaultMaxConcurrentConnectionLimit;
this.RetryOptions = new RetryOptions();
this.EnableReadRequestsFallback = null;
this.EnableClientTelemetry = false; // by default feature flag is off
this.ServerCertificateCustomValidationCallback = null;

this.CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions();
}

/// <summary>
Expand Down Expand Up @@ -211,15 +211,6 @@ public bool EnableTcpConnectionEndpointRediscovery
set;
}

/// <summary>
/// Gets or sets the flag to enable client telemetry feature.
/// </summary>
internal bool EnableClientTelemetry
{
get;
set;
}

/// <summary>
/// Gets the default connection policy used to connect to the Azure Cosmos DB service.
/// </summary>
Expand Down Expand Up @@ -489,6 +480,15 @@ internal int? MaxTcpPartitionCount
set;
}

/// <summary>
/// Gets or sets Client Telemetry Options like feature flags and corresponding options
/// </summary>
internal CosmosClientTelemetryOptions CosmosClientTelemetryOptions
{
get;
set;
}

/// <summary>
/// GlobalEndpointManager will subscribe to this event if user updates the preferredLocations list in the Azure Cosmos DB service.
/// </summary>
Expand Down
42 changes: 12 additions & 30 deletions Microsoft.Azure.Cosmos/src/CosmosClientOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ namespace Microsoft.Azure.Cosmos
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using Microsoft.Azure.Cosmos.Fluent;
Expand Down Expand Up @@ -85,6 +84,7 @@ public CosmosClientOptions()
this.ConnectionProtocol = CosmosClientOptions.DefaultProtocol;
this.ApiType = CosmosClientOptions.DefaultApiType;
this.CustomHandlers = new Collection<RequestHandler>();
this.CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions();
}

/// <summary>
Expand Down Expand Up @@ -730,9 +730,14 @@ internal Protocol ConnectionProtocol
internal bool? EnableCpuMonitor { get; set; }

/// <summary>
/// Flag to enable telemetry
/// Gets or sets Client Telemetry Options like feature flags and corresponding options
/// </summary>
internal bool? EnableClientTelemetry { get; set; }
#if PREVIEW
public
#else
internal
#endif
CosmosClientTelemetryOptions CosmosClientTelemetryOptions { get; set; }

internal void SetSerializerIfNotConfigured(CosmosSerializer serializer)
{
Expand Down Expand Up @@ -771,12 +776,13 @@ internal virtual ConnectionPolicy GetConnectionPolicy(int clientId)
EnableTcpConnectionEndpointRediscovery = this.EnableTcpConnectionEndpointRediscovery,
EnableAdvancedReplicaSelectionForTcp = this.EnableAdvancedReplicaSelectionForTcp,
HttpClientFactory = this.httpClientFactory,
ServerCertificateCustomValidationCallback = this.ServerCertificateCustomValidationCallback
ServerCertificateCustomValidationCallback = this.ServerCertificateCustomValidationCallback,
CosmosClientTelemetryOptions = new CosmosClientTelemetryOptions()
};

if (this.EnableClientTelemetry.HasValue)
if (this.CosmosClientTelemetryOptions != null)
{
connectionPolicy.EnableClientTelemetry = this.EnableClientTelemetry.Value;
connectionPolicy.CosmosClientTelemetryOptions = this.CosmosClientTelemetryOptions;
}

if (this.ApplicationRegion != null)
Expand Down Expand Up @@ -1013,29 +1019,5 @@ public override bool CanConvert(Type objectType)
return objectType == typeof(DateTime);
}
}

/// <summary>
/// Distributed Tracing Options. <see cref="Microsoft.Azure.Cosmos.DistributedTracingOptions"/>
/// </summary>
/// <remarks> Applicable only when Operation level distributed tracing is enabled through <see cref="Microsoft.Azure.Cosmos.CosmosClientOptions.IsDistributedTracingEnabled"/></remarks>
internal DistributedTracingOptions DistributedTracingOptions { get; set; }

/// <summary>
/// Gets or sets the flag to generate operation level <see cref="System.Diagnostics.Activity"/> for methods calls using the Source Name "Azure.Cosmos.Operation".
/// </summary>
/// <value>
/// The default value is true (for preview package).
/// </value>
/// <remarks>This flag is there to disable it from source. Please Refer https://opentelemetry.io/docs/instrumentation/net/exporters/ to know more about open telemetry exporters</remarks>
#if PREVIEW
public
#else
internal
#endif
bool IsDistributedTracingEnabled { get; set; }
#if PREVIEW
= true;
#endif

}
}
58 changes: 58 additions & 0 deletions Microsoft.Azure.Cosmos/src/CosmosClientTelemetryOptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos
{
/// <summary>
/// Telemetry options for the Cosmos client: flags to disable sending telemetry to the service and to disable
/// distributed tracing, along with the corresponding threshold values.
/// </summary>
/// <remarks>
/// This type and its members are public only when the PREVIEW compilation symbol is defined; otherwise they are internal.
/// </remarks>
#if PREVIEW
public
#else
internal
#endif
class CosmosClientTelemetryOptions
{
/// <summary>
/// Disables sending telemetry to the service. <see cref="Microsoft.Azure.Cosmos.CosmosThresholdOptions"/> is not applicable to this as of now.
/// </summary>
/// <remarks>This option disables sending telemetry to the service even if it is opted in from the portal.</remarks>
/// <value>The default value is true, i.e. sending is disabled unless this is explicitly set to false.</value>
#if PREVIEW
public
#else
internal
#endif
bool DisableSendingMetricsToService { get; set; } = true;

/// <summary>
/// Gets or sets a flag that disables generation of operation level <see cref="System.Diagnostics.Activity"/>, which is otherwise generated if a listener is subscribed to the Source Name "Azure.Cosmos.Operation".
/// </summary>
/// <value>The default value is false when compiled with the PREVIEW symbol, and true otherwise.</value>
/// <remarks>Please refer to https://opentelemetry.io/docs/instrumentation/net/exporters/ to know more about OpenTelemetry exporters.</remarks>
#if PREVIEW
public
#else
internal
#endif
bool DisableDistributedTracing { get; set; } =
#if PREVIEW
false;
#else
true;
#endif

/// <summary>
/// Threshold values for Distributed Tracing.
/// These values decide whether to generate operation level <see cref="System.Diagnostics.Tracing.EventSource"/> with request diagnostics or not.
/// </summary>
/// <value>The default value is a new <see cref="Microsoft.Azure.Cosmos.CosmosThresholdOptions"/> instance with its own default thresholds.</value>
#if PREVIEW
public
#else
internal
#endif
CosmosThresholdOptions CosmosThresholdOptions { get; set; } = new CosmosThresholdOptions();

}
}
31 changes: 31 additions & 0 deletions Microsoft.Azure.Cosmos/src/CosmosThresholdOptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos
{
using System;

/// <summary>
/// Threshold values for Distributed Tracing.
/// </summary>
/// <remarks>
/// This type is public only when the PREVIEW compilation symbol is defined; otherwise it is internal.
/// </remarks>
#if PREVIEW
public
#else
internal
#endif
class CosmosThresholdOptions
{
/// <summary>
/// Latency threshold for non-point operations, i.e. Query.
/// </summary>
/// <value>The default value is 3 seconds.</value>
public TimeSpan NonPointOperationLatencyThreshold { get; set; } = TimeSpan.FromSeconds(3);

/// <summary>
/// Latency threshold for point operations, i.e. operations other than Query.
/// </summary>
/// <value>The default value is 1 second.</value>
public TimeSpan PointOperationLatencyThreshold { get; set; } = TimeSpan.FromSeconds(1);
}
}
Loading

0 comments on commit 72e96fa

Please sign in to comment.