Skip to content

Commit

Permalink
Add semantic caching, update model and locations
Browse files Browse the repository at this point in the history
  • Loading branch information
simonkurtz-MSFT committed Feb 7, 2025
1 parent a532a8d commit 2987fb5
Show file tree
Hide file tree
Showing 3 changed files with 375 additions and 52 deletions.
95 changes: 91 additions & 4 deletions labs/zero-to-production/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,19 @@ param openAIConfig array = []
// Azure OpenAI chat model deployment settings (no defaults; supplied by the caller)
param openAIModelName string
param openAIModelVersion string
param openAIDeploymentName string
param openAIModelSKU string
param openAIModelCapacity int
param openAIAPIVersion string = '2024-02-01'
// Raw policy XML to apply to the APIM OpenAI API (placeholders presumably replaced by the caller — see policy-4.xml)
param policyXml string

// Embeddings model deployment used by the semantic-cache lookup policy
param embeddingsDeploymentName string = 'text-embedding-ada-002'
param embeddingsModelName string = 'text-embedding-ada-002'
param embeddingsModelVersion string = '2'

// Redis Enterprise cluster settings backing the APIM semantic cache
param redisCacheName string = 'rediscache'
param redisCacheSKU string = 'Balanced_B0'
param redisCachePort int = 10000

// ------------------
// VARIABLES
// ------------------
Expand Down Expand Up @@ -47,7 +56,35 @@ module appInsightsModule '../../modules/monitor/v1/appinsights.bicep' = {
var appInsightsId = appInsightsModule.outputs.id
var appInsightsInstrumentationKey = appInsightsModule.outputs.instrumentationKey

// 3. API Management
// 3. Redis Cache
// Redis Enterprise cluster used as APIM's external cache for semantic caching.
// 2/4/25: 2024-10-01 is not yet available in all regions. 2024-09-01-preview is more widely available.

// https://learn.microsoft.com/azure/templates/microsoft.cache/redisenterprise
resource redisEnterprise 'Microsoft.Cache/redisEnterprise@2024-09-01-preview' = {
  name: '${redisCacheName}-${resourceSuffix}'
  location: resourceGroup().location
  sku: {
    name: redisCacheSKU
  }
}

// https://learn.microsoft.com/azure/templates/microsoft.cache/redisenterprise/databases
// Database inside the Redis Enterprise cluster. The RediSearch module is loaded
// because the APIM semantic-cache policies perform vector similarity searches
// over stored embeddings.
resource redisCache 'Microsoft.Cache/redisEnterprise/databases@2024-09-01-preview' = {
  name: 'default'
  parent: redisEnterprise
  properties: {
    // NoEviction: cached entries expire via the policy's TTL, not via memory pressure
    evictionPolicy: 'NoEviction'
    clusteringPolicy: 'EnterpriseCluster'
    modules: [
      {
        name: 'RediSearch'
      }
    ]
    port: redisCachePort
  }
}

// 4. API Management
module apimModule '../../modules/apim/v1/apim.bicep' = {
name: 'apimModule'
params: {
Expand All @@ -57,21 +94,45 @@ module apimModule '../../modules/apim/v1/apim.bicep' = {
}
}

// 4. Cognitive Services
// 5. Cognitive Services
// Deploys the Azure OpenAI account(s) described by openAIConfig and the chat model
// deployment. apimPrincipalId is passed so the module can authorize APIM's managed
// identity against the account(s) — see ../../modules/cognitive-services/v1/openai.bicep.
module openAIModule '../../modules/cognitive-services/v1/openai.bicep' = {
  name: 'openAIModule'
  params: {
    openAIConfig: openAIConfig
    openAIDeploymentName: openAIDeploymentName
    openAIModelName: openAIModelName
    openAIModelVersion: openAIModelVersion
    openAIModelSKU: openAIModelSKU
    openAIModelCapacity: openAIModelCapacity
    apimPrincipalId: apimModule.outputs.principalId
    lawId: lawId
  }
}

// 5. APIM OpenAI API
// Reference to the first Azure OpenAI account created by openAIModule, so the
// embeddings deployment below can be attached to it.
// NOTE(review): the '${name}-${resourceSuffix}' pattern must match the naming
// convention used inside openai.bicep — confirm if that module changes.
resource cognitiveService 'Microsoft.CognitiveServices/accounts@2024-10-01' existing = {
  name: '${openAIConfig[0].name}-${resourceSuffix}'
}

// Embeddings model deployment consumed by the azure-openai-semantic-cache-lookup
// policy (via the 'embeddings-backend' APIM backend below).
// The `parent:` reference already establishes ordering against the account, and
// `dependsOn` on an `existing` resource is a no-op, so the previous explicit
// `dependsOn: [cognitiveService]` was redundant (linter: no-unnecessary-dependson)
// and has been removed.
resource embeddingsDeployment 'Microsoft.CognitiveServices/accounts/deployments@2023-05-01' = {
  name: embeddingsDeploymentName
  parent: cognitiveService
  properties: {
    model: {
      // NOTE(review): falls back to '' when extendedOpenAIConfig is empty; an empty
      // format would fail deployment, but in that case the parent account would not
      // exist either — confirm this ternary is intentional.
      format: (length(openAIModule.outputs.extendedOpenAIConfig) > 0) ? 'OpenAI': ''
      name: embeddingsModelName
      version: embeddingsModelVersion
    }
  }
  sku: {
    name: 'Standard'
    capacity: 20
  }
}

// 6. APIM OpenAI API
module openAIAPIModule '../../modules/apim/v1/openai-api.bicep' = {
name: 'openAIAPIModule'
params: {
Expand All @@ -83,7 +144,7 @@ module openAIAPIModule '../../modules/apim/v1/openai-api.bicep' = {
}
}

// 6. Create New APIM Subscriptions
// 7. Create New APIM Subscriptions

// We presume the APIM resource has been created as part of this bicep flow.
resource apim 'Microsoft.ApiManagement/service@2024-06-01-preview' existing = {
Expand Down Expand Up @@ -116,6 +177,27 @@ resource apimSubscriptions 'Microsoft.ApiManagement/service/subscriptions@2024-0
]
}]

// https://learn.microsoft.com/azure/templates/microsoft.apimanagement/service/caches
// Registers the Redis Enterprise database as APIM's external cache, which the
// semantic-cache policies in policy-4.xml read from and write to.
// 'Default' makes the cache available to all gateway locations.
resource apimCache 'Microsoft.ApiManagement/service/caches@2024-06-01-preview' = {
  name: 'Default'
  parent: apim
  properties: {
    // StackExchange.Redis-style connection string; the database access key is the password
    connectionString: '${redisEnterprise.properties.hostName}:${redisCachePort},password=${redisCache.listKeys().primaryKey},ssl=True,abortConnect=False'
    useFromLocation: 'Default'
    description: redisEnterprise.properties.hostName
  }
}

// APIM backend pointing at the embeddings deployment on the first Azure OpenAI
// account. It is referenced by id from policy-4.xml's
// azure-openai-semantic-cache-lookup policy (embeddings-backend-id).
resource backendEmbeddings 'Microsoft.ApiManagement/service/backends@2024-06-01-preview' = {
  name: 'embeddings-backend' // this name is hard coded in the policy.xml file
  parent: apim
  properties: {
    description: 'Embeddings Backend'
    url: '${openAIModule.outputs.extendedOpenAIConfig[0].endpoint}openai/deployments/${embeddingsDeploymentName}/embeddings'
    protocol: 'http' // APIM backend protocol value; the transport scheme comes from the url
  }
}

// ------------------
// MARK: OUTPUTS
// ------------------
Expand All @@ -133,3 +215,8 @@ output apimSubscription1Key string = apimSubscriptions[0].listSecrets().primaryK
output apimSubscription2Key string = apimSubscriptions[1].listSecrets().primaryKey
#disable-next-line outputs-should-not-contain-secrets
output apimSubscription3Key string = apimSubscriptions[2].listSecrets().primaryKey

// Redis connection details for consumers of this deployment; exposing the access
// key as an output is deliberate here, hence the lint suppression.
output redisCacheHost string = redisEnterprise.properties.hostName
#disable-next-line outputs-should-not-contain-secrets
output redisCacheKey string = redisCache.listKeys().primaryKey
output redisCachePort int = redisCachePort
46 changes: 46 additions & 0 deletions labs/zero-to-production/policy-4.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<policies>
    <inbound>
        <base />
        <!-- Policy 4 - Semantic Caching -->
        <!-- Compute embeddings for the incoming prompt (via the 'embeddings-backend'
             backend defined in main.bicep) and look them up in the external Redis cache.
             On a hit at or above the 0.8 similarity threshold, the cached response is
             returned and processing automatically moves past the
             azure-openai-semantic-cache-store policy in <outbound>. -->
        <azure-openai-semantic-cache-lookup score-threshold="0.8" embeddings-backend-id ="embeddings-backend" embeddings-backend-auth ="system-assigned" />
        <!-- Authenticate to Azure OpenAI with API Management's managed identity -->
        <authentication-managed-identity resource="https://cognitiveservices.azure.com" output-token-variable-name="managed-id-access-token" ignore-error="false" />
        <set-header name="Authorization" exists-action="override">
            <value>@("Bearer " + (string)context.Variables["managed-id-access-token"])</value>
        </set-header>
        <!-- {backend-id} is a placeholder replaced before this policy is applied -->
        <set-backend-service backend-id="{backend-id}" />
        <!-- Policy 3 - Limit the tokens per minute per subscription ({tpm} is a placeholder) -->
        <azure-openai-token-limit counter-key="@(context.Subscription.Id)" tokens-per-minute="{tpm}" estimate-prompt-tokens="false" remaining-tokens-variable-name="remainingTokens" />
        <!-- Policy 2 - Emit the Azure OpenAI token metrics to Application Insights -->
        <azure-openai-emit-token-metric namespace="openai">
            <dimension name="Subscription ID" value="@(context.Subscription.Id)" />
            <dimension name="Client IP" value="@(context.Request.IpAddress)" />
            <dimension name="API ID" value="@(context.Api.Id)" />
            <dimension name="User ID" value="@(context.Request.Headers.GetValueOrDefault("x-user-id", "N/A"))" />
        </azure-openai-emit-token-metric>
    </inbound>
    <backend>
        <!-- Policy 1 - Apply load-balancing and retry mechanisms -->
        <!-- Retry on 429, or on 503s that do not indicate the whole backend pool is
             temporarily unavailable. Set {retry-count} to one less than the number of
             backends in the pool so all backends are tried. -->
        <retry count="{retry-count}" interval="0" first-fast-retry="true" condition="@(context.Response.StatusCode == 429 || (context.Response.StatusCode == 503 && !context.Response.StatusReason.Contains("Backend pool") && !context.Response.StatusReason.Contains("is temporarily unavailable")))">
            <forward-request buffer-request-body="true" />
        </retry>
    </backend>
    <outbound>
        <!-- Cache the Gen AI response in Redis for 2 minutes (skipped on a cache hit) -->
        <azure-openai-semantic-cache-store duration="120" />
        <base />
    </outbound>
    <on-error>
        <base />
        <choose>
            <!-- Return a generic error that does not reveal backend pool details. -->
            <when condition="@(context.Response.StatusCode == 503)">
                <return-response>
                    <set-status code="503" reason="Service Unavailable" />
                </return-response>
            </when>
        </choose>
    </on-error>
</policies>
Loading

0 comments on commit 2987fb5

Please sign in to comment.