Skip to content

Commit

Permalink
Merge pull request #93 from Azure-Samples/fix-load-balancing-priority-weight
Browse files Browse the repository at this point in the history

Fix missing priority & weight properties
  • Loading branch information
vieiraae authored Jan 15, 2025
2 parents 158f3bc + 557b48c commit cebe490
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 10 deletions.
17 changes: 10 additions & 7 deletions labs/backend-pool-load-balancing/backend-pool-load-balancing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,11 @@
"\n",
"apim_sku = 'Basicv2'\n",
"\n",
"# Prioritize UK South until exhaustion (simulate PTU with TPM), then equally distribute between Sweden and France (consumption fallback)\n",
"openai_resources = [\n",
" {\"name\": \"openai1\", \"location\": \"uksouth\", \"priority\": 1, \"weight\": 80},\n",
" {\"name\": \"openai2\", \"location\": \"swedencentral\", \"priority\": 1, \"weight\": 10},\n",
" {\"name\": \"openai3\", \"location\": \"francecentral\", \"priority\": 1, \"weight\": 10}\n",
" {\"name\": \"openai1\", \"location\": \"uksouth\", \"priority\": 1},\n",
" {\"name\": \"openai2\", \"location\": \"swedencentral\", \"priority\": 2, \"weight\": 50},\n",
" {\"name\": \"openai3\", \"location\": \"francecentral\", \"priority\": 2, \"weight\": 50}\n",
"]\n",
"\n",
"openai_deployment_name = \"gpt-35-turbo\"\n",
Expand All @@ -79,7 +80,7 @@
"openai_model_capacity = 8\n",
"openai_api_version = \"2024-02-01\"\n",
"\n",
"utils.print_ok('Notebook initiaized')"
"utils.print_ok('Notebook initialized')"
]
},
{
Expand Down Expand Up @@ -208,7 +209,7 @@
"source": [
"import requests, time\n",
"\n",
"runs = 10\n",
"runs = 20\n",
"sleep_time_ms = 100\n",
"url = f\"{apim_resource_gateway_url}/openai/deployments/{openai_deployment_name}/chat/completions?api-version={openai_api_version}\"\n",
"api_runs = []\n",
Expand Down Expand Up @@ -265,7 +266,9 @@
"metadata": {},
"source": [
"<a id='plot'></a>\n",
"### 🔍 Analyze Load Balancing results\n"
"### 🔍 Analyze Load Balancing results\n",
"\n",
"The priority 1 backend is used until its TPM quota is exhausted; after that, requests are distributed nearly equally across the two priority 2 backends, which are weighted 50/50."
]
},
{
Expand Down Expand Up @@ -323,7 +326,7 @@
"import time\n",
"from openai import AzureOpenAI\n",
"\n",
"runs = 10\n",
"runs = 20\n",
"sleep_time_ms = 100\n",
"\n",
"client = AzureOpenAI(\n",
Expand Down
Binary file modified labs/backend-pool-load-balancing/result.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 4 additions & 3 deletions modules/apim/v1/openai-api.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,10 @@ resource backendPoolOpenAI 'Microsoft.ApiManagement/service/backends@2024-06-01-
type: 'Pool'
pool: {
services: [for (config, i) in openAIConfig: {
id: '/backends/${backendOpenAI[i].name}'
}
]
id: '/backends/${backendOpenAI[i].name}'
priority: config.?priority
weight: config.?weight
}]
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions modules/cognitive-services/v1/openai.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,12 @@ resource roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = [
// ------------------

output extendedOpenAIConfig array = [for (config, i) in openAIConfig: {
// Original openAIConfig properties
name: config.name
location: config.location
priority: config.?priority
weight: config.?weight
// Additional properties
sku: openAISku
deploymentName: openAIDeploymentName
modelName: openAIModelName
Expand Down

0 comments on commit cebe490

Please sign in to comment.