diff --git a/labs/progressive-policies-1/README.MD b/labs/progressive-policies-1/README.MD
new file mode 100644
index 0000000..9c0941b
--- /dev/null
+++ b/labs/progressive-policies-1/README.MD
@@ -0,0 +1,26 @@
+# APIM ❤️ OpenAI
+
+## [Progressive Policies lab](progressive-policies-1.ipynb)
+
+Playground to create a combination of several policies in an iterative approach. We start with load balancing, then progressively add token emitting, rate limiting, and semantic caching. Each of these policy sets is derived from other labs in this repo.
+
+[View policy configuration](policy.xml)
+
+### Prerequisites
+
+- [Python 3.12 or later version](https://www.python.org/) installed
+- [Pandas Library](https://pandas.pydata.org) and [matplotlib](https://matplotlib.org/) installed
+- [VS Code](https://code.visualstudio.com/) installed with the [Jupyter notebook extension](https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter) enabled
+- [Azure CLI](https://learn.microsoft.com/cli/azure/install-azure-cli) installed
+- [An Azure Subscription](https://azure.microsoft.com/free/) with Contributor permissions
+- [Access granted to Azure OpenAI](https://aka.ms/oai/access)
+- [Sign in to Azure with Azure CLI](https://learn.microsoft.com/cli/azure/authenticate-azure-cli-interactively)
+
+### 🚀 Get started
+
+Proceed by opening the [Jupyter notebook](progressive-policies-1.ipynb), and follow the steps provided.
+
+### 🗑️ Clean up resources
+
+When you're finished with the lab, you should remove all your deployed resources from Azure to avoid extra charges and keep your Azure subscription uncluttered.
+Use the [clean-up-resources notebook](clean-up-resources.ipynb) for that.
diff --git a/labs/progressive-policies-1/main.bicep b/labs/progressive-policies-1/main.bicep
new file mode 100644
index 0000000..d9127b5
--- /dev/null
+++ b/labs/progressive-policies-1/main.bicep
@@ -0,0 +1,134 @@
+// ------------------
+// PARAMETERS
+// ------------------
+
+// Typically, parameters would be decorated with appropriate metadata and attributes, but as they are very repetitive in these labs, we omit them for brevity.
+
+param apimSku string
+param openAIConfig array = []
+param openAIModelName string
+param openAIModelVersion string
+param openAIDeploymentName string
+param openAIModelCapacity int
+param openAIAPIVersion string = '2024-02-01'
+param policyXml string
+
+// ------------------
+// VARIABLES
+// ------------------
+
+var resourceSuffix = uniqueString(subscription().id, resourceGroup().id)
+var apiManagementName = 'apim-${resourceSuffix}'
+var openAISubscriptionName = 'openai-subscription'
+var openAISubscriptionDescription = 'OpenAI Subscription'
+var openAIAPIName = 'openai'
+
+// ------------------
+// RESOURCES
+// ------------------
+
+// 1. Log Analytics Workspace
+module lawModule '../../modules/operational-insights/v1/workspaces.bicep' = {
+ name: 'lawModule'
+}
+
+var lawId = lawModule.outputs.id
+
+// 2. Application Insights
+module appInsightsModule '../../modules/monitor/v1/appinsights.bicep' = {
+ name: 'appInsightsModule'
+ params: {
+ workbookJson: loadTextContent('openai-usage-analysis-workbook.json')
+ lawId: lawId
+ customMetricsOptedInType: 'WithDimensions'
+ }
+}
+
+var appInsightsId = appInsightsModule.outputs.id
+var appInsightsInstrumentationKey = appInsightsModule.outputs.instrumentationKey
+
+// 3. API Management
+module apimModule '../../modules/apim/v1/apim.bicep' = {
+ name: 'apimModule'
+ params: {
+ apimSku: apimSku
+ appInsightsInstrumentationKey: appInsightsInstrumentationKey
+ appInsightsId: appInsightsId
+ }
+}
+
+// 4. Cognitive Services
+module openAIModule '../../modules/cognitive-services/v1/openai.bicep' = {
+ name: 'openAIModule'
+ params: {
+ openAIConfig: openAIConfig
+ openAIDeploymentName: openAIDeploymentName
+ openAIModelName: openAIModelName
+ openAIModelVersion: openAIModelVersion
+ openAIModelCapacity: openAIModelCapacity
+ apimPrincipalId: apimModule.outputs.principalId
+ lawId: lawId
+ }
+}
+
+// 5. APIM OpenAI API
+module openAIAPIModule '../../modules/apim/v1/openai-api.bicep' = {
+ name: 'openAIAPIModule'
+ params: {
+ policyXml: policyXml
+ openAIConfig: openAIModule.outputs.extendedOpenAIConfig
+ openAIAPIVersion: openAIAPIVersion
+ appInsightsInstrumentationKey: appInsightsInstrumentationKey
+ appInsightsId: appInsightsId
+ }
+}
+
+// 6. Create New APIM Subscriptions
+
+// We presume the APIM resource has been created as part of this bicep flow.
+resource apim 'Microsoft.ApiManagement/service@2024-06-01-preview' existing = {
+ name: apiManagementName
+ dependsOn: [
+ apimModule
+ ]
+}
+
+resource api 'Microsoft.ApiManagement/service/apis@2024-06-01-preview' existing = {
+ parent: apim
+ name: openAIAPIName
+ dependsOn: [
+ openAIAPIModule
+ ]
+}
+
+// Ignore the subscription that gets created in the APIM module and create three new ones for this lab.
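+// Note: range(1, 3) yields [1, 2, 3] (start index 1, count 3), so three subscriptions are created.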
+resource apimSubscriptions 'Microsoft.ApiManagement/service/subscriptions@2024-06-01-preview' = [for i in range(1, 3): {
+ name: '${openAISubscriptionName}${i}'
+ parent: apim
+ properties: {
+ allowTracing: true
+ displayName: '${openAISubscriptionDescription} ${i}'
+    scope: api.id
+ state: 'active'
+ }
+ dependsOn: [
+ api
+ ]
+}]
+
+// ------------------
+// OUTPUTS
+// ------------------
+
+output applicationInsightsAppId string = appInsightsModule.outputs.appId
+output applicationInsightsName string = appInsightsModule.outputs.applicationInsightsName
+output logAnalyticsWorkspaceId string = lawModule.outputs.customerId
+output apimServiceId string = apimModule.outputs.id
+output apimResourceGatewayURL string = apimModule.outputs.gatewayUrl
+
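+// Subscription keys are intentionally exposed as lab outputs; the suppressions below acknowledge the linter warning.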
+#disable-next-line outputs-should-not-contain-secrets
+output apimSubscription1Key string = apimSubscriptions[0].listSecrets().primaryKey
+#disable-next-line outputs-should-not-contain-secrets
+output apimSubscription2Key string = apimSubscriptions[1].listSecrets().primaryKey
+#disable-next-line outputs-should-not-contain-secrets
+output apimSubscription3Key string = apimSubscriptions[2].listSecrets().primaryKey
diff --git a/labs/progressive-policies-1/progressive-policies-1.ipynb b/labs/progressive-policies-1/progressive-policies-1.ipynb
new file mode 100644
index 0000000..0eaf7ec
--- /dev/null
+++ b/labs/progressive-policies-1/progressive-policies-1.ipynb
@@ -0,0 +1,685 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# APIM โค๏ธ OpenAI\n",
+ "\n",
+ "## Progressive Policies lab\n",
+ "\n",
+ "Playground to create a combination of several policies in an interative approach. We start with load balancing, then progressively add token emitting, rate limiting, and semantic caching. Each of these sets of policies is derived from other labs in this repo.\n",
+ "\n",
+ "### TOC\n",
+ "- [0๏ธโฃ Initialize notebook variables](#0)\n",
+ "- [1๏ธโฃ Verify the Azure CLI and the connected Azure subscription](#1)\n",
+ "- [2๏ธโฃ Policy 1 - Load Balancing](#2)\n",
+ " - [Create deployment using ๐ฆพ Bicep](#2deployment)\n",
+ " - [Get the deployment outputs](#2deploymentoutputs)\n",
+ " - [๐งช Test the API using a direct HTTP call](#2requests)\n",
+ " - [๐ Analyze Load Balancing results](#2plot)\n",
+ " - [๐งช Test the API using the Azure OpenAI Python SDK](#2sdk)\n",
+ "- [3๏ธโฃ Policy 2 - Token Emitting](#3)\n",
+ " - [Update deployment using ๐ฆพ Bicep](#3deployment)\n",
+ " - [๐งช Execute multiple runs for each subscription using the Azure OpenAI Python SDK](#3sdk)\n",
+ " - [๐ See the metrics on the Azure Portal](#3metricsinportal)\n",
+ " - [๐ Analyze Application Insights custom metrics with a KQL query](#3kql)\n",
+ " - [๐ Plot the custom metrics results](#3plot)\n",
+ "- [4๏ธโฃ Policy 3 - Token Rate Limiting](#4)\n",
+ " - [Update deployment using ๐ฆพ Bicep](#4deployment)\n",
+ " - [๐งช Execute multiple runs for each subscription using the Azure OpenAI Python SDK](#4sdk) \n",
+ "- [๐๏ธ Clean up resources](#clean) -->\n",
+ "\n",
+ "### Prerequisites\n",
+ "- [Python 3.12 or later version](https://www.python.org/) installed\n",
+ "- [Pandas Library](https://pandas.pydata.org/) and matplotlib installed\n",
+ "- [VS Code](https://code.visualstudio.com/) installed with the [Jupyter notebook extension](https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter) enabled\n",
+ "- [Azure CLI](https://learn.microsoft.com/cli/azure/install-azure-cli) installed\n",
+ "- [An Azure Subscription](https://azure.microsoft.com/free/) with Contributor permissions\n",
+ "- [Access granted to Azure OpenAI](https://aka.ms/oai/access) or just enable the mock service\n",
+ "- [Sign in to Azure with Azure CLI](https://learn.microsoft.com/cli/azure/authenticate-azure-cli-interactively)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "### 0๏ธโฃ Initialize notebook variables\n",
+ "\n",
+ "- Resources will be suffixed by a unique string based on your subscription id.\n",
+ "- Adjust the location parameters according your preferences and on the [product availability by Azure region.](https://azure.microsoft.com/explore/global-infrastructure/products-by-region/?cdn=disable&products=cognitive-services,api-management)\n",
+ "- Adjust the OpenAI model and version according the [availability by region.](https://learn.microsoft.com/azure/ai-services/openai/concepts/models) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os, sys, json\n",
+ "sys.path.insert(1, '../../shared') # add the shared directory to the Python path\n",
+ "import utils\n",
+ "\n",
+ "deployment_name = os.path.basename(os.path.dirname(globals()['__vsc_ipynb_file__']))\n",
+ "resource_group_name = f\"lab-{deployment_name}\" # change the name to match your naming style\n",
+ "resource_group_location = \"eastus2\"\n",
+ "\n",
+ "apim_sku = 'Basicv2'\n",
+ "\n",
+ "# Equally distribute between Sweden and France\n",
+ "openai_resources = [\n",
+ " {\"name\": \"openai1\", \"location\": \"swedencentral\", \"priority\": 1, \"weight\": 50},\n",
+ " {\"name\": \"openai2\", \"location\": \"francecentral\", \"priority\": 1, \"weight\": 50}\n",
+ "]\n",
+ "\n",
+ "openai_deployment_name = \"gpt-35-turbo\"\n",
+ "openai_model_name = \"gpt-35-turbo\"\n",
+ "openai_model_version = \"0613\"\n",
+ "openai_model_capacity = 8\n",
+ "openai_api_version = \"2024-02-01\"\n",
+ "\n",
+ "backend_id = 'openai-backend-pool' if len(openai_resources) > 1 else openai_resources[0]['name']\n",
+ "\n",
+ "utils.print_ok('Notebook initialized')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "### 1๏ธโฃ Verify the Azure CLI and the connected Azure subscription\n",
+ "\n",
+ "The following commands ensure that you have the latest version of the Azure CLI and that the Azure CLI is connected to your Azure subscription."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "output = utils.run(\"az account show\", \"Retrieved az account\", \"Failed to get the current az account\")\n",
+ "\n",
+ "if output.success and output.json_data:\n",
+ " current_user = output.json_data['user']['name']\n",
+ " tenant_id = output.json_data['tenantId']\n",
+ " subscription_id = output.json_data['id']\n",
+ "\n",
+ " utils.print_info(f\"Current user: {current_user}\")\n",
+ " utils.print_info(f\"Tenant ID: {tenant_id}\")\n",
+ " utils.print_info(f\"Subscription ID: {subscription_id}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "### 2๏ธโฃ Policy 1 - Load Balancing\n",
+ "\n",
+ "This lab uses [Bicep](https://learn.microsoft.com/azure/azure-resource-manager/bicep/overview?tabs=bicep) to declarative define all the resources that will be deployed in the specified resource group. Change the parameters or the [main.bicep](main.bicep) directly to try different configurations.\n",
+ "\n",
+ "`openAIModelCapacity` is set intentionally low to `8` (8k tokens per minute) to trigger the retry logic in the load balancer (transparent to the user) as well as the priority failover from priority 1 to 2.\n",
+ "\n",
+ "\n",
+ "#### Create deployment using ๐ฆพ Bicep"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "policy_xml_file = \"policy-1.xml\"\n",
+ "bicep_parameters_file = \"params-1.json\"\n",
+ "\n",
+ "# Create the resource group if doesn't exist\n",
+ "utils.create_resource_group(resource_group_name, resource_group_location)\n",
+ "\n",
+ "# Define the Bicep parameters\n",
+ "bicep_parameters = {\n",
+ " \"$schema\": \"https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#\",\n",
+ " \"contentVersion\": \"1.0.0.0\",\n",
+ " \"parameters\": {\n",
+ " \"apimSku\": { \"value\": apim_sku },\n",
+ " \"openAIConfig\": { \"value\": openai_resources },\n",
+ " \"openAIDeploymentName\": { \"value\": openai_deployment_name },\n",
+ " \"openAIModelName\": { \"value\": openai_model_name },\n",
+ " \"openAIModelVersion\": { \"value\": openai_model_version },\n",
+ " \"openAIModelCapacity\": { \"value\": openai_model_capacity },\n",
+ " \"openAIAPIVersion\": { \"value\": openai_api_version },\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "bicep_parameters = utils.create_bicep_params(policy_xml_file, bicep_parameters_file, bicep_parameters, [\n",
+ " ('{backend-id}', backend_id),\n",
+ " ('{retry-count}', len(openai_resources) - 1)\n",
+ "])\n",
+ "\n",
+ "# Run the deployment\n",
+ "output = utils.run(f\"az deployment group create --name {deployment_name} --resource-group {resource_group_name} --template-file main.bicep --parameters {bicep_parameters_file}\",\n",
+ " f\"Deployment '{deployment_name}' succeeded\", f\"Deployment '{deployment_name}' failed\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "#### Get the deployment outputs\n",
+ "\n",
+ "Retrieve the required outputs from the Bicep deployment."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Obtain all of the outputs from the deployment\n",
+ "output = utils.run(f\"az deployment group show --name {deployment_name} -g {resource_group_name}\", f\"Retrieved deployment: {deployment_name}\", f\"Failed to retrieve deployment: {deployment_name}\")\n",
+ "\n",
+ "if output.success and output.json_data:\n",
+ " apim_service_id = utils.get_deployment_output(output, 'apimServiceId', 'APIM Service Id')\n",
+ " apim_resource_gateway_url = utils.get_deployment_output(output, 'apimResourceGatewayURL', 'APIM API Gateway URL')\n",
+ " apim_subscription1_key = utils.get_deployment_output(output, 'apimSubscription1Key', 'APIM Subscription 1 Key (masked)', True)\n",
+ " apim_subscription2_key = utils.get_deployment_output(output, 'apimSubscription2Key', 'APIM Subscription 2 Key (masked)', True)\n",
+ " apim_subscription3_key = utils.get_deployment_output(output, 'apimSubscription3Key', 'APIM Subscription 3 Key (masked)', True)\n",
+ " app_insights_name = utils.get_deployment_output(output, 'applicationInsightsName', 'Application Insights Name')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "#### ๐งช Test the API using a direct HTTP call\n",
+ "Requests is an elegant and simple HTTP library for Python that will be used here to make raw API requests and inspect the responses. \n",
+ "\n",
+ "You will not see HTTP 429s returned as API Management's `retry` policy will select an available backend. If no backends are viable, an HTTP 503 will be returned.\n",
+ "\n",
+ "Tip: Use the [tracing tool](../../tools/tracing.ipynb) to track the behavior of the backend pool."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import requests, time\n",
+ "\n",
+ "runs = 20\n",
+ "sleep_time_ms = 10\n",
+ "url = f\"{apim_resource_gateway_url}/openai/deployments/{openai_deployment_name}/chat/completions?api-version={openai_api_version}\"\n",
+ "messages = {\"messages\": [\n",
+ " {\"role\": \"system\", \"content\": \"You are a sarcastic, unhelpful assistant.\"},\n",
+ " {\"role\": \"user\", \"content\": \"Can you tell me the time, please?\"}\n",
+ "]}\n",
+ "api_runs = []\n",
+ "\n",
+ "# Initialize a session for connection pooling and set any default headers\n",
+ "session = requests.Session()\n",
+ "session.headers.update({'api-key': apim_subscription1_key})\n",
+ "\n",
+ "try:\n",
+ " for i in range(runs):\n",
+ " print(f\"โถ๏ธ Run {i+1}/{runs}:\")\n",
+ "\n",
+ " start_time = time.time()\n",
+ " response = session.post(url, json = messages)\n",
+ " response_time = time.time() - start_time\n",
+ " print(f\"โ {response_time:.2f} seconds\")\n",
+ "\n",
+ " utils.print_response_code(response)\n",
+ "\n",
+ " if \"x-ms-region\" in response.headers:\n",
+ " print(f\"x-ms-region: \\x1b[1;32m{response.headers.get(\"x-ms-region\")}\\x1b[0m\") # this header is useful to determine the region of the backend that served the request\n",
+ " api_runs.append((response_time, response.headers.get(\"x-ms-region\")))\n",
+ "\n",
+ " if (response.status_code == 200):\n",
+ " data = json.loads(response.text)\n",
+ " print(f\"Token usage: {json.dumps(dict(data.get(\"usage\")), indent = 4)}\\n\")\n",
+ " print(f\"๐ฌ {data.get(\"choices\")[0].get(\"message\").get(\"content\")}\\n\")\n",
+ " else:\n",
+ " print(f\"{response.text}\\n\")\n",
+ "\n",
+ " time.sleep(sleep_time_ms/1000)\n",
+ "finally:\n",
+ " # Close the session to release the connection\n",
+ " session.close()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "#### ๐ Analyze Load Balancing results\n",
+ "\n",
+ "The priority 1 backend will be used until TPM exhaustion sets in, then distribution will occur near equally across the two priority 2 backends with 50/50 weights. \n",
+ "\n",
+ "Please note that the first request of the lab can take a bit longer and should be discounted in terms of duration."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "from matplotlib.patches import Rectangle as pltRectangle\n",
+ "import matplotlib as mpl\n",
+ "\n",
+ "mpl.rcParams['figure.figsize'] = [15, 7]\n",
+ "df = pd.DataFrame(api_runs, columns = ['Response Time', 'Region'])\n",
+ "df['Run'] = range(1, len(df) + 1)\n",
+ "\n",
+ "# Define a color map for each region\n",
+ "color_map = {'UK South': 'lightpink', 'France Central': 'lightblue', 'Sweden Central': 'lightyellow'} # Add more regions and colors as needed\n",
+ "\n",
+ "# Plot the dataframe with colored bars\n",
+ "ax = df.plot(kind = 'bar', x = 'Run', y = 'Response Time', color = [color_map.get(region, 'gray') for region in df['Region']], legend = False)\n",
+ "\n",
+ "# Add legend\n",
+ "legend_labels = [pltRectangle((0, 0), 1, 1, color = color_map.get(region, 'gray')) for region in df['Region'].unique()]\n",
+ "ax.legend(legend_labels, df['Region'].unique())\n",
+ "\n",
+ "plt.title('Load Balancing results')\n",
+ "plt.xlabel('Run #')\n",
+ "plt.ylabel('Response Time')\n",
+ "plt.xticks(rotation = 0)\n",
+ "\n",
+ "average = df['Response Time'].mean()\n",
+ "plt.axhline(y = average, color = 'r', linestyle = '--', label = f'Average: {average:.2f}')\n",
+ "\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "#### ๐งช Test the API using the Azure OpenAI Python SDK\n",
+ "\n",
+ "Repeat the same test using the Python SDK to ensure compatibility. Note that we do not know what region served the response; we only see that we obtained a response."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import time\n",
+ "from openai import AzureOpenAI\n",
+ "\n",
+ "runs = 20\n",
+ "sleep_time_ms = 10\n",
+ "total_tokens_all_runs = 0\n",
+ "\n",
+ "client = AzureOpenAI(\n",
+ " azure_endpoint = apim_resource_gateway_url,\n",
+ " api_key = apim_subscription1_key,\n",
+ " api_version = openai_api_version\n",
+ ")\n",
+ "\n",
+ "for i in range(runs):\n",
+ " print(f\"โถ๏ธ Run {i+1}/{runs}:\")\n",
+ "\n",
+ " start_time = time.time()\n",
+ " raw_response = client.chat.completions.with_raw_response.create(\n",
+ " model = openai_model_name,\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": \"You are a sarcastic, unhelpful assistant.\"},\n",
+ " {\"role\": \"user\", \"content\": \"Can you tell me the time, please?\"}\n",
+ " ])\n",
+ " response_time = time.time() - start_time\n",
+ "\n",
+ " print(f\"โ {response_time:.2f} seconds\")\n",
+ " print(f\"x-ms-region: \\x1b[1;32m{raw_response.headers.get(\"x-ms-region\")}\\x1b[0m\") # this header is useful to determine the region of the backend that served the request\n",
+ "\n",
+ " response = raw_response.parse()\n",
+ "\n",
+ " if response.usage:\n",
+ " total_tokens_all_runs += response.usage.total_tokens\n",
+ " print(f\"Token usage:\\n Total tokens: {response.usage.total_tokens}\\n Prompt tokens: {response.usage.prompt_tokens}\\n Completion tokens: {response.usage.completion_tokens}\\n Total tokens all runs: {total_tokens_all_runs}\\n\")\n",
+ "\n",
+ "\n",
+ " print(f\"๐ฌ {response.choices[0].message.content}\\n\")\n",
+ "\n",
+ " time.sleep(sleep_time_ms/1000)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "### 3๏ธโฃ Policy 2 - Token Emitting\n",
+ "\n",
+ "We now add token emitting to the existing API policy in order to track token usage by subscriptions. This aids usage and cost analysis and chargeback models inside organizations.\n",
+ "\n",
+ "\n",
+ "#### Update deployment using ๐ฆพ Bicep"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "policy_xml_file = \"policy-2.xml\"\n",
+ "bicep_parameters_file = \"params-2.json\"\n",
+ "\n",
+ "bicep_parameters = utils.create_bicep_params(policy_xml_file, bicep_parameters_file, bicep_parameters, [\n",
+ " ('{backend-id}', backend_id),\n",
+ " ('{retry-count}', len(openai_resources) - 1)\n",
+ "])\n",
+ "\n",
+ "# Run the deployment\n",
+ "output = utils.run(f\"az deployment group create --name {deployment_name} --resource-group {resource_group_name} --template-file main.bicep --parameters {bicep_parameters_file}\",\n",
+ " f\"Deployment '{deployment_name}' succeeded\", f\"Deployment '{deployment_name}' failed\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "#### ๐งช Execute multiple runs for each subscription using the Azure OpenAI Python SDK\n",
+ "\n",
+ "We will send requests for each subscription. Adjust the `sleep_time_ms` and the number of `runs` to your test scenario.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import time\n",
+ "from openai import AzureOpenAI\n",
+ "\n",
+ "runs = 20\n",
+ "sleep_time_ms = 10\n",
+ "total_tokens_all_runs = [0, 0, 0]\n",
+ "\n",
+ "clients = [\n",
+ " AzureOpenAI(\n",
+ " azure_endpoint = apim_resource_gateway_url,\n",
+ " api_key = apim_subscription1_key,\n",
+ " api_version = openai_api_version\n",
+ " ),\n",
+ " AzureOpenAI(\n",
+ " azure_endpoint = apim_resource_gateway_url,\n",
+ " api_key = apim_subscription2_key,\n",
+ " api_version = openai_api_version\n",
+ " ),\n",
+ " AzureOpenAI(\n",
+ " azure_endpoint = apim_resource_gateway_url,\n",
+ " api_key = apim_subscription3_key,\n",
+ " api_version = openai_api_version\n",
+ " )\n",
+ "]\n",
+ "\n",
+ "for i in range(runs):\n",
+ " print(f\"โถ๏ธ Run {i+1}/{runs}:\")\n",
+ "\n",
+ " for j in range(0, 3):\n",
+ " start_time = time.time()\n",
+ "\n",
+ " raw_response = clients[j].chat.completions.with_raw_response.create(\n",
+ " model = openai_model_name,\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": \"You are a sarcastic, unhelpful assistant.\"},\n",
+ " {\"role\": \"user\", \"content\": \"Can you tell me the time, please?\"}\n",
+ " ],\n",
+ " extra_headers = {\"x-user-id\": \"alex\"}\n",
+ " )\n",
+ "\n",
+ " response_time = time.time() - start_time\n",
+ " print(f\"๐ Subscription {j+1}\")\n",
+ " print(f\"โ {response_time:.2f} seconds\")\n",
+ " print(f\"x-ms-region: \\x1b[1;32m{raw_response.headers.get(\"x-ms-region\")}\\x1b[0m\") # this header is useful to determine the region of the backend that served the request\n",
+ "\n",
+ " response = raw_response.parse()\n",
+ "\n",
+ " if response.usage:\n",
+ " total_tokens_all_runs[j] += response.usage.total_tokens\n",
+ " print(f\"Token usage:\\n Total tokens: {response.usage.total_tokens}\\n Prompt tokens: {response.usage.prompt_tokens}\\n Completion tokens: {response.usage.completion_tokens}\\n Total tokens all runs: {total_tokens_all_runs[j]}\\n\")\n",
+ "\n",
+ " print(f\"๐ฌ {response.choices[0].message.content}\\n\")\n",
+ "\n",
+ " print()\n",
+ "\n",
+ " time.sleep(sleep_time_ms/1000)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "### ๐ See the metrics on the Azure Portal\n",
+ "\n",
+ "Open the Application Insights resource, navigate to the Metrics blade, then select the defined namespace (openai). Choose the metric \"Total Tokens\" with a Sum aggregation. Then, apply splitting by 'Subscription Id' to view values for each dimension.\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "#### ๐ Analyze Application Insights custom metrics with a KQL query\n",
+ "\n",
+ "With this query you can get the custom metrics that were emitted by Azure APIM. Note that it may take a few minutes for data to become available."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "query = \"\\\"\" + \"customMetrics \\\n",
+ "| where name == 'Total Tokens' \\\n",
+ "| extend parsedCustomDimensions = parse_json(customDimensions) \\\n",
+ "| extend clientIP = tostring(parsedCustomDimensions.['Client IP']) \\\n",
+ "| extend apiId = tostring(parsedCustomDimensions.['API ID']) \\\n",
+ "| extend apimSubscription = tostring(parsedCustomDimensions.['Subscription ID']) \\\n",
+ "| extend UserId = tostring(parsedCustomDimensions.['User ID']) \\\n",
+ "| project timestamp, value, clientIP, apiId, apimSubscription, UserId \\\n",
+ "| order by timestamp asc\" + \"\\\"\"\n",
+ "\n",
+ "output = utils.run(f\"az monitor app-insights query --app {app_insights_name} -g {resource_group_name} --analytics-query {query}\",\n",
+ " f\"App Insights query succeeded\", f\"App Insights query failed\")\n",
+ "\n",
+ "table = output.json_data['tables'][0]\n",
+ "df = pd.DataFrame(table.get(\"rows\"), columns = [col.get(\"name\") for col in table.get('columns')])\n",
+ "df['timestamp'] = pd.to_datetime(df['timestamp']).dt.strftime('%H:%M')\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "#### ๐ Plot the custom metrics results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# plot the results\n",
+ "import matplotlib.pyplot as plt\n",
+ "import matplotlib as mpl\n",
+ "\n",
+ "mpl.rcParams['figure.figsize'] = [15, 7]\n",
+ "ax = df.plot(kind = 'line', x = 'timestamp', y = 'value', legend = False)\n",
+ "plt.title('Total token usage over time')\n",
+ "plt.xlabel('Time')\n",
+ "plt.ylabel('Tokens')\n",
+ "\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "### 4๏ธโฃ Policy 3 - Token Rate Limiting\n",
+ "\n",
+ "Adding rate limiting for subscriptions is a sensible way to limit runaway usage.\n",
+ "\n",
+ "\n",
+ "#### Update deployment using ๐ฆพ Bicep"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "policy_xml_file = \"policy-3.xml\"\n",
+ "bicep_parameters_file = \"params-3.json\"\n",
+ "tokens_per_minute = 500\n",
+ "\n",
+ "bicep_parameters = utils.create_bicep_params(policy_xml_file, bicep_parameters_file, bicep_parameters, [\n",
+ " ('{backend-id}', backend_id),\n",
+ " ('{retry-count}', len(openai_resources) - 1),\n",
+ " ('{tpm}', tokens_per_minute)\n",
+ "])\n",
+ "\n",
+ "# Run the deployment\n",
+ "output = utils.run(f\"az deployment group create --name {deployment_name} --resource-group {resource_group_name} --template-file main.bicep --parameters {bicep_parameters_file}\",\n",
+ " f\"Deployment '{deployment_name}' succeeded\", f\"Deployment '{deployment_name}' failed\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "#### ๐งช Execute multiple runs for each subscription using the Azure OpenAI Python SDK\n",
+ "\n",
+ "We will send requests for each subscription. Adjust the `sleep_time_ms` and the number of `runs` to your test scenario.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import time\n",
+ "from openai import AzureOpenAI\n",
+ "\n",
+ "runs = 20\n",
+ "sleep_time_ms = 10\n",
+ "total_tokens_all_runs = [0, 0, 0]\n",
+ "\n",
+ "clients = [\n",
+ " AzureOpenAI(\n",
+ " azure_endpoint = apim_resource_gateway_url,\n",
+ " api_key = apim_subscription1_key,\n",
+ " api_version = openai_api_version\n",
+ " ),\n",
+ " AzureOpenAI(\n",
+ " azure_endpoint = apim_resource_gateway_url,\n",
+ " api_key = apim_subscription2_key,\n",
+ " api_version = openai_api_version\n",
+ " ),\n",
+ " AzureOpenAI(\n",
+ " azure_endpoint = apim_resource_gateway_url,\n",
+ " api_key = apim_subscription3_key,\n",
+ " api_version = openai_api_version\n",
+ " )\n",
+ "]\n",
+ "\n",
+ "for i in range(runs):\n",
+ " print(f\"โถ๏ธ Run {i+1}/{runs}:\")\n",
+ "\n",
+ " for j in range(0, 3):\n",
+ " start_time = time.time()\n",
+ "\n",
+ " raw_response = clients[j].chat.completions.with_raw_response.create(\n",
+ " model = openai_model_name,\n",
+ " messages = [\n",
+ " {\"role\": \"system\", \"content\": \"You are a sarcastic, unhelpful assistant.\"},\n",
+ " {\"role\": \"user\", \"content\": \"Can you tell me the time, please?\"}\n",
+ " ],\n",
+ " extra_headers = {\"x-user-id\": \"alex\"}\n",
+ " )\n",
+ "\n",
+ " response_time = time.time() - start_time\n",
+ " print(f\"๐ Subscription {j+1}\")\n",
+ " print(f\"โ {response_time:.2f} seconds\")\n",
+ " print(f\"x-ms-region: \\x1b[1;32m{raw_response.headers.get(\"x-ms-region\")}\\x1b[0m\") # this header is useful to determine the region of the backend that served the request\n",
+ "\n",
+ " response = raw_response.parse()\n",
+ "\n",
+ " if response.usage:\n",
+ " total_tokens_all_runs[j] += response.usage.total_tokens\n",
+ " print(f\"Token usage:\\n Total tokens: {response.usage.total_tokens}\\n Prompt tokens: {response.usage.prompt_tokens}\\n Completion tokens: {response.usage.completion_tokens}\\n Total tokens all runs: {total_tokens_all_runs[j]}\\n\")\n",
+ "\n",
+ " print(f\"๐ฌ {response.choices[0].message.content}\\n\")\n",
+ "\n",
+ " print()\n",
+ "\n",
+ " time.sleep(sleep_time_ms/1000)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "### ๐๏ธ Clean up resources\n",
+ "\n",
+ "When you're finished with the lab, you should remove all your deployed resources from Azure to avoid extra charges and keep your Azure subscription uncluttered.\n",
+ "Use the [clean-up-resources notebook](clean-up-resources.ipynb) for that."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/labs/progressive-policies-1/result.png b/labs/progressive-policies-1/result.png
new file mode 100644
index 0000000..3ef4239
Binary files /dev/null and b/labs/progressive-policies-1/result.png differ