|
4 | 4 | "cell_type": "markdown",
|
5 | 5 | "metadata": {},
|
6 | 6 | "source": [
|
7 |
| - "The first step is to import the libraries and set the OpenAI API key and endpoint. You'll need to set the following environment variables:\n", |
8 |
| - "\n", |
9 |
| - "- `AZURE_OPENAI_API_KEY` - Your OpenAI API key\n", |
10 |
| - "- `AZURE_OPENAI_ENDPOINT` - Your OpenAI endpoint" |
| 7 | + "In order to run the following notebooks, if you haven't done so yet, you need to deploy a model that uses `text-embedding-ada-002` as its base model and set its deployment name inside the .env file as `AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT`"
11 | 8 | ]
|
12 | 9 | },
|
13 | 10 | {
|
14 | 11 | "cell_type": "code",
|
15 |
| - "execution_count": null, |
| 12 | + "execution_count": 1, |
16 | 13 | "metadata": {},
|
17 | 14 | "outputs": [],
|
18 | 15 | "source": [
|
19 | 16 | "import os\n",
|
20 | 17 | "import pandas as pd\n",
|
21 |
| - "import openai\n", |
22 |
| - "from openai.embeddings_utils import cosine_similarity, get_embedding\n", |
| 18 | + "import numpy as np\n", |
| 19 | + "from openai import AzureOpenAI\n", |
| 20 | + "from dotenv import load_dotenv\n", |
23 | 21 | "\n",
|
24 |
| - "OPENAI_EMBEDDING_ENGINE = \"text-embedding-ada-002\"\n", |
25 |
| - "SIMILARITIES_RESULTS_THRESHOLD = 0.75\n", |
26 |
| - "DATASET_NAME = \"embedding_index_3m.json\"\n", |
| 22 | + "from sklearn.metrics.pairwise import cosine_similarity\n", |
| 23 | + "load_dotenv()\n", |
27 | 24 | "\n",
|
28 |
| - "openai.api_type = \"azure\"\n", |
29 |
| - "openai.api_key = os.environ[\"AZURE_OPENAI_API_KEY\"]\n", |
30 |
| - "openai.api_base = os.environ[\"AZURE_OPENAI_ENDPOINT\"]\n", |
31 |
| - "openai.api_version = \"2023-07-01-preview\"\n", |
| 25 | + "client = AzureOpenAI(\n", |
| 26 | + " api_key=os.environ['AZURE_OPENAI_KEY'], # loaded from the .env file by load_dotenv() above\n",
| 27 | + " api_version = \"2023-05-15\"\n", |
| 28 | + " )\n", |
32 | 29 | "\n",
|
33 |
| - "OPENAI_EMBEDDING_DEPLOYMENT_NAME = os.environ[\n", |
34 |
| - " \"AZURE_OPENAI_EMBEDDING_MODEL_DEPLOYMENT_NAME\"\n", |
35 |
| - "]" |
| 30 | + "model = os.environ['AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT']\n", |
| 31 | + "\n", |
| 32 | + "SIMILARITIES_RESULTS_THRESHOLD = 0.75\n", |
| 33 | + "DATASET_NAME = \"embedding_index_3m.json\"" |
36 | 34 | ]
|
37 | 35 | },
|
38 | 36 | {
|
|
44 | 42 | },
|
45 | 43 | {
|
46 | 44 | "cell_type": "code",
|
47 |
| - "execution_count": null, |
| 45 | + "execution_count": 2, |
48 | 46 | "metadata": {},
|
49 | 47 | "outputs": [],
|
50 | 48 | "source": [
|
|
69 | 67 | },
|
70 | 68 | {
|
71 | 69 | "cell_type": "code",
|
72 |
| - "execution_count": null, |
| 70 | + "execution_count": 3, |
73 | 71 | "metadata": {},
|
74 | 72 | "outputs": [],
|
75 | 73 | "source": [
|
|
79 | 77 | " # create a copy of the dataset\n",
|
80 | 78 | " video_vectors = dataset.copy()\n",
|
81 | 79 | "\n",
|
82 |
| - " # get the embeddings for the query\n", |
83 |
| - " query_embeddings = get_embedding(query, OPENAI_EMBEDDING_ENGINE)\n", |
| 80 | + " # get the embeddings for the query \n", |
| 81 | + " query_embeddings = client.embeddings.create(input=query, model=model).data[0].embedding\n", |
84 | 82 | "\n",
|
85 | 83 | " # create a new column with the calculated similarity for each row\n",
|
86 | 84 | " video_vectors[\"similarity\"] = video_vectors[\"ada_v2\"].apply(\n",
|
87 |
| - " lambda x: cosine_similarity(query_embeddings, x)\n", |
| 85 | + " lambda x: cosine_similarity(np.array(query_embeddings).reshape(1,-1), np.array(x).reshape(1,-1))\n", |
88 | 86 | " )\n",
|
89 | 87 | "\n",
|
90 | 88 | " # filter the videos by similarity\n",
|
|
109 | 107 | },
|
110 | 108 | {
|
111 | 109 | "cell_type": "code",
|
112 |
| - "execution_count": null, |
| 110 | + "execution_count": 4, |
113 | 111 | "metadata": {},
|
114 | 112 | "outputs": [],
|
115 | 113 | "source": [
|
|
119 | 117 | " return f\"https://youtu.be/{video_id}?t={seconds}\"\n",
|
120 | 118 | "\n",
|
121 | 119 | " print(f\"\\nVideos similar to '{query}':\")\n",
|
122 |
| - " for index, row in videos.iterrows():\n", |
| 120 | + " for _, row in videos.iterrows():\n",
123 | 121 | " youtube_url = _gen_yt_url(row[\"videoId\"], row[\"seconds\"])\n",
|
124 | 122 | " print(f\" - {row['title']}\")\n",
|
125 | 123 | " print(f\" Summary: {' '.join(row['summary'].split()[:15])}...\")\n",
|
|
153 | 151 | },
|
154 | 152 | {
|
155 | 153 | "cell_type": "code",
|
156 |
| - "execution_count": null, |
| 154 | + "execution_count": 5, |
157 | 155 | "metadata": {},
|
158 | 156 | "outputs": [],
|
159 | 157 | "source": [
|
|
190 | 188 | "name": "python",
|
191 | 189 | "nbconvert_exporter": "python",
|
192 | 190 | "pygments_lexer": "ipython3",
|
193 |
| - "version": "3.11.6" |
| 191 | + "version": "3.10.8" |
194 | 192 | }
|
195 | 193 | },
|
196 | 194 | "nbformat": 4,
|
|
0 commit comments