From 50c64497498e671f16a548472680361a3159887e Mon Sep 17 00:00:00 2001 From: Pete Rodriguez Date: Mon, 6 Jan 2025 11:24:26 -0600 Subject: [PATCH 1/7] Changed database pods to be internal to VNet and updated AKS version --- .../Resources/ARM-Templates/KubernetesCluster/parameters.json | 2 +- .../Student/Resources/HelmCharts/MySQL57/values.yaml | 2 +- .../Student/Resources/HelmCharts/Oracle184/values.yaml | 2 +- .../Student/Resources/HelmCharts/Oracle21c/values.yaml | 2 +- .../Student/Resources/HelmCharts/PostgreSQL116/values.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/033-OSSDatabaseMigration/Student/Resources/ARM-Templates/KubernetesCluster/parameters.json b/033-OSSDatabaseMigration/Student/Resources/ARM-Templates/KubernetesCluster/parameters.json index 72d35151dd..8886a04055 100644 --- a/033-OSSDatabaseMigration/Student/Resources/ARM-Templates/KubernetesCluster/parameters.json +++ b/033-OSSDatabaseMigration/Student/Resources/ARM-Templates/KubernetesCluster/parameters.json @@ -40,7 +40,7 @@ "value": "ossdbmigration-dns" }, "kubernetesVersion": { - "value": "1.25.5" + "value": "1.31.2" }, "networkPlugin": { "value": "azure" diff --git a/033-OSSDatabaseMigration/Student/Resources/HelmCharts/MySQL57/values.yaml b/033-OSSDatabaseMigration/Student/Resources/HelmCharts/MySQL57/values.yaml index 4ce5bad0bd..8352bcc95b 100644 --- a/033-OSSDatabaseMigration/Student/Resources/HelmCharts/MySQL57/values.yaml +++ b/033-OSSDatabaseMigration/Student/Resources/HelmCharts/MySQL57/values.yaml @@ -18,7 +18,7 @@ image: tag: "5.7.32" service: - type: LoadBalancer + type: ClusterIP port: 3306 protocol: TCP diff --git a/033-OSSDatabaseMigration/Student/Resources/HelmCharts/Oracle184/values.yaml b/033-OSSDatabaseMigration/Student/Resources/HelmCharts/Oracle184/values.yaml index 6b81c2ca93..53408b6721 100644 --- a/033-OSSDatabaseMigration/Student/Resources/HelmCharts/Oracle184/values.yaml +++ 
b/033-OSSDatabaseMigration/Student/Resources/HelmCharts/Oracle184/values.yaml @@ -18,7 +18,7 @@ image: tag: "18.4.0-xe" service: - type: LoadBalancer + type: ClusterIP port: 1521 protocol: TCP diff --git a/033-OSSDatabaseMigration/Student/Resources/HelmCharts/Oracle21c/values.yaml b/033-OSSDatabaseMigration/Student/Resources/HelmCharts/Oracle21c/values.yaml index 53323ea3e3..b90078f7cc 100644 --- a/033-OSSDatabaseMigration/Student/Resources/HelmCharts/Oracle21c/values.yaml +++ b/033-OSSDatabaseMigration/Student/Resources/HelmCharts/Oracle21c/values.yaml @@ -18,7 +18,7 @@ image: tag: "21.3.0-xe" service: - type: LoadBalancer + type: ClusterIP port: 1521 protocol: TCP diff --git a/033-OSSDatabaseMigration/Student/Resources/HelmCharts/PostgreSQL116/values.yaml b/033-OSSDatabaseMigration/Student/Resources/HelmCharts/PostgreSQL116/values.yaml index eb358ca3e1..a42e92f082 100644 --- a/033-OSSDatabaseMigration/Student/Resources/HelmCharts/PostgreSQL116/values.yaml +++ b/033-OSSDatabaseMigration/Student/Resources/HelmCharts/PostgreSQL116/values.yaml @@ -18,7 +18,7 @@ image: tag: "11.6" service: - type: LoadBalancer + type: ClusterIP port: 5432 protocol: TCP From 7fe06c5b27dcf8ebcbfb11e9c6092d54c35d653a Mon Sep 17 00:00:00 2001 From: Devanshi Thakar <33441411+devanshithakar12@users.noreply.github.com> Date: Mon, 28 Apr 2025 18:23:35 -0700 Subject: [PATCH 2/7] Got rid of any mention of gpt-4 --- 066-OpenAIFundamentals/Student/Challenge-00.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/066-OpenAIFundamentals/Student/Challenge-00.md b/066-OpenAIFundamentals/Student/Challenge-00.md index ab109bd8dd..703895e7cc 100644 --- a/066-OpenAIFundamentals/Student/Challenge-00.md +++ b/066-OpenAIFundamentals/Student/Challenge-00.md @@ -144,7 +144,7 @@ Now we will deploy the needed large language models from Azure OpenAI. - Navigate to the [AI Foundry](https://ai.azure.com) and click on **Hub Overview**. In the Connected resources, you should see Azure OpenAI. 
- On the left navigation bar, click on Deployments. - Deploy the following models in your Azure OpenAI resource. - - `gpt-4` or `gpt-4o` + - `gpt-4o` - `gpt-35-turbo` - `text-embedding-ada-002` From 6c87e929dbcb8ec386282c9ae167c3afdf6f61fe Mon Sep 17 00:00:00 2001 From: Devanshi Thakar <33441411+devanshithakar12@users.noreply.github.com> Date: Mon, 28 Apr 2025 18:25:51 -0700 Subject: [PATCH 3/7] Delete 066-OpenAIFundamentals/Student/Resources/notebooks/CH-02-ModelComparison.ipynb --- .../notebooks/CH-02-ModelComparison.ipynb | 1061 ----------------- 1 file changed, 1061 deletions(-) delete mode 100644 066-OpenAIFundamentals/Student/Resources/notebooks/CH-02-ModelComparison.ipynb diff --git a/066-OpenAIFundamentals/Student/Resources/notebooks/CH-02-ModelComparison.ipynb b/066-OpenAIFundamentals/Student/Resources/notebooks/CH-02-ModelComparison.ipynb deleted file mode 100644 index 99e5c73133..0000000000 --- a/066-OpenAIFundamentals/Student/Resources/notebooks/CH-02-ModelComparison.ipynb +++ /dev/null @@ -1,1061 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "# Challenge 02 - OpenAI Models & Capabilities" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "## Introduction\n", - "\n", - "In this challenge, you will learn about the different capabilities of OpenAI models and learn how to choose the best model for your use case.\n", - "\n", - "You are going to compare GPT-3.5 to GPT-4 model in this Challenge. 
If you do not have GPT-4 access, you can compare the legacy models if they are deployed, or go through this challenge conceptually to understand how to best pick a model from the ones you have deployed as well as the ones in the model catalog.\n", - "\n", - "In a world where the availability and development of models are always changing, the model we compare may change over time. But we encourage you to understand the general concepts and material in this Challenge because the comparison techniques utilized can be applicable to scenarios where you are comparing Large Language Models.\n", - "\n", - "Questions you will be able to answer by the end of this challenge:\n", - "\n", - "* How do responses differ for each model?\n", - "* What are ways to benchmark the performance of models? " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "## 1. Overview on finding the right model for you\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "### 1.1 Model Families\n", - "\n", - "Azure OpenAI provides access to many different models, grouped by family and capability. A model family typically associates models by their intended task. \n", - "\n", - "Model families currently available as of _Dec 1, 2023_ in Azure OpenAI includes GPT-4, GPT-3.5, Embeddings, DALL-E, and Whisper. 
Please reference this link for more information: https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models\n", - "\n", - "For GPT-3 and other models retiring in July 2024, see [Azure OpenAI Service legacy models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/legacy-models)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "### 1.2 Model Capacities\n", - "#### GPT-4\n", - "GPT-4 can solve difficult problems with greater accuracy than any of OpenAI's previous models. Like GPT-3.5 Turbo, GPT-4 is optimized for chat and works well for traditional completions tasks. Use the Chat Completions API to use GPT-4.\n", - "\n", - "#### GPT-3.5\n", - "GPT-3.5 models can understand and generate natural language or code. The most capable and cost effective model in the GPT-3.5 family is GPT-3.5 Turbo, which has been optimized for chat and works well for traditional completions tasks as well. GPT-3.5 Turbo is available for use with the Chat Completions API. GPT-3.5 Turbo Instruct has similar capabilities to text-davinci-003 using the Completions API instead of the Chat Completions API. 
We recommend using GPT-3.5 Turbo and GPT-3.5 Turbo Instruct over legacy GPT-3.5 and GPT-3 models.\n", - "\n", - "`gpt-35-turbo`\n", - "\n", - "`gpt-35-turbo-16k`\n", - "\n", - "`gpt-35-turbo-instruct`\n", - "\n", - "You can see the token context length supported by each model in the [model summary table](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#model-summary-table-and-region-availability).\n", - "\n", - "#### Embeddings \n", - "The previous embeddings models have been consolidated into the following new replacement model:\n", - "\n", - "`text-embedding-ada-002`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "\n", - "[Azure OpenAI models](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/models) \n", - "\n", - "\n", - "| Models | Description |\n", - "| --- | --- |\n", - "| GPT-4 | A set of models that improve on GPT-3.5 and can understand and generate natural language and code. | \n", - "| GPT-3.5 | A set of models that improve on GPT-3 and can understand and generate natural language and code. | \n", - "| Embeddings | A set of models that can convert text into numerical vector form to facilitate text similarity. | \n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "### 1.3 Pricing Details\n", - "\n", - "For the most up-to-date information, check out the Azure OpenAI [pricing page](https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/).\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "### 1.4 Quotas and Limits\n", - "\n", - "*The below limits are subject to change. We anticipate that you will need higher limits as you move toward production and your solution scales. 
When you know your solution requirements, please reach out to us by applying for a quota increase here: https://aka.ms/oai/quotaincrease\n", - "\n", - "|Limit Name\t|Limit Value|\n", - "|---|---|\n", - "|OpenAI resources per region per Azure subscription|\t30|\n", - "| Default DALL-E 2 quota limits| 2 concurrent requests |\n", - "| Default DALL-E 3 quota limits | 2 capacity units (6 requests per minute)|\n", - "|Maximum prompt tokens per request| Varies per model, see [Azure OpenAI Service models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models) |\n", - "|Max fine-tuned model deployments|5|\n", - "|Total number of training jobs per resource|\t100|\n", - "|Max simultaneous running training jobs per resource|\t1|\n", - "|Max training jobs queued\t|20|\n", - "|Max Files per resource\t|30|\n", - "|Total size of all files per resource\t|1 GB|\n", - "|Max training job time (job will fail if exceeded)\t|720 hours|\n", - "|Max training job size (tokens in training file) x (# of epochs)\t|2 Billion|\n", - "|Max size of all files per upload (Azure OpenAI on your data)\t|16 MB|" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "### 1.5 Model Selection\n", - "\n", - "Here is some general guidance on well-suited scenarios that tend to differentiate models. Note that these are not hard and fast rules, and oftentimes experimentation and benchmarking are important to making the best decision for your solution.\n", - "\n", - "|Model|Use Cases|\n", - "|---|---|\n", - "|GPT-3.5| Faster/cheaper app performance;
More quota allocated by default|\n", - "|GPT-4| More advanced reasoning or logical processing needed;
32k token window is an absolut must;
Multilingual proficiency needed;
No hard requirement on low latency|" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "### 1.6 Model Selection Best Practices\n", - "Our recommendation is for users to start with GPT-3.5 Turbo if identify with the use case listed above, and move to GPT-4 if needed.\n", - "\n", - "Once you have a prototype working, you can then optimize your model choice with the best latency/performance balance for your application." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "## 2. Let's Start Implementation" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "If you don't already have the OpenAI, Python-dotenv, plotly, or scikit-learn packages installed on your compute, the following cells will install them." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1685909662455 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "import openai\n", - "import os\n", - "import json\n", - "from dotenv import load_dotenv, find_dotenv\n", - "import pandas as pd\n", - "load_dotenv(find_dotenv())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "Set up your environment to access your OpenAI keys. Refer to your OpenAI resource in the Azure Portal to retrieve information regarding your OpenAI endpoint and keys.\n", - "\n", - "For security purposes, store your sensitive information in a .env file." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1686331271142 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", - "assert API_KEY, \"ERROR: Azure OpenAI Key is missing\"\n", - "openai.api_key = API_KEY\n", - "\n", - "RESOURCE_ENDPOINT = os.getenv(\"OPENAI_API_BASE\",\"\").strip()\n", - "assert RESOURCE_ENDPOINT, \"ERROR: Azure OpenAI Endpoint is missing\"\n", - "assert \"openai.azure.com\" in RESOURCE_ENDPOINT.lower(), \"ERROR: Azure OpenAI Endpoint should be in the form: \\n\\n\\t.openai.azure.com\"\n", - "\n", - "openai.api_base = RESOURCE_ENDPOINT\n", - "openai.api_type = os.getenv(\"OPENAI_API_TYPE\")\n", - "openai.api_version = os.getenv(\"OPENAI_API_VERSION\")\n", - "\n", - "chat_model=os.getenv(\"CHAT_MODEL_NAME\")\n", - "chat_model2=os.getenv(\"CHAT_MODEL_NAME2\")\n", - "text_model=os.getenv(\"EMBEDDING_MODEL_NAME\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "### 2.0 Helper Functions\n", - "Throughout this course, we will use OpenAI's `gpt-3.5-turbo` and `gpt-4` models and the [chat completions endpoint](https://platform.openai.com/docs/guides/chat). \n", - "\n", - "This helper function will make it easier to use prompts and look at the generated outputs." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "**timer wrapper** helps us monitor and compare the latency of each model.\n", - "\n", - "**get_chat_completion** helps create the OpenAI response using the chat model of your choice.\n", - "\n", - "**get_completion_from_messages** helps create the OpenAI response using the chat model of your choice, enabling chat history." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1686334202300 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "import functools\n", - "import time\n", - "\n", - "def timer(func):\n", - " @functools.wraps(func)\n", - " def wrapper(*args, **kwargs):\n", - " start_time = time.perf_counter()\n", - " value = func(*args, **kwargs)\n", - " end_time = time.perf_counter()\n", - " run_time = end_time - start_time\n", - " print(\"Finished {} in {} secs\".format(repr(func.__name__), round(run_time, 3)))\n", - " return value[0], value[1], round(run_time, 3)\n", - "\n", - " return wrapper" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1686334262004 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "@timer\n", - "def get_chat_completion(prompt, model):\n", - " messages = [{\"role\": \"user\", \"content\": prompt}]\n", - " response = openai.ChatCompletion.create(\n", - " engine=model,\n", - " messages=messages,\n", - " temperature=0, # this is the degree of randomness of the model's output\n", - " max_tokens = 200,\n", - " top_p = 1.0,\n", - " )\n", - " return response.choices[0].message[\"content\"],response['usage']['total_tokens']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1686334263077 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "@timer\n", - "def get_completion_from_messages(messages, model, temperature=0):\n", - " response = openai.ChatCompletion.create(\n", - " engine=model,\n", - " messages=messages,\n", - " 
temperature=temperature, # this is the degree of randomness of the model's output\n", - " )\n", - " #print(str(response.choices[0].message))\n", - " return response.choices[0].message[\"content\"],response['usage']['total_tokens']" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "### 2.1 Summarize Text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1686334268062 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "model_pricing = pd.DataFrame(columns=['model', 'price', 'time'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1686334278657 - } - }, - "outputs": [], - "source": [ - "text = f\"\"\"\n", - "The Olympic Games Tokyo 2020 reached a global broadcast audience of 3.05 billion people, according to independent research conducted on behalf of the International Olympic Committee (IOC). Official coverage on Olympic broadcast partners\\' digital platforms alone generated 28 billion video views in total – representing a 139 per cent increase compared with the Olympic Games Rio 2016 and underlining the changing media landscape and Tokyo 2020\\'s designation as the first streaming Games and the most watched Olympic Games ever on digital platforms.Sony and Panasonic partnered with NHK to develop broadcasting standards for 8K resolution television, with a goal to release 8K television sets in time for the 2020 Summer Olympics. In early 2019, Italian broadcaster RAI announced its intention to deploy 8K broadcasting for the Games. NHK broadcast the opening and closing ceremonies, and coverage of selected events in 8K. 
Telecom company NTT Docomo signed a deal with Finland\\'s Nokia to provide 5G-ready baseband networks in Japan in time for the Games.The Tokyo Olympics were broadcast in the United States by NBCUniversal networks, as part of a US$4.38 billion agreement that began at the 2014 Winter Olympics in Sochi. The United States Olympic & Paralympic Committee asserted that a \"right of abatement\" clause in the contract was triggered by the delay of the Games to 2021, requiring the IOC to \"negotiate in good faith an equitable reduction in the applicable broadcast rights payments\" by NBC, which remains one of IOC\\'s biggest revenue streams. According to NBCUniversal CEO Jeff Shell, the Tokyo games could be the most profitable Olympics in NBC\\'s history. The Tokyo games were NBC\\'s first Olympics broadcast under current president Susan Rosner Rovner.In Europe, this was the first Summer Olympics under the IOC\\'s exclusive pan-European rights deal with Eurosport, which began at the 2018 Winter Olympics and is contracted to run through 2024. The rights for the 2020 Summer Olympics covered almost all of Europe; a pre-existing deal with a marketer excludes Russia. Eurosport planned to sub-license coverage to free-to-air networks in each territory, and other channels owned by Discovery, Inc. subsidiaries. In the United Kingdom, these were set to be the last Games with rights owned primarily by the BBC, although as a condition of a sub-licensing agreement due to carry into the 2022 and 2024 Games, Eurosport holds exclusive pay television rights. In France, these were the last Games whose rights are primarily owned by France Télévisions. Eurosport debuted as pay television rightsholder, after Canal+ elected to sell its pay television rights as a cost-saving measure.In Canada, the 2020 Games were shown on CBC/Radio-Canada platforms, Sportsnet, TSN and TLN. In Australia, they were aired by Seven Network. 
In the Indian subcontinent, they were aired by Sony Pictures Networks India (SPN).\n", - "\"\"\"\n", - "prompt = f\"\"\"\n", - "Summarize the text delimited by triple backticks into a single sentence.\n", - "```{text}```\n", - "\"\"\"\n", - "\n", - "gpt35_response, gpt35_price, gpt35_time = get_chat_completion(prompt, model=chat_model)\n", - "gpt4_response, gpt4_price, gpt4_time = get_chat_completion(prompt, model=chat_model2)\n", - "\n", - "print(f\"GPT-3.5 Response: {gpt35_response}\\n\")\n", - "print(f\"GPT-4 Response: {gpt4_response}\\n\")\n", - "\n", - "new_rows = pd.DataFrame([{'model': 'gpt3.5', 'price': gpt35_price, 'time': gpt35_time},\n", - " {'model': 'gpt4', 'price': gpt4_price, 'time': gpt4_time}])\n", - "pricing = pd.concat([model_pricing, new_rows], ignore_index=True)\n", - "print(pricing)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "#### Student Task #1:\n", - "With tactics learned in CH1, edit the prompt to get more concise answer from the assistant. Do you find any difference in the result?" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "# Edit the prompt to get more concise answer from assistant" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "### 2.2 Summarization for a targeted audience" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1686332538379 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "prompt = f\"\"\"\n", - "Summarize the text delimited by triple backticks into a single sentence for 7-year-old to understand.\n", - "```{text}```\n", - "\"\"\"\n", - "\n", - "gpt35_response, gpt35_price, gpt35_time = get_chat_completion(prompt, model=chat_model)\n", - "gpt4_response, gpt4_price, gpt4_time = get_chat_completion(prompt, model=chat_model2)\n", - "print(f\"GPT-3.5 Response: {gpt35_response}\\n\")\n", - "print(f\"GPT-4 Response: {gpt4_response}\\n\")\n", - "\n", - "new_rows = pd.DataFrame([{'model': 'gpt3.5', 'price': gpt35_price, 'time': gpt35_time},\n", - " {'model': 'gpt4', 'price': gpt4_price, 'time': gpt4_time}])\n", - "pricing = pd.concat([model_pricing, new_rows], ignore_index=True)\n", - "print(pricing)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "#### Student Challenge #2:\n", - "Edit the prompt to summarize the text for eye-catching newspaper title. Compare different results." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "# Edit the prompt to summarize the text for eye-catching newspaper title" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "### 2.3 Summarize Cause & Effect" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1686332587257 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "prompt = f\"\"\"\n", - "Summarize the major event's cause and effect for the text delimited by triple backticks into a single sentence less than 50 words.\n", - "```{text}```\n", - "\"\"\"\n", - "\n", - "gpt35_response, gpt35_price, gpt35_time = get_chat_completion(prompt, model=chat_model)\n", - "gpt4_response, gpt4_price, gpt4_time = get_chat_completion(prompt, model=chat_model2)\n", - "print(f\"GPT-3.5 Response: {gpt35_response}\\n\")\n", - "print(f\"GPT-4 Response: {gpt4_response}\\n\")\n", - "\n", - "new_rows = pd.DataFrame([{'model': 'gpt3.5', 'price': gpt35_price, 'time': gpt35_time},\n", - " {'model': 'gpt4', 'price': gpt4_price, 'time': gpt4_time}])\n", - "pricing = pd.concat([model_pricing, new_rows], ignore_index=True)\n", - "print(pricing)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "#### Student Task #3: Model Comparison\n", - "Use the model comparison chart to briefly summarize your findings after comparing different model output & time taken. eg. GPT-4: Performance (+++), time (+). 
You may also leverage other python packages to visualize your findings.\n", - "\n", - "|Model| Performance |Time|\n", - "|---|---|---|\n", - "|GPT-3.5|||\n", - "|GPT-4|||" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - " #### Student Task #4: Text Classification\n", - " Edit the prompt to make the models generate key topic categories for the text. Compare different model performance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "# Edit the prompt to make the models generate key topic categories for the text" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "#### Student Task #5:\n", - "Edit the prompt to make the models generate more precise results. Compare different model performance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "# Edit the prompt to make the models generate more precise results. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "#### Student Task #6: Model Comparison\n", - "\n", - "Write code to create two bar charts comparing the **price** and **time for completion** between the models. We recommend using the `matplotlib.pyplot` library for making visualizations.\n", - "\n", - "Instructions for completion:\n", - "\n", - "* Utilize the `model_comparison` dataframe to calculate the averages of price and time for each model\n", - "* Produce the bar chart in a currency amount. 
Note that the `price` column in the `model_comparison` dataframe is in the unit of tokens. Refer to the Azure [OpenAI pricing page](https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/) to convert the units." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "\"\"\" STUDENT TASK \"\"\"\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "### 1. Bar chart to compare pricing\n", - "\n", - "\n", - "### 2. Bar chart to compare time for completion" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "### 2.4 Generate Nick Names" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "#### Student Task #7:\n", - "Use different models to create nick names for players from examples words. Compare different model performance. (You can set the temperature value high to increase randomness and more innovative responses.)\n", - "\n", - "Player description: The champion of Men's 100 metre freestyle swimming. Seed words: fast, strong, talented.Nick names: Swimming Genius, Dark Horse, 100-Metre-Freestyle Killer\n", - "\n", - "Player description: The champion of Women Figure Skating. Seed words: elegant, talented, soft." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1685916265011 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "# Write your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "#### Model Comparison\n", - "|Model| Performance |Time|Tokens|Pricing |\n", - "|---|---|---|---|---|\n", - "|GPT-3.5|||||\n", - "|GPT-4||||||" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "### 2.5 Embeddings\n", - "This section focuses on how to retrieve embeddings using different embedding models, and find similarity between documents. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "#### Student Task #8:\n", - "Compare the summaries of two swimming games at the 2020 Summer Olympics using the data provided below.\n", - "\n", - "See whether there are differences using different embedding models to compare." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1686117865502 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "from openai.embeddings_utils import get_embedding, cosine_similarity" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1686117698204 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "game_summary = [\n", - " \"The mixed 100 metre medley relay event at the 2020 Summer Olympics was held in 2021 at the Tokyo Aquatics Centre. These Games marked the first time to feature a mixed-gender swimming event in the program. Each 4-person team features two male and two female swimmers in no particular order. The medals for the competition were presented by Kirsty Coventry IOC Executive Board Member, Zimbabwe; Olympian, 2 Gold Medals, 4 Silver Medals, 1 Bronze Medal, and the medalists bouquets were presented by Errol Clarke, FINA Bureau Member; Barbados.\",\n", - " \"The men's 200 metre breaststroke event at the 2020 Summer Olympics was held from 27 to 29 July 2021 at the Tokyo Aquatics Centre. It was the event's twenty-sixth consecutive appearance, having been held at every edition since 1908.\"\n", - "]\n", - "\n", - "game_highlight = [\n", - " 'The 2020 Summer Olympics featured the first ever mixed-gender swimming event, the 100 metre medley relay. 
Medals were presented by Kirsty Coventry and bouquets by Errol Clarke.',\n", - " \"The men's 200 metre breaststroke event was held at the 2020 Summer Olympics in Tokyo, making it the event's 26th consecutive appearance since 1908.\"\n", - "]\n", - "\n", - "olympics_game_df = pd.DataFrame({\"summary\":game_summary, \"qualification\":game_highlight})\n", - "\n", - "olympics_game_df.head() " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1686117707487 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "@timer\n", - "def get_embedding(text, model=text_model):\n", - " response = openai.Embedding.create(\n", - " input=text,\n", - " engine=model\n", - " )\n", - " return response[\"data\"][0][\"embedding\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "gather": { - "logged": 1686117710151 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "article1_embedding = get_embedding(text=olympics_game_df.summary.iloc[0])\n", - "article2_embedding = get_embedding(text=olympics_game_df.summary.iloc[1])\n", - "print(cosine_similarity(article1_embedding, article2_embedding))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "## Conclusion\n", - "\n", - "In this Challenge, you learned about techniques to compare different types of models from Azure OpenAI. Although we recommended using GPT-3.5 and GPT-4, these methods can be applied to other models as well to determine the best solution for your use case. In Challenge 3, you will learn how to work with larger amounts of data." 
- ] - } - ], - "metadata": { - "kernel_info": { - "name": "python310-sdkv2" - }, - "kernelspec": { - "display_name": "Python 3.10 - SDK v2", - "language": "python", - "name": "python310-sdkv2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.10" - }, - "microsoft": { - "host": { - "AzureML": { - "notebookHasBeenCompleted": true - } - }, - "ms_spell_check": { - "ms_spell_check_language": "en" - } - }, - "nteract": { - "version": "nteract-front-end@1.0.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} \ No newline at end of file From 40fec1aec1138e18bd46d10f494890a1be5d4971 Mon Sep 17 00:00:00 2001 From: Devanshi Thakar <33441411+devanshithakar12@users.noreply.github.com> Date: Mon, 28 Apr 2025 18:26:04 -0700 Subject: [PATCH 4/7] Delete 066-OpenAIFundamentals/Student/Resources/notebooks/CH-05-ResponsibleAI.ipynb --- .../notebooks/CH-05-ResponsibleAI.ipynb | 1296 ----------------- 1 file changed, 1296 deletions(-) delete mode 100644 066-OpenAIFundamentals/Student/Resources/notebooks/CH-05-ResponsibleAI.ipynb diff --git a/066-OpenAIFundamentals/Student/Resources/notebooks/CH-05-ResponsibleAI.ipynb b/066-OpenAIFundamentals/Student/Resources/notebooks/CH-05-ResponsibleAI.ipynb deleted file mode 100644 index de78389bed..0000000000 --- a/066-OpenAIFundamentals/Student/Resources/notebooks/CH-05-ResponsibleAI.ipynb +++ /dev/null @@ -1,1296 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "e3964eae", - "metadata": {}, - "source": [ - "# Challenge 5: Responsible AI" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "132f6f55", - "metadata": {}, - "source": [ - "As LLMs grow in popularity and use around the world, the need to manage and monitor their outputs becomes increasingly important. 
In this challenge, you will learn how to evaluate the outputs of LLMs and how to identify and mitigate potential biases in the model.\n", - "\n", - "Questions you should be able to answer by the end of this challenge:\n", - "- How can you leverage content filtering? \n", - "- What are ways to evaluate truthfulness and reduce hallucinations?\n", - "- How can you identify and mitigate bias in your model?\n", - "\n", - "Sections in this Challenge:\n", - "\n", - "1. Identifying harms and detecting Personal Identifiable Information (PII)\n", - "1. Evaluating truthfulness using Ground Truth Datasets\n", - "1. Evaluating truthfulness using GPT without Ground Truth Datasets\n", - "\n", - "Resources:\n", - "- [Overview of Responsible AI practices for Azure OpenAI models](https://learn.microsoft.com/en-us/legal/cognitive-services/openai/overview)\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "1fdf2ed6", - "metadata": {}, - "source": [ - "## 1. Content filtering, Content Safety, and Personal Identifiable Information (PII) detection" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "1e860826", - "metadata": {}, - "source": [ - "The four stages of the Responsible AI recommendations when using OpenAI are to identify, measure, mitigate, and operate harms. In this section, we will focus on identifying harms.\n", - "\n", - "This step has the goal of identifying potential harms so you can effectively mitigate them. It's important to remember that identifying harms is highly dependent on the context. For example, a model that is used to generate text for a children's book will have different harms than a model that is used to generate text for a news article. 
Language will also have different meaning in different contexts, so an identification framework should be flexible enough to adapt to various situations.\n", - "\n", - "We present three tools to identifying harms:\n", - "- Azure Cognitive Services Content Filtering\n", - "- Azure AI Content safety\n", - "- PII detection via OpenAI Plug-ins" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "b5a13fe6", - "metadata": {}, - "source": [ - "### 1.1 Azure Cognitive Services Content Filtering\n", - "\n", - "From [Azure documentation](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/content-filter): \n", - "\n", - " Azure OpenAI Service includes a content management system that works alongside core models to filter content. This system works by running both the input prompt and generated content through an ensemble of classification models aimed at detecting misuse. \n", - "\n", - "You should evaluate all potential harms carefully and add scenario-specific mitigation as needed. For example, you may want to filter out content that is offensive, profane, sexually explicit, or hateful.\n", - "\n", - "**Knowledge Check #1**:\n", - "\n", - "To assess your understanding of the concept of content filtering, answer the following questions based on the documentation:\n", - "\n", - "* True or False: If you make a streaming completions request for multiple responses, the content filter will evaluate each response individually and return only the ones that pass.\n", - "* True or False: the `finish_reason` parameter will be returned on every response from the content filter.\n", - "* True or False: If the content filtering system is down, you will not be able to receive results about your request." 
- ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "67c23d99", - "metadata": {}, - "source": [ - "### 1.2 Azure AI Content Safety (Preview)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "c9e0d7a6", - "metadata": {}, - "source": [ - "The [Azure AI Content Safety](https://learn.microsoft.com/en-us/azure/cognitive-services/content-safety/overview) was created to help organizations responsible manage and moderate user- and AI-generated content. It is a managed service that provides a scalable, low-latency, and cost-effective content moderation solution for your image and text content. It is designed to help you detect potentially unsafe content, including hate speech, violence, sexually explicit material, and self-harm.\n", - "\n", - "You can read more about the service in this [Microsoft article](https://techcommunity.microsoft.com/t5/ai-cognitive-services-blog/introducing-azure-ai-content-safety-helping-organizations-to/ba-p/3825744).\n", - "\n", - "**Knowledge Check #2**:\n", - "\n", - "Check your understanding of the AI Content Safety Service by answering the following questions:\n", - "\n", - "* True or False: The Text Moderation API is designed to support over 100 languages as input.\n", - "* True or False: The AI Content Safety Service has a feature to monitor activity statistics of your application.\n", - "* True or False: The Azure AI Content Safety Studio and the API have different risk scores (severity levels) across the categories of harm.\n", - "* True or False: You can only customize severity thresholds through the API.\n", - "* True or False: The API always returns a severity level for all four content categories." 
- ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "99f9e49d", - "metadata": {}, - "source": [ - "To run the example, first install some packages and load your environment variables from a `.env` file.\n", - "\n", - "**NOTE:** The openai-python library support for Azure OpenAI is in preview. We have specified the API Preview version below.\n", - "\n", - "`os.getenv()` for the endpoint and key assumes that you are using environment variables." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "220b62a1", - "metadata": { - "gather": { - "logged": 1694716972271 - } - }, - "outputs": [], - "source": [ - "import os\n", - "import openai\n", - "from dotenv import load_dotenv, find_dotenv\n", - "load_dotenv(find_dotenv())\n", - "\n", - "API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", - "assert API_KEY, \"ERROR: Azure OpenAI Key is missing\"\n", - "openai.api_key = API_KEY\n", - "RESOURCE_ENDPOINT = os.getenv(\"OPENAI_API_BASE\",\"\").strip()\n", - "CHAT_MODEL = os.getenv(\"CHAT_MODEL_NAME\")\n", - "assert RESOURCE_ENDPOINT, \"ERROR: Azure OpenAI Endpoint is missing\"\n", - "assert \"openai.azure.com\" in RESOURCE_ENDPOINT.lower(), \"ERROR: Azure OpenAI Endpoint should be in the form: \\n\\n\\t.openai.azure.com\"\n", - "openai.api_base = RESOURCE_ENDPOINT\n", - "openai.api_type = os.environ['OPENAI_API_TYPE']\n", - "CHAT_INSTRUCT_MODEL = os.getenv(\"CHAT_INSTRUCT_MODEL\")\n", - "openai.api_version = \"2023-06-01-preview\" # API version required to test out Annotations preview" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "4fac5b67", - "metadata": {}, - "source": [ - "Below is an example OpenAI call using the Preview version which enables [Annotations](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/content-filter#annotations-preview). Replace the input prompt with different text to see how the annotations change." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ee9a8bad", - "metadata": { - "gather": { - "logged": 1694716864019 - } - }, - "outputs": [], - "source": [ - "pii_prompt = \"{Example prompt where a severity level of low is detected}\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55521442", - "metadata": {}, - "outputs": [], - "source": [ - "response = openai.Completion.create(\n", - " engine=CHAT_MODEL,\n", - " prompt=pii_prompt \n", - " # Content that is detected at severity level medium or high is filtered, \n", - " # while content detected at severity level low isn't filtered by the content filters.\n", - ")\n", - "print(response)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "8f314ff6", - "metadata": {}, - "source": [ - "### 1.3 Checking for PII data" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "44894b31", - "metadata": {}, - "source": [ - "Plugins are chat extensions designed specifically for language models like ChatGPT, enabling them to access up-to-date information, run computations, or interact with third-party services in response to a user's request. They unlock a wide range of potential use cases and enhance the capabilities of language models.\n", - "\n", - "The below function, `screen_text_for_pii`, can be helpful if you want to avoid uploading sensitive or private documents to a database unintentionally.\n", - "\n", - "This feature is not foolproof and may not catch all instances of personally identifiable information. Use this feature with caution and verify its effectiveness for your specific use case. 
You can read more about the background of this function from OpenAI [here](https://github.com/openai/chatgpt-retrieval-plugin/tree/main#plugins).\n", - "\n", - "For other ways to ensure your data is secure when using OpenAI, check out ways to [configure the OpenAI service with managed identities](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/managed-identity).\n", - "\n", - "Read through the function `screen_text_for_pii` in the cell below to understand how it works. You can replace the input text with information relevant to your use case." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ae37954e", - "metadata": { - "gather": { - "logged": 1694716864051 - } - }, - "outputs": [], - "source": [ - "def get_completion_from_messages(messages, model=CHAT_MODEL, temperature=0):\n", - " response = openai.ChatCompletion.create(\n", - " engine=model,\n", - " messages=messages,\n", - " temperature=temperature, # this is the degree of randomness of the model's output\n", - " )\n", - " return response.choices[0].message[\"content\"]\n", - "\n", - "def screen_text_for_pii(text: str) -> bool:\n", - " # This prompt is just an example, change it to fit your use case\n", - " messages = [\n", - " {\n", - " \"role\": \"system\",\n", - " \"content\": f\"\"\"\n", - " You can only respond with the word \"True\" or \"False\", where your answer indicates whether the text in the user's message contains PII.\n", - " Do not explain your answer, and do not use punctuation.\n", - " Your task is to identify whether the text extracted from your company files\n", - " contains sensitive PII information that should not be shared with the broader company. 
Here are some things to look out for:\n", - " - An email address that identifies a specific person in either the local-part or the domain\n", - " - The postal address of a private residence (must include at least a street name)\n", - " - The postal address of a public place (must include either a street name or business name)\n", - " - Notes about hiring decisions with mentioned names of candidates. The user will send a document for you to analyze.\n", - " \"\"\",\n", - " },\n", - " {\"role\": \"user\", \"content\": text},\n", - " ]\n", - "\n", - " completion = get_completion_from_messages(messages)\n", - " \n", - " if completion.startswith(\"True\"):\n", - " return True\n", - "\n", - " return False" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "073cfbcb-dad4-47ba-8384-30a5d7a215b2", - "metadata": { - "gather": { - "logged": 1694716864099 - }, - "jupyter": { - "outputs_hidden": false, - "source_hidden": false - }, - "nteract": { - "transient": { - "deleting": false - } - } - }, - "outputs": [], - "source": [ - "# Optional: test out the screening for PII using input data\n", - "text = \"INPUT YOUR TEXT HERE\"\n", - "screen_text_for_pii(text)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "3cadcf88", - "metadata": {}, - "source": [ - "## 2. Evaluating truthfulness using Ground Truth data\n", - "\n", - "In this section, we will focus on evaluating truthfulness in model outputs. Model hallucinations is a common enough problem in using LLMs that it is important to evaluate whether the model is generating responses based on data rather than making up information. The goal is to improve truthfulness in results to make your model more consistent and reliable for production.\n", - "\n", - "This section will focus on how to evaluate your model when you have access to [Ground Truth](https://en.wikipedia.org/wiki/Ground_truth) data. This will allow us to compare the model's output to the correct answer. 
In the next section, we will focus on how to evaluate your model when you do not have access to Ground Truth data.\n", - "\n", - "When we use Ground Truth data, we can deduce a numerical representation of how similar the predicted answer is to the correct one using various metrics. You will also have the opportunity to identify and implement additional metrics to evaluate the use case in this section.\n", - "\n", - "We will evaluate models using datasets from Hugging Face as well as Hugging Face's [Evaluate library](https://huggingface.co/docs/evaluate/index).\n", - "\n", - "We will also be utilizing LangChain, which has a package (QAEvalChain) for this specific purpose. [Read more](https://python.langchain.com/en/latest/use_cases/evaluation/question_answering.html) about how Evaluation is implemented by LangChain. You may have heard of LangChain and Semantic Kernel. LangChain is a third-party, open-source framework that you can use to develop applications that are powered by language models. LangChain makes the complexities of working and building with AI models easier by providing the pipeline orchestration framework and helper utilities to run powerful, multiple-model pipelines. It can also be integrated with Prompt Flow to scale prompt engineering workflows.\n", - "\n", - "By the end of this section, you can review which approach (Hugging Face's Evaluate or LangChain's QAEvalChain) is preferable for future use cases." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "0e3ce977", - "metadata": {}, - "source": [ - "### 2.1 Setup\n", - "\n", - "For demonstration purposes, we will evaluate a simple question answering system." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4c10054f", - "metadata": { - "gather": { - "logged": 1694716864133 - } - }, - "outputs": [], - "source": [ - "from langchain.prompts import PromptTemplate\n", - "from langchain.chains import LLMChain\n", - "from langchain.chat_models import AzureChatOpenAI" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "d4cfc9b3", - "metadata": {}, - "source": [ - "Now we'll create a Prompt Template that will allow us to use the same prompt with different inputs. We will utilize [LangChain](https://docs.langchain.com/docs/), an open-source framework for working with language models.\n", - "\n", - "Read more about LangChain Chains and how they work [here](https://docs.langchain.com/docs/components/chains/)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9abdf160", - "metadata": { - "gather": { - "logged": 1694716864167 - } - }, - "outputs": [], - "source": [ - "prompt = PromptTemplate(template=\"Question: {question}\\nAnswer:\", input_variables=[\"question\"])\n", - "llm = AzureChatOpenAI(deployment_name=CHAT_MODEL, temperature=0.9)\n", - "chain = LLMChain(llm=llm, prompt=prompt)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "cbea2132", - "metadata": {}, - "source": [ - "### 2.2 Loading data\n", - "\n", - "Now we load a dataset from Hugging Face, and then convert it to a list of dictionaries for easier usage." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d2373cf1", - "metadata": { - "gather": { - "logged": 1694716864214 - } - }, - "outputs": [], - "source": [ - "from datasets import load_dataset\n", - "dataset = load_dataset(\"truthful_qa\", \"generation\")" - ] - }, - { - "cell_type": "markdown", - "id": "b91f88b3-0b95-4814-bcd0-36527b0b49db", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "Let's work with the first five examples in the [Truthful QA dataset from Hugging Face](https://huggingface.co/datasets/truthful_qa). We are working with the \"Generation\" subsection of the dataset because we are applying this to a text-generating system, but notice how there is another subsection for evaluating the model's performance on multiple choice scenarios." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e591ee7f", - "metadata": { - "gather": { - "logged": 1694716864248 - } - }, - "outputs": [], - "source": [ - "num_examples = 3\n", - "examples = list(dataset['validation'])[:num_examples]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0bf71517", - "metadata": { - "gather": { - "logged": 1694716864285 - } - }, - "outputs": [], - "source": [ - "examples[0]" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "b8c3c8ef", - "metadata": {}, - "source": [ - "### 2.3 Predictions\n", - "\n", - "We can now make and inspect the predictions for these questions." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "22b2849c", - "metadata": { - "gather": { - "logged": 1694716864314 - } - }, - "outputs": [], - "source": [ - "predictions = chain.apply(examples)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "35e1d71c", - "metadata": { - "gather": { - "logged": 1694716864346 - } - }, - "outputs": [], - "source": [ - "predictions" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "de420cf5", - "metadata": {}, - "source": [ - "### 2.4 Evaluation\n", - "We can see that if we tried to just do exact match on the answer answers they would not match what the language model answered. However, semantically the language model is correct in both cases. In order to account for this, we can use a language model itself to evaluate the answers.\n", - "\n", - "Because these answers are more complex than multiple choice, we can now evaluate their accuracy using a language model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d6e87e11", - "metadata": { - "gather": { - "logged": 1694716864379 - } - }, - "outputs": [], - "source": [ - "from langchain.evaluation.qa import QAEvalChain" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cfc2e624", - "metadata": { - "gather": { - "logged": 1694716864406 - } - }, - "outputs": [], - "source": [ - "# Create an Evaluation Chain using LangChain's QAEValChain\n", - "eval_chain = QAEvalChain.from_llm(llm)\n", - "graded_outputs = eval_chain.evaluate(examples, predictions, question_key=\"question\", answer_key=\"best_answer\", prediction_key=\"text\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10238f86", - "metadata": { - "gather": { - "logged": 1694716864440 - } - }, - "outputs": [], - "source": [ - "graded_outputs" - ] - }, - { - "cell_type": "markdown", - "id": "f7d216ad-4ae9-49a9-940c-84fccac2a343", - "metadata": { - "nteract": { - "transient": { - "deleting": false - 
} - } - }, - "source": [ - "Now we're going to count the number of outputs that were graded as \"Correct\" or \"Incorrect\" based on the evaluation from the QAEvalChain." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "83e70271", - "metadata": { - "gather": { - "logged": 1694715843342 - } - }, - "outputs": [], - "source": [ - "num_correct = sum([1 for x in graded_outputs if str(x['results']).upper().startswith('CORRECT')])\n", - "num_incorrect = sum([1 for x in graded_outputs if str(x['results']).upper().startswith('INCORRECT')])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "386764e3", - "metadata": { - "gather": { - "logged": 1694715843378 - } - }, - "outputs": [], - "source": [ - "print(num_correct, num_incorrect)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "e6dc737a", - "metadata": {}, - "source": [ - "### 2.5 Comparing to other evaluation metrics\n", - "\n", - "We can compare the evaluation results we get to other common evaluation metrics. To do this, let’s load some evaluation metrics from HuggingFace’s Evaluate package." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dd5f575e", - "metadata": { - "gather": { - "logged": 1694715843415 - } - }, - "outputs": [], - "source": [ - "print(examples[0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "207be70d", - "metadata": { - "gather": { - "logged": 1694715843446 - } - }, - "outputs": [], - "source": [ - "# Some data munging to get the examples in the right format\n", - "for i, eg in enumerate(examples):\n", - " eg['id'] = str(i)\n", - " eg['answers'] = {\"text\": eg['correct_answers'], \"answer_start\": [0]}\n", - " predictions[i]['id'] = str(i)\n", - " predictions[i]['prediction_text'] = predictions[i]['text']\n", - "\n", - "for p in predictions:\n", - " del p['text']\n", - "\n", - "# references need id, answers as list with text and answer_start\n", - "new_examples = examples.copy()\n", - "# print(new_examples)\n", - "for eg in new_examples:\n", - " del eg ['question']\n", - " del eg['best_answer']\n", - " del eg['type']\n", - " del eg['correct_answers']\n", - " del eg['category']\n", - " del eg['incorrect_answers']\n", - " del eg['source']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9d7ef7ec-d7f9-4626-b4a9-12d002737796", - "metadata": { - "gather": { - "logged": 1694715843488 - } - }, - "outputs": [], - "source": [ - "from evaluate import load\n", - "squad_metric = load(\"squad\")\n", - "results = squad_metric.compute(\n", - " references=new_examples,\n", - " predictions=predictions,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2a2ab1dc", - "metadata": { - "gather": { - "logged": 1694715843520 - } - }, - "outputs": [], - "source": [ - "results" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "4df686a3", - "metadata": {}, - "source": [ - "#### (Optional) Student Task\n", - "\n", - "Now add two additional metrics to evaluate the model using the Hugging Face Evaluate library. 
One of those could be the BERT_score metric.\n", - "\n", - "Resources for reference:\n", - "\n", - "* [Hugging Face's Evaluate Library on GitHub](https://github.com/huggingface/evaluate) \n", - "* [Evaluate Library Documentation](https://huggingface.co/docs/transformers/tasks/translation#evaluate) \n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2424f5f7", - "metadata": { - "gather": { - "logged": 1694715843567 - } - }, - "outputs": [], - "source": [ - "### STUDENT TASK ###" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "f88f1a76", - "metadata": {}, - "source": [ - "## 3. Evaluating Models for Truthfulness using GPT without Ground Truth Datasets" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "767bceab", - "metadata": {}, - "source": [ - "You won't always have Ground Truth data available to assess your model. Luckily, GPT does a really good job at generating Ground Truth data from your original dataset.\n", - "\n", - "Research has shown that LLMs such as GPT-3 and ChatGPT are good at assessing text inconsistency. Based on these findings, the models can be used to evaluate sentences for truthfulness by prompting GPT. Let's assess the accuracy of GPT through a technique of GPT evaluating itself.\n", - "\n", - "In this section, we will evaluate the model you worked on in the previous challenge applied to the CNN Dailymail dataset." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2b7e21f7", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.chains import LLMChain, QAGenerationChain\n", - "from langchain.requests import Requests\n", - "from langchain.llms import AzureOpenAI\n", - "from langchain.document_loaders import TextLoader\n", - "import pandas as pd\n", - "import json" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "5a19c79e", - "metadata": {}, - "source": [ - "### 3.1. 
Create a Ground Truth Dataset on Custom Data\n", - "Let's start by using GPT to create a dataset of question-answer pairs as our \"ground-truth\" data from the CNN Dailymail dataset from the previous challenge." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9681f823", - "metadata": {}, - "outputs": [], - "source": [ - "# Load the provided CNN file, the path of which may change based on folder structure\n", - "CNN_FILE_PATH = \"../data/structured/cnn_dailymail_data.csv\"\n", - "\n", - "# Optional: limit to 11 samples for simple scope to avoid RateLimitErrors\n", - "# You are welcome to change `num_samples` or delete it to run this example on\n", - "# the entire dataset but doing so may take 1+ hour\n", - "num_samples = 11\n", - "df = pd.read_csv(CNN_FILE_PATH)[:num_samples]\n", - "df.drop([4,9], axis=0, inplace=True)\n", - "df = df.drop(columns=[\"highlights\"])\n", - "pd.set_option('display.max_colwidth', None) # Show all columns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bc9ad597", - "metadata": {}, - "outputs": [], - "source": [ - "# Take a look at the data\n", - "df.head(3)" - ] - }, - { - "cell_type": "markdown", - "id": "b447a9b6-3f2d-4bf9-84a5-4f269c78b45a", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "Time for some data scrubbing for consistency." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6599f2f6", - "metadata": {}, - "outputs": [], - "source": [ - "# Convert the column \"article\" to a list of dictionaries\n", - "df_copy = df.copy().rename(columns={\"article\": \"text\"})\n", - "df_copy = df_copy.drop(columns=[\"id\"])\n", - "df_dict = df_copy.to_dict(\"records\")\n", - "\n", - "print(df_dict)" - ] - }, - { - "cell_type": "markdown", - "id": "1cf8a1f8-369f-4fb4-909c-600e1c5102d3", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "We've gone ahead and generated a question-answer pair for each article. This will help us assess GPT's performance on how well it answers the test questions. The answers in each pairing are considered our ground truth data and the ideal answer.\n", - "\n", - "We created these pairs using Langchain's [QAGenerationChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.qa_generation.base.QAGenerationChain.html#). Check out the [source code](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/chains/qa_generation) to see how the question-answer pairs are being generated through QAGenerationChain. The implementation may surprise you!\n", - "\n", - "In the process, we removed articles that triggered the OpenAI content filter. \n", - "\n", - "Below, we're going to load the provided question-answer dataset for later assessment." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "af36819b", - "metadata": {}, - "outputs": [], - "source": [ - "llm = AzureOpenAI(deployment_name=CHAT_MODEL, temperature=0, max_tokens=1000)\n", - "chain = QAGenerationChain.from_llm(llm)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8dc5ba51", - "metadata": {}, - "outputs": [], - "source": [ - "# Load cnn_qa_set.json\n", - "cnn_qa_set_filepath = '../data/structured/cnn_qa_set.json'\n", - "with open(cnn_qa_set_filepath, 'r') as file:\n", - " qa_set = json.load(file)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8b597401-c969-454f-a7ee-6a91195239f7", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - }, - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [ - "qa_set[:3]" - ] - }, - { - "cell_type": "markdown", - "id": "fd04f598", - "metadata": {}, - "source": [ - "Now we have the question and Ground Truth answers. Let's test the GPT + Cognitive Search solution you implemented in the last challenge! We are going to compare the differences between `truth_answers` (provided answers) and `prompt_answers` (model performance)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c06d9ab0", - "metadata": {}, - "outputs": [], - "source": [ - "questions = [(set[\"question\"] for set in qa_set)]\n", - "truth_answers = [(set[\"answers\"] for set in qa_set)]\n", - "prompt_answers = list()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "8c9127ee", - "metadata": {}, - "source": [ - "### 3.2 Instantiate the Cognitive Search Index\n", - "\n", - "We're using the Index you created in the last challenge to retrieve documents that are relevant to any input user query." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1c97e90f", - "metadata": {}, - "outputs": [], - "source": [ - "import os, json, requests, sys, re\n", - "import requests\n", - "from pprint import pprint\n", - "import pandas as pd\n", - "from azure.core.credentials import AzureKeyCredential\n", - "from azure.search.documents.indexes import SearchIndexClient \n", - "from azure.search.documents import SearchClient\n", - "from azure.search.documents.indexes.models import (\n", - " SearchIndex,\n", - " SearchField,\n", - " SearchFieldDataType,\n", - " SimpleField,\n", - " SearchableField,\n", - " SemanticConfiguration,\n", - " PrioritizedFields,\n", - " SemanticField,\n", - " SemanticSettings\n", - ")\n", - "\n", - "import numpy as np\n", - "from openai.embeddings_utils import get_embedding, cosine_similarity" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a38c0644", - "metadata": {}, - "outputs": [], - "source": [ - "# Create an SDK client\n", - "service_endpoint = os.getenv(\"AZURE_COGNITIVE_SEARCH_ENDPOINT\") \n", - "key = os.getenv(\"AZURE_COGNITIVE_SEARCH_KEY\")\n", - "credential = AzureKeyCredential(key)\n", - "index_name = os.getenv(\"AZURE_COGNITIVE_SEARCH_INDEX_NAME\")\n", - "\n", - "index_client = SearchIndexClient(\n", - " endpoint=service_endpoint, credential=credential)\n", - "search_client = SearchClient(endpoint=service_endpoint, index_name=index_name, credential=credential)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1f0501d0", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a pandas dataframe with columns from qa_set\n", - "pd.set_option('display.max_colwidth', None)\n", - "df = pd.DataFrame(qa_set)\n", - "df = df.rename(columns={\"answer\": \"truth_answer\"})\n", - "df.head(3)" - ] - }, - { - "cell_type": "markdown", - "id": "e794a118-188b-4e70-b0f0-3376146d077c", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - 
"Let's retrieve the relevant articles for each question in our qa_set dataframe." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "93f48c97", - "metadata": {}, - "outputs": [], - "source": [ - "# Get the articles for the search terms\n", - "# Optional: change `num_docs` to change how many relevant ranked documents the Search index should return\n", - "num_docs=1\n", - "for i, row in df.iterrows():\n", - " search_term = row['question']\n", - " results = search_client.search(search_text=search_term, include_total_count=num_docs)\n", - " df.loc[i, \"context\"] = next(results)['article']\n", - "df.head(3)" - ] - }, - { - "cell_type": "markdown", - "id": "4c172dfb-a2fa-48b2-b723-96d6d521d7e8", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "Using a prompt template, we can feed questions into GPT using the information from the retrieved documents.\n", - "\n", - "Notice which model we're now using to generate answers. Why might this be? What happens if you used the chat model we've used earlier?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "31713b2b", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.prompts import PromptTemplate\n", - "\n", - "# Ask the model using the embeddings from Challenges 3 and 4 to answer the questions\n", - "template = \"\"\"You are a search assistant trying to answer the following question. Use only the context given. 
Your answer should only be one sentence.\n", - "\n", - " > Question: {question}\n", - " \n", - " > Context: {context}\"\"\"\n", - "\n", - "# Create a prompt template\n", - "prompt = PromptTemplate(template=template, input_variables=[\"question\", \"context\"])\n", - "llm = AzureOpenAI(deployment_name=CHAT_INSTRUCT_MODEL, temperature=0)\n", - "search_chain = LLMChain(llm=llm, prompt=prompt, verbose=False)\n", - "\n", - "prompt_answers = []\n", - "for question, context in list(zip(df.question, df.context)):\n", - " response = search_chain.run(question=question, context=context)\n", - " prompt_answers.append(response.replace('\\n',''))\n", - "df['prompt_answer'] = prompt_answers " - ] - }, - { - "cell_type": "markdown", - "id": "252b880b", - "metadata": {}, - "source": [ - "Examine the first three answers from the model based on the articles. How could you utilize Prompt Engineering techniques to refine the answers?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "569b27de", - "metadata": {}, - "outputs": [], - "source": [ - "df['prompt_answer'].head(3)" - ] - }, - { - "cell_type": "markdown", - "id": "9e340641-3ffd-4ea0-a35d-5a684b17d40d", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "After generating responses to our test questions, we can use GPT (can be another model if you would like, such as GPT 4) to evaluate the correctness to our Ground Truth answers using a rubric.\n", - "\n", - "Notice how the prompt is using techniques you learned from Challenges 1 and 2." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0583718", - "metadata": {}, - "outputs": [], - "source": [ - "eval_template = \"\"\"You are trying to answer the following question from the context provided:\n", - "\n", - "> Question: {question}\n", - "\n", - "The correct answer is:\n", - "\n", - "> Query: {truth_answer}\n", - "\n", - "Is the following predicted query semantically the same (eg likely to produce the same answer)?\n", - "\n", - "> Predicted Query: {prompt_answer}\n", - "\n", - "Please give the Predicted Query a grade of either an A, B, C, D, or F, along with an explanation of why. End the evaluation with 'Final Grade: '\n", - "\n", - "> Explanation: Let's think step by step.\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eba357c3", - "metadata": {}, - "outputs": [], - "source": [ - "eval_prompt = PromptTemplate(template=eval_template, input_variables=[\"question\", \"truth_answer\", \"prompt_answer\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8062ada", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a new LLM Chain to submit the prompt we created\n", - "eval_chain = LLMChain(llm=llm, prompt=eval_prompt, verbose=False)\n", - "\n", - "# Submit the prompt using our dataset\n", - "eval_results = []\n", - "for question, truth_answer, prompt_answer in list(zip(df.question, df.truth_answer, df.prompt_answer)):\n", - " eval_output = eval_chain.run(\n", - " question=question,\n", - " truth_answer=truth_answer,\n", - " prompt_answer=prompt_answer,\n", - " )\n", - " eval_results.append(eval_output)\n", - "eval_results" - ] - }, - { - "cell_type": "markdown", - "id": "d0dabe6a-659a-44ca-bc82-ac8793f713ef", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "Now let's parse the rubric results in order to quantify and summarize them in aggregate." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8449fb78", - "metadata": {}, - "outputs": [], - "source": [ - "import re\n", - "from typing import List\n", - "from collections import defaultdict\n", - "\n", - "# Parse the evaluation chain responses into a rubric\n", - "def parse_eval_results(results: List[str]) -> List[float]:\n", - " rubric = {\n", - " \"A\": 1.0,\n", - " \"B\": 0.75,\n", - " \"C\": 0.5,\n", - " \"D\": 0.25,\n", - " \"F\": 0\n", - " }\n", - " return [rubric[re.search(r'Final Grade: (\\w+)', res).group(1)] for res in results]\n", - "\n", - "scores = defaultdict(list)\n", - "parsed_results = parse_eval_results(eval_results)\n", - "\n", - "# Collect the scores for a final evaluation table\n", - "scores['request_synthesizer'].extend(parsed_results)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e2090296", - "metadata": {}, - "outputs": [], - "source": [ - "# Reusing the rubric from above, parse the evaluation chain responses\n", - "parsed_eval_results = parse_eval_results(eval_results)\n", - "# Collect the scores for a final evaluation table\n", - "scores['result_synthesizer'].extend(parsed_eval_results)\n", - "\n", - "# Print out Score statistics for the evaluation session\n", - "header = \"{:<20}\\t{:<10}\\t{:<10}\\t{:<10}\".format(\"Metric\", \"Min\", \"Mean\", \"Max\")\n", - "print(header)\n", - "for metric, metric_scores in scores.items():\n", - " mean_scores = sum(metric_scores) / len(metric_scores) if len(metric_scores) > 0 else float('nan')\n", - " row = \"{:<20}\\t{:<10.2f}\\t{:<10.2f}\\t{:<10.2f}\".format(metric, min(metric_scores), mean_scores, max(metric_scores))\n", - " print(row)" - ] - }, - { - "cell_type": "markdown", - "id": "a055d128-672d-4bd7-83cf-544ad9b6a803", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "There you have it! 
We can now review the results of evaluating the model in conjunction with Azure Cognitive Search from the last challenge. You can perform a similar analysis on your use case and custom data." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "0b743a96", - "metadata": {}, - "source": [ - "## Conclusion" - ] - }, - { - "cell_type": "markdown", - "id": "461ee50f-c2c4-4a80-bc0e-68fad17ee380", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "In this challenge, we covered the principles of Responsible AI, particularly when working with OpenAI, and how to evaluate the performance of a model implementation using Ground Truth data.\n", - "\n", - "We introduced you to several tools and services, some from Azure and others that are Open-Source. You can refer to them for your own projects to decide which works best for your scenarios." - ] - }, - { - "cell_type": "markdown", - "id": "718b928c-6041-4aba-9d65-23b493fbf84c", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "**Knowledge Check #1 Answers**:\n", - "* True\n", - "* False - it will be returned if it was not deemed inappropriate\n", - "* False - your request will still complete without content filtering. You can see if it wasn't applied by looking for an error message in the `content_filter_result` object." 
- ] - }, - { - "cell_type": "markdown", - "id": "8a9fe7e6-4a36-4627-9df5-fe936f9dd93b", - "metadata": { - "nteract": { - "transient": { - "deleting": false - } - } - }, - "source": [ - "**Knowledge Check #2 Answers**:\n", - "* False: the service was trained on more than 100 languages but is designed to support only a handful.\n", - "* True: Content Safety has a monitoring page to help you track you moderation API performance and trends to inform your content moderation strategy.\n", - "* True: The Studio uses four levels of risk, whereas the API scores the risk on a scale of 0 to 6.\n", - "* False: You can also customize severity thresholds in the Studio.\n", - "* False: You can specify which categories you want to assess your text on in the API using the `categories` parameter." - ] - } - ], - "metadata": { - "kernel_info": { - "name": "python3" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" - }, - "microsoft": { - "ms_spell_check": { - "ms_spell_check_language": "en" - } - }, - "nteract": { - "version": "nteract-front-end@1.0.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From d64acd8f6f87b17db305655fc74c70a9412fe935 Mon Sep 17 00:00:00 2001 From: Devanshi Thakar <33441411+devanshithakar12@users.noreply.github.com> Date: Tue, 27 May 2025 14:38:04 -0700 Subject: [PATCH 5/7] Update Challenge-00.md --- 066-OpenAIFundamentals/Student/Challenge-00.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/066-OpenAIFundamentals/Student/Challenge-00.md b/066-OpenAIFundamentals/Student/Challenge-00.md index 778699033e..ccccd5c756 100644 --- a/066-OpenAIFundamentals/Student/Challenge-00.md +++ b/066-OpenAIFundamentals/Student/Challenge-00.md 
@@ -150,7 +150,7 @@ Now we will deploy the needed large language models from Azure OpenAI. - On the left navigation bar, under My Assets, click on Models + endpoints. Click the Deploy Model button and select Deploy base model - Deploy the following 3 models in your Azure OpenAI resource. - `gpt-4o` - - `gpt-35-turbo` + - `gpt-4o-mini` - `text-embedding-ada-002` #### Setup Jupyter Notebooks Configuration File From d506a43730a9750180a51ee0226d4e4d8f273426 Mon Sep 17 00:00:00 2001 From: Devanshi Thakar <33441411+devanshithakar12@users.noreply.github.com> Date: Tue, 27 May 2025 14:48:54 -0700 Subject: [PATCH 6/7] Update Challenge-00.md --- 066-OpenAIFundamentals/Student/Challenge-00.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/066-OpenAIFundamentals/Student/Challenge-00.md b/066-OpenAIFundamentals/Student/Challenge-00.md index ccccd5c756..b39ab4622d 100644 --- a/066-OpenAIFundamentals/Student/Challenge-00.md +++ b/066-OpenAIFundamentals/Student/Challenge-00.md @@ -168,11 +168,11 @@ You will find the `.env.sample` file in the root of the codespace. If you are wo - Navigate to your project. In the lower left corner, click on the link to Management Center. It is also under Project details. - Click on Connected resources under your project - Click the name of your Azure OpenAI Service to see its details. Copy the Target URL and API Key for `OPENAI_API_BASE` and `OPEN_API_KEY`, respectively into the `.env` file - - From the **`Manage connect resources in this project`** screen, click the Name with the type **`AIServices`**. Copy the Target URL and the API Key for `AZURE_FORM_RECOGNIZER_ENDPOINT` and `AZURE_FORM_RECOGNIZER_KEY`, respectively into the `.env` file + - From the **`Manage connect resources in this project`** screen, click the Name with the type **`AIServices`**. The AI Services deployment is a multi-service resource that allows you to access multiple Azure AI services like Document Intelligence with a single key and endpoint. 
Copy the Target URL and the API Key for `AZURE_DOC_INTELLIGENCE_ENDPOINT` and `AZURE_DOC_INTELLIGENCE_KEY`, respectively into the `.env` file - In the [Azure Portal](portal.azure.com), navigate to the resource group you made when creating your hub within the AI Foundry. - Locate your **AI Search** service that you created earlier - - From the **Overview**, copy the URL for `AZURE_COGNITIVE_SEARCH_ENDPOINT` in the .env file - - Under **`Settings`** go to Keys, copy the admin key into `AZURE_COGNITIVE_SEARCH_KEY` in the `.env` file + - From the **Overview**, copy the URL for `AZURE_AI_SEARCH_ENDPOINT` in the .env file + - Under **`Settings`** go to Keys, copy the admin key into `AZURE_AI_SEARCH_KEY` in the `.env` file - Model deployment names should be the same as the ones populated in the `.env.sample` file especially if you have deployed a different model due to quota issues. - For **Azure Blob**, you can find these credentials in the [Azure Portal](portal.azure.com). - In the Azure Portal, navigate to the resource group you made when creating your hub within the AI Foundry. 
From a223725d2a299db299c5bfb3a7615e42c612ba50 Mon Sep 17 00:00:00 2001 From: Pete Rodriguez Date: Thu, 29 May 2025 12:35:04 -0500 Subject: [PATCH 7/7] Updated codespace files --- .../Student/Resources/infra/deploy.sh | 6 +++--- .../Resources/infra/modules/aiServices.bicep | 1 + .../Resources/infra/modules/project.bicep | 16 ++++++++++++++++ .../Resources/infra/modules/storageAccount.bicep | 2 +- .../CH-04-A-RAG_for_structured_data.ipynb | 4 ++-- .../CH-04-B-RAG_for_unstructured_data.ipynb | 12 ++++++------ 6 files changed, 29 insertions(+), 12 deletions(-) diff --git a/066-OpenAIFundamentals/Student/Resources/infra/deploy.sh b/066-OpenAIFundamentals/Student/Resources/infra/deploy.sh index 08fea3ab0a..82035a1e5b 100755 --- a/066-OpenAIFundamentals/Student/Resources/infra/deploy.sh +++ b/066-OpenAIFundamentals/Student/Resources/infra/deploy.sh @@ -117,8 +117,8 @@ source $environment_sample_file echo "Populating .env file..." echo "OPENAI_API_KEY=\"$(echo "$json" | jq -r '.deploymentInfo.value.aiServicesKey')\"" >> $environment_file echo "OPENAI_API_BASE=\"$(echo "$json" | jq -r '.deploymentInfo.value.aiServicesOpenAiEndpoint')\"" >> $environment_file -echo "AZURE_COGNITIVE_SEARCH_KEY=\"$(echo "$json" | jq -r '.deploymentInfo.value.searchKey')\"" >> $environment_file -echo "AZURE_COGNITIVE_SEARCH_ENDPOINT=\"$(echo "$json" | jq -r '.deploymentInfo.value.searchEndpoint')\"" >> $environment_file +echo "AZURE_AI_SEARCH_KEY=\"$(echo "$json" | jq -r '.deploymentInfo.value.searchKey')\"" >> $environment_file +echo "AZURE_AI_SEARCH_ENDPOINT=\"$(echo "$json" | jq -r '.deploymentInfo.value.searchEndpoint')\"" >> $environment_file echo "DOCUMENT_INTELLIGENCE_ENDPOINT=\"$(echo "$json" | jq -r '.deploymentInfo.value.documentEndpoint')\"" >> $environment_file echo "DOCUMENT_INTELLIGENCE_KEY=\"$(echo "$json" | jq -r '.deploymentInfo.value.documentKey')\"" >> $environment_file echo "AZURE_BLOB_STORAGE_ACCOUNT_NAME=\"$(echo "$json" | jq -r 
'.deploymentInfo.value.storageAccountName')\"" >> $environment_file @@ -128,7 +128,7 @@ echo "AZURE_BLOB_STORAGE_CONNECTION_STRING=\"$(echo "$json" | jq -r '.deployment echo "CHAT_MODEL_NAME=\"$CHAT_MODEL_NAME\"" >> $environment_file echo "CHAT_MODEL_NAME2=\"$CHAT_MODEL_NAME2\"" >> $environment_file echo "OPENAI_API_TYPE=\"$OPENAI_API_TYPE\"" >> $environment_file -echo "TEXT_DEPLOYMENT_ID=\"$TEXT_DEPLOYMENT_ID\"" >> $environment_file +echo "CHAT_MODEL_NAME=\"$CHAT_MODEL_NAME\"" >> $environment_file echo "OPENAI_API_VERSION=\"$OPENAI_API_VERSION\"" >> $environment_file echo "EMBEDDING_MODEL_NAME=\"$EMBEDDING_MODEL_NAME\"" >> $environment_file diff --git a/066-OpenAIFundamentals/Student/Resources/infra/modules/aiServices.bicep b/066-OpenAIFundamentals/Student/Resources/infra/modules/aiServices.bicep index c8daf59784..f59d1ee3c9 100644 --- a/066-OpenAIFundamentals/Student/Resources/infra/modules/aiServices.bicep +++ b/066-OpenAIFundamentals/Student/Resources/infra/modules/aiServices.bicep @@ -110,6 +110,7 @@ resource cognitiveServicesOpenAIUserRoleDefinition 'Microsoft.Authorization/role name: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' scope: subscription() } + // This role assignment grants the user the required permissions to eventually delete and purge the Azure AI Services account // https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/role-based-access-control#cognitive-services-contributor resource cognitiveServicesContributorRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (!empty(userObjectId)) { diff --git a/066-OpenAIFundamentals/Student/Resources/infra/modules/project.bicep b/066-OpenAIFundamentals/Student/Resources/infra/modules/project.bicep index 0943fbf0d5..ba3fe208cb 100644 --- a/066-OpenAIFundamentals/Student/Resources/infra/modules/project.bicep +++ b/066-OpenAIFundamentals/Student/Resources/infra/modules/project.bicep @@ -134,11 +134,27 @@ resource project 
'Microsoft.MachineLearningServices/workspaces@2024-04-01-previe } } +resource azureAIDeveloperRoleDefinition 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: '64702f94-c441-49e6-a78b-ef80e0188fee' + scope: subscription() +} + resource azureMLDataScientistRole 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { name: 'f6c7c914-8db3-469d-8ca1-694a8f32e121' scope: subscription() } +// This role assignment grants the user the Azure AI Developer role on the project, which provides permissions for managing Azure AI resources, including deployments and configurations +resource aiDeveloperRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (!empty(userObjectId)) { + name: guid(project.id, azureAIDeveloperRoleDefinition.id, userObjectId) + scope: project + properties: { + roleDefinitionId: azureAIDeveloperRoleDefinition.id + principalType: 'User' + principalId: userObjectId + } +} + // This role assignment grants the user the required permissions to start a Prompt Flow in a compute service within Azure AI Foundry resource azureMLDataScientistUserRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = if (!empty(userObjectId)) { name: guid(project.id, azureMLDataScientistRole.id, userObjectId) diff --git a/066-OpenAIFundamentals/Student/Resources/infra/modules/storageAccount.bicep b/066-OpenAIFundamentals/Student/Resources/infra/modules/storageAccount.bicep index beac13ba37..85c59f88b2 100644 --- a/066-OpenAIFundamentals/Student/Resources/infra/modules/storageAccount.bicep +++ b/066-OpenAIFundamentals/Student/Resources/infra/modules/storageAccount.bicep @@ -31,7 +31,7 @@ param allowStorageAccountPublicAccess string = 'Enabled' param allowBlobPublicAccess bool = false @description('Specifies whether the storage account allows shared key access.
The default value is false.') -param allowSharedKeyAccess bool = false +param allowSharedKeyAccess bool = true @description('Specifies whether the storage account allows cross-tenant replication. The default value is false.') param allowCrossTenantReplication bool = false diff --git a/066-OpenAIFundamentals/Student/Resources/notebooks/CH-04-A-RAG_for_structured_data.ipynb b/066-OpenAIFundamentals/Student/Resources/notebooks/CH-04-A-RAG_for_structured_data.ipynb index e8217ba1c2..226bc1aded 100644 --- a/066-OpenAIFundamentals/Student/Resources/notebooks/CH-04-A-RAG_for_structured_data.ipynb +++ b/066-OpenAIFundamentals/Student/Resources/notebooks/CH-04-A-RAG_for_structured_data.ipynb @@ -167,8 +167,8 @@ "outputs": [], "source": [ "# Create a Cognitive Search Index client\n", - "service_endpoint = os.getenv(\"AZURE_COGNITIVE_SEARCH_ENDPOINT\") \n", - "key = os.getenv(\"AZURE_COGNITIVE_SEARCH_KEY\")\n", + "service_endpoint = os.getenv(\"AZURE_AI_SEARCH_ENDPOINT\") \n", + "key = os.getenv(\"AZURE_AI_SEARCH_KEY\")\n", "credential = AzureKeyCredential(key)\n", "\n", "index_name = \"news-index\"\n", diff --git a/066-OpenAIFundamentals/Student/Resources/notebooks/CH-04-B-RAG_for_unstructured_data.ipynb b/066-OpenAIFundamentals/Student/Resources/notebooks/CH-04-B-RAG_for_unstructured_data.ipynb index a89a92d5fb..c80e8a62ba 100644 --- a/066-OpenAIFundamentals/Student/Resources/notebooks/CH-04-B-RAG_for_unstructured_data.ipynb +++ b/066-OpenAIFundamentals/Student/Resources/notebooks/CH-04-B-RAG_for_unstructured_data.ipynb @@ -31,8 +31,8 @@ "metadata": {}, "outputs": [], "source": [ - "! pip install \"tiktoken==0.4.0\" " - ] + "! 
pip install \"tiktoken==0.9.0\" " + ] }, { "cell_type": "code", @@ -115,8 +115,8 @@ "from azure.core.credentials import AzureKeyCredential\n", "from azure.ai.formrecognizer import DocumentAnalysisClient\n", "\n", - "endpoint = os.environ[\"AZURE_FORM_RECOGNIZER_ENDPOINT\"]\n", - "key = os.environ[\"AZURE_FORM_RECOGNIZER_KEY\"]\n", + "endpoint = os.environ[\"AZURE_DOC_INTELLIGENCE_ENDPOINT\"]\n", + "key = os.environ[\"AZURE_DOC_INTELLIGENCE_KEY\"]\n", "\n", "document_analysis_client = DocumentAnalysisClient(\n", " endpoint=endpoint, credential=AzureKeyCredential(key)\n", @@ -259,8 +259,8 @@ "outputs": [], "source": [ "# Create an SDK client\n", - "service_endpoint = os.getenv(\"AZURE_COGNITIVE_SEARCH_ENDPOINT\") \n", - "key = os.getenv(\"AZURE_COGNITIVE_SEARCH_KEY\")\n", + "service_endpoint = os.getenv(\"AZURE_AI_SEARCH_ENDPOINT\") \n", + "key = os.getenv(\"AZURE_AI_SEARCH_KEY\")\n", "credential = AzureKeyCredential(key)\n", "\n", "index_name = \"research-paper-index\"\n",