Azure-Samples · sdgilley · Nov 11, 2024 · Nov 18, 2024 · Nov 18, 2024 · Nov 19, 2024
@@ -0,0 +1,63 @@
+import re
+import sys
+import json
+import os
+from typing import Union
+from pathlib import Path
+
+SECRET_PATTERNS = [
+    re.compile(r'[\'"]?subscription_id[\'"]?\s*[:=]\s*[\'"][0-9a-f\-]{36}[\'"]', re.IGNORECASE),
+    re.compile(r'[\'"]?resource_group_name[\'"]?\s*[:=]\s*[\'"][a-zA-Z0-9\-_]+[\'"]', re.IGNORECASE),
+    re.compile(r'[\'"]?project_name[\'"]?\s*[:=]\s*[\'"][a-zA-Z0-9\-_]+[\'"]', re.IGNORECASE),
+    re.compile(r'[\'"]?api_key[\'"]?\s*[:=]\s*[\'"][A-Za-z0-9\-_]{40,}[\'"]', re.IGNORECASE),
+    re.compile(
+        r'[\'"]?azure_endpoint[\'"]?\s*[:=]\s*[\'"]https:\/\/[a-zA-Z0-9\-\.]+\.azure\.com[\/a-zA-Z0-9\.\-]*[\'"]',
+        re.IGNORECASE,
+    ),
+    re.compile(r'export\s+[A-Z_][A-Z0-9_]*\s*=\s*["\'][^"\']+["\']', re.IGNORECASE),
+    re.compile(r'os\.environ\["\s*[A-Za-z0-9_]*(API_KEY|ENDPOINT)[A-Za-z0-9_]*\s*"\]', re.IGNORECASE),
+]
+
+
+def check_ipynb_for_secrets(filename: Union[str, os.PathLike]) -> bool:
+    """Jupyter notebooks can't be parsed directly - need to convert to JSON first"""
+    try:
+        with Path(filename).open("r", encoding="utf-8") as file:
+            notebook_data = json.load(file)
+            failed = False
+            for cell in notebook_data.get("cells", []):
+                if cell["cell_type"] == "code":
+                    for line_number, line in enumerate(cell["source"], start=1):
+                        for pattern in SECRET_PATTERNS:
+                            if pattern.search(line):
+                                print(f"Secret detected in {filename} on line {line_number}: {line.strip()}")
+                                failed = True
+            return failed
+    except (UnicodeDecodeError, json.JSONDecodeError) as e:
+        print(f"Failed to read {filename}. Skipping secrets check. Error: {e}")
+        return True
+
+
+def main() -> None:
+    failed = False
+
+    for filename in sys.argv[1:]:
+        if filename.endswith((".py", ".yaml", ".yml", ".md")):
+            try:
+                with Path(filename).open("r", encoding="utf-8") as file:
+                    for line_number, line in enumerate(file, start=1):
+                        for pattern in SECRET_PATTERNS:
+                            if pattern.search(line):
+                                print(f"Secret detected in {filename} on line {line_number}: {line.strip()}")
+                                failed = True
+            except UnicodeDecodeError:
+                print(f"Failed to read {filename}. Skipping secrets check.")
+        elif filename.endswith(".ipynb") and check_ipynb_for_secrets(filename):
+            failed = True
+
+    if failed:
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
@@ -21,3 +21,5 @@ jobs:
       - run: pip install -r dev-requirements.txt
       - name: Run Pre-Commit
         run: pre-commit run --all-files
+        env:
+          SKIP: detect-azure-secrets-custom
@@ -34,3 +34,8 @@ repos:
       entry: python
       language: system
       args: ["-m", "tox", "-qqq", "run", "-e", "black", "--"]
+    - id: detect-azure-secrets-custom
+      name: Detect Azure Secrets
+      entry: python .github/scripts/detect_azure_secrets.py
+      language: python
+      types: [file]
@@ -12,7 +12,24 @@ description: Evaluate.
 
 ### Overview
 
-This tutorial provides a step-by-step guide on how to evaluate Generative AI models with Azure. Each of these samples uses the `azure-ai-evaluation` SDK. 
+This tutorial provides a step-by-step guide on how to evaluate Generative AI base models or AI Applications with Azure. Each of these samples uses the [`azure-ai-evaluation`](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/develop/evaluate-sdk) SDK. 
+
+When selecting a base model for building an application—or after building an AI application (such as a Retrieval-Augmented Generation (RAG) system or a multi-agent framework)—evaluation plays a pivotal role. Effective evaluation ensures that the chosen or developed AI model or application meets the intended safety, quality, and performance benchmarks.
+
+In both cases, running evaluations requires specific tools, methods, and datasets. Here’s a breakdown of the key components involved:
+
+* Testing with Evaluation Datasets
+
+    - Bring Your Own Data: Use datasets tailored to your application or domain.
+    - Redteaming Queries: Design adversarial prompts to test robustness.
+    - [Azure AI Simulators](Simulators/): Leverage Azure AI's context-specific or adversarial dataset generators to create relevant test cases.
+
+* Selecting the Appropriate Evaluators or Building Custom Ones
+
+    - Pre-Built Evaluators: Azure AI provides a range of [generation safety](Supported_Evaluation_Metrics/AI_Judge_Evaluators_Safety_Risks/) and [quality/NLP evaluators](Supported_Evaluation_Metrics/AI_Judge_Evaluators_Quality/) ready for immediate use.
+    - [Custom Evaluators](Supported_Evaluation_Metrics/Custom_Evaluators/): Using the Azure AI Evaluation SDK, you can design and implement evaluators that align with the unique requirements of your application.
+
+* Generating and Visualizing Evaluation Results: Azure AI Evaluation SDK enables you to evaluate the target functions (such as [endpoints of your AI application](Supported_Evaluation_Targets/Evaluate_App_Endpoint/) or your [model endpoints](Supported_Evaluation_Targets/Evaluate_Base_Model_Endpoint/) on your dataset with either built-in or custom evaluators. You can run evaluations [remotely](Supported_Evaluation_Targets/Evaluate_On_Cloud/) in the cloud or locally on your own machine.
 
 ### Objective
 
@@ -26,17 +43,17 @@ The main objective of this tutorial is to help users understand the process of e
 
 | Sample name                            | adversarial | simulator | conversation starter | index | raw text | against model endpoint | against app | qualitative metrics | custom metrics | quantitative NLP metrics |
 |----------------------------------------|-------------|-----------|---------------------|-------|----------|-----------------------|-------------|---------------------|----------------|----------------------|
-| simulate_adversarial.ipynb            | X           | X         |                     |      |          | X                      |             |                     |                |                      |
-| simulate_conversation_starter.ipynb   |             | X         | X                   |       |         | X                      |             |                     |                |                      |
-| simulate_input_index.ipynb            |             | X         |                     | X     |          | X                      |             |                     |                |                      |
-| simulate_input_text.ipynb             |             | X         |                     |       | X        | X                     |             |                     |                |                      |
-| evaluate_endpoints.ipynb              |             |           |                     |      |          | X                      |            | X                    |                |                      |
-| evaluate_app.ipynb                    |             |           |                     |       |         |                       | X           | X                    |                |                      |
-| evaluate_qualitative.ipynb            |             |           |                     |       |         | X                      |            | X                    |                |                      |
-| evaluate_custom.ipynb                 |             |           |                     |       |         | X                      |            |                     | X               |                      |
-| evaluate_quantitative.ipynb            |             |           |                     |       |         | X                      |             |                     |               | X                     |
-| evaluate_safety_risk.ipynb            | X           |           |                     |       |          | X                     |             |                     |                |                      |
-| simulate_and_evaluate_endpoint.py      |             | X         |                     |      | X        | X                     |             | X                    |                |                    |
+| [Simulate_Adversarial.ipynb](Simulators/Simulate_Adversarial_Data/Simulate_Adversarial.ipynb)           | X           | X         |                     |      |          | X                      |             |                     |                |                      |
+| [Simulate_From_Conversation_Starter.ipynb](Simulators/Simulate_Context-Relevant_Data/Simulate_From_Conversation_Starter/Simulate_From_Conversation_Starter.ipynb)   |             | X         | X                   |       |         | X                      |             |                     |                |                      |
+| [Simulate_From_Azure_Search_Index.ipynb](Simulators/Simulate_Context-Relevant_Data/Simulate_From_Azure_Search_Index/Simulate_From_Azure_Search_Index.ipynb)            |             | X         |                     | X     |          | X                      |             |                     |                |                      |
+| [Simulate_From_Input_Text.ipynb](Simulators/Simulate_Context-Relevant_Data/Simulate_From_Input_Text/Simulate_From_Input_Text.ipynb)             |             | X         |                     |       | X        | X                     |             |                     |                |                      |
+| [Evaluate_Base_Model_Endpoint.ipynb](Supported_Evaluation_Targets/Evaluate_Base_Model_Endpoint/Evaluate_Base_Model_Endpoint.ipynb)              |             |           |                     |      |          | X                      |            | X                    |                |                      |
+| [Evaluate_App_Endpoint.ipynb](Supported_Evaluation_Targets/Evaluate_App_Endpoint/Evaluate_App_Endpoint.ipynb)                    |             |           |                     |       |         |                       | X           | X                    |                |                      |
+| [AI_Judge_Evaluators_Quality.ipynb](Supported_Evaluation_Metrics/AI_Judge_Evaluators_Quality/AI_Judge_Evaluators_Quality.ipynb)            |             |           |                     |       |         | X                      |            | X                    |                |                      |
+| [Custom_Evaluators.ipynb](Supported_Evaluation_Metrics/Custom_Evaluators/Custom_Evaluators.ipynb)                |             |           |                     |       |         | X                      |            |                     | X               |                      |
+| [NLP_Evaluators.ipynb](Supported_Evaluation_Metrics/NLP_Evaluators/NLP_Evaluators.ipynb)            |             |           |                     |       |         | X                      |             |                     |               | X                     |
+| [AI_Judge_Evaluators_Safety_Risks.ipynb](Supported_Evaluation_Metrics/AI_Judge_Evaluators_Safety_Risks/AI_Judge_Evaluators_Safety_Risks.ipynb)            | X           |           |                     |       |          | X                     |             |                     |                |                      |
+| [Simulate_Evaluate_Groundedness.py](Simulators/Simulate_Evaluate_Groundedness/Simulate_Evaluate_Groundedness.ipynb)      |             | X         |                     |      | X        | X                     |             | X                    |                |                    |
 
 
 

@@ -0,0 +1,24 @@
+---
+page_type: sample
+languages:
+- python
+products:
+- ai-services
+- azure-openai
+description: Evaluate.
+---
+
+## Simulate Evaluation (Test) Data 
+
+
+
+Relevant, robust evaluation data is essential for effective evaluations. This data can be generated manually, can include production data, or can be assembled with the help of AI. There are two main types of evaluation data: 
+
+- Bring Your Own Data: You can create and update a “golden dataset” with realistic customer questions or inputs paired with expert answers, ensuring quality for generative AI experiences. This dataset can also include samples from production data, offering a realistic evaluation dataset derived from actual queries your AI application has encountered. 
+* Simulators: If evaluation data is not available, simulators can play a crucial role in generating evaluation data by creating both topic-related and adversarial queries.  
+    - Context-related simulators test the AI system’s ability to handle relevant interactions within a specific context, ensuring it performs well under typical use scenarios.  
+
+    - Adversarial simulators, on the other hand, generate queries designed to challenge the AI system, mimicking potential security threats or attempting to provoke undesirable behaviors. This approach helps identify the model's limitations and prepares it to perform well in unexpected or hostile conditions.  
+
+Azure AI Studio provides tools for both topic-related and adversarial simulations, enabling comprehensive evaluation and enhancing confidence in deployment. For topic-related simulations, Azure AI enables you to simulate relevant conversations using [your data](Simulate_Context-Relevant_Data/Simulate_From_Input_Text/Simulate_From_Input_Text.ipynb), [your Azure Search Index](Simulate_Context-Relevant_Data/Simulate_From_Azure_Search_Index/Simulate_From_Azure_Search_Index.ipynb), or [your pre-defined conversation starters](Simulate_Context-Relevant_Data/Simulate_From_Conversation_Starter/Simulate_From_Conversation_Starter.ipynb).
+
@@ -5,18 +5,18 @@ languages:
 products:
 - ai-services
 - azure-openai
-description: Simulator which simulates adversarial questions to ask wiki a custom application
+description: Simulator which simulates adversarial questions
 ---
 
-## Adversarial Simulator for Custom Application (askwiki)
+## Adversarial Simulator
 
 ### Overview
 
-This tutorial provides a step-by-step guide on how to use the adversarial simulator to simulate against a custom application
+This tutorial provides a step-by-step guide on how to use the adversarial simulator
 
 ### Objective
 
-The main objective of this tutorial is to help users understand the process of creating and using an adversarial simulator and use it with a custom application
+The main objective of this tutorial is to help users understand the process of creating and using an adversarial simulator
 By the end of this tutorial, you should be able to:
 - Use the simulator
 - Run the simulator to have an adversarial question answering scenario
@@ -26,6 +26,28 @@ By the end of this tutorial, you should be able to:
 
 ### Basic requirements
 
-To use Azure AI Safety Evaluation for different scenarios(simulation, annotation, etc..), you need an **Azure AI Project.** You should provide Azure AI project to run your safety evaluations or simulations with. First[create an Azure AI hub](https://learn.microsoft.com/en-us/azure/ai-studio/concepts/ai-resources)then [create an Azure AI project](    https://learn.microsoft.com/en-us/azure/ai-studio/how-to/create-projects?tabs=ai-studio).You **do not** need to provide your own LLM deployment as the Azure AI Safety Evaluation servicehosts adversarial models for both simulation and evaluation of harmful content andconnects to it via your Azure AI project.Ensure that your Azure AI project is in one of the supported regions for your desiredevaluation metric:#### Region support for evaluations| Region | Hate and unfairness, sexual, violent, self-harm, XPIA | Groundedness | Protected material || - | - | - | - ||UK South | Will be deprecated 12/1/24| no | no ||East US 2 | yes| yes | yes ||Sweden Central | yes| yes | no|US North Central | yes| no | no ||France Central | yes| no | no ||SwitzerlandWest| yes | no |no|For built-in quality and performance metrics, connect your own deployment of LLMs and therefore youcan evaluate in any region your deployment is in.#### Region support for adversarial simulation| Region | Adversarial simulation || - | - ||UK South | yes||East US 2 | yes||Sweden Central | yes||US North Central | yes||France Central | yes|
+To use Azure AI Safety Evaluation for different scenarios(simulation, annotation, etc..), you need an **Azure AI Project.** You should provide Azure AI project to run your safety evaluations or simulations with. First[create an Azure AI hub](https://learn.microsoft.com/en-us/azure/ai-studio/concepts/ai-resources)then [create an Azure AI project](    https://learn.microsoft.com/en-us/azure/ai-studio/how-to/create-projects?tabs=ai-studio).You **do not** need to provide your own LLM deployment as the Azure AI Safety Evaluation servicehosts adversarial models for both simulation and evaluation of harmful content andconnects to it via your Azure AI project.Ensure that your Azure AI project is in one of the supported regions for your desiredevaluation metric:
+
+#### Region support for evaluations
+
+| Region | Hate and unfairness, sexual, violent, self-harm, XPIA | Groundedness | Protected material |
+| - | - | - | - |
+|UK South | Will be deprecated 12/1/24| no | no |
+|East US 2 | yes| yes | yes |
+|Sweden Central | yes| yes | no|
+|US North Central | yes| no | no |
+|France Central | yes| no | no |
+|SwitzerlandWest| yes | no |no|
+
+For built-in quality and performance metrics, connect your own deployment of LLMs and therefore youcan evaluate in any region your deployment is in.
+
+#### Region support for adversarial simulation
+| Region | Adversarial simulation |
+| - | - |
+|UK South | yes|
+|East US 2 | yes|
+|Sweden Central | yes|
+|US North Central | yes|
+|France Central | yes|
 
 ### Estimated Runtime: 20 mins
@@ -4,7 +4,7 @@ languages:
 - python
 products:
 - azure-openai
-description: Use the Simulator to generate high-quality query and response interactions with your AI applications from your data using LLMs."
+description: Simulate from Azure Search Index
 ---
 
 ## Generate Query and Response from your Azure Search Index

@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Simulate Queries and Responses from input text"
+    "# Simulate Queries and Responses from Azure Search Index"
    ]
   },
   {
@@ -13,12 +13,11 @@
    "source": [
     "## Objective\n",
     "\n",
-    "Use the Simulator to generate high-quality queries and responses from your data using LLMs.\n",
+    "Use the Simulator to generate high-quality queries and responses from your data in Azure Search using LLMs.\n",
     "\n",
     "This tutorial uses the following Azure AI services:\n",
     "\n",
     "- Access to Azure OpenAI Service - you can apply for access [here](https://go.microsoft.com/fwlink/?linkid=2222006)\n",
-    "- An Azure AI Studio project - go to [aka.ms/azureaistudio](https://aka.ms/azureaistudio) to create a project\n",
     "- An Azure AI Search service - go to [aka.ms/azuresearch](https://aka.ms/azuresearch) to create a service "
    ]
   },
@@ -127,7 +126,6 @@
    "source": [
     "import os\n",
     "\n",
-    "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"<your-endpoint>\"\n",
     "os.environ[\"AZURE_OPENAI_DEPLOYMENT\"] = \"<your-deployment>\"\n",
     "os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"<api version>\""
    ]

@@ -4,7 +4,7 @@ languages:
 - python
 products:
 - azure-openai
-description: Use the Simulator to generate high-quality query and response interactions with your AI applications from your data using LLMs."
+description: Use the Simulator to generate high-quality query and response interactions with your AI applications from your data using LLMs
 ---
 
 ## Generate Query and Response from your data