Databricks Sample - Terraform IaC for Azure Databricks and Asset Bundle Deployment via CI/CD #911

Open. Wants to merge 26 commits into `main`.

Commits (26)
22247d7
linting pipeline
DilmurodMak Nov 19, 2024
931373d
Infra Deployment
DilmurodMak Nov 19, 2024
70dd64e
terraform samples azure databricks
DilmurodMak Nov 20, 2024
820b75d
asset bundles terraform
DilmurodMak Dec 2, 2024
1513ade
terraform samples databricks
DilmurodMak Dec 2, 2024
0c19567
terraform code
DilmurodMak Dec 2, 2024
b8fdf45
asset bundle linting code
DilmurodMak Dec 2, 2024
3754af7
linting pipeline
DilmurodMak Dec 2, 2024
254ff18
databricks asset bundle setup
DilmurodMak Dec 2, 2024
56824a9
sandbox and dev deployment pipelines
DilmurodMak Dec 2, 2024
fa4e787
databricks asset bundles sample code
DilmurodMak Dec 2, 2024
080ca8e
yaml lint
DilmurodMak Dec 2, 2024
ed1e96a
doc lint
DilmurodMak Dec 2, 2024
c7f4208
yaml indentation issue
DilmurodMak Dec 2, 2024
3c8f9e4
update varaible instruction
DilmurodMak Dec 6, 2024
47eb1bd
wait 120 sec metastore creation
DilmurodMak Dec 11, 2024
d874dd6
metastore name
DilmurodMak Dec 11, 2024
9417ead
updated using single shell script for deploying resources and destoro…
DilmurodMak Dec 12, 2024
e32e68d
shell script cleanup
DilmurodMak Dec 12, 2024
50f2bcf
doc update
DilmurodMak Dec 12, 2024
36cb922
Update single_tech_samples/databricks/databricks_terraform/README.md
DilmurodMak Dec 16, 2024
82da343
Update single_tech_samples/databricks/databricks_terraform/README.md
DilmurodMak Dec 16, 2024
309e849
update doc and add devcontainer
DilmurodMak Dec 16, 2024
784b520
adding doc update
DilmurodMak Dec 16, 2024
e30e28b
add visio link to the doc for images
DilmurodMak Dec 17, 2024
7a2695b
merge main resolve conflcits
DilmurodMak Dec 17, 2024
8 changes: 8 additions & 0 deletions single_tech_samples/databricks/README.md
@@ -0,0 +1,8 @@
# Azure Databricks

[Azure Databricks](https://docs.microsoft.com/en-us/azure/databricks/) is a data analytics platform optimized for the Microsoft Azure cloud platform. It lets you set up an Apache Spark™ environment in minutes, autoscale compute, and collaborate on shared projects in an interactive workspace.

## Samples

- [IaC deployment of Azure Databricks](./databricks_ci_cd/README.md) - This sample demonstrates how to deploy an Azure Databricks environment using ARM templates.

🚫 [linkspector] reported by reviewdog 🐶
Cannot reach ./databricks_ci_cd/README.md. Status: 404 Cannot find: ./databricks_ci_cd/README.md

- [IaC Deployment of Azure Databricks using Terraform](./databricks_terraform/README.md) - This sample demonstrates how to deploy an Azure Databricks environment using Terraform and promote the source code using Databricks Asset Bundles to different environments.
@@ -0,0 +1,18 @@
# Use Ubuntu Image
FROM mcr.microsoft.com/devcontainers/python:3.11-bullseye

# Update and install required system dependencies
RUN apt update \
&& apt install -y sudo vim software-properties-common curl unzip \
&& apt clean

# Copy and install dev dependencies
COPY requirements-dev.txt /tmp/requirements-dev.txt
RUN pip install -r /tmp/requirements-dev.txt && \
rm /tmp/requirements-dev.txt

# Set the working directory
WORKDIR /workspace

# Default command
CMD ["/bin/bash"]
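
To sanity-check the image outside VS Code, it can be built and run directly with Docker. A minimal sketch, assuming the Dockerfile lives in a `.devcontainer` folder and `requirements-dev.txt` sits at the repository root (both are assumptions, since the paths are not shown in this diff):

```bash
# From the .devcontainer folder: build with the repo root as the build
# context so the COPY of requirements-dev.txt resolves.
docker build -t adb-devcontainer -f Dockerfile ..

# Start an interactive shell with the repository mounted at /workspace.
docker run --rm -it -v "$(pwd)/..:/workspace" adb-devcontainer
```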
@@ -0,0 +1,25 @@
{
"name": "Python DevContainer",
"dockerFile": "Dockerfile",
"context": "..",
"features": {
"ghcr.io/devcontainers/features/terraform:1": {
"installTerrafromDocs": true
},
"ghcr.io/devcontainers/features/azure-cli:1": {
"extensions": ""
},
"ghcr.io/devcontainers/features/github-cli:1": {},
"ghcr.io/audacioustux/devcontainers/taskfile:1": {}
},
"customizations" :{
"vscode": {
"extensions": [
"yzhang.markdown-all-in-one",
"DavidAnson.vscode-markdownlint",
"-dbaeumer.vscode-eslint"
]
}
}
}

@@ -0,0 +1,61 @@
name: "Asset Bundle Dev Deployment"

on:
workflow_run:
workflows: ["Asset Bundle Sandbox Deployment"]
types:
- completed

env:
ENV: dev
WORKING_DIR: single_tech_samples/databricks/databricks_terraform/

jobs:
deploy:
name: "Deploy bundle"
runs-on: ubuntu-latest
environment: development
defaults:
run:
working-directory: ${{ env.WORKING_DIR }}
if: |
github.event.workflow_run.conclusion == 'success' &&
github.event.workflow_run.head_branch == 'main'

steps:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Setup Databricks CLI
uses: databricks/setup-cli@main

- name: Azure Login Using Service Principal
uses: azure/login@v2
with:
creds: ${{ secrets.AZURE_DEV_CREDENTIALS }}

- name: Deploy Databricks Bundle
run: |
databricks bundle validate -t ${{ env.ENV }} -o json
databricks bundle deploy -t ${{ env.ENV }}
env:
DATABRICKS_BUNDLE_ENV: ${{ env.ENV }}

- name: Install Task
uses: arduino/setup-task@v2
with:
version: 3.x
repo-token: ${{ secrets.GITHUB_TOKEN }}

- name: Set Test Flows
run: task collect-tests

- name: Run test workflows
run: task run-tests
env:
# gets test_flows from Set Test Flows step
# and passes to the run-tests task
test_flows: ${{ env.test_flows }}
# bundle file required variables
DATABRICKS_BUNDLE_ENV: ${{ env.ENV }}
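
The same deployment can be exercised locally with the Databricks CLI before relying on the pipeline. A hedged sketch of the equivalent commands, assuming you have already authenticated (for example via `az login`) and are inside `single_tech_samples/databricks/databricks_terraform/`:

```bash
# Validate the bundle configuration against the dev target.
databricks bundle validate -t dev -o json

# Deploy the jobs and files defined in the bundle to the dev target.
databricks bundle deploy -t dev
```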
@@ -0,0 +1,36 @@
name: "ADB Asset Bundle CI Linting"

on:
pull_request:
branches:
- main
paths:
- "single_tech_samples/databricks/databricks_terraform/**"

env:
UV_VERSION: ">=0.4.26"
PYTHON_VERSION: "3.11"

jobs:
linting:
runs-on: ubuntu-latest

steps:
- name: Checkout the repository
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v3
with:
enable-cache: true
version: ${{ env.UV_VERSION }}
cache-dependency-glob: "**/requirements**.txt"

- name: Install Python and Dependencies
run: |
uv python install ${{ env.PYTHON_VERSION }}
uv tool install ruff

- name: Run Ruff Lint
run: |
uv run ruff check single_tech_samples/databricks/databricks_terraform
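
The same check can be reproduced locally before opening a pull request. A minimal sketch, assuming `uv` is installed:

```bash
# Install ruff as a standalone uv tool, mirroring the workflow.
uv tool install ruff

# Run the same lint the pipeline runs, from the repository root.
ruff check single_tech_samples/databricks/databricks_terraform
```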
@@ -0,0 +1,70 @@
name: "Asset Bundle Sandbox Deployment"

on:
push:
branches:
- main
paths:
- "single_tech_samples/databricks/databricks_terraform/**"
pull_request:
branches:
- main
paths:
- "single_tech_samples/databricks/databricks_terraform/**"

env:
ENV: sandbox
WORKING_DIR: single_tech_samples/databricks/databricks_terraform/

jobs:
deploy:
name: "Deploy bundle"
runs-on: ubuntu-latest
environment: sandbox

defaults:
run:
working-directory: ${{ env.WORKING_DIR }}

steps:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Setup Databricks CLI
uses: databricks/setup-cli@main

- name: Azure Login Using Service Principal
uses: azure/login@v2
with:
creds: ${{ secrets.AZURE_INT_CREDENTIALS }}

- name: Deploy Databricks Bundle
run: |
if [ "${{ github.event_name }}" == "pull_request" ]; then
databricks bundle validate -t ${{ env.ENV }} -o json
elif [ "${{ github.event_name }}" == "push" ]; then
databricks bundle deploy -t ${{ env.ENV }} -o json
fi
env:
DATABRICKS_BUNDLE_ENV: ${{ env.ENV }}

- name: Install Task
if: github.event_name == 'push'
uses: arduino/setup-task@v2
with:
version: 3.x
repo-token: ${{ secrets.GITHUB_TOKEN }}

- name: Set Test Flows
if: github.event_name == 'push'
run: task collect-tests

- name: Run test workflows
if: github.event_name == 'push'
run: task run-tests
env:
# gets test_flows from Set Test Flows step
# and passes to the run-tests task
test_flows: ${{ env.test_flows }}
# bundle file required variables
DATABRICKS_BUNDLE_ENV: ${{ env.ENV }}
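
The `collect-tests` and `run-tests` tasks come from the repository's Taskfile, which is not shown in this diff. Purely as an illustration, one plausible shape for `collect-tests` is a script that reads test job names out of the validated bundle and exports them through `GITHUB_ENV`; the `jq` filter and the `test_` naming convention below are assumptions:

```bash
# Hypothetical sketch: collect bundle job names starting with "test_"
# and expose them to later workflow steps as a comma-separated list.
test_flows=$(databricks bundle validate -t "$DATABRICKS_BUNDLE_ENV" -o json \
  | jq -r '[.resources.jobs | keys[] | select(startswith("test_"))] | join(",")')
echo "test_flows=${test_flows}" >> "$GITHUB_ENV"
```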
@@ -0,0 +1,70 @@
# Terraform Code for Multi-Environment Databricks Medallion Deployment

![Multi Environment Image](../images/architecture.png)

[Visio Drawing](https://microsoft.sharepoint.com/:u:/t/ExternalEcolabKitchenOS/EWM3kB69NGBBiy2s563pjJ0BeKWy1qgtgEznRvvufiseFg?e=RieWOu)

## Overview

The **`Infra/modules`** folder has three modules, which the deployment script applies in order (see the sketch after this list):
- **`adb-workspace`** - Deploys the Databricks workspace.
- **`metastore-and-users`** - Creates the Databricks access connector, creates the storage account, grants the connector access rights on the storage account, creates the metastore and assigns the workspace to it, and finally retrieves all users, groups, and service principals from Azure AD.
- **`adb-unity-catalog`** - Grants Databricks access rights to the connector, creates containers in the storage account along with external locations for them, creates the Unity Catalog and grants permissions to user groups, and finally creates the **`bronze`**, **`silver`**, and **`gold`** schemas under the catalog with the required permissions for the user groups.
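
Each module keeps its own Terraform state, which is what makes the deployment resumable. Conceptually, the deployment script applies the modules in dependency order, roughly like this hedged sketch (the directory layout and variable wiring are assumptions; the actual `dev.deploy.sh` is not shown in this diff):

```bash
# Apply the three modules in dependency order. Each module keeps its
# own state, so a failed run can simply be re-executed.
for module in adb-workspace metastore-and-users adb-unity-catalog; do
  terraform -chdir="Infra/modules/${module}" init
  terraform -chdir="Infra/modules/${module}" apply -auto-approve
done
```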

Collaborator:

If the script fails with an error (lack of permissions or something else), is the script idempotent? Meaning, can we re-run it and have it continue where it left off? Can we make a note about that?

Author:

Yes, the script can resume where it left off. It references the state files of each Terraform module when running, and based on that state it continues from where it stopped.

I will make a note of that.

Author:

There is a note on that in the Infra README, at the end of the doc.

**NOTE** - *When the **`adb-workspace`** module runs, it creates a Databricks workspace, which by default creates a metastore in the same region. Databricks allows only **ONE METASTORE** per region. The **`metastore-and-users`** module deploys a new metastore with the required configuration, so the existing metastore must be deleted before running the module.*

**NOTE** - *During script execution you may receive the error `Error: cannot create metastore: This account with id <Account_ID> has reached the limit for metastores in region <Region>`. This means the metastore limit for the region has been reached. To fix it, delete the existing metastore and re-run the script.*
Collaborator:

Can you add a new "Known Issues" section at the end and add this note there?
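
As a hedged illustration of that workaround, the conflicting metastore can be inspected and removed with the account-level Databricks CLI (assuming account admin access; verify the metastore ID before deleting anything):

```bash
# List the metastores in the account and note the conflicting ID.
databricks account metastores list

# Delete the conflicting metastore, then re-run the deployment script.
databricks account metastores delete <metastore-id>
```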


## How to Run

### Pre-requisites
Collaborator:

Can we add security pre-requisites for the account? What RBAC or other privileges on the Azure subscription does the user who will run the script need to have?

- `Infra/deployment/.env` - Update the values as per your requirements.
- Have Databricks account-admin-level access. Log in to [accounts.azuredatabricks.net](https://accounts.azuredatabricks.net/) to get your Databricks account ID.

### Steps

1. Log in to Azure
```bash
az login
```

2. Set the subscription
```bash
az account set --subscription <subscription-id>
```

3. Change directory to `Infra/deployment`
```bash
cd Infra/deployment
```

4. Make the script executable
```bash
chmod +x dev.deploy.sh
```

5. Run the script to deploy the modules sequentially
```bash
./dev.deploy.sh
```

## Destroy

### Steps

1. Change directory to `Infra/deployment`
```bash
cd Infra/deployment
```
2. Make the script executable
```bash
chmod +x dev.destroy.sh
```
3. Run the script with the `--destroy` flag to destroy the modules
```bash
./dev.destroy.sh --destroy
```

## Error Handling

If the script fails during resource creation, rerun it. It references the local state files and will try again to create only the remaining resources.
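
If a rerun keeps failing, it can help to see what Terraform still intends to create before applying again. A minimal sketch for a single module (the path is an assumption):

```bash
# Show pending changes for one module without applying anything.
terraform -chdir=Infra/modules/metastore-and-users plan
```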
@@ -0,0 +1,11 @@
region=""
environment="dev"
subscription_id=""
resource_group_name=""
metastore_name=""
account_id="" # log in at https://accounts.azuredatabricks.net/ to get the account ID.
prefix="dev"

# Ensure these groups exist in Azure Entra ID.
# Make sure you are a member of the account_unity_admin group when running the script locally.
aad_groups='["account_unity_admin","data_engineer","data_analyst","data_scientist"]'
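
A hedged sketch of how a deployment script might consume this file, re-exporting each entry as a `TF_VAR_` so Terraform picks the values up automatically (the actual `dev.deploy.sh` may wire variables differently):

```bash
# Load .env, then re-export each key as TF_VAR_<key> for Terraform.
set -a
source .env
set +a
for var in region environment subscription_id resource_group_name \
           metastore_name account_id prefix aad_groups; do
  export "TF_VAR_${var}=${!var}"
done
```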