From af84ba261939c68e6ec2724d398c33896c372216 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 6 Aug 2025 15:11:23 +0200 Subject: [PATCH 1/7] Added an example how to deploy resources only for specific targets --- knowledge_base/target_includes/.gitignore | 4 + knowledge_base/target_includes/README.md | 105 ++++++++++++++++++ knowledge_base/target_includes/databricks.yml | 24 ++++ .../target_includes/resources/set1/job_1.yml | 29 +++++ .../target_includes/resources/set1/job_2.yml | 29 +++++ .../target_includes/resources/set2/job_1.yml | 29 +++++ .../target_includes/resources/set2/job_2.yml | 29 +++++ .../target_includes/src/notebook.py | 3 + 8 files changed, 252 insertions(+) create mode 100644 knowledge_base/target_includes/.gitignore create mode 100644 knowledge_base/target_includes/README.md create mode 100644 knowledge_base/target_includes/databricks.yml create mode 100644 knowledge_base/target_includes/resources/set1/job_1.yml create mode 100644 knowledge_base/target_includes/resources/set1/job_2.yml create mode 100644 knowledge_base/target_includes/resources/set2/job_1.yml create mode 100644 knowledge_base/target_includes/resources/set2/job_2.yml create mode 100644 knowledge_base/target_includes/src/notebook.py diff --git a/knowledge_base/target_includes/.gitignore b/knowledge_base/target_includes/.gitignore new file mode 100644 index 00000000..797e9a11 --- /dev/null +++ b/knowledge_base/target_includes/.gitignore @@ -0,0 +1,4 @@ + +.databricks +*.jar +*.class \ No newline at end of file diff --git a/knowledge_base/target_includes/README.md b/knowledge_base/target_includes/README.md new file mode 100644 index 00000000..d9ccc96b --- /dev/null +++ b/knowledge_base/target_includes/README.md @@ -0,0 +1,105 @@ +# Target Includes Example + +This example demonstrates the concept of using `target_includes` (or similar include mechanisms) in Databricks Asset Bundles to organize job configurations across different environments without duplication. 
+ +## Overview + +This example implements the use case described in [GitHub Issue #2878](https://github.com/databricks/cli/issues/2878), which requests the ability to include specific resource files based on target configurations. + +## Directory Structure + +``` +target_includes/ +├── databricks.yml # Main bundle configuration with 3 targets +├── resources/ +│ ├── set1/ # Jobs for dev and staging environments +│ │ ├── job_1.yml +│ │ └── job_2.yml +│ └── set2/ # Jobs for staging and prod environments +│ ├── job_1.yml +│ └── job_2.yml +└── README.md +``` + +## Target Configuration + +The bundle defines three targets: + +1. **dev**: Includes only `resources/set1/*.yml` + - Contains: set1-job-1, set1-job-2 + - Environment: development + +2. **staging**: Includes both `resources/set1/*.yml` and `resources/set2/*.yml` + - Contains: set1-job-1, set1-job-2, set2-job-1, set2-job-2 + - Environment: staging + +3. **prod**: Includes only `resources/set2/*.yml` + - Contains: set2-job-1, set2-job-2 + - Environment: production + +## Usage + +```bash +# Summary of the bundle resources for dev +databricks bundle summary -p u2m -t dev + +Name: target-includes-example +Target: dev +Workspace: + User: *** + Path: *** +Resources: + Jobs: + set1-job-1: + Name: Set1 Job 1 - dev + URL: (not deployed) + set1-job-2: + Name: Set1 Job 2 - dev + URL: (not deployed) + +# Summary of the bundle resources for staging +databricks bundle summary -p u2m -t staging + +Name: target-includes-example +Target: staging +Workspace: + User: *** + Path: *** +Resources: + Jobs: + set1-job-1: + Name: Set1 Job 1 - staging + URL: (not deployed) + set1-job-2: + Name: Set1 Job 2 - staging + URL: (not deployed) + set2-job-1: + Name: Set2 Job 1 - staging + URL: (not deployed) + set2-job-2: + Name: Set2 Job 2 - staging + URL: (not deployed) + +# Summary of the bundle resources for prod +databricks bundle summary -p u2m -t prod + +Name: target-includes-example +Target: prod +Workspace: + User: *** + Path: *** 
+Resources: + Jobs: + set2-job-1: + Name: Set2 Job 1 - prod + URL: (not deployed) + set2-job-2: + Name: Set2 Job 2 - prod + URL: (not deployed) +``` + +## Notes + +There are some key aspects in this implementation +1. In `databricks.yml` file we include (see `include` section) all configuration files for all targets. This does not impact which resources will be deployed for which target. +2. For each job in corresponding configuration file like `resources/set1/job_1.yml` we define in which targets this job should be deployed. We use YAML anchors to avoid duplications between targets. diff --git a/knowledge_base/target_includes/databricks.yml b/knowledge_base/target_includes/databricks.yml new file mode 100644 index 00000000..9b68d405 --- /dev/null +++ b/knowledge_base/target_includes/databricks.yml @@ -0,0 +1,24 @@ +bundle: + name: target-includes-example + +include: + - resources/set1/*.yml + - resources/set2/*.yml + +variables: + environment: + description: "Define the environment where the job will run" + +targets: + dev: + default: true + variables: + environment: dev + + staging: + variables: + environment: staging + + prod: + variables: + environment: prod diff --git a/knowledge_base/target_includes/resources/set1/job_1.yml b/knowledge_base/target_includes/resources/set1/job_1.yml new file mode 100644 index 00000000..4eaccb9e --- /dev/null +++ b/knowledge_base/target_includes/resources/set1/job_1.yml @@ -0,0 +1,29 @@ +# Job 1 for set1 - used in dev and staging targets +# Using YAML anchors to avoid duplication + +job-config: &job-config + set1-job-1: + name: "Set1 Job 1 - ${var.environment}" + tasks: + - task_key: "process_data" + notebook_task: + notebook_path: ../../src/notebook.py + new_cluster: + spark_version: "13.3.x-scala2.12" + node_type_id: "Standard_DS3_v2" + num_workers: 1 + max_concurrent_runs: 1 + timeout_seconds: 3600 + schedule: + quartz_cron_expression: "0 0 12 * * ?" 
+ timezone_id: "UTC" + +targets: + dev: + resources: + jobs: + <<: *job-config + staging: + resources: + jobs: + <<: *job-config \ No newline at end of file diff --git a/knowledge_base/target_includes/resources/set1/job_2.yml b/knowledge_base/target_includes/resources/set1/job_2.yml new file mode 100644 index 00000000..5c3a5cc9 --- /dev/null +++ b/knowledge_base/target_includes/resources/set1/job_2.yml @@ -0,0 +1,29 @@ +# Job 2 for set1 - used in dev and staging targets +# Using YAML anchors to avoid duplication + +job-config: &job-config + set1-job-2: + name: "Set1 Job 2 - ${var.environment}" + tasks: + - task_key: "process_data" + notebook_task: + notebook_path: ../../src/notebook.py + new_cluster: + spark_version: "13.3.x-scala2.12" + node_type_id: "Standard_DS3_v2" + num_workers: 1 + max_concurrent_runs: 1 + timeout_seconds: 3600 + schedule: + quartz_cron_expression: "0 0 12 * * ?" + timezone_id: "UTC" + +targets: + dev: + resources: + jobs: + <<: *job-config + staging: + resources: + jobs: + <<: *job-config \ No newline at end of file diff --git a/knowledge_base/target_includes/resources/set2/job_1.yml b/knowledge_base/target_includes/resources/set2/job_1.yml new file mode 100644 index 00000000..8ed126d9 --- /dev/null +++ b/knowledge_base/target_includes/resources/set2/job_1.yml @@ -0,0 +1,29 @@ +# Job 1 for set2 - used in staging and prod targets +# Using YAML anchors to avoid duplication + +job-config: &job-config + set2-job-1: + name: "Set2 Job 1 - ${var.environment}" + tasks: + - task_key: "process_data" + notebook_task: + notebook_path: ../../src/notebook.py + new_cluster: + spark_version: "13.3.x-scala2.12" + node_type_id: "Standard_DS3_v2" + num_workers: 1 + max_concurrent_runs: 1 + timeout_seconds: 3600 + schedule: + quartz_cron_expression: "0 0 12 * * ?" 
+ timezone_id: "UTC" + +targets: + staging: + resources: + jobs: + <<: *job-config + prod: + resources: + jobs: + <<: *job-config \ No newline at end of file diff --git a/knowledge_base/target_includes/resources/set2/job_2.yml b/knowledge_base/target_includes/resources/set2/job_2.yml new file mode 100644 index 00000000..8f94ac01 --- /dev/null +++ b/knowledge_base/target_includes/resources/set2/job_2.yml @@ -0,0 +1,29 @@ +# Job 2 for set2 - used in staging and prod targets +# Using YAML anchors to avoid duplication + +job-config: &job-config + set2-job-2: + name: "Set2 Job 2 - ${var.environment}" + tasks: + - task_key: "process_data" + notebook_task: + notebook_path: ../../src/notebook.py + new_cluster: + spark_version: "13.3.x-scala2.12" + node_type_id: "Standard_DS3_v2" + num_workers: 1 + max_concurrent_runs: 1 + timeout_seconds: 3600 + schedule: + quartz_cron_expression: "0 0 12 * * ?" + timezone_id: "UTC" + +targets: + staging: + resources: + jobs: + <<: *job-config + prod: + resources: + jobs: + <<: *job-config \ No newline at end of file diff --git a/knowledge_base/target_includes/src/notebook.py b/knowledge_base/target_includes/src/notebook.py new file mode 100644 index 00000000..734514cf --- /dev/null +++ b/knowledge_base/target_includes/src/notebook.py @@ -0,0 +1,3 @@ +# Databricks notebook source + +print("Hello From Notebook!") From d46ca7bd216e7e8d10dd181500a6925bbd313154 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 6 Aug 2025 15:32:57 +0200 Subject: [PATCH 2/7] serverless and no schedule --- knowledge_base/target_includes/.gitignore | 3 +-- .../target_includes/resources/set1/job_1.yml | 7 ------- .../target_includes/resources/set1/job_2.yml | 14 ++++++-------- .../target_includes/resources/set2/job_1.yml | 14 ++++++-------- .../target_includes/resources/set2/job_2.yml | 14 ++++++-------- 5 files changed, 19 insertions(+), 33 deletions(-) diff --git a/knowledge_base/target_includes/.gitignore b/knowledge_base/target_includes/.gitignore index 
797e9a11..adcd4586 100644 --- a/knowledge_base/target_includes/.gitignore +++ b/knowledge_base/target_includes/.gitignore @@ -1,4 +1,3 @@ - .databricks *.jar -*.class \ No newline at end of file +*.class diff --git a/knowledge_base/target_includes/resources/set1/job_1.yml b/knowledge_base/target_includes/resources/set1/job_1.yml index 4eaccb9e..4954415a 100644 --- a/knowledge_base/target_includes/resources/set1/job_1.yml +++ b/knowledge_base/target_includes/resources/set1/job_1.yml @@ -8,15 +8,8 @@ job-config: &job-config - task_key: "process_data" notebook_task: notebook_path: ../../src/notebook.py - new_cluster: - spark_version: "13.3.x-scala2.12" - node_type_id: "Standard_DS3_v2" - num_workers: 1 max_concurrent_runs: 1 timeout_seconds: 3600 - schedule: - quartz_cron_expression: "0 0 12 * * ?" - timezone_id: "UTC" targets: dev: diff --git a/knowledge_base/target_includes/resources/set1/job_2.yml b/knowledge_base/target_includes/resources/set1/job_2.yml index 5c3a5cc9..b044836a 100644 --- a/knowledge_base/target_includes/resources/set1/job_2.yml +++ b/knowledge_base/target_includes/resources/set1/job_2.yml @@ -3,20 +3,18 @@ job-config: &job-config set1-job-2: - name: "Set1 Job 2 - ${var.environment}" + name: "Set1 Job 2 - ${bundle.target}" tasks: - task_key: "process_data" notebook_task: notebook_path: ../../src/notebook.py - new_cluster: - spark_version: "13.3.x-scala2.12" - node_type_id: "Standard_DS3_v2" - num_workers: 1 max_concurrent_runs: 1 timeout_seconds: 3600 - schedule: - quartz_cron_expression: "0 0 12 * * ?" - timezone_id: "UTC" + trigger: + type: "cron" + cron: + quartz_cron_expression: "0 0 12 * * ?" 
+ timezone_id: "UTC" targets: dev: diff --git a/knowledge_base/target_includes/resources/set2/job_1.yml b/knowledge_base/target_includes/resources/set2/job_1.yml index 8ed126d9..a1f89d69 100644 --- a/knowledge_base/target_includes/resources/set2/job_1.yml +++ b/knowledge_base/target_includes/resources/set2/job_1.yml @@ -3,20 +3,18 @@ job-config: &job-config set2-job-1: - name: "Set2 Job 1 - ${var.environment}" + name: "Set2 Job 1 - ${bundle.target}" tasks: - task_key: "process_data" notebook_task: notebook_path: ../../src/notebook.py - new_cluster: - spark_version: "13.3.x-scala2.12" - node_type_id: "Standard_DS3_v2" - num_workers: 1 max_concurrent_runs: 1 timeout_seconds: 3600 - schedule: - quartz_cron_expression: "0 0 12 * * ?" - timezone_id: "UTC" + trigger: + type: "cron" + cron: + quartz_cron_expression: "0 0 12 * * ?" + timezone_id: "UTC" targets: staging: diff --git a/knowledge_base/target_includes/resources/set2/job_2.yml b/knowledge_base/target_includes/resources/set2/job_2.yml index 8f94ac01..f3d66446 100644 --- a/knowledge_base/target_includes/resources/set2/job_2.yml +++ b/knowledge_base/target_includes/resources/set2/job_2.yml @@ -3,20 +3,18 @@ job-config: &job-config set2-job-2: - name: "Set2 Job 2 - ${var.environment}" + name: "Set2 Job 2 - ${bundle.target}" tasks: - task_key: "process_data" notebook_task: notebook_path: ../../src/notebook.py - new_cluster: - spark_version: "13.3.x-scala2.12" - node_type_id: "Standard_DS3_v2" - num_workers: 1 max_concurrent_runs: 1 timeout_seconds: 3600 - schedule: - quartz_cron_expression: "0 0 12 * * ?" - timezone_id: "UTC" + trigger: + type: "cron" + cron: + quartz_cron_expression: "0 0 12 * * ?" 
+ timezone_id: "UTC" targets: staging: From 46ca260b3a787cb1cd02cf8207edd1c0f78b4693 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 6 Aug 2025 16:16:23 +0200 Subject: [PATCH 3/7] do not use env variable name --- knowledge_base/target_includes/README.md | 16 ++++++++-------- knowledge_base/target_includes/databricks.yml | 10 +++++----- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/knowledge_base/target_includes/README.md b/knowledge_base/target_includes/README.md index d9ccc96b..297ae91e 100644 --- a/knowledge_base/target_includes/README.md +++ b/knowledge_base/target_includes/README.md @@ -51,10 +51,10 @@ Workspace: Resources: Jobs: set1-job-1: - Name: Set1 Job 1 - dev + Name: Set1 Job 1 - foo URL: (not deployed) set1-job-2: - Name: Set1 Job 2 - dev + Name: Set1 Job 2 - foo URL: (not deployed) # Summary of the bundle resources for staging @@ -68,16 +68,16 @@ Workspace: Resources: Jobs: set1-job-1: - Name: Set1 Job 1 - staging + Name: Set1 Job 1 - bar URL: (not deployed) set1-job-2: - Name: Set1 Job 2 - staging + Name: Set1 Job 2 - bar URL: (not deployed) set2-job-1: - Name: Set2 Job 1 - staging + Name: Set2 Job 1 - bar URL: (not deployed) set2-job-2: - Name: Set2 Job 2 - staging + Name: Set2 Job 2 - bar URL: (not deployed) # Summary of the bundle resources for prod @@ -91,10 +91,10 @@ Workspace: Resources: Jobs: set2-job-1: - Name: Set2 Job 1 - prod + Name: Set2 Job 1 - baz URL: (not deployed) set2-job-2: - Name: Set2 Job 2 - prod + Name: Set2 Job 2 - baz URL: (not deployed) ``` diff --git a/knowledge_base/target_includes/databricks.yml b/knowledge_base/target_includes/databricks.yml index 9b68d405..2d6f10dc 100644 --- a/knowledge_base/target_includes/databricks.yml +++ b/knowledge_base/target_includes/databricks.yml @@ -6,19 +6,19 @@ include: - resources/set2/*.yml variables: - environment: - description: "Define the environment where the job will run" + name_suffix: + description: "Target specific suffix for the job name" targets: dev: 
default: true variables: - environment: dev + name_suffix: foo staging: variables: - environment: staging + name_suffix: bar prod: variables: - environment: prod + name_suffix: baz From 662b685e50fe8e9d8b42872cb11ee09e6df40695 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 7 Aug 2025 16:51:00 +0200 Subject: [PATCH 4/7] Update knowledge_base/target_includes/README.md Co-authored-by: Julia Crawford (Databricks) --- knowledge_base/target_includes/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/knowledge_base/target_includes/README.md b/knowledge_base/target_includes/README.md index 297ae91e..c364f45a 100644 --- a/knowledge_base/target_includes/README.md +++ b/knowledge_base/target_includes/README.md @@ -2,9 +2,7 @@ This example demonstrates the concept of using `target_includes` (or similar include mechanisms) in Databricks Asset Bundles to organize job configurations across different environments without duplication. -## Overview - -This example implements the use case described in [GitHub Issue #2878](https://github.com/databricks/cli/issues/2878), which requests the ability to include specific resource files based on target configurations. +It addresses the use case described in [GitHub Issue #2878](https://github.com/databricks/cli/issues/2878), which requests the ability to include specific resource files based on target configurations. 
## Directory Structure From 47f963b0b92a359ad308c56dd55d2265f818db36 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 7 Aug 2025 16:51:08 +0200 Subject: [PATCH 5/7] Update knowledge_base/target_includes/README.md Co-authored-by: Julia Crawford (Databricks) --- knowledge_base/target_includes/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/knowledge_base/target_includes/README.md b/knowledge_base/target_includes/README.md index c364f45a..7f1f90c5 100644 --- a/knowledge_base/target_includes/README.md +++ b/knowledge_base/target_includes/README.md @@ -23,15 +23,15 @@ target_includes/ The bundle defines three targets: -1. **dev**: Includes only `resources/set1/*.yml` +- **dev**: Includes only `resources/set1/*.yml` - Contains: set1-job-1, set1-job-2 - Environment: development -2. **staging**: Includes both `resources/set1/*.yml` and `resources/set2/*.yml` +- **staging**: Includes both `resources/set1/*.yml` and `resources/set2/*.yml` - Contains: set1-job-1, set1-job-2, set2-job-1, set2-job-2 - Environment: staging -3. **prod**: Includes only `resources/set2/*.yml` +- **prod**: Includes only `resources/set2/*.yml` - Contains: set2-job-1, set2-job-2 - Environment: production From 56deeb7f92b0ddf521a326250c4c95c51341698c Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 7 Aug 2025 16:51:27 +0200 Subject: [PATCH 6/7] Update knowledge_base/target_includes/README.md Co-authored-by: Julia Crawford (Databricks) --- knowledge_base/target_includes/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/knowledge_base/target_includes/README.md b/knowledge_base/target_includes/README.md index 7f1f90c5..34b29981 100644 --- a/knowledge_base/target_includes/README.md +++ b/knowledge_base/target_includes/README.md @@ -98,6 +98,6 @@ Resources: ## Notes -There are some key aspects in this implementation -1. In `databricks.yml` file we include (see `include` section) all configuration files for all targets. 
This does not impact which resources will be deployed for which target. -2. For each job in corresponding configuration file like `resources/set1/job_1.yml` we define in which targets this job should be deployed. We use YAML anchors to avoid duplications between targets. +There are some important aspects of this implementation: +- The `databricks.yml` file includes all configuration files for all targets (see `include` section). This does not impact which resources will be deployed to each target. +- For each job in a corresponding configuration file, such as `resources/set1/job_1.yml`, targets are defined where the job should be deployed. YAML anchors are used to avoid duplications between targets. From 1570ba493896c528425b4c815657a99c8de4a002 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 7 Aug 2025 16:54:17 +0200 Subject: [PATCH 7/7] fixed command output --- knowledge_base/target_includes/README.md | 32 ++++++++++++++++-------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/knowledge_base/target_includes/README.md b/knowledge_base/target_includes/README.md index 34b29981..09579aa7 100644 --- a/knowledge_base/target_includes/README.md +++ b/knowledge_base/target_includes/README.md @@ -35,12 +35,22 @@ The bundle defines three targets: - Contains: set2-job-1, set2-job-2 - Environment: production + +## Notes + +There are some important aspects of this implementation: +- The `databricks.yml` file includes all configuration files for all targets (see `include` section). This does not impact which resources will be deployed to each target. +- For each job in a corresponding configuration file, such as `resources/set1/job_1.yml`, targets are defined where the job should be deployed. YAML anchors are used to avoid duplications between targets. 
+ + ## Usage ```bash -# Summary of the bundle resources for dev databricks bundle summary -p u2m -t dev +``` +Output: +```text Name: target-includes-example Target: dev Workspace: @@ -54,10 +64,14 @@ Resources: set1-job-2: Name: Set1 Job 2 - foo URL: (not deployed) +``` -# Summary of the bundle resources for staging +```bash databricks bundle summary -p u2m -t staging +``` +Output: +```text Name: target-includes-example Target: staging Workspace: @@ -77,10 +91,14 @@ Resources: set2-job-2: Name: Set2 Job 2 - bar URL: (not deployed) +``` -# Summary of the bundle resources for prod +```bash databricks bundle summary -p u2m -t prod +``` +Output: +```text Name: target-includes-example Target: prod Workspace: @@ -94,10 +112,4 @@ Resources: set2-job-2: Name: Set2 Job 2 - baz URL: (not deployed) -``` - -## Notes - -There are some important aspects of this implementation: -- The `databricks.yml` file includes all configuration files for all targets (see `include` section). This does not impact which resources will be deployed to each target. -- For each job in a corresponding configuration file, such as `resources/set1/job_1.yml`, targets are defined where the job should be deployed. YAML anchors are used to avoid duplications between targets. +``` \ No newline at end of file