diff --git a/knowledge_base/target_includes/.gitignore b/knowledge_base/target_includes/.gitignore new file mode 100644 index 00000000..adcd4586 --- /dev/null +++ b/knowledge_base/target_includes/.gitignore @@ -0,0 +1,3 @@ +.databricks +*.jar +*.class diff --git a/knowledge_base/target_includes/README.md b/knowledge_base/target_includes/README.md new file mode 100644 index 00000000..09579aa7 --- /dev/null +++ b/knowledge_base/target_includes/README.md @@ -0,0 +1,115 @@ +# Target Includes Example + +This example demonstrates how to emulate target-specific includes (`target_includes`) in Databricks Asset Bundles, so that job configurations can be organized across different environments without duplication. + +It addresses the use case described in [GitHub Issue #2878](https://github.com/databricks/cli/issues/2878), which requests the ability to include specific resource files based on target configurations. + +## Directory Structure + +``` +target_includes/ +├── databricks.yml # Main bundle configuration with 3 targets +├── resources/ +│ ├── set1/ # Jobs for dev and staging environments +│ │ ├── job_1.yml +│ │ └── job_2.yml +│ └── set2/ # Jobs for staging and prod environments +│ ├── job_1.yml +│ └── job_2.yml +└── README.md +``` + +## Target Configuration + +The bundle defines three targets: + +- **dev**: Deploys only the jobs defined in `resources/set1/*.yml` + - Contains: set1-job-1, set1-job-2 + - Environment: development + +- **staging**: Deploys the jobs defined in both `resources/set1/*.yml` and `resources/set2/*.yml` + - Contains: set1-job-1, set1-job-2, set2-job-1, set2-job-2 + - Environment: staging + +- **prod**: Deploys only the jobs defined in `resources/set2/*.yml` + - Contains: set2-job-1, set2-job-2 + - Environment: production + + +## Notes + +There are some important aspects of this implementation: +- The `databricks.yml` file includes all configuration files for all targets (see the `include` section). On its own, this does not determine which resources are deployed to each target. 
+- Each job configuration file, such as `resources/set1/job_1.yml`, defines its job under the targets where the job should be deployed. A YAML anchor is used to avoid duplicating the job definition between targets. + + +## Usage + +```bash +databricks bundle summary -p u2m -t dev +``` + +Output: +```text +Name: target-includes-example +Target: dev +Workspace: + User: *** + Path: *** +Resources: + Jobs: + set1-job-1: + Name: Set1 Job 1 - foo + URL: (not deployed) + set1-job-2: + Name: Set1 Job 2 - foo + URL: (not deployed) +``` + +```bash +databricks bundle summary -p u2m -t staging +``` + +Output: +```text +Name: target-includes-example +Target: staging +Workspace: + User: *** + Path: *** +Resources: + Jobs: + set1-job-1: + Name: Set1 Job 1 - bar + URL: (not deployed) + set1-job-2: + Name: Set1 Job 2 - bar + URL: (not deployed) + set2-job-1: + Name: Set2 Job 1 - bar + URL: (not deployed) + set2-job-2: + Name: Set2 Job 2 - bar + URL: (not deployed) +``` + +```bash +databricks bundle summary -p u2m -t prod +``` + +Output: +```text +Name: target-includes-example +Target: prod +Workspace: + User: *** + Path: *** +Resources: + Jobs: + set2-job-1: + Name: Set2 Job 1 - baz + URL: (not deployed) + set2-job-2: + Name: Set2 Job 2 - baz + URL: (not deployed) +``` \ No newline at end of file diff --git a/knowledge_base/target_includes/databricks.yml b/knowledge_base/target_includes/databricks.yml new file mode 100644 index 00000000..2d6f10dc --- /dev/null +++ b/knowledge_base/target_includes/databricks.yml @@ -0,0 +1,24 @@ +bundle: + name: target-includes-example + +include: + - resources/set1/*.yml + - resources/set2/*.yml + +variables: + name_suffix: + description: "Target specific suffix for the job name" + +targets: + dev: + default: true + variables: + name_suffix: foo + + staging: + variables: + name_suffix: bar + + prod: + variables: + name_suffix: baz diff --git a/knowledge_base/target_includes/resources/set1/job_1.yml b/knowledge_base/target_includes/resources/set1/job_1.yml 
new file mode 100644
index 00000000..4954415a
--- /dev/null
+++ b/knowledge_base/target_includes/resources/set1/job_1.yml
@@ -0,0 +1,22 @@
+# Job 1 for set1 - used in dev and staging targets
+# The job is defined once behind a YAML anchor and merged into every target that deploys it.
+
+job-config: &job-config
+  set1-job-1:
+    name: "Set1 Job 1 - ${var.name_suffix}"  # name_suffix is set per target in databricks.yml
+    tasks:
+      - task_key: "process_data"
+        notebook_task:
+          notebook_path: ../../src/notebook.py  # relative to this file
+    max_concurrent_runs: 1
+    timeout_seconds: 3600
+
+targets:
+  dev:
+    resources:
+      jobs:
+        <<: *job-config
+  staging:
+    resources:
+      jobs:
+        <<: *job-config
\ No newline at end of file
diff --git a/knowledge_base/target_includes/resources/set1/job_2.yml b/knowledge_base/target_includes/resources/set1/job_2.yml
new file mode 100644
index 00000000..b044836a
--- /dev/null
+++ b/knowledge_base/target_includes/resources/set1/job_2.yml
@@ -0,0 +1,27 @@
+# Job 2 for set1 - used in dev and staging targets
+# The job is defined once behind a YAML anchor and merged into every target that deploys it.
+
+job-config: &job-config
+  set1-job-2:
+    name: "Set1 Job 2 - ${var.name_suffix}"  # name_suffix is set per target in databricks.yml
+    tasks:
+      - task_key: "process_data"
+        notebook_task:
+          notebook_path: ../../src/notebook.py  # relative to this file
+    max_concurrent_runs: 1
+    timeout_seconds: 3600
+    # Runs daily at 12:00 UTC. Cron schedules are configured with the job-level
+    # `schedule` block (quartz_cron_expression + timezone_id).
+    schedule:
+      quartz_cron_expression: "0 0 12 * * ?"
+      timezone_id: "UTC"
+
+targets:
+  dev:
+    resources:
+      jobs:
+        <<: *job-config
+  staging:
+    resources:
+      jobs:
+        <<: *job-config
\ No newline at end of file
diff --git a/knowledge_base/target_includes/resources/set2/job_1.yml b/knowledge_base/target_includes/resources/set2/job_1.yml
new file mode 100644
index 00000000..a1f89d69
--- /dev/null
+++ b/knowledge_base/target_includes/resources/set2/job_1.yml
@@ -0,0 +1,27 @@
+# Job 1 for set2 - used in staging and prod targets
+# The job is defined once behind a YAML anchor and merged into every target that deploys it.
+
+job-config: &job-config
+  set2-job-1:
+    name: "Set2 Job 1 - ${var.name_suffix}"  # name_suffix is set per target in databricks.yml
+    tasks:
+      - task_key: "process_data"
+        notebook_task:
+          notebook_path: ../../src/notebook.py  # relative to this file
+    max_concurrent_runs: 1
+    timeout_seconds: 3600
+    # Runs daily at 12:00 UTC. Cron schedules are configured with the job-level
+    # `schedule` block (quartz_cron_expression + timezone_id).
+    schedule:
+      quartz_cron_expression: "0 0 12 * * ?"
+      timezone_id: "UTC"
+
+targets:
+  staging:
+    resources:
+      jobs:
+        <<: *job-config
+  prod:
+    resources:
+      jobs:
+        <<: *job-config
\ No newline at end of file
diff --git a/knowledge_base/target_includes/resources/set2/job_2.yml b/knowledge_base/target_includes/resources/set2/job_2.yml
new file mode 100644
index 00000000..f3d66446
--- /dev/null
+++ b/knowledge_base/target_includes/resources/set2/job_2.yml
@@ -0,0 +1,27 @@
+# Job 2 for set2 - used in staging and prod targets
+# The job is defined once behind a YAML anchor and merged into every target that deploys it.
+
+job-config: &job-config
+  set2-job-2:
+    name: "Set2 Job 2 - ${var.name_suffix}"  # name_suffix is set per target in databricks.yml
+    tasks:
+      - task_key: "process_data"
+        notebook_task:
+          notebook_path: ../../src/notebook.py  # relative to this file
+    max_concurrent_runs: 1
+    timeout_seconds: 3600
+    # Runs daily at 12:00 UTC. Cron schedules are configured with the job-level
+    # `schedule` block (quartz_cron_expression + timezone_id).
+    schedule:
+      quartz_cron_expression: "0 0 12 * * ?"
+      timezone_id: "UTC"
+
+targets:
+  staging:
+    resources:
+      jobs:
+        <<: *job-config
+  prod:
+    resources:
+      jobs:
+        <<: *job-config
\ No newline at end of file
diff --git a/knowledge_base/target_includes/src/notebook.py b/knowledge_base/target_includes/src/notebook.py
new file mode 100644
index 00000000..734514cf
--- /dev/null
+++ b/knowledge_base/target_includes/src/notebook.py
@@ -0,0 +1,3 @@
+# Databricks notebook source
+
+print("Hello From Notebook!")