Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .ruff.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ exclude = [
"default_python/*",
"default_sql/*",
"mlops_stacks/*",
"lakeflow_pipelines_python/*",
]
16 changes: 1 addition & 15 deletions contrib/data_engineering/README.md
Original file line number Diff line number Diff line change
@@ -1,20 +1,6 @@
# data_engineering

The 'data_engineering' project was generated by using the contrib/data-engineering template.

Learn more about this template here:

https://github.com/databricks/bundle-examples/tree/main/contrib/templates/data-engineering

You can re-create this bundle by running the following commands:

```
$ databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering
# (answer prompts, call the project data_engineering)
$ cd data_engineering
$ uv run add-asset
# (select etl-pipeline)
```
The 'data_engineering' project was generated by using the data-engineering template.

## Setup

Expand Down
5 changes: 0 additions & 5 deletions contrib/data_engineering/assets/etl_pipeline/__init__.py

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

14 changes: 7 additions & 7 deletions contrib/data_engineering/databricks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,21 @@ targets:
workspace:
host: https://company.databricks.com
variables:
catalog: main
catalog: catalog
schema: ${workspace.current_user.short_name}
notifications: []
prod:
mode: production
workspace:
host: https://company.databricks.com
# We explicitly specify /Workspace/Users/user@databricks.com to make sure we only have a single copy.
root_path: /Workspace/Users/user@databricks.com/.bundle/${bundle.name}/${bundle.target}
# We explicitly specify /Workspace/Users/user@company.com to make sure we only have a single copy.
root_path: /Workspace/Users/user@company.com/.bundle/${bundle.name}/${bundle.target}
permissions:
- user_name: user@databricks.com
- user_name: user@company.com
level: CAN_MANAGE
run_as:
user_name: user@databricks.com
user_name: user@company.com
variables:
catalog: main
catalog: catalog
schema: default
notifications: [user@databricks.com]
notifications: [user@company.com]
412 changes: 0 additions & 412 deletions contrib/data_engineering/uv.lock

This file was deleted.

1 change: 1 addition & 0 deletions dbt_sql/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ scratch/**
# dbt
target/
dbt_packages/
dbt_modules/
logs/
1 change: 0 additions & 1 deletion dbt_sql/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{
"python.analysis.stubPath": ".vscode",
"databricks.python.envFile": "${workspaceFolder}/.env",
"jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
"jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
"python.testing.pytestArgs": [
Expand Down
2 changes: 1 addition & 1 deletion dbt_sql/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ You can find that job by opening your workspace and clicking on **Workflows**.

You can also deploy to your production target directly from the command-line.
The warehouse, catalog, and schema for that target are configured in databricks.yml.
When deploying to this target, note that the default job at resources/dbt_sql_job.yml
When deploying to this target, note that the default job at resources/dbt_sql.job.yml
has a schedule set that runs every day. The schedule is paused when deploying in development mode
(see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).

Expand Down
10 changes: 5 additions & 5 deletions dbt_sql/databricks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,31 @@
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
name: dbt_sql
uuid: 5e5ca8d5-0388-473e-84a1-1414ed89c5df

include:
- resources/*.yml
- resources/*/*.yml

# Deployment targets.
# The default schema, catalog, etc. for dbt are defined in dbt_profiles/profiles.yml
targets:
dev:
default: true
# The default target uses 'mode: development' to create a development copy.
# - Deployed resources get prefixed with '[dev my_user_name]'
# - Any job schedules and triggers are paused by default.
# See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
mode: development
default: true
workspace:
host: https://company.databricks.com

prod:
mode: production
workspace:
host: https://company.databricks.com
# We explicitly specify /Users/user@company.com to make sure we only have a single copy.
root_path: /Users/user@company.com/.bundle/${bundle.name}/${bundle.target}
# We explicitly deploy to /Workspace/Users/user@company.com to make sure we only have a single copy.
root_path: /Workspace/Users/user@company.com/.bundle/${bundle.name}/${bundle.target}
permissions:
- user_name: user@company.com
level: CAN_MANAGE
run_as:
user_name: user@company.com
70 changes: 35 additions & 35 deletions dbt_sql/dbt_profiles/profiles.yml
Original file line number Diff line number Diff line change
@@ -1,38 +1,38 @@

# This file defines dbt profiles for deployed dbt jobs.
dbt_sql:
target: dev # default target
outputs:

# Doing local development with the dbt CLI?
# Then you should create your own profile in your .dbt/profiles.yml using 'dbt init'
# (See README.md)

# The default target when deployed with the Databricks CLI
# N.B. when you use dbt from the command line, it uses the profile from .dbt/profiles.yml
dev:
type: databricks
method: http
catalog: main
schema: "{{ var('dev_schema') }}"

http_path: /sql/1.0/warehouses/abcdef1234567890

# The workspace host / token are provided by Databricks
# see databricks.yml for the workspace host used for 'dev'
host: "{{ env_var('DBT_HOST') }}"
token: "{{ env_var('DBT_ACCESS_TOKEN') }}"

# The production target when deployed with the Databricks CLI
prod:
type: databricks
method: http
catalog: main
schema: default

http_path: /sql/1.0/warehouses/abcdef1234567890

# The workspace host / token are provided by Databricks
# see databricks.yml for the workspace host used for 'prod'
host: "{{ env_var('DBT_HOST') }}"
token: "{{ env_var('DBT_ACCESS_TOKEN') }}"
target: dev # default target
outputs:

# Doing local development with the dbt CLI?
# Then you should create your own profile in your .dbt/profiles.yml using 'dbt init'
# (See README.md)

# The default target when deployed with the Databricks CLI
# N.B. when you use dbt from the command line, it uses the profile from .dbt/profiles.yml
dev:
type: databricks
method: http
catalog: catalog
schema: "{{ var('dev_schema') }}"

http_path: /sql/1.0/warehouses/abcdef1234567890

# The workspace host / token are provided by Databricks
# see databricks.yml for the workspace host used for 'dev'
host: "{{ env_var('DBT_HOST') }}"
token: "{{ env_var('DBT_ACCESS_TOKEN') }}"

# The production target when deployed with the Databricks CLI
prod:
type: databricks
method: http
catalog: catalog
schema: default

http_path: /sql/1.0/warehouses/abcdef1234567890

# The workspace host / token are provided by Databricks
# see databricks.yml for the workspace host used for 'prod'
host: "{{ env_var('DBT_HOST') }}"
token: "{{ env_var('DBT_ACCESS_TOKEN') }}"
2 changes: 1 addition & 1 deletion dbt_sql/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ seed-paths: ["src/seeds"]
macro-paths: ["src/macros"]
snapshot-paths: ["src/snapshots"]

clean-targets: # directories to be removed by `dbt clean`
clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_packages"

Expand Down
4 changes: 2 additions & 2 deletions dbt_sql/profile_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ fixed:
type: databricks
prompts:
host:
default: myworkspace.databricks.com
default: company.databricks.com
token:
hint: 'personal access token to use, dapiXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
hide_input: true
Expand All @@ -14,7 +14,7 @@ prompts:
default: /sql/1.0/warehouses/abcdef1234567890
catalog:
hint: 'initial catalog'
default: main
default: catalog
schema:
hint: 'personal schema where dbt will build objects during development, example: user_name'
threads:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,34 +9,33 @@ resources:
interval: 1
unit: DAYS

email_notifications:
on_failure:
- user@company.com
#email_notifications:
# on_failure:
# - your_email@example.com

tasks:
- task_key: dbt

dbt_task:
project_directory: ../
# The default schema, catalog, etc. are defined in ../dbt_profiles/profiles.yml
profiles_directory: dbt_profiles/
commands:
# The dbt commands to run (see also dbt_profiles/profiles.yml; dev_schema is used in the dev profile)
- 'dbt deps --target=${bundle.target}'
- 'dbt seed --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"'
- 'dbt run --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"'
# The dbt commands to run (see also dbt_profiles/profiles.yml; dev_schema is used in the dev profile)
- 'dbt deps --target=${bundle.target}'
- 'dbt seed --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"'
- 'dbt run --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"'

libraries:
- pypi:
package: dbt-databricks>=1.8.0,<2.0.0
- pypi:
package: dbt-databricks>=1.8.0,<2.0.0

new_cluster:
spark_version: 15.4.x-scala2.12
node_type_id: i3.xlarge
data_security_mode: SINGLE_USER
num_workers: 0
spark_conf:
spark.master: "local[*, 4]"
spark.databricks.cluster.profile: singleNode
spark.master: "local[*, 4]"
spark.databricks.cluster.profile: singleNode
custom_tags:
ResourceClass: SingleNode
1 change: 0 additions & 1 deletion default_python/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{
"python.analysis.stubPath": ".vscode",
"databricks.python.envFile": "${workspaceFolder}/.env",
"jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
"jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
"python.testing.pytestArgs": [
Expand Down
Loading