From 928eb4c2cfde14dee15e53342cbfd52c1e5c7fe1 Mon Sep 17 00:00:00 2001 From: Anton Nekipelov <226657+anton-107@users.noreply.github.com> Date: Thu, 21 Aug 2025 17:24:25 +0200 Subject: [PATCH 1/4] An example of a bundle that creates Lakebase database instance and catalog --- .../database_with_catalog/README.md | 27 ++++++++++++++++ .../database_with_catalog/databricks.yml | 31 +++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 knowledge_base/database_with_catalog/README.md create mode 100644 knowledge_base/database_with_catalog/databricks.yml diff --git a/knowledge_base/database_with_catalog/README.md b/knowledge_base/database_with_catalog/README.md new file mode 100644 index 00000000..23377aae --- /dev/null +++ b/knowledge_base/database_with_catalog/README.md @@ -0,0 +1,27 @@ +# OLTP database instance with a catalog + +This example demonstrates how to define an OLTP database instance and a database catalog in a Databricks Asset Bundle. + +It includes and deploys an example database instance and a catalog. When the instance is making changes to the database, the changes are reflected in Unity Catalog. + +For more information about Databricks database instances, see the [documentation](https://docs.databricks.com/aws/en/oltp/). + +## Prerequisites + +* Databricks CLI v0.265.0 or above + +## Usage + +Modify `databricks.yml`: +* Update the `host` field under `workspace` to the Databricks workspace to deploy to + +Run `databricks bundle deploy` to deploy the bundle. 
+ +Run the following queries to populate your database with sample data: + +- `databricks psql my-instance -- -d my_database -c "CREATE TABLE IF NOT EXISTS hello_world (id SERIAL PRIMARY KEY, message TEXT, number INTEGER, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP);"` +- `databricks psql my-instance -- -d my_database -c "INSERT INTO hello_world (message, number) SELECT 'Hello World #' || generate_series, generate_series FROM generate_series(1, 100);"` +- `databricks psql my-instance -- -d my_database -c "SELECT * FROM hello_world;"` + +Open your catalog in Databricks Workspace to explore generated data in Unity Catalog: `databricks bundle open my_catalog` +Navigate to `public` schema, then to `hello_world` table, then to "Sample data" diff --git a/knowledge_base/database_with_catalog/databricks.yml b/knowledge_base/database_with_catalog/databricks.yml new file mode 100644 index 00000000..b4421733 --- /dev/null +++ b/knowledge_base/database_with_catalog/databricks.yml @@ -0,0 +1,31 @@ +bundle: + name: database-catalog-example + +# workspace: +# host: https://myworkspace.cloud.databricks.com + +resources: + database_instances: + my_instance: + name: my-instance + capacity: CU_1 + database_catalogs: + my_catalog: + database_instance_name: ${resources.database_instances.my_instance.name} + name: example_catalog + database_name: my_database + create_database_if_not_exists: true + + +# Defines the targets for this bundle. +# Targets allow you to deploy the same bundle to different Databricks workspaces. +targets: + prod: { + # No overrides + } + dev: + # This target is for development purposes. + # It defaults to the current Databricks workspace. 
 + default: true + mode: development + \ No newline at end of file From 4069773ab3b327228fb492c20c48444d960a90ba Mon Sep 17 00:00:00 2001 From: Anton Nekipelov <226657+anton-107@users.noreply.github.com> Date: Thu, 21 Aug 2025 17:42:53 +0200 Subject: [PATCH 2/4] mention psql client as a prerequisite for the demo --- knowledge_base/database_with_catalog/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/knowledge_base/database_with_catalog/README.md b/knowledge_base/database_with_catalog/README.md index 23377aae..ca6480ff 100644 --- a/knowledge_base/database_with_catalog/README.md +++ b/knowledge_base/database_with_catalog/README.md @@ -9,6 +9,7 @@ For more information about Databricks database instances, see the [documentation ## Prerequisites * Databricks CLI v0.265.0 or above +* `psql` client version 14 or above (only needed to run the demo data generation) ## Usage From 8b7d1b9c1507b8e8114814a96848fa7fb04f84f9 Mon Sep 17 00:00:00 2001 From: Anton Nekipelov <226657+anton-107@users.noreply.github.com> Date: Fri, 22 Aug 2025 10:55:04 +0200 Subject: [PATCH 3/4] fix language, improve readme, mention costs --- .../database_with_catalog/README.md | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/knowledge_base/database_with_catalog/README.md b/knowledge_base/database_with_catalog/README.md index ca6480ff..5a37ac0e 100644 --- a/knowledge_base/database_with_catalog/README.md +++ b/knowledge_base/database_with_catalog/README.md @@ -2,7 +2,7 @@ This example demonstrates how to define an OLTP database instance and a database catalog in a Databricks Asset Bundle. -It includes and deploys an example database instance and a catalog. When the instance is making changes to the database, the changes are reflected in Unity Catalog. +It includes and deploys an example database instance and a catalog. When data in the database instance changes, the changes are reflected in Unity Catalog. 
For more information about Databricks database instances, see the [documentation](https://docs.databricks.com/aws/en/oltp/). @@ -18,11 +18,23 @@ Modify `databricks.yml`: Run `databricks bundle deploy` to deploy the bundle. +Please note that after this bundle gets deployed, the database instance starts running, which incurs cost. + Run the following queries to populate your database with sample data: -- `databricks psql my-instance -- -d my_database -c "CREATE TABLE IF NOT EXISTS hello_world (id SERIAL PRIMARY KEY, message TEXT, number INTEGER, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP);"` -- `databricks psql my-instance -- -d my_database -c "INSERT INTO hello_world (message, number) SELECT 'Hello World #' || generate_series, generate_series FROM generate_series(1, 100);"` -- `databricks psql my-instance -- -d my_database -c "SELECT * FROM hello_world;"` +```bash +# Create a demo table: +databricks psql my-instance -- -d my_database -c "CREATE TABLE IF NOT EXISTS hello_world (id SERIAL PRIMARY KEY, message TEXT, number INTEGER, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP);" + +# Insert 100 rows of demo data: +databricks psql my-instance -- -d my_database -c "INSERT INTO hello_world (message, number) SELECT 'Hello World #' || generate_series, generate_series FROM generate_series(1, 100);" + +# Show generated rows: +databricks psql my-instance -- -d my_database -c "SELECT * FROM hello_world;" +``` + +Open your catalog in Databricks: `databricks bundle open my_catalog` +Navigate to the `public` schema, then to the `hello_world` table, then to "Sample data" and explore your generated data. 
 -Open your catalog in Databricks Workspace to explore generated data in Unity Catalog: `databricks bundle open my_catalog` -Navigate to `public` schema, then to `hello_world` table, then to "Sample data" +## Clean up +To remove the provisioned instance and catalog, run `databricks bundle destroy`. \ No newline at end of file From fba5fb5feefa77392bada7c93edb79f9e173cc05 Mon Sep 17 00:00:00 2001 From: Anton Nekipelov <226657+anton-107@users.noreply.github.com> Date: Fri, 22 Aug 2025 10:55:13 +0200 Subject: [PATCH 4/4] remove prod block from yaml --- knowledge_base/database_with_catalog/databricks.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/knowledge_base/database_with_catalog/databricks.yml b/knowledge_base/database_with_catalog/databricks.yml index b4421733..b4830d92 100644 --- a/knowledge_base/database_with_catalog/databricks.yml +++ b/knowledge_base/database_with_catalog/databricks.yml @@ -20,9 +20,6 @@ resources: # Defines the targets for this bundle. # Targets allow you to deploy the same bundle to different Databricks workspaces. targets: - prod: { - # No overrides - } dev: # This target is for development purposes. # It defaults to the current Databricks workspace.