From 4f13497e5dafe81409399b88fc3479fda241e8a9 Mon Sep 17 00:00:00 2001
From: Lennart Kats <lennart.kats@databricks.com>
Date: Fri, 26 Sep 2025 12:15:42 +0200
Subject: [PATCH] Update default templates

---
 default_python/README.md                      | 50 +++++++----
 default_python/pyproject.toml                 | 20 ++---
 .../resources/default_python.job.yml          |  2 +-
 .../resources/default_python.pipeline.yml     |  2 +-
 default_python/scratch/exploration.ipynb      |  2 +-
 default_python/src/default_python/main.py     | 19 +---
 default_python/src/dlt_pipeline.ipynb         | 90 -------------------
 default_python/src/notebook.ipynb             |  2 +-
 default_python/tests/main_test.py             |  6 +-
 lakeflow_pipelines_python/.gitignore          |  2 +
 .../.vscode/extensions.json                   |  4 +-
 .../.vscode/settings.json                     | 28 ++++--
 lakeflow_pipelines_python/README.md           | 49 ++++++----
 lakeflow_pipelines_python/databricks.yml      | 11 +--
 .../README.md                                 | 22 -----
 .../lakeflow_pipelines_python.job.yml         | 19 ----
 .../lakeflow_pipelines_python.pipeline.yml    | 12 ---
 .../sample_trips_lakeflow_pipelines_python.py | 13 ---
 .../sample_zones_lakeflow_pipelines_python.py | 13 ---
 .../utilities/utils.py                        |  8 --
 lakeflow_pipelines_sql/.gitignore             |  2 +
 .../.vscode/extensions.json                   |  4 +-
 lakeflow_pipelines_sql/.vscode/settings.json  | 28 ++++--
 lakeflow_pipelines_sql/README.md              | 49 ++++++----
 lakeflow_pipelines_sql/databricks.yml         | 11 +--
 .../lakeflow_pipelines_sql_pipeline/README.md | 21 -----
 .../lakeflow_pipelines_sql.job.yml            | 19 ----
 .../lakeflow_pipelines_sql.pipeline.yml       | 12 ---
 .../sample_trips_lakeflow_pipelines_sql.sql   |  9 --
 .../sample_zones_lakeflow_pipelines_sql.sql   | 10 ---
 30 files changed, 175 insertions(+), 364 deletions(-)
 delete mode 100644 default_python/src/dlt_pipeline.ipynb
 delete mode 100644 lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/README.md
 delete mode 100644 lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/lakeflow_pipelines_python.job.yml
 delete mode 100644 lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/lakeflow_pipelines_python.pipeline.yml
 delete mode 100644 lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/transformations/sample_trips_lakeflow_pipelines_python.py
 delete mode 100644 lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/transformations/sample_zones_lakeflow_pipelines_python.py
 delete mode 100644 lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/utilities/utils.py
 delete mode 100644 lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/README.md
 delete mode 100644 lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/lakeflow_pipelines_sql.job.yml
 delete mode 100644 lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/lakeflow_pipelines_sql.pipeline.yml
 delete mode 100644 lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/transformations/sample_trips_lakeflow_pipelines_sql.sql
 delete mode 100644 lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/transformations/sample_zones_lakeflow_pipelines_sql.sql

diff --git a/default_python/README.md b/default_python/README.md
index 74a1f42d..f5a6a220 100644
--- a/default_python/README.md
+++ b/default_python/README.md
@@ -2,18 +2,39 @@
 
 The 'default_python' project was generated by using the default-python template.
 
+For documentation on the Databricks Asset Bundles format used for this project,
+and for CI/CD configuration, see https://docs.databricks.com/aws/en/dev-tools/bundles.
+
 ## Getting started
 
-0. Install UV: https://docs.astral.sh/uv/getting-started/installation/
+Choose how you want to work on this project:
+
+(a) Directly in your Databricks workspace, see
+    https://docs.databricks.com/dev-tools/bundles/workspace.
+
+(b) Locally with an IDE like Cursor or VS Code, see
+    https://docs.databricks.com/vscode-ext.
+
+(c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html
+
+
+Dependencies for this project should be installed using uv:
 
-1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html
+*  Make sure you have the UV package manager installed.
+   It's an alternative to tools like pip: https://docs.astral.sh/uv/getting-started/installation/.
+*  Run `uv sync --dev` to install the project's dependencies.
 
-2. Authenticate to your Databricks workspace, if you have not done so already:
+# Using this project with the CLI
+
+The Databricks workspace and IDE extensions provide a graphical interface for working
+with this project. It's also possible to interact with it directly using the CLI:
+
+1. Authenticate to your Databricks workspace, if you have not done so already:
     ```
     $ databricks configure
     ```
 
-3. To deploy a development copy of this project, type:
+2. To deploy a development copy of this project, type:
     ```
     $ databricks bundle deploy --target dev
     ```
@@ -23,9 +44,9 @@ The 'default_python' project was generated by using the default-python template.
     This deploys everything that's defined for this project.
     For example, the default template would deploy a job called
     `[dev yourname] default_python_job` to your workspace.
-    You can find that job by opening your workpace and clicking on **Workflows**.
+    You can find that job by opening your workspace and clicking on **Jobs & Pipelines**.
 
-4. Similarly, to deploy a production copy, type:
+3. Similarly, to deploy a production copy, type:
    ```
    $ databricks bundle deploy --target prod
    ```
@@ -35,17 +56,12 @@ The 'default_python' project was generated by using the default-python template.
    is paused when deploying in development mode (see
    https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).
 
-5. To run a job or pipeline, use the "run" command:
+4. To run a job or pipeline, use the "run" command:
    ```
    $ databricks bundle run
    ```
-6. Optionally, install the Databricks extension for Visual Studio code for local development from
-   https://docs.databricks.com/dev-tools/vscode-ext.html. It can configure your
-   virtual environment and setup Databricks Connect for running unit tests locally.
-   When not using these tools, consult your development environment's documentation
-   and/or the documentation for Databricks Connect for manually setting up your environment
-   (https://docs.databricks.com/en/dev-tools/databricks-connect/python/index.html).
-
-7. For documentation on the Databricks asset bundles format used
-   for this project, and for CI/CD configuration, see
-   https://docs.databricks.com/dev-tools/bundles/index.html.
+
+5. Finally, to run tests locally, use `pytest`:
+   ```
+   $ uv run pytest
+   ```
diff --git a/default_python/pyproject.toml b/default_python/pyproject.toml
index d32e108a..279d7f32 100644
--- a/default_python/pyproject.toml
+++ b/default_python/pyproject.toml
@@ -2,26 +2,20 @@
 name = "default_python"
 version = "0.0.1"
 authors = [{ name = "user@company.com" }]
-requires-python = ">= 3.11"
+requires-python = ">=3.10,<=3.13"
 
-[project.optional-dependencies]
+[dependency-groups]
 dev = [
     "pytest",
 
-    # Code completion support for DLT, also install databricks-connect
+    # Code completion support for Lakeflow Declarative Pipelines, also install databricks-connect
     "databricks-dlt",
 
     # databricks-connect can be used to run parts of this project locally.
-    # See https://docs.databricks.com/dev-tools/databricks-connect.html.
-    #
-    # Note, databricks-connect is automatically installed if you're using Databricks
-    # extension for Visual Studio Code
-    # (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html).
-    #
-    # To manually install databricks-connect, uncomment the line below to install a version
-    # of db-connect that corresponds to the Databricks Runtime version used for this project.
-    # See https://docs.databricks.com/dev-tools/databricks-connect.html
-    # "databricks-connect>=15.4,<15.5",
+    # Note that for local development, you should use a version that is not newer
+    # than the remote cluster or serverless compute you connect to.
+    # See also https://docs.databricks.com/dev-tools/databricks-connect.html.
+    "databricks-connect>=15.4,<15.5",
 ]
 
 [tool.pytest.ini_options]
diff --git a/default_python/resources/default_python.job.yml b/default_python/resources/default_python.job.yml
index 0504090a..d99eb4dd 100644
--- a/default_python/resources/default_python.job.yml
+++ b/default_python/resources/default_python.job.yml
@@ -40,6 +40,6 @@ resources:
           # Full documentation of this spec can be found at:
           # https://docs.databricks.com/api/workspace/jobs/create#environments-spec
           spec:
-            client: "2"
+            environment_version: "2"
             dependencies:
               - ../dist/*.whl
diff --git a/default_python/resources/default_python.pipeline.yml b/default_python/resources/default_python.pipeline.yml
index ea7cdc02..7954922b 100644
--- a/default_python/resources/default_python.pipeline.yml
+++ b/default_python/resources/default_python.pipeline.yml
@@ -8,7 +8,7 @@ resources:
       serverless: true
       libraries:
         - notebook:
-            path: ../src/dlt_pipeline.ipynb
+            path: ../src/pipeline.ipynb
 
       configuration:
         bundle.sourcePath: ${workspace.file_path}/src
diff --git a/default_python/scratch/exploration.ipynb b/default_python/scratch/exploration.ipynb
index f7832011..57a9c978 100644
--- a/default_python/scratch/exploration.ipynb
+++ b/default_python/scratch/exploration.ipynb
@@ -32,7 +32,7 @@
     "sys.path.append(\"../src\")\n",
     "from default_python import main\n",
     "\n",
-    "main.get_taxis(spark).show(10)"
+    "main.find_all_taxis().show(10)"
    ]
   }
  ],
diff --git a/default_python/src/default_python/main.py b/default_python/src/default_python/main.py
index 5ae344c7..04e8be4d 100644
--- a/default_python/src/default_python/main.py
+++ b/default_python/src/default_python/main.py
@@ -1,24 +1,13 @@
-from pyspark.sql import SparkSession, DataFrame
+from databricks.sdk.runtime import spark
+from pyspark.sql import DataFrame
 
 
-def get_taxis(spark: SparkSession) -> DataFrame:
+def find_all_taxis() -> DataFrame:
     return spark.read.table("samples.nyctaxi.trips")
 
 
-# Create a new Databricks Connect session. If this fails,
-# check that you have configured Databricks Connect correctly.
-# See https://docs.databricks.com/dev-tools/databricks-connect.html.
-def get_spark() -> SparkSession:
-    try:
-        from databricks.connect import DatabricksSession
-
-        return DatabricksSession.builder.getOrCreate()
-    except ImportError:
-        return SparkSession.builder.getOrCreate()
-
-
 def main():
-    get_taxis(get_spark()).show(5)
+    find_all_taxis().show(5)
 
 
 if __name__ == "__main__":
diff --git a/default_python/src/dlt_pipeline.ipynb b/default_python/src/dlt_pipeline.ipynb
deleted file mode 100644
index eb93d319..00000000
--- a/default_python/src/dlt_pipeline.ipynb
+++ /dev/null
@@ -1,90 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "application/vnd.databricks.v1+cell": {
-     "cellMetadata": {},
-     "inputWidgets": {},
-     "nuid": "9a626959-61c8-4bba-84d2-2a4ecab1f7ec",
-     "showTitle": false,
-     "title": ""
-    }
-   },
-   "source": [
-    "# DLT pipeline\n",
-    "\n",
-    "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/default_python.pipeline.yml."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "application/vnd.databricks.v1+cell": {
-     "cellMetadata": {},
-     "inputWidgets": {},
-     "nuid": "9198e987-5606-403d-9f6d-8f14e6a4017f",
-     "showTitle": false,
-     "title": ""
-    }
-   },
-   "outputs": [],
-   "source": [
-    "# Import DLT and src/default_python\n",
-    "import dlt\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n",
-    "from pyspark.sql.functions import expr\n",
-    "from default_python import main"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 0,
-   "metadata": {
-    "application/vnd.databricks.v1+cell": {
-     "cellMetadata": {},
-     "inputWidgets": {},
-     "nuid": "3fc19dba-61fd-4a89-8f8c-24fee63bfb14",
-     "showTitle": false,
-     "title": ""
-    }
-   },
-   "outputs": [],
-   "source": [
-    "@dlt.view\n",
-    "def taxi_raw():\n",
-    "    return main.get_taxis(spark)\n",
-    "\n",
-    "\n",
-    "@dlt.table\n",
-    "def filtered_taxis():\n",
-    "    return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))"
-   ]
-  }
- ],
- "metadata": {
-  "application/vnd.databricks.v1+notebook": {
-   "dashboards": [],
-   "language": "python",
-   "notebookMetadata": {
-    "pythonIndentUnit": 2
-   },
-   "notebookName": "dlt_pipeline",
-   "widgets": {}
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "name": "python",
-   "version": "3.11.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
diff --git a/default_python/src/notebook.ipynb b/default_python/src/notebook.ipynb
index fe99fd32..fd49e5b9 100644
--- a/default_python/src/notebook.ipynb
+++ b/default_python/src/notebook.ipynb
@@ -46,7 +46,7 @@
    "source": [
     "from default_python import main\n",
     "\n",
-    "main.get_taxis(spark).show(10)"
+    "main.find_all_taxis().show(10)"
    ]
   }
  ],
diff --git a/default_python/tests/main_test.py b/default_python/tests/main_test.py
index 66b6f0a2..66c27024 100644
--- a/default_python/tests/main_test.py
+++ b/default_python/tests/main_test.py
@@ -1,6 +1,6 @@
-from default_python.main import get_taxis, get_spark
+from default_python import main
 
 
-def test_main():
-    taxis = get_taxis(get_spark())
+def test_find_all_taxis():
+    taxis = main.find_all_taxis()
     assert taxis.count() > 5
diff --git a/lakeflow_pipelines_python/.gitignore b/lakeflow_pipelines_python/.gitignore
index f6a3b5ff..e566c51f 100644
--- a/lakeflow_pipelines_python/.gitignore
+++ b/lakeflow_pipelines_python/.gitignore
@@ -4,5 +4,7 @@ dist/
 __pycache__/
 *.egg-info
 .venv/
+scratch/**
+!scratch/README.md
 **/explorations/**
 **/!explorations/README.md
diff --git a/lakeflow_pipelines_python/.vscode/extensions.json b/lakeflow_pipelines_python/.vscode/extensions.json
index 5d15eba3..75a111a6 100644
--- a/lakeflow_pipelines_python/.vscode/extensions.json
+++ b/lakeflow_pipelines_python/.vscode/extensions.json
@@ -1,7 +1,7 @@
 {
     "recommendations": [
         "databricks.databricks",
-        "ms-python.vscode-pylance",
-        "redhat.vscode-yaml"
+        "redhat.vscode-yaml",
+        "ms-python.black-formatter"
     ]
 }
diff --git a/lakeflow_pipelines_python/.vscode/settings.json b/lakeflow_pipelines_python/.vscode/settings.json
index 47d90b62..c49593bc 100644
--- a/lakeflow_pipelines_python/.vscode/settings.json
+++ b/lakeflow_pipelines_python/.vscode/settings.json
@@ -1,19 +1,37 @@
 {
-    "python.analysis.stubPath": ".vscode",
-    "databricks.python.envFile": "${workspaceFolder}/.env",
     "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
     "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
     "python.testing.pytestArgs": [
         "."
     ],
-    "python.testing.unittestEnabled": false,
-    "python.testing.pytestEnabled": true,
-    "python.analysis.extraPaths": ["resources/lakeflow_pipelines_python_pipeline"],
     "files.exclude": {
         "**/*.egg-info": true,
         "**/__pycache__": true,
         ".pytest_cache": true,
+        "dist": true,
+    },
+    "files.associations": {
+        "**/.gitkeep": "markdown"
     },
+
+    // Pylance settings (VS Code)
+    // Set typeCheckingMode to "basic" to enable type checking!
+    "python.analysis.typeCheckingMode": "off",
+    "python.analysis.extraPaths": ["src", "lib", "resources"],
+    "python.analysis.diagnosticMode": "workspace",
+    "python.analysis.stubPath": ".vscode",
+
+    // Pyright settings (Cursor)
+    // Set typeCheckingMode to "basic" to enable type checking!
+    "cursorpyright.analysis.typeCheckingMode": "off",
+    "cursorpyright.analysis.extraPaths": ["src", "lib", "resources"],
+    "cursorpyright.analysis.diagnosticMode": "workspace",
+    "cursorpyright.analysis.stubPath": ".vscode",
+
+    // General Python settings
+    "python.defaultInterpreterPath": "./.venv/bin/python",
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true,
     "[python]": {
         "editor.defaultFormatter": "ms-python.black-formatter",
         "editor.formatOnSave": true,
diff --git a/lakeflow_pipelines_python/README.md b/lakeflow_pipelines_python/README.md
index e727cdbc..b4270849 100644
--- a/lakeflow_pipelines_python/README.md
+++ b/lakeflow_pipelines_python/README.md
@@ -2,38 +2,53 @@
 
 The 'lakeflow_pipelines_python' project was generated by using the Lakeflow Pipelines template.
 
-## Setup
+* `lib/`: Python source code for this project.
+* `lib/shared`: Shared source code across all jobs/pipelines/etc.
+* `resources/pipelines_python_etl`: Pipeline code and assets for the pipelines_python_etl pipeline.
+* `resources/`:  Resource configurations (jobs, pipelines, etc.)
 
-1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html
+## Getting started
 
-2. Authenticate to your Databricks workspace, if you have not done so already:
-    ```
-    $ databricks auth login
-    ```
+Choose how you want to work on this project:
+
+(a) Directly in your Databricks workspace, see
+    https://docs.databricks.com/dev-tools/bundles/workspace.
 
-3. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
-   https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from
-   https://www.databricks.com/blog/announcing-pycharm-integration-databricks.
+(b) Locally with an IDE like Cursor or VS Code, see
+    https://docs.databricks.com/vscode-ext.
 
+(c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html
 
-## Deploying resources
+# Using this project with the CLI
 
-1. To deploy a development copy of this project, type:
+The Databricks workspace and IDE extensions provide a graphical interface for working
+with this project. It's also possible to interact with it directly using the CLI:
+
+1. Authenticate to your Databricks workspace, if you have not done so already:
+    ```
+    $ databricks configure
+    ```
+
+2. To deploy a development copy of this project, type:
     ```
     $ databricks bundle deploy --target dev
     ```
     (Note that "dev" is the default target, so the `--target` parameter
     is optional here.)
 
-2. Similarly, to deploy a production copy, type:
-   ```
-   $ databricks bundle deploy --target prod
-   ```
+    This deploys everything that's defined for this project.
+    For example, the default template would deploy a pipeline called
+    `[dev yourname] pipelines_python_etl` to your workspace.
+    You can find that resource by opening your workspace and clicking on **Jobs & Pipelines**.
 
-3. Use the "summary" comand to review everything that was deployed:
+3. Similarly, to deploy a production copy, type:
    ```
-   $ databricks bundle summary
+   $ databricks bundle deploy --target prod
    ```
+   Note the default template includes a job that runs the pipeline every day
+   (defined in resources/pipelines_python_etl/pipelines_python_job.job.yml). The schedule
+   is paused when deploying in development mode (see
+   https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).
 
 4. To run a job or pipeline, use the "run" command:
    ```
diff --git a/lakeflow_pipelines_python/databricks.yml b/lakeflow_pipelines_python/databricks.yml
index 5438327d..44beb468 100644
--- a/lakeflow_pipelines_python/databricks.yml
+++ b/lakeflow_pipelines_python/databricks.yml
@@ -14,8 +14,6 @@ variables:
     description: The catalog to use
   schema:
     description: The schema to use
-  notifications:
-    description: The email addresses to use for failure notifications
 
 targets:
   dev:
@@ -30,18 +28,15 @@ targets:
     variables:
       catalog: catalog
       schema: ${workspace.current_user.short_name}
-      notifications: []
-
   prod:
     mode: production
     workspace:
       host: https://company.databricks.com
       # We explicitly deploy to /Workspace/Users/user@company.com to make sure we only have a single copy.
       root_path: /Workspace/Users/user@company.com/.bundle/${bundle.name}/${bundle.target}
+    variables:
+      catalog: catalog
+      schema: prod
     permissions:
       - user_name: user@company.com
         level: CAN_MANAGE
-    variables:
-      catalog: catalog
-      schema: default
-      notifications: [user@company.com]
diff --git a/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/README.md b/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/README.md
deleted file mode 100644
index 5e845f08..00000000
--- a/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/README.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# lakeflow_pipelines_python_pipeline
-
-This folder defines all source code for the lakeflow_pipelines_python_pipeline pipeline:
-
-- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline.
-- `transformations`: All dataset definitions and transformations.
-- `utilities` (optional): Utility functions and Python modules used in this pipeline.
-- `data_sources` (optional): View definitions describing the source data for this pipeline.
-
-## Getting Started
-
-To get started, go to the `transformations` folder -- most of the relevant source code lives there:
-
-* By convention, every dataset under `transformations` is in a separate file.
-* Take a look at the sample under "sample_trips_lakeflow_pipelines_python.py" to get familiar with the syntax.
-  Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html.
-* Use `Run file` to run and preview a single transformation.
-* Use `Run pipeline` to run _all_ transformations in the entire pipeline.
-* Use `+ Add` in the file browser to add a new data set definition.
-* Use `Schedule` to run the pipeline on a schedule!
-
-For more tutorials and reference material, see https://docs.databricks.com/dlt.
diff --git a/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/lakeflow_pipelines_python.job.yml b/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/lakeflow_pipelines_python.job.yml
deleted file mode 100644
index c003b37f..00000000
--- a/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/lakeflow_pipelines_python.job.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-# The job that triggers lakeflow_pipelines_python_pipeline.
-resources:
-  jobs:
-    lakeflow_pipelines_python_job:
-      name: lakeflow_pipelines_python_job
-
-      trigger:
-        # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
-        periodic:
-          interval: 1
-          unit: DAYS
-
-      email_notifications:
-        on_failure: ${var.notifications}
-
-      tasks:
-        - task_key: refresh_pipeline
-          pipeline_task:
-            pipeline_id: ${resources.pipelines.lakeflow_pipelines_python_pipeline.id}
diff --git a/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/lakeflow_pipelines_python.pipeline.yml b/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/lakeflow_pipelines_python.pipeline.yml
deleted file mode 100644
index 3db75519..00000000
--- a/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/lakeflow_pipelines_python.pipeline.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-resources:
-  pipelines:
-    lakeflow_pipelines_python_pipeline:
-      name: lakeflow_pipelines_python_pipeline
-      serverless: true
-      channel: "PREVIEW"
-      catalog: ${var.catalog}
-      schema: ${var.schema}
-      root_path: "."
-      libraries:
-        - glob:
-            include: transformations/**
diff --git a/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/transformations/sample_trips_lakeflow_pipelines_python.py b/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/transformations/sample_trips_lakeflow_pipelines_python.py
deleted file mode 100644
index f0db7161..00000000
--- a/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/transformations/sample_trips_lakeflow_pipelines_python.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import dlt
-from pyspark.sql.functions import col
-from utilities import utils
-
-
-# This file defines a sample transformation.
-# Edit the sample below or add new transformations
-# using "+ Add" in the file browser.
-
-
-@dlt.table
-def sample_trips_lakeflow_pipelines_python():
-    return spark.read.table("samples.nyctaxi.trips").withColumn("trip_distance_km", utils.distance_km(col("trip_distance")))
diff --git a/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/transformations/sample_zones_lakeflow_pipelines_python.py b/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/transformations/sample_zones_lakeflow_pipelines_python.py
deleted file mode 100644
index a978db9b..00000000
--- a/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/transformations/sample_zones_lakeflow_pipelines_python.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import dlt
-from pyspark.sql.functions import col, sum
-
-
-# This file defines a sample transformation.
-# Edit the sample below or add new transformations
-# using "+ Add" in the file browser.
-
-
-@dlt.table
-def sample_zones_lakeflow_pipelines_python():
-    # Read from the "sample_trips" table, then sum all the fares
-    return spark.read.table("sample_trips_lakeflow_pipelines_python").groupBy(col("pickup_zip")).agg(sum("fare_amount").alias("total_fare"))
diff --git a/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/utilities/utils.py b/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/utilities/utils.py
deleted file mode 100644
index ff039898..00000000
--- a/lakeflow_pipelines_python/resources/lakeflow_pipelines_python_pipeline/utilities/utils.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from pyspark.sql.functions import udf
-from pyspark.sql.types import FloatType
-
-
-@udf(returnType=FloatType())
-def distance_km(distance_miles):
-    """Convert distance from miles to kilometers (1 mile = 1.60934 km)."""
-    return distance_miles * 1.60934
diff --git a/lakeflow_pipelines_sql/.gitignore b/lakeflow_pipelines_sql/.gitignore
index f6a3b5ff..e566c51f 100644
--- a/lakeflow_pipelines_sql/.gitignore
+++ b/lakeflow_pipelines_sql/.gitignore
@@ -4,5 +4,7 @@ dist/
 __pycache__/
 *.egg-info
 .venv/
+scratch/**
+!scratch/README.md
 **/explorations/**
 **/!explorations/README.md
diff --git a/lakeflow_pipelines_sql/.vscode/extensions.json b/lakeflow_pipelines_sql/.vscode/extensions.json
index 5d15eba3..75a111a6 100644
--- a/lakeflow_pipelines_sql/.vscode/extensions.json
+++ b/lakeflow_pipelines_sql/.vscode/extensions.json
@@ -1,7 +1,7 @@
 {
     "recommendations": [
         "databricks.databricks",
-        "ms-python.vscode-pylance",
-        "redhat.vscode-yaml"
+        "redhat.vscode-yaml",
+        "ms-python.black-formatter"
     ]
 }
diff --git a/lakeflow_pipelines_sql/.vscode/settings.json b/lakeflow_pipelines_sql/.vscode/settings.json
index d0c85bb8..c49593bc 100644
--- a/lakeflow_pipelines_sql/.vscode/settings.json
+++ b/lakeflow_pipelines_sql/.vscode/settings.json
@@ -1,19 +1,37 @@
 {
-    "python.analysis.stubPath": ".vscode",
-    "databricks.python.envFile": "${workspaceFolder}/.env",
     "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
     "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
     "python.testing.pytestArgs": [
         "."
     ],
-    "python.testing.unittestEnabled": false,
-    "python.testing.pytestEnabled": true,
-    "python.analysis.extraPaths": ["resources/lakeflow_pipelines_sql_pipeline"],
     "files.exclude": {
         "**/*.egg-info": true,
         "**/__pycache__": true,
         ".pytest_cache": true,
+        "dist": true,
+    },
+    "files.associations": {
+        "**/.gitkeep": "markdown"
     },
+
+    // Pylance settings (VS Code)
+    // Set typeCheckingMode to "basic" to enable type checking!
+    "python.analysis.typeCheckingMode": "off",
+    "python.analysis.extraPaths": ["src", "lib", "resources"],
+    "python.analysis.diagnosticMode": "workspace",
+    "python.analysis.stubPath": ".vscode",
+
+    // Pyright settings (Cursor)
+    // Set typeCheckingMode to "basic" to enable type checking!
+    "cursorpyright.analysis.typeCheckingMode": "off",
+    "cursorpyright.analysis.extraPaths": ["src", "lib", "resources"],
+    "cursorpyright.analysis.diagnosticMode": "workspace",
+    "cursorpyright.analysis.stubPath": ".vscode",
+
+    // General Python settings
+    "python.defaultInterpreterPath": "./.venv/bin/python",
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true,
     "[python]": {
         "editor.defaultFormatter": "ms-python.black-formatter",
         "editor.formatOnSave": true,
diff --git a/lakeflow_pipelines_sql/README.md b/lakeflow_pipelines_sql/README.md
index b4a17db3..20eba12a 100644
--- a/lakeflow_pipelines_sql/README.md
+++ b/lakeflow_pipelines_sql/README.md
@@ -2,38 +2,53 @@
 
 The 'lakeflow_pipelines_sql' project was generated by using the Lakeflow Pipelines template.
 
-## Setup
+* `lib/`: Python source code for this project.
+* `lib/shared`: Shared source code across all jobs/pipelines/etc.
+* `resources/pipelines_sql_etl`: Pipeline code and assets for the pipelines_sql_etl pipeline.
+* `resources/`:  Resource configurations (jobs, pipelines, etc.)
 
-1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html
+## Getting started
 
-2. Authenticate to your Databricks workspace, if you have not done so already:
-    ```
-    $ databricks auth login
-    ```
+Choose how you want to work on this project:
+
+(a) Directly in your Databricks workspace, see
+    https://docs.databricks.com/dev-tools/bundles/workspace.
 
-3. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
-   https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from
-   https://www.databricks.com/blog/announcing-pycharm-integration-databricks.
+(b) Locally with an IDE like Cursor or VS Code, see
+    https://docs.databricks.com/vscode-ext.
 
+(c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html
 
-## Deploying resources
+# Using this project with the CLI
 
-1. To deploy a development copy of this project, type:
+The Databricks workspace and IDE extensions provide a graphical interface for working
+with this project. It's also possible to interact with it directly using the CLI:
+
+1. Authenticate to your Databricks workspace, if you have not done so already:
+    ```
+    $ databricks configure
+    ```
+
+2. To deploy a development copy of this project, type:
     ```
     $ databricks bundle deploy --target dev
     ```
     (Note that "dev" is the default target, so the `--target` parameter
     is optional here.)
 
-2. Similarly, to deploy a production copy, type:
-   ```
-   $ databricks bundle deploy --target prod
-   ```
+    This deploys everything that's defined for this project.
+    For example, the default template would deploy a pipeline called
+    `[dev yourname] pipelines_sql_etl` to your workspace.
+    You can find that resource by opening your workspace and clicking on **Jobs & Pipelines**.
 
-3. Use the "summary" comand to review everything that was deployed:
+3. Similarly, to deploy a production copy, type:
    ```
-   $ databricks bundle summary
+   $ databricks bundle deploy --target prod
    ```
+   Note the default template includes a job that runs the pipeline every day
+   (defined in resources/pipelines_sql_etl/pipelines_sql_job.job.yml). The schedule
+   is paused when deploying in development mode (see
+   https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).
 
 4. To run a job or pipeline, use the "run" command:
    ```
diff --git a/lakeflow_pipelines_sql/databricks.yml b/lakeflow_pipelines_sql/databricks.yml
index 4beb0c58..4c4d7a91 100644
--- a/lakeflow_pipelines_sql/databricks.yml
+++ b/lakeflow_pipelines_sql/databricks.yml
@@ -14,8 +14,6 @@ variables:
     description: The catalog to use
   schema:
     description: The schema to use
-  notifications:
-    description: The email addresses to use for failure notifications
 
 targets:
   dev:
@@ -30,18 +28,15 @@ targets:
     variables:
       catalog: catalog
       schema: ${workspace.current_user.short_name}
-      notifications: []
-
   prod:
     mode: production
     workspace:
       host: https://company.databricks.com
       # We explicitly deploy to /Workspace/Users/user@company.com to make sure we only have a single copy.
       root_path: /Workspace/Users/user@company.com/.bundle/${bundle.name}/${bundle.target}
+    variables:
+      catalog: catalog
+      schema: prod
     permissions:
       - user_name: user@company.com
         level: CAN_MANAGE
-    variables:
-      catalog: catalog
-      schema: default
-      notifications: [user@company.com]
diff --git a/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/README.md b/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/README.md
deleted file mode 100644
index d01f290a..00000000
--- a/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/README.md
+++ /dev/null
@@ -1,21 +0,0 @@
-# lakeflow_pipelines_sql_pipeline
-
-This folder defines all source code for the 'lakeflow_pipelines_sql_pipeline' pipeline:
-
-- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline.
-- `transformations`: All dataset definitions and transformations.
-- `data_sources` (optional): View definitions describing the source data for this pipeline.
-
-## Getting Started
-
-To get started, go to the `transformations` folder -- most of the relevant source code lives there:
-
-* By convention, every dataset under `transformations` is in a separate file.
-* Take a look at the sample under "sample_trips_lakeflow_pipelines_sql.sql" to get familiar with the syntax.
-  Read more about the syntax at https://docs.databricks.com/dlt/sql-ref.html.
-* Use `Run file` to run and preview a single transformation.
-* Use `Run pipeline` to run _all_ transformations in the entire pipeline.
-* Use `+ Add` in the file browser to add a new data set definition.
-* Use `Schedule` to run the pipeline on a schedule!
-
-For more tutorials and reference material, see https://docs.databricks.com/dlt.
diff --git a/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/lakeflow_pipelines_sql.job.yml b/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/lakeflow_pipelines_sql.job.yml
deleted file mode 100644
index 32ba1ce4..00000000
--- a/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/lakeflow_pipelines_sql.job.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-# The job that triggers lakeflow_pipelines_sql_pipeline.
-resources:
-  jobs:
-    lakeflow_pipelines_sql_job:
-      name: lakeflow_pipelines_sql_job
-
-      trigger:
-        # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
-        periodic:
-          interval: 1
-          unit: DAYS
-
-      email_notifications:
-        on_failure: ${var.notifications}
-
-      tasks:
-        - task_key: refresh_pipeline
-          pipeline_task:
-            pipeline_id: ${resources.pipelines.lakeflow_pipelines_sql_pipeline.id}
diff --git a/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/lakeflow_pipelines_sql.pipeline.yml b/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/lakeflow_pipelines_sql.pipeline.yml
deleted file mode 100644
index 781c9fd6..00000000
--- a/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/lakeflow_pipelines_sql.pipeline.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-resources:
-  pipelines:
-    lakeflow_pipelines_sql_pipeline:
-      name: lakeflow_pipelines_sql_pipeline
-      serverless: true
-      channel: "PREVIEW"
-      catalog: ${var.catalog}
-      schema: ${var.schema}
-      root_path: "."
-      libraries:
-        - glob:
-            include: transformations/**
diff --git a/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/transformations/sample_trips_lakeflow_pipelines_sql.sql b/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/transformations/sample_trips_lakeflow_pipelines_sql.sql
deleted file mode 100644
index 09dda0bf..00000000
--- a/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/transformations/sample_trips_lakeflow_pipelines_sql.sql
+++ /dev/null
@@ -1,9 +0,0 @@
--- This file defines a sample transformation.
--- Edit the sample below or add new transformations
--- using "+ Add" in the file browser.
-
-CREATE MATERIALIZED VIEW sample_trips_lakeflow_pipelines_sql AS
-SELECT
-    pickup_zip,
-    fare_amount
-FROM samples.nyctaxi.trips
diff --git a/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/transformations/sample_zones_lakeflow_pipelines_sql.sql b/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/transformations/sample_zones_lakeflow_pipelines_sql.sql
deleted file mode 100644
index 5f5c567d..00000000
--- a/lakeflow_pipelines_sql/resources/lakeflow_pipelines_sql_pipeline/transformations/sample_zones_lakeflow_pipelines_sql.sql
+++ /dev/null
@@ -1,10 +0,0 @@
--- This file defines a sample transformation.
--- Edit the sample below or add new transformations
--- using "+ Add" in the file browser.
-
-CREATE MATERIALIZED VIEW sample_zones_lakeflow_pipelines_sql AS
-SELECT
-    pickup_zip,
-    SUM(fare_amount) AS total_fare
-FROM sample_trips_lakeflow_pipelines_sql
-GROUP BY pickup_zip