From 65b9b0a7fccc7507d5fbe9a1d667fd8f1b021494 Mon Sep 17 00:00:00 2001
From: Lennart Kats <lennart.kats@databricks.com>
Date: Mon, 25 Aug 2025 11:59:45 +0200
Subject: [PATCH] Update examples based on templates

---
 default_python/README.md                  | 50 +++++----------
 default_python/conftest.py                | 76 -----------------------
 default_python/pyproject.toml             | 16 +++--
 default_python/scratch/exploration.ipynb  |  2 +-
 default_python/src/default_python/main.py | 19 ++++--
 default_python/src/dlt_pipeline.ipynb     |  2 +-
 default_python/src/notebook.ipynb         |  2 +-
 default_python/tests/main_test.py         |  6 +-
 scripts/update_from_templates.sh          |  8 +--
 9 files changed, 53 insertions(+), 128 deletions(-)
 delete mode 100644 default_python/conftest.py

diff --git a/default_python/README.md b/default_python/README.md
index c4f3a2b6..74a1f42d 100644
--- a/default_python/README.md
+++ b/default_python/README.md
@@ -2,39 +2,18 @@
 
 The 'default_python' project was generated by using the default-python template.
 
-For documentation on the Databricks Asset Bundles format use for this project,
-and for CI/CD configuration, see https://docs.databricks.com/aws/en/dev-tools/bundles.
-
 ## Getting started
 
-Choose how you want to work on this project:
-
-(a) Directly in your Databricks workspace, see
-    https://docs.databricks.com/dev-tools/bundles/workspace.
-
-(b) Locally with an IDE like Cursor or VS Code, see
-    https://docs.databricks.com/vscode-ext.
-
-(c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html
-
-
-Dependencies for this project should be installed using UV:
+0. Install UV: https://docs.astral.sh/uv/getting-started/installation/
 
-*  Make sure you have the UV package manager installed.
-   It's an alternative to tools like pip: https://docs.astral.sh/uv/getting-started/installation/.
-*  Run `uv sync --dev` to install the project's dependencies.
+1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html
 
-# Using this project using the CLI
-
-The Databricks workspace and IDE extensions provide a graphical interface for working
-with this project. It's also possible to interact with it directly using the CLI:
-
-1. Authenticate to your Databricks workspace, if you have not done so already:
+2. Authenticate to your Databricks workspace, if you have not done so already:
     ```
     $ databricks configure
     ```
 
-2. To deploy a development copy of this project, type:
+3. To deploy a development copy of this project, type:
     ```
     $ databricks bundle deploy --target dev
     ```
@@ -44,9 +23,9 @@ with this project. It's also possible to interact with it directly using the CLI
     This deploys everything that's defined for this project.
     For example, the default template would deploy a job called
     `[dev yourname] default_python_job` to your workspace.
-    You can find that job by opening your workpace and clicking on **Jobs & Pipelines**.
+    You can find that job by opening your workspace and clicking on **Workflows**.
 
-3. Similarly, to deploy a production copy, type:
+4. Similarly, to deploy a production copy, type:
    ```
    $ databricks bundle deploy --target prod
    ```
@@ -56,12 +35,17 @@ with this project. It's also possible to interact with it directly using the CLI
    is paused when deploying in development mode (see
    https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).
 
-4. To run a job or pipeline, use the "run" command:
+5. To run a job or pipeline, use the "run" command:
    ```
    $ databricks bundle run
    ```
-
-5. Finally, to run tests locally, use `pytest`:
-   ```
-   $ uv run pytest
-   ```
+6. Optionally, install the Databricks extension for Visual Studio Code for local development from
+   https://docs.databricks.com/dev-tools/vscode-ext.html. It can configure your
+   virtual environment and set up Databricks Connect for running unit tests locally.
+   When not using these tools, consult your development environment's documentation
+   and/or the documentation for Databricks Connect for manually setting up your environment
+   (https://docs.databricks.com/en/dev-tools/databricks-connect/python/index.html).
+
+7. For documentation on the Databricks Asset Bundles format used
+   for this project, and for CI/CD configuration, see
+   https://docs.databricks.com/dev-tools/bundles/index.html.
diff --git a/default_python/conftest.py b/default_python/conftest.py
deleted file mode 100644
index cf1d0978..00000000
--- a/default_python/conftest.py
+++ /dev/null
@@ -1,76 +0,0 @@
-"""This file configures pytest.
-
-This file is in the root since it can be used for tests in any place in this
-project, including tests under resources/.
-"""
-
-import os, sys, pathlib
-from contextlib import contextmanager
-
-
-try:
-    from databricks.connect import DatabricksSession
-    from databricks.sdk import WorkspaceClient
-    from pyspark.sql import SparkSession
-    import pytest
-except ImportError:
-    raise ImportError("Test dependencies not found.\n\nRun tests using 'uv run pytest'. See http://docs.astral.sh/uv to learn more about uv.")
-
-
-def add_all_resources_to_sys_path():
-    """Add all resources/* directories to sys.path for module discovery."""
-    resources = pathlib.Path(__file__).with_name("resources")
-    resource_dirs = filter(pathlib.Path.is_dir, resources.iterdir())
-    seen: dict[str, pathlib.Path] = {}
-    for resource in resource_dirs:
-        sys.path.append(str(resource.resolve()))
-        for py in resource.rglob("*.py"):
-            mod = ".".join(py.relative_to(resource).with_suffix("").parts)
-            if mod in seen:
-                raise ImportError(f"Duplicate module '{mod}' found:\n  {seen[mod]}\n  {py}")
-            seen[mod] = py
-
-
-def enable_fallback_compute():
-    """Enable serverless compute if no compute is specified."""
-    conf = WorkspaceClient().config
-    if conf.serverless_compute_id or conf.cluster_id or os.environ.get("SPARK_REMOTE"):
-        return
-
-    url = "https://docs.databricks.com/dev-tools/databricks-connect/cluster-config"
-    print("☁️ no compute specified, falling back to serverless compute", file=sys.stderr)
-    print(f"  see {url} for manual configuration", file=sys.stdout)
-
-    os.environ["DATABRICKS_SERVERLESS_COMPUTE_ID"] = "auto"
-
-
-@contextmanager
-def allow_stderr_output(config: pytest.Config):
-    """Temporarily disable pytest output capture."""
-    capman = config.pluginmanager.get_plugin("capturemanager")
-    if capman:
-        with capman.global_and_fixture_disabled():
-            yield
-    else:
-        yield
-
-
-def pytest_configure(config: pytest.Config):
-    """Configure pytest session."""
-    with allow_stderr_output(config):
-        add_all_resources_to_sys_path()
-        enable_fallback_compute()
-
-        # Initialize Spark session eagerly, so it is available even when
-        # SparkSession.builder.getOrCreate() is used. For DB Connect 15+,
-        # we validate version compatibility with the remote cluster.
-        if hasattr(DatabricksSession.builder, "validateSession"):
-            DatabricksSession.builder.validateSession().getOrCreate()
-        else:
-            DatabricksSession.builder.getOrCreate()
-
-
-@pytest.fixture(scope="session")
-def spark() -> SparkSession:
-    """Provide a SparkSession fixture for tests."""
-    return DatabricksSession.builder.getOrCreate()
diff --git a/default_python/pyproject.toml b/default_python/pyproject.toml
index dda79245..d32e108a 100644
--- a/default_python/pyproject.toml
+++ b/default_python/pyproject.toml
@@ -4,7 +4,7 @@ version = "0.0.1"
 authors = [{ name = "user@company.com" }]
 requires-python = ">= 3.11"
 
-[dependency-groups]
+[project.optional-dependencies]
 dev = [
     "pytest",
 
@@ -12,10 +12,16 @@ dev = [
     "databricks-dlt",
 
     # databricks-connect can be used to run parts of this project locally.
-    # Note that for local development, you should use a version that is not newer
-    # than the remote cluster or serverless compute you connect to.
-    # See also https://docs.databricks.com/dev-tools/databricks-connect.html.
-    "databricks-connect>=15.4,<15.5",
+    # See https://docs.databricks.com/dev-tools/databricks-connect.html.
+    #
+    # Note, databricks-connect is automatically installed if you're using the Databricks
+    # extension for Visual Studio Code
+    # (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html).
+    #
+    # To manually install databricks-connect, uncomment the line below to install a version
+    # of db-connect that corresponds to the Databricks Runtime version used for this project.
+    # See https://docs.databricks.com/dev-tools/databricks-connect.html
+    # "databricks-connect>=15.4,<15.5",
 ]
 
 [tool.pytest.ini_options]
diff --git a/default_python/scratch/exploration.ipynb b/default_python/scratch/exploration.ipynb
index 57a9c978..f7832011 100644
--- a/default_python/scratch/exploration.ipynb
+++ b/default_python/scratch/exploration.ipynb
@@ -32,7 +32,7 @@
     "sys.path.append(\"../src\")\n",
     "from default_python import main\n",
     "\n",
-    "main.get_taxis().show(10)"
+    "main.get_taxis(spark).show(10)"
    ]
   }
  ],
diff --git a/default_python/src/default_python/main.py b/default_python/src/default_python/main.py
index 04e8be4d..5ae344c7 100644
--- a/default_python/src/default_python/main.py
+++ b/default_python/src/default_python/main.py
@@ -1,13 +1,24 @@
-from databricks.sdk.runtime import spark
-from pyspark.sql import DataFrame
+from pyspark.sql import SparkSession, DataFrame
 
 
-def find_all_taxis() -> DataFrame:
+def get_taxis(spark: SparkSession) -> DataFrame:
     return spark.read.table("samples.nyctaxi.trips")
 
 
+# Get or create a Databricks Connect session, falling back to a regular SparkSession
+# when databricks.connect cannot be imported. If this fails, check that you have
+# configured Databricks Connect: https://docs.databricks.com/dev-tools/databricks-connect.html.
+def get_spark() -> SparkSession:
+    try:
+        from databricks.connect import DatabricksSession
+
+        return DatabricksSession.builder.getOrCreate()
+    except ImportError:
+        return SparkSession.builder.getOrCreate()
+
+
 def main():
-    find_all_taxis().show(5)
+    get_taxis(get_spark()).show(5)
 
 
 if __name__ == "__main__":
diff --git a/default_python/src/dlt_pipeline.ipynb b/default_python/src/dlt_pipeline.ipynb
index 34e1895e..eb93d319 100644
--- a/default_python/src/dlt_pipeline.ipynb
+++ b/default_python/src/dlt_pipeline.ipynb
@@ -56,7 +56,7 @@
    "source": [
     "@dlt.view\n",
     "def taxi_raw():\n",
-    "    return main.find_all_taxis()\n",
+    "    return main.get_taxis(spark)\n",
     "\n",
     "\n",
     "@dlt.table\n",
diff --git a/default_python/src/notebook.ipynb b/default_python/src/notebook.ipynb
index fd49e5b9..fe99fd32 100644
--- a/default_python/src/notebook.ipynb
+++ b/default_python/src/notebook.ipynb
@@ -46,7 +46,7 @@
    "source": [
     "from default_python import main\n",
     "\n",
-    "main.find_all_taxis().show(10)"
+    "main.get_taxis(spark).show(10)"
    ]
   }
  ],
diff --git a/default_python/tests/main_test.py b/default_python/tests/main_test.py
index 66c27024..66b6f0a2 100644
--- a/default_python/tests/main_test.py
+++ b/default_python/tests/main_test.py
@@ -1,6 +1,6 @@
-from default_python import main
+from default_python.main import get_taxis, get_spark
 
 
-def test_find_all_taxis():
-    taxis = main.find_all_taxis()
+def test_main():
+    taxis = get_taxis(get_spark())
     assert taxis.count() > 5
diff --git a/scripts/update_from_templates.sh b/scripts/update_from_templates.sh
index a8b74fcf..c56a4b82 100755
--- a/scripts/update_from_templates.sh
+++ b/scripts/update_from_templates.sh
@@ -50,12 +50,12 @@ if [ ! "$DATABRICKS_HOST" ]; then
     exit 1
 fi
 
-if [ -n "$1" ]; then 
+# Prompt for CURRENT_USER_NAME if not passed as first arg
+if [ -n "${1-}" ]; then 
     CURRENT_USER_NAME="$1"
 else
-    read -p "Enter the current user name (e.g., 'lennart_kats'): " CURRENT_USER_NAME
-    read -p "Enter the current user name (e.g., 'lennart_kats'): " CURRENT_USER_NAME
-    if [ ! "$CURRENT_USER_NAME" ]; then
+    read -r -p "Enter the current user name of your 'DEFAULT' profile (e.g., 'lennart_kats'): " CURRENT_USER_NAME
+    if [ -z "${CURRENT_USER_NAME:-}" ]; then
         echo "Error: current user name is required." >&2
         exit 1
     fi