Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions contrib/templates/default-scala/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# default-scala
Comment thread
garlandz-db marked this conversation as resolved.

This template helps you create Scala projects with Databricks Asset Bundles. It uses sbt to compile and package Scala files, and can be used with Databricks Connect for local development.

It supports two compute types: standard clusters and serverless compute.

Run
```
databricks bundle init --template-dir contrib/templates/default-scala https://github.com/databricks/bundle-examples
```

and follow the generated README.md to get started.
58 changes: 58 additions & 0 deletions contrib/templates/default-scala/databricks_template_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"welcome_message": "\nWelcome to the default-scala template for Databricks Asset Bundles!\n\nA workspace was selected based on your current profile. For information about how to change this, see https://docs.databricks.com/dev-tools/cli/profiles.html.\nworkspace_host: {{workspace_host}}",
"properties": {
"project_name": {
"type": "string",
"description": "\nPlease provide a unique name for this project.\nproject_name",
"order": 1,
"pattern": "^[A-Za-z_][A-Za-z0-9-_]+$",
"pattern_match_failure_message": "Name must be at least two characters long, start with a letter or underscore, and consist of letters, numbers, dashes, and underscores."
},
"compute_type": {
"type": "string",
"description": "\nPlease select the compute type.\ncompute_type",
"enum": ["standard cluster", "serverless"],
"default": "serverless",
"order": 2
},
"artifacts_dest_path": {
"type": "string",
"description": "\nPlease provide the Unity Catalog volume destination path in Databricks where the directory will be created containing the JAR and other artifacts to store.{{if eq .compute_type \"standard cluster\"}}\nNote: your admin must allowlist the volume JAR path you specify for your workspace (see https://docs.databricks.com/en/data-governance/unity-catalog/manage-privileges/allowlist.html){{end}}\nartifacts_dest_path [example: /Volumes/abcdef1234567890]",
"order": 3,
"pattern": "^/Volumes(?:/[a-z0-9_-]+)+/?$",
"pattern_match_failure_message": "Please enter a valid path like /Volumes/my-folder/. Only lowercase letters, numbers, dashes, and underscores are allowed in folder names."
},
"default_catalog": {
"type": "string",
"default": "{{default_catalog}}",
"pattern": "^\\w*$",
"pattern_match_failure_message": "Invalid catalog name.",
"description": "\nPlease provide an initial catalog{{if eq (default_catalog) \"\"}} (leave blank when not using Unity Catalog){{end}}.\ndefault_catalog",
"order": 4
},
"personal_schemas": {
"type": "string",
"description": "\nWould you like to use a personal schema for each user working on this project? (e.g., 'catalog.{{short_name}}')\npersonal_schemas",
"enum": [
"yes, use a schema based on the current user name during development",
"no, use a shared schema during development"
],
"order": 5
},
"shared_schema": {
"skip_prompt_if": {
"properties": {
"personal_schemas": {
"const": "yes, use a schema based on the current user name during development"
}
}
},
"type": "string",
"default": "default",
"pattern": "^\\w+$",
"pattern_match_failure_message": "Invalid schema name.",
"description": "\nPlease provide an initial schema during development.\ndefault_schema",
"order": 6
}
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
{{/* Project artifact version used in JAR naming */}}
{{ define `version` -}}
0.1
{{- end }}

{{ define `databricks_cli_version` -}}
0.241.0
{{- end }}

{{ define `dbr_version` -}}
17.0
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# {{.project_name}}

The '{{.project_name}}' project was generated by using the scala-job template.
The '{{.project_name}}' project was generated by using the default-scala template.

## Getting started

1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/install.html. The version must be v0.241.0 or later.
1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/install.html. The version must be v{{template `databricks_cli_version` .}} or later.

2. Authenticate to your Databricks workspace (if you have not done so already):
```
Expand Down Expand Up @@ -90,7 +90,13 @@ by running `sbt test`.
## Customizations

### Job configuration
Bundles reuse the same configuration fields as the Databricks APIs. If you want to use an existing cluster instead of spinning one up every time, replace `job_cluster_key` in the tasks with `existing_cluster_id: <your_cluster_id>`.
{{- if eq .compute_type "serverless"}}
This project uses serverless compute. No cluster setup is required.

You can also change to an all-purpose (dedicated) cluster by removing the data_security_mode of the created cluster.
{{- else }}
This project uses standard compute for the job cluster. Standard compute requires Unity Catalog volume JAR paths to be allowlisted by your workspace admin.

If you want to use an existing cluster instead, replace `job_cluster_key` in the task configuration with `existing_cluster_id: <your_cluster_id>` in the job YAML file.

You can also customize the cluster configuration (node type, worker count, Spark version) in the `job_clusters` section of the job YAML file.
{{- end}}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@ bundle:
include:
- resources/*.yml

variables:
catalog:
description: The catalog to use
schema:
description: The schema to use

{{- $dev_schema := .shared_schema }}
{{- $prod_schema := .shared_schema }}
{{- if (regexp "^yes").MatchString .personal_schemas}}
{{- $dev_schema = "${workspace.current_user.short_name}"}}
{{- $prod_schema = "default"}}
{{- end}}

workspace:
host: {{workspace_host}}
artifact_path: {{.artifacts_dest_path}}/${bundle.name}/${bundle.target}/${workspace.current_user.short_name}
Expand All @@ -29,6 +42,9 @@ targets:
default: true
workspace:
host: {{workspace_host}}
variables:
catalog: {{.default_catalog}}
schema: {{$dev_schema}}

prod:
mode: production
Expand All @@ -39,3 +55,6 @@ targets:
permissions:
- {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
level: CAN_MANAGE
variables:
catalog: {{.default_catalog}}
schema: {{$prod_schema}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# The main job for {{.project_name}}
# Defines one job with a single JAR task; the compute section is chosen at
# template-render time from the compute_type answer.

resources:
jobs:
{{.project_name}}:
name: {{.project_name}}
# Job-level parameters; their defaults are taken from the catalog and schema variables.
parameters:
- name: catalog
default: ${var.catalog}
- name: schema
default: ${var.schema}
tasks:
- task_key: main_task
{{- if eq .compute_type "standard cluster"}}
# Standard cluster only: run the task on the job cluster declared under job_clusters.
job_cluster_key: job_cluster
{{- end}}
spark_jar_task:
main_class_name: {{template `main_class_name` .}}
# Forwarded to the main class as "--catalog <value> --schema <value>".
# The backtick-quoted literals keep the job parameter references unrendered by the
# template, so the emitted YAML still contains the double-brace job.parameters references.
parameters:
- "--catalog"
- "{{`{{job.parameters.catalog}}`}}"
- "--schema"
- "{{`{{job.parameters.schema}}`}}"
{{- if eq .compute_type "serverless"}}
environment_key: default
{{- else}}
libraries:
- jar: ${workspace.artifact_path}/.internal/{{.project_name}}-assembly-{{template `version` .}}.jar
{{- end}}
{{- if eq .compute_type "serverless"}}
# Serverless: the assembly JAR is listed as a java_dependencies entry of the "default" environment.
environments:
- environment_key: default
spec:
environment_version: "4-scala-preview"
java_dependencies:
- ${workspace.artifact_path}/.internal/{{.project_name}}-assembly-{{template `version` .}}.jar
{{- else}}
# Standard cluster: a new job cluster that autoscales between 1 and 4 workers.
job_clusters:
- job_cluster_key: job_cluster
new_cluster:
spark_version: 17.3.x-scala2.13
node_type_id: i3.xlarge
autoscale:
min_workers: 1
max_workers: 4
{{- end}}
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,29 @@ object Main {
println("Hello, World!")

val spark = getSession()

setCatalogAndSchema(spark, args)
git
println("Showing range ...")
spark.range(3).show()
}

/**
 * Switches the session's current catalog and schema based on command-line arguments.
 *
 * Looks up `--catalog <name>` and `--schema <name>` in `args`. Every value that is
 * present and non-blank is applied with `USE CATALOG` / `USE SCHEMA` and echoed to
 * stdout; a missing or blank value leaves the session's current setting untouched.
 *
 * @param spark session on which the `USE` statements are executed
 * @param args  raw program arguments, expected to contain `--key value` pairs
 */
private def setCatalogAndSchema(spark: SparkSession, args: Array[String]): Unit = {
  // A blank value (for example when no catalog is configured) would render as
  // "USE CATALOG " with no identifier, which is not valid SQL, so it is skipped.
  getFromArgs(args, "catalog").filter(_.trim.nonEmpty).foreach { catalog =>
    spark.sql(s"USE CATALOG $catalog")
    println(s"Using catalog: $catalog")
  }

  // Same guard for the schema: only issue the statement when a name was supplied.
  getFromArgs(args, "schema").filter(_.trim.nonEmpty).foreach { schema =>
    spark.sql(s"USE SCHEMA $schema")
    println(s"Using schema: $schema")
  }
}

println("Showing nyctaxi trips ...")
val nycTaxi = new NycTaxi(spark)
val df = nycTaxi.trips()
df.show()
/**
 * Returns the value that follows the `--<key>` flag in `args`, if any.
 *
 * Arguments are first read as aligned `--key value` pairs (positions 0-1, 2-3, ...).
 * If the flag is not found that way, every adjacent pair is examined, so the flag is
 * still found when an extra argument shifts the pairs out of alignment.
 *
 * @param args raw program arguments
 * @param key  flag name without the leading dashes, e.g. `"catalog"`
 * @return the argument immediately following `--<key>`, or `None` when the flag is
 *         absent or is the last argument
 */
private def getFromArgs(args: Array[String], key: String): Option[String] = {
  val flag = s"--$key"
  // Matches a two-element window whose first element is exactly the flag.
  val valueOfFlag: PartialFunction[Array[String], String] = {
    case Array(`flag`, value) => value
  }

  args.sliding(2, 2).collectFirst(valueOfFlag)
    // Fallback for misaligned input; a trailing one-element window never matches.
    .orElse(args.sliding(2).collectFirst(valueOfFlag))
}

def getSession(): SparkSession = {
Expand Down
10 changes: 0 additions & 10 deletions contrib/templates/scala-job/README.md

This file was deleted.

19 changes: 0 additions & 19 deletions contrib/templates/scala-job/databricks_template_schema.json

This file was deleted.

This file was deleted.