diff --git a/contrib/templates/default-scala/README.md b/contrib/templates/default-scala/README.md new file mode 100644 index 00000000..5c6be785 --- /dev/null +++ b/contrib/templates/default-scala/README.md @@ -0,0 +1,12 @@ +# default-scala + +This template helps you create Scala projects with Databricks Asset Bundles. It uses sbt to compile and package Scala files, and can be used with Databricks Connect for local development. + +It supports two compute types: standard clusters and serverless compute. + +Run +``` +databricks bundle init --template-dir contrib/templates/default-scala https://github.com/databricks/bundle-examples +``` + +and follow the generated README.md to get started. \ No newline at end of file diff --git a/contrib/templates/default-scala/databricks_template_schema.json b/contrib/templates/default-scala/databricks_template_schema.json new file mode 100644 index 00000000..9d239768 --- /dev/null +++ b/contrib/templates/default-scala/databricks_template_schema.json @@ -0,0 +1,58 @@ +{ + "welcome_message": "\nWelcome to the default-scala template for Databricks Asset Bundles!\n\nA workspace was selected based on your current profile. For information about how to change this, see https://docs.databricks.com/dev-tools/cli/profiles.html.\nworkspace_host: {{workspace_host}}", + "properties": { + "project_name": { + "type": "string", + "description": "\nPlease provide a unique name for this project.\nproject_name", + "order": 1, + "pattern": "^[A-Za-z_][A-Za-z0-9-_]+$", + "pattern_match_failure_message": "Name must consist of letters, numbers, dashes, and underscores." 
+ }, + "compute_type": { + "type": "string", + "description": "\nPlease select the compute type.\ncompute_type", + "enum": ["standard cluster", "serverless"], + "default": "serverless", + "order": 2 + }, + "artifacts_dest_path": { + "type": "string", + "description": "\nPlease provide the Unity Catalog volume destination path in Databricks where the directory will be created containing the JAR and other artifacts to store.{{if eq .compute_type \"standard cluster\"}}\nNote: your admin must allowlist the volume JAR path you specify for your workspace (see https://docs.databricks.com/en/data-governance/unity-catalog/manage-privileges/allowlist.html){{end}}\nartifacts_dest_path [example: /Volumes/abcdef1234567890]", + "order": 3, + "pattern": "^/Volumes(?:/[a-z0-9_-]+)+/?$", + "pattern_match_failure_message": "Please enter a valid path like /Volumes/my-folder/. Only lowercase letters, numbers, dashes, and underscores are allowed in folder names." + }, + "default_catalog": { + "type": "string", + "default": "{{default_catalog}}", + "pattern": "^\\w*$", + "pattern_match_failure_message": "Invalid catalog name.", + "description": "\nPlease provide an initial catalog{{if eq (default_catalog) \"\"}} (leave blank when not using Unity Catalog){{end}}.\ndefault_catalog", + "order": 4 + }, + "personal_schemas": { + "type": "string", + "description": "\nWould you like to use a personal schema for each user working on this project? 
(e.g., 'catalog.{{short_name}}')\npersonal_schemas", + "enum": [ + "yes, use a schema based on the current user name during development", + "no, use a shared schema during development" + ], + "order": 5 + }, + "shared_schema": { + "skip_prompt_if": { + "properties": { + "personal_schemas": { + "const": "yes, use a schema based on the current user name during development" + } + } + }, + "type": "string", + "default": "default", + "pattern": "^\\w+$", + "pattern_match_failure_message": "Invalid schema name.", + "description": "\nPlease provide an initial schema during development.\ndefault_schema", + "order": 6 + } + } +} diff --git a/contrib/templates/scala-job/library/template_variables.tmpl b/contrib/templates/default-scala/library/template_variables.tmpl similarity index 83% rename from contrib/templates/scala-job/library/template_variables.tmpl rename to contrib/templates/default-scala/library/template_variables.tmpl index 94507b31..136630ed 100644 --- a/contrib/templates/scala-job/library/template_variables.tmpl +++ b/contrib/templates/default-scala/library/template_variables.tmpl @@ -1,7 +1,12 @@ +{{/* Project artifact version used in JAR naming */}} {{ define `version` -}} 0.1 {{- end }} +{{ define `databricks_cli_version` -}} + 0.241.0 +{{- end }} + {{ define `dbr_version` -}} 17.0 {{- end }} diff --git a/contrib/templates/scala-job/template/{{.project_name}}/.gitignore b/contrib/templates/default-scala/template/{{.project_name}}/.gitignore similarity index 100% rename from contrib/templates/scala-job/template/{{.project_name}}/.gitignore rename to contrib/templates/default-scala/template/{{.project_name}}/.gitignore diff --git a/contrib/templates/scala-job/template/{{.project_name}}/README.md.tmpl b/contrib/templates/default-scala/template/{{.project_name}}/README.md.tmpl similarity index 79% rename from contrib/templates/scala-job/template/{{.project_name}}/README.md.tmpl rename to contrib/templates/default-scala/template/{{.project_name}}/README.md.tmpl 
index e0b3f48c..3d4792ed 100644 --- a/contrib/templates/scala-job/template/{{.project_name}}/README.md.tmpl +++ b/contrib/templates/default-scala/template/{{.project_name}}/README.md.tmpl @@ -1,10 +1,10 @@ # {{.project_name}} -The '{{.project_name}}' project was generated by using the scala-job template. +The '{{.project_name}}' project was generated by using the default-scala template. ## Getting started -1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/install.html. The version must be v0.241.0 or later. +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/install.html. The version must be v{{template `databricks_cli_version` .}} or later. 2. Authenticate to your Databricks workspace (if you have not done so already): ``` @@ -90,7 +90,13 @@ by running `sbt test`. ## Customizations ### Job configuration -The bundles piggybacks off the same configurations used in APIs. If you want to use an existing cluster instead of spinning one up everytime, replace job_cluster_key in tasks with existing_cluster_id: +{{- if eq .compute_type "serverless"}} +This project uses serverless compute. No cluster setup is required. -You can also change to an all-purpose (dedicated) cluster by removing the data_security_mode of the created cluster. +{{- else }} +This project uses standard compute for the job cluster. Standard compute requires Unity Catalog volume JAR paths to be allowlisted by your workspace admin. +If you want to use an existing cluster instead, replace `job_cluster_key` in the task configuration with `existing_cluster_id: <cluster-id>` in the job YAML file. + +You can also customize the cluster configuration (node type, worker count, Spark version) in the `job_clusters` section of the job YAML file. 
+{{- end}} diff --git a/contrib/templates/scala-job/template/{{.project_name}}/build.sbt.tmpl b/contrib/templates/default-scala/template/{{.project_name}}/build.sbt.tmpl similarity index 100% rename from contrib/templates/scala-job/template/{{.project_name}}/build.sbt.tmpl rename to contrib/templates/default-scala/template/{{.project_name}}/build.sbt.tmpl diff --git a/contrib/templates/scala-job/template/{{.project_name}}/databricks.yml.tmpl b/contrib/templates/default-scala/template/{{.project_name}}/databricks.yml.tmpl similarity index 72% rename from contrib/templates/scala-job/template/{{.project_name}}/databricks.yml.tmpl rename to contrib/templates/default-scala/template/{{.project_name}}/databricks.yml.tmpl index ffa39be3..bfd50a10 100644 --- a/contrib/templates/scala-job/template/{{.project_name}}/databricks.yml.tmpl +++ b/contrib/templates/default-scala/template/{{.project_name}}/databricks.yml.tmpl @@ -7,6 +7,19 @@ bundle: include: - resources/*.yml +variables: + catalog: + description: The catalog to use + schema: + description: The schema to use + +{{- $dev_schema := .shared_schema }} +{{- $prod_schema := .shared_schema }} +{{- if (regexp "^yes").MatchString .personal_schemas}} + {{- $dev_schema = "${workspace.current_user.short_name}"}} + {{- $prod_schema = "default"}} +{{- end}} + workspace: host: {{workspace_host}} artifact_path: {{.artifacts_dest_path}}/${bundle.name}/${bundle.target}/${workspace.current_user.short_name} @@ -29,6 +42,9 @@ targets: default: true workspace: host: {{workspace_host}} + variables: + catalog: {{.default_catalog}} + schema: {{$dev_schema}} prod: mode: production @@ -39,3 +55,6 @@ targets: permissions: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} level: CAN_MANAGE + variables: + catalog: {{.default_catalog}} + schema: {{$prod_schema}} diff --git a/contrib/templates/scala-job/template/{{.project_name}}/project/plugins.sbt.tmpl 
b/contrib/templates/default-scala/template/{{.project_name}}/project/plugins.sbt.tmpl similarity index 100% rename from contrib/templates/scala-job/template/{{.project_name}}/project/plugins.sbt.tmpl rename to contrib/templates/default-scala/template/{{.project_name}}/project/plugins.sbt.tmpl diff --git a/contrib/templates/scala-job/template/{{.project_name}}/resources/.gitkeep b/contrib/templates/default-scala/template/{{.project_name}}/resources/.gitkeep similarity index 100% rename from contrib/templates/scala-job/template/{{.project_name}}/resources/.gitkeep rename to contrib/templates/default-scala/template/{{.project_name}}/resources/.gitkeep diff --git a/contrib/templates/default-scala/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl b/contrib/templates/default-scala/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl new file mode 100644 index 00000000..32c7066c --- /dev/null +++ b/contrib/templates/default-scala/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl @@ -0,0 +1,46 @@ +# The main job for {{.project_name}} + +resources: + jobs: + {{.project_name}}: + name: {{.project_name}} + parameters: + - name: catalog + default: ${var.catalog} + - name: schema + default: ${var.schema} + tasks: + - task_key: main_task +{{- if eq .compute_type "standard cluster"}} + job_cluster_key: job_cluster +{{- end}} + spark_jar_task: + main_class_name: {{template `main_class_name` .}} + parameters: + - "--catalog" + - "{{`{{job.parameters.catalog}}`}}" + - "--schema" + - "{{`{{job.parameters.schema}}`}}" +{{- if eq .compute_type "serverless"}} + environment_key: default +{{- else}} + libraries: + - jar: ${workspace.artifact_path}/.internal/{{.project_name}}-assembly-{{template `version` .}}.jar +{{- end}} +{{- if eq .compute_type "serverless"}} + environments: + - environment_key: default + spec: + environment_version: "4-scala-preview" + java_dependencies: + - 
${workspace.artifact_path}/.internal/{{.project_name}}-assembly-{{template `version` .}}.jar +{{- else}} + job_clusters: + - job_cluster_key: job_cluster + new_cluster: + spark_version: 17.3.x-scala2.13 + node_type_id: {{smallest_node_type}} + autoscale: + min_workers: 1 + max_workers: 4 +{{- end}} \ No newline at end of file diff --git a/contrib/templates/scala-job/template/{{.project_name}}/src/main/scala/com/examples/Main.scala b/contrib/templates/default-scala/template/{{.project_name}}/src/main/scala/com/examples/Main.scala similarity index 62% rename from contrib/templates/scala-job/template/{{.project_name}}/src/main/scala/com/examples/Main.scala rename to contrib/templates/default-scala/template/{{.project_name}}/src/main/scala/com/examples/Main.scala index 177bcd00..f2605645 100644 --- a/contrib/templates/scala-job/template/{{.project_name}}/src/main/scala/com/examples/Main.scala +++ b/contrib/templates/default-scala/template/{{.project_name}}/src/main/scala/com/examples/Main.scala @@ -13,13 +13,29 @@ object Main { println("Hello, World!") val spark = getSession() + + setCatalogAndSchema(spark, args) + println("Showing range ...") spark.range(3).show() + } + + private def setCatalogAndSchema(spark: SparkSession, args: Array[String]): Unit = { + getFromArgs(args, "catalog").foreach { catalog => + spark.sql(s"USE CATALOG $catalog") + println(s"Using catalog: $catalog") + } + + getFromArgs(args, "schema").foreach { schema => + spark.sql(s"USE SCHEMA $schema") + println(s"Using schema: $schema") + } + } - println("Showing nyctaxi trips ...") - val nycTaxi = new NycTaxi(spark) - val df = nycTaxi.trips() - df.show() + private def getFromArgs(args: Array[String], key: String): Option[String] = { + args.sliding(2, 2).collectFirst { + case Array(k, v) if k == s"--$key" => v + } } def getSession(): SparkSession = { diff --git a/contrib/templates/scala-job/template/{{.project_name}}/src/main/scala/com/examples/NycTaxi.scala 
b/contrib/templates/default-scala/template/{{.project_name}}/src/main/scala/com/examples/NycTaxi.scala similarity index 100% rename from contrib/templates/scala-job/template/{{.project_name}}/src/main/scala/com/examples/NycTaxi.scala rename to contrib/templates/default-scala/template/{{.project_name}}/src/main/scala/com/examples/NycTaxi.scala diff --git a/contrib/templates/scala-job/template/{{.project_name}}/src/test/scala/com/examples/NycTaxiSpec.scala b/contrib/templates/default-scala/template/{{.project_name}}/src/test/scala/com/examples/NycTaxiSpec.scala similarity index 100% rename from contrib/templates/scala-job/template/{{.project_name}}/src/test/scala/com/examples/NycTaxiSpec.scala rename to contrib/templates/default-scala/template/{{.project_name}}/src/test/scala/com/examples/NycTaxiSpec.scala diff --git a/contrib/templates/scala-job/README.md b/contrib/templates/scala-job/README.md deleted file mode 100644 index 2f476a07..00000000 --- a/contrib/templates/scala-job/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# scala-job - -This is an (experimental) template for creating using Scala with Databricks Asset Bundles. It uses sbt to compile and package Scala files, and can be used with Databricks Connect for local development. - -Run -``` -databricks bundle init --template-dir contrib/templates/scala-job https://github.com/databricks/bundle-examples -``` - -and follow the generated README.md to get started. \ No newline at end of file diff --git a/contrib/templates/scala-job/databricks_template_schema.json b/contrib/templates/scala-job/databricks_template_schema.json deleted file mode 100644 index 3571e778..00000000 --- a/contrib/templates/scala-job/databricks_template_schema.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "welcome_message": "\nWelcome to the scala-job template for Databricks Asset Bundles!\n\nA workspace was selected based on your current profile. 
For information about how to change this, see https://docs.databricks.com/dev-tools/cli/profiles.html.\nworkspace_host: {{workspace_host}}", - "properties": { - "project_name": { - "type": "string", - "description": "\nPlease provide a unique name for this project.\nproject_name", - "order": 1, - "pattern": "^[A-Za-z_][A-Za-z0-9-_]+$", - "pattern_match_failure_message": "Name must consist of letters, numbers, dashes, and underscores." - }, - "artifacts_dest_path": { - "type": "string", - "description": "\nPlease provide the Unity Catalog volume destination path in Databricks where the directory will be created containing the JAR and other artifacts to store. [example: /Volumes/abcdef1234567890]", - "order": 2, - "pattern": "^/Volumes(?:/[a-z0-9_-]+)+/?$", - "pattern_match_failure_message": "Please enter a valid path like /Volumes/my-folder/. Only lowercase letters, numbers, dashes, and underscores are allowed in folder names." - } - } -} diff --git a/contrib/templates/scala-job/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl b/contrib/templates/scala-job/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl deleted file mode 100644 index 97f2f6e3..00000000 --- a/contrib/templates/scala-job/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl +++ /dev/null @@ -1,17 +0,0 @@ -# The main job for {{.project_name}} - -resources: - jobs: - {{.project_name}}: - name: {{.project_name}} - tasks: - - task_key: main_task - spark_jar_task: - main_class_name: {{template `main_class_name` .}} - environment_key: default - environments: - - environment_key: default - spec: - environment_version: "4-scala-preview" - java_dependencies: - - ${workspace.artifact_path}/.internal/{{.project_name}}-assembly-{{template `version` .}}.jar