Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
504193f
adds udal table name mappings
tomjemmett Feb 16, 2026
2e57920
switch to use udal table names
tomjemmett Feb 17, 2026
f6cca81
switch to use udal compute
tomjemmett Feb 17, 2026
01f9a50
refactor create_pop_by_lsoa21 to prevent cache size exceeding limits …
tomjemmett Feb 17, 2026
57ac625
use table_names in creation of views
tomjemmett Feb 18, 2026
fdd85dd
rederives maternity_episode_type column
tomjemmett Feb 18, 2026
b84e235
fixes alcohol mitigator
tomjemmett Feb 18, 2026
5827332
removes circular dependency on table which is created later
tomjemmett Feb 18, 2026
8f35e30
fixes issue with loading reference csvs
tomjemmett Feb 18, 2026
badd8e0
fix table names
tomjemmett Feb 18, 2026
6ae78df
fix excess bed days mitigator
tomjemmett Feb 18, 2026
0f3e12f
fix population by imd
tomjemmett Feb 18, 2026
74beac5
fixes for udal ecds table columns
tomjemmett Mar 2, 2026
82333d8
changes how we save inputs data parquets
tomjemmett Mar 2, 2026
7d124fb
orders inputs data before writing
tomjemmett Mar 2, 2026
b48bdb7
filters out problematic rows (denominator=0)
tomjemmett Mar 2, 2026
7d77160
ensures denominator is greater than 0
tomjemmett Mar 3, 2026
73ddf05
fix type issues
tomjemmett Mar 3, 2026
694f151
fix lint issues
tomjemmett Mar 3, 2026
b2a61c2
changes the prod target
tomjemmett Mar 3, 2026
08aa61e
adds icb catchments creation to reference job
tomjemmett Mar 3, 2026
cb8a6c3
fixes issue with pandas to_parquet
tomjemmett Mar 3, 2026
392bb5a
fix typing issues
tomjemmett Mar 17, 2026
c0b26ca
fix formatting issues
tomjemmett Mar 17, 2026
c4b36bf
Apply suggestions from code review
tomjemmett Mar 18, 2026
fcdf7bc
move drop table to before first for loop
tomjemmett Mar 18, 2026
bea02c2
configure the prod environment correctly
tomjemmett Mar 18, 2026
86b056f
adds step to extract the data for use in the model docker containers …
tomjemmett Mar 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions databricks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,13 @@ targets:
mode: development
default: true
workspace:
host: https://adb-4243551358552236.16.azuredatabricks.net
host: https://adb-6450443583208388.8.azuredatabricks.net

prod:
mode: production
run_as:
service_principal_name: b61ef435-b155-42c7-8c9e-2a2442e280cd
workspace:
host: https://adb-4243551358552236.16.azuredatabricks.net
root_path: /Workspace/Jobs/nhp_devs/${bundle.name}/${bundle.target}
host: https://adb-6450443583208388.8.azuredatabricks.net
root_path: /Workspace/Users/${workspace.current_user.userName}/.bundle/${bundle.name}/${bundle.target}
permissions:
- group_name: nhp_devs
level: CAN_MANAGE
- service_principal_name: b61ef435-b155-42c7-8c9e-2a2442e280cd
- group_name: UDAL - Databricks - NewHospitalProgramme - PROD
level: CAN_MANAGE
47 changes: 26 additions & 21 deletions databricks_workflows/nhp_data-ecds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,6 @@ resources:
jobs:
Generate_NHP_Data_AAE_ECDS:
name: Generate NHP Data (AAE/ECDS)
webhook_notifications:
on_success:
- id: 11be34cb-f1f0-42ca-99d8-e7b3e75e20ca
on_failure:
- id: 11be34cb-f1f0-42ca-99d8-e7b3e75e20ca
notification_settings:
no_alert_for_skipped_runs: true
no_alert_for_canceled_runs: true
tasks:
- task_key: run_ecds
condition_task:
Expand All @@ -23,7 +15,7 @@ resources:
python_wheel_task:
package_name: nhp_data
entry_point: raw_data-aae
job_cluster_key: generate_nhp_ecds
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
- task_key: nhp-raw_data-ecds
Expand All @@ -32,7 +24,7 @@ resources:
python_wheel_task:
package_name: nhp_data
entry_point: raw_data-ecds
job_cluster_key: generate_nhp_ecds
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
- task_key: nhp-aggregated_data-ecds
Expand All @@ -41,7 +33,7 @@ resources:
python_wheel_task:
package_name: nhp_data
entry_point: aggregated_data-ecds
job_cluster_key: generate_nhp_ecds
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
- task_key: nhp-default-ecds
Expand All @@ -50,20 +42,33 @@ resources:
python_wheel_task:
package_name: nhp_data
entry_point: default-ecds
job_cluster_key: generate_nhp_ecds
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
job_clusters:
- job_cluster_key: generate_nhp_ecds
- job_cluster_key: nhp_data
new_cluster:
cluster_name: ""
spark_version: 16.4.x-scala2.13
instance_pool_id: 0129-130615-maw351-pool-pss8mvfy
data_security_mode: SINGLE_USER
runtime_engine: PHOTON
autoscale:
min_workers: 2
max_workers: 8
spark_version: 17.3.x-scala2.13
spark_conf:
spark.databricks.delta.properties.defaults.autoOptimize.optimizeWrite: "true"
spark.databricks.delta.properties.defaults.autoOptimize.autoCompact: "true"
spark.databricks.sql.initial.catalog.namespace: udal_lake_mart
azure_attributes:
availability: SPOT_WITH_FALLBACK_AZURE
spot_bid_max_price: 100
node_type_id: Standard_E16_v3
custom_tags:
Application: UDAL
Workload_Type: Job
Workload_Size: Small
spark_env_vars:
PYSPARK_PYTHON: /databricks/python3/bin/python3
policy_id: 000816B2986C0B29
data_security_mode: USER_ISOLATION
runtime_engine: STANDARD
kind: CLASSIC_PREVIEW
is_single_node: false
num_workers: 1
tags:
group: nhp_data
env: ${bundle.target}
Expand Down
67 changes: 36 additions & 31 deletions databricks_workflows/nhp_data-extract_nhp_for_containers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,6 @@ resources:
jobs:
Extract_NHP_for_containers:
name: Extract NHP for containers
webhook_notifications:
on_success:
- id: 11be34cb-f1f0-42ca-99d8-e7b3e75e20ca
on_failure:
- id: 11be34cb-f1f0-42ca-99d8-e7b3e75e20ca
tasks:
- task_key: run_extract_apc_data
for_each_task:
Expand All @@ -19,7 +14,7 @@ resources:
parameters:
- "{{job.parameters.data_version}}"
- "{{input}}"
job_cluster_key: run_nhp_extracts_cluster
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
- task_key: run_extract_opa_data
Expand All @@ -35,7 +30,7 @@ resources:
parameters:
- "{{job.parameters.data_version}}"
- "{{input}}"
job_cluster_key: run_nhp_extracts_cluster
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
- task_key: run_extract_ecds_data
Expand All @@ -51,7 +46,7 @@ resources:
parameters:
- "{{job.parameters.data_version}}"
- "{{input}}"
job_cluster_key: run_nhp_extracts_cluster
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
- task_key: run_extract_demographic_factors_data
Expand All @@ -68,7 +63,7 @@ resources:
- "{{job.parameters.data_version}}"
- "{{input}}"
- "{{job.parameters.projection_year}}"
job_cluster_key: run_nhp_extracts_cluster
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
- task_key: run_extract_birth_factors_data
Expand All @@ -85,7 +80,7 @@ resources:
- "{{job.parameters.data_version}}"
- "{{input}}"
- "{{job.parameters.projection_year}}"
job_cluster_key: run_nhp_extracts_cluster
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
- task_key: run_extract_inequalities_data
Expand All @@ -101,7 +96,7 @@ resources:
parameters:
- "{{job.parameters.data_version}}"
- "{{input}}"
job_cluster_key: run_nhp_extracts_cluster
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
- task_key: generate_provider_gams
Expand All @@ -112,7 +107,7 @@ resources:
entry_point: model_data-health_status_adjustment-generate_provider_gams
parameters:
- "{{job.parameters.data_version}}"
job_cluster_key: run_nhp_extracts_cluster
job_cluster_key: nhp_data
libraries:
- pypi:
package: pygam==0.9.1
Expand All @@ -125,7 +120,7 @@ resources:
entry_point: model_data-health_status_adjustment-generate_icb_gams
parameters:
- "{{job.parameters.data_version}}"
job_cluster_key: run_nhp_extracts_cluster
job_cluster_key: nhp_data
libraries:
- pypi:
package: pygam==0.9.1
Expand All @@ -138,7 +133,7 @@ resources:
entry_point: model_data-health_status_adjustment-generate_national_gams
parameters:
- "{{job.parameters.data_version}}"
job_cluster_key: run_nhp_extracts_cluster
job_cluster_key: nhp_data
libraries:
- pypi:
package: pygam==0.9.1
Expand All @@ -156,7 +151,7 @@ resources:
parameters:
- "{{input}}"
- "20251001"
job_cluster_key: run_nhp_extracts_cluster
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
- task_key: clean_up
Expand All @@ -167,32 +162,42 @@ resources:
entry_point: model_data-clean_up
parameters:
- "{{job.parameters.data_version}}"
job_cluster_key: run_nhp_extracts_cluster
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
- task_key: move_data
depends_on:
- task_key: clean_up
python_wheel_task:
package_name: nhp_data
entry_point: extract_data
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
job_clusters:
- job_cluster_key: run_nhp_extracts_cluster
- job_cluster_key: nhp_data
new_cluster:
cluster_name: ""
spark_version: 16.4.x-scala2.13
spark_version: 17.3.x-scala2.13
spark_conf:
spark.master: local[*, 4]
spark.databricks.cluster.profile: singleNode
spark.databricks.delta.properties.defaults.autoOptimize.optimizeWrite: "true"
spark.databricks.delta.properties.defaults.autoOptimize.autoCompact: "true"
spark.databricks.sql.initial.catalog.namespace: udal_lake_mart
azure_attributes:
first_on_demand: 1
availability: ON_DEMAND_AZURE
spot_bid_max_price: -1
node_type_id: Standard_E8ads_v5
driver_node_type_id: Standard_E8ads_v5
availability: SPOT_WITH_FALLBACK_AZURE
spot_bid_max_price: 100
node_type_id: Standard_E16_v3
custom_tags:
ResourceClass: SingleNode
project: nhp
Application: UDAL
Workload_Type: Job
Workload_Size: Small
spark_env_vars:
PYSPARK_PYTHON: /databricks/python3/bin/python3
enable_elastic_disk: true
data_security_mode: SINGLE_USER
policy_id: 000816B2986C0B29
data_security_mode: USER_ISOLATION
runtime_engine: STANDARD
num_workers: 0
kind: CLASSIC_PREVIEW
is_single_node: false
num_workers: 1
tags:
group: nhp_data_model
env: ${bundle.target}
Expand Down
45 changes: 25 additions & 20 deletions databricks_workflows/nhp_data-inpatients.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,6 @@ resources:
jobs:
Generate_NHP_Data_IP:
name: Generate NHP Data (IP)
webhook_notifications:
on_success:
- id: 11be34cb-f1f0-42ca-99d8-e7b3e75e20ca
on_failure:
- id: 11be34cb-f1f0-42ca-99d8-e7b3e75e20ca
notification_settings:
no_alert_for_skipped_runs: true
no_alert_for_canceled_runs: true
tasks:
- task_key: run_ip
condition_task:
Expand All @@ -23,7 +15,7 @@ resources:
python_wheel_task:
package_name: nhp_data
entry_point: raw_data-inpatients
job_cluster_key: generate_nhp_apc
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
- task_key: nhp-raw_data-apc_mitigators
Expand All @@ -32,7 +24,7 @@ resources:
python_wheel_task:
package_name: nhp_data
entry_point: raw_data-inpatients_mitigators
job_cluster_key: generate_nhp_apc
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
- task_key: nhp-default-apc
Expand All @@ -41,20 +33,33 @@ resources:
python_wheel_task:
package_name: nhp_data
entry_point: default-apc
job_cluster_key: generate_nhp_apc
job_cluster_key: nhp_data
libraries:
- whl: ../dist/*.whl
job_clusters:
- job_cluster_key: generate_nhp_apc
- job_cluster_key: nhp_data
new_cluster:
cluster_name: ""
spark_version: 16.4.x-scala2.13
instance_pool_id: 0129-130615-maw351-pool-pss8mvfy
data_security_mode: SINGLE_USER
runtime_engine: PHOTON
autoscale:
min_workers: 2
max_workers: 8
spark_version: 17.3.x-scala2.13
spark_conf:
spark.databricks.delta.properties.defaults.autoOptimize.optimizeWrite: "true"
spark.databricks.delta.properties.defaults.autoOptimize.autoCompact: "true"
spark.databricks.sql.initial.catalog.namespace: udal_lake_mart
azure_attributes:
availability: SPOT_WITH_FALLBACK_AZURE
spot_bid_max_price: 100
node_type_id: Standard_E16_v3
custom_tags:
Application: UDAL
Workload_Type: Job
Workload_Size: Small
spark_env_vars:
PYSPARK_PYTHON: /databricks/python3/bin/python3
policy_id: 000816B2986C0B29
data_security_mode: USER_ISOLATION
runtime_engine: STANDARD
kind: CLASSIC_PREVIEW
is_single_node: false
num_workers: 1
tags:
group: nhp_data
env: ${bundle.target}
Expand Down
Loading
Loading