diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 1a9d76103..000000000 Binary files a/.DS_Store and /dev/null differ diff --git a/validation/01_estimate_validation/.DS_Store b/validation/01_estimate_validation/.DS_Store deleted file mode 100644 index 9da79e3df..000000000 Binary files a/validation/01_estimate_validation/.DS_Store and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/.DS_Store b/validation/01_estimate_validation/graphs/.DS_Store deleted file mode 100644 index f95fac451..000000000 Binary files a/validation/01_estimate_validation/graphs/.DS_Store and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/00_README.rtf b/validation/01_estimate_validation/graphs/00_README.rtf deleted file mode 100644 index 54fd79b5e..000000000 --- a/validation/01_estimate_validation/graphs/00_README.rtf +++ /dev/null @@ -1,10 +0,0 @@ -{\rtf1\ansi\ansicpg1252\cocoartf2822 -\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -{\*\expandedcolortbl;;} -\margl1440\margr1440\vieww11520\viewh8400\viewkind0 -\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0 - -\f0\fs24 \cf0 Date: 8/10/2025\ -Internal validation on specifications that include ethnicity. 
\ -} \ No newline at end of file diff --git a/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_share_age.png b/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_share_age.png deleted file mode 100644 index 4c69443fc..000000000 Binary files a/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_ts_16_29_both.png b/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_ts_16_29_both.png deleted file mode 100644 index 21b4c6ee2..000000000 Binary files a/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_ts_16_29_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_ts_16_29_both_income.png b/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_ts_16_29_both_income.png deleted file mode 100644 index 9199631e6..000000000 Binary files a/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_ts_16_29_both_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_ts_16_29_both_partnership.png b/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_ts_16_29_both_partnership.png deleted file mode 100644 index 6cf64b352..000000000 Binary files a/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_ts_16_29_both_partnership.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_ts_16_29_gender.png b/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_ts_16_29_gender.png deleted file mode 100644 index 22726db70..000000000 Binary 
files a/validation/01_estimate_validation/graphs/education/int_validation_E1a_continues_edu_ts_16_29_gender.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_share_age.png b/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_share_age.png deleted file mode 100644 index b22f4b082..000000000 Binary files a/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_ts_16_35_both.png b/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_ts_16_35_both.png deleted file mode 100644 index fa9a1c144..000000000 Binary files a/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_ts_16_35_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_ts_16_35_both_income.png b/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_ts_16_35_both_income.png deleted file mode 100644 index a3cf767a9..000000000 Binary files a/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_ts_16_35_both_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_ts_16_35_both_partnership.png b/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_ts_16_35_both_partnership.png deleted file mode 100644 index 00ccc2376..000000000 Binary files a/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_ts_16_35_both_partnership.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_ts_16_35_gender.png 
b/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_ts_16_35_gender.png deleted file mode 100644 index 235ccca84..000000000 Binary files a/validation/01_estimate_validation/graphs/education/int_validation_E1b_returns_edu_ts_16_35_gender.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_hist_16_29_both.png b/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_hist_16_29_both.png deleted file mode 100644 index 4051b736e..000000000 Binary files a/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_hist_16_29_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_share_age.png b/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_share_age.png deleted file mode 100644 index 3bb7bb2cc..000000000 Binary files a/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_ts_16_29_both.png b/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_ts_16_29_both.png deleted file mode 100644 index 7bdb42d91..000000000 Binary files a/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_ts_16_29_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_ts_16_29_both_income.png b/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_ts_16_29_both_income.png deleted file mode 100644 index d55c62259..000000000 Binary files a/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_ts_16_29_both_income.png and /dev/null differ diff --git 
a/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_ts_16_29_gender.png b/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_ts_16_29_gender.png deleted file mode 100644 index 2d439ed34..000000000 Binary files a/validation/01_estimate_validation/graphs/education/int_validation_E2a_edu_attainment_ts_16_29_gender.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/fertility/int_validation__F1a_fertility_init_edu_hist_18_30.png b/validation/01_estimate_validation/graphs/fertility/int_validation__F1a_fertility_init_edu_hist_18_30.png deleted file mode 100644 index 4d848187c..000000000 Binary files a/validation/01_estimate_validation/graphs/fertility/int_validation__F1a_fertility_init_edu_hist_18_30.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/fertility/int_validation__F1a_fertility_init_edu_share_age.png b/validation/01_estimate_validation/graphs/fertility/int_validation__F1a_fertility_init_edu_share_age.png deleted file mode 100644 index 415b76bc4..000000000 Binary files a/validation/01_estimate_validation/graphs/fertility/int_validation__F1a_fertility_init_edu_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/fertility/int_validation__F1a_fertility_init_edu_ts_18_30.png b/validation/01_estimate_validation/graphs/fertility/int_validation__F1a_fertility_init_edu_ts_18_30.png deleted file mode 100644 index af6a5b7d9..000000000 Binary files a/validation/01_estimate_validation/graphs/fertility/int_validation__F1a_fertility_init_edu_ts_18_30.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/fertility/int_validation__F1a_fertility_init_edu_ts_18_30_both_income.png b/validation/01_estimate_validation/graphs/fertility/int_validation__F1a_fertility_init_edu_ts_18_30_both_income.png deleted file mode 100644 index 4895111e4..000000000 Binary files 
a/validation/01_estimate_validation/graphs/fertility/int_validation__F1a_fertility_init_edu_ts_18_30_both_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_hist_18_45.png b/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_hist_18_45.png deleted file mode 100644 index a03ca9d83..000000000 Binary files a/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_hist_18_45.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_share_age.png b/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_share_age.png deleted file mode 100644 index 28dd9c0c2..000000000 Binary files a/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_ts_18_45.png b/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_ts_18_45.png deleted file mode 100644 index 4e4dd964b..000000000 Binary files a/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_ts_18_45.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_ts_18_45_edu.png b/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_ts_18_45_edu.png deleted file mode 100644 index 0e1af77f0..000000000 Binary files a/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_ts_18_45_edu.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_ts_18_45_income.png 
b/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_ts_18_45_income.png deleted file mode 100644 index 0880f8203..000000000 Binary files a/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_ts_18_45_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_ts_18_45_partnership.png b/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_ts_18_45_partnership.png deleted file mode 100644 index 106599f1b..000000000 Binary files a/validation/01_estimate_validation/graphs/fertility/int_validation__F1b_fertility_left_edu_ts_18_45_partnership.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_init_edu_hist_18_29.png b/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_init_edu_hist_18_29.png deleted file mode 100644 index 641732bb0..000000000 Binary files a/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_init_edu_hist_18_29.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_init_edu_ts_18_29_both.png b/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_init_edu_ts_18_29_both.png deleted file mode 100644 index fdfba5f68..000000000 Binary files a/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_init_edu_ts_18_29_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_init_edu_ts_18_29_female.png b/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_init_edu_ts_18_29_female.png deleted file mode 100644 index 99d9d2520..000000000 Binary files a/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_init_edu_ts_18_29_female.png and /dev/null differ diff --git 
a/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_init_edu_ts_18_29_male.png b/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_init_edu_ts_18_29_male.png deleted file mode 100644 index cc271a23a..000000000 Binary files a/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_init_edu_ts_18_29_male.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_left_edu_hist_all.png b/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_left_edu_hist_all.png deleted file mode 100644 index 094547199..000000000 Binary files a/validation/01_estimate_validation/graphs/health/int_validation_H1a_health_left_edu_hist_all.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/health/int_validation_H1b_health_left_edu_ts_all_both.png b/validation/01_estimate_validation/graphs/health/int_validation_H1b_health_left_edu_ts_all_both.png deleted file mode 100644 index b6979d216..000000000 Binary files a/validation/01_estimate_validation/graphs/health/int_validation_H1b_health_left_edu_ts_all_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/health/int_validation_H1b_health_left_edu_ts_all_female.png b/validation/01_estimate_validation/graphs/health/int_validation_H1b_health_left_edu_ts_all_female.png deleted file mode 100644 index f64ca15d1..000000000 Binary files a/validation/01_estimate_validation/graphs/health/int_validation_H1b_health_left_edu_ts_all_female.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/health/int_validation_H1b_health_left_edu_ts_all_male.png b/validation/01_estimate_validation/graphs/health/int_validation_H1b_health_left_edu_ts_all_male.png deleted file mode 100644 index 8a07f5e2e..000000000 Binary files a/validation/01_estimate_validation/graphs/health/int_validation_H1b_health_left_edu_ts_all_male.png and /dev/null differ diff 
--git a/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_hist_all.png b/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_hist_all.png deleted file mode 100644 index 5eb26c398..000000000 Binary files a/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_hist_all.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_share_age.png b/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_share_age.png deleted file mode 100644 index c12668863..000000000 Binary files a/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_ts_all_both.png b/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_ts_all_both.png deleted file mode 100644 index dda79ba10..000000000 Binary files a/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_ts_all_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_ts_all_both_edu.png b/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_ts_all_both_edu.png deleted file mode 100644 index 5662817bf..000000000 Binary files a/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_ts_all_both_edu.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_ts_all_both_income.png b/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_ts_all_both_income.png deleted file mode 100644 index c71373959..000000000 Binary files 
a/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_ts_all_both_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_ts_all_both_partnership.png b/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_ts_all_both_partnership.png deleted file mode 100644 index f4b09a26f..000000000 Binary files a/validation/01_estimate_validation/graphs/health/int_validation_H2b_disablilty_left_edu_ts_all_both_partnership.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_hist_all.png b/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_hist_all.png deleted file mode 100644 index add30fc7d..000000000 Binary files a/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_hist_all.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_share_age.png b/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_share_age.png deleted file mode 100644 index 6c86a0ade..000000000 Binary files a/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_both.png b/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_both.png deleted file mode 100644 index 76ebccdb2..000000000 Binary files a/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_both_edu.png 
b/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_both_edu.png deleted file mode 100644 index d678963ca..000000000 Binary files a/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_both_edu.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_both_income.png b/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_both_income.png deleted file mode 100644 index ee36ef152..000000000 Binary files a/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_both_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_both_partnership.png b/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_both_partnership.png deleted file mode 100644 index dfa017f36..000000000 Binary files a/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_both_partnership.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_gender.png b/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_gender.png deleted file mode 100644 index 883b20d5a..000000000 Binary files a/validation/01_estimate_validation/graphs/home_ownership/int_validation_HO1a_homeownership_ts_all_gender.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3a_amount_capital_init_edu_hist_all.png b/validation/01_estimate_validation/graphs/income/int_validation_I3a_amount_capital_init_edu_hist_all.png deleted file mode 100644 index 143a03eb2..000000000 Binary files 
a/validation/01_estimate_validation/graphs/income/int_validation_I3a_amount_capital_init_edu_hist_all.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3a_amount_capital_init_edu_hist_all_gender.png b/validation/01_estimate_validation/graphs/income/int_validation_I3a_amount_capital_init_edu_hist_all_gender.png deleted file mode 100644 index ccc94083b..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3a_amount_capital_init_edu_hist_all_gender.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_hist_all.png b/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_hist_all.png deleted file mode 100644 index 0e9e64d02..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_hist_all.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_share_age.png b/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_share_age.png deleted file mode 100644 index 8e38267b5..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_ts_all_both.png b/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_ts_all_both.png deleted file mode 100644 index 1708b8601..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_ts_all_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_ts_all_both_income.png 
b/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_ts_all_both_income.png deleted file mode 100644 index 8c74fd0f2..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_ts_all_both_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_ts_all_both_partnership.png b/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_ts_all_both_partnership.png deleted file mode 100644 index a5cd26424..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_ts_all_both_partnership.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_ts_all_gender.png b/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_ts_all_gender.png deleted file mode 100644 index 60cf6530d..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3a_selection_capital_init_edu_ts_all_gender.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3b_amount_capital_left_edu_hist_all.png b/validation/01_estimate_validation/graphs/income/int_validation_I3b_amount_capital_left_edu_hist_all.png deleted file mode 100644 index 893bf89e3..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3b_amount_capital_left_edu_hist_all.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3b_amount_capital_left_edu_hist_all_edu.png b/validation/01_estimate_validation/graphs/income/int_validation_I3b_amount_capital_left_edu_hist_all_edu.png deleted file mode 100644 index 37585960f..000000000 Binary files 
a/validation/01_estimate_validation/graphs/income/int_validation_I3b_amount_capital_left_edu_hist_all_edu.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3b_amount_capital_left_edu_hist_all_gender.png b/validation/01_estimate_validation/graphs/income/int_validation_I3b_amount_capital_left_edu_hist_all_gender.png deleted file mode 100644 index b0ace9d07..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3b_amount_capital_left_edu_hist_all_gender.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_hist_all.png b/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_hist_all.png deleted file mode 100644 index 6016e812a..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_hist_all.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_share_age.png b/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_share_age.png deleted file mode 100644 index d939f8295..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_ts_all_both.png b/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_ts_all_both.png deleted file mode 100644 index 293c848cd..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_ts_all_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_ts_all_both_income.png 
b/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_ts_all_both_income.png deleted file mode 100644 index 766e20e2e..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_ts_all_both_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_ts_all_both_partnership.png b/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_ts_all_both_partnership.png deleted file mode 100644 index f5858a9c5..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_ts_all_both_partnership.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_ts_all_gender.png b/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_ts_all_gender.png deleted file mode 100644 index 0bb884194..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I3b_selection_capital_left_edu_ts_all_gender.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I4b_amount_pension_retired_hist_all.png b/validation/01_estimate_validation/graphs/income/int_validation_I4b_amount_pension_retired_hist_all.png deleted file mode 100644 index c1e931a94..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I4b_amount_pension_retired_hist_all.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I4b_amount_pension_retired_hist_all_edu.png b/validation/01_estimate_validation/graphs/income/int_validation_I4b_amount_pension_retired_hist_all_edu.png deleted file mode 100644 index 7199d24c3..000000000 Binary files 
a/validation/01_estimate_validation/graphs/income/int_validation_I4b_amount_pension_retired_hist_all_edu.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I4b_amount_pension_retired_hist_all_gender.png b/validation/01_estimate_validation/graphs/income/int_validation_I4b_amount_pension_retired_hist_all_gender.png deleted file mode 100644 index eb5269251..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I4b_amount_pension_retired_hist_all_gender.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I5a_amount_pension_retired_hist_all.png b/validation/01_estimate_validation/graphs/income/int_validation_I5a_amount_pension_retired_hist_all.png deleted file mode 100644 index 9867f03a7..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I5a_amount_pension_retired_hist_all.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I5a_amount_pension_retired_hist_all_edu.png b/validation/01_estimate_validation/graphs/income/int_validation_I5a_amount_pension_retired_hist_all_edu.png deleted file mode 100644 index edd44df48..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I5a_amount_pension_retired_hist_all_edu.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I5a_amount_pension_retired_hist_all_gender.png b/validation/01_estimate_validation/graphs/income/int_validation_I5a_amount_pension_retired_hist_all_gender.png deleted file mode 100644 index 15a473722..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I5a_amount_pension_retired_hist_all_gender.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_hist_all.png 
b/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_hist_all.png deleted file mode 100644 index fba960afa..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_hist_all.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_share_age.png b/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_share_age.png deleted file mode 100644 index 87afcebef..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_ts_all_both.png b/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_ts_all_both.png deleted file mode 100644 index 1e17b338c..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_ts_all_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_ts_all_both_income.png b/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_ts_all_both_income.png deleted file mode 100644 index a2a572c1a..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_ts_all_both_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_ts_all_both_partnership.png 
b/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_ts_all_both_partnership.png deleted file mode 100644 index 659ea047a..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_ts_all_both_partnership.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_ts_all_gender.png b/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_ts_all_gender.png deleted file mode 100644 index ed652a346..000000000 Binary files a/validation/01_estimate_validation/graphs/income/int_validation_I5a_selection_private_pension_notretired_ts_all_gender.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_hist_all.png b/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_hist_all.png deleted file mode 100644 index b4d5fa08b..000000000 Binary files a/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_hist_all.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_share_age.png b/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_share_age.png deleted file mode 100644 index 5ff1a2c3a..000000000 Binary files a/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_ts_all_both.png b/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_ts_all_both.png deleted file mode 100644 index 
0cb74decb..000000000 Binary files a/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_ts_all_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_ts_all_both_edu.png b/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_ts_all_both_edu.png deleted file mode 100644 index 91f3cc6b6..000000000 Binary files a/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_ts_all_both_edu.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_ts_all_both_income.png b/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_ts_all_both_income.png deleted file mode 100644 index 5089acc3e..000000000 Binary files a/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_ts_all_both_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_ts_all_gender.png b/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_ts_all_gender.png deleted file mode 100644 index ddca8060b..000000000 Binary files a/validation/01_estimate_validation/graphs/leave_parental_home/int_validation_P1a_leave_parental_home_ts_all_gender.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_hist_all.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_hist_all.png deleted file mode 100644 index 1cdfbbc5e..000000000 Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_hist_all.png and /dev/null differ diff --git 
a/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_share_age.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_share_age.png deleted file mode 100644 index 7a365a831..000000000 Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_ts_all_both.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_ts_all_both.png deleted file mode 100644 index 2ef99e6dd..000000000 Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_ts_all_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_ts_all_both_income.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_ts_all_both_income.png deleted file mode 100644 index 7b6d1cfbe..000000000 Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_ts_all_both_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_ts_all_gender.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_ts_all_gender.png deleted file mode 100644 index cb9e690b8..000000000 Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U1a_partnership_init_edu_ts_all_gender.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_hist_all.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_hist_all.png deleted file mode 100644 index 
5ad14a8eb..000000000 Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_hist_all.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_share_age.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_share_age.png deleted file mode 100644 index 0c8971e49..000000000 Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_ts_all_both.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_ts_all_both.png deleted file mode 100644 index 24aef5c55..000000000 Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_ts_all_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_ts_all_both_income.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_ts_all_both_income.png deleted file mode 100644 index 168af02a9..000000000 Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_ts_all_both_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_ts_all_gender.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_ts_all_gender.png deleted file mode 100644 index 0ce7b6e37..000000000 Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U1b_partnership_left_edu_ts_all_gender.png and /dev/null differ diff --git 
a/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_init_edu_ts_all_both_edu.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_init_edu_ts_all_both_edu.png deleted file mode 100644 index bd03d88b3..000000000 Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_init_edu_ts_all_both_edu.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_init_edu_ts_all_both_income.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_init_edu_ts_all_both_income.png deleted file mode 100644 index f62507685..000000000 Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_init_edu_ts_all_both_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_left_edu_hist_all.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_left_edu_hist_all.png deleted file mode 100644 index f665a2aa0..000000000 Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_left_edu_hist_all.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_left_edu_share_age.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_left_edu_share_age.png deleted file mode 100644 index 22807f10d..000000000 Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_left_edu_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_left_edu_ts_all_both.png b/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_left_edu_ts_all_both.png deleted file mode 100644 index 768d2d30c..000000000 
Binary files a/validation/01_estimate_validation/graphs/partnership/int_validation_U2b_separation_left_edu_ts_all_both.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_hist_50.png b/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_hist_50.png deleted file mode 100644 index 3908cc48e..000000000 Binary files a/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_hist_50.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_share_age.png b/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_share_age.png deleted file mode 100644 index 42c0493ba..000000000 Binary files a/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_ts_50.png b/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_ts_50.png deleted file mode 100644 index 14c873a57..000000000 Binary files a/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_ts_50.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_ts_50_both_edu.png b/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_ts_50_both_edu.png deleted file mode 100644 index 2d92efba7..000000000 Binary files a/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_ts_50_both_edu.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_ts_50_both_income.png 
b/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_ts_50_both_income.png deleted file mode 100644 index b9162017f..000000000 Binary files a/validation/01_estimate_validation/graphs/retirement/int_validation_R1a_retirement_single_ts_50_both_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_hist_50.png b/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_hist_50.png deleted file mode 100644 index 996e4ef93..000000000 Binary files a/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_hist_50.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_share_age.png b/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_share_age.png deleted file mode 100644 index fe936a20a..000000000 Binary files a/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_share_age.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_ts_50.png b/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_ts_50.png deleted file mode 100644 index 3c61c0327..000000000 Binary files a/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_ts_50.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_ts_50_both_edu.png b/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_ts_50_both_edu.png deleted file mode 100644 index 44958947d..000000000 Binary files a/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_ts_50_both_edu.png and /dev/null differ diff 
--git a/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_ts_50_both_income.png b/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_ts_50_both_income.png deleted file mode 100644 index 3a2d8f54a..000000000 Binary files a/validation/01_estimate_validation/graphs/retirement/int_validation_R1b_retirement_partnered_ts_50_both_income.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/wages/int_validation_wages_hist_f_npw.png b/validation/01_estimate_validation/graphs/wages/int_validation_wages_hist_f_npw.png deleted file mode 100644 index fdce7406b..000000000 Binary files a/validation/01_estimate_validation/graphs/wages/int_validation_wages_hist_f_npw.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/wages/int_validation_wages_hist_f_pw.png b/validation/01_estimate_validation/graphs/wages/int_validation_wages_hist_f_pw.png deleted file mode 100644 index bc80eab39..000000000 Binary files a/validation/01_estimate_validation/graphs/wages/int_validation_wages_hist_f_pw.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/wages/int_validation_wages_hist_m_npw.png b/validation/01_estimate_validation/graphs/wages/int_validation_wages_hist_m_npw.png deleted file mode 100644 index 133bb78ef..000000000 Binary files a/validation/01_estimate_validation/graphs/wages/int_validation_wages_hist_m_npw.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/wages/int_validation_wages_hist_m_pw.png b/validation/01_estimate_validation/graphs/wages/int_validation_wages_hist_m_pw.png deleted file mode 100644 index cc6cd92ed..000000000 Binary files a/validation/01_estimate_validation/graphs/wages/int_validation_wages_hist_m_pw.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/wages/level_NWM.png b/validation/01_estimate_validation/graphs/wages/level_NWM.png deleted file mode 
100644 index 3a26c0088..000000000 Binary files a/validation/01_estimate_validation/graphs/wages/level_NWM.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/wages/level_NWW.png b/validation/01_estimate_validation/graphs/wages/level_NWW.png deleted file mode 100644 index 34ca3591b..000000000 Binary files a/validation/01_estimate_validation/graphs/wages/level_NWW.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/wages/level_WM.png b/validation/01_estimate_validation/graphs/wages/level_WM.png deleted file mode 100644 index 7b74ba884..000000000 Binary files a/validation/01_estimate_validation/graphs/wages/level_WM.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/wages/level_WW.png b/validation/01_estimate_validation/graphs/wages/level_WW.png deleted file mode 100644 index c305d7e10..000000000 Binary files a/validation/01_estimate_validation/graphs/wages/level_WW.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/wages/log_NWM.png b/validation/01_estimate_validation/graphs/wages/log_NWM.png deleted file mode 100644 index ec524c946..000000000 Binary files a/validation/01_estimate_validation/graphs/wages/log_NWM.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/wages/log_NWW.png b/validation/01_estimate_validation/graphs/wages/log_NWW.png deleted file mode 100644 index 64d05b685..000000000 Binary files a/validation/01_estimate_validation/graphs/wages/log_NWW.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/wages/log_WM.png b/validation/01_estimate_validation/graphs/wages/log_WM.png deleted file mode 100644 index 29b2bb9f9..000000000 Binary files a/validation/01_estimate_validation/graphs/wages/log_WM.png and /dev/null differ diff --git a/validation/01_estimate_validation/graphs/wages/log_WW.png b/validation/01_estimate_validation/graphs/wages/log_WW.png deleted file mode 100644 index 
8f635ae1e..000000000 Binary files a/validation/01_estimate_validation/graphs/wages/log_WW.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/.DS_Store b/validation/02_simulated_output_validation/.DS_Store deleted file mode 100644 index 186b37e4c..000000000 Binary files a/validation/02_simulated_output_validation/.DS_Store and /dev/null differ diff --git a/validation/02_simulated_output_validation/do_files/00_master.do b/validation/02_simulated_output_validation/do_files/00_master.do index 59642203e..6f9e36d05 100644 --- a/validation/02_simulated_output_validation/do_files/00_master.do +++ b/validation/02_simulated_output_validation/do_files/00_master.do @@ -1,15 +1,14 @@ /******************************************************************************* -* PROJECT: SIMPATHS +* PROJECT: SimPaths UK * SECTION: Validation * OBJECT: Master file * AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 +* LAST UPDATE: Jan 2026 * COUNTRY: UK - * DESCRIPTION: This master file sets the globals, directories and * parameters, and runs the do files to construct the * validation datasets and plots the comparison graphs. - +******************************************************************************** * NOTES: UKHLS initial populations data is used to create the * validaton targets. 
*******************************************************************************/ @@ -22,7 +21,11 @@ set type double /******************************************************************************* -* DEFINE COUNTRY GLOBALS +* 1 - STATIC SET UP +*******************************************************************************/ + +/******************************************************************************* +* DEFINE COUNTRY & RUN GLOBALS *******************************************************************************/ global country = "UK" @@ -37,35 +40,28 @@ global silc_UDB = "UDB_c" *******************************************************************************/ /* -Currently save data locally +"/Users/ashleyburdett/Library/CloudStorage/Box-Box/CeMPA shared area/_SimPaths/_SimPathsUK" +"C:\Users\aburde\Box\CeMPA shared area\_SimPaths\_SimPathsUK" */ -* VM -//global path "C:\Users\aburde\Box\ESPON - OVERLAP\_countries\HU\validation" - -* Mac +* Individual directory +global dir_ind "/Users/ashleyburdett/Library/CloudStorage/Box-Box/CeMPA shared area/_SimPaths/_SimPathsUK" * Main folder -global path "/Users/ashleyburdett/Documents/ESPON/${country}/validation" - -* Observed data -global EUSILC_original_crosssection "N:\CeMPA\data\EU_SILC\2024\_Cross_2004-2023_full_set\_Cross_2004-2023_full_set" +global path "$dir_ind/validation/02_simulated_output_validation" * Do files folder global dir_do_files "$path/do_files" * Output files folder -global dir_work "$path/data" +global dir_work "$path/data" -* Simulated data folder -global dir_simulated_data "$path/data" +* UKHLS dataset folder +global dir_UKHLS_data "$dir_ind/input 2026.03.04" * Data folder global dir_data "$path/data" -* Graphs folder -global dir_output_files "$path/graphs" - /******************************************************************************* * DEFINE SAMPLE PARAMETERS @@ -76,61 +72,129 @@ global use_assert "0" * Trim outliers global trim_outliers true -* Min age of individuals included in plots 
-global min_age 18 - -* Max age of individuals included in plots -global max_age 65 - * Observations up to and including this simulated year to be kept in the sample -global max_year 2023 +global min_sim_year 2011 +global max_sim_year 2023 * Define age to become responsible as defined in the simulation global age_become_responsible 18 * Set labour supply categories -global ls_cat "ZERO TEN TWENTY THIRTY FORTY" +global ls_cat "ZERO TEN TWENTY THIRTY THIRTY_EIGHT FORTY_FIVE FIFTY_FIVE" // works if the genders are symmetric // still need to alter code in specific do files to print graphs -global ls_cat_labour "TEN TWENTY THIRTY FORTY" +global ls_cat_labour /// + "TEN TWENTY THIRTY THIRTY_EIGHT FORTY_FIVE FIFTY_FIVE" + +* Number of runs (N-1 because numbering starts at 0) +global max_n_runs 4 +* Run commons folder name +global folder 20260306 -* Max hours work per week in sim -global max_hours 48 /******************************************************************************* -RUN DO FILES +* RUN DO FILES *******************************************************************************/ -* Prepare simulated data -do "${dir_do_files}/01_prepare_simulated_data.do" -do "${dir_do_files}/02_create_simulated_variables.do" - * Prepare observed data -do "${dir_do_files}/03_prepare_UKHLS_data.do" -do "${dir_do_files}/05_create_UKHLS_validation_targets.do" - - -* Plot figures -do "${dir_do_files}/06_01_plot_activity_status.do" -do "${dir_do_files}/06_02_plot_education_level.do" -do "${dir_do_files}/06_03_plot_gross_income.do" -do "${dir_do_files}/06_04_plot_gross_labour_income.do" -do "${dir_do_files}/06_05_plot_capital_income.do" -do "${dir_do_files}/06_05_plot_pension_income.do" -do "${dir_do_files}/06_07_plot_disposable_income.do" -do "${dir_do_files}/06_08_plot_equivalised_disposable_income.do" -do "${dir_do_files}/06_09_plot_hourly_wages.do" -do "${dir_do_files}/06_10_plot_hours_worked.do" -do "${dir_do_files}/06_11_plot_income_shares.do" -do 
"${dir_do_files}/06_12_plot_partnership_status.do" -do "${dir_do_files}/06_13_plot_health.do" -do "${dir_do_files}/06_14_plot_at_risk_of_poverty.do" -do "${dir_do_files}/06_15_plot_income_ratios.do" -do "${dir_do_files}/06_16_plot_number_children.do" -do "${dir_do_files}/06_17_plot_disability" - -* Calculate other statistics -do "${dir_do_files}/07_01_correlations.do" +do "${dir_do_files}/03_create_UKHLS_validation_targets.do" + + +/******************************************************************************* +* 2 - DYNAMIC SET UP +*******************************************************************************/ + +* List of SimPath Set ups to loop through +/* +Permits looping over output from multiple different model set-ups e.g. with and +without ferility alignment, with and without employemnet alignment +*/ +local alignments "0_default" + +/* + 0_default 1_all_alignments_off 2_pop_on 3_pop_fertility_on 4_pop_fertility_inschool_on 5_pop_fertility_inschool_cohabit_on 6_pop_fertility_inschool_cohabit_empl_on +*/ + +foreach align in `alignments' { + + +/******************************************************************************* +* DEFINE DIRECTORIES +*******************************************************************************/ + + * Simulated data CSV files folder + global dir_simulated_data "${dir_ind}/_new_release/output/`align'" + + * Graphs folder + global dir_output_files "$path/graphs/`align'" + + +/******************************************************************************* +* CREATE OUTPUT FOLDERS +*******************************************************************************/ + + /* + mkdir "$path/graphs/`align'" + + mkdir "$path/graphs/`align'/children" + mkdir "$path/graphs/`align'/correlations" + mkdir "$path/graphs/`align'/disability" + mkdir "$path/graphs/`align'/economic_activity" + mkdir "$path/graphs/`align'/education" + mkdir "$path/graphs/`align'/health" + mkdir "$path/graphs/`align'/hours_worked" + mkdir "$path/graphs/`align'/income" 
+ mkdir "$path/graphs/`align'/income/capital_income" + mkdir "$path/graphs/`align'/income/pension_income" + mkdir "$path/graphs/`align'/income/disposable_income" + mkdir "$path/graphs/`align'/income/equivalised_disposable_income" + mkdir "$path/graphs/`align'/income/gross_income" + mkdir "$path/graphs/`align'/income/gross_labour_income" + mkdir "$path/graphs/`align'/income/income_shares" + mkdir "$path/graphs/`align'/inequality" + mkdir "$path/graphs/`align'/partnership" + mkdir "$path/graphs/`align'/poverty" + mkdir "$path/graphs/`align'/wages" + mkdir "$path/graphs/`align'/social_care" + +} + */ + + +/******************************************************************************* +* RUN DO FILES +*******************************************************************************/ + * Prepare simulated data + do "${dir_do_files}/01_prepare_simulated_data.do" + do "${dir_do_files}/02_create_simulated_variables.do" + + + * Plot figures + do "${dir_do_files}/04_01_plot_activity_status.do" + do "${dir_do_files}/04_02_plot_education_level.do" + do "${dir_do_files}/04_03_plot_gross_income.do" + do "${dir_do_files}/04_04_plot_gross_labour_income.do" + do "${dir_do_files}/04_05_plot_capital_income.do" + do "${dir_do_files}/04_06_plot_pension_income.do" + do "${dir_do_files}/04_07_plot_disposable_income.do" + do "${dir_do_files}/04_08_plot_equivalised_disposable_income.do" + do "${dir_do_files}/04_09_plot_hourly_wages.do" + do "${dir_do_files}/04_10_0_plot_hours_worked.do" + do "${dir_do_files}/04_10_1_plot_hours_worked_discrete.do" + do "${dir_do_files}/04_11_plot_income_shares.do" + do "${dir_do_files}/04_12_plot_partnership_status.do" + do "${dir_do_files}/04_13_plot_health.do" + do "${dir_do_files}/04_14_plot_at_risk_of_poverty.do" + do "${dir_do_files}/04_15_plot_inequality.do" + do "${dir_do_files}/04_16_plot_number_children.do" + do "${dir_do_files}/04_17_plot_disability.do" + do "${dir_do_files}/04_18_plot_social_care.do" + + + * Calculate other statistics + //do 
"${dir_do_files}/07_01_correlations.do" + +} diff --git a/validation/02_simulated_output_validation/do_files/01_prepare_simulated_data.do b/validation/02_simulated_output_validation/do_files/01_prepare_simulated_data.do index 305782f12..2baa6b268 100644 --- a/validation/02_simulated_output_validation/do_files/01_prepare_simulated_data.do +++ b/validation/02_simulated_output_validation/do_files/01_prepare_simulated_data.do @@ -1,146 +1,150 @@ -******************************************************************************** +/******************************************************************************* +* PROJECT: SimPaths UK * SECTION: Validation * OBJECT: Simulation data pre-processing * AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 +* LAST UPDATE: Jan 2026 * COUNTRY: UK - * DESCRIPTION: This file loads the simulated output from SimPaths and * preserves the relevant variables for validation. -* -* NOTES: ******************************************************************************** +* NOTES: Works in Stata 18 onwards - able to preserve case in var +* names. 
+*******************************************************************************/ +// currently hh information not used +/* * Import required variables from household file -import delimited "${dir_simulated_data}/Household.csv", clear -keep run time id_household -rename id_household idhousehold -keep if time <= ${max_year} +* Import required variables from benefit unit file +forvalues n = 1/$max_n_runs { + + import delimited "${dir_simulated_data}/run`n'/csv/Household.csv", /// + varnames(1) case(preserve) clear + + keep run time id_Household + + rename id_Household idHh + + keep if time <= ${max_year} + + save "${dir_data}/household_sim`n'", replace + +} + +* Combine runs +use "${dir_data}/household_sim1", clear + +forvalues n = 2/$max_n_runs { + + append using "${dir_data}/household_sim`n'" + +} + +save "${dir_data}/household_sim", replace + +* Tidy up +forvalues n = 1/$max_n_runs { + + erase "${dir_data}/household_sim`n'.dta" -save "${dir_simulated_data}/household_sim", replace +} +*/ * Import required variables from benefit unit file -import delimited "${dir_simulated_data}/BenefitUnit.csv", clear +forvalues n = 0/$max_n_runs { -keep run time idhousehold id_benefitunit disposableincome /// - equivaliseddisposableincomeyearl grossincomemonthly /*dhhtp_c4*/ -rename id_benefitunit idbenefitunit -keep if time <= ${max_year} + import delimited /// + "${dir_simulated_data}/${folder}_`n'/csv/BenefitUnit.csv", /// + varnames(1) case(preserve) clear -save "${dir_simulated_data}/benefitunit_sim", replace + keep run time idHh id_BenefitUnit yDispMonth yDispEquivYear yGrossMonth + + rename id_BenefitUnit idBu + + replace run = `n' + + keep if time <= ${max_sim_year} + + save "${dir_data}/benefitunit_sim`n'", replace + +} + +* Combine runs +use "${dir_data}/benefitunit_sim0", clear + +forvalues n = 1/$max_n_runs { + + append using "${dir_data}/benefitunit_sim`n'" + +} + +save "${dir_data}/benefitunit_sim", replace + +* Tidy up +forvalues n = 0/$max_n_runs { + + erase 
"${dir_data}/benefitunit_sim`n'.dta" + +} * Import required variables from person file -import delimited "${dir_simulated_data}/Person.csv", clear +forvalues n = 0/$max_n_runs { + + import delimited "${dir_simulated_data}/${folder}_`n'/csv/Person.csv", /// + varnames(1) case(preserve) clear + + keep run time id_Person idPartner idBu idMother idFather demAge /// + demMaleFlag demPartnerStatus labC4 eduHighestC4 /// + healthDsblLongtermFlag healthSelfRated healthMentalMcs /// + healthPhysicalPcs /// + yNonBenPersGrossMonth yEmpPersGrossMonth yCapitalPersMonth /// + yPensPersGrossMonth yMiscPersGrossMonth /// + labHrsWorkWeek labHrsWorkEnumWeek labHrsWorkWeek /// + labWageFullTimeHrly /// + careNeedFlag careHrsInformal careHrsFormal /// + careHrsProvidedWeek careFormalX + + rename id_Person idPers + + replace run = `n' + + keep if time <= ${max_sim_year} + + save "${dir_data}/person_sim`n'", replace + +} + +* Combine runs +use "${dir_data}/person_sim0", clear + +forvalues n = 1/$max_n_runs { + + append using "${dir_data}/person_sim`n'" + +} + +save "${dir_data}/person_sim", replace + +* Tidy up +forvalues n = 0/$max_n_runs { -keep run time id_person idpartner dcpst idbenefitunit dag les_c4 /// - deh_c3 dhe dgn ypnbihs_dv yplgrs_dv ypncp ypnoab dhe_mcs dhe_pcs /// - fulltimehourlyearningspotential laboursupplyweekly hoursworkedweekly /// - needsocialcare dlltsd -rename id_person idperson -keep if time <= ${max_year} - -save "${dir_simulated_data}/person_sim", replace + erase "${dir_data}/person_sim`n'.dta" +} + * Combine simulated data -use "${dir_simulated_data}/person_sim", clear - -merge m:1 run time idbenefitunit using /// - "${dir_simulated_data}/benefitunit_sim", nogen keep(matched) - -save "${dir_simulated_data}/baseline_sim", replace - - -* Add labels -order run time id* - -destring yplgrs_dv ypnbihs_dv ypncp ypnoab, replace ignore("null") - -capture label var run "Simulation repetition number" -capture label var time "year" -capture label var idhousehold 
"Household ID" -capture label var idbenefitunit "Benefit unit ID" -capture label var idfemale "Benefit unit responsible female ID" -capture label var idmale "Benefit unit responsible male ID" -capture label var idperson "Person ID" -capture label var idfather "Father ID" -capture label var idmother "Mother ID" -capture label var idpartner "Partner ID" -capture label var atriskofpoverty "At risk of poverty indicator using 60% of median income" -capture label var dhhtp_c4 "Benefit unit composition" -capture label var disposableincomemonthly "Disposable income, benefit unit, monthly" -capture label var equivaliseddisposableincomeyearl "Equivalised disposable income, benefit unit, yearly" -capture label var occupancy "Benefit unit occupancy (responsible persons)" -capture label var region "Region" -capture label var size "Benefit unit size" -capture label var ydses_c5 "Benefit unit gross equivalised normalised income quintile" -capture label var adultchildflag "Adult child living at home" -capture label var dag "Age" -capture label var dcpagdf "Difference in age between partners" -capture label var dcpen "Entered partnership" -capture label var dcpex "Exited partnership " -capture label var dcpst "Partnership status" -capture label var dcpyy "years in partnership" -capture label var ded "In continuous education" -capture label var deh_c3 "Education level, highest" -capture label var dehf_c3 "Father's education level, highest" -capture label var dehm_c3 "Mother's education level, highest" -capture label var dehsp_c3 "Partner's education level, highest'" -capture label var der "Returned to education" -capture label var dgn "Gender" -capture label var dhe "Self-rated health" -capture label var dhm "Pscyhological distress score" -capture label var dhm_ghq "Psychological distress case" -capture label var dhesp "Partner's self-rated health" -capture capture label var dlltsd "Long-term sick / disabled" -capture label var sedex "Left education this year" -capture capture label 
var women_fertility "Indicator, women aged 18 to 44 who can have children" -capture label var laboursupplyweekly "Discretized hours of labour supply, weekly" -capture label var hoursworkedweekly "Continuous hours of labour supply, weekly" -capture label var les_c4 "Activity status" -capture label var lessp_c4 "Partner's activity status" -capture label var lesdf_c4 "Own and partner's activity status (only if partnered)" -capture label var fulltimehourlyearningspotential "Potential (model-based) hourly gross wage" -capture label var sindex "Security index (5-year lead)" -capture label var sindexnormalised "Normalised security index (5-year lead)" -capture label var scaling_factor "Scaling factor (one individual represents this many in population)" -capture label var ynbcpdf_dv "Difference between (asinh of) own and spouse's gross personal non-benefit income" -capture gen yplgrs_dv_lvl = sinh(yplgrs_dv) -capture label var yplgrs_dv_lvl "Gross personal employment income" -capture label var yplgrs_dv "Gross personal employment income, asinh" -capture destring ypnbihs_dv, force replace -capture gen ypnbihs_dv_lvl = sinh(ypnbihs_dv) -capture label var ypnbihs_dv_lvl "Gross personal non-benefit income" -capture label var ypnbihs_dv "Gross personal non-benefit income, asinh" -capture gen ypncp_lvl = sinh(ypncp) -capture label var ypncp_lvl "Capital income" -capture label var ypncp "Capital income, asinh" -capture gen ypnoab_lvl = sinh(ypnoab) -capture label var ypnoab_lvl "Pension income," -capture label var ypnoab "Pension income, asinh" -capture gen yptciihs_dv_lvl = sinh(yptciihs_dv) -capture label var yptciihs_dv_lvl "Gross personal non-employment" -capture label var grossincomemonthly "Gross monthly benefit unit income" -capture label define dhe_lbl 1 "Poor" 2 "Fair" 3 "Good" 4 "VeryGood" 5 "Excellent" - -rename dhe dhe2 -encode dhe2, gen(dhe) label(dhe_lbl) -drop dhe2 -capture drop weight - -rename equivaliseddisposableincomeyearl equivalisedincome -rename time year - 
-keep if run <= 3 - -save "$dir_data/simulated_data_prep1.dta", replace +use "${dir_data}/person_sim", clear + +merge m:1 run time idBu using "${dir_data}/benefitunit_sim" + +save "$dir_data/loaded_simulation_data.dta", replace * Tidy up -erase "${dir_simulated_data}/household_sim.dta" -erase "${dir_simulated_data}/person_sim.dta" -erase "${dir_simulated_data}/benefitunit_sim.dta" -erase "${dir_simulated_data}/baseline_sim.dta" +//erase "${dir_data}/household_sim.dta" +erase "${dir_data}/person_sim.dta" +erase "${dir_data}/benefitunit_sim.dta" diff --git a/validation/02_simulated_output_validation/do_files/02_create_simulated_variables.do b/validation/02_simulated_output_validation/do_files/02_create_simulated_variables.do index d6c140cac..4d02d2997 100644 --- a/validation/02_simulated_output_validation/do_files/02_create_simulated_variables.do +++ b/validation/02_simulated_output_validation/do_files/02_create_simulated_variables.do @@ -1,48 +1,54 @@ /******************************************************************************* +* PROJECT: SimPaths UK * SECTION: Validation * OBJECT: Simulation data processing * AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 +* LAST UPDATE: Feb 2026 * COUNTRY: UK - * DESCRIPTION: This file creates variables from the simulated data * that are used to generate in the comparison plots. - -* NOTES: Income amounts are converted from monthly to annual. -* Two datasets are save, one containing all observations and -* one containing only the adult population (18-65 inc). +******************************************************************************** +* NOTES: Income amounts are converted from IHS to levels and from +* monthly to annual. +* Two datasets are saved, one containing all observations +* (..._full) and one containing only the adult population +* (18-65 inc). 
*******************************************************************************/ +* Generate/Tidy required variables + +* Load simulated panel +use "$dir_data/loaded_simulation_data.dta", clear + +** TIME -* Generate required variables +rename time year -use "$dir_data/simulated_data_prep1.dta", clear -* Sex -gen sim_sex = . -replace sim_sex = 1 if dgn == "Male" -replace sim_sex = 2 if dgn == "Female" +** DEMOGRAPHICS -la def sim_sex_lb 1 "Male" 2 "Female" -la val sim_sex sim_sex_lb +* Gender +gen demMaleFlag_coded = . +replace demMaleFlag_coded = 1 if demMaleFlag == "Male" +replace demMaleFlag_coded = 0 if demMaleFlag == "Female" -* Hours worked weekly -replace hoursworkedweekly = "" if hoursworkedweekly == "null" -destring hoursworkedweekly, replace +drop demMaleFlag +rename demMaleFlag_coded demMaleFlag +* Age * Define age groups gen ageGroup = . -replace ageGroup = 0 if dag >= 0 & dag <= 14 -replace ageGroup = 1 if dag >= 15 & dag <= 19 -replace ageGroup = 2 if dag >= 20 & dag <= 24 -replace ageGroup = 3 if dag >= 25 & dag <= 29 -replace ageGroup = 4 if dag >= 30 & dag <= 34 -replace ageGroup = 5 if dag >= 35 & dag <= 39 -replace ageGroup = 6 if dag >= 40 & dag <= 59 -replace ageGroup = 7 if dag >= 60 & dag <= 79 -replace ageGroup = 8 if dag >= 80 & dag <= 100 - -label def ageGrouplb /// +replace ageGroup = 0 if demAge >= 0 & demAge < 15 +replace ageGroup = 1 if demAge >= 15 & demAge < 20 +replace ageGroup = 2 if demAge >= 20 & demAge < 25 +replace ageGroup = 3 if demAge >= 25 & demAge < 30 +replace ageGroup = 4 if demAge >= 30 & demAge < 35 +replace ageGroup = 5 if demAge >= 35 & demAge < 40 +replace ageGroup = 6 if demAge >= 40 & demAge < 60 +replace ageGroup = 7 if demAge >= 60 & demAge < 80 +replace ageGroup = 8 if demAge >= 80 & demAge <= 100 + +label def ageGroup /// 0 "ageGroup_0_14" /// 1 "ageGroup_15_19" /// 2 "ageGroup_20_24" /// @@ -53,18 +59,18 @@ label def ageGrouplb /// 7 "ageGroup_60_79" /// 8 "ageGroup_80_100" /// -label val ageGroup ageGrouplb 
+label val ageGroup ageGroup gen ageGroup2 = . -replace ageGroup2 = 0 if dag >= 16 & dag <= 24 -replace ageGroup2 = 1 if dag >= 25 & dag <= 29 -replace ageGroup2 = 2 if dag >= 30 & dag <= 34 -replace ageGroup2 = 3 if dag >= 35 & dag <= 39 -replace ageGroup2 = 4 if dag >= 40 & dag <= 44 -replace ageGroup2 = 5 if dag >= 45 & dag <= 49 -replace ageGroup2 = 6 if dag >= 50 & dag <= 54 -replace ageGroup2 = 7 if dag >= 55 & dag <= 59 -replace ageGroup2 = 8 if dag >= 60 & dag <= 65 +replace ageGroup2 = 0 if demAge >= 16 & demAge < 25 +replace ageGroup2 = 1 if demAge >= 25 & demAge < 30 +replace ageGroup2 = 2 if demAge >= 30 & demAge < 35 +replace ageGroup2 = 3 if demAge >= 35 & demAge < 40 +replace ageGroup2 = 4 if demAge >= 40 & demAge < 45 +replace ageGroup2 = 5 if demAge >= 45 & demAge < 50 +replace ageGroup2 = 6 if demAge >= 50 & demAge < 55 +replace ageGroup2 = 7 if demAge >= 55 & demAge < 60 +replace ageGroup2 = 8 if demAge >= 60 & demAge <= 65 label def ageGrouplb2 /// 0 "ageGroup_16_24" /// @@ -80,35 +86,43 @@ label def ageGrouplb2 /// label val ageGroup2 ageGrouplb2 -* Partnership status -gen sim_dcpst_p = (dcpst == "Partnered") // partnered -gen sim_dcpst_snm = (dcpst == "SingleNeverMarried") // single never married -gen sim_dcpst_prvp = (dcpst == "PreviouslyPartnered") // previously partnered -gen sim_dcpst_snmprvp = (dcpst == "SingleNeverMarried" | /// - dcpst == "PreviouslyPartnered") - // single never married & previously partnered +* Max benefit unit age +bys run year idBu: egen demAgeBuMax = max(demAge) -replace idpartner = "" if idpartner == "null" -destring idpartner , replace -gen sim_has_partner = (idpartner != .) +* Partnership status +gen demPartnerStatus_coded = . 
+replace demPartnerStatus_coded = 1 if demPartnerStatus == "Partnered" +replace demPartnerStatus_coded = 2 if demPartnerStatus == "Single" +drop demPartnerStatus +rename demPartnerStatus_coded demPartnerStatus + +gen sim_partnered = (demPartnerStatus == 1) +gen sim_single = (demPartnerStatus == 2) + +replace idPartner = "" if idPartner == "null" +destring idPartner , replace +gen sim_has_partner = (idPartner != .) * Number of children -gen child = (dag < $age_become_responsible) -bys run year idbenefitunit: egen sim_dnc = total(child) +gen child = (demAge < ${age_become_responsible}) +bys run year idBu: egen sim_demNChild = total(child) -gen child02 = (dag < 3) -bys run year idbenefitunit: egen sim_dnc02 = total(child02) +gen child02 = (demAge < 3) +bys run year idBu: egen sim_demNChild0to2 = total(child02) -gen children_0 = (sim_dnc == 0) -gen children_1 = (sim_dnc == 1) -gen children_2 = (sim_dnc == 2) -gen children_3plus = (sim_dnc >= 3) +gen child00 = (demAge < 1) +bys run year idBu: egen sim_demNChild0 = total(child00) +gen children_0 = (sim_demNChild == 0) +gen children_1 = (sim_demNChild == 1) +gen children_2 = (sim_demNChild == 2) +gen children_3plus = (sim_demNChild >= 3) * Interact partnership status and number of children -foreach var1 in sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp sim_dcpst_snmprvp { - foreach var2 in children_0 children_1 children_2 children_3p { +foreach var1 in sim_partnered sim_single { + + foreach var2 in children_0 children_1 children_2 children_3plus { gen `var1'_`var2' = (`var1' & `var2') @@ -116,78 +130,226 @@ foreach var1 in sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp sim_dcpst_snmprvp { } -* Economic activity dummies -gen sim_employed = (les_c4 == "EmployedOrSelfEmployed") -gen sim_student = (les_c4 == "Student") -gen sim_inactive = (les_c4 == "NotEmployed") -gen sim_retired = (les_c4 == "Retired") +** EDUCATION +* Education dummies +gen sim_edu_na = (eduHighestC4 == "InEducation") +gen sim_edu_high = (eduHighestC4 == "High") +gen 
sim_edu_med = (eduHighestC4 == "Medium") +gen sim_edu_low = (eduHighestC4 == "Low") + +** HEALTH * Disabled / LT sick -gen sim_dlltsd = (dlltsd == "True") +gen sim_healthDsblLongtermFlag = (healthDsblLongtermFlag == "True") + +* Self rated heath +gen sim_healthSelfRated = . +replace sim_healthSelfRated = 1 if healthSelfRated == "Poor" +replace sim_healthSelfRated = 2 if healthSelfRated == "Fair" +replace sim_healthSelfRated = 3 if healthSelfRated == "Good" +replace sim_healthSelfRated = 4 if healthSelfRated == "VeryGood" +replace sim_healthSelfRated = 5 if healthSelfRated == "Excellent" + +lab def sim_healthSelfRated 1 "Poor" 2 "Fair" 3 "Good" 4 "VeryGood" /// + 5 "Excellent" + +lab val sim_healthSelfRated sim_healthSelfRated + +* MCS +gen sim_healthMentalMcs = healthMentalMcs + +* PCS +gen sim_healthPhysicalPcs = healthPhysicalPcs -* Education dummies -gen sim_edu_high = (deh_c3 == "High") -gen sim_edu_med = (deh_c3 == "Medium") -gen sim_edu_low = (deh_c3 == "Low") -* Hours of work -rename laboursupplyweekly laboursupplyweekly_orig +** LABOUR MARKET -gen lhw = . 
-replace lhw = 0 if laboursupplyweekly == "ZERO" -replace lhw = 10 if laboursupplyweekly == "TEN" -replace lhw = 20 if laboursupplyweekly == "TWENTY" -replace lhw = 30 if laboursupplyweekly == "THIRTY" -replace lhw = 40 if laboursupplyweekly == "FORTY" +* Economic activity dummies +gen sim_employed = (labC4 == "EmployedOrSelfEmployed") +gen sim_student = (labC4 == "Student") +gen sim_inactive = (labC4 == "NotEmployed") +gen sim_retired = (labC4 == "Retired") + +* Hours worked weekly (continuous) +gen sim_labHrsWorkWeek = labHrsWorkWeek + +* Hours worked weekly (categories) +/* +"This version uses 7 labour supply alternatives:") +("0 hours ==> non-employment alternative.") +("10 hours ==> 6–15 hours bracket.") +("20 hours ==> 16–25 hours bracket.") +("30 hours ==> 26–35 hours bracket.") +("38 hours ==> 36–40 hours bracket.") +("45 hours ==> 41–49 hours bracket.") +("55 hours ==> 50+ hours bracket.") +*/ +gen sim_labHrsWorkEnumWeek = labHrsWorkEnumWeek + +gen sim_labHrsWorkEnum_no = . +replace sim_labHrsWorkEnum_no = 0 if labHrsWorkEnumWeek == "ZERO" +replace sim_labHrsWorkEnum_no = 10 if labHrsWorkEnumWeek == "TEN" +replace sim_labHrsWorkEnum_no = 20 if labHrsWorkEnumWeek == "TWENTY" +replace sim_labHrsWorkEnum_no = 30 if labHrsWorkEnumWeek == "THIRTY" +replace sim_labHrsWorkEnum_no = 38 if labHrsWorkEnumWeek == "THIRTY_EIGHT" +replace sim_labHrsWorkEnum_no = 45 if labHrsWorkEnumWeek == "FORTY_FIVE" +replace sim_labHrsWorkEnum_no = 55 if labHrsWorkEnumWeek == "FIFTY_FIVE" + +* Categorical variable +gen sim_cat_hours = . 
+replace sim_cat_hours = 1 if labHrsWorkEnumWeek == "ZERO" +replace sim_cat_hours = 2 if labHrsWorkEnumWeek == "TEN" +replace sim_cat_hours = 3 if labHrsWorkEnumWeek == "TWENTY" +replace sim_cat_hours = 4 if labHrsWorkEnumWeek == "THIRTY" +replace sim_cat_hours = 5 if labHrsWorkEnumWeek == "THIRTY_EIGHT" +replace sim_cat_hours = 6 if labHrsWorkEnumWeek == "FORTY_FIVE" +replace sim_cat_hours = 7 if labHrsWorkEnumWeek == "FIFTY_FIVE" + +tab labHrsWorkEnumWeek year + + +* Hourly wage +gen sim_pred_wage = labWageFullTimeHrly + + +** INCOME (ANNUAL) +/* +Amounts of personal income stored with the IHS trasnformation. +Benefit Unit level measure (gross and dispoable income) are stored without the +transformation. +*/ + +* Destring individual amounts +destring yNonBenPersGrossMonth yEmpPersGrossMonth yCapitalPersMonth /// + yPensPersGrossMonth, replace ignore("null" "NaN") + + +* Annual individual gross employment income +* Convert to levels +gen yEmpPersGrossLevelMonth = sinh(yEmpPersGrossMonth) +* Convert to annual +gen sim_yEmpPersGrossLevelYear = yEmpPersGrossLevelMonth * 12 + +* Annual benefit unit gross employment income +bys run year idBu: egen sim_yEmpBuGrossLevelYear = /// + total(sim_yEmpPersGrossLevelYear) + + +* Annual individual capital income +* Convert to levels +gen yCapitalPersLevelMonth = sinh(yCapitalPersMonth) +* Convert to annual +gen sim_yCapitalPersLevelYear = yCapitalPersLevelMonth * 12 -* Potential earnings -rename fulltimehourlyearningspotential potential_earnings_hourly +* Annual benefit unit capital income +bys run year idBu: egen sim_yCapitalBuLevelYear = /// + total(sim_yCapitalPersLevelYear) -* Annual benefit unit disposable and gross income -gen sim_y_disp_yr_bu = disposableincomemonthly * 12 -gen sim_y_gross_yr_bu = grossincomemonthly * 12 +* Annual individual gross private pension income +* Convert to levels +gen yPensPersGrossLevelMonth = sinh(yPensPersGrossMonth) +* Convert to annual +gen sim_yPensPersGrossLevelYear = 
yPensPersGrossLevelMonth * 12 + +* Annual benefit unit gross private pension income +bys run year idBu: egen sim_yPensBuGrossLevelYear = /// + total(sim_yPensPersGrossLevelYear) + + +* Annual individual gross non-benefit income +* Convert to levels (not used in the annual total below) +gen yNonBenPersGrossLevelMonth = sinh(yNonBenPersGrossMonth) +* Annual total: sum of the pension, capital and employment components +gen sim_yNonBenPersGrossLevelYear = sim_yPensPersGrossLevelYear + /// + sim_yCapitalPersLevelYear + sim_yEmpPersGrossLevelYear + +* Annual benefit unit gross non-benefit income +gen sim_yNonBenBuGrossLevelYear = sim_yPensBuGrossLevelYear + /// + sim_yCapitalBuLevelYear + sim_yEmpBuGrossLevelYear -* Annual individual gross income -* Combine employment income, pension income and capital income -foreach var in yplgrs_dv_lvl ypncp_lvl ypnoab_lvl { - replace `var' = `var' * 12 - rename `var' sim_`var' +* Annual benefit unit gross income (level, non-benefit) +/* +Note this should be the same as sim_yNonBenBuGrossLevelYear +*/ -} +gen sim_yGrossBuLevelMonth = yGrossMonth +gen sim_yGrossBuLevelYear = sim_yGrossBuLevelMonth * 12 + +* Check +gen diff = sim_yGrossBuLevelYear - sim_yNonBenBuGrossLevelYear -gen sim_y_gross_yr = sim_yplgrs_dv_lvl + sim_ypnoab_lvl + sim_ypncp_lvl +hist diff +// ISSUE => Use constructed BU gross non-benefit income -* Annual benefit unit employment income, pension income and capital income -foreach observed_var in sim_yplgrs_dv_lvl sim_ypnoab_lvl sim_ypncp_lvl { +replace sim_yGrossBuLevelYear = sim_yNonBenBuGrossLevelYear + +gen sim_yGrossPersLevelYear = sim_yNonBenPersGrossLevelYear + + +* Annual benefit unit disposable income (level) +gen yDispBuLevelMonth = yDispMonth +gen sim_yDispBuLevelYear = yDispBuLevelMonth * 12 + + +* Annual benefit unit equivalised disposable income (BU, level) +gen sim_yDispEquivYear = yDispEquivYear - bys run year idbenefitunit: egen `observed_var'_bu = total(`observed_var') + +* Benefit unit - Net transfers +gen sim_net_transfers = sim_yDispBuLevelYear - sim_yNonBenBuGrossLevelYear + + +** SOCIAL 
CARE +/* +Demand variables populated with 0 if age < 65. +Supply variables populated with 0 if age < 18 +*/ +* Hours of informal care received +rename careHrsInformal sim_careHrsInformal -} +* Hours of formal care received +rename careHrsFormal sim_careHrsFormal + +* Cost of formal care +rename careFormalX sim_careFormalX + +* Hours of care provided +rename careHrsProvidedWeek sim_careHrsProvidedWeek + +* Need care flag +gen sim_careNeedFlag = (careNeedFlag == "True") -* Max beneift unit age -bys run year idbenefitunit: egen max_age_in_bu = max(dag) +* Receive care flag +gen sim_careReceiveFlag = (sim_careHrsInformal > 0 | sim_careHrsFormal > 0) +* Receive only formal care flag +gen sim_careRecFormalOnly = /// + (sim_careHrsInformal == 0 & sim_careHrsFormal > 0) -* Save full population -preserve +* Receive only informal care flag +gen sim_careRecInformalOnly = /// + (sim_careHrsInformal > 0 & sim_careHrsFormal == 0) -* Restrict sample to observations up to and including specified maximum year -keep if year <= $max_year +* Receive both informal and formal care flag +gen sim_careRecMix = /// + (sim_careHrsInformal > 0 & sim_careHrsFormal > 0) -save "$dir_data/simulated_data_full.dta", replace +* Total care hours received +gen sim_careReceiveHrs = sim_careHrsInformal + sim_careHrsFormal -restore +* Provide care flag +gen sim_careProvideFlag = (sim_careHrsProvidedWeek > 0) +* Restrict sample to relevant valdiation years +keep if year >= ${min_sim_year} +keep if year <= ${max_sim_year} -* Restrict sample to individuals between min and max age defined in 00_master -keep if dag >= $min_age & dag <= $max_age +drop diff -* Restrict sample to observations up to and including specified maximum year -keep if year <= $max_year +save "$dir_data/simulation_sample.dta", replace -save "$dir_data/simulated_data.dta", replace -//erase "$dir_data/simulated_data_prep1.dta" +graph drop _all diff --git 
a/validation/02_simulated_output_validation/do_files/03_create_UKHLS_validation_targets.do b/validation/02_simulated_output_validation/do_files/03_create_UKHLS_validation_targets.do new file mode 100644 index 000000000..6e282eb66 --- /dev/null +++ b/validation/02_simulated_output_validation/do_files/03_create_UKHLS_validation_targets.do @@ -0,0 +1,472 @@ +/******************************************************************************* +* PROJECT: SimPaths UK +* SECTION: Validation +* OBJECT: Validation data processing +* AUTHORS: Ashley Burdett +* LAST UPDATE: Feb 2026 +* COUNTRY: UK +* DESCRIPTION: This file creates the validation target variables +* using UKHLS initial populations data. +******************************************************************************** +* NOTES: The income amounts in annual terms. +* Currently construct gross income from components. +* +*******************************************************************************/ + +* Generate/Tidy required variables + +* Load UKHLS data +use "${dir_UKHLS_data}/UKHLS_pooled_ipop.dta", clear + + +** IDENTIFIERS +rename idhh idHh +rename idbenefitunit idBu +rename idperson idPers +rename idpartner idPartner +rename idmother idMother +rename idfather idFather +rename swv statCollectionWave + + +** TIME + +fre stm + +gen year = stm +//replace year = year + 2000 + + +** DEMOGRAPHICS + +* Gender +rename dgn demMaleFlag + +replace demMaleFlag = . if demMaleFlag < 0 + + +* Age +rename dag demAge + +replace demAge = . if demAge < 0 + +* Define age groups +gen ageGroup = . 
+replace ageGroup = 0 if demAge >= 0 & demAge < 15 +replace ageGroup = 1 if demAge >= 15 & demAge < 20 +replace ageGroup = 2 if demAge >= 20 & demAge < 25 +replace ageGroup = 3 if demAge >= 25 & demAge < 30 +replace ageGroup = 4 if demAge >= 30 & demAge < 35 +replace ageGroup = 5 if demAge >= 35 & demAge < 40 +replace ageGroup = 6 if demAge >= 40 & demAge < 60 +replace ageGroup = 7 if demAge >= 60 & demAge < 80 +replace ageGroup = 8 if demAge >= 80 & demAge <= 100 + +label def ageGroup /// + 0 "ageGroup_0_14" /// + 1 "ageGroup_15_19" /// + 2 "ageGroup_20_24" /// + 3 "ageGroup_25_29" /// + 4 "ageGroup_30_34" /// + 5 "ageGroup_35_39" /// + 6 "ageGroup_40_59" /// + 7 "ageGroup_60_79" /// + 8 "ageGroup_80_100" /// + +label val ageGroup ageGroup +fre ageGroup + +gen ageGroup2 = . +replace ageGroup2 = 0 if demAge >= 16 & demAge < 25 +replace ageGroup2 = 1 if demAge >= 25 & demAge < 30 +replace ageGroup2 = 2 if demAge >= 30 & demAge < 35 +replace ageGroup2 = 3 if demAge >= 35 & demAge < 40 +replace ageGroup2 = 4 if demAge >= 40 & demAge < 45 +replace ageGroup2 = 5 if demAge >= 45 & demAge < 50 +replace ageGroup2 = 6 if demAge >= 50 & demAge < 55 +replace ageGroup2 = 7 if demAge >= 55 & demAge < 60 +replace ageGroup2 = 8 if demAge >= 60 & demAge <= 65 + +label def ageGrouplb2 /// + 0 "ageGroup_16_24" /// + 1 "ageGroup_25_29" /// + 2 "ageGroup_30_34" /// + 3 "ageGroup_35_39" /// + 4 "ageGroup_40_44" /// + 5 "ageGroup_45_49" /// + 6 "ageGroup_50_54" /// + 7 "ageGroup_55_59" /// + 8 "ageGroup_60_65" /// + +label val ageGroup2 ageGrouplb2 +fre ageGroup2 + + +* Partnership status +rename dcpst demPartnerStatus + +gen valid_partnered = (demPartnerStatus == 1) +gen valid_single = (demPartnerStatus == 2) + +replace valid_partnered = . if demPartnerStatus == . | demPartnerStatus < 0 +replace valid_single = . if demPartnerStatus == . 
| demPartnerStatus < 0 + + +* Number of children +rename dnc demNChild +rename dnc02 demNChild0to2 // same within a BU + +gen new_born = (demAge == 0 & demNChild0to2 != 0) +bysort idBu year (new_born): gen demNChild0 = (new_born[_N] == 1) + +gen children_0 = (demNChild == 0) +gen children_1 = (demNChild == 1) +gen children_2 = (demNChild == 2) +gen children_3plus = (demNChild >= 3 & demNChild != .) + + +* Interaction of partnership status and number of children +foreach var1 in valid_partnered valid_single { + + foreach var2 in children_0 children_1 children_2 children_3plus { + + gen `var1'_`var2' = (`var1' & `var2') + + } + +} + + +** EDUCATION +* Education dummies +rename deh_c4 eduHighestC4 + +gen valid_edu_na = (eduHighestC4 == 0) +gen valid_edu_high = (eduHighestC4 == 1) +gen valid_edu_med = (eduHighestC4 == 2) +gen valid_edu_low = (eduHighestC4 == 3) + +replace valid_edu_na = . if eduHighestC4 == . | eduHighestC4 < 0 +replace valid_edu_high = . if eduHighestC4 == . | eduHighestC4 < 0 +replace valid_edu_med = . if eduHighestC4 == . | eduHighestC4 < 0 +replace valid_edu_low = . if eduHighestC4 == . | eduHighestC4 < 0 + + +** HEALTH + +* Disabed / LT sick +rename dlltsd01 healthDsblLongtermFlag + +gen valid_healthDsblLongtermFlag = healthDsblLongtermFlag +replace valid_healthDsblLongtermFlag = . if valid_healthDsblLongtermFlag < 0 + + +* Self rated health +rename dhe valid_healthSelfRated + +replace valid_healthSelfRated = . if valid_healthSelfRated < 0 + +* MCS +rename dhe_mcs valid_healthMentalMcs + +replace valid_healthMentalMcs = . if valid_healthMentalMcs < 0 + +* PCS +rename dhe_pcs valid_healthPhysicalPcs + +replace valid_healthPhysicalPcs = . if valid_healthPhysicalPcs < 0 + + +** LABOUR MARKET + +* Economic activity dummies +rename les_c3 labC3 +rename les_c4 labC4 + +gen valid_employed = (labC4 == 1) +gen valid_student = (labC4 == 2) +gen valid_inactive = (labC4 == 3) +gen valid_retired = (labC4 == 4) + +replace valid_employed = . 
if labC4 < 0 | labC4 == . +replace valid_student = . if labC4 < 0 | labC4 == . +replace valid_inactive = . if labC4 < 0 | labC4 == . +replace valid_retired = . if labC4 < 0 | labC4 == . + +* Hours worked weekly (continuous) +rename lhw labHrsWorkWeek + +gen valid_labHrsWorkWeek = labHrsWorkWeek + +* Hours workd weekly (categories) +/* +"This version uses 7 labour supply alternatives:") +("0 hours ==> non-employment alternative.") +("10 hours ==> 6–15 hours bracket.") +("20 hours ==> 16–25 hours bracket.") +("30 hours ==> 26–35 hours bracket.") +("38 hours ==> 36–40 hours bracket.") +("45 hours ==> 41–49 hours bracket.") +("55 hours ==> 50+ hours bracket.") +*/ +gen valid_labHrsWorkEnumWeek = "ZERO" +replace valid_labHrsWorkEnumWeek = "TEN" if /// + labHrsWorkWeek >= 6 & labHrsWorkWeek <= 15 +replace valid_labHrsWorkEnumWeek = "TWENTY" if /// + labHrsWorkWeek > 15 & labHrsWorkWeek <= 25 +replace valid_labHrsWorkEnumWeek = "THIRTY" if /// + labHrsWorkWeek > 25 & labHrsWorkWeek <= 35 +replace valid_labHrsWorkEnumWeek = "THIRTY_EIGHT" if /// + labHrsWorkWeek > 35 & labHrsWorkWeek <= 40 +replace valid_labHrsWorkEnumWeek = "FORTY_FIVE" if /// + labHrsWorkWeek > 40 & labHrsWorkWeek <= 49 +replace valid_labHrsWorkEnumWeek = "FIFTY_FIVE" if /// + labHrsWorkWeek > 49 & labHrsWorkWeek != . + +gen valid_labHrsWorkEnum_no = . +replace valid_labHrsWorkEnum_no = 0 if valid_labHrsWorkEnumWeek == "ZERO" +replace valid_labHrsWorkEnum_no = 10 if valid_labHrsWorkEnumWeek == "TEN" +replace valid_labHrsWorkEnum_no = 20 if valid_labHrsWorkEnumWeek == "TWENTY" +replace valid_labHrsWorkEnum_no = 30 if valid_labHrsWorkEnumWeek == "THIRTY" +replace valid_labHrsWorkEnum_no = 38 if valid_labHrsWorkEnumWeek == "THIRTY_EIGHT" +replace valid_labHrsWorkEnum_no = 45 if valid_labHrsWorkEnumWeek == "FORTY_FIVE" +replace valid_labHrsWorkEnum_no = 55 if valid_labHrsWorkEnumWeek == "FIFTY_FIVE" + +* Categorical variable +gen valid_cat_hours = . 
+replace valid_cat_hours = 1 if valid_labHrsWorkEnumWeek == "ZERO" +replace valid_cat_hours = 2 if valid_labHrsWorkEnumWeek == "TEN" +replace valid_cat_hours = 3 if valid_labHrsWorkEnumWeek == "TWENTY" +replace valid_cat_hours = 4 if valid_labHrsWorkEnumWeek == "THIRTY" +replace valid_cat_hours = 5 if valid_labHrsWorkEnumWeek == "THIRTY_EIGHT" +replace valid_cat_hours = 6 if valid_labHrsWorkEnumWeek == "FORTY_FIVE" +replace valid_cat_hours = 7 if valid_labHrsWorkEnumWeek == "FIFTY_FIVE" + +* Hourly wage +// obs_earnings_hourly - alternative only containing observed wages +// pred_hourly_wage - alternative containing predicted wages +gen valid_wage = obs_earnings_hourly + + +** INCOME (ANNUAL) +/* +Amounts of personal income stored with the IHS transformation. +Benefit Unit level measure (gross and disposable income) are stored without the +transformation. + +No missing observations in income amounts. +*/ + +* Annual individual employment gross income +rename yplgrs_dv yEmpPersGrossMonth + +sum yEmpPersGrossMonth +count if yEmpPersGrossMonth == . + +* Convert to levels +gen yEmpPersGrossLevelMonth = sinh(yEmpPersGrossMonth) +* Convert to annual +gen valid_yEmpPersGrossLevelYear = yEmpPersGrossLevelMonth * 12 + +* Annual benefit unit gross employment income +bys year idBu: egen valid_yEmpBuGrossLevelYear = /// + total(valid_yEmpPersGrossLevelYear) + + +* Annual individual capital income +rename ypncp yCapitalPersMonth + +sum yCapitalPersMonth +count if yCapitalPersMonth == . + +* Convert to levels +gen yCapitalPersLevelMonth = sinh(yCapitalPersMonth) +* Convert to annual +gen valid_yCapitalPersLevelYear = yCapitalPersLevelMonth * 12 + +* Annual benefit unit capital income +bys year idBu: egen valid_yCapitalBuLevelYear = /// + total(valid_yCapitalPersLevelYear) + + +* Annual individual gross private pension income +rename ypnoab yPensPersGrossMonth + +sum yPensPersGrossMonth +count if yPensPersGrossMonth == . 
+ +* Convert to levels +gen yPensPersGrossLevelMonth = sinh(yPensPersGrossMonth) +* Convert to annual +gen valid_yPensPersGrossLevelYear = yPensPersGrossLevelMonth * 12 + +* Annual benefit unit gross private pension income +bys year idBu: egen valid_yPensBuGrossLevelYear = /// + total(valid_yPensPersGrossLevelYear) + + +* Annual individual gross non-benefit income +/* +rename ypnbihs_dv yNonBenPersGrossMonth +* Convert to levels +gen yNonBenPersGrossLevelMonth = sinh(yNonBenPersGrossMonth) +* Convert to annual +gen valid_yNonBenPersGrossLevelYear = yNonBenPersGrossLevelMonth * 12 +*/ +egen valid_yNonBenPersGrossLevelYear = /// + rowtotal(valid_yPensPersGrossLevelYear valid_yCapitalPersLevelYear /// + valid_yEmpPersGrossLevelYear) + +* Annual benefit unit gross non-benefit income +bys year idBu: egen valid_yNonBenBuGrossLevelYear = /// + total(valid_yNonBenPersGrossLevelYear) + + +* Annual benefit unit gross income (level, non-benefit) +/* +Gross income is the same as non-benefit private income. +*/ +gen valid_yGrossBuLevelYear = valid_yNonBenBuGrossLevelYear + +gen valid_yGrossPersLevelYear = valid_yNonBenPersGrossLevelYear + + +* Annual benefit unit disposable income (level) +rename ydisp yDispPersMonth +* Convert to annual +gen valid_yDispPersYear = yDispPersMonth * 12 + +* Convert to benefit unit +bys year idBu: egen valid_yDispBuLevelYear = total(valid_yDispPersYear) + + +* Benefit unit - Net transfers +gen valid_net_transfers = valid_yDispBuLevelYear - valid_yNonBenBuGrossLevelYear + + +* Equivalised disposable income per benefit unit + +* Compute equivalence scale +* Idenifty types of children +gen is_older_child = 1 if inrange(demAge,14,18) & (idMother < . | idFather < .) +gen is_child = 1 if demAge < 14 & (idMother < . | idFather < .) + +* Sum up number in hh +bysort idHh: egen num_older_children = total(is_older_child) +bysort idHh: egen num_children = total(is_child) + +* Compute Modified OECD equivalence scale + +gen moecd_eq = . 
+replace moecd_eq = 1.5 if dhhtp_c4 == 1 +replace moecd_eq = 0.3 * num_children + 0.5 * num_older_children + 1.5 if /// + dhhtp_c4 == 2 +replace moecd_eq = 1 if dhhtp_c4 == 3 +replace moecd_eq = 0.3 * num_children + 0.5 * num_older_children + 1 if /// + dhhtp_c4 == 4 + + +* Apply equivalence scale +gen valid_yDispBuEquivYear = valid_yDispBuLevelYear / moecd_eq + +drop is_older_child is_child moecd_eq + + +** SOCIAL CARE +/* +To align with the simulation set the value for those who aren't eligible equal to 0. +Demand variables populated with 0 if age < 65. +Supply variables populated with 0 if age < 18. +Missing = . +Note: If missing any care demand info age > 64, then missing all. +*/ + +* Care need flag +rename need_socare valid_careNeedFlag + +replace valid_careNeedFlag = 0 if demAge < 65 +replace valid_careNeedFlag = . if valid_careNeedFlag == -9 + +* Hours of informal care received +/* +replace partner_socare_hrs = 0 if demAge < 65 +replace daughter_socare_hrs = 0 if demAge < 65 +replace son_socare_hrs = 0 if demAge < 65 +replace other_socare_hrs = 0 if demAge < 65 + +replace partner_socare_hrs = . if partner_socare_hrs == -9 +replace daughter_socare_hrs = . if daughter_socare_hrs == -9 +replace son_socare_hrs = . if son_socare_hrs == -9 +replace other_socare_hrs = . if other_socare_hrs == -9 + +egen valid_careHrsInformal = rowtotal(partner_socare_hrs /// + daughter_socare_hrs son_socare_hrs other_socare_hrs) + +replace valid_careHrsInformal = . if partner_socare_hrs == . | /// + daughter_socare_hrs == . | son_socare_hrs == . | other_socare_hrs == . +*/ +rename informal_socare_hrs valid_careHrsInformal +replace valid_careHrsInformal = 0 if demAge < 65 +replace valid_careHrsInformal = . 
if valid_careHrsInformal == -9 + +* Hours of formal care +rename formal_socare_hrs valid_careHrsFormal +replace valid_careHrsFormal = 0 if demAge < 65 +replace valid_careHrsFormal = . if valid_careHrsFormal == -9 // -9 recoded to missing, as for the other care variables + +* Formal care cost +rename formal_socare_cost valid_careFormalX +replace valid_careFormalX = 0 if demAge < 65 +replace valid_careFormalX = . if valid_careFormalX < 0 + +* Hours of care provided +rename careHoursProvidedWeekly valid_careHrsProvidedWeek +replace valid_careHrsProvidedWeek = 0 if demAge < 16 // NOTE(review): section header says supply is 0 if age < 18 - confirm threshold +replace valid_careHrsProvidedWeek = . if valid_careHrsProvidedWeek == -9 + + +* Receive care flag +gen valid_careReceiveFlag = . +replace valid_careReceiveFlag = 0 if valid_careHrsInformal != . | /// + valid_careHrsFormal != . +replace valid_careReceiveFlag = 1 if /// + (valid_careHrsInformal > 0 & valid_careHrsInformal != .)| /// + (valid_careHrsFormal > 0 & valid_careHrsFormal != . ) +replace valid_careReceiveFlag = 0 if demAge < 65 + + +* Receive only formal care flag +gen valid_careRecFormalOnly = /// + (valid_careHrsInformal == 0 & valid_careHrsFormal > 0 & /// + valid_careHrsFormal != .) + +* Receive only informal care flag +gen valid_careRecInformalOnly = /// + (valid_careHrsInformal > 0 & valid_careHrsInformal != . & /// + valid_careHrsFormal == 0) + +* Receive both informal and formal care flag +gen valid_careRecMix = (valid_careHrsInformal > 0 & /// + valid_careHrsFormal > 0 & valid_careHrsInformal != . & /// + valid_careHrsFormal != .) + +* Total care hours received +egen valid_careReceiveHrs = rowtotal(valid_careHrsInformal valid_careHrsFormal) +replace valid_careReceiveHrs = 0 if demAge < 65 +replace valid_careReceiveHrs = . if valid_careHrsInformal == . | /// + valid_careHrsFormal == . + + +* Provide care flag +gen valid_careProvideFlag = (valid_careHrsProvidedWeek > 0 & /// + valid_careHrsProvidedWeek != . ) +replace valid_careProvideFlag = . if valid_careHrsProvidedWeek == . 
+
+* Restrict sample to relevant validation years
+drop if year < ${min_sim_year}
+drop if year > ${max_sim_year}
+
+save "$dir_data/ukhls_validation_sample.dta", replace
+
+
+graph drop _all
diff --git a/validation/02_simulated_output_validation/do_files/03_prepare_UKHLS_data.do b/validation/02_simulated_output_validation/do_files/03_prepare_UKHLS_data.do
deleted file mode 100644
index 741e9a0e6..000000000
--- a/validation/02_simulated_output_validation/do_files/03_prepare_UKHLS_data.do
+++ /dev/null
@@ -1,55 +0,0 @@
-/*******************************************************************************
-* PROJECT: ESPON
-* SECTION: Validation
-* OBJECT: Validation data pre-processing
-* AUTHORS: Ashley Burdett
-* LAST UPDATE: 9/25
-* COUNTRY: UK
-
-* DESCRIPTION: This file obtains the monthly individual disposable income
-* variable from the UKHLS to merge into the processed data
-
-* NOTES: Unlike with the European models, we use the same data
-* that was used to estimate the transition parameters.
-* For now use the exact same dataset that does not drop
-* household that have some missing information and thus the
-* weights have not yet been adjusted.
-*******************************************************************************/
-clear all
-
-* Will use the below file which is used to estimate the transition parameters
-* This is before missing values are dropped and weights adjusted
-//"$dir_data/ukhls_pooled_all_obs_09.dta"
-
-
-
-* Prepare dataset with dispoable income to merge in
-use "$dir_data/ukhls_pooled_all_obs_01.dta", clear
-
-* Drop booster sample
-drop if hhorig == 8
-
-* From initial populations 2_ script
-
-lab var swv "Data collection wave"
-
-* Year
-gen stm = intdaty_dv
-la var stm "Interview year"
-
-* Interview date
-gen Int_Date = mdy(intdatm_dv, intdatd_dv ,intdaty_dv)
-format Int_Date %d
-
-* Household ID
-clonevar idhh = hidp
-la var idhh "Household identifier"
-
-* Individal ID
-clonevar idperson = pidp
-lab var idperson "Unique cross wave identifier"
-
-keep idperson swv fimnnet_dv
-
-save "$dir_data/ukhls_ind_dispos_inc.dta", replace
-
diff --git a/validation/02_simulated_output_validation/do_files/04_01_plot_activity_status.do b/validation/02_simulated_output_validation/do_files/04_01_plot_activity_status.do
new file mode 100644
index 000000000..a4116545e
--- /dev/null
+++ b/validation/02_simulated_output_validation/do_files/04_01_plot_activity_status.do
@@ -0,0 +1,2286 @@
+/*******************************************************************************
+* PROJECT: SimPaths UK
+* SECTION: Validation
+* OBJECT: Economic Activity Status plots
+* AUTHORS: Ashley Burdett
+* LAST UPDATE: 9/25
+* COUNTRY: UK
+* DESCRIPTION: This do file plots validation graphs for economic activity
+* status (4 cat).
+********************************************************************************
+* NOTES: Uses the globals dir_data, dir_output_files and country.
+*******************************************************************************/
+
+clear all
+
+********************************************************************************
+* 0 : Programmes
+********************************************************************************
+
+* make_activity_plot: time series of all four activity shares (SimPaths band vs UKHLS line), exported as `saving'.jpg
+cap program drop make_activity_plot
+
+program define make_activity_plot
+    syntax, subtitle(string) saving(string) note(string) // saving() = file stem; note() = quoted note line(s)
+
+    twoway ///
+    (rarea sim_employed_high sim_employed_low year, sort color(green%20) legend(label(1 "Employed, SimPaths"))) ///
+    (line valid_employed year, sort color(green) legend(label(2 "Employed, UKHLS"))) ///
+    (rarea sim_student_high sim_student_low year, sort color(blue%20) legend(label(3 "Students, SimPaths"))) ///
+    (line valid_student year, sort color(blue) legend(label(4 "Students, UKHLS"))) ///
+    (rarea sim_inactive_high sim_inactive_low year, sort color(red%20) legend(label(5 "Non-employed, SimPaths"))) ///
+    (line valid_inactive year, sort color(red) legend(label(6 "Non-employed, UKHLS"))) ///
+    (rarea sim_retired_high sim_retired_low year, sort color(gray%20) legend(label(7 "Retired, SimPaths"))) ///
+    (line valid_retired year, sort color(gray) legend(label(8 "Retired, UKHLS"))), ///
+    title("Economic Activity Status") ///
+    subtitle("`subtitle'") ///
+    xtitle("Year", size(small)) ///
+    ytitle("Share", size(small)) ///
+    xlabel(, labsize(small)) ///
+    ylabel(, labsize(small)) ///
+    graphregion(color(white)) ///
+    legend(size(small)) ///
+    note(`note', size(vsmall))
+
+    graph export "$dir_output_files/economic_activity/`saving'.jpg", replace width(2400) height(1350) quality(100)
+
+end
+
+
+* make_activity_ne_plot: time series of the non-employed statuses only (students, non-employed, retired)
+* Takes the same options as make_activity_plot and exports `saving'.jpg to the same folder
+cap program drop make_activity_ne_plot
+
+program define make_activity_ne_plot
+    syntax, subtitle(string) saving(string) note(string) // saving() = file stem; note() = quoted note line(s)
+
+    twoway ///
+    (rarea sim_student_high sim_student_low year, sort color(blue%20) legend(label(1 "Students, SimPaths"))) ///
+    (line valid_student year, sort color(blue) legend(label(2 "Students, UKHLS"))) ///
+    (rarea sim_inactive_high sim_inactive_low year, sort color(red%20) legend(label(3 "Non-employed, SimPaths"))) ///
+    (line valid_inactive year, sort color(red) legend(label(4 "Non-employed, UKHLS"))) ///
+    (rarea sim_retired_high sim_retired_low year, sort color(gray%20) legend(label(5 "Retired, SimPaths"))) ///
+    (line valid_retired year, sort color(gray) legend(label(6 "Retired, UKHLS"))), ///
+    title("Non-Employed Economic Activity Status") ///
+    subtitle("`subtitle'") ///
+    xtitle("Year", size(small)) ///
+    ytitle("Share", size(small)) ///
+    xlabel(, labsize(small)) ///
+    ylabel(, labsize(small)) ///
+    graphregion(color(white)) ///
+    legend(size(small)) ///
+    note(`note', size(vsmall))
+
+    graph export "$dir_output_files/economic_activity/`saving'.jpg", replace width(2400) height(1350) quality(100)
+end
+
+
+********************************************************************************
+* 1 : Mean values over time
+********************************************************************************
+********************************************************************************
+* 1.1 : Mean values over time - Economic activity status
+********************************************************************************
+********************************************************************************
+* 1.1.1 : Young people (16-30)
+********************************************************************************
+
+* Prepare validation data (dwt-weighted shares by year)
+use year dwt valid_employed valid_student valid_inactive demAge ///
+    valid_retired using "$dir_data/ukhls_validation_sample.dta", ///
+    clear
+
+drop if demAge > 30
+drop if demAge < 16
+
+collapse (mean) valid_employed valid_student valid_inactive valid_retired ///
+    [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data (shares per run and year, then mean and SD across runs)
+use run year sim_employed sim_student sim_inactive sim_retired demAge using ///
+    "$dir_data/simulation_sample.dta", clear
+
+drop if demAge > 30
+drop if demAge < 16
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired, ///
+    by(run year)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired ///
+    (sd) sim_employed_sd = sim_employed ///
+    sim_student_sd = sim_student ///
+    sim_inactive_sd = sim_inactive ///
+    sim_retired_sd = sim_retired ///
+    , by(year)
+
+* Band plotted for SimPaths: mean +/- 1.96 * SD of the run-level shares
+foreach varname in sim_employed sim_student sim_inactive sim_retired {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Plot figure
+make_activity_plot, ///
+    subtitle("Ages 16-30") ///
+    saving("validation_${country}_activity_status_ts_16_30_both") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+
+********************************************************************************
+* 1.1.1.1 : Young people (16-30), by gender
+********************************************************************************
+
+* Prepare validation data (dwt-weighted shares by year and gender)
+use year dwt valid_employed valid_student valid_inactive demAge demMaleFlag ///
+    valid_retired using "$dir_data/ukhls_validation_sample.dta", ///
+    clear
+
+drop if demAge > 30
+drop if demAge < 16
+
+collapse (mean) valid_employed valid_student valid_inactive valid_retired ///
+    [aw = dwt], by(year demMaleFlag)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data (shares per run, year and gender, then mean and SD across runs)
+use run year sim_employed sim_student sim_inactive sim_retired demAge ///
+    demMaleFlag using "$dir_data/simulation_sample.dta", clear
+
+drop if demAge > 30
+drop if demAge < 16
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired, ///
+    by(run year demMaleFlag)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired ///
+    (sd) sim_employed_sd = sim_employed ///
+    sim_student_sd = sim_student ///
+    sim_inactive_sd = sim_inactive ///
+    sim_retired_sd = sim_retired ///
+    , by(year demMaleFlag)
+
+* Band plotted for SimPaths: mean +/- 1.96 * SD of the run-level shares
+foreach varname in sim_employed sim_student sim_inactive sim_retired {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Plot figures
+
+* Males (preserve/restore keeps the female rows for the next plot)
+preserve
+
+keep if demMaleFlag == 1
+
+make_activity_plot, ///
+    subtitle("Ages 16-30, males") ///
+    saving("validation_${country}_activity_status_ts_16_30_male") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+restore
+
+* Female
+
+keep if demMaleFlag == 0
+
+* Plot figure
+make_activity_plot, ///
+    subtitle("Ages 16-30, females") ///
+    saving("validation_${country}_activity_status_ts_16_30_female") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+
+********************************************************************************
+* 1.1.2 : Working age (16-65)
+********************************************************************************
+
+* Prepare validation data (dwt-weighted shares by year)
+use idPers year dwt demMaleFlag demAge valid_employed valid_student ///
+    valid_inactive valid_retired using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,16,65)
+
+collapse (mean) valid_employed valid_student valid_inactive valid_retired ///
+    [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data (shares per run and year, then mean and SD across runs)
+use run year sim_employed sim_student sim_inactive sim_retired demMaleFlag ///
+    demAge using "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,16,65)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired, ///
+    by(run year)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired ///
+    (sd) sim_employed_sd = sim_employed ///
+    sim_student_sd = sim_student ///
+    sim_inactive_sd = sim_inactive ///
+    sim_retired_sd = sim_retired ///
+    , by(year)
+
+foreach varname in sim_employed sim_student sim_inactive sim_retired {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Plot figure
+make_activity_plot, ///
+    subtitle("Ages 16-65") ///
+    saving("validation_${country}_activity_status_ts_16_65_both") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+
+********************************************************************************
+* 1.1.2.1 : Working age (16-65), by gender
+********************************************************************************
+
+* Prepare validation data (dwt-weighted shares by year and gender)
+use idPers year dwt demMaleFlag demAge valid_employed valid_student ///
+    valid_inactive valid_retired using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,16,65)
+
+
+collapse (mean) valid_employed valid_student valid_inactive valid_retired ///
+    [aw = dwt], by(year demMaleFlag)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data (shares per run, year and gender, then mean and SD across runs)
+use run year sim_employed sim_student sim_inactive sim_retired demMaleFlag ///
+    demAge using "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,16,65)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired, ///
+    by(run year demMaleFlag)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired ///
+    (sd) sim_employed_sd = sim_employed ///
+    sim_student_sd = sim_student ///
+    sim_inactive_sd = sim_inactive ///
+    sim_retired_sd = sim_retired ///
+    , by(year demMaleFlag)
+
+foreach varname in sim_employed sim_student sim_inactive sim_retired {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Plot figures
+
+* Male (preserve/restore keeps the female rows for the next plot)
+preserve
+
+keep if demMaleFlag == 1
+
+make_activity_plot, ///
+    subtitle("Ages 16-65, males") ///
+    saving("validation_${country}_activity_status_ts_16_65_male") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+restore
+
+* Female
+
+keep if demMaleFlag == 0
+
+make_activity_plot, ///
+    subtitle("Ages 16-65, females") ///
+    saving("validation_${country}_activity_status_ts_16_65_female") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+graph drop _all
+
+
+********************************************************************************
+* 1.1.2.2 : Working age (18-65) by partnership status
+********************************************************************************
+
+* Prepare validation data (dwt-weighted shares by year and partnership status)
+use year dwt valid_employed valid_student valid_inactive valid_retired ///
+    demPartnerStatus demMaleFlag demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,18,65)
+
+collapse (mean) valid_employed valid_student valid_inactive valid_retired ///
+    [aw = dwt], by(year demPartnerStatus)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data (shares per run, year and partnership status, then mean and SD across runs)
+use run year sim_employed sim_student sim_inactive sim_retired ///
+    demPartnerStatus demMaleFlag demAge using ///
+    "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,18,65)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired, ///
+    by(run year demPartnerStatus)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired ///
+    (sd) sim_employed_sd = sim_employed ///
+    sim_student_sd = sim_student ///
+    sim_inactive_sd = sim_inactive ///
+    sim_retired_sd = sim_retired ///
+    , by(year demPartnerStatus)
+
+foreach varname in sim_employed sim_student sim_inactive sim_retired {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+merge 1:1 year demPartnerStatus using "$dir_data/temp_valid_stats.dta", ///
+    keep(3) nogen
+
+* Plot figures
+
+* Partnered (demPartnerStatus == 1)
+preserve
+
+keep if demPartnerStatus == 1
+
+make_activity_plot, ///
+    subtitle("Ages 18-65, partnered") ///
+    saving("validation_${country}_activity_status_ts_18_65_both_partnered") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+restore
+
+* Single (demPartnerStatus == 2)
+keep if demPartnerStatus == 2
+
+make_activity_plot, ///
+    subtitle("Ages 18-65, single") ///
+    saving("validation_${country}_activity_status_ts_18_65_both_single") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+graph drop _all
+
+
+********************************************************************************
+* 1.1.2.2.1 : Working age (18-65), by partnership status, by gender
+********************************************************************************
+
+* Prepare validation data (dwt-weighted shares by year, partnership status and gender)
+use year dwt valid_employed valid_student valid_inactive valid_retired ///
+    demPartnerStatus demMaleFlag demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,18,65)
+
+collapse (mean) 
valid_employed valid_student valid_inactive valid_retired ///
+    [aw = dwt], by(year demPartnerStatus demMaleFlag)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data (shares per run and cell, then mean and SD across runs)
+use run year sim_employed sim_student sim_inactive sim_retired ///
+    demPartnerStatus demMaleFlag demAge using ///
+    "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,18,65)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired, ///
+    by(run year demPartnerStatus demMaleFlag)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired ///
+    (sd) sim_employed_sd = sim_employed ///
+    sim_student_sd = sim_student ///
+    sim_inactive_sd = sim_inactive ///
+    sim_retired_sd = sim_retired ///
+    , by(year demPartnerStatus demMaleFlag)
+
+foreach varname in sim_employed sim_student sim_inactive sim_retired {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+merge 1:1 year demPartnerStatus demMaleFlag using ///
+    "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+
+* Plot figures: one per gender x partnership cell, with the same note as every other figure
+foreach g in 0 1 {
+    local gname = cond(`g'==1, "male", "female")
+
+    foreach p in 1 2 {
+        local pname = cond(`p'==1, "partnered", "single")
+
+        preserve
+        keep if demMaleFlag == `g' & demPartnerStatus == `p'
+
+        make_activity_plot, ///
+            subtitle("Ages 18-65, `pname' `gname's") ///
+            saving("validation_${country}_activity_status_ts_18_65_`gname'_`pname'") ///
+            note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+        restore
+    }
+}
+
+graph drop _all
+
+
+********************************************************************************
+* 1.1.3 : Female working age (16-60)
+********************************************************************************
+
+* Prepare validation data (dwt-weighted shares by year and gender)
+use year dwt valid_employed valid_student valid_inactive demMaleFlag demAge ///
+    valid_retired using "$dir_data/ukhls_validation_sample.dta", ///
+    clear
+
+* Select sample
+keep if inrange(demAge,16,60)
+
+collapse (mean) valid_employed valid_student valid_inactive valid_retired ///
+    [aw = dwt], by(year demMaleFlag)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data (shares per run, year and gender, then mean and SD across runs)
+use run year sim_employed sim_student sim_inactive sim_retired demMaleFlag ///
+    demAge using "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,16,60)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired, ///
+    by(run year demMaleFlag)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired ///
+    (sd) sim_employed_sd = sim_employed ///
+    sim_student_sd = sim_student ///
+    sim_inactive_sd = sim_inactive ///
+    sim_retired_sd = sim_retired ///
+    , by(year demMaleFlag)
+
+foreach varname in sim_employed sim_student sim_inactive sim_retired {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Females
+keep if demMaleFlag == 0
+
+* Plot figure
+make_activity_plot, ///
+    subtitle("Ages 16-60, females") ///
+    saving("validation_${country}_activity_status_ts_16_60_female") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+
+********************************************************************************
+* 1.1.4 : All ages 
+********************************************************************************
+
+* Prepare validation data (dwt-weighted shares by year; no age restriction is applied)
+use year dwt valid_employed valid_student valid_inactive demMaleFlag ///
+    valid_retired demAge labC4 using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+collapse (mean) valid_employed valid_student valid_inactive valid_retired ///
+    [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data (shares per run and year, then mean and SD across runs)
+use run year sim_employed sim_student sim_inactive sim_retired demMaleFlag ///
+    demAge labC4 using "$dir_data/simulation_sample.dta", clear
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired, ///
+    by(run year)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired ///
+    (sd) sim_employed_sd = sim_employed ///
+    sim_student_sd = sim_student ///
+    sim_inactive_sd = sim_inactive ///
+    sim_retired_sd = sim_retired ///
+    , by(year)
+
+foreach varname in sim_employed sim_student sim_inactive sim_retired {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+
+* Plot figure
+make_activity_plot, ///
+    subtitle("All ages") ///
+    saving("validation_${country}_activity_status_ts_all_both") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+graph drop _all
+
+
+********************************************************************************
+* 1.1.4.1 : All ages, by gender
+********************************************************************************
+
+* Prepare validation data (dwt-weighted shares by year and gender; no age restriction is applied)
+use year dwt valid_employed valid_student valid_inactive demMaleFlag ///
+    valid_retired demAge labC4 using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+collapse (mean) valid_employed valid_student valid_inactive valid_retired ///
+    [aw = dwt], by(year demMaleFlag)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data (shares per run, year and gender, then mean and SD across runs)
+use run year sim_employed sim_student sim_inactive sim_retired demMaleFlag ///
+    demAge labC4 using "$dir_data/simulation_sample.dta", clear
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired, ///
+    by(run year demMaleFlag)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired ///
+    (sd) sim_employed_sd = sim_employed ///
+    sim_student_sd = sim_student ///
+    sim_inactive_sd = sim_inactive ///
+    sim_retired_sd = sim_retired ///
+    , by(year demMaleFlag)
+
+foreach varname in sim_employed sim_student sim_inactive sim_retired {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Plot figures
+
+* Males (preserve/restore keeps the female rows for the next plot)
+preserve
+keep if demMaleFlag == 1
+
+make_activity_plot, ///
+    subtitle("All ages, males") ///
+    saving("validation_${country}_activity_status_ts_all_male") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+restore
+
+* Females
+keep if demMaleFlag == 0
+
+make_activity_plot, ///
+    subtitle("All ages, females") ///
+    saving("validation_${country}_activity_status_ts_all_female") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+graph drop _all
+
+
+********************************************************************************
+* 1.1.5 : Adult population 18+
+********************************************************************************
+
+* Prepare validation data (dwt-weighted shares by year)
+use year dwt valid_employed valid_student valid_inactive demMaleFlag demAge ///
+    valid_retired using "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+drop if demAge < 18
+
+collapse (mean) 
valid_employed valid_student valid_inactive valid_retired ///
+    [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data (shares per run and year, then mean and SD across runs)
+use run year sim_employed sim_student sim_inactive sim_retired demMaleFlag ///
+    demAge using "$dir_data/simulation_sample.dta", clear
+
+drop if demAge < 18
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired, ///
+    by(run year)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired ///
+    (sd) sim_employed_sd = sim_employed ///
+    sim_student_sd = sim_student ///
+    sim_inactive_sd = sim_inactive ///
+    sim_retired_sd = sim_retired ///
+    , by(year)
+
+foreach varname in sim_employed sim_student sim_inactive sim_retired {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Plot figure
+make_activity_plot, ///
+    subtitle("Ages 18+") ///
+    saving("validation_${country}_activity_status_ts_18plus_both") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+
+********************************************************************************
+* 1.1.5.1 : Adult population 18+, by gender
+********************************************************************************
+
+* Prepare validation data (dwt-weighted shares by year and gender)
+use year dwt valid_employed valid_student valid_inactive demMaleFlag demAge ///
+    valid_retired using "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+drop if demAge < 18
+
+collapse (mean) valid_employed valid_student valid_inactive valid_retired ///
+    [aw = dwt], by(year demMaleFlag)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data (shares per run, year and gender, then mean and SD across runs)
+use run year sim_employed sim_student sim_inactive sim_retired demMaleFlag ///
+    demAge using "$dir_data/simulation_sample.dta", clear
+
+drop if demAge < 18
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired, ///
+    by(run year demMaleFlag)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired ///
+    (sd) sim_employed_sd = sim_employed ///
+    sim_student_sd = sim_student ///
+    sim_inactive_sd = sim_inactive ///
+    sim_retired_sd = sim_retired ///
+    , by(year demMaleFlag)
+
+foreach varname in sim_employed sim_student sim_inactive sim_retired {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Plot figures
+
+* Males (preserve/restore keeps the female rows for the next plot)
+preserve
+
+keep if demMaleFlag == 1
+
+make_activity_plot, ///
+    subtitle("Ages 18+, males") ///
+    saving("validation_${country}_activity_status_ts_18plus_male") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+restore
+
+* Females
+keep if demMaleFlag == 0
+
+make_activity_plot, ///
+    subtitle("Ages 18+, females") ///
+    saving("validation_${country}_activity_status_ts_18plus_female") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+graph drop _all
+
+
+********************************************************************************
+* 1.1.6 : Labour supply age group (16-75)
+********************************************************************************
+
+* Prepare validation data (dwt-weighted shares by year)
+use year dwt valid_employed valid_student valid_inactive demMaleFlag demAge ///
+    valid_retired using "$dir_data/ukhls_validation_sample.dta", ///
+    clear
+
+* Select sample (16-75, matching the section title, subtitle and file name; was 18-75)
+keep if inrange(demAge,16,75)
+
+collapse (mean) valid_employed valid_student valid_inactive valid_retired ///
+    [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data (shares per run and year, then mean and SD across runs)
+use run year sim_employed sim_student sim_inactive sim_retired demMaleFlag ///
+    demAge using "$dir_data/simulation_sample.dta", clear
+
+* Select sample (16-75, same range as the validation data)
+keep if inrange(demAge,16,75)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired, ///
+    by(run year)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired ///
+    (sd) sim_employed_sd = sim_employed ///
+    sim_student_sd = sim_student ///
+    sim_inactive_sd = sim_inactive ///
+    sim_retired_sd = sim_retired ///
+    , by(year)
+
+foreach varname in sim_employed sim_student sim_inactive sim_retired {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Plot figure
+make_activity_plot, ///
+    subtitle("Ages 16-75") ///
+    saving("validation_${country}_activity_status_ts_16_75_both") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+
+********************************************************************************
+* 1.1.6.1 : Labour supply age group (16-75), by gender
+********************************************************************************
+
+* Prepare validation data (dwt-weighted shares by year and gender)
+use year dwt valid_employed valid_student valid_inactive demMaleFlag demAge ///
+    valid_retired using "$dir_data/ukhls_validation_sample.dta", ///
+    clear
+
+* Select sample (16-75, matching the section title, subtitle and file name; was 18-75)
+keep if inrange(demAge,16,75)
+
+collapse (mean) valid_employed valid_student valid_inactive valid_retired ///
+    [aw = dwt], by(year demMaleFlag)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data (shares per run, year and gender, then mean and SD across runs)
+use run year sim_employed sim_student sim_inactive sim_retired demMaleFlag ///
+    demAge using "$dir_data/simulation_sample.dta", clear
+
+* Select sample (16-75, same range as the validation data)
+keep if inrange(demAge,16,75)
+
+collapse (mean) sim_employed sim_student sim_inactive sim_retired, ///
+    by(run year demMaleFlag)
+
+collapse (mean) sim_employed 
sim_student sim_inactive sim_retired ///
+    (sd) sim_employed_sd = sim_employed ///
+    sim_student_sd = sim_student ///
+    sim_inactive_sd = sim_inactive ///
+    sim_retired_sd = sim_retired ///
+    , by(year demMaleFlag)
+
+foreach varname in sim_employed sim_student sim_inactive sim_retired {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Plot figures
+
+* Male (preserve/restore keeps the female rows for the next plot)
+preserve
+
+keep if demMaleFlag == 1
+
+make_activity_plot, ///
+    subtitle("Ages 16-75, males") ///
+    saving("validation_${country}_activity_status_ts_16_75_male") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+restore
+
+* Female
+keep if demMaleFlag == 0
+
+* Plot figure
+make_activity_plot, ///
+    subtitle("Ages 16-75, females") ///
+    saving("validation_${country}_activity_status_ts_16_75_female") ///
+    note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.""')
+
+graph drop _all
+
+
+********************************************************************************
+* 1.2 : Mean values over time, share employed
+********************************************************************************
+
+********************************************************************************
+* 1.2.1 : Mean values over time, share employed, by age group, by gender
+********************************************************************************
+
+* Prepare validation data (gender-specific employment variables, missing for the other gender)
+use year dwt demMaleFlag ageGroup valid_employed demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+gen employed_f = (valid_employed) if demMaleFlag == 0
+gen employed_m = (valid_employed) if demMaleFlag == 1
+
+drop if ageGroup == 0 | ageGroup == 8
+
+collapse (mean) 
employed_f employed_m [aw=dwt], /// + by(ageGroup year) + +drop if missing(ageGroup) + +reshape wide employed_f employed_m, i(year) j(ageGroup) + +forvalues i = 1(1)7 { + + rename employed_f`i' employed_f_`i'_valid + rename employed_m`i' employed_m_`i'_valid + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year demMaleFlag ageGroup sim_employed using /// + "$dir_data/simulation_sample.dta", clear + +gen employed_f = (sim_employed) if demMaleFlag == 0 +gen employed_m = (sim_employed) if demMaleFlag == 1 + +drop if ageGroup == 0 | ageGroup == 8 + +collapse (mean) employed_f employed_m, by(ageGroup run year) +drop if missing(ageGroup) + +reshape wide employed_f employed_m, i(year run) j(ageGroup) + +forvalues i = 1(1)7 { + + rename employed_f`i' employed_f_`i'_sim + rename employed_m`i' employed_m_`i'_sim + +} + +collapse (mean) employed* /// + (sd) sd_employed_f_1_sim=employed_f_1_sim /// + sd_employed_f_2_sim=employed_f_2_sim /// + sd_employed_f_3_sim=employed_f_3_sim /// + sd_employed_f_4_sim=employed_f_4_sim /// + sd_employed_f_5_sim=employed_f_5_sim /// + sd_employed_f_6_sim=employed_f_6_sim /// + sd_employed_f_7_sim=employed_f_7_sim /// + sd_employed_m_1_sim=employed_m_1_sim /// + sd_employed_m_2_sim=employed_m_2_sim /// + sd_employed_m_3_sim=employed_m_3_sim /// + sd_employed_m_4_sim=employed_m_4_sim /// + sd_employed_m_5_sim=employed_m_5_sim /// + sd_employed_m_6_sim=employed_m_6_sim /// + sd_employed_m_7_sim=employed_m_7_sim /// + , by(year) + + +forvalues i = 1(1)7 { + + gen employed_f_`i'_sim_high = /// + employed_f_`i'_sim + 1.96*sd_employed_f_`i'_sim + gen employed_f_`i'_sim_low = /// + employed_f_`i'_sim - 1.96*sd_employed_f_`i'_sim + gen employed_m_`i'_sim_high = /// + employed_m_`i'_sim + 1.96*sd_employed_m_`i'_sim + gen employed_m_`i'_sim_low = /// + employed_m_`i'_sim - 1.96*sd_employed_m_`i'_sim + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figures + +* Define the 
age labels in a local macro +local age_labels `" "16-19" "20-24" "25-29" "30-34" "35-39" "40-59" "60-79" "' + +foreach vble in "employed_f" "employed_m" { + + *Loop through the 7 age groups + forvalues i = 1/7 { + + * Extract the label for the current index i + local title : word `i' of `age_labels' + + twoway (rarea `vble'_`i'_sim_high `vble'_`i'_sim_low year, /// + sort color(green%20) /// + legend(label(1 "SimPaths") position(6) rows(1))) /// + (line `vble'_`i'_valid year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Age `title'") /// + name(`vble'_`i', replace) /// + ylabel(0.2(0.4)1) /// + xtitle("") /// + graphregion(color(white)) + } + + * Determine gender subtitle for the combined plot + local gsubtitle = cond("`vble'" == "employed_m", "Males", "Females") + local gsuffix = cond("`vble'" == "employed_m", "male", "female") + + * Combine plots + grc1leg `vble'_1 `vble'_2 `vble'_3 `vble'_4 `vble'_5 `vble'_6 `vble'_7, /// + title("Share Employed by Age Group") /// + subtitle("`gsubtitle'") /// + legendfrom(`vble'_1) /// + ycomm /// + graphregion(color(white)) /// + note("Notes: ", size(vsmall)) + + * 5. 
Export + graph export "$dir_output_files/economic_activity/validation_${country}_employed_ts_age_groups_`gsuffix'.jpg", /// + replace width(2400) height(1350) quality(100) + +} + +graph drop _all + + +******************************************************************************** +* 1.3 : Mean values over time, non-employed shares +******************************************************************************** + +******************************************************************************** +* 1.3.1 : Non-employed shares, working age (16-65) +******************************************************************************** + +* Prepare validation data +use idPers year dwt valid_employed valid_student valid_inactive /// + valid_retired demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if inrange(demAge,16,65) + +collapse (mean) valid_student valid_inactive valid_retired /// + [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year sim_employed sim_student sim_inactive sim_retired demAge /// + using "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if inrange(demAge,16,65) + +collapse (mean) sim_student sim_inactive sim_retired, /// + by(run year) + +collapse (mean) sim_student sim_inactive sim_retired /// + (sd) sim_student_sd = sim_student /// + sim_inactive_sd = sim_inactive /// + sim_retired_sd = sim_retired /// + , by(year) + +foreach varname in sim_student sim_inactive sim_retired { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + + +* Plot figure +make_activity_ne_plot, /// + subtitle("Ages 16-65") /// + saving("validation_${country}_activity_status_ts_not_employed_16_65_both") /// + note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)"
"minus students and retired. Denominator is the full population.""') + + +******************************************************************************** +* 1.3.1.1 : Non-employed shares, Working age (16-65), by gender +******************************************************************************** + +* Prepare validation data +use idPers year dwt valid_employed valid_student valid_inactive /// + valid_retired demAge demMaleFlag using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if inrange(demAge,16,65) + +collapse (mean) valid_student valid_inactive valid_retired /// + [aw = dwt], by(year demMaleFlag) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year sim_employed sim_student sim_inactive sim_retired demAge /// + demMaleFlag using "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if inrange(demAge,16,65) + +collapse (mean) sim_student sim_inactive sim_retired, /// + by(run year demMaleFlag) + +collapse (mean) sim_student sim_inactive sim_retired /// + (sd) sim_student_sd = sim_student /// + sim_inactive_sd = sim_inactive /// + sim_retired_sd = sim_retired /// + , by(year demMaleFlag) + +foreach varname in sim_student sim_inactive sim_retired { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figures + +* Males +preserve + +keep if demMaleFlag == 1 + +make_activity_ne_plot, /// + subtitle("Ages 16-65, males") /// + saving("validation_${country}_activity_status_ts_not_employed_16_65_male") /// + note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.
Denominator is the full population.""') + +restore + +* Females +keep if demMaleFlag == 0 + +* Plot figure +make_activity_ne_plot, /// + subtitle("Ages 16-65, females") /// + saving("validation_${country}_activity_status_ts_not_employed_16_65_female") /// + note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. Denominator is the full population.""') + +graph drop _all + + +******************************************************************************** +* 1.3.1.2 : Non-employed shares, working age (18-65), by partnership status +******************************************************************************** + +* Prepare validation data +use year dwt valid_employed valid_student valid_inactive valid_retired /// + demPartnerStatus demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if inrange(demAge,18,65) + +collapse (mean) valid_student valid_inactive valid_retired /// + [aw = dwt], by(year demPartnerStatus) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year sim_employed sim_student sim_inactive sim_retired /// + demPartnerStatus demMaleFlag demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if inrange(demAge,18,65) + +* Compute mean and sd +collapse (mean) sim_student sim_inactive sim_retired, /// + by(run year demPartnerStatus) + +collapse (mean) sim_student sim_inactive sim_retired /// + (sd) sim_student_sd = sim_student /// + sim_inactive_sd = sim_inactive /// + sim_retired_sd = sim_retired /// + , by(year demPartnerStatus) + +* Approx 95% confidence interval +foreach varname in sim_student sim_inactive sim_retired { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year demPartnerStatus using "$dir_data/temp_valid_stats.dta", /// + keep(3) nogen + + +* Plot figure + +*
Partnered +preserve + +keep if demPartnerStatus == 1 + +make_activity_ne_plot, /// + subtitle("Ages 18-65, partnered") /// + saving("validation_${country}_activity_status_ts_not_employed_18_65_partnered") /// + note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. Denominator is the full population.""') + +restore + +* Single +keep if demPartnerStatus == 2 + +make_activity_ne_plot, /// + subtitle("Ages 18-65, singles") /// + saving("validation_${country}_activity_status_ts_not_employed_18_65_single") /// + note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. Denominator is the full population.""') + +graph drop _all + + +******************************************************************************** +* 1.3.1.3 : Non-employed shares, working age (18-65), by partnership status, +* by gender +******************************************************************************** + +* Prepare validation data +use year dwt valid_employed valid_student valid_inactive valid_retired /// + demPartnerStatus demMaleFlag demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if inrange(demAge,18,65) + +collapse (mean) valid_student valid_inactive valid_retired /// + [aw = dwt], by(year demPartnerStatus demMaleFlag) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year sim_employed sim_student sim_inactive sim_retired /// + demPartnerStatus demMaleFlag demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if inrange(demAge,18,65) + +* Compute mean and sd +collapse (mean) sim_student sim_inactive sim_retired, /// + by(run year demPartnerStatus demMaleFlag) + +collapse (mean) sim_student sim_inactive sim_retired /// + (sd) sim_student_sd = sim_student /// + sim_inactive_sd =
sim_inactive /// + sim_retired_sd = sim_retired /// + , by(year demPartnerStatus demMaleFlag) + +* Approx 95% confidence interval +foreach varname in sim_student sim_inactive sim_retired { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year demPartnerStatus demMaleFlag using /// + "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figures + +foreach g in 1 0 { + * Define gender labels + local gname = cond(`g' == 1, "male", "female") + local gtitle = cond(`g' == 1, "males", "females") + + foreach p in 1 2 { + * Define partnership labels + local pname = cond(`p' == 1, "partnered", "single") + + preserve + * Filter data + keep if demMaleFlag == `g' & demPartnerStatus == `p' + + * Generate the plot + make_activity_ne_plot, /// + subtitle("Ages 18-65, `pname' `gtitle'") /// + saving("validation_${country}_activity_status_ts_not_employed_18_65_`gname'_`pname'") /// + note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
Denominator is the full population.""') + restore + + } +} + +graph drop _all + + +******************************************************************************** +* 1.3.2 : Non-employed shares, Female working age (16-60) +******************************************************************************** + +* Prepare validation data +use year dwt valid_employed valid_student valid_inactive demMaleFlag demAge /// + valid_retired using "$dir_data/ukhls_validation_sample.dta", /// + clear + +* Select sample +keep if inrange(demAge,16,60) + +drop if demMaleFlag == 1 +drop valid_employed demMaleFlag + +collapse (mean) valid_student valid_inactive valid_retired /// + [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year sim_employed sim_student sim_inactive sim_retired demMaleFlag /// + demAge using "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if inrange(demAge,16,60) + +drop if demMaleFlag == 1 +drop sim_employed demMaleFlag + +collapse (mean) sim_student sim_inactive sim_retired, /// + by(run year) + +collapse (mean) sim_student sim_inactive sim_retired /// + (sd) sim_student_sd = sim_student /// + sim_inactive_sd = sim_inactive /// + sim_retired_sd = sim_retired /// + , by(year) + +foreach varname in sim_student sim_inactive sim_retired { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + + +* Plot figure +make_activity_ne_plot, /// + subtitle("Ages 16-60, females") /// + saving("validation_${country}_activity_status_ts_not_employed_16_60_female") /// + note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.
Denominator is the full population.""') + + +******************************************************************************** +* 1.3.3 : Non-employed shares, all ages +******************************************************************************** + +* Prepare validation data +use year dwt valid_employed valid_student valid_inactive demMaleFlag /// + valid_retired using "$dir_data/ukhls_validation_sample.dta", clear + +drop valid_employed + +collapse (mean) valid_student valid_inactive valid_retired /// + [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year sim_employed sim_student sim_inactive sim_retired /// + demMaleFlag using "$dir_data/simulation_sample.dta", clear + +drop sim_employed + +collapse (mean) sim_student sim_inactive sim_retired, /// + by(run year) + +collapse (mean) sim_student sim_inactive sim_retired /// + (sd) sim_student_sd = sim_student /// + sim_inactive_sd = sim_inactive /// + sim_retired_sd = sim_retired /// + , by(year) + +foreach varname in sim_student sim_inactive sim_retired { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +make_activity_ne_plot, /// + subtitle("All ages") /// + saving("validation_${country}_activity_status_ts_not_employed_all_both") /// + note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.
Denominator is the full population.""') + + +******************************************************************************** +* 1.3.3.1 : Non-employed shares, all ages, by gender +******************************************************************************** + +* Prepare validation data +use year dwt valid_employed valid_student valid_inactive demMaleFlag /// + valid_retired using "$dir_data/ukhls_validation_sample.dta", clear + +drop valid_employed + +collapse (mean) valid_student valid_inactive valid_retired /// + [aw = dwt], by(year demMaleFlag) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year sim_employed sim_student sim_inactive sim_retired /// + demMaleFlag using "$dir_data/simulation_sample.dta", clear + +drop sim_employed + +collapse (mean) sim_student sim_inactive sim_retired, /// + by(run year demMaleFlag) + +collapse (mean) sim_student sim_inactive sim_retired /// + (sd) sim_student_sd = sim_student /// + sim_inactive_sd = sim_inactive /// + sim_retired_sd = sim_retired /// + , by(year demMaleFlag) + +foreach varname in sim_student sim_inactive sim_retired { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figures + +* Males +preserve + +keep if demMaleFlag == 1 + +* Plot figure +make_activity_ne_plot, /// + subtitle("All ages, males") /// + saving("validation_${country}_activity_status_ts_not_employed_all_male") /// + note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.
Denominator is the full population.""') + +restore + +* Females +keep if demMaleFlag == 0 + +* Plot figure +make_activity_ne_plot, /// + subtitle("All ages, females") /// + saving("validation_${country}_activity_status_ts_not_employed_all_female") /// + note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. Denominator is the full population.""') + +graph drop _all + + +******************************************************************************** +* 1.3.4 : Non-employed shares, adult population 18+ +******************************************************************************** + +* Prepare validation data +use year dwt valid_employed valid_student valid_inactive demMaleFlag demAge /// + valid_retired using "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +drop if demAge < 18 + +collapse (mean) valid_student valid_inactive valid_retired /// + [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year sim_employed sim_student sim_inactive sim_retired demMaleFlag /// + demAge using "$dir_data/simulation_sample.dta", clear + +* Select sample +drop if demAge < 18 + +collapse (mean) sim_student sim_inactive sim_retired, /// + by(run year) + +collapse (mean) sim_student sim_inactive sim_retired /// + (sd) sim_student_sd = sim_student /// + sim_inactive_sd = sim_inactive /// + sim_retired_sd = sim_retired /// + , by(year) + +foreach varname in sim_student sim_inactive sim_retired { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +make_activity_ne_plot, /// + subtitle("Ages 18+") /// + saving("validation_${country}_activity_status_ts_not_employed_18plus_both") /// + note(`""Notes: Non-employed includes the unemployed and inactive (homemakers,
incapacity, carers, discouraged workers etc.)" "minus students and retired. Denominator is the full population.""') + + +******************************************************************************** +* 1.3.4.1 : Non-employed shares, adult population 18+, by gender +******************************************************************************** + +* Prepare validation data +use year dwt valid_employed valid_student valid_inactive demMaleFlag demAge /// + valid_retired using "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +drop if demAge < 18 + +collapse (mean) valid_student valid_inactive valid_retired /// + [aw = dwt], by(year demMaleFlag) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year sim_employed sim_student sim_inactive sim_retired demMaleFlag /// + demAge using "$dir_data/simulation_sample.dta", clear + +* Select sample +drop if demAge < 18 + +collapse (mean) sim_student sim_inactive sim_retired, /// + by(run year demMaleFlag) + +collapse (mean) sim_student sim_inactive sim_retired /// + (sd) sim_student_sd = sim_student /// + sim_inactive_sd = sim_inactive /// + sim_retired_sd = sim_retired /// + , by(year demMaleFlag) + +foreach varname in sim_student sim_inactive sim_retired { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen + + +* Plot figure + +* Males +preserve + +keep if demMaleFlag == 1 + +make_activity_ne_plot, /// + subtitle("Ages 18+, males") /// + saving("validation_${country}_activity_status_ts_not_employed_18plus_male") /// + note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired.
Denominator is the full population.""') + +restore + +* Females +keep if demMaleFlag == 0 + +make_activity_ne_plot, /// + subtitle("Ages 18+, females") /// + saving("validation_${country}_activity_status_ts_not_employed_18plus_female") /// + note(`""Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. Denominator is the full population.""') + +graph drop _all + + +******************************************************************************** +* 1.4 Mean values over time, share students +******************************************************************************** + +******************************************************************************** +* 1.4.1 Share of students, by age group +******************************************************************************** + +* Prepare validation data +use year dwt demMaleFlag ageGroup valid_student demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +gen student = valid_student + +* Select sample +drop if ageGroup == 0 | ageGroup == 8 + +* Compute means +collapse (mean) student [aw=dwt], by(ageGroup year) + +drop if missing(ageGroup) + +* Restructure data +reshape wide student , i(year) j(ageGroup) + +forvalues i = 1(1)7 { + + rename student`i' student_`i'_valid + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year demMaleFlag ageGroup sim_student using /// + "$dir_data/simulation_sample.dta", clear + +gen student = sim_student + +* Compute means +collapse (mean) student, by(ageGroup run year) + +drop if missing(ageGroup) + +* Restructure data +reshape wide student, i(year run) j(ageGroup) + +forvalues i=1(1)7{ + + rename student`i' student_`i'_sim + +} + +collapse (mean) student* /// + (sd) sd_student_1_sim =student_1_sim /// + sd_student_2_sim = student_2_sim /// + sd_student_3_sim = student_3_sim /// + sd_student_4_sim = student_4_sim /// + sd_student_5_sim =
student_5_sim /// + sd_student_6_sim = student_6_sim /// + sd_student_7_sim = student_7_sim /// + , by(year) + + +forvalues i = 1(1)7 { + + gen student_`i'_sim_high = student_`i'_sim + 1.96*sd_student_`i'_sim + gen student_`i'_sim_low = student_`i'_sim - 1.96*sd_student_`i'_sim + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figures +* Define the specific age titles in a local macro +local age_titles `" "16-19" "20-24" "25-29" "30-34" "35-39" "40-59" "' + +* Loop through the 6 groups +forvalues i = 1/6 { + + * Extract the corresponding title from the macro + local title : word `i' of `age_titles' + + twoway (rarea student_`i'_sim_high student_`i'_sim_low year, /// + sort color(blue%20) /// + legend(label(1 "SimPaths") position(6) rows(1))) /// + (line student_`i'_valid year, sort color(blue) /// + legend(label(2 "UKHLS"))), /// + title("Age `title'") /// + name(student_`i', replace) /// + ylabel(0(0.4)0.8) /// // Note: Standard Stata syntax is 0(step)max + xtitle("") /// + graphregion(color(white)) +} + +* 3. 
Combine and Save +grc1leg student_1 student_2 student_3 student_4 student_5 student_6 , /// + title("Share of Students by Age Group") /// + legendfrom(student_1) /// + graphregion(color(white)) /// + note("Notes: ", size(vsmall)) + +graph export /// +"$dir_output_files/economic_activity/validation_${country}_students_ts_age_groups_both.jpg", /// + replace width(2400) height(1350) quality(100) + +graph drop _all + + +******************************************************************************** +* 1.5 Mean values over time - Partners combined status +******************************************************************************** + +* LF Non-employed partners with LF Non-employed + +* Prepare validation data +use year dwt valid_employed valid_student valid_inactive valid_retired demAge /// + demPartnerStatus idPers idPartner idBu idHh /// + using "$dir_data/ukhls_validation_sample.dta", clear + +* Only keep those with a partner +keep if idPartner != -9 + +sort idPartner year + +* Flag records that share the same idPartner in the same year (multiple partners) +gen to_drop = 1 if idPartner == idPartner[_n-1] & year == year[_n-1] +replace to_drop = 1 if to_drop[_n+1] == 1 & idPartner == idPartner[_n+1] + +* Collect partner employment information (own status saved under the partner's key) +preserve + +drop idPartner demAge +rename idPers idPartner +rename valid_* valid_ptnr_* +rename to_drop to_drop_ptnr + +save "$dir_data/temp_valid_partner.dta", replace + +restore + +* Drop the records flagged above as having multiple partners +drop if to_drop == 1 + +* Merge in partner info +merge 1:1 year idPartner using "$dir_data/temp_valid_partner.dta" + +drop if to_drop_ptnr == 1 + +* Only keep those in which partner's info is available +keep if _m == 3 + +* Select sample: LF non-employed (valid_inactive == 1), ages 18-65 +keep if valid_inactive == 1 + +keep if inrange(demAge,18,65) + +collapse (mean) valid_ptnr_inactive [aw=dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulation data +use year sim_employed sim_student sim_inactive sim_retired demAge run /// + idPers idPartner idBu /// + using
"$dir_data/simulation_sample.dta", clear + +* Only keep those with a partner +keep if idPartner != . + +* Collect partner employment information (own status saved under the partner's key) +preserve + +drop idPartner demAge +rename idPers idPartner +rename sim_* sim_ptnr_* + +save "$dir_data/temp_sim_partner.dta", replace + +restore + +* Merge in partner info +merge 1:1 year idPartner run using "$dir_data/temp_sim_partner.dta" +drop _m + +* Compute share of those who are non-employed and in a partnership whose partner +* is also non-employed + +* Select sample +keep if sim_inactive == 1 + +keep if inrange(demAge,18,65) + + +* Compute mean and sd +collapse (mean) sim_ptnr_inactive, by(run year) + +collapse (mean) sim_ptnr_inactive /// + (sd) sim_ptnr_inactive_sd = sim_ptnr_inactive /// + , by(year) + +* Compute 95% confidence interval +foreach varname in sim_ptnr_inactive { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +* Combine datasets +merge 1:1 year using "$dir_data/temp_valid_stats.dta" +keep if _m == 3 +drop _m + +* Plot +twoway /// +(rarea sim_ptnr_inactive_high sim_ptnr_inactive_low year, sort /// + color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_ptnr_inactive year, sort color(green) /// + legend(label(2 " UKHLS"))), /// + title("Economic Activity Status") /// + subtitle("Share of LF non-employed partnered & partner LF non-employed") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + graphregion(color(white)) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(vsmall)) /// + note("Notes: Ages 18-65 included in sample. Non-employed includes the unemployed and inactive (homemakers, incapacity, carers," "discouraged workers etc.) minus students and retired.
", /// + size(vsmall)) + +graph export /// +"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_partnerhip_shares_non_non_18_65.jpg", /// + replace width(2400) height(1350) quality(100) + + +* LF Non-employed partners with not LF +* Prepare validation data +use year dwt valid_employed valid_student valid_inactive valid_retired /// + demAge demPartnerStatus idPers idPartner idBu idHh using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Only keep those with a partner +keep if idPartner != -9 + +sort idPartner year + +* Flag records that share the same idPartner in the same year (multiple partners) +gen to_drop = 1 if idPartner == idPartner[_n-1] & year == year[_n-1] +replace to_drop = 1 if to_drop[_n+1] == 1 & idPartner == idPartner[_n+1] + +* Collect partner employment information (own status saved under the partner's key) +preserve + +drop idPartner demAge +rename idPers idPartner +rename valid_* valid_ptnr_* +rename to_drop to_drop_ptnr + +save "$dir_data/temp_valid_partner.dta", replace + +restore + +* Drop the records flagged above as having multiple partners +drop if to_drop == 1 + +* Merge in partner info +merge 1:1 year idPartner using "$dir_data/temp_valid_partner.dta" + +drop if to_drop_ptnr == 1 + +* Only keep those in which partner's info is available +keep if _m == 3 + +* Select sample: LF non-employed (valid_inactive == 1), ages 18-65 +keep if valid_inactive == 1 + +keep if inrange(demAge,18,65) + +gen valid_partner_nlf = 0 +replace valid_partner_nlf = 1 if valid_ptnr_student == 1 | /// + valid_ptnr_retired ==1 + +collapse (mean) valid_partner_nlf [aw=dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulation data +use year sim_employed sim_student sim_inactive sim_retired demAge run /// + idPers idPartner idBu /// + using "$dir_data/simulation_sample.dta", clear + +* Only keep those with a partner +keep if idPartner != .
+ +* Collect partner employment information (own status saved under the partner's key) +preserve + +drop idPartner demAge +rename idPers idPartner +rename sim_* sim_ptnr_* + +save "$dir_data/temp_sim_partner.dta", replace + +restore + +* Merge in partner info +merge 1:1 year idPartner run using "$dir_data/temp_sim_partner.dta" +drop _m + +* Compute share of those who are non-employed and in a partnership whose partner +* is not in the labour force (student or retired) + +* Select sample +keep if sim_inactive == 1 + +keep if inrange(demAge,18,65) + +gen sim_partner_nlf = 0 +replace sim_partner_nlf = 1 if sim_ptnr_student == 1 | sim_ptnr_retired == 1 + + +* Compute mean and sd +collapse (mean) sim_partner_nlf, by(run year) + +collapse (mean) sim_partner_nlf /// + (sd) sim_partner_nlf_sd = sim_partner_nlf /// + , by(year) + +* Compute 95% confidence interval +foreach varname in sim_partner_nlf { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +* Combine datasets +merge 1:1 year using "$dir_data/temp_valid_stats.dta" +keep if _m == 3 +drop _m + +* Plot +twoway /// +(rarea sim_partner_nlf_high sim_partner_nlf_low year, sort /// + color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_partner_nlf year, sort color(green) /// + legend(label(2 " UKHLS"))), /// + title("Economic Activity Status") /// + subtitle("Share of LF non-employed partnered & partner not LF") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + graphregion(color(white)) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(vsmall)) /// + note("Notes: Ages 18-65 included in sample.
Not LF includes student and retired here.", /// + size(vsmall)) + +graph export /// + "$dir_output_files/economic_activity/validation_${country}_activity_status_ts_partnerhip_shares_non_notlf_18_65.jpg", /// + replace width(2400) height(1350) quality(100) + + +* Employed partnered with employed + +* Prepare validation data +use year dwt valid_employed valid_student valid_inactive valid_retired /// + demAge demPartnerStatus idPers idPartner idBu idHh /// + using "$dir_data/ukhls_validation_sample.dta", clear + +* Only keep those with a partner +keep if idPartner != -9 + +sort idPartner year + +* Flag records that share the same idPartner in the same year (multiple partners) +gen to_drop = 1 if idPartner == idPartner[_n-1] & year == year[_n-1] +replace to_drop = 1 if to_drop[_n+1] == 1 & idPartner == idPartner[_n+1] + +* Collect partner employment information (own status saved under the partner's key) +preserve + +drop idPartner demAge +rename idPers idPartner +rename valid_* valid_ptnr_* +rename to_drop to_drop_ptnr + +save "$dir_data/temp_valid_partner.dta", replace + +restore + +* Drop the records flagged above as having multiple partners +drop if to_drop == 1 + +* Merge in partner info +merge 1:1 year idPartner using "$dir_data/temp_valid_partner.dta" + +drop if to_drop_ptnr == 1 + +* Only keep those in which partner's info is available +keep if _m == 3 + +* Select sample: employed (valid_employed == 1), ages 18-65 +keep if valid_employed == 1 + +keep if inrange(demAge,18,65) + +collapse (mean) valid_ptnr_employed [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulation data +use year sim_employed sim_student sim_inactive sim_retired demAge run /// + idPers idPartner idBu /// + using "$dir_data/simulation_sample.dta", clear + +* Only keep those with a partner +keep if idPartner != .
+
+* Collect partner employment information
+preserve
+
+drop idPartner demAge
+rename idPers idPartner
+rename sim_* sim_ptnr_*
+
+save "$dir_data/temp_sim_partner.dta", replace
+
+restore
+
+* Merge in partner info (unmatched observations are kept; _merge is dropped)
+merge 1:1 year idPartner run using "$dir_data/temp_sim_partner.dta"
+drop _m
+
+* Compute share of those who are employed and in a partnership whose partner
+* is also employed
+
+* Select sample
+keep if sim_employed == 1
+
+keep if inrange(demAge,18,65)
+
+* Compute mean within run-year, then mean and sd across runs by year
+collapse (mean) sim_ptnr_employed, by(run year)
+
+collapse (mean) sim_ptnr_employed ///
+    (sd) sim_ptnr_employed_sd = sim_ptnr_employed ///
+    , by(year)
+
+* Approx 95% band: mean +/- 1.96 x sd across runs
+foreach varname in sim_ptnr_employed {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+* Combine with the validation statistics, keeping years present in both
+merge 1:1 year using "$dir_data/temp_valid_stats.dta"
+keep if _m == 3
+drop _m
+
+* Plot simulated band against the validation series
+twoway ///
+(rarea sim_ptnr_employed_high sim_ptnr_employed_low year, sort ///
+    color(green%20) legend(label(1 "SimPaths"))) ///
+(line valid_ptnr_employed year, sort color(green) ///
+    legend(label(2 " UKHLS"))), ///
+    title("Economic Activity Status") ///
+    subtitle("Share of employed partnered whose partner is also employed") ///
+    xtitle("Year", size(small)) ///
+    ytitle("Share", size(small)) ///
+    graphregion(color(white)) ///
+    xlabel(, labsize(small)) ///
+    ylabel(, labsize(small)) ///
+    legend(size(vsmall)) ///
+    note("Notes: Ages 18-65. ", ///
+    size(vsmall))
+
+graph export ///
+    "$dir_output_files/economic_activity/validation_${country}_activity_status_ts_partnerhip_shares_emp_emp_18_65.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+
+
+* Employed share partnered with LF non-employed
+
+* Prepare validation data
+use year dwt valid_employed valid_student valid_inactive valid_retired demAge ///
+    demPartnerStatus idPers idPartner idBu idHh ///
+    using "$dir_data/ukhls_validation_sample.dta", clear
+
+* Only keep those with a partner (idPartner == -9 is excluded)
+keep if idPartner != -9
+
+sort idPartner year
+
+* Flag every observation that shares its idPartner with another in the same year
+gen to_drop = 1 if idPartner == idPartner[_n-1] & year == year[_n-1]
+replace to_drop = 1 if to_drop[_n+1] == 1 & idPartner == idPartner[_n+1]
+
+* Collect partner employment information
+preserve
+
+drop idPartner demAge
+rename idPers idPartner
+rename valid_* valid_ptnr_*
+rename to_drop to_drop_ptnr
+
+save "$dir_data/temp_valid_partner.dta", replace
+
+restore
+
+* Drop flagged observations so that year-idPartner is unique for the merge
+drop if to_drop == 1
+
+* Merge in partner info
+merge 1:1 year idPartner using "$dir_data/temp_valid_partner.dta"
+* Also drop observations whose partner record was itself flagged
+drop if to_drop_ptnr == 1
+
+* Only keep those in which partner's info is available
+keep if _m == 3
+
+* Select sample
+keep if valid_employed == 1
+
+drop if demAge > 65
+drop if demAge < 18
+* Weighted annual share of employed individuals with valid_ptnr_inactive == 1
+collapse (mean) valid_ptnr_inactive [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+
+* Prepare simulation data
+use year sim_employed sim_student sim_inactive sim_retired demAge run ///
+    idPers idPartner idBu ///
+    using "$dir_data/simulation_sample.dta", clear
+
+* Only keep those with a partner
+keep if idPartner != .
+
+* Collect partner employment information
+preserve
+
+drop idPartner demAge
+rename idPers idPartner
+rename sim_* sim_ptnr_*
+
+save "$dir_data/temp_sim_partner.dta", replace
+
+restore
+
+* Merge in partner info (unmatched observations are kept; _merge is dropped)
+merge 1:1 year idPartner run using "$dir_data/temp_sim_partner.dta"
+drop _m
+
+* Compute share of those who are employed and in a partnership whose partner
+* has sim_ptnr_inactive == 1 (labelled "non-employed" in the figure)
+
+* Select sample
+keep if sim_employed == 1
+
+drop if demAge > 65
+drop if demAge < 18
+
+* Compute mean within run-year, then mean and sd across runs by year
+collapse (mean) sim_ptnr_inactive, by(run year)
+
+collapse (mean) sim_ptnr_inactive ///
+    (sd) sim_ptnr_inactive_sd = sim_ptnr_inactive ///
+    , by(year)
+
+* Approx 95% band: mean +/- 1.96 x sd across runs
+foreach varname in sim_ptnr_inactive {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+* Combine with the validation statistics, keeping years present in both
+merge 1:1 year using "$dir_data/temp_valid_stats.dta"
+keep if _m == 3
+drop _m
+
+* Plot simulated band against the validation series
+twoway ///
+(rarea sim_ptnr_inactive_high sim_ptnr_inactive_low year, sort ///
+    color(green%20) legend(label(1 "SimPaths"))) ///
+(line valid_ptnr_inactive year, sort color(green) ///
+    legend(label(2 " UKHLS"))), ///
+    title("Economic Activity Status") ///
+    subtitle("Share of employed partnered & partner is non-employed") ///
+    xtitle("Year", size(small)) ///
+    ytitle("Share", size(small)) ///
+    graphregion(color(white)) ///
+    xlabel(, labsize(small)) ///
+    ylabel(, labsize(small)) ///
+    legend(size(vsmall)) ///
+    note("Notes: Ages 18-65.", ///
+    size(vsmall))
+
+
+graph export ///
+    "$dir_output_files/economic_activity/validation_${country}_activity_status_ts_partnerhip_shares_emp_non_18_65.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+
+* Employed share partnered with not LF (student or retired)
+
+* Prepare validation data
+use year dwt valid_employed valid_student valid_inactive valid_retired ///
+    demAge demPartnerStatus idPers idPartner idBu idHh ///
+    using "$dir_data/ukhls_validation_sample.dta", clear
+
+* Only keep those with a partner
+keep if idPartner != -9
+
+sort idPartner year
+
+* Flag every observation that shares its idPartner with another in the same year
+gen to_drop = 1 if idPartner == idPartner[_n-1] & year == year[_n-1]
+replace to_drop = 1 if to_drop[_n+1] == 1 & idPartner == idPartner[_n+1]
+
+* Collect partner employment information
+preserve
+
+drop idPartner demAge
+rename idPers idPartner
+rename valid_* valid_ptnr_*
+rename to_drop to_drop_ptnr
+
+save "$dir_data/temp_valid_partner.dta", replace
+
+restore
+
+* Drop flagged observations so that year-idPartner is unique for the merge
+drop if to_drop == 1
+
+* Merge in partner info
+merge 1:1 year idPartner using "$dir_data/temp_valid_partner.dta"
+* Also drop observations whose partner record was itself flagged
+drop if to_drop_ptnr == 1
+
+* Only keep those in which partner's info is available
+keep if _m == 3
+
+* Select sample
+keep if valid_employed == 1
+
+drop if demAge > 65
+drop if demAge < 18
+* Partner not in LF: sum of the partner's student and retired indicators
+gen valid_ptnr_out = 0
+replace valid_ptnr_out = valid_ptnr_student + valid_ptnr_retired
+
+collapse (mean) valid_ptnr_out [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+
+* Prepare simulation data
+use year sim_employed sim_student sim_inactive sim_retired demAge run ///
+    idPers idPartner idBu ///
+    using "$dir_data/simulation_sample.dta", clear
+
+* Only keep those with a partner
+keep if idPartner != .
+
+* Collect partner employment information
+preserve
+
+drop idPartner demAge
+rename idPers idPartner
+rename sim_* sim_ptnr_*
+
+save "$dir_data/temp_sim_partner.dta", replace
+
+restore
+
+* Merge in partner info (unmatched observations are kept; _merge is dropped)
+merge 1:1 year idPartner run using "$dir_data/temp_sim_partner.dta"
+drop _m
+
+* Compute share of those who are employed and in a partnership whose partner
+* is a student or retired (labelled "not LF" in the figure)
+
+* Select sample
+keep if sim_employed == 1
+
+drop if demAge > 65
+drop if demAge < 18
+
+* Partner not in LF: sum of the partner's student and retired indicators
+gen sim_ptnr_out = 0
+replace sim_ptnr_out = sim_ptnr_student + sim_ptnr_retired
+
+
+* Compute mean within run-year, then mean and sd across runs by year
+collapse (mean) sim_ptnr_out, by(run year)
+
+collapse (mean) sim_ptnr_out ///
+    (sd) sim_ptnr_out_sd = sim_ptnr_out ///
+    , by(year)
+
+* Approx 95% band: mean +/- 1.96 x sd across runs
+foreach varname in sim_ptnr_out {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+* Combine with the validation statistics, keeping years present in both
+merge 1:1 year using "$dir_data/temp_valid_stats.dta"
+keep if _m == 3
+drop _m
+* Plot simulated band against the validation series
+twoway ///
+(rarea sim_ptnr_out_high sim_ptnr_out_low year, sort ///
+    color(green%20) legend(label(1 "SimPaths"))) ///
+(line valid_ptnr_out year, sort color(green) ///
+    legend(label(2 " UKHLS"))), ///
+    title("Economic Activity Status") ///
+    subtitle("Share of employed partnered & partner is not LF") ///
+    xtitle("Year", size(small)) ///
+    ytitle("Share", size(small)) ///
+    graphregion(color(white)) ///
+    xlabel(, labsize(small)) ///
+    ylabel(, labsize(small)) ///
+    legend(size(vsmall)) ///
+    note("Notes: Ages 18-65. ", ///
+    size(vsmall))
+
+graph export ///
+"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_partnerhip_shares_emp_notlf_18_65.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+
+graph drop _all
diff --git a/validation/02_simulated_output_validation/do_files/04_02_plot_education_level.do b/validation/02_simulated_output_validation/do_files/04_02_plot_education_level.do
new file mode 100644
index 000000000..b69517772
--- /dev/null
+++ b/validation/02_simulated_output_validation/do_files/04_02_plot_education_level.do
@@ -0,0 +1,720 @@
+********************************************************************************
+* SECTION:          Validation
+* OBJECT:           Education
+* AUTHORS:          Ashley Burdett
+* LAST UPDATE:      Jan 2026
+* COUNTRY:          UK
+********************************************************************************
+* NOTES:            This do file plots simulated and UKHLS education shares.
+*                   NOTE(review): an earlier remark here said transitions could
+*                   not be examined (cross-sectional SILC data); looks stale.
+********************************************************************************
+
+********************************************************************************
+* 0 : Programmes
+********************************************************************************
+
+* make_edu_plot: time series of education shares, simulated bands vs UKHLS lines
+cap program drop make_edu_plot
+* Options: subtitle() graph subtitle; saving() file stem; note() quoted note text
+program define make_edu_plot
+    syntax, subtitle(string) saving(string) note(string)
+    * Uses year, sim_edu_{high,med,low,na}_high/_low and valid_edu_* in memory
+    twoway ///
+    (rarea sim_edu_high_high sim_edu_high_low year, sort color(green%20) ///
+        legend(label(1 "High education, SimPaths"))) ///
+    (line valid_edu_high year, sort color(green) ///
+        legend(label(2 "High education, UKHLS"))) ///
+    (rarea sim_edu_med_high sim_edu_med_low year, sort color(blue%20) ///
+        legend(label(3 "Medium education, SimPaths"))) ///
+    (line valid_edu_med year, sort color(blue) ///
+        legend(label(4 "Medium education, UKHLS"))) ///
+    (rarea sim_edu_low_high sim_edu_low_low year, sort color(red%20) ///
+        legend(label(5 "Low education, SimPaths"))) ///
+    (line valid_edu_low year, sort color(red) ///
+        legend(label(6 "Low education, UKHLS"))) ///
+    (rarea sim_edu_na_high sim_edu_na_low year, sort color(purple%20) ///
+        legend(label(7 "Initial education spell, SimPaths"))) ///
+    (line valid_edu_na year, sort color(purple) ///
+        legend(label(8 "Initial education spell, UKHLS"))), ///
+        title("Educational Attainment") ///
+        subtitle("`subtitle'") ///
+        xtitle("Year", size(small)) ///
+        ytitle("Share", size(small)) ///
+        graphregion(color(white)) ///
+        xlabel(, labsize(small)) ///
+        ylabel(, labsize(small)) ///
+        legend(size(small)) ///
+        note(`note', size(vsmall))
+    * Export to $dir_output_files/education/<saving>.jpg
+    graph export "$dir_output_files/education/`saving'.jpg", replace width(2400) height(1350) quality(100)
+end
+
+********************************************************************************
+* 1 : Mean values over time
+********************************************************************************
+
+********************************************************************************
+* 1.1 : Educational attainment
+********************************************************************************
+
+********************************************************************************
+* 1.1.1 : Educational attainment - 16-65
+********************************************************************************
+
+* Prepare validation data
+use year dwt valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    demAge labC4 using "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,16,65)
+
+* Compute weighted annual shares
+collapse (mean) valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year sim_edu_high sim_edu_med sim_edu_low sim_edu_na labC4 ///
+    demAge using "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,16,65)
+
+* Compute shares within run-year, then mean and sd across runs by year
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na, by(run year)
+
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na ///
+    (sd) sim_edu_high_sd = sim_edu_high ///
+    sim_edu_med_sd = sim_edu_med ///
+    sim_edu_low_sd = sim_edu_low ///
+    sim_edu_na_sd = sim_edu_na ///
+    , by(year)
+
+* Approx 95% band: mean +/- 1.96 x sd across runs
+foreach varname in sim_edu_high sim_edu_med sim_edu_low sim_edu_na {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+* Combine datasets, keeping years present in both
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Plot figure
+make_edu_plot, ///
+    subtitle("Ages 16-65") ///
+    saving("validation_${country}_education_ts_16_65_both") ///
+    note(`""Notes:""')
+
+
+********************************************************************************
+* 1.1.2 : Educational attainment - 16-65, by gender
+********************************************************************************
+
+* Prepare validation data
+use year dwt valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    demMaleFlag labC4 demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,16,65)
+
+* Compute weighted annual shares by gender
+collapse (mean) valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    [aw = dwt], by(year demMaleFlag)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year sim_edu_high sim_edu_med sim_edu_low sim_edu_na demMaleFlag ///
+    labC4 demAge using "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,16,65)
+
+* Compute shares within run-year-gender, then mean and sd across runs
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na, ///
+    by(run year demMaleFlag)
+
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na ///
+    (sd) sim_edu_high_sd = sim_edu_high ///
+    (sd) sim_edu_med_sd = sim_edu_med ///
+    (sd) sim_edu_low_sd = sim_edu_low ///
+    (sd) sim_edu_na_sd = sim_edu_na ///
+    , by(year demMaleFlag )
+
+* Approx 95% band: mean +/- 1.96 x sd across runs
+foreach varname in sim_edu_high sim_edu_med sim_edu_low sim_edu_na {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+* Combine datasets, keeping year-gender cells present in both
+merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Female
+preserve
+
+keep if demMaleFlag == 0
+
+* Plot figure
+make_edu_plot, ///
+    subtitle("Ages 16-65, females") ///
+    saving("validation_${country}_education_ts_16_65_female") ///
+    note(`""Notes:""')
+
+restore
+
+* Male
+preserve
+
+keep if demMaleFlag == 1
+
+* Plot figure
+make_edu_plot, ///
+    subtitle("Ages 16-65, males") ///
+    saving("validation_${country}_education_ts_16_65_male") ///
+    note(`""Notes:""')
+
+restore
+
+
+********************************************************************************
+* 1.1.3 : Educational attainment - 16-30
+********************************************************************************
+
+* Prepare validation data (NOTE(review): demAge is listed twice in the varlist)
+use year dwt valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    demAge labC4 demAge using "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+drop if demAge > 30
+drop if demAge < 16
+drop labC4
+
+* Compute weighted annual shares
+collapse (mean) valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year sim_edu_high sim_edu_med sim_edu_low sim_edu_na demAge labC4 ///
+    using "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+drop if demAge > 30
+drop if demAge < 16
+
+* Compute shares within run-year, then mean and sd across runs by year
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na, by(run year)
+
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na ///
+    (sd) sim_edu_high_sd = sim_edu_high ///
+    sim_edu_med_sd = sim_edu_med ///
+    sim_edu_low_sd = sim_edu_low ///
+    sim_edu_na_sd = sim_edu_na ///
+    , by(year)
+
+* Approx 95% band: mean +/- 1.96 x sd across runs
+foreach varname in sim_edu_high sim_edu_med sim_edu_low sim_edu_na {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+* Combine datasets, keeping years present in both
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+make_edu_plot, ///
+    subtitle("Ages 16-30") ///
+    saving("validation_${country}_education_ts_16_30_both") ///
+    note(`""Notes:""')
+
+
+********************************************************************************
+* 1.1.4 : Educational attainment - 16-30, by gender
+********************************************************************************
+* Prepare validation data (NOTE(review): demAge is listed twice in the varlist)
+use year dwt valid_edu_high valid_edu_med valid_edu_low valid_edu_na demAge ///
+    demMaleFlag labC4 demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+drop if demAge > 30
+drop if demAge < 16
+drop labC4
+
+* Compute weighted annual shares by gender
+collapse (mean) valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    [aw = dwt], by(year demMaleFlag)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year sim_edu_high sim_edu_med sim_edu_low sim_edu_na demAge ///
+    demMaleFlag labC4 using "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+drop if demAge > 30
+drop if demAge < 16
+
+* Compute shares within run-year-gender, then mean and sd across runs
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na, ///
+    by(run year demMaleFlag)
+
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na ///
+    (sd) sim_edu_high_sd = sim_edu_high ///
+    (sd) sim_edu_med_sd = sim_edu_med ///
+    (sd) sim_edu_low_sd = sim_edu_low ///
+    (sd) sim_edu_na_sd = sim_edu_na ///
+    , by(year demMaleFlag)
+
+* Approx 95% band: mean +/- 1.96 x sd across runs
+foreach varname in sim_edu_high sim_edu_med sim_edu_low sim_edu_na {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+* Combine datasets, keeping year-gender cells present in both
+merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+
+* Female
+preserve
+
+keep if demMaleFlag == 0
+
+* Plot figure
+make_edu_plot, ///
+    subtitle("Ages 16-30, females") ///
+    saving("validation_${country}_education_ts_16_30_female") ///
+    note(`""Notes:""')
+
+restore
+
+* Male
+preserve
+
+keep if demMaleFlag == 1
+
+* Plot figure
+make_edu_plot, ///
+    subtitle("Ages 16-30, males") ///
+    saving("validation_${country}_education_ts_16_30_male") ///
+    note(`""Notes:""')
+
+restore
+
+
+********************************************************************************
+* 1.1.5 : Educational attainment - 31-40
+********************************************************************************
+
+* Prepare validation data (NOTE(review): demAge is listed twice in the varlist)
+use year dwt valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    demAge labC4 demAge using "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+drop if demAge > 40
+drop if demAge < 31
+drop labC4
+
+* Compute weighted annual shares
+collapse (mean) valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year sim_edu_high sim_edu_med sim_edu_low sim_edu_na demAge labC4 ///
+    using "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+drop if demAge > 40
+drop if demAge < 31
+
+* Compute shares within run-year, then mean and sd across runs by year
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na, by(run year)
+
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na ///
+    (sd) sim_edu_high_sd = sim_edu_high ///
+    sim_edu_med_sd = sim_edu_med ///
+    sim_edu_low_sd = sim_edu_low ///
+    sim_edu_na_sd = sim_edu_na ///
+    , by(year)
+
+* Approx 95% band: mean +/- 1.96 x sd across runs
+foreach varname in sim_edu_high sim_edu_med sim_edu_low sim_edu_na {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+* Combine datasets, keeping years present in both
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+make_edu_plot, ///
+    subtitle("Ages 31-40") ///
+    saving("validation_${country}_education_ts_31_40_both") ///
+    note(`""Notes:""')
+
+
+********************************************************************************
+* 1.1.6 : Educational attainment 31-40, by gender
+********************************************************************************
+* Prepare validation data (NOTE(review): demAge is listed twice in the varlist)
+use year dwt valid_edu_high valid_edu_med valid_edu_low valid_edu_na demAge ///
+    demMaleFlag labC4 demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+drop if demAge > 40
+drop if demAge < 31
+drop labC4
+
+* Compute weighted annual shares by gender
+collapse (mean) valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    [aw = dwt], by(year demMaleFlag)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year sim_edu_high sim_edu_med sim_edu_low sim_edu_na demAge ///
+    demMaleFlag labC4 using "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+drop if demAge > 40
+drop if demAge < 31
+
+* Compute shares within run-year-gender, then mean and sd across runs
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na, ///
+    by(run year demMaleFlag)
+
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na ///
+    (sd) sim_edu_high_sd = sim_edu_high ///
+    (sd) sim_edu_med_sd = sim_edu_med ///
+    (sd) sim_edu_low_sd = sim_edu_low ///
+    (sd) sim_edu_na_sd = sim_edu_na ///
+    , by(year demMaleFlag)
+
+* Approx 95% band: mean +/- 1.96 x sd across runs
+foreach varname in sim_edu_high sim_edu_med sim_edu_low sim_edu_na {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+* Combine datasets, keeping year-gender cells present in both
+merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+
+* Female
+preserve
+
+keep if demMaleFlag == 0
+
+* Plot figure
+make_edu_plot, ///
+    subtitle("Ages 31-40, females") ///
+    saving("validation_${country}_education_ts_31_40_female") ///
+    note(`""Notes:""')
+
+restore
+
+* Male
+preserve
+
+keep if demMaleFlag == 1
+
+* Plot figure
+make_edu_plot, ///
+    subtitle("Ages 31-40, males") ///
+    saving("validation_${country}_education_ts_31_40_male") ///
+    note(`""Notes:""')
+
+restore
+
+
+********************************************************************************
+* 1.1.7 : Educational attainment - 41-65
+********************************************************************************
+
+* Prepare validation data (NOTE(review): demAge is listed twice in the varlist)
+use year dwt valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    demAge labC4 demAge using "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+drop if demAge > 65
+drop if demAge < 41
+drop labC4
+
+* Compute weighted annual shares
+collapse (mean) valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year sim_edu_high sim_edu_med sim_edu_low sim_edu_na demAge labC4 ///
+    using "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+drop if demAge > 65
+drop if demAge < 41
+
+* Compute shares within run-year, then mean and sd across runs by year
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na, by(run year)
+
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na ///
+    (sd) sim_edu_high_sd = sim_edu_high ///
+    sim_edu_med_sd = sim_edu_med ///
+    sim_edu_low_sd = sim_edu_low ///
+    sim_edu_na_sd = sim_edu_na ///
+    , by(year)
+
+* Approx 95% band: mean +/- 1.96 x sd across runs
+foreach varname in sim_edu_high sim_edu_med sim_edu_low sim_edu_na {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+* Combine datasets, keeping years present in both
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+make_edu_plot, ///
+    subtitle("Ages 41-65") ///
+    saving("validation_${country}_education_ts_41_65_both") ///
+    note(`""Notes:""')
+
+
+********************************************************************************
+* 1.1.8 : Educational attainment 41-65, by gender
+********************************************************************************
+* Prepare validation data (NOTE(review): demAge is listed twice in the varlist)
+use year dwt valid_edu_high valid_edu_med valid_edu_low valid_edu_na demAge ///
+    demMaleFlag labC4 demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+drop if demAge > 65
+drop if demAge < 41
+drop labC4
+
+* Compute weighted annual shares by gender
+collapse (mean) valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    [aw = dwt], by(year demMaleFlag)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year sim_edu_high sim_edu_med sim_edu_low sim_edu_na demAge ///
+    demMaleFlag labC4 using "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+drop if demAge > 65
+drop if demAge < 41
+
+* Compute shares within run-year-gender, then mean and sd across runs
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na, ///
+    by(run year demMaleFlag)
+
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na ///
+    (sd) sim_edu_high_sd = sim_edu_high ///
+    (sd) sim_edu_med_sd = sim_edu_med ///
+    (sd) sim_edu_low_sd = sim_edu_low ///
+    (sd) sim_edu_na_sd = sim_edu_na ///
+    , by(year demMaleFlag)
+
+* Approx 95% band: mean +/- 1.96 x sd across runs
+foreach varname in sim_edu_high sim_edu_med sim_edu_low sim_edu_na {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+* Combine datasets, keeping year-gender cells present in both
+merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+
+* Female
+preserve
+
+keep if demMaleFlag == 0
+
+* Plot figure
+make_edu_plot, ///
+    subtitle("Ages 41-65, females") ///
+    saving("validation_${country}_education_ts_41_65_female") ///
+    note(`""Notes:""')
+
+restore
+
+* Male
+preserve
+
+keep if demMaleFlag == 1
+
+* Plot figure
+make_edu_plot, ///
+    subtitle("Ages 41-65, males") ///
+    saving("validation_${country}_education_ts_41_65_male") ///
+    note(`""Notes:""')
+
+restore
+
+
+********************************************************************************
+* 1.1.9 : Educational attainment - 66-70
+********************************************************************************
+
+* Prepare validation data
+use year dwt valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    demAge labC4 demAge using "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+drop if demAge > 70
+drop if demAge < 66
+drop labC4
+
+* Compute weighted annual shares
+collapse (mean) valid_edu_high valid_edu_med valid_edu_low valid_edu_na ///
+    [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year sim_edu_high sim_edu_med sim_edu_low sim_edu_na demAge labC4 ///
+    using "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+drop if demAge > 70
+drop if demAge < 66
+
+* Compute shares within run-year, then mean and sd across runs by year
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na, by(run year)
+
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low sim_edu_na ///
+    (sd) sim_edu_high_sd = sim_edu_high ///
+    sim_edu_med_sd = sim_edu_med ///
+    sim_edu_low_sd = sim_edu_low ///
+    sim_edu_na_sd = sim_edu_na ///
+    , by(year)
+
+* Approx 95% band: mean +/- 1.96 x sd across runs
+foreach varname in sim_edu_high sim_edu_med sim_edu_low sim_edu_na {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+* Combine datasets, keeping years present in both
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+make_edu_plot, ///
+    subtitle("Ages 66-70") ///
+    saving("validation_${country}_education_ts_66_70_both") ///
+    note(`""Notes:""')
+
+
+********************************************************************************
+* 1.2 : Educational attainment when leave education
+********************************************************************************
+
+********************************************************************************
+* 1.2.1 : Educational attainment when leave education - 16 - 65
+********************************************************************************
+
+* Prepare validation data
+use year idPers dwt valid_edu_high valid_edu_med valid_edu_low labC4 ///
+    demAge using "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,16,65)
+
+* Flag leavers: labC4 == 2 in the previous year and not 2 in the current one
+sort idPers year
+gen left_edu = 1 if idPers == idPers[_n-1] & ///
+    labC4 != 2 & labC4[_n-1] == 2 & year == year[_n-1]+1
+
+keep if left_edu == 1
+
+collapse (mean) valid_edu_high valid_edu_med valid_edu_low [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year idPers sim_edu_high sim_edu_med sim_edu_low labC4 demAge ///
+    using "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+keep if inrange(demAge,16,65)
+
+* Flag leavers within each run: ids repeat across runs, so sort and lag by run
+sort run idPers year
+gen left_edu_sim = 1 if run == run[_n-1] & idPers == idPers[_n-1] & ///
+    labC4 != "Student" & labC4[_n-1] == "Student" & year == year[_n-1]+1
+
+keep if left_edu_sim == 1
+
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low, by(run year)
+
+collapse (mean) sim_edu_high sim_edu_med sim_edu_low ///
+    (sd) sim_edu_high_sd = sim_edu_high ///
+    sim_edu_med_sd = sim_edu_med ///
+    sim_edu_low_sd = sim_edu_low ///
+    , by(year)
+* Approx 95% band: mean +/- 1.96 x sd across runs
+foreach varname in sim_edu_high sim_edu_med sim_edu_low {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+* Combine datasets, keeping years present in both
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Plot figure
+twoway ///
+(rarea sim_edu_high_high sim_edu_high_low year, sort color(green%20) ///
+    legend(label(1 "High education, simulated"))) ///
+(line valid_edu_high year, sort color(green) ///
+    legend(label(2 "High education, UKHLS"))) ///
+(rarea sim_edu_med_high sim_edu_med_low year, sort color(blue%20) ///
+    legend(label(3 "Medium education, simulated"))) ///
+(line valid_edu_med year, sort color(blue) ///
+    legend(label(4 "Medium education, UKHLS"))) ///
+(rarea sim_edu_low_high sim_edu_low_low year, sort color(red%20) ///
+    legend(label(5 "Low education, simulated"))) ///
+(line
valid_edu_low year, sort color(red) ///
+    legend(label(6 "Low education, UKHLS"))), ///
+    title("Educational Attainment When Leave Education") ///
+    subtitle("Ages 16-65") ///
+    xtitle("Year", size(small)) ///
+    ytitle("Share", size(small)) ///
+    graphregion(color(white)) ///
+    xlabel(, labsize(small)) ///
+    ylabel(, labsize(small)) ///
+    legend(size(small)) ///
+    note("Notes: ", ///
+    size(vsmall))
+
+graph export ///
+"$dir_output_files/education/validation_${country}_leave_education_ts_16_65_both.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+graph drop _all
diff --git a/validation/02_simulated_output_validation/do_files/04_03_plot_gross_income.do b/validation/02_simulated_output_validation/do_files/04_03_plot_gross_income.do
new file mode 100644
index 000000000..1b1f051ff
--- /dev/null
+++ b/validation/02_simulated_output_validation/do_files/04_03_plot_gross_income.do
@@ -0,0 +1,985 @@
+********************************************************************************
+* PROJECT:          SimPaths UK
+* SECTION:          Validation
+* OBJECT:           Gross income
+* AUTHORS:          Ashley Burdett
+* LAST UPDATE:      Jan 2026
+* COUNTRY:          UK
+********************************************************************************
+* NOTES:
+********************************************************************************
+
+********************************************************************************
+* 1 : Mean values over time
+********************************************************************************
+
+********************************************************************************
+* 1.1 : Mean values over time - Benefit unit amounts
+********************************************************************************
+
+* Prepare validation data
+use year dwt idBu idPers demAge valid_yGrossBuLevelYear using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+keep if demAge >= 16
+* Order within unit by idPers so the kept row (and its dwt) is reproducible
+* Keep one observation per benefit unit
+bysort year idBu (idPers): gen first_person = (_n == 1)
+keep if first_person == 1
+
+* Trim outliers (pooled 1st/99th percentiles) when $trim_outliers is "true"
+if "$trim_outliers" == "true" {
+
+    sum valid_yGrossBuLevelYear, d
+
+    replace valid_yGrossBuLevelYear = . if ///
+        valid_yGrossBuLevelYear < r(p1) | ///
+        valid_yGrossBuLevelYear > r(p99)
+
+}
+* Weighted annual mean; the weight is the dwt of the retained unit member
+collapse (mean) valid_yGrossBuLevelYear [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+
+* Prepare simulated data
+use run year idPers idBu demAge sim_yGrossBuLevelYear using ///
+    "$dir_data/simulation_sample.dta", clear
+
+keep if demAge >= 16
+
+* Keep one observation per benefit unit (lowest idPers, as for validation data)
+bysort run year idBu (idPers): gen first_person = (_n == 1)
+keep if first_person == 1
+
+* Trim outliers (pooled 1st/99th percentiles) when $trim_outliers is "true"
+if "$trim_outliers" == "true" {
+
+    sum sim_yGrossBuLevelYear, d
+
+    replace sim_yGrossBuLevelYear = . if sim_yGrossBuLevelYear < r(p1) | ///
+        sim_yGrossBuLevelYear > r(p99)
+
+}
+* Mean within run-year, then mean and sd across runs by year
+collapse (mean) sim_yGrossBuLevelYear, by(run year)
+
+collapse (mean) sim_yGrossBuLevelYear ///
+    (sd) sim_yGrossBuLevelYear_sd = sim_yGrossBuLevelYear ///
+    , by(year)
+* Approx 95% band: mean +/- 1.96 x sd across runs
+foreach varname in sim_yGrossBuLevelYear {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+* Combine datasets, keeping years present in both
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+
+* Plot figure
+twoway ///
+    (rarea sim_yGrossBuLevelYear_high sim_yGrossBuLevelYear_low year, ///
+        sort color(green%20) legend(label(1 "SimPaths"))) ///
+    (line valid_yGrossBuLevelYear year, sort color(green) ///
+        legend(label(2 "UKHLS"))), ///
+    title("Benefit Unit Gross Income") ///
+    subtitle("") ///
+    xtitle("Year", size(small)) ///
+    ytitle("GBP per year", size(small)) ///
+    ylabel(,labsize(small)) ///
+    xlabel(,labsize(small)) ///
+    legend(size(small)) ///
+    graphregion(color(white)) ///
+    note("Notes: Series represents average benefit unit gross income through time. Gross income is the sum of capital income, private" "pension income and employment income. One observation per benefit unit plotted. Amounts in 2015 prices. Top and" "bottom percentiles trimmed.", ///
+    size(vsmall))
+
+graph export ///
+"$dir_output_files/income/gross_income/validation_${country}_gross_income_bu_ts.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+
+********************************************************************************
+* 1.2 : Mean values over time, individual level amounts
+********************************************************************************
+
+* Prepare validation data
+use year demAge dwt valid_yGrossPersLevelYear using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+keep if inrange(demAge,18,65)
+
+* Trim outliers (pooled 1st/99th percentiles) when $trim_outliers is "true"
+if "$trim_outliers" == "true" {
+
+    sum valid_yGrossPersLevelYear, d
+
+    replace valid_yGrossPersLevelYear = . if ///
+        valid_yGrossPersLevelYear < r(p1) | ///
+        valid_yGrossPersLevelYear > r(p99)
+
+}
+
+collapse (mean) valid_yGrossPersLevelYear [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+
+* Prepare simulated data
+use run year demAge sim_yGrossPersLevelYear using ///
+    "$dir_data/simulation_sample.dta", clear
+
+keep if inrange(demAge,18,65)
+
+* Trim outliers (pooled 1st/99th percentiles) when $trim_outliers is "true"
+if "$trim_outliers" == "true" {
+
+    sum sim_yGrossPersLevelYear, d
+
+    replace sim_yGrossPersLevelYear = .
if /// + sim_yGrossPersLevelYear < r(p1) | /// + sim_yGrossPersLevelYear > r(p99) + +} + +collapse (mean) sim_yGrossPersLevelYear, by(run year) + +collapse (mean) sim_yGrossPersLevelYear /// + (sd) sim_yGrossPersLevelYear_sd = sim_yGrossPersLevelYear, /// + by(year) + +foreach varname in sim_yGrossPersLevelYear{ + + gen `varname'_hi = `varname' + 1.96*`varname'_sd + gen `varname'_lo = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway /// +(rarea sim_yGrossPersLevelYear_hi sim_yGrossPersLevelYear_lo /// + year, sort color(green%20) /// + legend(label(1 "SimPaths"))) /// +(line valid_yGrossPersLevelYear year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Individual Gross Income") /// + subtitle("Ages 18-65") /// + xtitle("Year", size(small)) /// + ytitle("GBP per year", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Series represents average individual gross income through time. Gross income is the sum of capital income, private pension" "income and employment income. Values in 2015 prices. 
Top and bottom percentiles trimmed.", /// + size(vsmall)) + +graph export /// +"$dir_output_files/income/gross_income/validation_${country}_ind_gross_income_ts_18_65_both.jpg", /// + replace width(2400) height(1350) quality(100) + + +******************************************************************************** +* 1.2.1 : Mean values over time, individual level amounts, by gender +******************************************************************************** + +* Prepare validation data +use year demAge dwt valid_yGrossPersLevelYear demMaleFlag using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) +keep if demMaleFlag == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yGrossPersLevelYear, d + + replace valid_yGrossPersLevelYear = . if /// + valid_yGrossPersLevelYear < r(p1) | /// + valid_yGrossPersLevelYear > r(p99) + +} + +collapse (mean) valid_yGrossPersLevelYear [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year demAge sim_yGrossPersLevelYear demMaleFlag using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) +keep if demMaleFlag == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yGrossPersLevelYear, d + + replace sim_yGrossPersLevelYear = . 
if /// + sim_yGrossPersLevelYear < r(p1) | /// + sim_yGrossPersLevelYear > r(p99) + +} + +collapse (mean) sim_yGrossPersLevelYear, by(run year) + +collapse (mean) sim_yGrossPersLevelYear /// + (sd) sim_yGrossPersLevelYear_sd = /// + sim_yGrossPersLevelYear, by(year) + +foreach varname in sim_yGrossPersLevelYear { + + gen `varname'_hi = `varname' + 1.96*`varname'_sd + gen `varname'_lo = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway /// +(rarea sim_yGrossPersLevelYear_hi sim_yGrossPersLevelYear_lo /// + year, sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_yGrossPersLevelYear year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Individual Gross Income") /// + subtitle("Ages 18-65, males") /// + xtitle("Year", size(small)) /// + ytitle("GBP per year", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Series represents average individual gross income through time. Gross income is the sum of capital income, private pension" "income and employment income. Values in 2015 prices. Top and bottom percentiles trimmed.", /// + size(vsmall)) + +graph export /// +"$dir_output_files/income/gross_income/validation_${country}_ind_gross_income_ts_18_65_male.jpg", /// + replace width(2400) height(1350) quality(100) + + +* Female +* Prepare validation data +use year demAge dwt valid_yGrossPersLevelYear demMaleFlag using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +keep if demMaleFlag == 0 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yGrossPersLevelYear, d + + replace valid_yGrossPersLevelYear = . 
if /// + valid_yGrossPersLevelYear < r(p1) | /// + valid_yGrossPersLevelYear > r(p99) + +} + +collapse (mean) valid_yGrossPersLevelYear [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year demAge sim_yGrossPersLevelYear demMaleFlag using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) + +keep if demMaleFlag == 0 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yGrossPersLevelYear, d + + replace sim_yGrossPersLevelYear= . if /// + sim_yGrossPersLevelYear< r(p1) | /// + sim_yGrossPersLevelYear> r(p99) + +} + +collapse (mean) sim_yGrossPersLevelYear, by(run year) + +collapse (mean) sim_yGrossPersLevelYear /// + (sd) sim_yGrossPersLevelYear_sd = /// + sim_yGrossPersLevelYear, by(year) + +foreach varname in sim_yGrossPersLevelYear{ + + gen `varname'_hi = `varname' + 1.96*`varname'_sd + gen `varname'_lo = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway /// +(rarea sim_yGrossPersLevelYear_hi /// + sim_yGrossPersLevelYear_lo year, sort color(green%20) /// + legend(label(1 "SimPaths"))) /// +(line valid_yGrossPersLevelYear year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Individual Gross Income") /// + subtitle("Ages 18-65, females") /// + xtitle("Year", size(small)) /// + ytitle("GBP per year", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Series represents average individual gross income through time. Gross income is the sum of capital income, private pension" "income and employment income. Values in 2015 prices. 
Top and bottom percentiles trimmed.", /// + size(vsmall)) + +graph export /// +"$dir_output_files/income/gross_income/validation_${country}_ind_gross_income_ts_18_65_female.jpg", /// + replace width(2400) height(1350) quality(100) + +graph drop _all + + +/******************************************************************************* +* 2 : Histograms +*******************************************************************************/ + +/******************************************************************************* +* 2.1 : Histograms - Benefit unit gross income by year, and by category of +weekly labour supply +*******************************************************************************/ + +* Prepare validation data +use year demAge dwt valid_yGrossBuLevelYear valid_labHrsWorkEnumWeek /// + using "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yGrossBuLevelYear, d + + replace valid_yGrossBuLevelYear = . 
if /// + valid_yGrossBuLevelYear < r(p1) | /// + valid_yGrossBuLevelYear > r(p99) + +} + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_yGrossBuLevelYear if year == `year', /// + width(2500) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + + foreach ls in $ls_cat { + + twoway__histogram_gen valid_yGrossBuLevelYear if /// + year == `year' & valid_labHrsWorkEnumWeek == "`ls'", width(2500) /// + den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year'_`ls' = r(max) + + drop d_valid v2 + + } + +} + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year demAge sim_yGrossBuLevelYear sim_labHrsWorkEnumWeek using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yGrossBuLevelYear, d + + replace sim_yGrossBuLevelYear = . if /// + sim_yGrossBuLevelYear < r(p1) | sim_yGrossBuLevelYear > r(p99) + +} + +keep if run == 1 + +append using "$dir_data/temp_valid_stats.dta" + +* Plot sub-figures +qui sum year +local min_year = 2011 +local max_year = 2023 + +//local year = 2010 + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_yGrossBuLevelYear if year == `year', /// + width(2500) den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/3 + + + twoway (hist sim_yGrossBuLevelYear if year == `year', width(2500) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_yGrossBuLevelYear if year == `year', width(2500) /// + color(red%30) legend(label(2 "UKHLS"))) , /// + title("ALL hours") /// + name(gross_inc_`year'_all, replace) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(vsmall) angle(forty_five)) /// + ylabel(0(`steps')`max_y',labsize(vsmall)) /// + graphregion(color(white)) + + drop d_sim v1 max_d_sim max_value + + foreach ls in $ls_cat { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_yGrossBuLevelYear if /// + year == `year' & sim_labHrsWorkEnumWeek == "`ls'", width(2500) /// + den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year'_`ls' if /// + max_d_valid_`year'_`ls' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/3 + + * Plot by weekly hours work + twoway (hist sim_yGrossBuLevelYear if year == `year' & /// + sim_labHrsWorkEnumWeek == "`ls'", width(2500) color(green%30) /// + legend(label(1 "SimPaths"))) /// + (hist valid_yGrossBuLevelYear if year == `year' & /// + valid_labHrsWorkEnumWeek == "`ls'", width(2500) color(red%30) /// + legend(label(2 "UKHLS"))) , /// + title("`ls' hours") /// + name(gross_inc_`year'_`ls', replace) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(vsmall) angle(forty_five)) /// + ylabel(0(`steps')`max_y',labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) + + drop d_sim v1 max_d_sim max_value + + } +} + +* Combine plots by year +qui sum year +local min_year = 2011 +local max_year = 2023 + +forvalues year = `min_year'/`max_year' { + + grc1leg gross_inc_`year'_all /// + gross_inc_`year'_ZERO /// + gross_inc_`year'_TEN , /// + title("Benefit Unit Gross Income by Weekly Hours of Work") /// + subtitle("`year'") /// + legendfrom(gross_inc_`year'_ZERO) rows(1) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. Gross income is the sum of capital income, private pension income and employment income." "Individual observations of benefit unit amounts plotted.", /// + size(vsmall)) + + graph export "$dir_output_files/income/gross_income/validation_${country}_gross_income_bu_dist_`year'_18_65_1.png", /// + replace width(2400) height(1350) + + grc1leg /// + gross_inc_`year'_TWENTY /// + gross_inc_`year'_THIRTY /// + gross_inc_`year'_THIRTY_EIGHT /// + gross_inc_`year'_FORTY_FIVE /// + gross_inc_`year'_FIFTY_FIVE, /// + title("Benefit Unit Gross Income by Weekly Hours of Work") /// + subtitle("`year'") /// + legendfrom(gross_inc_`year'_TWENTY) rows(2) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. 
Gross income is the sum of capital income, private pension income and employment income." "Individual observations of benefit unit amounts plotted.", /// + size(vsmall)) + + graph export "$dir_output_files/income/gross_income/validation_${country}_gross_income_bu_dist_`year'_18_65_2.png", /// + replace width(2400) height(1350) + +} + +graph drop _all + + +/******************************************************************************* +* 2.2 : Histograms - Individual gross income by year, and by category of weekly +labour supply, by gender +*******************************************************************************/ + +* Males + +* Prepare validation data +use year demAge dwt valid_yGrossBuLevelYear valid_labHrsWorkEnumWeek /// + demMaleFlag using "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +keep if demMaleFlag == 1 +drop demMaleFlag + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yGrossBuLevelYear, d + + replace valid_yGrossBuLevelYear = . 
if /// + valid_yGrossBuLevelYear < r(p1) | /// + valid_yGrossBuLevelYear > r(p99) + +} + + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_yGrossBuLevelYear if /// + year == `year', width(2500) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + + foreach ls in $ls_cat { + + twoway__histogram_gen valid_yGrossBuLevelYear if /// + year == `year' & valid_labHrsWorkEnumWeek == "`ls'", width(2500) /// + den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year'_`ls' = r(max) + + drop d_valid v2 + + } +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year demAge sim_yGrossBuLevelYear sim_labHrsWorkEnumWeek /// + demMaleFlag using "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) + +keep if demMaleFlag == 1 +drop demMaleFlag + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yGrossBuLevelYear, d + + replace sim_yGrossBuLevelYear= . if /// + sim_yGrossBuLevelYear < r(p1) | /// + sim_yGrossBuLevelYear > r(p99) + +} + +keep if run == 1 + +append using "$dir_data/temp_valid_stats.dta" + +* Plot sub-figures +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_yGrossBuLevelYear if year == `year', /// + width(2500) den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/3 + + * Plot all hours + twoway (hist sim_yGrossBuLevelYear if year == `year', width(2500) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_yGrossBuLevelYear if year == `year', width(2500) /// + color(red%30) legend(label(2 "UKHLS"))) , /// + title("ALL hours") /// + name(ind_gross_inc_`year'_all, replace) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(vsmall) angle(forty_five)) /// + ylabel(0(`steps')`max_y', labsize(vsmall)) /// + graphregion(color(white)) + + drop d_sim v1 max_d_sim max_value + + foreach ls in $ls_cat { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_yGrossBuLevelYear if /// + year == `year' & sim_labHrsWorkEnumWeek == "`ls'", width(2500) /// + den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year'_`ls' if /// + max_d_valid_`year'_`ls' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/3 + + * Plot by weekly hours work + twoway (hist sim_yGrossBuLevelYear if year == `year' & /// + sim_labHrsWorkEnumWeek == "`ls'", width(2500) color(green%30) /// + legend(label(1 "SimPaths"))) /// + (hist valid_yGrossBuLevelYear if year == `year' & /// + valid_labHrsWorkEnumWeek == "`ls'", width(2500) color(red%30) /// + legend(label(2 "UKHLS"))) , /// + title("`ls' hours") /// + name(ind_gross_inc_`year'_`ls', replace) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(vsmall) angle(forty_five)) /// + ylabel(0(`steps')`max_y', labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) + + drop d_sim v1 max_d_sim max_value + + } +} + +* Combine plots by year +qui sum year +local min_year = 2011 +local max_year = 2023 + +forvalues year = `min_year'/`max_year' { + + grc1leg ind_gross_inc_`year'_all /// + ind_gross_inc_`year'_ZERO /// + ind_gross_inc_`year'_TEN , /// + title("Individual Gross Income by Weekly Hours of Work") /// + subtitle("`year', males") /// + legendfrom(ind_gross_inc_`year'_ZERO) /// + rows(1) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. Sample include males age 18-65. Top and bottom percentiles trimmed. 
Gross income is" "the sum of capital income, private pension income and employment income.", /// + size(vsmall)) + + graph export "$dir_output_files/income/gross_income/validation_${country}_ind_gross_income_dist_`year'_male_1.png", /// + replace width(2400) height(1350) + + grc1leg /// + ind_gross_inc_`year'_TWENTY /// + ind_gross_inc_`year'_THIRTY /// + ind_gross_inc_`year'_THIRTY_EIGHT /// + ind_gross_inc_`year'_FORTY_FIVE /// + ind_gross_inc_`year'_FIFTY_FIVE, /// + title("Individual Gross Income by Weekly Hours of Work") /// + subtitle("`year', males") /// + legendfrom(ind_gross_inc_`year'_TWENTY) /// + rows(2) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. Sample include males age 18-65. Top and bottom percentiles trimmed. Gross income is" "the sum of capital income, private pension income and employment income.", /// + size(vsmall)) + + graph export "$dir_output_files/income/gross_income/validation_${country}_ind_gross_income_dist_`year'_male_2.png", /// + replace width(2400) height(1350) + +} + +graph drop _all + + +* Females + +* Prepare validation data +use year demAge dwt valid_yGrossBuLevelYear valid_labHrsWorkEnumWeek /// + demMaleFlag using "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +keep if demMaleFlag == 0 +drop demMaleFlag + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yGrossBuLevelYear, d + + replace valid_yGrossBuLevelYear = . 
if /// + valid_yGrossBuLevelYear < r(p1) | /// + valid_yGrossBuLevelYear > r(p99) + +} + + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_yGrossBuLevelYear if /// + year == `year' , width(2500) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + + foreach ls in $ls_cat { + + twoway__histogram_gen valid_yGrossBuLevelYear if /// + year == `year' & valid_labHrsWorkEnumWeek == "`ls'", width(2500) /// + den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year'_`ls' = r(max) + + drop d_valid v2 + + } +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year demAge sim_yGrossBuLevelYear sim_labHrsWorkEnumWeek /// + demMaleFlag using "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) + +keep if demMaleFlag == 0 +drop demMaleFlag + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yGrossBuLevelYear, d + + replace sim_yGrossBuLevelYear = . if sim_yGrossBuLevelYear< r(p1) | /// + sim_yGrossBuLevelYear> r(p99) + +} + +keep if run == 1 + + +append using "$dir_data/temp_valid_stats.dta" + +* Plot sub-figures +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_yGrossBuLevelYear if year == `year', /// + width(2500) den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/3 + + * Plot all hours + twoway (hist sim_yGrossBuLevelYear if year == `year', width(2500) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_yGrossBuLevelYear if year == `year', width(2500) /// + color(red%30) legend(label(2 "UKHLS"))) , /// + title("ALL hours") /// + name(ind_gross_inc_`year'_all, replace) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(vsmall) angle(forty_five)) /// + ylabel(0(`steps')`max_y', labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) + + drop d_sim v1 max_d_sim max_value + + foreach ls in $ls_cat { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_yGrossBuLevelYear if /// + year == `year' & sim_labHrsWorkEnumWeek == "`ls'", /// + width(2500) den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year'_`ls' if /// + max_d_valid_`year'_`ls' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/3 + + * Plot by weekly hours work + twoway (hist sim_yGrossBuLevelYear if year == `year' & /// + sim_labHrsWorkEnumWeek == "`ls'", width(2500) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_yGrossBuLevelYear if year == `year' & /// + valid_labHrsWorkEnumWeek == "`ls'", width(2500) color(red%30) /// + legend(label(2 "UKHLS"))) , /// + title("`ls' hours") /// + name(ind_gross_inc_`year'_`ls', replace) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(vsmall) angle(forty_five)) /// + ylabel(0(`steps')`max_y', labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) + + drop d_sim v1 max_d_sim max_value + + } +} + +* Combine plots by year +qui sum year +local min_year = 2011 +local max_year = 2023 + +forvalues year = `min_year'/`max_year' { + + grc1leg ind_gross_inc_`year'_all /// + ind_gross_inc_`year'_ZERO /// + ind_gross_inc_`year'_TEN , /// + title("Individual Gross Income by Weekly Hours of Work") /// + subtitle("`year', females") /// + legendfrom(ind_gross_inc_`year'_ZERO) /// + rows(1) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. Sample include females age 18-65. Top and bottom percentiles trimmed. 
Gross income is" "the sum of capital income, private pension income and employment income.", /// + size(vsmall)) + + graph export "$dir_output_files/income/gross_income/validation_${country}_ind_gross_income_dist_`year'_female_1.png", /// + replace width(2400) height(1350) + + + grc1leg /// + ind_gross_inc_`year'_TWENTY /// + ind_gross_inc_`year'_THIRTY /// + ind_gross_inc_`year'_THIRTY_EIGHT /// + ind_gross_inc_`year'_FORTY_FIVE /// + ind_gross_inc_`year'_FIFTY_FIVE, /// + title("Individual Gross Income by Weekly Hours of Work") /// + subtitle("`year', females") /// + legendfrom(ind_gross_inc_`year'_TWENTY) /// + rows(2) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. Sample include females age 18-65. Top and bottom percentiles trimmed. Gross income is" "the sum of capital income, private pension income and employment income.", /// + size(vsmall)) + + graph export "$dir_output_files/income/gross_income/validation_${country}_ind_gross_income_dist_`year'_female_2.png", /// + replace width(2400) height(1350) + +} + +graph drop _all + + + +/* + +* Investigation into who the people are with high working hours and low gross +* income +/* +Note plot ben unit observations using individual level data. + +Components of gross income. 
+ +Gross personal income components +• PY010G - Gross employee cash or near cash employee income +• PY050G - Gross cash benefits or losses from self-employment + (including royalties) +• PY080G - Pensions received from individual private plans (other than those + covered under ESSPROS) + +Plus gross income components at household level +• HY040G - Income from rental of a property or land +• HY080G - Regular inter-household cash transfers received +• HY090G - Interests, dividends, profit from capital investments in + unincorporated business +• HY110G - Income received by people aged under 16 +*/ + +* Explore 2018 FIFTY hours +use "$dir_data/ukhls_validation_full_sample.dta", clear + +keep if year == 2018 & labHrsWorkEnumWeek == "FIFTY" + +order idperson idbenefit lhw valid_yGrossBuLevelYear /// + y_gross_labour_person valid_wage_hour /// + py010g* py050g py080g /// + hy080g_pc hy110g_pc hy040g_pc hy090g_pc missing* + +fre missing_py010g missing_py050g missing_py080g missing_hy080g /// + missing_hy110g missing_hy040g missing_hy090g missing_lhw if /// + valid_yGrossBuLevelYear == 0 // none missing seems to be in the data + + diff --git a/validation/02_simulated_output_validation/do_files/04_04_plot_gross_labour_income.do b/validation/02_simulated_output_validation/do_files/04_04_plot_gross_labour_income.do new file mode 100644 index 000000000..7c4f3b5ac --- /dev/null +++ b/validation/02_simulated_output_validation/do_files/04_04_plot_gross_labour_income.do @@ -0,0 +1,599 @@ +******************************************************************************** +* PROJECT: SimPaths UK +* SECTION: Validation +* OBJECT: Gross labour income +* AUTHORS: Ashley Burdett +* LAST UPDATE: Feb 2026 +* COUNTRY: UK +******************************************************************************** +* NOTES: Plotted using individual level data +* => multiple observations per ben unit. 
+******************************************************************************** + +******************************************************************************** +* 1 : Mean labour income +******************************************************************************** + +******************************************************************************** +* 1.1: Mean labour income - benefit unit +******************************************************************************** + +* Prepare validation data +use year idBu idPers demAge dwt labC4 valid_yEmpBuGrossLevelYear using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Keep only employed individuals +keep if labC4 == 1 + +keep if demAge >= 16 + +* Keep one observation per benefit unit +bysort year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yEmpBuGrossLevelYear, d + + replace valid_yEmpBuGrossLevelYear = . if /// + valid_yEmpBuGrossLevelYear < r(p1) | valid_yEmpBuGrossLevelYear > r(p99) + +} + +collapse (mean) valid_yEmpBuGrossLevelYear [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare SimPaths data +use run year idPers idBu demAge labC4 sim_yEmpBuGrossLevelYear using /// + "$dir_data/simulation_sample.dta", clear + +* Keep only employed individuals +keep if labC4 == "EmployedOrSelfEmployed" + +keep if demAge >= 16 + +* Keep one observation per benefit unit +bysort run year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yEmpBuGrossLevelYear, d + + replace sim_yEmpBuGrossLevelYear = . 
if /// + sim_yEmpBuGrossLevelYear < r(p1) | sim_yEmpBuGrossLevelYear > r(p99) + +} + +collapse (mean) sim_yEmpBuGrossLevelYear, by(run year) + +collapse (mean) sim_yEmpBuGrossLevelYear /// + (sd) sim_yEmpBuGrossLevelYear_sd = sim_yEmpBuGrossLevelYear /// + , by(year) + +foreach varname in sim_yEmpBuGrossLevelYear { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway /// +(rarea sim_yEmpBuGrossLevelYear_high sim_yEmpBuGrossLevelYear_low year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_yEmpBuGrossLevelYear year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Benefit Unit Gross Labour Income") /// + subtitle("") /// + xtitle("Year", size(small)) /// + ytitle("GBP per year", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + graphregion(color(white)) /// + legend(size(small)) /// + note("Note: Amounts in 2015 prices. Top and bottom percentiles trimmed.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/income/gross_labour_income/validation_${country}_gross_labour_income_bu_ts.jpg", /// + replace width(2400) height(1350) quality(100) + + +******************************************************************************** +* 1.2: Mean labour income - individual +******************************************************************************** + +* Prepare validation data +use year demAge dwt labC4 valid_yEmpPersGrossLevelYear using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Keep only employed individuals +keep if labC4 == 1 + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yEmpPersGrossLevelYear, d + + replace valid_yEmpPersGrossLevelYear = . 
if /// + valid_yEmpPersGrossLevelYear < r(p1) | /// + valid_yEmpPersGrossLevelYear > r(p99) + +} + +collapse (mean) valid_yEmpPersGrossLevelYear [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare SimPaths data +use run year demAge labC4 sim_yEmpPersGrossLevelYear using /// + "$dir_data/simulation_sample.dta", clear + +* Keep only employed individuals +keep if labC4 == "EmployedOrSelfEmployed" + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yEmpPersGrossLevelYear, d + + replace sim_yEmpPersGrossLevelYear = . if /// + sim_yEmpPersGrossLevelYear < r(p1) | sim_yEmpPersGrossLevelYear > r(p99) + +} + +collapse (mean) sim_yEmpPersGrossLevelYear, by(run year) + +collapse (mean) sim_yEmpPersGrossLevelYear /// + (sd) sim_yEmpPersGrossLevelYear_sd = sim_yEmpPersGrossLevelYear /// + , by(year) + +foreach varname in sim_yEmpPersGrossLevelYear { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway /// +(rarea sim_yEmpPersGrossLevelYear_high sim_yEmpPersGrossLevelYear_low year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_yEmpPersGrossLevelYear year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Gross Labour Income") /// + subtitle("Ages 18-65") /// + xtitle("Year", size(small)) /// + ytitle("GBP per year", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + graphregion(color(white)) /// + legend(size(small)) /// + note("Note: Amounts at the individual level, individual data plotted. Statistics calculated on the sample of employed individuals" "ages 18-65. Amounts in 2015 prices. 
Top and bottom percentiles trimmed.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/income/gross_labour_income/validation_${country}_ind_gross_labour_income_ts_18_65.jpg", /// + replace width(2400) height(1350) quality(100) + + +******************************************************************************** +* 2 : Histograms +******************************************************************************** + +******************************************************************************** +* 2.1 : Histograms - working age, benefit unit +******************************************************************************** + +* Prepare validation data +use year idPers idBu demAge dwt labC4 valid_yEmpBuGrossLevelYear /// + valid_labHrsWorkEnumWeek using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Keep only employed individuals +keep if labC4 == 1 +drop labC4 + +keep if inrange(demAge,18,65) + +* Keep one observatioon per benefit unit +bysort year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yEmpBuGrossLevelYear, d + + replace valid_yEmpBuGrossLevelYear = . 
if /// + valid_yEmpBuGrossLevelYear < r(p1) | valid_yEmpBuGrossLevelYear > r(p99) + +} + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_yEmpBuGrossLevelYear if year == `year', /// + width(2500) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + + foreach ls in $ls_cat { + + twoway__histogram_gen valid_yEmpBuGrossLevelYear if /// + year == `year' & valid_labHrsWorkEnumWeek == "`ls'", width(2500) /// + den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year'_`ls' = r(max) + + drop d_valid v2 + + } + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare SimPaths data +use run year idPers idBu demAge labC4 sim_yEmpBuGrossLevelYear /// + sim_labHrsWorkEnumWeek using "$dir_data/simulation_sample.dta", clear + +* Keep only employed individuals +keep if labC4 == "EmployedOrSelfEmployed" +drop labC4 + +keep if inrange(demAge,18,65) + +* Keep one observatioon per benefit unit +bysort run year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yEmpBuGrossLevelYear, d + + replace sim_yEmpBuGrossLevelYear = . if /// + sim_yEmpBuGrossLevelYear < r(p1) | sim_yEmpBuGrossLevelYear > r(p99) + + } + +keep if run == 1 + +append using "$dir_data/temp_valid_stats.dta" + +* Plot sub-figures +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_yEmpBuGrossLevelYear if year == `year', /// + width(2500) den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + + * Plot all hours + twoway (hist sim_yEmpBuGrossLevelYear if year == `year' , width(2500) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_yEmpBuGrossLevelYear if year == `year' , width(2500) /// + color(red%30) legend(label(2 "UKHLS"))) , /// + subtitle("ALL hours") /// + name(gross_lab_inc_`year'_all, replace) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(vsmall) angle(forty_five)) /// + ylabel(0(`steps')`max_y',labsize(vsmall)) /// + graphregion(color(white)) + + + drop d_sim v1 max_d_sim max_value + + * Plot by weekly hours work + foreach ls in $ls_cat_labour { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_yEmpBuGrossLevelYear if year == `year' & /// + sim_labHrsWorkEnumWeek == "`ls'", width(2500) den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year'_`ls' if /// + max_d_valid_`year'_`ls' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + * Plot by weekly hours work + twoway (hist sim_yEmpBuGrossLevelYear if year == `year' & /// + sim_labHrsWorkEnumWeek == "`ls'", width(2500) color(green%30) /// + legend(label(1 "SimPaths"))) /// + (hist valid_yEmpBuGrossLevelYear if year == `year' & /// + valid_labHrsWorkEnumWeek == "`ls'", width(2500) color(red%30) /// + legend(label(2 "UKHLS"))) , /// + subtitle("`ls' hours") /// + name(gross_lab_inc_`year'_`ls', replace) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(vsmall) angle(forty_five)) /// + ylabel(0(`steps')`max_y',labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) + + drop d_sim v1 max_d_sim max_value + + } +} + +* Combine plots by year +forvalues year = 2011/2023 { + + grc1leg gross_lab_inc_`year'_all /// + gross_lab_inc_`year'_TEN /// + gross_lab_inc_`year'_TWENTY, /// + title("Benefit Unit Gross Labour Income by Weekly Hours of Work") /// + subtitle("`year'") /// + legendfrom(gross_lab_inc_`year'_TEN) rows(1) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. Top and bottom percentiles trimmed. Individual observations of benefit unit amount plotted", /// + size(vsmall)) + + graph export "$dir_output_files/income/gross_labour_income/validation_${country}_gross_labour_income_bu_dist_`year'_18_65_1.png", /// + replace width(2400) height(1350) + + + grc1leg /// + gross_lab_inc_`year'_THIRTY /// + gross_lab_inc_`year'_THIRTY_EIGHT /// + gross_lab_inc_`year'_FORTY_FIVE /// + gross_lab_inc_`year'_FIFTY_FIVE, /// + title("Gross Labour Income by Weekly Hours of Work") /// + subtitle("`year'") /// + legendfrom(gross_lab_inc_`year'_THIRTY) rows(2) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. Top and bottom percentiles trimmed. 
Individual observations of benefit unit amount plotted", /// + size(vsmall)) + + graph export "$dir_output_files/income/gross_labour_income/validation_${country}_gross_labour_income_bu_dist_`year'_18_65_2.png", /// + replace width(2400) height(1350) + +} + +graph drop _all + + +******************************************************************************** +* 2.1 : Histograms - working age, individual +******************************************************************************** + +* Prepare validation data +use year demAge dwt labC4 valid_yEmpPersGrossLevelYear /// + valid_labHrsWorkEnumWeek using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Keep only employed individuals +keep if labC4 == 1 +drop labC4 + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yEmpPersGrossLevelYear, d + + replace valid_yEmpPersGrossLevelYear = . if /// + valid_yEmpPersGrossLevelYear < r(p1) | /// + valid_yEmpPersGrossLevelYear > r(p99) + +} + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_yEmpPersGrossLevelYear if year == `year', /// + width(2500) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + + foreach ls in $ls_cat { + + twoway__histogram_gen valid_yEmpPersGrossLevelYear if /// + year == `year' & valid_labHrsWorkEnumWeek == "`ls'", width(2500) /// + den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year'_`ls' = r(max) + + drop d_valid v2 + + } + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare SimPaths data +use run year demAge labC4 sim_yEmpPersGrossLevelYear sim_labHrsWorkEnumWeek /// + using "$dir_data/simulation_sample.dta", clear + +* Keep only employed individuals +keep if labC4 == "EmployedOrSelfEmployed" +drop labC4 + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + 
sum sim_yEmpPersGrossLevelYear, d + + replace sim_yEmpPersGrossLevelYear = . if /// + sim_yEmpPersGrossLevelYear < r(p1) | sim_yEmpPersGrossLevelYear > r(p99) + + } + +keep if run == 1 + +append using "$dir_data/temp_valid_stats.dta" + +* Plot sub-figures +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_yEmpPersGrossLevelYear if year == `year', /// + width(2500) den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . + + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + + * Plot all hours + twoway (hist sim_yEmpPersGrossLevelYear if year == `year' , width(2500) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_yEmpPersGrossLevelYear if year == `year' , width(2500) /// + color(red%30) legend(label(2 "UKHLS"))) , /// + subtitle("ALL hours") /// + name(gross_lab_inc_`year'_all, replace) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(vsmall) angle(forty_five)) /// + ylabel(0(`steps')`max_y',labsize(vsmall)) /// + graphregion(color(white)) + + + drop d_sim v1 max_d_sim max_value + + * Plot by weekly hours work + foreach ls in $ls_cat_labour { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_yEmpPersGrossLevelYear if year == `year' & /// + sim_labHrsWorkEnumWeek == "`ls'", width(2500) den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year'_`ls' if /// + max_d_valid_`year'_`ls' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + * Plot by weekly hours work + twoway (hist sim_yEmpPersGrossLevelYear if year == `year' & /// + sim_labHrsWorkEnumWeek == "`ls'", width(2500) color(green%30) /// + legend(label(1 "SimPaths"))) /// + (hist valid_yEmpPersGrossLevelYear if year == `year' & /// + valid_labHrsWorkEnumWeek == "`ls'", width(2500) color(red%30) /// + legend(label(2 "UKHLS"))) , /// + subtitle("`ls' hours") /// + name(gross_lab_inc_`year'_`ls', replace) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(vsmall) angle(forty_five)) /// + ylabel(0(`steps')`max_y',labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) + + drop d_sim v1 max_d_sim max_value + + } +} + +* Combine plots by year +forvalues year = `min_year'/`max_year' { + + grc1leg gross_lab_inc_`year'_all /// + gross_lab_inc_`year'_TEN /// + gross_lab_inc_`year'_TWENTY, /// + title("Individual Gross Labour Income by Weekly Hours of Work") /// + subtitle("`year'") /// + legendfrom(gross_lab_inc_`year'_TEN) rows(1) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. Employed 18-65 years olds included in the sample. Top and bottom percentiles trimmed.", /// + size(vsmall)) + + graph export "$dir_output_files/income/gross_labour_income/validation_${country}_ind_gross_labour_income_dist_`year'_both_1.png", /// + replace width(2400) height(1350) + + + grc1leg /// + gross_lab_inc_`year'_THIRTY /// + gross_lab_inc_`year'_THIRTY_EIGHT /// + gross_lab_inc_`year'_FORTY_FIVE /// + gross_lab_inc_`year'_FIFTY_FIVE, /// + title("Individual Gross Labour Income by Weekly Hours of Work") /// + subtitle("`year'") /// + legendfrom(gross_lab_inc_`year'_THIRTY) rows(2) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. Employed 18-65 years olds included in the sample. 
Top and bottom percentiles trimmed.", /// + size(vsmall)) + + graph export "$dir_output_files/income/gross_labour_income/validation_${country}_ind_gross_labour_income_dist_`year'_both_2.png", /// + replace width(2400) height(1350) + +} + +graph drop _all + diff --git a/validation/02_simulated_output_validation/do_files/04_05_plot_capital_income.do b/validation/02_simulated_output_validation/do_files/04_05_plot_capital_income.do new file mode 100644 index 000000000..a4a4f10d5 --- /dev/null +++ b/validation/02_simulated_output_validation/do_files/04_05_plot_capital_income.do @@ -0,0 +1,373 @@ +******************************************************************************** +* PROJECT: SimPaths UK +* SECTION: Validation +* OBJECT: Capital income +* AUTHORS: Ashley Burdett +* LAST UPDATE: 9/25 (AB) +* COUNTRY: UK +******************************************************************************** +* NOTES: This do file plots simulated and UKHLS capital income, +* per benefit unit + +******************************************************************************** + +******************************************************************************** +* 1 : Time series +******************************************************************************** + +******************************************************************************** +* 1.1 : Mean through time, benefit unit +******************************************************************************** + +* Prepare validation data +use year idPers idBu demAge dwt valid_yCapitalBuLevelYear using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if demAge >= 16 + +* Keep one observatioon per benefit unit +bysort year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +/* +if "$trim_outliers" == "true" { + + sum valid_yCapitalBuLevelYear, d + + replace valid_yCapitalBuLevelYear = . 
if /// + valid_yCapitalBuLevelYear < r(p1) | /// + valid_yCapitalBuLevelYear > r(p99) + +} +*/ + +collapse (mean) valid_yCapitalBuLevelYear [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year idPers idBu demAge sim_yCapitalBuLevelYear using /// + "$dir_data/simulation_sample.dta", clear + +keep if demAge >= 16 + +* Keep one observatioon per benefit unit +bysort run year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +/* +if "$trim_outliers" == "true" { + + sum sim_yCapitalBuLevelYear, d + + replace sim_yCapitalBuLevelYear = . if /// + sim_yCapitalBuLevelYear < r(p1) | sim_yCapitalBuLevelYear > r(p99) + +} +*/ + +collapse (mean) sim_yCapitalBuLevelYear, by(run year) + +collapse (mean) sim_yCapitalBuLevelYear /// + (sd) sim_yCapitalBuLevelYear_sd = sim_yCapitalBuLevelYear /// + , by(year) + +foreach varname in sim_yCapitalBuLevelYear { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway (rarea sim_yCapitalBuLevelYear_high sim_yCapitalBuLevelYear_low year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_yCapitalBuLevelYear year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Benefit Unit Capital income") /// + subtitle("") /// + xtitle("Year", size(small)) /// + ytitle("GBP per year", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Series represents average benefit unit capital income per year. 
Amounts in 2015 prices.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/income/capital_income/validation_${country}_capital_income_bu_ts.jpg", /// + replace width(2400) height(1350) quality(100) + + +******************************************************************************** +* 1.2 : Share with no capital income, benefit unit +******************************************************************************** + +* Share with no capital income +* Prepare validation data +use year idPers idBu demAge dwt valid_yCapitalBuLevelYear using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if demAge >= 16 + +* Keep one observatioon per benefit unit +bysort year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +/* +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yCapitalBuLevelYear, d + + replace valid_yCapitalBuLevelYear = . if /// + valid_yCapitalBuLevelYear < r(p1) | /// + valid_yCapitalBuLevelYear > r(p99) + +} +*/ + +gen valid_no_capital = (valid_yCapitalBuLevelYear == 0) + +collapse (mean) valid_no_capital [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year idPers idBu demAge sim_yCapitalBuLevelYear using /// + "$dir_data/simulation_sample.dta", clear + +keep if demAge >= 16 + +* Keep one observatioon per benefit unit +bysort run year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +/* +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yCapitalBuLevelYear, d + + replace sim_yCapitalBuLevelYear = . 
if /// + sim_yCapitalBuLevelYear < r(p1) | sim_yCapitalBuLevelYear > r(p99) + +} +*/ + +gen sim_no_capital = (sim_yCapitalBuLevelYear == 0) + +collapse (mean) sim_no_capital, by(run year) + +collapse (mean) sim_no_capital /// + (sd) sim_no_capital_sd = sim_no_capital /// + , by(year) + +foreach varname in sim_no_capital { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway (rarea sim_no_capital_high sim_no_capital_low year, sort /// + color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_no_capital year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("No Capital Income") /// + subtitle("") /// + xtitle("Year", size(small)) /// + ytitle("Share of benefit units", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: ", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/income/capital_income/validation_${country}_no_capital_income_bu_ts.jpg", /// + replace width(2400) height(1350) quality(100) + + +******************************************************************************** +* 2 : Histograms +******************************************************************************** + + +******************************************************************************** +* 2.1 : Benefit unit by year, +******************************************************************************** + +* Prepare validation data +use year idBu demAge dwt valid_yCapitalBuLevelYear using /// + "$dir_data/ukhls_validation_sample.dta", clear + +drop if demAge < 16 + +* Keep one observation per benefit unit +bysort year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yCapitalBuLevelYear, d + + replace valid_yCapitalBuLevelYear = 
. if /// + valid_yCapitalBuLevelYear < r(p1) | /// + valid_yCapitalBuLevelYear > r(p99) + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year idBu demAge sim_yCapitalBuLevelYear using /// + "$dir_data/simulation_sample.dta", clear + +drop if demAge < 16 + +* Keep one observation per benefit unit +bysort run year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yCapitalBuLevelYear, d + + replace sim_yCapitalBuLevelYear = . if /// + sim_yCapitalBuLevelYear < r(p1) | sim_yCapitalBuLevelYear > r(p99) + +} + +append using "$dir_data/temp_valid_stats.dta" + +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway (hist sim_yCapitalBuLevelYear if year == `year', width(250) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_yCapitalBuLevelYear if year == `year', /// + width(250) color(red%30) legend(label(2 "UKHLS"))) , /// + title("Benefit Unit Capital Income") /// + subtitle("`year'") /// + name(capital_inc_`year'_all, replace) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. 
Top and bottom percentiles trimmed.", ///
+        size(vsmall))
+
+    graph export ///
+    "$dir_output_files/income/capital_income/validation_${country}_capital_income_bu_dist_`year'.png", ///
+        replace width(2560) height(1440)
+
+}
+
+********************************************************************************
+* 2.2 : Positive amounts only, benefit unit
+********************************************************************************
+
+* Prepare validation data
+use year idBu demAge dwt valid_yCapitalBuLevelYear using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+* Sample selection: ages 16+ (must match the simulated sample below)
+keep if demAge >= 16
+
+* Keep one observation per benefit unit
+bysort year idBu: gen first_person = (_n == 1)
+keep if first_person == 1
+
+* Trim outliers
+* (values outside the p1-p99 range of the pooled sample are set to missing)
+if "$trim_outliers" == "true" {
+
+    sum valid_yCapitalBuLevelYear, d
+
+    replace valid_yCapitalBuLevelYear = . if ///
+        valid_yCapitalBuLevelYear < r(p1) | ///
+        valid_yCapitalBuLevelYear > r(p99)
+
+}
+
+* Drop exact zeros so that only non-zero amounts enter the histogram
+drop if valid_yCapitalBuLevelYear == 0
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+
+* Prepare simulated data
+use run year idBu demAge sim_yCapitalBuLevelYear using ///
+    "$dir_data/simulation_sample.dta", clear
+* Sample selection: ages 16+, the same restriction as the validation data
+keep if demAge >= 16
+
+* Keep one observation per benefit unit
+bysort run year idBu: gen first_person = (_n == 1)
+keep if first_person == 1
+
+* Trim outliers
+* (values outside the p1-p99 range, pooled over runs and years, set to missing)
+if "$trim_outliers" == "true" {
+
+    sum sim_yCapitalBuLevelYear, d
+
+    replace sim_yCapitalBuLevelYear = .
if ///
+        sim_yCapitalBuLevelYear < r(p1) | sim_yCapitalBuLevelYear > r(p99)
+
+}
+* Drop exact zeros so that only non-zero amounts enter the histogram
+drop if sim_yCapitalBuLevelYear == 0
+
+append using "$dir_data/temp_valid_stats.dta"
+
+qui sum year
+local min_year = 2011
+local max_year = 2023
+
+forval year = `min_year'/`max_year' {
+    * Overlay simulated and UKHLS histograms for this year, then export
+    twoway (hist sim_yCapitalBuLevelYear if year == `year', ///
+        width(500) color(green%30) legend(label(1 "SimPaths"))) ///
+    (hist valid_yCapitalBuLevelYear if year == `year', width(500) ///
+        color(red%30) legend(label(2 "UKHLS"))) , ///
+        title("Benefit Unit Capital Income") ///
+        subtitle("Positive amounts, `year'") ///
+        name(capital_inc_`year'_all, replace) ///
+        ylabel(,labsize(small)) ///
+        xlabel(,labsize(small)) ///
+        xtitle("GBP", size(small)) ///
+        ytitle("Density", size(small)) ///
+        legend(size(small)) ///
+        graphregion(color(white)) ///
+        note("Notes: Amounts in GBP per year, 2015 prices. Top and bottom percentiles trimmed. ", ///
+        size(vsmall))
+
+    graph export ///
+"$dir_output_files/income/capital_income/validation_${country}_positive_capital_income_bu_dist_`year'.png", ///
+        replace width(2560) height(1440)
+
+}
+
+graph drop _all
diff --git a/validation/02_simulated_output_validation/do_files/04_06_plot_pension_income.do b/validation/02_simulated_output_validation/do_files/04_06_plot_pension_income.do
new file mode 100644
index 000000000..1c544b5d1
--- /dev/null
+++ b/validation/02_simulated_output_validation/do_files/04_06_plot_pension_income.do
@@ -0,0 +1,373 @@
+********************************************************************************
+* PROJECT:          SimPaths UK
+* SECTION:          Validation
+* OBJECT:           Pension income
+* AUTHORS:          Ashley Burdett
+* LAST UPDATE:      9/25 (AB)
+* COUNTRY:          UK
+********************************************************************************
+* NOTES:            This do file plots simulated and UKHLS private pension
+*                   income, per benefit unit
+
+********************************************************************************
+
+********************************************************************************
+* 1 : Time series
+********************************************************************************
+
+********************************************************************************
+* 1.1 : Mean through time, benefit unit
+********************************************************************************
+
+* Prepare validation data
+use year idBu demAge dwt valid_yPensBuGrossLevelYear using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+* Sample selection: keep ages 65+, the same restriction as the simulated data
+drop if demAge < 65
+
+* Keep one observation per benefit unit
+bysort year idBu: gen first_person = (_n == 1)
+keep if first_person == 1
+
+* Trim outliers (values outside the pooled p1-p99 range are set to missing)
+if "$trim_outliers" == "true" {
+
+    sum valid_yPensBuGrossLevelYear, d
+
+    replace valid_yPensBuGrossLevelYear = . if ///
+        valid_yPensBuGrossLevelYear < r(p1) | ///
+        valid_yPensBuGrossLevelYear > r(p99)
+
+}
+* Weighted (dwt) yearly mean, saved for the merge with the simulated series
+collapse (mean) valid_yPensBuGrossLevelYear [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+
+* Prepare simulated data
+use run year idBu sim_yPensBuGrossLevelYear demAge using ///
+    "$dir_data/simulation_sample.dta", clear
+
+* Sample selection
+drop if demAge < 65
+
+* Keep one observation per benefit unit
+bysort run year idBu: gen first_person = (_n == 1)
+keep if first_person == 1
+
+* Trim outliers (pooled over runs and years; outliers are set to missing)
+if "$trim_outliers" == "true" {
+
+    sum sim_yPensBuGrossLevelYear, d
+
+    replace sim_yPensBuGrossLevelYear = .
if /// + sim_yPensBuGrossLevelYear < r(p1) | sim_yPensBuGrossLevelYear > r(p99) + +} + +collapse (mean) sim_yPensBuGrossLevelYear, by(run year) + +collapse (mean) sim_yPensBuGrossLevelYear /// + (sd) sim_yPensBuGrossLevelYear_sd = sim_yPensBuGrossLevelYear /// + , by(year) + +foreach varname in sim_yPensBuGrossLevelYear { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway (rarea sim_yPensBuGrossLevelYear_high sim_yPensBuGrossLevelYear_low /// + year, sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_yPensBuGrossLevelYear year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Benefit Unit Private Pension Income") /// + subtitle("") /// + xtitle("Year", size(small)) /// + ytitle("GBP per year", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Series represents average benefit unit private pension income. Amounts in 2015 prices. Top and bottom" "percentiles trimmed. 
Those 65+ maintained in sample.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/income/pension_income/validation_${country}_pension_income_bu_ts_65plus.jpg", /// + replace width(2400) height(1350) quality(100) + + +******************************************************************************** +* 1.2 : Share with no pension income, benefit unit +******************************************************************************** + +* Prepare validation data +use year idBu demAge dwt valid_yPensBuGrossLevelYear demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Sample selection +drop if demAge < 65 + +* Keep one observation per benefit unit +bysort year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yPensBuGrossLevelYear, d + + replace valid_yPensBuGrossLevelYear = . if /// + valid_yPensBuGrossLevelYear < r(p1) | /// + valid_yPensBuGrossLevelYear > r(p99) + +} + +gen valid_no_pension = (valid_yPensBuGrossLevelYear == 0) + +collapse (mean) valid_no_pension [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year idBu sim_yPensBuGrossLevelYear demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Sample selection +drop if demAge < 65 + +* Keep one observatioon per benefit unit +bysort run year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yPensBuGrossLevelYear, d + + replace sim_yPensBuGrossLevelYear = . 
if /// + sim_yPensBuGrossLevelYear < r(p1) | sim_yPensBuGrossLevelYear > r(p99) + +} + +gen sim_no_pension = (sim_yPensBuGrossLevelYear == 0) + +collapse (mean) sim_no_pension, by(run year) + +collapse (mean) sim_no_pension /// + (sd) sim_no_pension_sd = sim_no_pension /// + , by(year) + +foreach varname in sim_no_pension { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway (rarea sim_no_pension_high sim_no_pension_low year, sort /// + color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_no_pension year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("No Private Pension Income") /// + subtitle("") /// + xtitle("Year", size(small)) /// + ytitle("Share of benefit units", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Share of benefit unit units with individual 65+ with no private pension income. 
Top and bottom percentiles trimmed.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/income/pension_income/validation_${country}_no_pension_income_bu_ts_65plus.jpg", /// + replace width(2400) height(1350) quality(100) + + +******************************************************************************** +* 2 : Histograms +******************************************************************************** + +******************************************************************************** +* 2.1 : 65+, by year, benefit unit +******************************************************************************** + +* Prepare validation data +use year idBu demAge dwt valid_yPensBuGrossLevelYear using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Sample selection +drop if demAge < 65 + +* Keep one observation per benefit unit +bysort year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yPensBuGrossLevelYear, d + + replace valid_yPensBuGrossLevelYear = . if /// + valid_yPensBuGrossLevelYear < r(p1) | /// + valid_yPensBuGrossLevelYear > r(p99) + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year idBu sim_yPensBuGrossLevelYear demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Sample selection +drop if demAge < 65 + +* Keep one observation per benefit unit +bysort run year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yPensBuGrossLevelYear, d + + replace sim_yPensBuGrossLevelYear = . 
if /// + sim_yPensBuGrossLevelYear < r(p1) | sim_yPensBuGrossLevelYear > r(p99) + +} + +append using "$dir_data/temp_valid_stats.dta" + +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway (hist sim_yPensBuGrossLevelYear if year == `year', width(1000) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_yPensBuGrossLevelYear if year == `year', /// + width(1000) color(red%30) legend(label(2 "UKHLS"))) , /// + title("Benefit Unit Private Pension Income") /// + subtitle("`year'") /// + name(capital_inc_`year'_all, replace) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Sample includes benefit units with individuals age 65+. Amounts in GBP per year, 2015 prices. Top and bottom" "percentiles trimmed.", /// + size(vsmall)) + + graph export /// + "$dir_output_files/income/pension_income/validation_${country}_pension_income_bu_dist_`year'.png", /// + replace width(2560) height(1440) + +} + + +******************************************************************************** +* 2.2 : Ages 65+, positive amounts only, benefit unit +******************************************************************************** + +* Prepare validation data +use year idBu demAge dwt valid_yPensBuGrossLevelYear using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +drop if demAge < 65 + +* Keep one observation per benefit unit +bysort year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers + +if "$trim_outliers" == "true" { + + sum valid_yPensBuGrossLevelYear, d + + replace valid_yPensBuGrossLevelYear = . 
if /// + valid_yPensBuGrossLevelYear < r(p1) | /// + valid_yPensBuGrossLevelYear > r(p99) + +} +*/ + +drop if valid_yPensBuGrossLevelYear == 0 + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year idBu sim_yPensBuGrossLevelYear demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +drop if demAge < 65 + +* Keep one observation per benefit unit +bysort run year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers + +if "$trim_outliers" == "true" { + + sum sim_yPensBuGrossLevelYear, d + + replace sim_yPensBuGrossLevelYear = . if /// + sim_yPensBuGrossLevelYear < r(p1) | sim_yPensBuGrossLevelYear > r(p99) + +} +*/ + +drop if sim_yPensBuGrossLevelYear == 0 + +append using "$dir_data/temp_valid_stats.dta" + +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway (hist sim_yPensBuGrossLevelYear if year == `year', /// + width(1000) color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_yPensBuGrossLevelYear if year == `year', width(1000) /// + color(red%30) legend(label(2 "UKHLS"))) , /// + title("Benefit Unit Private Pension Income") /// + subtitle("Positive amounts, `year'") /// + name(capital_inc_`year'_all, replace) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Sample includes benefit units with individuals age 65+. Amounts in GBP per year, 2015 prices. 
Top and bottom" "percentiles trimmed.", /// + size(vsmall)) + + graph export /// +"$dir_output_files/income/pension_income/validation_${country}_positive_pension_income_bu_dist_`year'.png", /// + replace width(2560) height(1440) + +} + +graph drop _all diff --git a/validation/02_simulated_output_validation/do_files/04_07_plot_disposable_income.do b/validation/02_simulated_output_validation/do_files/04_07_plot_disposable_income.do new file mode 100644 index 000000000..8e23f903f --- /dev/null +++ b/validation/02_simulated_output_validation/do_files/04_07_plot_disposable_income.do @@ -0,0 +1,379 @@ +******************************************************************************** +* PROJECT: SimPaths UK +* SECTION: Validation +* OBJECT: Disposable income +* AUTHORS: Ashley Burdett +* LAST UPDATE: Jan 2026 +* COUNTRY: UK +******************************************************************************** +* NOTES: This do file plots simulated and UKHLS disposable income, +* per benefit unit. Individual level data plotted. +******************************************************************************** + +******************************************************************************** +* 1 : Mean values over time +******************************************************************************** + +******************************************************************************** +* 1.1 : Mean values over time, benefit unit +******************************************************************************** + +* Prepare validation data +use year idBu demAge dwt valid_yDispBuLevelYear using /// + "$dir_data/ukhls_validation_sample.dta", clear + +drop if demAge < 16 + +* Keep one observation per benefit unit +bysort year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yDispBuLevelYear, d + + replace valid_yDispBuLevelYear = . 
if /// + valid_yDispBuLevelYear < r(p1) | valid_yDispBuLevelYear > r(p99) + +} + +collapse (mean) valid_yDispBuLevelYear [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run idBu year demAge sim_yDispBuLevelYear using /// + "$dir_data/simulation_sample.dta", clear + +drop if demAge < 16 + +* Keep one observation per benefit unit +bysort run year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yDispBuLevelYear, d + + replace sim_yDispBuLevelYear = . if /// + sim_yDispBuLevelYear < r(p1) | sim_yDispBuLevelYear > r(p99) + +} + +collapse (mean) sim_yDispBuLevelYear, by(run year) + +collapse (mean) sim_yDispBuLevelYear /// + (sd) sim_yDispBuLevelYear_sd = sim_yDispBuLevelYear /// + , by(year) + +foreach varname in sim_yDispBuLevelYear { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway (rarea sim_yDispBuLevelYear_high sim_yDispBuLevelYear_low year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_yDispBuLevelYear year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Benefit Unit Disposable Income") /// + subtitle("") /// + xtitle("Year", size(small)) /// + ytitle("GBP per year", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. 
Top and bottom percentiles trimmed.", /// + size(vsmall)) + +graph export /// +"$dir_output_files/income/disposable_income/validation_${country}_disposable_income_bu_ts.jpg", /// + replace + + +******************************************************************************** +* 2 : Histograms +******************************************************************************** + +******************************************************************************** +* 2.1 : By year, benefit unit +******************************************************************************** + +* Prepare validation data +use year idBu demAge dwt valid_yDispBuLevelYear using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Sample selection +drop if demAge < 16 + +* Keep one observation per benefit unit +bysort year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yDispBuLevelYear, d + + replace valid_yDispBuLevelYear = . if /// + valid_yDispBuLevelYear < r(p1) | /// + valid_yDispBuLevelYear > r(p99) + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year idBu sim_yDispBuLevelYear demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Sample selection +drop if demAge < 16 + +* Keep one observation per benefit unit +bysort run year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yDispBuLevelYear, d + + replace sim_yDispBuLevelYear = . 
if /// + sim_yDispBuLevelYear < r(p1) | sim_yDispBuLevelYear > r(p99) + +} + +append using "$dir_data/temp_valid_stats.dta" + +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway (hist sim_yDispBuLevelYear if year == `year', width(2000) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_yDispBuLevelYear if year == `year', /// + width(2000) color(red%30) legend(label(2 "UKHLS"))) , /// + title("Benefit Unit Disposable Income") /// + subtitle("`year'") /// + name(disp_inc_`year'_all, replace) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. Top and bottom percentiles trimmed.", /// + size(vsmall)) + + graph export /// + "$dir_output_files/income/disposable_income/validation_${country}_disposable_income_bu_dist_`year'.png", /// + replace width(2560) height(1440) + +} + +graph drop _all + + +******************************************************************************** +* 2.2 : Histograms - Benefit unit, ages 18-65, by year, by hours of work +******************************************************************************** + +* Prepare validation data +use year demAge dwt valid_yDispBuLevelYear valid_labHrsWorkEnumWeek /// + using "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yDispBuLevelYear, d + + replace valid_yDispBuLevelYear = . 
if /// + valid_yDispBuLevelYear < r(p1) | valid_yDispBuLevelYear > r(p99) + +} + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_yDispBuLevelYear if year == `year' , /// + width(2500) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + + foreach ls in $ls_cat { + + twoway__histogram_gen valid_yDispBuLevelYear if /// + year == `year' & valid_labHrsWorkEnumWeek == "`ls'", width(2500) /// + den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year'_`ls' = r(max) + + drop d_valid v2 + + } +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year sim_yDispBuLevelYear sim_labHrsWorkEnumWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yDispBuLevelYear, d + + replace sim_yDispBuLevelYear = . if /// + sim_yDispBuLevelYear < r(p1) | sim_yDispBuLevelYear > r(p99) + +} + +keep if run == 1 + +append using "$dir_data/temp_valid_stats.dta" + +* Plot sub-figures +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_yDispBuLevelYear if year == `year', width(2500) /// + den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + * Plot all hours + twoway (hist sim_yDispBuLevelYear if year == `year', width(2500) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_yDispBuLevelYear if year == `year', width(2500) color(red%30) /// + legend(label(2 "UKHLS"))) , /// + subtitle("ALL hours") /// + name(disp_inc_`year'_all, replace) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(vsmall) angle(forty_five)) /// + ylabel(0(`steps')`max_y', labsize(vsmall)) /// + graphregion(color(white)) + + drop d_sim v1 max_d_sim max_value + + foreach ls in $ls_cat { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_yDispBuLevelYear if year == `year' & /// + sim_labHrsWorkEnumWeek == "`ls'", width(2500) den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year'_`ls' if /// + max_d_valid_`year'_`ls' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + * Plot by weekly hours work + twoway (hist sim_yDispBuLevelYear if year == `year' & /// + sim_labHrsWorkEnumWeek == "`ls'", width(2500) color(green%30) /// + legend(label(1 "SimPaths"))) /// + (hist valid_yDispBuLevelYear if year == `year' & /// + valid_labHrsWorkEnumWeek == "`ls'", width(2500) color(red%30) /// + legend(label(2 "UKHLS"))) , /// + subtitle("`ls' hours") /// + name(disp_inc_`year'_`ls', replace) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(vsmall) angle(forty_five)) /// + ylabel(0(`steps')`max_y', labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) + + drop d_sim v1 max_d_sim max_value + + } +} + +* Combine plots by year +qui sum year +local min_year = 2011 +local max_year = 2023 + +forvalues year = `min_year'/`max_year' { + + grc1leg disp_inc_`year'_all /// + disp_inc_`year'_ZERO /// + disp_inc_`year'_TEN , /// + title("Benefit Unit Disposable Income by Weekly Hours of Work") /// + subtitle("`year'") /// + legendfrom(disp_inc_`year'_ZERO) rows(1) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. Individual level data of benefit level amount plotted." "Top and bottom percentiles trimmed.", /// + size(vsmall)) + + graph export /// + "$dir_output_files/income/disposable_income/validation_${country}_disposable_income_bu_dist_`year'_hrs_work_1.png", /// + replace width(2400) height(1350) + + grc1leg /// + disp_inc_`year'_TWENTY /// + disp_inc_`year'_THIRTY /// + disp_inc_`year'_THIRTY_EIGHT /// + disp_inc_`year'_FORTY_FIVE /// + disp_inc_`year'_FIFTY_FIVE, /// + title("Disposable Income by Weekly Hours of Work") /// + subtitle("`year'") /// + legendfrom(disp_inc_`year'_TWENTY) rows(2) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. Individual level data of benefit level amount plotted." 
"Top and bottom percentiles trimmed.", /// + size(vsmall)) + + graph export /// + "$dir_output_files/income/disposable_income/validation_${country}_disposable_income_bu_dist_`year'_hrs_work_2.png", /// + replace width(2400) height(1350) + +} + + +graph drop _all + diff --git a/validation/02_simulated_output_validation/do_files/04_08_plot_equivalised_disposable_income.do b/validation/02_simulated_output_validation/do_files/04_08_plot_equivalised_disposable_income.do new file mode 100644 index 000000000..62c139ec9 --- /dev/null +++ b/validation/02_simulated_output_validation/do_files/04_08_plot_equivalised_disposable_income.do @@ -0,0 +1,369 @@ +******************************************************************************** +* PROJECT: SimPaths UK +* SECTION: Validation +* OBJECT: Equivalised disposable income +* AUTHORS: Ashley Burdett +* LAST UPDATE: Jan 2026 +* COUNTRY: UK +******************************************************************************** +* NOTES: This do file plots simulated and UKHLS equivalised +* disposable income, per benefit unit +******************************************************************************** + +******************************************************************************** +* 1 : Mean values over time, benefit unit +******************************************************************************** + +* Prepare validation data +use year idBu demAge dwt valid_yDispBuEquivYear using /// + "$dir_data/ukhls_validation_sample.dta", clear + +drop if demAge < 16 + +* Keep one observation per benefit unit +bysort year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yDispBuEquivYear, d + + replace valid_yDispBuEquivYear = . 
if /// + valid_yDispBuEquivYear < r(p1) | valid_yDispBuEquivYear > r(p99) + +} + +collapse (mean) valid_yDispBuEquivYear [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run idBu year demAge sim_yDispEquivYear using /// + "$dir_data/simulation_sample.dta", clear + +drop if demAge < 16 + +* Keep one observation per benefit unit +bysort run year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yDispEquivYear, d + + replace sim_yDispEquivYear = . if /// + sim_yDispEquivYear < r(p1) | sim_yDispEquivYear > r(p99) + +} + +collapse (mean) sim_yDispEquivYear, by(run year) + +collapse (mean) sim_yDispEquivYear /// + (sd) sim_yDispEquivYear_sd = sim_yDispEquivYear, by(year) + +foreach varname in sim_yDispEquivYear { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway (rarea sim_yDispEquivYear_high sim_yDispEquivYear_low year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_yDispBuEquivYear year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Benefit Unit Equivalised Disposable Income") /// + subtitle("") /// + xtitle("Year", size(small)) /// + ytitle("GBP per year", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Equivalised disposable income computed by the modified OECD scale. Top and bottom percentiles trimmed. 
Amounts" "annual, in 2015 prices.", /// + size(vsmall)) + +graph export /// +"$dir_output_files/income/equivalised_disposable_income/validation_${country}_equivalised_disposable_income_bu_ts.jpg", /// + replace width(2400) height(1350) + + +******************************************************************************** +* 2 : Histograms +******************************************************************************** + +******************************************************************************** +* 2.1 : By year, benefit unit +******************************************************************************** + +* Prepare validation data +use year idBu demAge dwt valid_yDispBuEquivYear using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Sample selection +drop if demAge < 16 + +* Keep one observation per benefit unit +bysort year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yDispBuEquivYear, d + + replace valid_yDispBuEquivYear = . if /// + valid_yDispBuEquivYear < r(p1) | /// + valid_yDispBuEquivYear > r(p99) + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year idBu sim_yDispEquivYear demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Sample selection +drop if demAge < 16 + +* Keep one observation per benefit unit +bysort run year idBu: gen first_person = (_n == 1) +keep if first_person == 1 + + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yDispEquivYear, d + + replace sim_yDispEquivYear = . 
if /// + sim_yDispEquivYear < r(p1) | sim_yDispEquivYear > r(p99) + +} + +append using "$dir_data/temp_valid_stats.dta" + +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway (hist sim_yDispEquivYear if year == `year', width(2000) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_yDispBuEquivYear if year == `year', /// + width(2000) color(red%30) legend(label(2 "UKHLS"))) , /// + title("Benefit Unit Equivalised Disposable Income") /// + subtitle("`year'") /// + name(disp_inc_`year'_all, replace) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Amounts in GBP per year, 2015 prices. Top and bottom percentiles trimmed.", /// + size(vsmall)) + + graph export /// + "$dir_output_files/income/equivalised_disposable_income/validation_${country}_equivalised_disposable_income_bu_dist_`year'.png", /// + replace width(2560) height(1440) + +} + +graph drop _all + +******************************************************************************** +* 2.2 : Histograms by year, and by category of weekly labour supply, benefit unit +******************************************************************************** + +* Prepare validation data +use year demAge dwt valid_yDispBuEquivYear valid_labHrsWorkEnumWeek using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yDispBuEquivYear, d + + replace valid_yDispBuEquivYear = . 
if /// + valid_yDispBuEquivYear < r(p1) | valid_yDispBuEquivYear > r(p99) + +} + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_yDispBuEquivYear if year == `year' , /// + width(2000) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + + foreach ls in $ls_cat { + + twoway__histogram_gen valid_yDispBuEquivYear if /// + year == `year' & valid_labHrsWorkEnumWeek == "`ls'", width(2000) /// + den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year'_`ls' = r(max) + + drop d_valid v2 + + } +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year demAge sim_yDispEquivYear sim_labHrsWorkEnumWeek using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yDispEquivYear, d + + replace sim_yDispEquivYear = . if /// + sim_yDispEquivYear < r(p1) | sim_yDispEquivYear > r(p99) + +} + +keep if run == 1 + +append using "$dir_data/temp_valid_stats.dta" + +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_yDispEquivYear if year == `year', width(2000) /// + den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + * Plot all hours + twoway (hist sim_yDispEquivYear if year == `year', width(2000) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_yDispBuEquivYear if year == `year', width(2000) color(red%30) /// + legend(label(2 "UKHLS"))) , /// + subtitle("ALL hours") /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + name(eqdisp_inc_`year'_all, replace) /// + xlabel(,labsize(vsmall) angle(forty_five)) /// + ylabel(0(`steps')`max_y', labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) + + drop d_sim v1 max_d_sim max_value + + foreach ls in $ls_cat { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_yDispEquivYear if year == `year' & /// + sim_labHrsWorkEnumWeek == "`ls'", width(2000) den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year'_`ls' if /// + max_d_valid_`year'_`ls' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + twoway (hist sim_yDispEquivYear if year == `year' & /// + sim_labHrsWorkEnumWeek == "`ls'", width(2000) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_yDispBuEquivYear if year == `year' & /// + valid_labHrsWorkEnumWeek == "`ls'", width(2000) color(red%30) /// + legend(label(2 "UKHLS"))) , /// + subtitle("`ls' hours") /// + name(eqdisp_inc_`year'_`ls', replace) /// + xtitle("GBP", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(vsmall) angle(forty_five)) /// + ylabel(0(`steps')`max_y', labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) + + drop d_sim v1 max_d_sim max_value + + } +} + +qui sum year +local min_year = 2011 +local max_year = 2023 + +forvalues year = `min_year'/`max_year' { + + grc1leg eqdisp_inc_`year'_all /// + eqdisp_inc_`year'_ZERO /// + eqdisp_inc_`year'_TEN , /// + title("Equivalised Disposable Income") /// + subtitle("`year'") /// + legendfrom(eqdisp_inc_`year'_all) rows(1) /// + graphregion(color(white)) /// + note("Notes: Distribution of benefit unit equivalised disposable income. Individual level data plotted. 18-65 year olds included in sample. Amounts in" "GBP per year, 2015 prices. Top and bottom percentiles trimmed.", /// + size(vsmall)) + + graph export /// + "$dir_output_files/income/equivalised_disposable_income/validation_${country}_equivalised_disposable_inc_dist_`year'_1.png", /// + replace width(2560) height(1440) + + + grc1leg /// + eqdisp_inc_`year'_TWENTY /// + eqdisp_inc_`year'_THIRTY /// + eqdisp_inc_`year'_THIRTY_EIGHT /// + eqdisp_inc_`year'_FORTY_FIVE /// + eqdisp_inc_`year'_FIFTY_FIVE, /// + title("Equivalised Disposable Income") /// + subtitle("`year'") /// + legendfrom(eqdisp_inc_`year'_TWENTY) rows(2) /// + graphregion(color(white)) /// + note("Notes: Distribution of benefit unit equivalised disposable income. Individual level data plotted. 18-65 year olds included in sample. 
Amounts in" "GBP per year, 2015 prices. Top and bottom percentiles trimmed.", /// + size(vsmall)) + + graph export /// + "$dir_output_files/income/equivalised_disposable_income/validation_${country}_equivalised_disposable_inc_dist_`year'_2.png", /// + replace width(2560) height(1440) + +} + +graph drop _all + diff --git a/validation/02_simulated_output_validation/do_files/06_09_plot_hourly_wages.do b/validation/02_simulated_output_validation/do_files/04_09_plot_hourly_wages.do similarity index 52% rename from validation/02_simulated_output_validation/do_files/06_09_plot_hourly_wages.do rename to validation/02_simulated_output_validation/do_files/04_09_plot_hourly_wages.do index a4fa84b1f..4393322a9 100644 --- a/validation/02_simulated_output_validation/do_files/06_09_plot_hourly_wages.do +++ b/validation/02_simulated_output_validation/do_files/04_09_plot_hourly_wages.do @@ -1,11 +1,11 @@ ******************************************************************************** -* PROJECT: SimPaths +* PROJECT: SimPaths UK * SECTION: Validation * OBJECT: Hourly wages * AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 +* LAST UPDATE: Jan 2026 * COUNTRY: UK - +******************************************************************************** * NOTES: This master do file organises do files used for validating * SimPaths model using UKHLS data. 
******************************************************************************** @@ -15,63 +15,61 @@ ******************************************************************************** ******************************************************************************** -* 1.1 : Mean values over time - 18-65 +* 1.1 : Mean values over time - 16-65 ******************************************************************************** * Prepare validation data -use year dwt les_c4 valid_wage_hour lhw_flag using /// +use year demAge dwt labC4 valid_wage using /// "$dir_data/ukhls_validation_sample.dta", clear -* Select sample -keep if les_c4 == 1 - -drop if lhw_flag == 1 +* Select sample +keep if labC4 == 1 +keep if inrange(demAge,16,65) * Trim outliers if "$trim_outliers" == "true" { - sum valid_wage_hour, d + sum valid_wage, d - replace valid_wage_hour = . if /// - valid_wage_hour < r(p1) | valid_wage_hour > r(p99) + replace valid_wage = . if /// + valid_wage < r(p1) | valid_wage > r(p99) } -* Drop very low wages -drop if valid_wage_hour < 3 - * Compute means -collapse (mean) valid_wage_hour [aw = dwt], by(year) +collapse (mean) valid_wage [aw = dwt], by(year) save "$dir_data/temp_valid_stats.dta", replace * Prepare simulated data -use run year les_c4 potential_earnings_hourly using /// - "$dir_data/simulated_data.dta", clear +use run year labC4 sim_pred_wage demAge using /// + "$dir_data/simulation_sample.dta", clear * Select sample -keep if les_c4 == "EmployedOrSelfEmployed" +keep if labC4 == "EmployedOrSelfEmployed" +keep if inrange(demAge,16,65) * Trim outliers if "$trim_outliers" == "true" { - sum potential_earnings_hourly, d + sum sim_pred_wage, d - replace potential_earnings_hourly = . if /// - potential_earnings_hourly < r(p1) | potential_earnings_hourly > r(p99) + replace sim_pred_wage = . 
if /// + sim_pred_wage < r(p1) | sim_pred_wage > r(p99) } * Compute means and sd -collapse (mean) potential_earnings_hourly, by(run year) -collapse (mean) potential_earnings_hourly /// - (sd) potential_earnings_hourly_sd = potential_earnings_hourly /// +collapse (mean) sim_pred_wage, by(run year) + +collapse (mean) sim_pred_wage /// + (sd) sim_pred_wage_sd = sim_pred_wage /// , by(year) * Approx 95% confidence interval -foreach varname in potential_earnings_hourly { +foreach varname in sim_pred_wage { gen `varname'_high = `varname' + 1.96*`varname'_sd gen `varname'_low = `varname' - 1.96*`varname'_sd @@ -82,13 +80,13 @@ foreach varname in potential_earnings_hourly { merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen * Plot figure -twoway (rarea potential_earnings_hourly_high /// - potential_earnings_hourly_low year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_wage_hour year, sort color(green) /// +twoway (rarea sim_pred_wage_high /// + sim_pred_wage_low year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_wage year, sort color(green) /// legend(label(2 "UKHLS"))), /// title("Hourly Wage") /// - subtitle("Ages 18-65") /// + subtitle("Ages 16-65") /// xtitle("Year", size(small)) /// ytitle("GBP per hour", size(small)) /// ylabel(,labsize(small)) /// @@ -100,82 +98,76 @@ twoway (rarea potential_earnings_hourly_high /// * Save figure graph export /// -"$dir_output_files/wages/validation_${country}_wages_ts_${min_age}_${max_age}_both.jpg", /// +"$dir_output_files/wages/validation_${country}_wages_ts_16_65_both.jpg", /// replace width(2560) height(1440) quality(100) ******************************************************************************** -* 1.2 : Mean values over time - 18-65, by gender +* 1.2 : Mean values over time - 16-65, by gender ******************************************************************************** * Prepare validation data -use year dwt les_c4 valid_wage_hour dgn using /// 
+use year demAge dwt labC4 valid_wage demMaleFlag using /// "$dir_data/ukhls_validation_sample.dta", clear * select sample -keep if les_c4 == 1 +keep if labC4 == 1 +keep if inrange(demAge,16,65) * Trim outliers if "$trim_outliers" == "true" { - sum valid_wage_hour, d + sum valid_wage, d - replace valid_wage_hour = . if /// - valid_wage_hour < r(p1) | valid_wage_hour > r(p99) + replace valid_wage = . if /// + valid_wage < r(p1) | valid_wage > r(p99) } -* Drop very low wages -drop if valid_wage_hour < 3 - * Compute mean -collapse (mean) valid_wage_hour [aw = dwt], by(year dgn) +collapse (mean) valid_wage [aw = dwt], by(year demMaleFlag) save "$dir_data/temp_valid_stats.dta", replace * Prepare simulated data -use run year les_c4 potential_earnings_hourly dgn using /// - "$dir_data/simulated_data.dta", clear +use run year labC4 sim_pred_wage demMaleFlag demAge using /// + "$dir_data/simulation_sample.dta", clear * Select sample -keep if les_c4 == "EmployedOrSelfEmployed" - -gen dgn2 = 0 if dgn == "Female" -replace dgn2 = 1 if dgn == "Male" - -drop dgn -rename dgn2 dgn +keep if labC4 == "EmployedOrSelfEmployed" +keep if inrange(demAge,16,65) * Trim outliers if "$trim_outliers" == "true" { - sum potential_earnings_hourly , d + sum sim_pred_wage , d - replace potential_earnings_hourly = . if /// - potential_earnings_hourly < r(p1) | potential_earnings_hourly > r(p99) + replace sim_pred_wage = . 
if /// + sim_pred_wage < r(p1) | sim_pred_wage > r(p99) } -collapse (mean) potential_earnings_hourly, by(run year dgn) -collapse (mean) potential_earnings_hourly /// - (sd) potential_earnings_hourly_sd = potential_earnings_hourly /// - , by(year dgn) +collapse (mean) sim_pred_wage, by(run year demMaleFlag) + +collapse (mean) sim_pred_wage /// + (sd) sim_pred_wage_sd = sim_pred_wage /// + , by(year demMaleFlag) -foreach varname in potential_earnings_hourly { +foreach varname in sim_pred_wage { gen `varname'_high = `varname' + 1.96*`varname'_sd gen `varname'_low = `varname' - 1.96*`varname'_sd } -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen * Plot figure -twoway (rarea potential_earnings_hourly_high /// - potential_earnings_hourly_low year if dgn == 0, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_wage_hour year if dgn == 0, sort color(green) /// +twoway (rarea sim_pred_wage_high /// + sim_pred_wage_low year if demMaleFlag == 0, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_wage year if demMaleFlag == 0, sort color(green) /// legend(label(2 "UKHLS"))), /// subtitle("Females") /// name(wages_female, replace) /// @@ -186,10 +178,10 @@ twoway (rarea potential_earnings_hourly_high /// legend(size(small)) /// graphregion(color(white)) -twoway (rarea potential_earnings_hourly_high /// - potential_earnings_hourly_low year if dgn == 1, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_wage_hour year if dgn == 1, sort color(green) /// +twoway (rarea sim_pred_wage_high /// + sim_pred_wage_low year if demMaleFlag == 1, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_wage year if demMaleFlag == 1, sort color(green) /// legend(label(2 "UKHLS"))), /// subtitle("Males") /// name(wages_male, replace) /// @@ -203,57 +195,58 @@ twoway (rarea potential_earnings_hourly_high 
/// grc1leg wages_female wages_male, /// title("Hourly Wage") /// - subtitle("Ages 18-65") /// + subtitle("Ages 16-65") /// legendfrom(wages_female) rows(1) /// graphregion(color(white)) /// - note("Notes: Statistics calculated on sample of employed anf self-employed individuals. Amounts in 2015 prices. Top and bottom percentiles trimmed.", /// + ycomm /// + note("Notes: Statistics calculated on sample of employed and self-employed individuals. Amounts in 2015 prices. Top and bottom" "percentiles trimmed.", /// size(vsmall)) * Save figure graph export /// -"$dir_output_files/wages/validation_${country}_wages_ts_${min_age}_${max_age}_gender.jpg", /// +"$dir_output_files/wages/validation_${country}_wages_ts_16_65_gender.jpg", /// replace width(2560) height(1440) quality(100) +graph drop _all + ******************************************************************************** * 2 : Histograms by year ******************************************************************************** ******************************************************************************** -* 2.1 : Histograms by year - ages 18-65 +* 2.1 : Histograms by year - ages 16-65 ******************************************************************************** * Prepare validation data -use year dwt les_c4 valid_wage_hour using /// - "$dir_data/ukhls_validation_sample.dta", clear +use year demAge dwt labC4 valid_wage flag_wage_imp_panel /// + flag_wage_hotdeck using "$dir_data/ukhls_validation_sample.dta", clear * Select sample -keep if les_c4 == 1 +keep if labC4 == 1 +keep if inrange(demAge,16,65) -drop les_c4 +drop labC4 * Trim outliers if "$trim_outliers" == "true" { - sum valid_wage_hour, d + sum valid_wage, d - replace valid_wage_hour = . if /// - valid_wage_hour < r(p1) | valid_wage_hour > r(p99) + replace valid_wage = .
if /// + valid_wage < r(p1) | valid_wage > r(p99) } -* Drop very low wages -drop if valid_wage_hour < 3 - * Prepare info needed for dynamic y axis labels qui sum year local min_year = 2011 -local max_year = r(max) +local max_year = 2023 forval year = `min_year'/`max_year' { - twoway__histogram_gen valid_wage_hour if year == `year' , /// - bin(60) den gen(d_valid v2) + twoway__histogram_gen valid_wage if year == `year' , /// + bin(10) den gen(d_valid v2) qui sum d_valid gen max_d_valid_`year' = r(max) @@ -266,21 +259,22 @@ save "$dir_data/temp_valid_stats.dta", replace * Prepare simulated data -use run year les_c4 potential_earnings_hourly using /// - "$dir_data/simulated_data.dta", clear +use run year labC4 sim_pred_wage demAge using /// + "$dir_data/simulation_sample.dta", clear -* Keep only employed individuals -keep if les_c4 == "EmployedOrSelfEmployed" +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" +keep if inrange(demAge,16,65) -drop les_c4 +drop labC4 * Trim outliers if "$trim_outliers" == "true" { - sum potential_earnings_hourly, d + sum sim_pred_wage, d - replace potential_earnings_hourly = . if /// - potential_earnings_hourly < r(p1) | potential_earnings_hourly > r(p99) + replace sim_pred_wage = . 
if /// + sim_pred_wage < r(p1) | sim_pred_wage > r(p99) } @@ -288,13 +282,13 @@ append using "$dir_data/temp_valid_stats.dta" qui sum year local min_year = 2011 -local max_year = r(max) +local max_year = 2023 forval year = `min_year'/`max_year' { * Prepare info needed for dynamic y axis labels - twoway__histogram_gen potential_earnings_hourly if year == `year', /// - bin(60) den gen(d_sim v1) + twoway__histogram_gen sim_pred_wage if year == `year', /// + bin(5) den gen(d_sim v1) qui sum d_sim gen max_d_sim = r(max) @@ -307,9 +301,9 @@ forval year = `min_year'/`max_year' { local steps = `max_y'/2 * Plot all hours - twoway (hist potential_earnings_hourly if year == `year', /// - width(0.5) color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_wage_hour if year == `year', width(0.5) color(red%30) /// + twoway (hist sim_pred_wage if year == `year', /// + width(1) color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_wage if year == `year', width(1) color(red%30) /// legend(label(2 "UKHLS"))) , /// title("Hourly Wage") /// subtitle("`year'") /// @@ -317,10 +311,10 @@ forval year = `min_year'/`max_year' { xtitle("GBP per hour", size(small)) /// ytitle("Density", size(small)) /// xlabel(,labsize(small)) /// - ylabel(0(`steps')`max_y', labsize(small)) /// + ylabel(, labsize(small)) /// legend(size(small)) /// graphregion(color(white)) /// - note("Notes: Statistics calculated on subsample of employed and self-employed individuals aged 18-65. Amounts in 2015 prices.""Top and bottom percentiles trimmed.", size(vsmall)) + note("Notes: Statistics calculated on subsample of employed and self-employed individuals aged 16-65. 
Amounts in 2015 prices.""Top percentiles and bottom percentiles trimmed.", size(vsmall)) graph export /// "$dir_output_files/wages/validation_${country}_wages_dist_`year'.png", /// @@ -334,45 +328,40 @@ graph drop _all ******************************************************************************** -* 2.2 : Histograms by year - ages 18-65 by gender +* 2.2 : Histograms by year - ages 16-65 by gender ******************************************************************************** * Females * Prepare validation data -use year dwt les_c4 valid_wage_hour dgn hours using /// +use year demAge dwt labC4 valid_wage demMaleFlag demAge using /// "$dir_data/ukhls_validation_sample.dta", clear * Select sample -keep if les_c4 == 1 -keep if dgn == 0 +keep if labC4 == 1 +keep if demMaleFlag == 0 +keep if inrange(demAge,16,65) -drop les_c4 dgn +drop labC4 demMaleFlag * Trim outliers if "$trim_outliers" == "true" { - sum valid_wage_hour, d + sum valid_wage, d - replace valid_wage_hour = . if /// - valid_wage_hour < r(p1) | valid_wage_hour > r(p99) + replace valid_wage = . 
if /// + valid_wage < r(p1) | valid_wage > r(p99) } -* Drop very low wages -drop if valid_wage_hour < 3 - -* Remove those with very high hours of work -//drop if hours > $max_hours - * Prepare info needed for dynamic y axis labels qui sum year local min_year = 2011 -local max_year = r(max) +local max_year = 2023 forval year = `min_year'/`max_year' { - twoway__histogram_gen valid_wage_hour if year == `year' , /// - bin(60) den gen(d_valid v2) + twoway__histogram_gen valid_wage if year == `year' , /// + bin(10) den gen(d_valid v2) qui sum d_valid gen max_d_valid_`year' = r(max) @@ -385,21 +374,23 @@ save "$dir_data/temp_valid_stats.dta", replace * Prepare simulated data -use run year les_c4 potential_earnings_hourly dgn using /// - "$dir_data/simulated_data.dta", clear +use run year labC4 sim_pred_wage demMaleFlag demAge using /// + "$dir_data/simulation_sample.dta", clear * Select sample -keep if les_c4 == "EmployedOrSelfEmployed" -keep if dgn == "Female" -drop les_c4 dgn +keep if labC4 == "EmployedOrSelfEmployed" +keep if demMaleFlag == 0 +keep if inrange(demAge,16,65) + +drop labC4 demMaleFlag * Trim outliers if "$trim_outliers" == "true" { - sum potential_earnings_hourly, d + sum sim_pred_wage, d - replace potential_earnings_hourly = . if /// - potential_earnings_hourly < r(p1) | potential_earnings_hourly > r(p99) + replace sim_pred_wage = . 
if /// + sim_pred_wage < r(p1) | sim_pred_wage > r(p99) } @@ -407,13 +398,13 @@ append using "$dir_data/temp_valid_stats.dta" qui sum year local min_year = 2011 -local max_year = r(max) +local max_year = 2023 forval year = `min_year'/`max_year' { * Prepare info needed for dynamic y axis labels - twoway__histogram_gen potential_earnings_hourly if year == `year', /// - bin(60) den gen(d_sim v1) + twoway__histogram_gen sim_pred_wage if year == `year', /// + bin(10) den gen(d_sim v1) qui sum d_sim gen max_d_sim = r(max) @@ -426,9 +417,9 @@ forval year = `min_year'/`max_year' { local steps = `max_y'/2 * Plot all hours - twoway (hist potential_earnings_hourly if year == `year', /// - width(0.5) color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_wage_hour if year == `year', width(0.5) color(red%30) /// + twoway (hist sim_pred_wage if year == `year', /// + width(1) color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_wage if year == `year', width(1) color(red%30) /// legend(label(2 "UKHLS"))) , /// title("Hourly Wage") /// subtitle("`year', females") /// @@ -436,10 +427,9 @@ forval year = `min_year'/`max_year' { xtitle("GBP per hour", size(small)) /// ytitle("Density", size(small)) /// xlabel(,labsize(small)) /// - ylabel(0(`steps')`max_y', labsize(small)) /// legend(size(small)) /// graphregion(color(white)) /// - note("Notes: Statistics calculated on subsample of employed and self-employed individuals aged 18-65. Amounts in 2015 prices.""Top and bottom percentiles trimmed.", size(vsmall)) + note("Notes: Statistics calculated on subsample of employed and self-employed individuals aged 16-65. 
Amounts in 2015 prices.""Top and bottom percentiles trimmed.", size(vsmall)) graph export /// "$dir_output_files/wages/validation_${country}_wages_dist_`year'_female.png", /// @@ -452,41 +442,35 @@ forval year = `min_year'/`max_year' { * Males * Prepare validation data -use year dwt les_c4 valid_wage_hour hours dgn using /// +use year demAge dwt labC4 valid_wage demMaleFlag using /// "$dir_data/ukhls_validation_sample.dta", clear * Select sample -keep if les_c4 == 1 -keep if dgn == 1 +keep if labC4 == 1 +keep if demMaleFlag == 1 +keep if inrange(demAge,16,65) -drop les_c4 dgn +drop labC4 demMaleFlag * Trim outliers if "$trim_outliers" == "true" { - sum valid_wage_hour, d + sum valid_wage, d - replace valid_wage_hour = . if /// - valid_wage_hour < r(p1) | valid_wage_hour > r(p99) + replace valid_wage = . if /// + valid_wage < r(p1) | valid_wage > r(p99) } -* Drop very low wages -drop if valid_wage_hour < 3 - -* Remove those with very high hours of work -//drop if hours > $max_hours - - * Prepare info needed for dynamic y axis labels qui sum year local min_year = 2011 -local max_year = r(max) +local max_year = 2023 forval year = `min_year'/`max_year' { - twoway__histogram_gen valid_wage_hour if year == `year' , /// - bin(60) den gen(d_valid v2) + twoway__histogram_gen valid_wage if year == `year' , /// + bin(10) den gen(d_valid v2) qui sum d_valid gen max_d_valid_`year' = r(max) @@ -499,21 +483,23 @@ save "$dir_data/temp_valid_stats.dta", replace * Prepare simulated data -use run year les_c4 potential_earnings_hourly dgn using /// - "$dir_data/simulated_data.dta", clear +use run year labC4 sim_pred_wage demMaleFlag demAge using /// + "$dir_data/simulation_sample.dta", clear -* Keep only employed individuals -keep if les_c4 == "EmployedOrSelfEmployed" -keep if dgn == "Male" -drop les_c4 dgn +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" +keep if demMaleFlag == 1 +keep if inrange(demAge,16,65) + +drop labC4 demMaleFlag * Trim outliers if "$trim_outliers"
== "true" { - sum potential_earnings_hourly, d + sum sim_pred_wage, d - replace potential_earnings_hourly = . if /// - potential_earnings_hourly < r(p1) | potential_earnings_hourly > r(p99) + replace sim_pred_wage = . if /// + sim_pred_wage < r(p1) | sim_pred_wage > r(p99) } @@ -521,13 +507,13 @@ append using "$dir_data/temp_valid_stats.dta" qui sum year local min_year = 2011 -local max_year = r(max) +local max_year = 2023 forval year = `min_year'/`max_year' { * Prepare info needed for dynamic y axis labels - twoway__histogram_gen potential_earnings_hourly if year == `year', /// - bin(60) den gen(d_sim v1) + twoway__histogram_gen sim_pred_wage if year == `year', /// + bin(10) den gen(d_sim v1) qui sum d_sim gen max_d_sim = r(max) @@ -540,9 +526,9 @@ forval year = `min_year'/`max_year' { local steps = `max_y'/2 * Plot all hours - twoway (hist potential_earnings_hourly if year == `year', /// - width(0.5) color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_wage_hour if year == `year', width(0.5) color(red%30) /// + twoway (hist sim_pred_wage if year == `year', /// + width(1) color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_wage if year == `year', width(1) color(red%30) /// legend(label(2 "UKHLS"))) , /// title("Hourly Wage") /// subtitle("`year', males") /// @@ -550,10 +536,9 @@ forval year = `min_year'/`max_year' { xtitle("GBP per hour", size(small)) /// ytitle("Density", size(small)) /// xlabel(,labsize(small)) /// - ylabel(0(`steps')`max_y', labsize(small)) /// legend(size(small)) /// graphregion(color(white)) /// - note("Notes: Statistics calculated on subsample of employed and self-employed individuals aged 18-65. Amounts in 2015 prices.""Top and bottom percentiles trimmed.", size(vsmall)) + note("Notes: Statistics calculated on subsample of employed and self-employed individuals aged 16-65. 
Amounts in 2015 prices.""Top and bottom percentiles trimmed.", size(vsmall)) graph export /// @@ -562,6 +547,7 @@ forval year = `min_year'/`max_year' { drop d_sim v1 max_d_sim max_value + } diff --git a/validation/02_simulated_output_validation/do_files/04_10_0_plot_hours_worked.do b/validation/02_simulated_output_validation/do_files/04_10_0_plot_hours_worked.do new file mode 100644 index 000000000..8459a4f49 --- /dev/null +++ b/validation/02_simulated_output_validation/do_files/04_10_0_plot_hours_worked.do @@ -0,0 +1,2029 @@ +******************************************************************************** +* PROJECT: SimPaths UK +* SECTION: Validation +* OBJECT: Hours worked per week +* AUTHORS: Ashley Burdett +* LAST UPDATE: Jan 2026 +* COUNTRY: UK +******************************************************************************** +* NOTES: Current implementation explores the impact of how the +* heterogeneity of the uppermost category is specified. +******************************************************************************** + +set seed 12345 + +******************************************************************************** +* UNIFORM HETEROGENEITY +******************************************************************************** + +******************************************************************************** +* 1 : Mean values over time +******************************************************************************** + +******************************************************************************** +* 1.1 : Mean values over time - Ages 16-65 +******************************************************************************** + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if inrange(demAge,16,65) + +* Compute mean +collapse (mean) valid_labHrsWorkWeek [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + +*
Prepare simulated data +use run year idPers labC4 sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if inrange(demAge,16,65) + +rename sim_labHrsWorkWeek sim_labHrsWorkWeek + +* Keep only employed individuals +keep if labC4 == "EmployedOrSelfEmployed" + +* Compute mean and sd +collapse (mean) sim_labHrsWorkWeek, by(run year) + +collapse (mean) sim_labHrsWorkWeek /// + (sd) sim_labHrsWorkWeek_sd = sim_labHrsWorkWeek /// + , by(year) + +* Approx 95% confidence interval +foreach varname in sim_labHrsWorkWeek { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +save "$dir_data/temp_sim_mean_uni.dta", replace + +* Combine datasets +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway (rarea sim_labHrsWorkWeek_high sim_labHrsWorkWeek_low year, sort /// + color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_labHrsWorkWeek year, sort color(green) legend(label(2 "UKHLS"))), /// + title("Average Weekly Hours Worked") /// + subtitle("Ages 16-65") /// + xtitle("Year", size(small)) /// + ytitle("Hours per week", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of working age employed and self-employed individuals.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_16_65_both.jpg", /// + replace width(2560) height(1440) quality(100) + + +******************************************************************************** +* 1.2 : Mean values over time - Ages 16-65, by gender +******************************************************************************** + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demAge demMaleFlag using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select 
sample +keep if labC4 == 1 +keep if inrange(demAge,16,65) + +* Compute mean +collapse (mean) valid_labHrsWorkWeek [aw = dwt], by(year demMaleFlag) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year idPers labC4 sim_labHrsWorkWeek demAge demMaleFlag using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" +keep if inrange(demAge,16,65) + +* Compute mean and sd +collapse (mean) sim_labHrsWorkWeek, by(run year demMaleFlag) + +collapse (mean) sim_labHrsWorkWeek /// + (sd) sim_labHrsWorkWeek_sd = sim_labHrsWorkWeek /// + , by(year demMaleFlag) + +* Approx 95% confidence interval +foreach varname in sim_labHrsWorkWeek { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +* Combine datasets +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure + +* Males +preserve + +keep if demMaleFlag == 1 + +twoway (rarea sim_labHrsWorkWeek_high sim_labHrsWorkWeek_low year, sort /// + color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_labHrsWorkWeek year, sort color(green) legend(label(2 "UKHLS"))), /// + title("Average Weekly Hours Worked") /// + subtitle("Males, ages 16-65") /// + xtitle("Year", size(small)) /// + ytitle("Hours per week", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of working age employed and self-employed individuals.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_16_65_male.jpg", /// + replace width(2560) height(1440) quality(100) + +restore + +* Females + +keep if demMaleFlag == 0 + +twoway (rarea sim_labHrsWorkWeek_high sim_labHrsWorkWeek_low year, sort /// + color(green%20) legend(label(1 "SimPaths"))) /// +(line 
valid_labHrsWorkWeek year, sort color(green) legend(label(2 "UKHLS"))), /// + title("Average Weekly Hours Worked") /// + subtitle("Females, ages 16-65") /// + xtitle("Year", size(small)) /// + ytitle("Hours per week", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of working age employed and self-employed individuals.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_16_65_female.jpg", /// + replace width(2560) height(1440) quality(100) + + +******************************************************************************** +* 1.3 : Mean values over time - Ages 16-75 +******************************************************************************** + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if inrange(demAge,16,75) + +* Compute mean +collapse (mean) valid_labHrsWorkWeek [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year idPers labC4 sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if inrange(demAge,16,75) + +* Keep only employed individuals +keep if labC4 == "EmployedOrSelfEmployed" + +* Compute mean and sd +collapse (mean) sim_labHrsWorkWeek, by(run year) + +collapse (mean) sim_labHrsWorkWeek /// + (sd) sim_labHrsWorkWeek_sd = sim_labHrsWorkWeek /// + , by(year) + +* Approx 95% confidence interval +foreach varname in sim_labHrsWorkWeek { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +save "$dir_data/temp_sim_mean_uni.dta", replace + +* Combine datasets +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure 
+twoway (rarea sim_labHrsWorkWeek_high sim_labHrsWorkWeek_low year, sort /// + color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_labHrsWorkWeek year, sort color(green) legend(label(2 "UKHLS"))), /// + title("Average Weekly Hours Worked") /// + subtitle("Ages 16-75") /// + xtitle("Year", size(small)) /// + ytitle("Hours per week", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of working age employed and self-employed individuals.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_16_75_both.jpg", /// + replace width(2560) height(1440) quality(100) + + +******************************************************************************** +* 1.4 : Mean values over time - Ages 16-75, by gender +******************************************************************************** + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demAge demMaleFlag using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if inrange(demAge,16,75) + +* Compute mean +collapse (mean) valid_labHrsWorkWeek [aw = dwt], by(year demMaleFlag) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year idPers labC4 sim_labHrsWorkWeek demAge demMaleFlag using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" +keep if inrange(demAge,16,75) + +* Compute mean and sd +collapse (mean) sim_labHrsWorkWeek, by(run year demMaleFlag) + +collapse (mean) sim_labHrsWorkWeek /// + (sd) sim_labHrsWorkWeek_sd = sim_labHrsWorkWeek /// + , by(year demMaleFlag) + +* Approx 95% confidence interval +foreach varname in sim_labHrsWorkWeek { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +*
Combine datasets +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figures + +* Male +preserve + +keep if demMaleFlag == 1 + +twoway (rarea sim_labHrsWorkWeek_high sim_labHrsWorkWeek_low year, sort /// + color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_labHrsWorkWeek year, sort color(green) legend(label(2 "UKHLS"))), /// + title("Average Weekly Hours Worked") /// + subtitle("Males, ages 16-75") /// + xtitle("Year", size(small)) /// + ytitle("Hours per week", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of working age employed and self-employed individuals.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_16_75_male.jpg", /// + replace width(2560) height(1440) quality(100) + +restore + +* Females + +keep if demMaleFlag == 0 + +twoway (rarea sim_labHrsWorkWeek_high sim_labHrsWorkWeek_low year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_labHrsWorkWeek year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Average Weekly Hours Worked") /// + subtitle("Females, ages 16-75") /// + xtitle("Year", size(small)) /// + ytitle("Hours per week", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of working age employed and self-employed individuals.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_16_75_female.jpg", /// + replace width(2560) height(1440) quality(100) + + +******************************************************************************** +* 2 : Histograms by year +******************************************************************************** + 
+******************************************************************************** +* 2.1 : Histograms by year - ages 16-65 +******************************************************************************** + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if inrange(demAge,16,65) + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_labHrsWorkWeek if year == `year' , /// + bin(60) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + +} + +drop labC4 + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year labC4 idPers sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" +keep if inrange(demAge,16,65) + +* Combine datasets +append using "$dir_data/temp_valid_stats.dta" + +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_labHrsWorkWeek if year == `year', bin(60) /// + den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + twoway (hist sim_labHrsWorkWeek if year == `year', width(1) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_labHrsWorkWeek if year == `year', width(1) color(red%30) /// + legend(label(2 "UKHLS"))), /// + title("Weekly Hours Worked") /// + subtitle(" `year', ages 16-65") /// + xtitle("Hours per week", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(small)) /// + ylabel(0(`steps')`max_y', labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of employed and self-employed individuals.", /// + size(vsmall)) + + graph export /// + "$dir_output_files/hours_worked/validation_${country}_hours_worked_hist_`year'.png", /// + replace width(2400) height(1350) + + drop d_sim v1 max_d_sim max_value + +} + +******************************************************************************** +* 2.2 : Histograms by year - ages 16-65, by gender +******************************************************************************** + +* Female + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demMaleFlag demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if demMaleFlag == 0 +keep if inrange(demAge,16,65) + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_labHrsWorkWeek if year == `year' , /// + bin(60) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + +} + +drop labC4 demMaleFlag + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run idPers year labC4 demMaleFlag sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" +keep if 
demMaleFlag == 0 +keep if inrange(demAge,16,65) + +* Combine datasets +append using "$dir_data/temp_valid_stats.dta" + +* Plot by year +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_labHrsWorkWeek if year == `year', bin(60) /// + den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . + + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + twoway (hist sim_labHrsWorkWeek if year == `year', width(1) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_labHrsWorkWeek if year == `year', width(1) color(red%30) /// + legend(label(2 "UKHLS"))), /// + title("Weekly Hours Worked") /// + subtitle("`year', females, ages 16-65") /// + xtitle("Hours per week", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(small)) /// + ylabel(0(`steps')`max_y', labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of employed and self-employed females.", /// + size(vsmall)) + + graph export /// + "$dir_output_files/hours_worked/validation_${country}_hours_worked_hist_`year'_female.png", /// + replace width(2400) height(1350) + + drop d_sim v1 max_d_sim max_value + +} + + +* Male + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demMaleFlag demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if demMaleFlag == 1 +keep if inrange(demAge,16,65) + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_labHrsWorkWeek if year == `year' , /// + bin(60) den gen(d_valid v2) + + qui sum d_valid + gen 
max_d_valid_`year' = r(max) + + drop d_valid v2 + +} + +drop labC4 demMaleFlag + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run idPers year labC4 demMaleFlag sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" +keep if demMaleFlag == 1 +keep if inrange(demAge,16,65) + +* Combine datasets +append using "$dir_data/temp_valid_stats.dta" + +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_labHrsWorkWeek if year == `year', bin(60) /// + den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . + + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + twoway (hist sim_labHrsWorkWeek if year == `year' , width(1) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_labHrsWorkWeek if year == `year' , width(1) color(red%30) /// + legend(label(2 "UKHLS"))), /// + title("Weekly Hours Worked") /// + subtitle("`year', males, ages 16-65") /// + xtitle("Hours per week", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(small)) /// + ylabel(0(`steps')`max_y', labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of employed and self-employed males.", /// + size(vsmall)) + + + graph export /// + "$dir_output_files/hours_worked/validation_${country}_hours_worked_hist_`year'_male.png", /// + replace width(2400) height(1350) + + drop d_sim v1 max_d_sim max_value +} + + +graph drop _all + + +******************************************************************************** +* 2.3 : Histograms by year - ages 16-75 
+******************************************************************************** + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if inrange(demAge,16,75) + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_labHrsWorkWeek if year == `year' , /// + bin(60) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + +} + +drop labC4 + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year labC4 idPers sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" +keep if inrange(demAge,16,75) + +* Combine datasets +append using "$dir_data/temp_valid_stats.dta" + +qui sum year +local min_year = 2011 +local max_year = 2023 + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_labHrsWorkWeek if year == `year', bin(60) /// + den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + twoway (hist sim_labHrsWorkWeek if year == `year', width(1) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_labHrsWorkWeek if year == `year', width(1) color(red%30) /// + legend(label(2 "UKHLS"))), /// + title("Weekly Hours Worked") /// + subtitle("Ages 16-75, `year'") /// + xtitle("Hours per week", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(small)) /// + ylabel(0(`steps')`max_y', labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of employed and self-employed individuals.", /// + size(vsmall)) + + graph export /// + "$dir_output_files/hours_worked/validation_${country}_hours_worked_hist_`year'_16_75.png", /// + replace width(2400) height(1350) + + drop d_sim v1 max_d_sim max_value + +} + +graph drop _all + +/* + +******************************************************************************** +* LOG-NORMAL HETEROGENIETY +******************************************************************************** + +******************************************************************************** +* 0 : IMPUTATION OF valid_labHrsWorkWeek WORK FOR THOSE IN TOP CATEGORY, LOG NORMAL +******************************************************************************** + +* Estimate parameters of truncated log normal distribution fit to UKHLS +use year dwt labC4 valid_labHrsWorkWeek demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if inrange(demAge,16,65) +keep if labC4 == 1 +//remove those that report working very low >0 valid_labHrsWorkWeek + +drop labC4 + +* Keep those in top valid_labHrsWorkWeek category +keep if valid_labHrsWorkWeek >= 50 & valid_labHrsWorkWeek != . 
+ +* Proportion of the top group that work 40 valid_labHrsWorkWeek +gen exact_40 = (valid_labHrsWorkWeek == 40) + +preserve +collapse (mean) exact_40 [aw=dwt] +local valid_share_40 = exact_40 +restore + +* Create log variable +gen ln_y = ln(valid_labHrsWorkWeek) + +* Non-truncation +sum ln_y + +* Set truncation points (using observed range) +sum valid_labHrsWorkWeek +local a = r(min) +local b = r(max) +local ln_a = ln(`a') +local ln_b = ln(`b') + +* Estimate parameters +truncreg ln_y, ll(`ln_a') ul(`ln_b') nolog + +* Results +matrix b = e(b) +local mu_hat = b[1,1] +local sigma_hat = b[1,2] +local median_est = exp(`mu_hat') +local mean_est = exp(`mu_hat' + `sigma_hat'^2/2) + +* Generate new values directly in simulated dataset +use run idPers year labC4 sim_labHrsWorkWeek using /// + "$dir_data/simulation_sample.dta", clear + +rename sim_labHrsWorkWeek labHrsWorkWeek + +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" + +* Observations to be adjusted +gen top = 1 if labHrsWorkWeek > 35 + +gen new_sim_labHrsWorkWeek = labHrsWorkWeek + +* Calculate the CDF bounds once +local Fa = normal((`ln_a' - `mu_hat')/`sigma_hat') +local Fb = normal((`ln_b' - `mu_hat')/`sigma_hat') + +* For observations with valid_labHrsWorkWeek >= 36, generate random values +replace new_sim_labHrsWorkWeek = exp(`mu_hat' + `sigma_hat' * /// + invnormal(`Fa' + runiform()*(`Fb' - `Fa'))) /// + if top == 1 + +keep run year idPers new_sim_labHrsWorkWeek top + +save "$dir_data/simulation_sample_hrs_adjusted.dta", replace + + +******************************************************************************** +* 1 : Mean values over time +******************************************************************************** + +******************************************************************************** +* 1.1 : Mean values over time - Ages 16-65 +******************************************************************************** + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek 
demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if inrange(demAge,16,65) + +* Compute mean +collapse (mean) valid_labHrsWorkWeek [aw = dwt], by(year) + +save "$dir_data/temp_valid_mean.dta", replace + +* Prepare simulated data +use run year idPers labC4 sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if inrange(demAge,16,65) + +* Merge in new valid_labHrsWorkWeek worked for top category using log normal distribution +merge 1:1 year run idPers using "$dir_data/simulation_sample_hrs_adjusted.dta" + +rename sim_labHrsWorkWeek sim_labHrsWorkWeek_orig +rename new_sim_labHrsWorkWeek sim_labHrsWorkWeek + +* Keep only employed individuals +keep if labC4 == "EmployedOrSelfEmployed" + +* Compute mean and sd +collapse (mean) sim_labHrsWorkWeek, by(run year) + +collapse (mean) sim_labHrsWorkWeek /// + (sd) sim_labHrsWorkWeek_sd = sim_labHrsWorkWeek /// + , by(year) + +* Approx 95% confidence interval +foreach varname in sim_labHrsWorkWeek { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +save "$dir_data/temp_sim_mean_ln.dta", replace + +* Combine datasets +merge 1:1 year using "$dir_data/temp_valid_mean.dta", keep(3) nogen + +* Plot figure +twoway (rarea sim_labHrsWorkWeek_high sim_labHrsWorkWeek_low year, sort /// + color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_labHrsWorkWeek year, sort color(green) legend(label(2 "UKHLS"))), /// + title("Average Weekly Hours Worked") /// + subtitle("Ages 16-65") /// + xtitle("Year", size(small)) /// + ytitle("Hours per week", size(small)) /// + ylabel(33 [2] 40 ,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of working age employed and self-employed individuals. 
Log-normal heterogeneity" "imposed on top category.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_16_65_both_ln.jpg", /// + replace width(2560) height(1440) quality(100) + + +******************************************************************************** +* 1.1.1 : Mean values over time - Ages 16-65, by gender +******************************************************************************** + +* Males + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demAge demMaleFlag using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 + +keep if inrange(demAge,16,65) +keep if demMaleFlag == 1 + +* Compute mean +collapse (mean) valid_labHrsWorkWeek [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year idPers labC4 sim_labHrsWorkWeek demAge demMaleFlag using /// + "$dir_data/simulation_sample.dta", clear + +* Merge in new valid_labHrsWorkWeek worked for top category using log normal distribution +merge 1:1 year run idPers using "$dir_data/simulation_sample_hrs_adjusted.dta" +drop _m + +rename sim_labHrsWorkWeek sim_labHrsWorkWeek_orig +rename new_sim_labHrsWorkWeek sim_labHrsWorkWeek + +* Select sample +keep if demMaleFlag == 1 +keep if labC4 == "EmployedOrSelfEmployed" +keep if inrange(demAge,16,65) + +* Compute mean and sd +collapse (mean) sim_labHrsWorkWeek, by(run year) + +collapse (mean) sim_labHrsWorkWeek /// + (sd) sim_labHrsWorkWeek_sd = sim_labHrsWorkWeek /// + , by(year) + +* Approx 95% confidence interval +foreach varname in sim_labHrsWorkWeek { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +* Combine datasets +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway (rarea sim_labHrsWorkWeek_high sim_labHrsWorkWeek_low year, sort /// + color(green%20) 
legend(label(1 "SimPaths"))) /// +(line valid_labHrsWorkWeek year, sort color(green) legend(label(2 "UKHLS"))), /// + title("Average Weekly Hours Worked") /// + subtitle("Males, ages 16-65") /// + xtitle("Year", size(small)) /// + ytitle("Hours per week", size(small)) /// + ylabel(35 [2] 43 ,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of working age employed and self-employed individuals. Log-normal heterogeneity" "imposed on top category.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_16_65_male_ln.jpg", /// + replace width(2560) height(1440) quality(100) + + +* Females + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demAge demMaleFlag using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if inrange(demAge,16,65) +keep if demMaleFlag == 0 + +* Compute mean +collapse (mean) valid_labHrsWorkWeek [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year idPers labC4 sim_labHrsWorkWeek demAge demMaleFlag using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if inrange(demAge,16,65) + +* Merge in update valid_labHrsWorkWeek worked for top category +merge 1:1 year run idPers using "$dir_data/simulation_sample_hrs_adjusted.dta" +drop _m + +rename sim_labHrsWorkWeek sim_labHrsWorkWeek_orig +rename new_sim_labHrsWorkWeek sim_labHrsWorkWeek + +* Select sample +keep if demMaleFlag == 0 +keep if labC4 == "EmployedOrSelfEmployed" + +* Compute mean and sd +collapse (mean) sim_labHrsWorkWeek, by(run year) + +collapse (mean) sim_labHrsWorkWeek /// + (sd) sim_labHrsWorkWeek_sd = sim_labHrsWorkWeek /// + , by(year) + +* Approx 95% confidnece interval +foreach varname in sim_labHrsWorkWeek { + + gen `varname'_high = `varname' + 1.96*`varname'_sd 
+ gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + + *Combine datasets +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway (rarea sim_labHrsWorkWeek_high sim_labHrsWorkWeek_low year, sort color(green%20) /// + legend(label(1 "SimPaths"))) /// +(line valid_labHrsWorkWeek year, sort color(green) legend(label(2 "UKHLS"))), /// + title("Average Weekly Hours Worked") /// + subtitle("Females, ages 16-65") /// + xtitle("Year", size(small)) /// + ytitle("Hours per week", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of working age employed and self-employed individuals. Log-normal heterogeneity" "imposed on top category.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_16_65_female_ln.jpg", /// + replace width(2560) height(1440) quality(100) + + + +******************************************************************************** +* 2 : Histograms by year +******************************************************************************** + +******************************************************************************** +* 2.1 : Histograms by year - ages 16-65 +******************************************************************************** + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if inrange(demAge,16,65) + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2019 +local max_year = r(max) + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_labHrsWorkWeek if year == `year' , /// + bin(60) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + +} + +drop labC4 + +save 
"$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year labC4 idPers sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + + +* Merge in new valid_labHrsWorkWeek worked for top category using log normal distribution +merge 1:1 year run idPers using "$dir_data/simulation_sample_hrs_adjusted.dta" +drop _m + +rename sim_labHrsWorkWeek sim_labHrsWorkWeek_orig +rename new_sim_labHrsWorkWeek sim_labHrsWorkWeek + +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" +keep if inrange(demAge,16,65) + +* Combine datasets +append using "$dir_data/temp_valid_stats.dta" + +qui sum year +local min_year = 2019 +local max_year = r(max) + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_labHrsWorkWeek if year == `year', bin(60) /// + den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + twoway (hist sim_labHrsWorkWeek if year == `year' /*& labHrsWorkWeek <= 65*/, width(1) color(green%30) /// + legend(label(1 "SimPaths"))) /// + (hist valid_labHrsWorkWeek if year == `year' /*& valid_labHrsWorkWeek <= 65*/, width(1) color(red%30) /// + legend(label(2 "UKHLS"))), /// + title("Weekly Hours Worked") /// + subtitle("`year', age 16-65") /// + xtitle("Hours per week", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(small)) /// + ylabel(0(`steps')`max_y', labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of employed and self-employed individuals.", /// + size(vsmall)) + + graph export /// + "$dir_output_files/hours_worked/validation_${country}_hours_worked_hist_`year'_ln.png", /// + replace width(2400) height(1350) + + drop d_sim v1 max_d_sim max_value + +} + +******************************************************************************** +* 2.1.1 : Histograms by year - ages 16-65, by gender +******************************************************************************** + +* Female + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demMaleFlag demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if demMaleFlag == 0 +keep if inrange(demAge,16,65) + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2019 +local max_year = r(max) + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_labHrsWorkWeek if year == `year' , /// + bin(60) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + +} + +drop labC4 demMaleFlag + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run idPers year labC4 demMaleFlag sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Merge in 
new valid_labHrsWorkWeek worked for top category using log normal distribution +merge 1:1 year run idPers using "$dir_data/simulation_sample_hrs_adjusted.dta" +drop _m + +rename sim_labHrsWorkWeek sim_labHrsWorkWeek_orig +rename new_sim_labHrsWorkWeek sim_labHrsWorkWeek + +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" +keep if demMaleFlag == 0 +keep if inrange(demAge,16,65) + +* Combine datasets +append using "$dir_data/temp_valid_stats.dta" + +* Plot by year +qui sum year +local min_year = 2019 +local max_year = r(max) + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_labHrsWorkWeek if year == `year', bin(60) /// + den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . + + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + twoway (hist sim_labHrsWorkWeek if year == `year', width(1) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_labHrsWorkWeek if year == `year', width(1) color(red%30) /// + legend(label(2 "UKHLS"))), /// + title("Weekly Hours Worked") /// + subtitle("`year', females, ages 16-65") /// + xtitle("Hours per week", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(small)) /// + ylabel(0(`steps')`max_y', labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of employed and self-employed females.", /// + size(vsmall)) + + graph export /// + "$dir_output_files/hours_worked/validation_${country}_hours_worked_hist_`year'_female_ln.png", /// + replace width(2400) height(1350) + + drop d_sim v1 max_d_sim max_value + +} + + +* Male + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demMaleFlag demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 
+keep if demMaleFlag == 1 +keep if inrange(demAge,16,65) + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2019 +local max_year = r(max) + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_labHrsWorkWeek if year == `year' , /// + bin(60) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + +} + +drop labC4 demMaleFlag + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run idPers year labC4 demMaleFlag sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Merge in new valid_labHrsWorkWeek worked for top category using log normal distribution +merge 1:1 year run idPers using "$dir_data/simulation_sample_hrs_adjusted.dta" +drop _m + +rename sim_labHrsWorkWeek sim_labHrsWorkWeek_orig +rename new_sim_labHrsWorkWeek sim_labHrsWorkWeek + +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" +keep if demMaleFlag == 1 +keep if inrange(demAge,16,65) + +* Combine datasets +append using "$dir_data/temp_valid_stats.dta" + +qui sum year +local min_year = 2019 +local max_year = r(max) + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_labHrsWorkWeek if year == `year', bin(60) /// + den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + twoway (hist sim_labHrsWorkWeek if year == `year' , width(1) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_labHrsWorkWeek if year == `year' , width(1) color(red%30) /// + legend(label(2 "UKHLS"))), /// + title("Weekly Hours Worked") /// + subtitle("`year', males, ages 16-65") /// + xtitle("Hours per week", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(small)) /// + ylabel(0(`steps')`max_y', labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of employed and self-employed males.", /// + size(vsmall)) + + + graph export /// + "$dir_output_files/hours_worked/validation_${country}_hours_worked_hist_`year'_male_ln.png", /// + replace width(2400) height(1350) + + drop d_sim v1 max_d_sim max_value +} + +graph drop _all + + +/* +******************************************************************************** +* LOG-NORMAL HETEROGENIETY WITH SPIKE AT 40 +******************************************************************************** + +******************************************************************************** +* 0 : IMPUTATION OF valid_labHrsWorkWeek WORK FOR THOSE IN TOP CATEGORY, LOG NORMAL WITH MASS +******************************************************************************** +/* +* Load data +use year dwt labC4 valid_labHrsWorkWeek demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if inrange(demAge,16,65) +keep if labC4 == 1 +//remove those that report working very low >0 valid_labHrsWorkWeek + +drop labC4 + +save "$dir_data/temp_valid_stats.dta", replace + +twoway (histogram valid_labHrsWorkWeek, fraction color(ltblue)) + +* Top group only +keep if valid_labHrsWorkWeek > 35 & valid_labHrsWorkWeek != . 
+ +* Proportion of the top group that work 40 valid_labHrsWorkWeek +gen exact_40 = (valid_labHrsWorkWeek == 40) + +preserve +collapse (mean) exact_40 [aw=dwt] +local valid_share_40 = exact_40 +restore + +* Create log variable +gen ln_y = ln(valid_labHrsWorkWeek) + +* Set truncation points (using observed range) +summarize valid_labHrsWorkWeek +local a = r(min) +local b = r(max) +local ln_a = ln(`a') +local ln_b = ln(`b') + +* Estimate parameters +truncreg ln_y, ll(`ln_a') ul(`ln_b') nolog + +* Results +matrix b = e(b) +local mu_hat = b[1,1] +local sigma_hat = b[1,2] +local median_est = exp(`mu_hat') +local mean_est = exp(`mu_hat' + `sigma_hat'^2/2) + +disp "** RESULTS **" +disp "μ (for lnY): " round(`mu_hat', 0.001) +disp "σ (for lnY): " round(`sigma_hat', 0.001) +disp "Estimated median: " round(`median_est', 0.01) +disp "Estimated mean: " round(`mean_est', 0.01) + +* Visualization +range y_plot `a' `b' 150 +gen pdf_fitted = (1/(y_plot*`sigma_hat')) * /// + normalden((ln(y_plot)-`mu_hat')/`sigma_hat') / /// + (normal((`ln_b'-`mu_hat')/`sigma_hat') - /// + normal((`ln_a'-`mu_hat')/`sigma_hat')) + +twoway (histogram valid_labHrsWorkWeek, fraction color(ltblue)) /// + (line pdf_fitted y_plot, color(red) lwidth(*1)), /// + title("Truncated Log-Normal Distribution Fit") /// + legend(order(1 "UKHLS" 2 "Fitted Distribution")) /// + xtitle("valid_labHrsWorkWeek") /// + ytitle("Density") /// + graphregion(color(white)) /// + note("Note: ", /// + size(vsmall)) + +** Apply to simulated data +* Load simulation data +use run idPers year labC4 sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +rename sim_labHrsWorkWeek labHrsWorkWeek + + +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" & inrange(demAge,16,65) + +* Observations to be adjusted +gen top = 1 if labHrsWorkWeek > 35 + +* Add log-normal heterogeneity +gen new_labHrsWorkWeek = labHrsWorkWeek + +* Calculate the CDF bounds once +local Fa = normal((`ln_a' - `mu_hat')/`sigma_hat') 
+local Fb = normal((`ln_b' - `mu_hat')/`sigma_hat')
+
+* For observations with valid_labHrsWorkWeek >= 36, generate random values
+replace new_labHrsWorkWeek = exp(`mu_hat' + `sigma_hat' * ///
+    invnormal(`Fa' + runiform()*(`Fb' - `Fa'))) ///
+    if top == 1
+*/
+
+
+* ------------------------------------------------------------------------------
+* Add a mass point at 40 hours to the log-normal adjusted simulated hours.
+*
+* Input : simulation_sample_hrs_adjusted.dta, which stores the adjusted hours
+*         as new_sim_labHrsWorkWeek together with the flag top (1 for the
+*         observations whose hours were redrawn, missing otherwise).
+* Output: simulation_sample_hrs_adjusted_40.dta, same variables, with some
+*         top-group observations at 36-39 hours moved to exactly 40.
+*
+* Requires the local valid_share_40 (share of the UKHLS top group working
+* exactly 40 hours) to have been set earlier in the same do-file run.
+* NOTE(review): runiform() is used without a visible set seed, so the selected
+* observations differ between runs unless a seed is set elsewhere.
+* ------------------------------------------------------------------------------
+use "$dir_data/simulation_sample_hrs_adjusted", clear
+
+* Fail loudly if the target share is missing; otherwise the subtraction below
+* would silently evaluate to a negative number and nothing would be converted
+if "`valid_share_40'" == "" {
+    display as error "local valid_share_40 is not defined; compute the UKHLS share at 40 hours first"
+    exit 198
+}
+
+* The saved file names the adjusted hours new_sim_labHrsWorkWeek, so work on
+* that variable directly (new_labHrsWorkWeek does not exist in this dataset)
+replace new_sim_labHrsWorkWeek = round(new_sim_labHrsWorkWeek,1)
+
+twoway(hist new_sim_labHrsWorkWeek)
+
+
+* Proportion at 40 within the top group, so that it is comparable with
+* valid_share_40 and with the top-group count used as multiplier below
+gen sim_exact_40 = (new_sim_labHrsWorkWeek == 40)
+
+quietly summarize sim_exact_40 if top == 1, meanonly
+local sim_40_share = r(mean)
+
+local add_to_40 = `valid_share_40' - `sim_40_share'
+
+* Identify candidates (36-39 valid_labHrsWorkWeek)
+gen candidate = inrange(new_sim_labHrsWorkWeek, 36, 39) if top == 1
+
+* Calculate how many to convert (never negative: if the simulation already
+* has at least the target share at 40, convert nobody)
+count if top == 1
+local total_top = r(N)
+local num_to_convert = max(0, round(`total_top' * `add_to_40'))
+
+* Randomly select candidates: non-candidates have missing u and sort last,
+* so the first num_to_convert rows are a random draw of candidates
+gen u = runiform() if candidate == 1
+gsort u
+gen convert = (_n <= `num_to_convert') if candidate == 1
+
+* Apply conversion
+replace new_sim_labHrsWorkWeek = 40 if convert == 1
+
+* Clean up
+drop u convert candidate
+
+twoway hist new_sim_labHrsWorkWeek
+
+save "$dir_data/simulation_sample_hrs_adjusted_40.dta", replace
+
+graph drop _all
+
+********************************************************************************
+* 1 : Mean values over time
+********************************************************************************
+
+********************************************************************************
+* 1.1 : Mean values over time - Ages 16-65
+********************************************************************************
+
+* Prepare validation data
+use year dwt labC4 valid_labHrsWorkWeek demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if labC4 == 1
+
+keep if inrange(demAge,16,65)
+
+* Compute mean
+collapse (mean) valid_labHrsWorkWeek [aw = dwt], by(year)
+
+save
"$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year idPers labC4 sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if inrange(demAge,16,65) + +* Merge in new valid_labHrsWorkWeek worked for top category using log normal distribution +merge 1:1 year run idPers using "$dir_data/simulation_sample_hrs_adjusted_40.dta" +keep if _m == 3 +drop _m + +rename sim_labHrsWorkWeek sim_labHrsWorkWeek_orig +rename new_sim_labHrsWorkWeek sim_labHrsWorkWeek + +* Keep only employed individuals +keep if labC4 == "EmployedOrSelfEmployed" + +twoway hist sim_labHrsWorkWeek + +* Compute mean and sd +collapse (mean) sim_labHrsWorkWeek, by(run year) + +collapse (mean) sim_labHrsWorkWeek /// + (sd) sim_labHrsWorkWeek_sd = sim_labHrsWorkWeek /// + , by(year) + +* Approx 95% confidence interval +foreach varname in sim_labHrsWorkWeek { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +save "$dir_data/temp_sim_mean_ln_40.dta", replace + +* Combine datasets +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway (rarea sim_labHrsWorkWeek_high sim_labHrsWorkWeek_low year, sort color(green%20) /// + legend(label(1 "SimPaths"))) /// +(line valid_labHrsWorkWeek year, sort color(green) legend(label(2 "UKHLS"))), /// + title("Average Weekly Hours Worked") /// + subtitle("Ages 16-65 ") /// + xtitle("Year", size(small)) /// + ytitle("Hours per week", size(small)) /// + ylabel(33 [2] 40 ,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of working age employed and self-employed individuals. 
Log-normal heterogeneity" "imposed on top category with a mass at 40.", ///
+    size(vsmall))
+
+* Save figure
+graph export ///
+"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_16_65_both_ln_40.jpg", ///
+    replace width(2560) height(1440) quality(100)
+
+
+********************************************************************************
+* 1.2 : Mean values over time - Ages 16-65, by gender
+********************************************************************************
+
+* Males
+
+* Prepare validation data: weighted mean weekly hours of employed males aged
+* 16-65 in UKHLS, one row per year
+use year dwt labC4 valid_labHrsWorkWeek demAge demMaleFlag using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if labC4 == 1
+
+keep if inrange(demAge,16,65)
+keep if demMaleFlag == 1
+
+* Compute mean
+collapse (mean) valid_labHrsWorkWeek [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+
+* Prepare simulated data
+use run year idPers labC4 sim_labHrsWorkWeek demAge demMaleFlag using ///
+    "$dir_data/simulation_sample.dta", clear
+
+* Merge in new valid_labHrsWorkWeek worked for top category using log normal distribution
+* (keep matched observations only)
+merge 1:1 year run idPers using "$dir_data/simulation_sample_hrs_adjusted_40.dta"
+keep if _m == 3
+drop _m
+
+* Keep the raw simulated hours under _orig and analyse the adjusted hours
+rename sim_labHrsWorkWeek sim_labHrsWorkWeek_orig
+rename new_sim_labHrsWorkWeek sim_labHrsWorkWeek
+
+twoway hist sim_labHrsWorkWeek
+
+* Select sample (males; the comparison value was missing, which is a syntax
+* error, and must match the demMaleFlag == 1 filter on the validation data)
+keep if demMaleFlag == 1
+keep if labC4 == "EmployedOrSelfEmployed"
+keep if inrange(demAge,16,65)
+
+* Compute mean and sd: first the mean within each run and year, then the mean
+* and standard deviation of those run-level means across runs
+collapse (mean) sim_labHrsWorkWeek, by(run year)
+
+collapse (mean) sim_labHrsWorkWeek ///
+         (sd) sim_labHrsWorkWeek_sd = sim_labHrsWorkWeek ///
+         , by(year)
+
+* Approx 95% confidence interval
+foreach varname in sim_labHrsWorkWeek {
+
+    gen `varname'_high = `varname' + 1.96*`varname'_sd
+    gen `varname'_low = `varname' - 1.96*`varname'_sd
+
+}
+
+* Combine datasets
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Plot figure
+twoway (rarea
sim_labHrsWorkWeek_high sim_labHrsWorkWeek_low year, sort /// + color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_labHrsWorkWeek year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Average Weekly Hours Worked") /// + subtitle("Males, ages 16-65") /// + xtitle("Year", size(small)) /// + ytitle("Hours per week", size(small)) /// + ylabel(35 [2] 43 ,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of working age employed and self-employed individuals. Log-normal heterogeneity" "imposed on top category with a mass at 40.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_16_65_male_ln_40.jpg", /// + replace width(2560) height(1440) quality(100) + + +* Females + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demAge demMaleFlag using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if inrange(demAge,16,65) +keep if demMaleFlag == 0 + +* Compute weighted mean of hours by year +collapse (mean) valid_labHrsWorkWeek [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year idPers labC4 sim_labHrsWorkWeek demAge demMaleFlag using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if inrange(demAge,16,65) + +* Merge in adjusted simulated hours (new_sim_labHrsWorkWeek) for the top category, using log normal distribution. NOTE(review): unmatched rows are not dropped here, unlike the histogram sections further down - confirm this is intended +merge 1:1 year run idPers using "$dir_data/simulation_sample_hrs_adjusted_40.dta" +drop _m + +rename sim_labHrsWorkWeek sim_labHrsWorkWeek_orig +rename new_sim_labHrsWorkWeek sim_labHrsWorkWeek + +* Select sample (NOTE(review): the bare twoway hist that follows is never exported and looks like a leftover diagnostic - confirm) +keep if demMaleFlag == 0 +keep if labC4 == "EmployedOrSelfEmployed" +keep if inrange(demAge,16,65) + +twoway hist sim_labHrsWorkWeek + +* Compute mean by run and year, then mean and sd across runs by year +collapse (mean) sim_labHrsWorkWeek, by(run year) + +collapse (mean) sim_labHrsWorkWeek 
/// + (sd) sim_labHrsWorkWeek_sd = sim_labHrsWorkWeek /// + , by(year) + +* Approx 95% confidence interval (mean plus/minus 1.96 sd across runs) +foreach varname in sim_labHrsWorkWeek { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + + * Combine datasets, keeping only years present in both +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway (rarea sim_labHrsWorkWeek_high sim_labHrsWorkWeek_low year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_labHrsWorkWeek year, sort color(green) legend(label(2 "UKHLS"))), /// + title("Average Weekly Hours Worked") /// + subtitle("Females, ages 16-65") /// + xtitle("Year", size(small)) /// + ytitle("Hours per week", size(small)) /// + ylabel(,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of working age employed and self-employed individuals. Log-normal heterogeneity" "imposed on top category with a mass at 40.", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_16_65_female_ln_40.jpg", /// + replace width(2560) height(1440) quality(100) + + + +******************************************************************************** +* 2 : Histograms by year +******************************************************************************** + +******************************************************************************** +* 2.1 : Histograms by year - ages 16-65 +******************************************************************************** + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if inrange(demAge,16,65) + +* Prepare info needed for dynamic y axis labels: maximum UKHLS histogram bin height per year, stored in max_d_valid_YYYY (NOTE(review): min_year is hard-coded to 2019; sum year only supplies max_year) +qui sum year +local min_year = 2019 +local max_year = r(max) + +forval year = `min_year'/`max_year' { + + 
twoway__histogram_gen valid_labHrsWorkWeek if year == `year' , /// + bin(60) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + +} + +drop labC4 + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year labC4 idPers sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" +keep if inrange(demAge,16,65) + +* Merge in adjusted simulated hours (new_sim_labHrsWorkWeek) for the top category, using log normal distribution; matched rows only +merge 1:1 year run idPers using "$dir_data/simulation_sample_hrs_adjusted_40.dta" +keep if _m == 3 +drop _m + +rename sim_labHrsWorkWeek sim_labHrsWorkWeek_orig +rename new_sim_labHrsWorkWeek sim_labHrsWorkWeek + +twoway hist sim_labHrsWorkWeek + +* Combine datasets: stack the UKHLS rows under the simulated rows (NOTE(review): the bare twoway hist just above is never exported - leftover diagnostic?) +append using "$dir_data/temp_valid_stats.dta" + +qui sum year +local min_year = 2019 +local max_year = r(max) + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels: the axis runs to 1.25 times the larger of the simulated and UKHLS maximum bin heights + twoway__histogram_gen sim_labHrsWorkWeek if year == `year', bin(60) /// + den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + twoway (hist sim_labHrsWorkWeek if year == `year', width(1) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_labHrsWorkWeek if year == `year', width(1) color(red%30) /// + legend(label(2 "UKHLS"))), /// + title("Weekly Hours Worked") /// + subtitle("Ages 16-65, `year'") /// + xtitle("Hours per week", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(small)) /// + ylabel(0(`steps')`max_y', labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of employed and self-employed individuals.", /// + size(vsmall)) + + graph export /// + "$dir_output_files/hours_worked/validation_${country}_hours_worked_hist_`year'_ln_40.png", /// + replace width(2400) height(1350) + + drop d_sim v1 max_d_sim max_value + +} + +******************************************************************************** +* 2.2 : Histograms by year - ages 16-65, by gender +******************************************************************************** + +* Female + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demMaleFlag demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if demMaleFlag == 0 +keep if inrange(demAge,16,65) + + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2019 +local max_year = r(max) + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_labHrsWorkWeek if year == `year' , /// + bin(60) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + +} + +drop labC4 demMaleFlag + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run idPers year labC4 demMaleFlag sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Merge in adjusted simulated hours for the top category using 
log normal distribution +merge 1:1 year run idPers using "$dir_data/simulation_sample_hrs_adjusted_40.dta" +keep if _m == 3 +drop _m + +rename sim_labHrsWorkWeek sim_labHrsWorkWeek_orig +rename new_sim_labHrsWorkWeek sim_labHrsWorkWeek + +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" +keep if demMaleFlag == 0 +keep if inrange(demAge,16,65) + +* Combine datasets +append using "$dir_data/temp_valid_stats.dta" + +* Plot by year +qui sum year +local min_year = 2019 +local max_year = r(max) + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_labHrsWorkWeek if year == `year', bin(60) /// + den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . + + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + twoway (hist sim_labHrsWorkWeek if year == `year', width(1) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_labHrsWorkWeek if year == `year', width(1) color(red%30) /// + legend(label(2 "UKHLS"))), /// + title("Weekly Hours Worked") /// + subtitle("`year', females, ages 16-65") /// + xtitle("Hours per week", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(small)) /// + ylabel(0(`steps')`max_y', labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of employed and self-employed females.", /// + size(vsmall)) + + graph export /// + "$dir_output_files/hours_worked/validation_${country}_hours_worked_hist_`year'_female_ln_40.png", /// + replace width(2400) height(1350) + + drop d_sim v1 max_d_sim max_value + +} + + +* Male + +* Prepare validation data +use year dwt labC4 valid_labHrsWorkWeek demMaleFlag demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if labC4 == 1 +keep if demMaleFlag == 1 +keep if 
inrange(demAge,16,65) + + +* Prepare info needed for dynamic y axis labels +qui sum year +local min_year = 2019 +local max_year = r(max) + +forval year = `min_year'/`max_year' { + + twoway__histogram_gen valid_labHrsWorkWeek if year == `year' , /// + bin(60) den gen(d_valid v2) + + qui sum d_valid + gen max_d_valid_`year' = r(max) + + drop d_valid v2 + +} + +drop labC4 demMaleFlag + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run idPers year labC4 demMaleFlag sim_labHrsWorkWeek demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Merge in adjusted simulated hours (new_sim_labHrsWorkWeek) for the top category, using log normal distribution; matched rows only +merge 1:1 year run idPers using "$dir_data/simulation_sample_hrs_adjusted_40.dta" +keep if _m == 3 +drop _m + +rename sim_labHrsWorkWeek sim_labHrsWorkWeek_orig +rename new_sim_labHrsWorkWeek sim_labHrsWorkWeek + +* Select sample +keep if labC4 == "EmployedOrSelfEmployed" +keep if demMaleFlag == 1 +keep if inrange(demAge,16,65) + +* Combine datasets +append using "$dir_data/temp_valid_stats.dta" + +qui sum year +local min_year = 2019 +local max_year = r(max) + +forval year = `min_year'/`max_year' { + + * Prepare info needed for dynamic y axis labels + twoway__histogram_gen sim_labHrsWorkWeek if year == `year', bin(60) /// + den gen(d_sim v1) + + qui sum d_sim + gen max_d_sim = r(max) + + gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim + replace max_value = max_d_sim if max_value == . 
+ + sum max_value + local max_y = 1.25*r(max) + local steps = `max_y'/2 + + twoway (hist sim_labHrsWorkWeek if year == `year' , width(1) /// + color(green%30) legend(label(1 "SimPaths"))) /// + (hist valid_labHrsWorkWeek if year == `year' , width(1) color(red%30) /// + legend(label(2 "UKHLS"))), /// + title("Weekly Hours Worked") /// + subtitle("`year', males, ages 16-65") /// + xtitle("Hours per week", size(small)) /// + ytitle("Density", size(small)) /// + xlabel(,labsize(small)) /// + ylabel(0(`steps')`max_y', labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of employed and self-employed males.", /// + size(vsmall)) + + + graph export /// + "$dir_output_files/hours_worked/validation_${country}_hours_worked_hist_`year'_male_ln_40.png", /// + replace width(2400) height(1350) + + drop d_sim v1 max_d_sim max_value +} + + +graph drop _all diff --git a/validation/02_simulated_output_validation/do_files/04_10_1_plot_hours_worked_discrete.do b/validation/02_simulated_output_validation/do_files/04_10_1_plot_hours_worked_discrete.do new file mode 100644 index 000000000..e3c7134bc --- /dev/null +++ b/validation/02_simulated_output_validation/do_files/04_10_1_plot_hours_worked_discrete.do @@ -0,0 +1,360 @@ +/******************************************************************************* +* PROJECT: SimPaths UK +* SECTION: Validation +* OBJECT: Hours worked (discrete) +* AUTHORS: Ashley Burdett +* LAST UPDATE: Jan 2026 +* COUNTRY: UK +******************************************************************************** +* NOTES: Need to update to account for additional labour supply +* categories +*******************************************************************************/ + +******************************************************************************** +* 1 : Distribution +******************************************************************************** + 
********************************************************************************
* 1.1 : Distribution, 16-65
********************************************************************************

* Comparison of the discretized labour supply hours, pooled over all years.
*
* In every sub-section below the list of category dummies is read from the
* data (unab) instead of being typed out as hours_cat_1 ... hours_cat_6, so
* the sd/band calculations keep working when labour supply categories are
* added or removed. With the current six categories the results are unchanged.
* The category descriptions in the graph notes are still fixed text and must
* be edited by hand if the categories change.

* Load UKHLS data
use year dwt labC4 valid_cat_hours valid_labHrsWorkEnum_no demAge using ///
    "$dir_data/ukhls_validation_sample.dta", clear

* Select sample
keep if labC4 == 1
drop if valid_labHrsWorkEnum_no == 0
keep if inrange(demAge,16,65)

* Hours dummies
tab valid_cat_hours, gen(hours_cat_)

* Calculate weighted proportions
collapse (mean) hours_cat_* [aw=dwt]

gen sim = 0

save "$dir_data/valid_props", replace

* Prepare simulated data
use run year labC4 idPers sim_cat_hours sim_labHrsWorkEnum_no demAge using ///
    "$dir_data/simulation_sample.dta", clear

* Select sample
keep if labC4 == "EmployedOrSelfEmployed"
drop if sim_labHrsWorkEnum_no == 0
keep if inrange(demAge,16,65)

* Hours dummies
tab sim_cat_hours, gen(hours_cat_)

* Calculate proportions within each run
collapse (mean) hours_cat_*, by(run)

* Collect the category dummies and build the "(sd) newvar = var" spec
unab cats : hours_cat_*
local n_cats : word count `cats'
local sd_spec
foreach varname of local cats {
    local sd_spec `sd_spec' `varname'_sd = `varname'
}

* Mean and sd across runs
collapse (mean) `cats' (sd) `sd_spec'

* Approx 95% band: mean plus/minus 1.96 sd across runs
foreach varname of local cats {

    gen `varname'_high = `varname' + 1.96*`varname'_sd
    gen `varname'_low = `varname' - 1.96*`varname'_sd

}

gen sim = 1

* Combine datasets
append using "$dir_data/valid_props"

* Plot
reshape long hours_cat_@ hours_cat_@_high hours_cat_@_low, i(sim) j(category)

gen prop_ukhls = hours_cat_ if sim == 0
gen prop_sim = hours_cat_ if sim == 1

* Offset the two bar series so they sit side by side
gen x_ukhls = category - 0.2
gen x_sim = category + 0.2

twoway (bar prop_ukhls x_ukhls, barw(0.4) color(red%50)) ///
    (bar prop_sim x_sim, barw(0.4) color(green%50)) ///
    (rcap hours_cat__high hours_cat__low x_sim, lcolor(green)), ///
    xlabel(1/`n_cats', valuelabel) ///
    xtitle("Hours Category", size(small)) ///
    ytitle("Proportion", size(small)) ///
    title("Share in Each Labour Hours Category") ///
    subtitle("Ages 16-65") ///
    legend(order(1 "UKHLS" 2 "SimPaths" 3 "95% CI")) ///
    legend(size(small)) ///
    graphregion(color(white)) ///
    note("Notes: Years 2011-2023. Categories 1 = 6-15 hours, 2 = 16-25 hours, 3 = 26-35 hours , 4 = 36-40 hours, 5 = 41-49 hours," "6 = 55+ hours.", size(vsmall))

graph export ///
    "$dir_output_files/hours_worked/validation_${country}_hours_worked_cat_all.png", ///
    replace width(2400) height(1350)


********************************************************************************
* 1.2 : Distribution, 16-65 by year
********************************************************************************

* Load UKHLS data
use year dwt labC4 valid_cat_hours valid_labHrsWorkEnum_no demAge using ///
    "$dir_data/ukhls_validation_sample.dta", clear

* Select sample
keep if labC4 == 1
drop if valid_labHrsWorkEnum_no == 0
keep if inrange(demAge,16,65)

tab valid_cat_hours, gen(hours_cat_)

* Calculate weighted proportions by year
collapse (mean) hours_cat_* [aw=dwt], by(year)

gen sim = 0

save "$dir_data/valid_props", replace

* Load SimPaths data
use run year labC4 idPers sim_cat_hours sim_labHrsWorkEnum_no demAge using ///
    "$dir_data/simulation_sample.dta", clear

* Select sample
keep if labC4 == "EmployedOrSelfEmployed"
drop if sim_labHrsWorkEnum_no == 0
keep if inrange(demAge,16,65)

tab sim_cat_hours, gen(hours_cat_)

* Calculate proportions by run and year
collapse (mean) hours_cat_*, by(run year)

* Collect the category dummies and build the "(sd) newvar = var" spec
unab cats : hours_cat_*
local n_cats : word count `cats'
local sd_spec
foreach varname of local cats {
    local sd_spec `sd_spec' `varname'_sd = `varname'
}

* Calculate Mean and SD across runs by year
collapse (mean) `cats' (sd) `sd_spec', by(year)

foreach varname of local cats {

    gen `varname'_high = `varname' + 1.96*`varname'_sd
    gen `varname'_low = `varname' - 1.96*`varname'_sd
}

gen sim = 1

* Combine datasets
append using "$dir_data/valid_props"

* Plot
reshape long hours_cat_@ hours_cat_@_high hours_cat_@_low, i(sim year) ///
    j(category)

gen prop_ukhls = hours_cat_ if sim == 0
gen prop_sim = hours_cat_ if sim == 1
gen x_ukhls = category - 0.2
gen x_sim = category + 0.2

* One figure per year present in the combined data
levelsof year, local(years)
foreach y in `years' {

    twoway (bar prop_ukhls x_ukhls if year == `y', barw(0.4) color(red%50)) ///
        (bar prop_sim x_sim if year == `y', barw(0.4) color(green%50)) ///
        (rcap hours_cat__high hours_cat__low x_sim if year == `y', lcolor(green)), ///
        xlabel(1/`n_cats', valuelabel) ///
        xtitle("Hours Category", size(small)) ///
        ytitle("Proportion", size(small)) ///
        title("Share in Each Labour Hours Category") ///
        subtitle("`y'") ///
        legend(order(1 "UKHLS" 2 "SimPaths" 3 "95% CI")) ///
        legend(size(small)) ///
        graphregion(color(white)) ///
        note("Notes: Ages 16-65. Categories 1 = 6-15 hours, 2 = 16-25 hours, 3 = 26-35 hours , 4 = 36-40 hours, 5 = 41-49 hours," "6 = 55+ hours.", size(vsmall))

    graph export ///
        "$dir_output_files/hours_worked/validation_${country}_hours_worked_cat_`y'.png", ///
        replace width(2400) height(1350)
}

graph drop _all


********************************************************************************
* 1.3 : Distribution, 16-65 by year, by gender
********************************************************************************

* Load UKHLS data
use year dwt labC4 valid_cat_hours valid_labHrsWorkEnum_no demAge ///
    demMaleFlag using "$dir_data/ukhls_validation_sample.dta", clear

* Select sample
keep if labC4 == 1
drop if valid_labHrsWorkEnum_no == 0
keep if inrange(demAge,16,65)

tab valid_cat_hours, gen(hours_cat_)

* Calculate weighted proportions by year and gender
collapse (mean) hours_cat_* [aw=dwt], by(year demMaleFlag)

gen sim = 0

save "$dir_data/valid_props", replace

* Load SimPaths data
use run year labC4 idPers sim_cat_hours sim_labHrsWorkEnum_no demAge ///
    demMaleFlag using "$dir_data/simulation_sample.dta", clear

* Select sample
keep if labC4 == "EmployedOrSelfEmployed"
drop if sim_labHrsWorkEnum_no == 0
keep if inrange(demAge,16,65)

tab sim_cat_hours, gen(hours_cat_)

* Calculate proportions by run, year and gender
collapse (mean) hours_cat_*, by(run year demMaleFlag)

* Collect the category dummies and build the "(sd) newvar = var" spec
unab cats : hours_cat_*
local sd_spec
foreach varname of local cats {
    local sd_spec `sd_spec' `varname'_sd = `varname'
}

* Calculate Mean and SD across runs by year and gender
collapse (mean) `cats' (sd) `sd_spec', by(year demMaleFlag)

foreach varname of local cats {

    gen `varname'_high = `varname' + 1.96*`varname'_sd
    gen `varname'_low = `varname' - 1.96*`varname'_sd
}

gen sim = 1

* Combine datasets
+append using "$dir_data/valid_props" + +* Plot +* Note: Added demMaleFlag to the identifier i() +reshape long hours_cat_@ hours_cat_@_high hours_cat_@_low, /// + i(sim year demMaleFlag) j(category) + +gen prop_ukhls = hours_cat_ if sim == 0 +gen prop_sim = hours_cat_ if sim == 1 +gen x_ukhls = category - 0.2 +gen x_sim = category + 0.2 + +* Label gender for plot titles +label define sex_lbl 0 "Females" 1 "Males" +label values demMaleFlag sex_lbl + +levelsof year, local(years) +levelsof demMaleFlag, local(sexes) + +foreach y in `years' { + foreach s in `sexes' { + + * Get the label text (used in the subtitle, the note and the export filename) + local sextext : label sex_lbl `s' + + twoway (bar prop_ukhls x_ukhls if year == `y' & demMaleFlag == `s', /// + barw(0.4) color(red%50)) /// + (bar prop_sim x_sim if year == `y' & demMaleFlag == `s', /// + barw(0.4) color(green%50)) /// + (rcap hours_cat__high hours_cat__low x_sim if year == `y' & /// + demMaleFlag == `s', lcolor(green)), /// + xlabel(1/6, valuelabel) /// + xtitle("Hours Category", size(small)) /// + ytitle("Proportion", size(small)) /// + title("Share in Each Labour Hours Category") /// + subtitle("`y', `sextext'") /// + legend(order(1 "UKHLS" 2 "SimPaths" 3 "95% CI")) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Comparison for `sextext' in `y'." 
"Categories: 1=6-15, 2=16-25, 3=26-35, 4=36-40, 5=41-49, 6=55+ hours.", size(vsmall)) + + graph export /// + "$dir_output_files/hours_worked/validation_${country}_hours_worked_cat_`y'_`sextext'.png", /// + replace width(2400) height(1350) + } + +} + +graph drop _all + + +/* +******************************************************************************** +* 3 : Mean hours of work, comparison across all options explored, 16-65 (currently disabled. NOTE(review): the block comment opened above is not closed before the end of the file) +******************************************************************************** + +use "$dir_data/temp_valid_mean_disc", replace + +merge 1:1 year using "$dir_data/temp_valid_mean", nogen + +rename hours valid_hours + +merge 1:1 year using "$dir_data/temp_sim_mean_uni.dta", nogen + +drop sim_labHrsWorkWeek_sim sim_labHrsWorkWeek_sim_sd +rename sim_labHrsWorkWeek_sim_high sim_labHrsWorkWeek_sim_u_high +rename sim_labHrsWorkWeek_sim_low sim_labHrsWorkWeek_sim_u_low + +merge 1:1 year using "$dir_data/temp_sim_mean_ln.dta", nogen + +drop sim_labHrsWorkWeek_sim sim_labHrsWorkWeek_sim_sd +rename sim_labHrsWorkWeek_sim_high sim_labHrsWorkWeek_sim_ln_high +rename sim_labHrsWorkWeek_sim_low sim_labHrsWorkWeek_sim_ln_low + +merge 1:1 year using "$dir_data/temp_sim_mean_ln_40.dta", nogen + +drop sim_labHrsWorkWeek_sim sim_labHrsWorkWeek_sim_sd +rename sim_labHrsWorkWeek_sim_high sim_labHrsWorkWeek_sim_40_high +rename sim_labHrsWorkWeek_sim_low sim_labHrsWorkWeek_sim_40_low + + +* Plot comparison +drop if year < 2011 + +twoway (line valid_hours year, sort color(cranberry) /// + legend(label(1 "UKHLS, continuous"))) /// +(rarea sim_labHrsWorkWeek_sim_u_high sim_labHrsWorkWeek_sim_u_low year, sort color(purple%20) /// + legend(label(2 "Simulated, uniform"))) /// +(rarea sim_labHrsWorkWeek_sim_ln_high sim_labHrsWorkWeek_sim_ln_low year, sort color(blue%20) /// + legend(label(3 "Simulated, log normal"))) /// +(rarea sim_labHrsWorkWeek_sim_40_high sim_labHrsWorkWeek_sim_40_low year, sort color(green%20) /// + legend(label(4 "Simulated, log normal + 40"))), 
/// + title("Average Weekly Hours Worked") /// + subtitle("Ages 16-65 ") /// + xtitle("Year", size(small)) /// + ytitle("Hours per week", size(small)) /// + ylabel(33 [2] 40 ,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Note: Statistics calculated on sample of working age employed and self-employed individuals.", /// + size(vsmall)) + +graph export /// + "$dir_output_files/hours_worked/validation_${country}_hours_worked_comparison.png", /// + replace width(2400) height(1350) + + +//(line valid_disc_hours year, sort color(blue) /// +// legend(label(2 "UKHLS, discretized"))) /// + \ No newline at end of file diff --git a/validation/02_simulated_output_validation/do_files/04_11_plot_income_shares.do b/validation/02_simulated_output_validation/do_files/04_11_plot_income_shares.do new file mode 100644 index 000000000..1f90c8a77 --- /dev/null +++ b/validation/02_simulated_output_validation/do_files/04_11_plot_income_shares.do @@ -0,0 +1,449 @@ +/******************************************************************************* +* PROJECT: SimPaths UK +* SECTION: Validation +* OBJECT: Income shares +* AUTHORS: Patryk Bronka, Ashley Burdett +* LAST UPDATE: Jan 2026 +* COUNTRY: UK +******************************************************************************** +* NOTES: This do file plots simulated and observed income shares and +* incomes by deciles of gross income +* Altered pension age to 65 + + TO UPDATE +*******************************************************************************/ + +** SimPaths output + + +* Create variables + +use "$dir_data/simulation_sample.dta", clear + +* Sort so that rows of the same benefit unit and year are adjacent within each run (the one-row-per-benefit-unit selection happens further down) +sort run idBu year + +* Check data structure: count adjacent rows of the same benefit unit and year whose income components differ (presumably expected to be 0) +count if year == year[_n-1] & idBu == idBu[_n-1] & /// + sim_yEmpBuGrossLevelYear != sim_yEmpBuGrossLevelYear[_n-1] + +count if year == year[_n-1] & idBu == idBu[_n-1] & /// + sim_yCapitalBuLevelYear != sim_yCapitalBuLevelYear[_n-1] + 
+count if year == year[_n-1] & idBu == idBu[_n-1] & /// + sim_yPensBuGrossLevelYear != sim_yPensBuGrossLevelYear[_n-1] + + +* Variables of interest + +* Keep one observation per benefit unit +* Create a marker for the first observation in each group +bys run year idBu (demAge): gen byte to_keep = (_n == _N) + +* Keep only the marked rows +keep if to_keep == 1 +drop to_keep + +* Create gross income deciles +sort run idPers year + +xtile sim_decile = sim_yNonBenBuGrossLevelYear , n(10) + +tab sim_decile + +gen zero_gross = (sim_yNonBenBuGrossLevelYear == 0) +tab zero_gross + +/* +8.6% of observations have 0 gross non-benefit income. +Equal shares in deciles. +*/ + +* Genrate share variables +gen sim_share_emp = sim_yEmpBuGrossLevelYear / sim_yNonBenBuGrossLevelYear +gen sim_share_cap = sim_yCapitalBuLevelYear / sim_yNonBenBuGrossLevelYear +gen sim_share_pen = sim_yPensBuGrossLevelYear / sim_yNonBenBuGrossLevelYear + +order idPers idBu year sim_yNonBenBuGrossLevelYear sim_yEmpBuGrossLevelYear /// + sim_yCapitalBuLevelYear sim_yPensBuGrossLevelYear sim_yDispBuLevelYear /// + sim_share_* + +gen check1 = sim_share_emp + sim_share_cap + sim_share_pen +sum check1, det + +order idPers idBu year sim_decile sim_yNonBenBuGrossLevelYear /// + sim_yEmpBuGrossLevelYear sim_yCapitalBuLevelYear /// + sim_yPensBuGrossLevelYear check1 sim_share_* + +drop check* + + +* Plots + +* Sources + +* All +graph bar (mean) sim_yEmpBuGrossLevelYear (mean) sim_yCapitalBuLevelYear /// + (mean) sim_yPensBuGrossLevelYear, over(sim_decile) stack /// + title("SimPaths") /// + legend(order(1 "Labour" 2 "Capital" 3 "Private Pension") /// + position(6) rows(1)) /// + name(simulated_income_comp_all, replace) /// + b1title("Decile", size(small)) /// + ytitle("£") /// + ylabel(,labsize(small)) /// + graphregion(color(white)) + +* Oldest <= 65 +preserve + +drop if demAge > 65 + +graph bar (mean) sim_yEmpBuGrossLevelYear (mean) sim_yCapitalBuLevelYear /// + (mean) sim_yPensBuGrossLevelYear, over(sim_decile) 
stack /// + title("SimPaths") /// + legend(order(1 "Labour" 2 "Capital" 3 "Private Pension") /// + position(6) rows(1)) /// + name(simulated_income_comp_upto65, replace) /// + b1title("Decile", size(small)) /// + ytitle("£") /// + ylabel(#5, format(%12.0fc)) /// + ylabel(,labsize(small)) /// + graphregion(color(white)) + +restore + +* Oldest > 65 +preserve + +drop if demAge <= 65 + +graph bar (mean) sim_yEmpBuGrossLevelYear (mean) sim_yCapitalBuLevelYear /// + (mean) sim_yPensBuGrossLevelYear, over(sim_decile) stack /// + title("SimPaths") /// + legend(order(1 "Labour" 2 "Capital" 3 "Private Pension") /// + position(6) rows(1)) /// + name(simulated_income_comp_66plus, replace) /// + b1title("Decile", size(small)) /// + ytitle("£") /// + ylabel(#5, format(%12.0fc)) /// + ylabel(,labsize(small)) /// + graphregion(color(white)) + +restore + +* Shares +preserve + +collapse (mean) sim_share_emp sim_share_cap sim_share_pen, /// + by(sim_decile) + +graph bar (asis) sim_share_emp sim_share_cap sim_share_pen, /// + over(sim_decile) stack /// + title("SimPaths") /// + legend(label(1 "Employment") label(2 "Capital") /// + label(3 "Private Pension")) /// + name(simulated_income_share_all, replace) /// + graphregion(color(white)) + +restore + +* Age <= 65 +preserve + +keep if demAge <= 65 + +collapse (mean) sim_share_emp sim_share_cap sim_share_pen, /// + by(sim_decile) + +graph bar (asis) sim_share_emp sim_share_cap sim_share_pen, /// + over(sim_decile) stack /// + title("SimPaths") /// + legend(label(1 "Employment") label(2 "Capital") /// + label(3 "Private Pension")) /// + name(simulated_income_share_upto65, replace) /// + graphregion(color(white)) + +restore + +* Age > 65 +preserve + +keep if demAge > 65 + +collapse (mean) sim_share_emp sim_share_cap sim_share_pen, /// + by(sim_decile) + +graph bar (asis) sim_share_emp sim_share_cap sim_share_pen, /// + over(sim_decile) stack /// + title("SimPaths") /// + legend(label(1 "Employment") label(2 "Capital") /// + label(3 
"Private Pension")) /// + name(simulated_income_share_66plus, replace) /// + graphregion(color(white)) + +restore + + +** UKHLS data + +use "$dir_data/ukhls_validation_sample.dta", clear + +* keep only one observation per benefit unit +sort idBu year + +* Check data structure +count if year == year[_n-1] & idBu == idBu[_n-1] & /// + valid_yEmpBuGrossLevelYear != valid_yEmpBuGrossLevelYear[_n-1] + +count if year == year[_n-1] & idBu == idBu[_n-1] & /// + valid_yCapitalBuLevelYear != valid_yCapitalBuLevelYear[_n-1] + +count if year == year[_n-1] & idBu == idBu[_n-1] & /// + valid_yPensBuGrossLevelYear != valid_yPensBuGrossLevelYear[_n-1] + +* Variable of interest + +* Keep one observation per benefit unit +* Create a marker for the first observation in each group +bys year idBu (demAge): gen byte to_keep = (_n == _N) + +* Keep only the marked rows +keep if to_keep == 1 +drop to_keep + +* Create gross income deciles +sort idPers year + +xtile valid_decile = valid_yNonBenBuGrossLevelYear , n(10) + +tab valid_decile + +gen zero_gross = (valid_yNonBenBuGrossLevelYear == 0) +tab zero_gross + +/* +19.36% of observations have 0 gross non-benefit income. 
+Non-equal shares in in bottom two deciles +*/ + +* Genrate share variables +gen valid_share_emp = valid_yEmpBuGrossLevelYear / valid_yNonBenBuGrossLevelYear +gen valid_share_cap = valid_yCapitalBuLevelYear / valid_yNonBenBuGrossLevelYear +gen valid_share_pen = valid_yPensBuGrossLevelYear / valid_yNonBenBuGrossLevelYear + +order idPers idBu year valid_yNonBenBuGrossLevelYear valid_yEmpBuGrossLevelYear /// + valid_yCapitalBuLevelYear valid_yPensBuGrossLevelYear valid_yDispBuLevelYear /// + valid_share_* + +gen check1 = valid_share_emp + valid_share_cap + valid_share_pen +sum check1, det + +order idPers idBu year valid_decile valid_yNonBenBuGrossLevelYear /// + valid_yEmpBuGrossLevelYear valid_yCapitalBuLevelYear /// + valid_yPensBuGrossLevelYear check1 valid_share_* + +drop check* + +* Plots + +* Sources + +* All +graph bar (mean) valid_yEmpBuGrossLevelYear (mean) valid_yCapitalBuLevelYear /// + (mean) valid_yPensBuGrossLevelYear [aw = dwt], over(valid_decile) stack /// + title("UKHLS") /// + legend(order(1 "Labour" 2 "Capital" 3 "Private Pension") /// + position(6) rows(1)) /// + name(UKHLS_income_comp_all, replace) /// + b1title("Decile", size(small)) /// + ytitle("£") /// + ylabel(#5, format(%12.0fc)) /// + ylabel(,labsize(small)) /// + graphregion(color(white)) + +* Oldest <= 65 +preserve + +drop if demAge > 65 + +graph bar (mean) valid_yEmpBuGrossLevelYear (mean) valid_yCapitalBuLevelYear /// + (mean) valid_yPensBuGrossLevelYear [aw = dwt], over(valid_decile) stack /// + title("UKHLS") /// + legend(order(1 "Labour" 2 "Capital" 3 "Private Pension") /// + position(6) rows(1)) /// + name(UKHLS_income_comp_upto65, replace) /// + b1title("Decile", size(small)) /// + ytitle("£") /// + ylabel(#5, format(%12.0fc)) /// + ylabel(,labsize(small)) /// + graphregion(color(white)) + +restore + +* Oldest > 65 +preserve + +drop if demAge <= 65 + +graph bar (mean) valid_yEmpBuGrossLevelYear (mean) valid_yCapitalBuLevelYear /// + (mean) valid_yPensBuGrossLevelYear [aw = dwt], 
over(valid_decile) stack /// + title("UKHLS") /// + legend(order(1 "Labour" 2 "Capital" 3 "Private Pension") /// + position(6) rows(1)) /// + name(UKHLS_income_comp_66plus , replace) /// + b1title("Decile", size(small)) /// + ytitle("£") /// + ylabel(#5, format(%12.0fc)) /// + ylabel(,labsize(small)) /// + graphregion(color(white)) + +restore + +* Shares + +* All ages +preserve + +collapse (mean) valid_share_emp valid_share_cap valid_share_pen [aw = dwt], /// + by(valid_decile) + +graph bar (asis) valid_share_emp valid_share_cap valid_share_pen, /// + over(valid_decile) stack /// + title("UKHLS") /// + legend(label(1 "Employment") label(2 "Capital") /// + label(3 "Private Pension")) /// + name(UKHLS_income_share_all, replace) /// + graphregion(color(white)) +restore + +* Age <= 65 +preserve + +keep if demAge <= 65 + +collapse (mean) valid_share_emp valid_share_cap valid_share_pen [aw = dwt], /// + by(valid_decile) + +graph bar (asis) valid_share_emp valid_share_cap valid_share_pen, /// + over(valid_decile) stack /// + title("UKHLS") /// + legend(label(1 "Employment") label(2 "Capital") /// + label(3 "Private Pension")) /// + name(UKHLS_income_share_upto65, replace) /// + graphregion(color(white)) + +restore + +* Age > 65 +preserve + +keep if demAge > 65 + +collapse (mean) valid_share_emp valid_share_cap valid_share_pen [aw = dwt], /// + by(valid_decile) + +graph bar (asis) valid_share_emp valid_share_cap valid_share_pen, /// + over(valid_decile) stack /// + title("UKHLS") /// + legend(label(1 "Employment") label(2 "Capital") /// + label(3 "Private Pension")) /// + name(UKHLS_income_share_66plus, replace) /// + graphregion(color(white)) + +restore + + +* Combine plots + +* Composition - All +grc1leg simulated_income_comp_all UKHLS_income_comp_all, /// + rows(1) ycommon /// + legendfrom(simulated_income_comp_all) /// + graphregion(color(white)) /// + title("Average Benefit Unit Gross Income Sources") /// + subtitle("All age") /// + note("NOTE: Benefit income 
excluded. Values in real 2015 amounts.", /// + size(vsmall)) + +graph export /// + "$dir_output_files/income/income_shares/validation_${country}_income_levels_all.png", /// + replace width(2400) height(1350) + +* Composition - Working age +grc1leg simulated_income_comp_upto65 UKHLS_income_comp_upto65, /// + rows(1) ycommon /// + legendfrom(simulated_income_comp_upto65) /// + graphregion(color(white)) /// + title("Average Benefit Unit Gross Income Sources") /// + subtitle("Oldest Working Age, <=65") /// + note("NOTE: Benefit income excluded. Values in real 2015 amounts.", /// + size(vsmall)) + +graph export /// + "$dir_output_files/income/income_shares/validation_${country}_income_levels_upto65.png", /// + replace width(2400) height(1350) + + +* Composition - Retirement age +grc1leg simulated_income_comp_66plus UKHLS_income_comp_66plus, /// + rows(1) ycommon /// + legendfrom(simulated_income_comp_66plus) /// + graphregion(color(white)) /// + title("Average Benefit Unit Gross Income Sources") /// + subtitle("Oldest Age > 65") /// + note("NOTE: Benefit income excluded. Values in real 2015 amounts.", /// + size(vsmall)) + +graph export /// + "$dir_output_files/income/income_shares/validation_${country}_income_levels_66plus.png", /// + replace width(2400) height(1350) + + + +* Shares - All +grc1leg simulated_income_share_all UKHLS_income_share_all, /// + rows(1) ycommon /// + legendfrom(simulated_income_share_all) /// + graphregion(color(white)) /// + title("Average Benefit Unit Gross Income Shares") /// + subtitle("All age") /// + note("NOTE: Benefit income excluded. 
Values in real 2015 amounts.", /// + size(vsmall)) + +graph export /// + "$dir_output_files/income/income_shares/validation_${country}_income_shares_all.png", /// + replace width(2400) height(1350) + +* Shares - Working age +grc1leg simulated_income_share_upto65 UKHLS_income_share_upto65, /// + rows(1) ycommon /// + legendfrom(simulated_income_share_upto65) /// + graphregion(color(white)) /// + title("Average Benefit Unit Gross Income Shares") /// + subtitle("Oldest Working Age, <=65") /// + note("NOTE: Benefit income excluded. Values in real 2015 amounts.", /// + size(vsmall)) + +graph export /// + "$dir_output_files/income/income_shares/validation_${country}_income_shares_upto65.png", /// + replace width(2400) height(1350) + + +* Shares - Retirement age +grc1leg simulated_income_share_66plus UKHLS_income_share_66plus, /// + rows(1) ycommon /// + legendfrom(simulated_income_share_66plus) /// + graphregion(color(white)) /// + title("Average Benefit Unit Gross Income Shares") /// + subtitle("Oldest Age > 65") /// + note("NOTE: Benefit income excluded. 
Values in real 2015 amounts.", /// + size(vsmall)) + +graph export /// + "$dir_output_files/income/income_shares/validation_${country}_income_shares_66plus.png", /// + replace width(2400) height(1350) + + +graph drop _all + diff --git a/validation/02_simulated_output_validation/do_files/04_12_plot_partnership_status.do b/validation/02_simulated_output_validation/do_files/04_12_plot_partnership_status.do new file mode 100644 index 000000000..87189c58c --- /dev/null +++ b/validation/02_simulated_output_validation/do_files/04_12_plot_partnership_status.do @@ -0,0 +1,330 @@ +******************************************************************************** +* PROJECT: SimPaths UK +* SECTION: Validation +* OBJECT: Partnership +* AUTHORS: Ashley Burdett +* LAST UPDATE: Jan 2026 +* COUNTRY: UK +******************************************************************************** +* NOTES: +******************************************************************************** + +******************************************************************************** +* 1 : Mean values over time +******************************************************************************** + +******************************************************************************** +* 1.1 : Mean values over time - ages 18-65 +******************************************************************************** + +* Prepare validation data +use year demAge dwt valid_partnered valid_single using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Compute shares +collapse (mean) valid_partnered valid_single [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare SimPaths data +use run year demAge sim_partnered sim_single using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Compute shares and sd +collapse (mean) sim_partnered sim_single /// + , by(run year) + +collapse (mean) sim_partnered sim_single /// + (sd) 
sim_partnered_sd = sim_partnered /// + sim_single_sd = sim_single /// + , by(year) + +* Approx 95% confidence interval +foreach varname in sim_partnered sim_single { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + +} + +* Combine datasets +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + + +* Plot figures +* Share partnered +twoway (rarea sim_partnered_high sim_partnered_low year, sort color(green%20) /// + legend(label(1 "SimPaths"))) /// +(line valid_partnered year, sort color(green) /// + legend(label(2 "UKHLS "))), /// + title("Partnered") /// + subtitle("Ages 18-65") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + xlabel(,labsize(small)) /// + ylabel(0[0.1]0.9, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: ", size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/partnership/validation_${country}_partnered_ts_18_65_both.jpg", /// + replace width(2400) height(1350) quality(100) + +* Partnership status shares +twoway (rarea sim_partnered_high sim_partnered_low year, sort color(green%20) /// + legend(label(1 "Partnered, SimPaths"))) /// +(line valid_partnered year, sort color(green) /// + legend(label(2 "Partnered, UKHLS "))) /// +(rarea sim_single_high sim_single_low year, sort color(red%20) /// + legend(label(3 "Single, SimPaths"))) /// +(line valid_single year, sort color(red) /// + legend(label(4 "Single, UKHLS "))), /// + title("Partnership status") /// + subtitle("Ages 18-65") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + xlabel(,labsize(small)) /// + ylabel(0[0.1]0.8, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: ", size(vsmall)) + +graph export /// +"$dir_output_files/partnership/validation_${country}_partnership_ts_18_65_both.jpg", /// + replace width(2400) height(1350) quality(100) + + 
+******************************************************************************** +* 1.2 : Mean values over time - by age group +******************************************************************************** + +* Define, per age group, indexed locals for the sample condition, subtitle and file suffix +local age_cond1 "ageGroup == 2 | ageGroup == 3" +local age_sub1 "Ages 20-29" +local age_suff1 "20_29" + +local age_cond2 "ageGroup == 4 | ageGroup == 5" +local age_sub2 "Ages 30-39" +local age_suff2 "30_39" + +local age_cond3 "ageGroup == 6" +local age_sub3 "Ages 40-59" +local age_suff3 "40_59" + +* Loop through the 3 groups +forvalues i = 1/3 { + + * Validation data + use year demAge dwt valid_partnered valid_single ageGroup using /// + "$dir_data/ukhls_validation_sample.dta", clear + + * Select sample + keep if `age_cond`i'' + + collapse (mean) valid_partnered valid_single [aw = dwt], by(year) + tempfile valid_stats + save `valid_stats' + + * Simulated data + use run year demAge sim_partnered sim_single ageGroup using /// + "$dir_data/simulation_sample.dta", clear + + * Select sample + keep if `age_cond`i'' + + collapse (mean) sim_partnered sim_single, by(run year) + + collapse (mean) sim_partnered sim_single /// + (sd) sim_partnered_sd = sim_partnered /// + sim_single_sd = sim_single, by(year) + + foreach varname in sim_partnered sim_single { + + gen `varname'_high = `varname' + 1.96*`varname'_sd + gen `varname'_low = `varname' - 1.96*`varname'_sd + + } + + * Combine + merge 1:1 year using `valid_stats', keep(3) nogen + + twoway (rarea sim_partnered_high sim_partnered_low year, sort color(green%20) /// + legend(label(1 "Partnered, SimPaths"))) /// + (line valid_partnered year, sort color(green) /// + legend(label(2 "Partnered, UKHLS"))) /// + (rarea sim_single_high sim_single_low year, sort color(red%20) /// + legend(label(3 "Single, SimPaths"))) /// + (line valid_single year, sort color(red) /// + legend(label(4 "Single, UKHLS"))), /// + title("Partnership status") /// + 
subtitle("`age_sub`i''") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + xlabel(, labsize(small)) /// + ylabel(0(0.2)1, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: ", size(vsmall)) + + graph export /// + "$dir_output_files/partnership/validation_${country}_partnership_ts_`age_suff`i''_both.jpg", /// + replace width(2400) height(1350) quality(100) + +} + +graph drop _all + + +******************************************************************************** +* 1.3 : Mean values over time - by children +******************************************************************************** + +* Load validation data +use year demAge dwt valid_partnered_children_0 valid_partnered_children_1 /// + valid_partnered_children_2 valid_partnered_children_3plus /// + valid_single_children_0 valid_single_children_1 /// + valid_single_children_2 valid_single_children_3plus using /// + "$dir_data/ukhls_validation_sample.dta", clear + +collapse (mean) valid_partnered_children_0 valid_partnered_children_1 /// + valid_partnered_children_2 valid_partnered_children_3plus /// + valid_single_children_0 valid_single_children_1 /// + valid_single_children_2 valid_single_children_3plus /// + [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + +* Load SimPaths data +use run year demAge sim_partnered_children_0 sim_partnered_children_1 /// + sim_partnered_children_2 sim_partnered_children_3plus /// + sim_single_children_0 sim_single_children_1 sim_single_children_2 /// + sim_single_children_3plus /// + using "$dir_data/simulation_sample.dta", clear + +* Compute shares and sd +collapse (mean) sim_partnered_children_0 sim_partnered_children_1 /// + sim_partnered_children_2 sim_partnered_children_3plus /// + sim_single_children_0 sim_single_children_1 /// + sim_single_children_2 sim_single_children_3plus, /// + by(run year) + +collapse (mean) sim_partnered_children_0 sim_partnered_children_1 /// + 
sim_partnered_children_2 sim_partnered_children_3plus /// + sim_single_children_0 sim_single_children_1 sim_single_children_2 /// + sim_single_children_3plus /// + (sd) sim_partnered_children_0_sd = sim_partnered_children_0 /// + sim_partnered_children_1_sd = sim_partnered_children_1 /// + sim_partnered_children_2_sd = sim_partnered_children_2 /// + sim_partnered_children_3plus_sd = sim_partnered_children_3plus /// + sim_single_children_0_sd = sim_single_children_0 /// + sim_single_children_1_sd = sim_single_children_1 /// + sim_single_children_2_sd = sim_single_children_2 /// + sim_single_children_3plus_sd = sim_single_children_3plus /// + , by(year) + +* Approx 95% confidence interval +foreach varname in sim_partnered_children_0 sim_partnered_children_1 /// + sim_partnered_children_2 sim_partnered_children_3plus sim_single_children_0 /// + sim_single_children_1 sim_single_children_2 sim_single_children_3plus { + + gen `varname'_h = `varname' + 1.96*`varname'_sd + gen `varname'_l = `varname' - 1.96*`varname'_sd + +} + +* Combine datasets +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Label variables +label var sim_partnered_children_0 "No children" +label var sim_partnered_children_1 "1 child" +label var sim_partnered_children_2 "2 children" +label var sim_partnered_children_3plus "3+ children" +label var sim_single_children_0 "No children" +label var sim_single_children_1 "1 child" +label var sim_single_children_2 "2 children" +label var sim_single_children_3plus "3+ children" + +* Plot figures + +* Partnered +foreach varname in partnered_children_0 partnered_children_1 /// + partnered_children_2 partnered_children_3plus { + + local vtext : variable label sim_`varname' + if `"`vtext'"' == "" local vtext "sim_`varname'" + twoway (rarea sim_`varname'_h sim_`varname'_l year, sort color(red%20) /// + legend(label(1 "SimPaths") position(6) rows(1))) /// + (line valid_`varname' year, sort color(red) /// + legend(label(2 "UKHLS"))), /// + 
subtitle("`vtext'") /// + name(`varname', replace) /// + ytitle("Share", size(small)) /// + xtitle("") /// + ylabel(0[0.1]0.5,labsize(vsmall)) /// + xlabel(,labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) + +} + +* Combine plots +grc1leg partnered_children_0 partnered_children_1 partnered_children_2 /// + partnered_children_3plus , /// + title("Share Partnered and Number of Children") /// + legendfrom(partnered_children_0) /// + rows(2) /// + graphregion(color(white)) /// + ycomm /// + note("Notes: Samples contains all individual ages 18-65. ", size(vsmall)) + +graph export /// +"$dir_output_files/partnership/validation_${country}_partnership_children_ts_18_65_partnered.jpg", /// + replace width(2400) height(1350) quality(100) + + +* Single +foreach varname in single_children_0 single_children_1 single_children_2 /// + single_children_3plus { + + local vtext : variable label sim_`varname' + if `"`vtext'"' == "" local vtext "sim_`varname'" + twoway (rarea sim_`varname'_h sim_`varname'_l year, sort color(red%20) /// + legend(label(1 "SimPaths") position(6) rows(1))) /// + (line valid_`varname' year, sort color(red) /// + legend(label(2 "UKHLS"))), /// + subtitle("`vtext'") /// + name(`varname', replace) /// + ytitle("Share", size(small)) /// + xtitle("") /// + ylabel(0[0.1]0.5,labsize(vsmall)) /// + xlabel(,labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) + +} + +* Combine plots +grc1leg single_children_0 single_children_1 single_children_2 /// + single_children_3plus , /// + title("Share Single and Number of Children") /// + legendfrom(single_children_0) /// + rows(2) /// + graphregion(color(white)) /// + ycomm /// + note("Notes: Samples contains all individual ages 18-65. 
", size(vsmall)) + +graph export /// +"$dir_output_files/partnership/validation_${country}_partnership_children_ts_18_65_single.jpg", /// + replace width(2400) height(1350) quality(100) + + +graph drop _all + diff --git a/validation/02_simulated_output_validation/do_files/06_13_plot_health.do b/validation/02_simulated_output_validation/do_files/04_13_plot_health.do similarity index 62% rename from validation/02_simulated_output_validation/do_files/06_13_plot_health.do rename to validation/02_simulated_output_validation/do_files/04_13_plot_health.do index b25d987ef..8d0d0fc69 100644 --- a/validation/02_simulated_output_validation/do_files/06_13_plot_health.do +++ b/validation/02_simulated_output_validation/do_files/04_13_plot_health.do @@ -1,44 +1,49 @@ -******************************************************************************** -* PROJECT: ESPON +/******************************************************************************* +* PROJECT: SimPaths UK * SECTION: Validation * OBJECT: Health * AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 +* LAST UPDATE: Jan 2026 * COUNTRY: UK - -* NOTES: Simulated data doesn't contain 80-100 year olds which make -* up group 8. -* Adjusted the code so that runs without this group. 
******************************************************************************** +* NOTES: +*******************************************************************************/ ******************************************************************************** * 1 : Mean values over time ******************************************************************************** ******************************************************************************** -* 1.1 : Mean values over time - sf1, 18-65 +* 1.1 : Mean values over time - Self rated health, 16-65 ******************************************************************************** -use year dwt dhe dag using /// +use year dwt valid_healthSelfRated demAge using /// "$dir_data/ukhls_validation_sample.dta", clear -gen health = dhe +keep if inrange(demAge,16,65) + +gen health = valid_healthSelfRated collapse (mean) health [aw = dwt], by(year) save "$dir_data/temp_valid_stats.dta", replace * Prepare Simulated data -use run year dhe dag run using "$dir_data/simulated_data.dta", clear +use run year sim_healthSelfRated demAge run using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,16,65) + +gen healthSelfRated = sim_healthSelfRated -collapse (mean) dhe, by(year run) +collapse (mean) healthSelfRated, by(year run) -collapse (mean) dhe /// - (sd) dhe_sd = dhe /// +collapse (mean) healthSelfRated /// + (sd) healthSelfRated_sd = healthSelfRated /// , by(year) * Compute 95% confidence interval -foreach varname in dhe { +foreach varname in healthSelfRated { gen `varname'_high = `varname' + 1.96*`varname'_sd gen `varname'_low = `varname' - 1.96*`varname'_sd @@ -48,12 +53,12 @@ foreach varname in dhe { merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen * Plot figure -twoway (rarea dhe_high dhe_low year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// +twoway (rarea healthSelfRated_high healthSelfRated_low year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// (line health 
year, sort color(green) /// legend(label(2 "UKHLS"))), /// - title("General Health Score") /// - subtitle("sf1, ages 18-65") /// + title("Self Rated Health") /// + subtitle("Ages 16-65") /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// ylabel(,labsize(small)) /// @@ -64,53 +69,53 @@ twoway (rarea dhe_high dhe_low year, /// size(vsmall)) graph export /// -"$dir_output_files/health/validation_${country}_sf1_ts_${min_age}_${max_age}_both.jpg", /// +"$dir_output_files/health/validation_${country}_self_rated_ts_16_65_both.jpg", /// replace width(2560) height(1440) quality(100) ******************************************************************************** -* 1.2 : Mean values over time - sf1, 18-65, by gender +* 1.2 : Mean values over time - Self rated health, 16-65, by gender ******************************************************************************** * Prepare validation data -use year dwt dhe dag dgn using /// +use year dwt valid_healthSelfRated demAge demMaleFlag using /// "$dir_data/ukhls_validation_sample.dta", clear -gen health = dhe +keep if inrange(demAge,16,65) + +gen health = valid_healthSelfRated -collapse (mean) health [aw = dwt], by(year dgn) +collapse (mean) health [aw = dwt], by(year demMaleFlag) save "$dir_data/temp_valid_stats.dta", replace * Prepare simulation data -use run year dhe dag dgn run using "$dir_data/simulated_data.dta", clear +use run year sim_healthSelfRated demAge demMaleFlag run using "$dir_data/simulation_sample.dta", clear -gen dgn2 = 0 if dgn == "Female" -replace dgn2 = 1 if dgn == "Male" +keep if inrange(demAge,16,65) -drop dgn -rename dgn2 dgn +gen healthSelfRated = sim_healthSelfRated -collapse (mean) dhe, by(year dgn run) +collapse (mean) healthSelfRated, by(year demMaleFlag run) -collapse (mean) dhe /// - (sd) dhe_sd = dhe /// - , by(year dgn) +collapse (mean) healthSelfRated /// + (sd) healthSelfRated_sd = healthSelfRated /// + , by(year demMaleFlag) * Compute 95% confidence interval -foreach varname in 
dhe { +foreach varname in healthSelfRated { gen `varname'_high = `varname' + 1.96*`varname'_sd gen `varname'_low = `varname' - 1.96*`varname'_sd } -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen * Plot figure -twoway (rarea dhe_high dhe_low year if dgn == 0, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line health year if dgn == 0, sort color(green) /// +twoway (rarea healthSelfRated_high healthSelfRated_low year if /// + demMaleFlag == 0, sort color(green%20) legend(label(1 "SimPaths"))) /// +(line health year if demMaleFlag == 0, sort color(green) /// legend(label(2 "UKHLS"))), /// subtitle("Females") /// name(health_female, replace) /// @@ -122,9 +127,9 @@ twoway (rarea dhe_high dhe_low year if dgn == 0, /// graphregion(color(white)) /// -twoway (rarea dhe_high dhe_low year if dgn == 1, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line health year if dgn == 1, sort color(green) /// +twoway (rarea healthSelfRated_high healthSelfRated_low year if /// + demMaleFlag == 1, sort color(green%20) legend(label(1 "SimPaths"))) /// +(line health year if demMaleFlag == 1, sort color(green) /// legend(label(2 "UKHLS"))), /// subtitle("Males") /// name(health_male, replace) /// @@ -136,38 +141,38 @@ twoway (rarea dhe_high dhe_low year if dgn == 1, /// graphregion(color(white)) /// grc1leg health_female health_male, /// - title("General Health Score") /// - subtitle("sf1, ages 18-65") /// + title("Self Rated Health") /// + subtitle("Ages 16-65") /// legendfrom(health_female) rows(1) /// graphregion(color(white)) /// + ycomm /// note("Notes: The health variable is a self-assessed variable and follows a 5-point Likert scale (1 = poor, ..., 5 = excellent). 
", /// size(vsmall)) graph export /// -"$dir_output_files/health/validation_${country}_sf1_ts_${min_age}_${max_age}_gender.jpg", /// +"$dir_output_files/health/validation_${country}_self_rated_ts_16_65_gender.jpg", /// replace width(2560) height(1440) quality(100) - ******************************************************************************** -* 1.3 : Mean values over time - sf1, 18-65, by age group and gender +* 1.3 : Mean values over time - self rated health, by age group and gender ******************************************************************************** * Prepare validation data -use year dwt dgn ageGroup dhe using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -gen health_m = dhe if dgn == 1 -gen health_f = dhe if dgn == 0 +use year dwt demAge demMaleFlag ageGroup valid_healthSelfRated using /// + "$dir_data/ukhls_validation_sample.dta", clear + +gen health_m = valid_healthSelfRated if demMaleFlag == 1 +gen health_f = valid_healthSelfRated if demMaleFlag == 0 -drop if ageGroup == 0 | ageGroup == 8 +drop if ageGroup == 0 collapse (mean) health* [aw = dwt], by(ageGroup year) drop if missing(ageGroup) reshape wide health*, i(year) j(ageGroup) -forvalues i = 1(1)7 { +forvalues i = 1(1)8 { rename health_f`i' health_f_`i'_valid rename health_m`i' health_m_`i'_valid @@ -178,13 +183,16 @@ save "$dir_data/temp_valid_stats.dta", replace * Prepare Simulated data -use run year sim_sex ageGroup dhe using "$dir_data/simulated_data.dta", clear +use run year demMaleFlag ageGroup sim_healthSelfRated using /// + "$dir_data/simulation_sample.dta", clear -gen health_m = dhe if sim_sex == 1 -gen health_f = dhe if sim_sex == 2 +gen health_m = sim_healthSelfRated if demMaleFlag == 1 +gen health_f = sim_healthSelfRated if demMaleFlag == 0 collapse (mean) health*, by(ageGroup run year) + drop if missing(ageGroup) + reshape wide health*, i(year run) j(ageGroup) collapse (mean) health* /// @@ -202,11 +210,11 @@ collapse (mean) health* /// (sd) health_f_6_sd = 
health_f6 /// (sd) health_m_7_sd = health_m7 /// (sd) health_f_7_sd = health_f7 /// + (sd) health_m_8_sd = health_m8 /// + (sd) health_f_8_sd = health_f8 /// , by(year) - /*(sd) health_m_8_sd = health_m8 /// - *(sd) health_f_8_sd = health_f8 /// */ -forvalues i=1(1)7 { +forvalues i=1(1)8 { gen health_f_`i'_sim_high = health_f`i' + 1.96*health_f_`i'_sd gen health_f_`i'_sim_low = health_f`i' - 1.96*health_f_`i'_sd @@ -222,7 +230,7 @@ merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen foreach vble in "health_f" "health_m" { twoway (rarea `vble'_1_sim_high `vble'_1_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_1_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 15-19") /// name(`vble'_1, replace) /// @@ -234,7 +242,7 @@ foreach vble in "health_f" "health_m" { graphregion(color(white)) twoway (rarea `vble'_2_sim_high `vble'_2_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_2_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 20-24") /// name(`vble'_2, replace) /// @@ -246,7 +254,7 @@ foreach vble in "health_f" "health_m" { graphregion(color(white)) twoway (rarea `vble'_3_sim_high `vble'_3_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_3_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 25-29") /// name(`vble'_3, replace) /// @@ -258,7 +266,7 @@ foreach vble in "health_f" "health_m" { graphregion(color(white)) twoway (rarea `vble'_4_sim_high `vble'_4_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// 
(line `vble'_4_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 30-34") /// name(`vble'_4, replace) /// @@ -270,7 +278,7 @@ foreach vble in "health_f" "health_m" { graphregion(color(white)) twoway (rarea `vble'_5_sim_high `vble'_5_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_5_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 35-39") /// name(`vble'_5, replace) /// @@ -282,7 +290,7 @@ foreach vble in "health_f" "health_m" { graphregion(color(white)) twoway (rarea `vble'_6_sim_high `vble'_6_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_6_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 40-59") /// name(`vble'_6, replace) /// @@ -294,7 +302,7 @@ foreach vble in "health_f" "health_m" { graphregion(color(white)) twoway (rarea `vble'_7_sim_high `vble'_7_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_7_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 60-79") /// name(`vble'_7, replace) /// @@ -305,65 +313,79 @@ foreach vble in "health_f" "health_m" { legend(size(small)) /// graphregion(color(white)) - /*twoway (rarea `vble'_8_sim_high `vble'_8_sim_low year, sort /// - color(green%20) legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_8_valid year, sort color(green) legend(label(2 "UKHLS"))), /// - title("age 80-100") name(`vble'_8, replace) ylabel(1 [1] 5)*/ + twoway (rarea `vble'_8_sim_high `vble'_8_sim_low year, sort /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// + (line `vble'_8_valid year, sort color(red) legend(label(2 "UKHLS"))), /// + subtitle("Age 
80-100") /// + name(`vble'_8, replace) /// + xtitle("Year", size(small)) /// + ytitle("Score", size(small)) /// + ylabel(3 [1] 5, labsize(vsmall)) /// + xlabel(,labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) } * Save figures grc1leg health_f_1 health_f_2 health_f_3 health_f_4 health_f_5 /// - health_f_6 health_f_7 /*health_f_8*/, /// - title("General Health Score") /// - subtitle("sf1, females") /// + health_f_6 health_f_7 health_f_8, /// + title("Self Rated Health") /// + subtitle("Females") /// legendfrom(health_f_1) /// graphregion(color(white)) /// + ycomm /// note("Notes: The health variable is a self-assessed variable and follows a 5-point Likert scale (1 = poor, ..., 5 = excellent). ", /// size(vsmall)) graph export /// - "$dir_output_files/health/validation_${country}_sf1_ts_all_female.jpg", /// + "$dir_output_files/health/validation_${country}_self_rated_ts_age_groups_female.jpg", /// replace width(2400) height(1350) quality(100) grc1leg health_m_1 health_m_2 health_m_3 health_m_4 health_m_5 /// - health_m_6 health_m_7 /*health_m_8*/, /// - title("General Health Score") /// - subtitle("sf1, males") /// + health_m_6 health_m_7 health_m_8, /// + title("Self Rated Health") /// + subtitle("Males") /// legendfrom(health_m_1) /// graphregion(color(white)) /// + ycomm /// note("Notes: The health variable is a self-assessed variable and follows a 5-point Likert scale (1 = poor, ..., 5 = excellent). 
", /// size(vsmall)) graph export /// - "$dir_output_files/health/validation_${country}_sf1_ts_all_male.jpg", /// + "$dir_output_files/health/validation_${country}_self_rated_ts_age_groups_male.jpg", /// replace width(2400) height(1350) quality(100) +graph drop _all + ******************************************************************************** -* 1.4 : Mean values over time - pcs, 18-65 +* 1.4 : Mean values over time - pcs, 16-65 ******************************************************************************** -use year dwt dhe_pcs dag using /// +use year dwt valid_healthPhysicalPcs demAge using /// "$dir_data/ukhls_validation_sample.dta", clear -gen health = dhe_pcs - -collapse (mean) health [aw = dwt], by(year) +keep if inrange(demAge,16,65) + +collapse (mean) valid_healthPhysicalPcs [aw = dwt], by(year) save "$dir_data/temp_valid_stats.dta", replace * Prepare Simulated data -use run year dhe_pcs dag run using "$dir_data/simulated_data.dta", clear +use run year sim_healthPhysicalPcs demAge run using /// + "$dir_data/simulation_sample.dta", clear -collapse (mean) dhe_pcs, by(year run) +keep if inrange(demAge,16,65) + +collapse (mean) sim_healthPhysicalPcs, by(year run) -collapse (mean) dhe_pcs /// - (sd) dhe_pcs_sd = dhe_pcs /// +collapse (mean) sim_healthPhysicalPcs /// + (sd) sim_healthPhysicalPcs_sd = sim_healthPhysicalPcs /// , by(year) * Compute 95% confidence interval -foreach varname in dhe_pcs { +foreach varname in sim_healthPhysicalPcs { gen `varname'_high = `varname' + 1.96*`varname'_sd gen `varname'_low = `varname' - 1.96*`varname'_sd @@ -373,12 +395,12 @@ foreach varname in dhe_pcs { merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen * Plot figure -twoway (rarea dhe_pcs_high dhe_pcs_low year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line health year, sort color(green) /// +twoway (rarea sim_healthPhysicalPcs_high sim_healthPhysicalPcs_low year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line 
valid_healthPhysicalPcs year, sort color(green) /// legend(label(2 "UKHLS"))), /// title("Physical Health") /// - subtitle("PCS, ages 18-65") /// + subtitle("PCS, ages 16-65") /// xtitle("Year", size(small)) /// ytitle("Health score", size(small)) /// ylabel(,labsize(small)) /// @@ -389,53 +411,50 @@ twoway (rarea dhe_pcs_high dhe_pcs_low year, /// size(vsmall)) graph export /// -"$dir_output_files/health/validation_${country}_pcs_ts_${min_age}_${max_age}_both.jpg", /// +"$dir_output_files/health/validation_${country}_pcs_ts_16_65_both.jpg", /// replace width(2560) height(1440) quality(100) ******************************************************************************** -* 1.5 : Mean values over time - pcs, 18-65, by gender +* 1.5 : Mean values over time - pcs, 16-65, by gender ******************************************************************************** * Prepare validation data -use year dwt dhe_pcs dag dgn using /// +use year dwt valid_healthPhysicalPcs demAge demMaleFlag using /// "$dir_data/ukhls_validation_sample.dta", clear -gen health = dhe_pcs - -collapse (mean) health [aw = dwt], by(year dgn) +keep if inrange(demAge,16,65) + +collapse (mean) valid_healthPhysicalPcs [aw = dwt], by(year demMaleFlag) save "$dir_data/temp_valid_stats.dta", replace * Prepare simulation data -use run year dhe_pcs dag dgn run using "$dir_data/simulated_data.dta", clear +use run year sim_healthPhysicalPcs demAge demMaleFlag run using /// + "$dir_data/simulation_sample.dta", clear -gen dgn2 = 0 if dgn == "Female" -replace dgn2 = 1 if dgn == "Male" +keep if inrange(demAge,16,65) -drop dgn -rename dgn2 dgn +collapse (mean) sim_healthPhysicalPcs, by(year demMaleFlag run) -collapse (mean) dhe_pcs, by(year dgn run) - -collapse (mean) dhe_pcs /// - (sd) dhe_pcs_sd = dhe_pcs /// - , by(year dgn) +collapse (mean) sim_healthPhysicalPcs /// + (sd) sim_healthPhysicalPcs_sd = sim_healthPhysicalPcs /// + , by(year demMaleFlag) * Compute 95% confidence interval -foreach varname in dhe_pcs { 
+foreach varname in sim_healthPhysicalPcs { gen `varname'_high = `varname' + 1.96*`varname'_sd gen `varname'_low = `varname' - 1.96*`varname'_sd } -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen * Plot figure -twoway (rarea dhe_pcs_high dhe_pcs_low year if dgn == 0, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line health year if dgn == 0, sort color(green) /// +twoway (rarea sim_healthPhysicalPcs_high sim_healthPhysicalPcs_low year if /// + demMaleFlag == 0, sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_healthPhysicalPcs year if demMaleFlag == 0, sort color(green) /// legend(label(2 "UKHLS"))), /// subtitle("Females") /// name(health_female, replace) /// @@ -446,10 +465,9 @@ twoway (rarea dhe_pcs_high dhe_pcs_low year if dgn == 0, /// legend(size(small)) /// graphregion(color(white)) /// - -twoway (rarea dhe_pcs_high dhe_pcs_low year if dgn == 1, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line health year if dgn == 1, sort color(green) /// +twoway (rarea sim_healthPhysicalPcs_high sim_healthPhysicalPcs_low year if /// + demMaleFlag == 1, sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_healthPhysicalPcs year if demMaleFlag == 1, sort color(green) /// legend(label(2 "UKHLS"))), /// subtitle("Males") /// name(health_male, replace) /// @@ -461,37 +479,38 @@ twoway (rarea dhe_pcs_high dhe_pcs_low year if dgn == 1, /// graphregion(color(white)) /// grc1leg health_female health_male, /// - title("Phyisucal Health") /// - subtitle("PCS, ages 18-65") /// + title("Physical Health") /// + subtitle("PCS, ages 16-65") /// legendfrom(health_female) rows(1) /// graphregion(color(white)) /// + ycomm /// note("Notes: PCS is the SF-12 Physical Health Component. This is a measure of physical function ranging from 0 (low functioning)" "to 100 (high functioning). 
", /// size(vsmall)) graph export /// -"$dir_output_files/health/validation_${country}_pcs_ts_${min_age}_${max_age}_gender.jpg", /// +"$dir_output_files/health/validation_${country}_pcs_ts_16_65_gender.jpg", /// replace width(2560) height(1440) quality(100) ******************************************************************************** -* 1.6 : Mean values over time - pcs, 18-65, by age group and gender +* 1.6 : Mean values over time - pcs, 16-65, by age group and gender ******************************************************************************** * Prepare validation data -use year dwt dgn ageGroup dhe_pcs using /// - "$dir_data/ukhls_validation_full_sample.dta", clear +use year dwt demMaleFlag ageGroup valid_healthPhysicalPcs using /// + "$dir_data/ukhls_validation_sample.dta", clear -gen health_m = dhe_pcs if dgn == 1 -gen health_f = dhe_pcs if dgn == 0 +gen health_m = valid_healthPhysicalPcs if demMaleFlag == 1 +gen health_f = valid_healthPhysicalPcs if demMaleFlag == 0 -drop if ageGroup == 0 | ageGroup == 8 +drop if ageGroup == 0 collapse (mean) health* [aw = dwt], by(ageGroup year) drop if missing(ageGroup) reshape wide health*, i(year) j(ageGroup) -forvalues i = 1(1)7 { +forvalues i = 1(1)8 { rename health_f`i' health_f_`i'_valid rename health_m`i' health_m_`i'_valid @@ -502,10 +521,11 @@ save "$dir_data/temp_valid_stats.dta", replace * Prepare Simulated data -use run year sim_sex ageGroup dhe_pcs using "$dir_data/simulated_data.dta", clear +use run year demMaleFlag ageGroup sim_healthPhysicalPcs using /// + "$dir_data/simulation_sample.dta", clear -gen health_m = dhe_pcs if sim_sex == 1 -gen health_f = dhe_pcs if sim_sex == 2 +gen health_m = sim_healthPhysicalPcs if demMaleFlag == 1 +gen health_f = sim_healthPhysicalPcs if demMaleFlag == 0 collapse (mean) health*, by(ageGroup run year) drop if missing(ageGroup) @@ -526,12 +546,12 @@ collapse (mean) health* /// (sd) health_f_6_sd = health_f6 /// (sd) health_m_7_sd = health_m7 /// (sd) health_f_7_sd = 
health_f7 /// + (sd) health_m_8_sd = health_m8 /// + (sd) health_f_8_sd = health_f8 /// , by(year) - /*(sd) health_m_8_sd = health_m8 /// - *(sd) health_f_8_sd = health_f8 /// */ * Compute 19% confidence intervals -forvalues i=1(1)7 { +forvalues i=1(1)8 { gen health_f_`i'_sim_high = health_f`i' + 1.96*health_f_`i'_sd gen health_f_`i'_sim_low = health_f`i' - 1.96*health_f_`i'_sd @@ -547,102 +567,110 @@ merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen foreach vble in "health_f" "health_m" { twoway (rarea `vble'_1_sim_high `vble'_1_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_1_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 15-19") /// name(`vble'_1, replace) /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// - ylabel(40[5]58,labsize(vsmall)) /// + ylabel(,labsize(vsmall)) /// xlabel(,labsize(vsmall)) /// legend(size(small)) /// graphregion(color(white)) twoway (rarea `vble'_2_sim_high `vble'_2_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_2_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 20-24") /// name(`vble'_2, replace) /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// - ylabel(40[5]58,labsize(vsmall)) /// + ylabel(,labsize(vsmall)) /// xlabel(,labsize(vsmall)) /// legend(size(small)) /// graphregion(color(white)) twoway (rarea `vble'_3_sim_high `vble'_3_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_3_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 25-29") /// name(`vble'_3, replace) /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// - 
ylabel(40[5]58,labsize(vsmall)) /// + ylabel(,labsize(vsmall)) /// xlabel(,labsize(vsmall)) /// legend(size(small)) /// graphregion(color(white)) twoway (rarea `vble'_4_sim_high `vble'_4_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_4_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 30-34") /// name(`vble'_4, replace) /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// - ylabel(40[5]58,labsize(vsmall)) /// + ylabel(,labsize(vsmall)) /// xlabel(,labsize(vsmall)) /// legend(size(small)) /// graphregion(color(white)) twoway (rarea `vble'_5_sim_high `vble'_5_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_5_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 35-39") /// name(`vble'_5, replace) /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// - ylabel(40[5]58,labsize(vsmall)) /// + ylabel(,labsize(vsmall)) /// xlabel(,labsize(vsmall)) /// legend(size(small)) /// graphregion(color(white)) twoway (rarea `vble'_6_sim_high `vble'_6_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_6_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 40-59") /// name(`vble'_6, replace) /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// - ylabel(40[5]58,labsize(vsmall)) /// + ylabel(,labsize(vsmall)) /// xlabel(,labsize(vsmall)) /// legend(size(small)) /// graphregion(color(white)) twoway (rarea `vble'_7_sim_high `vble'_7_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line 
`vble'_7_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 60-79") /// name(`vble'_7, replace) /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// - ylabel(40[5]58,labsize(vsmall)) /// + ylabel(,labsize(vsmall)) /// xlabel(,labsize(vsmall)) /// legend(size(small)) /// graphregion(color(white)) - /*twoway (rarea `vble'_8_sim_high `vble'_8_sim_low year, sort /// - color(green%20) legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_8_valid year, sort color(green) legend(label(2 "UKHLS"))), /// - title("age 80-100") name(`vble'_8, replace) ylabel(1 [1] 5)*/ + twoway (rarea `vble'_8_sim_high `vble'_8_sim_low year, sort /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// + (line `vble'_8_valid year, sort color(red) legend(label(2 "UKHLS"))), /// + subtitle("Age 80-100") /// + name(`vble'_8, replace) /// + xtitle("Year", size(small)) /// + ytitle("Score", size(small)) /// + ylabel(,labsize(vsmall)) /// + xlabel(,labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) } * Save figures grc1leg health_f_1 health_f_2 health_f_3 health_f_4 health_f_5 /// - health_f_6 health_f_7 /*health_f_8*/, /// + health_f_6 health_f_7 health_f_8, /// title("Physical Health") /// subtitle("PCS, females") /// legendfrom(health_f_1) /// graphregion(color(white)) /// + ycomm /// note("Notes: PCS is the SF-12 Physical Health Component. This is a measure of physical function ranging from 0 (low functioning) to" "100 (high functioning). ", /// size(vsmall)) @@ -652,11 +680,12 @@ graph export /// grc1leg health_m_1 health_m_2 health_m_3 health_m_4 health_m_5 /// - health_m_6 health_m_7 /*health_m_8*/, /// + health_m_6 health_m_7 health_m_8, /// title("Physical Health") /// subtitle("PCS, males") /// legendfrom(health_m_1) /// graphregion(color(white)) /// + ycomm /// note("Notes: PCS is the SF-12 Physical Health Component. 
This is a measure of physical function ranging from 0 (low functioning) to" "100 (high functioning). ", /// size(vsmall)) @@ -665,31 +694,35 @@ graph export /// replace width(2400) height(1350) quality(100) graph drop _all + ******************************************************************************** -* 1.7 : Mean values over time - mcs, 18-65 +* 1.7 : Mean values over time - mcs, 16-65 ******************************************************************************** -use year dwt dhe_mcs dag using /// +use year dwt valid_healthMentalMcs demAge using /// "$dir_data/ukhls_validation_sample.dta", clear -gen health = dhe_mcs - -collapse (mean) health [aw = dwt], by(year) +keep if inrange(demAge,16,65) + +collapse (mean) valid_healthMentalMcs [aw = dwt], by(year) save "$dir_data/temp_valid_stats.dta", replace * Prepare Simulated data -use run year dhe_mcs dag run using "$dir_data/simulated_data.dta", clear +use run year sim_healthMentalMcs demAge run using /// + "$dir_data/simulation_sample.dta", clear -collapse (mean) dhe_mcs, by(year run) +keep if inrange(demAge,16,65) + +collapse (mean) sim_healthMentalMcs, by(year run) -collapse (mean) dhe_mcs /// - (sd) dhe_mcs_sd = dhe_mcs /// +collapse (mean) sim_healthMentalMcs /// + (sd) sim_healthMentalMcs_sd = sim_healthMentalMcs /// , by(year) * Compute 95% confidence interval -foreach varname in dhe_mcs { +foreach varname in sim_healthMentalMcs { gen `varname'_high = `varname' + 1.96*`varname'_sd gen `varname'_low = `varname' - 1.96*`varname'_sd @@ -699,14 +732,14 @@ foreach varname in dhe_mcs { merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen * Plot figure -twoway (rarea dhe_mcs_high dhe_mcs_low year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line health year, sort color(green) /// +twoway (rarea sim_healthMentalMcs_high sim_healthMentalMcs_low year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_healthMentalMcs year, sort color(green) /// 
legend(label(2 "UKHLS"))), /// title("Mental Health") /// - subtitle("MCS, ages 18-65") /// + subtitle("MCS, ages 16-65") /// xtitle("Year", size(small)) /// - ytitle("Health score", size(small)) /// + ytitle("Score", size(small)) /// ylabel(,labsize(small)) /// xlabel(,labsize(small)) /// legend(size(small)) /// @@ -715,109 +748,106 @@ twoway (rarea dhe_mcs_high dhe_mcs_low year, /// size(vsmall)) graph export /// -"$dir_output_files/health/validation_${country}_mcs_ts_${min_age}_${max_age}_both.jpg", /// +"$dir_output_files/health/validation_${country}_mcs_ts_16_65_both.jpg", /// replace width(2560) height(1440) quality(100) ******************************************************************************** -* 1.8 : Mean values over time - mcs, 18-65, by gender +* 1.8 : Mean values over time - mcs, 16-65, by gender ******************************************************************************** * Prepare validation data -use year dwt dhe_mcs dag dgn using /// +use year dwt valid_healthMentalMcs demAge demMaleFlag using /// "$dir_data/ukhls_validation_sample.dta", clear -gen health = dhe_mcs - -collapse (mean) health [aw = dwt], by(year dgn) +keep if inrange(demAge,16,65) + +collapse (mean) valid_healthMentalMcs [aw = dwt], by(year demMaleFlag) save "$dir_data/temp_valid_stats.dta", replace * Prepare simulation data -use run year dhe_mcs dag dgn run using "$dir_data/simulated_data.dta", clear +use run year sim_healthMentalMcs demAge demMaleFlag run using /// + "$dir_data/simulation_sample.dta", clear -gen dgn2 = 0 if dgn == "Female" -replace dgn2 = 1 if dgn == "Male" +keep if inrange(demAge,16,65) -drop dgn -rename dgn2 dgn +collapse (mean) sim_healthMentalMcs, by(year demMaleFlag run) -collapse (mean) dhe_mcs, by(year dgn run) - -collapse (mean) dhe_mcs /// - (sd) dhe_mcs_sd = dhe_mcs /// - , by(year dgn) +collapse (mean) sim_healthMentalMcs /// + (sd) sim_healthMentalMcs_sd = sim_healthMentalMcs /// + , by(year demMaleFlag) * Compute 95% confidence interval 
-foreach varname in dhe_mcs { +foreach varname in sim_healthMentalMcs { gen `varname'_high = `varname' + 1.96*`varname'_sd gen `varname'_low = `varname' - 1.96*`varname'_sd } -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen * Plot figure -twoway (rarea dhe_mcs_high dhe_mcs_low year if dgn == 0, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line health year if dgn == 0, sort color(green) /// +twoway (rarea sim_healthMentalMcs_high sim_healthMentalMcs_low year if /// + demMaleFlag == 0, sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_healthMentalMcs year if demMaleFlag == 0, sort color(green) /// legend(label(2 "UKHLS"))), /// subtitle("Females") /// name(health_female, replace) /// xtitle("Year", size(small)) /// - ytitle("Health score", size(small)) /// + ytitle("Score", size(small)) /// ylabel(44.5[2]50.5,labsize(small)) /// xlabel(,labsize(small)) /// legend(size(small)) /// graphregion(color(white)) /// - -twoway (rarea dhe_mcs_high dhe_mcs_low year if dgn == 1, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line health year if dgn == 1, sort color(green) /// +twoway (rarea sim_healthMentalMcs_high sim_healthMentalMcs_low year if /// + demMaleFlag == 1, sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_healthMentalMcs year if demMaleFlag == 1, sort color(green) /// legend(label(2 "UKHLS"))), /// subtitle("Males") /// name(health_male, replace) /// xtitle("Year", size(small)) /// - ytitle("Health score", size(small)) /// + ytitle("Score", size(small)) /// ylabel(44.5[2]50.5,labsize(small)) /// xlabel(,labsize(small)) /// legend(size(small)) /// graphregion(color(white)) /// grc1leg health_female health_male, /// - title("Physical Health") /// - subtitle("MCS, ages 18-65") /// + title("Mental Health") /// + subtitle("MCS, ages 16-65") /// legendfrom(health_female) rows(1) /// 
graphregion(color(white)) /// + ycomm /// note("Notes: mcs is the SF-12 Mental Health Component. This is a measure of Mental function ranging from 0 (low functioning)" "to 100 (high functioning). ", /// size(vsmall)) graph export /// -"$dir_output_files/health/validation_${country}_mcs_ts_${min_age}_${max_age}_gender.jpg", /// +"$dir_output_files/health/validation_${country}_mcs_ts_16_65_gender.jpg", /// replace width(2560) height(1440) quality(100) ******************************************************************************** -* 1.9 : Mean values over time - mcs, 18-65, by age group and gender +* 1.9 : Mean values over time - mcs, 16-65, by age group and gender ******************************************************************************** * Prepare validation data -use year dwt dgn ageGroup dhe_mcs using /// - "$dir_data/ukhls_validation_full_sample.dta", clear +use year dwt demMaleFlag ageGroup valid_healthMentalMcs using /// + "$dir_data/ukhls_validation_sample.dta", clear -gen health_m = dhe_mcs if dgn == 1 -gen health_f = dhe_mcs if dgn == 0 +gen health_m = valid_healthMentalMcs if demMaleFlag == 1 +gen health_f = valid_healthMentalMcs if demMaleFlag == 0 -drop if ageGroup == 0 | ageGroup == 8 +drop if ageGroup == 0 collapse (mean) health* [aw = dwt], by(ageGroup year) drop if missing(ageGroup) reshape wide health*, i(year) j(ageGroup) -forvalues i = 1(1)7 { +forvalues i = 1(1)8 { rename health_f`i' health_f_`i'_valid rename health_m`i' health_m_`i'_valid @@ -828,13 +858,16 @@ save "$dir_data/temp_valid_stats.dta", replace * Prepare Simulated data -use run year sim_sex ageGroup dhe_mcs using "$dir_data/simulated_data.dta", clear +use run year demMaleFlag ageGroup sim_healthMentalMcs using /// + "$dir_data/simulation_sample.dta", clear -gen health_m = dhe_mcs if sim_sex == 1 -gen health_f = dhe_mcs if sim_sex == 2 +gen health_m = sim_healthMentalMcs if demMaleFlag == 1 +gen health_f = sim_healthMentalMcs if demMaleFlag == 0 collapse (mean) health*, 
by(ageGroup run year) + drop if missing(ageGroup) + reshape wide health*, i(year run) j(ageGroup) collapse (mean) health* /// @@ -852,12 +885,12 @@ collapse (mean) health* /// (sd) health_f_6_sd = health_f6 /// (sd) health_m_7_sd = health_m7 /// (sd) health_f_7_sd = health_f7 /// + (sd) health_m_8_sd = health_m8 /// + (sd) health_f_8_sd = health_f8 /// , by(year) - /*(sd) health_m_8_sd = health_m8 /// - *(sd) health_f_8_sd = health_f8 /// */ * Compute 19% confidence intervals -forvalues i=1(1)7 { +forvalues i = 1(1)8 { gen health_f_`i'_sim_high = health_f`i' + 1.96*health_f_`i'_sd gen health_f_`i'_sim_low = health_f`i' - 1.96*health_f_`i'_sd @@ -873,102 +906,110 @@ merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen foreach vble in "health_f" "health_m" { twoway (rarea `vble'_1_sim_high `vble'_1_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_1_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 15-19") /// name(`vble'_1, replace) /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// - ylabel(40[5]58,labsize(vsmall)) /// + ylabel(,labsize(vsmall)) /// xlabel(,labsize(vsmall)) /// legend(size(small)) /// graphregion(color(white)) twoway (rarea `vble'_2_sim_high `vble'_2_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_2_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 20-24") /// name(`vble'_2, replace) /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// - ylabel(40[5]58,labsize(vsmall)) /// + ylabel(,labsize(vsmall)) /// xlabel(,labsize(vsmall)) /// legend(size(small)) /// graphregion(color(white)) twoway (rarea `vble'_3_sim_high `vble'_3_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) 
rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_3_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 25-29") /// name(`vble'_3, replace) /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// - ylabel(40[5]58,labsize(vsmall)) /// + ylabel(,labsize(vsmall)) /// xlabel(,labsize(vsmall)) /// legend(size(small)) /// graphregion(color(white)) twoway (rarea `vble'_4_sim_high `vble'_4_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_4_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 30-34") /// name(`vble'_4, replace) /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// - ylabel(40[5]58,labsize(vsmall)) /// + ylabel(,labsize(vsmall)) /// xlabel(,labsize(vsmall)) /// legend(size(small)) /// graphregion(color(white)) twoway (rarea `vble'_5_sim_high `vble'_5_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_5_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 35-39") /// name(`vble'_5, replace) /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// - ylabel(40[5]58,labsize(vsmall)) /// + ylabel(,labsize(vsmall)) /// xlabel(,labsize(vsmall)) /// legend(size(small)) /// graphregion(color(white)) twoway (rarea `vble'_6_sim_high `vble'_6_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_6_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 40-59") /// name(`vble'_6, replace) /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// - ylabel(40[5]58,labsize(vsmall)) /// + ylabel(,labsize(vsmall)) /// 
xlabel(,labsize(vsmall)) /// legend(size(small)) /// graphregion(color(white)) twoway (rarea `vble'_7_sim_high `vble'_7_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// (line `vble'_7_valid year, sort color(red) legend(label(2 "UKHLS"))), /// subtitle("Age 60-79") /// name(`vble'_7, replace) /// xtitle("Year", size(small)) /// ytitle("Score", size(small)) /// - ylabel(40[5]58,labsize(vsmall)) /// + ylabel(,labsize(vsmall)) /// xlabel(,labsize(vsmall)) /// legend(size(small)) /// graphregion(color(white)) - /*twoway (rarea `vble'_8_sim_high `vble'_8_sim_low year, sort /// - color(green%20) legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_8_valid year, sort color(green) legend(label(2 "UKHLS"))), /// - title("age 80-100") name(`vble'_8, replace) ylabel(1 [1] 5)*/ + twoway (rarea `vble'_8_sim_high `vble'_8_sim_low year, sort /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// + (line `vble'_8_valid year, sort color(red) legend(label(2 "UKHLS"))), /// + subtitle("Age 80-100") /// + name(`vble'_8, replace) /// + xtitle("Year", size(small)) /// + ytitle("Score", size(small)) /// + ylabel(,labsize(vsmall)) /// + xlabel(,labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) } * Save figures grc1leg health_f_1 health_f_2 health_f_3 health_f_4 health_f_5 /// - health_f_6 health_f_7 /*health_f_8*/, /// + health_f_6 health_f_7 health_f_8, /// title("Mental Health") /// subtitle("MCS, females") /// legendfrom(health_f_1) /// graphregion(color(white)) /// + ycomm /// note("Notes: mcs is the SF-12 Mental Health Component. This is a measure of Mental function ranging from 0 (low functioning) to" "100 (high functioning). 
", /// size(vsmall)) @@ -978,11 +1019,12 @@ graph export /// grc1leg health_m_1 health_m_2 health_m_3 health_m_4 health_m_5 /// - health_m_6 health_m_7 /*health_m_8*/, /// + health_m_6 health_m_7 health_m_8, /// title("Mental Health") /// - subtitle("mcs, males") /// + subtitle("MCS, males") /// legendfrom(health_m_1) /// graphregion(color(white)) /// + ycomm /// note("Notes: mcs is the SF-12 Mental Health Component. This is a measure of Mental function ranging from 0 (low functioning) to" "100 (high functioning). ", /// size(vsmall)) @@ -1002,15 +1044,22 @@ graph drop _all ******************************************************************************** * Working age -use year dwt dhe dag using /// +use year dwt valid_healthSelfRated demAge using /// "$dir_data/ukhls_validation_sample.dta", clear -gen health = dhe +keep if inrange(demAge,16,65) + +gen health = valid_healthSelfRated save "$dir_data/temp_valid_stats.dta", replace * Prepare Simulated data -use run year dhe dag run using "$dir_data/simulated_data.dta", clear +use run year sim_healthSelfRated demAge run using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,16,65) + +gen healthSelfRated = sim_healthSelfRated keep if run == 1 @@ -1018,10 +1067,11 @@ append using "$dir_data/temp_valid_stats.dta" * Plot figure -twoway (hist dhe, width(0.2) color(green%30) legend(label(1 "Simulated"))) /// +twoway (hist healthSelfRated, width(0.2) color(green%30) /// + legend(label(1 "SimPaths"))) /// (hist health, width(0.2) color(red%30) legend(label(2 "UKHLS"))), /// - title("General Health Score") /// - subtitle("sf1, ages 18-65") /// + title("Self Rated Health") /// + subtitle("Ages 16-65") /// xtitle("Score", size(small)) /// ytitle("Year", size(small)) /// xlabel(,labsize(small)) /// @@ -1032,7 +1082,7 @@ twoway (hist dhe, width(0.2) color(green%30) legend(label(1 "Simulated"))) /// size(vsmall)) graph export /// 
-"$dir_output_files/health/validation_${country}_sf1_hist_${min_age}_${max_age}_both.jpg", /// +"$dir_output_files/health/validation_${country}_self_rated_hist_16_65_both.jpg", /// replace width(2560) height(1440) quality(100) @@ -1040,32 +1090,33 @@ graph export /// * 2.2 : Histograms - Working age, by gender ******************************************************************************** -use year dwt dhe dgn using /// +use year dwt demAge valid_healthSelfRated demMaleFlag using /// "$dir_data/ukhls_validation_sample.dta", clear -gen health = dhe +keep if inrange(demAge,16,65) + +gen health = valid_healthSelfRated save "$dir_data/temp_valid_stats.dta", replace * Prepare Simulated data -use run year dhe dgn run using "$dir_data/simulated_data.dta", clear +use run year demAge sim_healthSelfRated demMaleFlag run using /// + "$dir_data/simulation_sample.dta", clear +keep if inrange(demAge,16,65) + +gen healthSelfRated = sim_healthSelfRated + keep if run == 1 -gen dgn2 = 0 if dgn == "Female" -replace dgn2 = 1 if dgn == "Male" - -drop dgn -rename dgn2 dgn - append using "$dir_data/temp_valid_stats.dta" * Plot figure -twoway (hist dhe if dgn == 0, width(0.2) color(green%30) /// - legend(label(1 "Simulated"))) /// -(hist health if dgn == 0, width(0.2) color(red%30) /// +twoway (hist healthSelfRated if demMaleFlag == 0, width(0.2) color(green%30) /// + legend(label(1 "SimPaths"))) /// +(hist health if demMaleFlag == 0, width(0.2) color(red%30) /// legend(label(2 "UKHLS"))), /// subtitle("Females") /// name(health_female, replace) /// @@ -1074,12 +1125,12 @@ twoway (hist dhe if dgn == 0, width(0.2) color(green%30) /// xlabel(,labsize(small)) /// ylabel(0[.5]2.5,labsize(small)) /// legend(size(small)) /// - graphregion(color(white)) /// + graphregion(color(white)) -twoway (hist dhe if dgn == 1, width(0.2) color(green%30) /// - legend(label(1 "Simulated"))) /// -(hist health if dgn == 1, width(0.2) color(red%30) /// +twoway (hist healthSelfRated if demMaleFlag == 1, 
width(0.2) color(green%30) /// + legend(label(1 "SimPaths"))) /// +(hist health if demMaleFlag == 1, width(0.2) color(red%30) /// legend(label(2 "UKHLS"))), /// subtitle("Males") /// name(health_male, replace) /// @@ -1088,20 +1139,20 @@ twoway (hist dhe if dgn == 1, width(0.2) color(green%30) /// xlabel(,labsize(small)) /// ylabel(0[.5]2.5,labsize(small)) /// legend(size(small)) /// - graphregion(color(white)) /// - + graphregion(color(white)) grc1leg health_female health_male, /// - title("General Health Score") /// - subtitle("sf1, ages 18-65") /// + title("Self Rated Health") /// + subtitle("Ages 16-65") /// legendfrom(health_male) rows(1) /// graphregion(color(white)) /// + ycomm /// note("Notes: The health variable is a self-assessed variable and follows a 5-point Likert scale (1 = poor, ..., 5 = excellent). ", /// size(vsmall)) graph export /// -"$dir_output_files/health/validation_${country}_sf1_hist_${min_age}_${max_age}_gender.jpg", /// +"$dir_output_files/health/validation_${country}_self_rated_hist_16_65_gender.jpg", /// replace width(2560) height(1440) quality(100) graph drop _all diff --git a/validation/02_simulated_output_validation/do_files/06_14_plot_at_risk_of_poverty.do b/validation/02_simulated_output_validation/do_files/04_14_plot_at_risk_of_poverty.do similarity index 68% rename from validation/02_simulated_output_validation/do_files/06_14_plot_at_risk_of_poverty.do rename to validation/02_simulated_output_validation/do_files/04_14_plot_at_risk_of_poverty.do index a23f1c925..b53a69647 100644 --- a/validation/02_simulated_output_validation/do_files/06_14_plot_at_risk_of_poverty.do +++ b/validation/02_simulated_output_validation/do_files/04_14_plot_at_risk_of_poverty.do @@ -1,13 +1,13 @@ -******************************************************************************** -* PROJECT: ESPON +/******************************************************************************* +* PROJECT: SimPaths UK * SECTION: Validation * OBJECT: Risk of poverty * 
AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 (AB) +* LAST UPDATE: Jan 2026 * COUNTRY: UK - -* NOTES: ******************************************************************************** +* NOTES: +*******************************************************************************/ ******************************************************************************** * 1 : Mean values over time @@ -18,16 +18,18 @@ ******************************************************************************** * Prepare validation data -use year dwt valid_y_eq_disp_bu_yr using /// +use year demAge dwt valid_yDispBuEquivYear using /// "$dir_data/ukhls_validation_sample.dta", clear +keep if inrange(demAge,18,65) + * Trim outliers if "$trim_outliers" == "true" { - sum valid_y_eq_disp_bu_yr, d + sum valid_yDispBuEquivYear, d - replace valid_y_eq_disp_bu_yr = . if /// - valid_y_eq_disp_bu_yr < r(p1) | valid_y_eq_disp_bu_yr > r(p99) + replace valid_yDispBuEquivYear = . if /// + valid_yDispBuEquivYear < r(p1) | valid_yDispBuEquivYear > r(p99) } @@ -38,12 +40,12 @@ local max_year = r(max) gen poverty_line = . forval year = `min_year'/`max_year' { - sum valid_y_eq_disp_bu_yr if year == `year', d + sum valid_yDispBuEquivYear if year == `year', d replace poverty_line = 0.6*r(p50) if year == `year' } -gen arop = (valid_y_eq_disp_bu_yr < poverty_line) +gen arop = (valid_yDispBuEquivYear < poverty_line) collapse (mean) arop [aw = dwt], by(year) @@ -51,22 +53,27 @@ save "$dir_data/temp_valid_stats.dta", replace * Prepare simulated data -use run year equivalisedincome using "$dir_data/simulated_data.dta", clear +use run year demAge sim_yDispEquivYear using /// + "$dir_data/simulation_sample.dta", clear * Trim outliers if "$trim_outliers" == "true" { - sum equivalisedincome, d - replace equivalisedincome = . if /// - equivalisedincome < r(p1) | equivalisedincome > r(p99) + sum sim_yDispEquivYear, d + + replace sim_yDispEquivYear = . 
if /// + sim_yDispEquivYear < r(p1) | sim_yDispEquivYear > r(p99) } -bys run year: egen equivincome_median = median(equivalisedincome) +bys run year: egen equivincome_median = median(sim_yDispEquivYear) + gen poverty_line = 0.6*equivincome_median -gen arop_sim = (equivalisedincome < poverty_line) + +gen arop_sim = (sim_yDispEquivYear < poverty_line) collapse (mean) arop_sim, by(run year) + collapse (mean) arop_sim /// (sd) arop_sim_sd = arop_sim /// , by(year) @@ -82,7 +89,7 @@ merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen * Plot figure twoway (rarea arop_sim_high arop_sim_low year, sort color(green%20) /// - legend(label(1 "Simulated"))) /// + legend(label(1 "SimPaths"))) /// (line arop year, sort color(green) legend(label(2 "UKHLS"))), /// title("At Risk of Poverty") /// subtitle("Ages 18-65") /// @@ -96,7 +103,7 @@ twoway (rarea arop_sim_high arop_sim_low year, sort color(green%20) /// size(vsmall)) * Save figure -graph export "$dir_output_files/poverty/validation_${country}_at_risk_of_poverty_${min_age}_${max_age}.jpg", /// +graph export "$dir_output_files/poverty/validation_${country}_at_risk_of_poverty_18_${max_age}.jpg", /// replace width(2560) height(1440) quality(100) @@ -106,19 +113,19 @@ graph export "$dir_output_files/poverty/validation_${country}_at_risk_of_poverty ******************************************************************************** * Prepare validation data -use year dwt valid_y_eq_disp_bu_yr dag using /// - "$dir_data/ukhls_validation_full_sample.dta", clear +use year demAge dwt valid_yDispBuEquivYear using /// + "$dir_data/ukhls_validation_sample.dta", clear * Select sample -drop if dag < 18 +drop if demAge < 18 * Trim outliers if "$trim_outliers" == "true" { - sum valid_y_eq_disp_bu_yr, d + sum valid_yDispBuEquivYear, d - replace valid_y_eq_disp_bu_yr = . if /// - valid_y_eq_disp_bu_yr < r(p1) | valid_y_eq_disp_bu_yr > r(p99) + replace valid_yDispBuEquivYear = . 
if /// + valid_yDispBuEquivYear < r(p1) | valid_yDispBuEquivYear > r(p99) } @@ -127,14 +134,15 @@ local min_year = 2011 local max_year = r(max) gen poverty_line = . + forval year = `min_year'/`max_year' { - sum valid_y_eq_disp_bu_yr if year == `year', d + sum valid_yDispBuEquivYear if year == `year', d replace poverty_line = 0.6*r(p50) if year == `year' } -gen arop = (valid_y_eq_disp_bu_yr < poverty_line) +gen arop = (valid_yDispBuEquivYear < poverty_line) collapse (mean) arop [aw = dwt], by(year) @@ -142,26 +150,30 @@ save "$dir_data/temp_valid_stats.dta", replace * Prepare simulated data -use run year equivalisedincome dag using "$dir_data/simulated_data.dta", clear +use run year demAge sim_yDispEquivYear using /// + "$dir_data/simulation_sample.dta", clear * Select sample -drop if dag < 18 +drop if demAge < 18 * Trim outliers if "$trim_outliers" == "true" { - sum equivalisedincome, d + sum sim_yDispEquivYear, d - replace equivalisedincome = . if /// - equivalisedincome < r(p1) | equivalisedincome > r(p99) + replace sim_yDispEquivYear = . 
if /// + sim_yDispEquivYear < r(p1) | sim_yDispEquivYear > r(p99) } -bys run year: egen equivincome_median = median(equivalisedincome) +bys run year: egen equivincome_median = median(sim_yDispEquivYear) + gen poverty_line = 0.6*equivincome_median -gen arop_sim = (equivalisedincome < poverty_line) + +gen arop_sim = (sim_yDispEquivYear < poverty_line) collapse (mean) arop_sim, by(run year) + collapse (mean) arop_sim /// (sd) arop_sim_sd = arop_sim /// , by(year) @@ -177,7 +189,7 @@ merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen * Plot figure twoway (rarea arop_sim_high arop_sim_low year, sort color(green%20) /// - legend(label(1 "Simulated"))) /// + legend(label(1 "SimPaths"))) /// (line arop year, sort color(green) legend(label(2 "UKHLS"))), /// title("At Risk of Poverty") /// subtitle("Ages 18+") /// @@ -191,7 +203,7 @@ twoway (rarea arop_sim_high arop_sim_low year, sort color(green%20) /// size(vsmall)) * Save figure -graph export "$dir_output_files/poverty/validation_${country}_at_risk_of_poverty_${min_age}plus.jpg", /// +graph export "$dir_output_files/poverty/validation_${country}_at_risk_of_poverty_18plus.jpg", /// replace width(2560) height(1440) quality(100) diff --git a/validation/02_simulated_output_validation/do_files/04_15_plot_inequality.do b/validation/02_simulated_output_validation/do_files/04_15_plot_inequality.do new file mode 100644 index 000000000..b923764fc --- /dev/null +++ b/validation/02_simulated_output_validation/do_files/04_15_plot_inequality.do @@ -0,0 +1,457 @@ +******************************************************************************** +* PROJECT: SimPath UK +* SECTION: Validation +* OBJECT: Inequality +* AUTHORS: Ashley Burdett +* LAST UPDATE: 9/2025 (AB) +* COUNTRY: UK +******************************************************************************** +* NOTES: Equivalized disposable income used to created ratios +******************************************************************************** + +//ssc install 
ineqdeco + +******************************************************************************** +* 1 : Income ratios through time +******************************************************************************** + +******************************************************************************** +* 1.1 : Income ratio, 90/50 +******************************************************************************** + +* Prepare validation data +use year demAge dwt valid_yDispBuEquivYear using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yDispBuEquivYear, d + + replace valid_yDispBuEquivYear = . if /// + valid_yDispBuEquivYear < r(p1) | valid_yDispBuEquivYear > r(p99) + +} + +collapse (p90) p90_disp = valid_yDispBuEquivYear /// + (p50) p50_disp = valid_yDispBuEquivYear /// + [aw = dwt] , by(year) + +gen p90_p50_ratio_disp_obs = p90_disp/p50_disp + +* Align reference years +gen l_p90_p50_ratio_disp_obs = p90_p50_ratio_disp_obs[_n+1] + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year demAge sim_yDispEquivYear using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yDispEquivYear, d + + replace sim_yDispEquivYear = . 
if /// + sim_yDispEquivYear < r(p1) | sim_yDispEquivYear > r(p99) + +} + +collapse (p90) p90_disp = sim_yDispEquivYear /// + (p50) p50_disp = sim_yDispEquivYear, by(run year) + +gen p90_p50_ratio_disp = p90_disp/p50_disp + +collapse (mean) p90_p50_ratio_disp /// + (sd) sd_p90_p50_ratio_disp = p90_p50_ratio_disp /// + , by(year) + + foreach var in p90_p50_ratio_disp { + + gen `var'_high = `var' + 1.96*sd_`var' + gen `var'_low = `var' - 1.96*sd_`var' + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + + +* Plot figure +twoway (rarea p90_p50_ratio_disp_high p90_p50_ratio_disp_low year, sort /// + color(green%20) legend(label(1 "SimPaths") position(6) rows(1))) /// +(line p90_p50_ratio_disp_obs year, sort color(green)legend(label(2 "UKHLS"))), /// + title("P90/P50 Disposable Income Ratio") /// + subtitle("Ages 18-65") /// + xtitle("Year", size(small)) /// + ytitle("Ratio", size(small)) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Ratios computed using individual observations of benefit unit measure of equivalized disposable income.", /// + size(vsmall)) + +* Save figure +graph export "$dir_output_files/inequality/validation_${country}_p90p50.jpg", /// + replace width(2400) height(1350) quality(100) + + +******************************************************************************** +* 1.1 : Income ratio, 90/10 +******************************************************************************** + +* Prepare validation data +use year demAge dwt valid_yDispBuEquivYear using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yDispBuEquivYear, d + + replace valid_yDispBuEquivYear = . 
if /// + valid_yDispBuEquivYear < r(p1) | valid_yDispBuEquivYear > r(p99) + +} + +collapse (p90) p90_disp = valid_yDispBuEquivYear /// + (p10) p10_disp = valid_yDispBuEquivYear /// + [aw = dwt], by(year) + +gen p90_p10_ratio_disp_obs = p90_disp/p10_disp + +* Align reference years +gen l_p90_p10_ratio_disp_obs = p90_p10_ratio_disp_obs[_n+1] + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year demAge sim_yDispEquivYear using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yDispEquivYear, d + + replace sim_yDispEquivYear = . if /// + sim_yDispEquivYear < r(p1) | sim_yDispEquivYear > r(p99) + +} + +collapse (p90) p90_disp = sim_yDispEquivYear /// + (p10) p10_disp = sim_yDispEquivYear, by(run year) + +gen p90_p10_ratio_disp = p90_disp/p10_disp + +collapse (mean) p90_p10_ratio_disp /// + (sd) sd_p90_p10_ratio_disp = p90_p10_ratio_disp /// + , by(year) + + foreach var in p90_p10_ratio_disp { + + gen `var'_high = `var' + 1.96*sd_`var' + gen `var'_low = `var' - 1.96*sd_`var' + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + + +* Plot figure +twoway (rarea p90_p10_ratio_disp_high p90_p10_ratio_disp_low year, sort /// + color(green%20) legend(label(1 "SimPaths") position(6) rows(1))) /// +(line p90_p10_ratio_disp_obs year, sort color(green)legend(label(2 "UKHLS"))), /// + title("P90/P10 Disposable Income Ratio") /// + subtitle("Ages 18-65") /// + xtitle("Year", size(small)) /// + ytitle("Ratio", size(small)) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Ratios computed using individual observations of benefit unit measure of equivalized disposable income.", /// + size(vsmall)) + +* Save figure +graph export "$dir_output_files/inequality/validation_${country}_p90p10.jpg", /// + replace width(2400) height(1350) quality(100) 
+ + +******************************************************************************** +* 1.3 : Gini coefficient +******************************************************************************** + +* Prepare validation data +use year demAge dwt valid_yDispBuEquivYear using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum valid_yDispBuEquivYear, d + + replace valid_yDispBuEquivYear = . if /// + valid_yDispBuEquivYear < r(p1) | valid_yDispBuEquivYear > r(p99) + +} + +* Calculate gini for each year; stored as valid_gini so the merge below keeps it alongside the simulated gini +statsby valid_gini = r(gini), by(year) clear: ineqdeco valid_yDispBuEquivYear [aw=dwt] + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year demAge sim_yDispEquivYear using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Trim outliers +if "$trim_outliers" == "true" { + + sum sim_yDispEquivYear, d + + replace sim_yDispEquivYear = . if /// + sim_yDispEquivYear < r(p1) | sim_yDispEquivYear > r(p99) + +} + +* Calculate gini for each year and run +statsby gini = r(gini), by(year run) clear: ineqdeco sim_yDispEquivYear + +* Obtain the mean and standard deviation by year +collapse (mean) gini /// + (sd) gini_sd = gini, by(year) + +* Compute the 95% confidence interval +gen gini_high = gini + 1.96 * gini_sd +gen gini_low = gini - 1.96 * gini_sd + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure +twoway (rarea gini_high gini_low year, sort /// + color(green%20) legend(label(1 "SimPaths") position(6) rows(1))) /// +(line valid_gini year, sort color(green)legend(label(2 "UKHLS"))), /// + title("Gini Coefficient") /// + subtitle("Ages 18-65") /// + xtitle("Year", size(small)) /// + ytitle("Coefficient", size(small)) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Gini coefficient computed using
individual observations of benefit unit measure of equivalized disposable income.", /// + size(vsmall)) + +* Save figure +graph export "$dir_output_files/inequality/validation_${country}_gini.jpg", /// + replace width(2400) height(1350) quality(100) + + +graph drop _all + + + +******************************************************************************** +* 1.4 : Net transfers +******************************************************************************** + +use "$dir_data/simulation_sample.dta", clear + +* keep only one observation per benefit unit +sort run idBu year + +* Keep one observation per benefit unit +* Create a marker for the first observation in each group +bys run year idBu (demAge): gen byte to_keep = (_n == _N) + +* Keep only the marked rows +keep if to_keep == 1 +drop to_keep + +* Create gross income deciles +sort run idPers year + +xtile sim_decile = sim_yNonBenBuGrossLevelYear , n(10) + +tab sim_decile + + +* Plots + +* Sources + +* All +graph bar (mean) sim_net_transfers , over(sim_decile) /// + title("SimPaths") /// + name(simulated_net_trans_all, replace) /// + b1title("BU Gross Income Decile", size(small)) /// + ytitle("£") /// + ylabel(#5, format(%12.0fc)) /// + ylabel(,labsize(small)) /// + graphregion(color(white)) + + +* Oldest <= 65 +preserve + +drop if demAge > 65 + +graph bar (mean) sim_net_transfers , over(sim_decile) /// + title("SimPaths") /// + name(simulated_net_trans_upto65, replace) /// + b1title("BU Gross Income Decile", size(small)) /// + ytitle("£") /// + ylabel(#5, format(%12.0fc)) /// + ylabel(,labsize(small)) /// + graphregion(color(white)) + + +restore + +* Oldest > 65 +preserve + +drop if demAge <= 65 + +graph bar (mean) sim_net_transfers , over(sim_decile) /// + title("SimPaths") /// + name(simulated_net_trans_66plus, replace) /// + b1title("BU Gross Income Decile", size(small)) /// + ytitle("£") /// + ylabel(#5, format(%12.0fc)) /// + ylabel(,labsize(small)) /// + graphregion(color(white)) + +restore + + +use 
"$dir_data/ukhls_validation_sample.dta", clear + +* keep only one observation per benefit unit +sort idBu year + +* Keep one observation per benefit unit +* Create a marker for the first observation in each group +bys year idBu (demAge): gen byte to_keep = (_n == _N) + +* Keep only the marked rows +keep if to_keep == 1 +drop to_keep + +* Create gross income deciles +sort idPers year + +xtile valid_decile = valid_yNonBenBuGrossLevelYear , n(10) + +tab valid_decile + + +* Plots + +* Sources + +* All +graph bar (mean) valid_net_transfers , over(valid_decile) /// + title("UKHLS") /// + name(valid_net_trans_all, replace) /// + b1title("BU Gross Income Decile", size(small)) /// + ytitle("£") /// + ylabel(#5, format(%12.0fc)) /// + ylabel(,labsize(small)) /// + graphregion(color(white)) + + +* Oldest <= 65 +preserve + +drop if demAge > 65 + +graph bar (mean) valid_net_transfers , over(valid_decile) /// + title("UKHLS") /// + name(valid_net_trans_upto65, replace) /// + b1title("BU Gross Income Decile", size(small)) /// + ytitle("£") /// + ylabel(#5, format(%12.0fc)) /// + ylabel(,labsize(small)) /// + graphregion(color(white)) + + +restore + +* Oldest > 65 +preserve + +drop if demAge <= 65 + +graph bar (mean) valid_net_transfers , over(valid_decile) /// + title("UKHLS") /// + name(valid_net_trans_66plus, replace) /// + b1title("BU Gross Income Decile", size(small)) /// + ytitle("£") /// + ylabel(#5, format(%12.0fc)) /// + ylabel(,labsize(small)) /// + graphregion(color(white)) + +restore + +//net install grc1leg2, from(http://digital.cgdev.org/doc/stata/MO/Misc) + + +* Shares - Retirement age +grc1leg2 simulated_net_trans_all valid_net_trans_all, /// + rows(1) ycommon loff /// + graphregion(color(white)) /// + title("Average Net Transfers") /// + subtitle("All ages") /// + note("NOTE: ", /// + size(vsmall)) + +graph export /// + "$dir_output_files/inequality/validation_${country}_net_transfers_all.png", /// + replace width(2400) height(1350) + + +grc1leg2 
simulated_net_trans_upto65 valid_net_trans_upto65 , /// + rows(1) ycommon loff /// + graphregion(color(white)) /// + title("Average Net Transfers") /// + subtitle("Oldest Age <= 65") /// + note("NOTE: ", /// + size(vsmall)) + +graph export /// + "$dir_output_files/inequality/validation_${country}_net_transfers_upto65.png", /// + replace width(2400) height(1350) + + +grc1leg2 simulated_net_trans_66plus valid_net_trans_66plus , /// + rows(1) ycommon loff /// + graphregion(color(white)) /// + title("Average Net Transfers") /// + subtitle("Oldest Age > 65") /// + note("NOTE: ", /// + size(vsmall)) + +graph export /// + "$dir_output_files/inequality/validation_${country}_net_transfers_66plus.png", /// + replace width(2400) height(1350) + +graph drop _all + diff --git a/validation/02_simulated_output_validation/do_files/04_16_plot_number_children.do b/validation/02_simulated_output_validation/do_files/04_16_plot_number_children.do new file mode 100644 index 000000000..26646a36a --- /dev/null +++ b/validation/02_simulated_output_validation/do_files/04_16_plot_number_children.do @@ -0,0 +1,582 @@ +******************************************************************************** +* PROJECT: SimPaths UK +* SECTION: Validation +* OBJECT: Children +* AUTHORS: Ashley Burdett +* LAST UPDATE: Jan 2026 +* COUNTRY: UK +******************************************************************************** +* NOTES: This do file plots simulated and UKHLS % of benefit units +* with a given number of children +******************************************************************************** + +******************************************************************************** +* 1 : Mean values over time +******************************************************************************** + +******************************************************************************** +* 1.1 : Mean values over time, working age (18-65), children < 18 
+******************************************************************************** + +* Prepare validation data +use year demAge idPers idBu dwt children_* using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Calculate weighted share of benefit units with 0, 1, 2, 3 or more children +collapse (mean) children_* [aw = dwt], by(year) + +foreach varname in children_0 children_1 children_2 children_3p { + + rename `varname' valid_`varname' + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year demAge idBu children_* using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) + +collapse (mean) children_*, by(run year) + +rename children_3plus children_3p + +collapse (mean) children_* /// + (sd) children_0_sd = children_0 /// + children_1_sd = children_1 /// + children_2_sd = children_2 /// + children_3p_sd = children_3p /// + , by(year) + +foreach varname in children_0 children_1 children_2 children_3p { + + gen sim_`varname'_h = `varname' + 1.96*`varname'_sd + gen sim_`varname'_l = `varname' - 1.96*`varname'_sd + rename `varname' sim_`varname' + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figures +label var sim_children_0 "No children" +label var sim_children_1 "1 child" +label var sim_children_2 "2 children" +label var sim_children_3p "3+ children" + + +twoway (rarea sim_children_0_h sim_children_0_l year, /// + sort color(green%20) legend(label(1 "No children, SimPaths"))) /// +(line valid_children_0 year, sort color(green) /// + legend(label(2 "No children, UKHLS"))) /// + (rarea sim_children_1_h sim_children_1_l year, sort color(blue%20) /// + legend(label(3 "1 child, SimPaths"))) /// +(line valid_children_1 year, sort color(blue) /// + legend(label(4 "1 child, UKHLS"))) /// +(rarea sim_children_2_h sim_children_2_l year, sort color(red%20) /// + legend(label(5 "2 children, SimPaths"))) /// +(line valid_children_2 
year, sort color(red) /// + legend(label(6 "2 children, UKHLS"))) /// +(rarea sim_children_3p_h sim_children_3p_l year, sort color(grey%20) /// + legend(label(7 "3+ children, SimPaths"))) /// +(line valid_children_3p year, sort color(grey) /// + legend(label(8 "3+ children, UKHLS"))), /// + title("Number of Children") /// + subtitle("Ages 18-65") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) ///) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Individual observations plotted.", size(vsmall)) + +* Save figure +graph export "$dir_output_files/children/validation_${country}_children_ts_18_65_both.jpg", /// + replace width(2400) height(1350) quality(100) + + +******************************************************************************** +* 1.2 : Mean values over time, working age (18-65), children < 18, by gender +******************************************************************************** + +* Prepare validation data +use year demAge idPers idBu dwt children_* demMaleFlag using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +* Calculate weighted share of benefit units with 0, 1, 2, 3 or more children +collapse (mean) children_* [aw = dwt], by(year demMaleFlag) + +foreach varname in children_0 children_1 children_2 children_3p { + + rename `varname' valid_`varname' + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year demAge idBu children_* demMaleFlag using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) + +collapse (mean) children_*, by(run year demMaleFlag) + +rename children_3plus children_3p + +collapse (mean) children_* /// + (sd) children_0_sd = children_0 /// + children_1_sd = children_1 /// + children_2_sd = children_2 /// + children_3p_sd = children_3p /// + , by(year demMaleFlag) + +foreach varname in children_0 children_1 
children_2 children_3p { + + gen sim_`varname'_h = `varname' + 1.96*`varname'_sd + gen sim_`varname'_l = `varname' - 1.96*`varname'_sd + rename `varname' sim_`varname' + +} + +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figures +label var sim_children_0 "No children" +label var sim_children_1 "1 child" +label var sim_children_2 "2 children" +label var sim_children_3p "3+ children" + +* Males + +preserve + +keep if demMaleFlag == 1 + +twoway (rarea sim_children_0_h sim_children_0_l year, /// + sort color(green%20) legend(label(1 "No children, SimPaths"))) /// +(line valid_children_0 year, sort color(green) /// + legend(label(2 "No children, UKHLS"))) /// + (rarea sim_children_1_h sim_children_1_l year, sort color(blue%20) /// + legend(label(3 "1 child, SimPaths"))) /// +(line valid_children_1 year, sort color(blue) /// + legend(label(4 "1 child, UKHLS"))) /// +(rarea sim_children_2_h sim_children_2_l year, sort color(red%20) /// + legend(label(5 "2 children, SimPaths"))) /// +(line valid_children_2 year, sort color(red) /// + legend(label(6 "2 children, UKHLS"))) /// +(rarea sim_children_3p_h sim_children_3p_l year, sort color(grey%20) /// + legend(label(7 "3+ children, SimPaths"))) /// +(line valid_children_3p year, sort color(grey) /// + legend(label(8 "3+ children, UKHLS"))), /// + title("Number of Children") /// + subtitle("Ages 18-65, males") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) ///) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Individual observations plotted.", size(vsmall)) + +* Save figure +graph export "$dir_output_files/children/validation_${country}_children_ts_18_65_male.jpg", /// + replace width(2400) height(1350) quality(100) + +restore + +* Females + +keep if demMaleFlag== 0 + +twoway (rarea sim_children_0_h sim_children_0_l year, /// + sort color(green%20) legend(label(1 "No 
children, SimPaths"))) /// +(line valid_children_0 year, sort color(green) /// + legend(label(2 "No children, UKHLS"))) /// + (rarea sim_children_1_h sim_children_1_l year, sort color(blue%20) /// + legend(label(3 "1 child, SimPaths"))) /// +(line valid_children_1 year, sort color(blue) /// + legend(label(4 "1 child, UKHLS"))) /// +(rarea sim_children_2_h sim_children_2_l year, sort color(red%20) /// + legend(label(5 "2 children, SimPaths"))) /// +(line valid_children_2 year, sort color(red) /// + legend(label(6 "2 children, UKHLS"))) /// +(rarea sim_children_3p_h sim_children_3p_l year, sort color(grey%20) /// + legend(label(7 "3+ children, SimPaths"))) /// +(line valid_children_3p year, sort color(grey) /// + legend(label(8 "3+ children, UKHLS"))), /// + title("Number of Children") /// + subtitle("Ages 18-65, females") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) ///) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Individual observations plotted.", size(vsmall)) + +* Save figure +graph export "$dir_output_files/children/validation_${country}_children_ts_18_65_female.jpg", /// + replace width(2400) height(1350) quality(100) + + +******************************************************************************** +* 1.3 : Mean values over time, working age (18-65), children < 3 +******************************************************************************** + +* Prepare validation data +use year demAge idBu dwt demNChild0to2 using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +gen child02 = . +replace child02 = 0 if demNChild0to2 == 0 +replace child02 = 1 if demNChild0to2 > 0 & demNChild0to2 != . 
+ +* Calculate weighted share with at least one child aged 0-2 +collapse (mean) child02 [aw = dwt], by(year) + +foreach varname in child02 { + + rename `varname' valid_`varname' + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data (restricted to ages 18-65 in the collapse below, matching the validation sample) +use run year demAge idBu sim_demNChild0to2 using /// + "$dir_data/simulation_sample.dta", clear + +gen sim_child02 = . +replace sim_child02 = 0 if sim_demNChild0to2 == 0 +replace sim_child02 = 1 if sim_demNChild0to2 > 0 & sim_demNChild0to2 != . + +collapse (mean) sim_child02 if inrange(demAge,18,65), by(run year) + +collapse (mean) sim_child02 /// + (sd) sim_child02_sd = sim_child02 /// + , by(year) + +foreach varname in sim_child02 { + + gen `varname'_h = `varname' + 1.96*`varname'_sd + gen `varname'_l = `varname' - 1.96*`varname'_sd + rename `varname' sim_`varname' + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + + +* Plot figures +twoway (rarea sim_child02_h sim_child02_l year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_child02 year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Share With Child 0-2 Years Old") /// + subtitle("Ages 18-65") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes:", size(vsmall)) + +* Save figure +graph export "$dir_output_files/children/validation_${country}_young_child_ts_18_65_both.jpg", /// + replace width(2400) height(1350) quality(100) + +graph drop _all + + +******************************************************************************** +* 1.4 : Mean values over time, working age (18-65), children < 3, by gender +******************************************************************************** + +* Prepare validation data +use year demAge idBu dwt demNChild0to2 demMaleFlag using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if
inrange(demAge,18,65) + +gen child02 = . +replace child02 = 0 if demNChild0to2 == 0 +replace child02 = 1 if demNChild0to2 > 0 & demNChild0to2 != . + +* Calculate weighted share with at least one child aged 0-2, by gender +collapse (mean) child02 [aw = dwt], by(year demMaleFlag) + +foreach varname in child02 { + + rename `varname' valid_`varname' + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data (restricted to ages 18-65 in the collapse below, matching the validation sample) +use run year demAge idBu sim_demNChild0to2 demMaleFlag using /// + "$dir_data/simulation_sample.dta", clear + +gen sim_child02 = . +replace sim_child02 = 0 if sim_demNChild0to2 == 0 +replace sim_child02 = 1 if sim_demNChild0to2 > 0 & sim_demNChild0to2 != . + +collapse (mean) sim_child02 if inrange(demAge,18,65), by(run year demMaleFlag) + +collapse (mean) sim_child02 /// + (sd) sim_child02_sd = sim_child02 /// + , by(year demMaleFlag) + +foreach varname in sim_child02 { + + gen `varname'_h = `varname' + 1.96*`varname'_sd + gen `varname'_l = `varname' - 1.96*`varname'_sd + rename `varname' sim_`varname' + +} + +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen + + +* Plot figures + +* Males +preserve + +keep if demMaleFlag == 1 + +twoway (rarea sim_child02_h sim_child02_l year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_child02 year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Share With Child 0-2 Years Old") /// + subtitle("Ages 18-65, males") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: ", size(vsmall)) + +* Save figure +graph export "$dir_output_files/children/validation_${country}_young_child_ts_18_65_male.jpg", /// + replace width(2400) height(1350) quality(100) + +restore + +* Females +keep if demMaleFlag == 0 + +twoway (rarea sim_child02_h sim_child02_l year, /// + sort color(green%20) legend(label(1
"SimPaths"))) /// +(line valid_child02 year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Share With Child 0-2 Years Old") /// + subtitle("Ages 18-65, females") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: ", size(vsmall)) + +* Save figure +graph export "$dir_output_files/children/validation_${country}_young_child_ts_18_65_female.jpg", /// + replace width(2400) height(1350) quality(100) + + +graph drop _all + + + +******************************************************************************** +* 1.5 : Mean values over time, working age (18-65), new born child +******************************************************************************** + +* Prepare validation data +use year demAge idBu dwt demNChild0 using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +gen child0 = . +replace child0 = 0 if demNChild0 == 0 +replace child0 = 1 if demNChild0 > 0 & demNChild0 != . + +* Calculate weighted share of benefit units with 0, 1, 2, 3 or more children +collapse (mean) child0 [aw = dwt], by(year) + +foreach varname in child0 { + + rename `varname' valid_`varname' + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year demAge idBu sim_demNChild0 using /// + "$dir_data/simulation_sample.dta", clear + +gen sim_child0 = . +replace sim_child0 = 0 if sim_demNChild0 == 0 +replace sim_child0 = 1 if sim_demNChild0 > 0 & sim_demNChild0 != . 
+ +collapse (mean) sim_child0 if inrange(demAge,18,65), by(run year) + +collapse (mean) sim_child0 /// + (sd) sim_child0_sd = sim_child0 /// + , by(year) + +foreach varname in sim_child0 { + + gen `varname'_h = `varname' + 1.96*`varname'_sd + gen `varname'_l = `varname' - 1.96*`varname'_sd + rename `varname' sim_`varname' + +} + +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + + +* Plot figures +twoway (rarea sim_child0_h sim_child0_l year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_child0 year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Share With New Born Child") /// + subtitle("Ages 18-65") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: Constructed from benefit unit information.", size(vsmall)) + +* Save figure +graph export "$dir_output_files/children/validation_${country}_new_born_child_ts_18_65_both.jpg", /// + replace width(2400) height(1350) quality(100) + +graph drop _all + + +******************************************************************************** +* 1.6 : Mean values over time, working age (18-65), new born child, by gender +******************************************************************************** + +* Prepare validation data +use year demAge idBu dwt demNChild0 demMaleFlag using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +gen child0 = . +replace child0 = 0 if demNChild0 == 0 +replace child0 = 1 if demNChild0 > 0 & demNChild0 != .
+ +* Calculate weighted share with a new born child, by gender +collapse (mean) child0 [aw = dwt], by(year demMaleFlag) + +foreach varname in child0 { + + rename `varname' valid_`varname' + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data (restricted to ages 18-65 in the collapse below, matching the validation sample) +use run year demAge idBu sim_demNChild0 demMaleFlag using /// + "$dir_data/simulation_sample.dta", clear + +gen sim_child0 = . +replace sim_child0 = 0 if sim_demNChild0 == 0 +replace sim_child0 = 1 if sim_demNChild0 > 0 & sim_demNChild0 != . + +collapse (mean) sim_child0 if inrange(demAge,18,65), by(run year demMaleFlag) + +collapse (mean) sim_child0 /// + (sd) sim_child0_sd = sim_child0 /// + , by(year demMaleFlag) + +foreach varname in sim_child0 { + + gen `varname'_h = `varname' + 1.96*`varname'_sd + gen `varname'_l = `varname' - 1.96*`varname'_sd + rename `varname' sim_`varname' + +} + +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen + + +* Plot figures + +* Males +preserve + +keep if demMaleFlag == 1 + +twoway (rarea sim_child0_h sim_child0_l year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_child0 year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Share With New Born Child") /// + subtitle("Ages 18-65, males") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: ", size(vsmall)) + +* Save figure +graph export "$dir_output_files/children/validation_${country}_new_born_child_ts_18_65_male.jpg", /// + replace width(2400) height(1350) quality(100) + +restore + +* Females +keep if demMaleFlag == 0 + +twoway (rarea sim_child0_h sim_child0_l year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_child0 year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("Share With New Born Child") /// + subtitle("Ages 18-65, females") /// + xtitle("Year", size(small)) /// +
ytitle("Share", size(small)) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: ", size(vsmall)) + +* Save figure (file stem matches the male and pooled figures: new_born_child) +graph export "$dir_output_files/children/validation_${country}_new_born_child_ts_18_65_female.jpg", /// + replace width(2400) height(1350) quality(100) + + +graph drop _all diff --git a/validation/02_simulated_output_validation/do_files/04_17_plot_disability.do b/validation/02_simulated_output_validation/do_files/04_17_plot_disability.do new file mode 100644 index 000000000..1f5652281 --- /dev/null +++ b/validation/02_simulated_output_validation/do_files/04_17_plot_disability.do @@ -0,0 +1,321 @@ +/******************************************************************************* +* PROJECT: SimPaths UK +* SECTION: Validation +* OBJECT: Disability +* AUTHORS: Ashley Burdett +* LAST UPDATE: Jan 2026 +* COUNTRY: UK +******************************************************************************** +* NOTES: +*******************************************************************************/ + +******************************************************************************** +* 1 : Mean values over time +******************************************************************************** + +******************************************************************************** +* 1.1 : Mean values over time, working age (18-65) +******************************************************************************** + +* Prepare validation data: weighted yearly share, ages 18-65 +use year demAge dwt valid_healthDsblLongtermFlag using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +collapse (mean) valid_healthDsblLongtermFlag [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulation data +use year demAge sim_healthDsblLongtermFlag run using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) + +collapse (mean) 
sim_healthDsblLongtermFlag, by(run year) + +* Mean and sd of the run-level shares across runs +collapse (mean) sim_healthDsblLongtermFlag /// + (sd) sim_healthDsblLongtermFlag_sd = sim_healthDsblLongtermFlag, /// + by(year) + +* Approx 95% band: mean +/- 1.96 sd +gen sim_healthDsblLongtermFlag_high = /// + sim_healthDsblLongtermFlag + 1.96*sim_healthDsblLongtermFlag_sd +gen sim_healthDsblLongtermFlag_low = /// + sim_healthDsblLongtermFlag - 1.96*sim_healthDsblLongtermFlag_sd + +* Keep only years present in both the simulated and the validation series +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + + +* Plot figure: shaded band = simulated high/low, line = weighted UKHLS share +twoway (rarea sim_healthDsblLongtermFlag_high /// + sim_healthDsblLongtermFlag_low year, sort color(green%20) /// + legend(label(1 "Simulated "))) /// +(line valid_healthDsblLongtermFlag year, sort color(green) /// + legend(label(2 "UKHLS "))), /// + title("Disabled/Long-term Sick ") /// + subtitle("Ages 18-65") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: ", size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/disability/validation_${country}_disability_ts_18_65_both.jpg", /// + replace width(2400) height(1350) quality(100) + + +******************************************************************************** +* 1.2 : Mean values over time, working age (18-65), by gender +******************************************************************************** + +* Prepare validation data: weighted share by year and gender, ages 18-65 +use year demAge dwt valid_healthDsblLongtermFlag demMaleFlag using /// + "$dir_data/ukhls_validation_sample.dta", clear + +keep if inrange(demAge,18,65) + +collapse (mean) valid_healthDsblLongtermFlag [aw = dwt], by(year demMaleFlag) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulation data: share by run, year and gender, then summarised across runs +use year demAge sim_healthDsblLongtermFlag run demMaleFlag using /// + "$dir_data/simulation_sample.dta", clear + +keep if inrange(demAge,18,65) + +collapse (mean) sim_healthDsblLongtermFlag, by(run year demMaleFlag) + +collapse (mean) 
sim_healthDsblLongtermFlag /// + (sd) sim_healthDsblLongtermFlag_sd = sim_healthDsblLongtermFlag, /// + by(year demMaleFlag) + +* Approx 95% band: mean +/- 1.96 sd of the run-level shares +gen sim_healthDsblLongtermFlag_high = sim_healthDsblLongtermFlag + /// + 1.96*sim_healthDsblLongtermFlag_sd + +gen sim_healthDsblLongtermFlag_low = sim_healthDsblLongtermFlag - /// + 1.96*sim_healthDsblLongtermFlag_sd + +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure: females in green, males in red; band = simulated, line = UKHLS +twoway (rarea sim_healthDsblLongtermFlag_high /// + sim_healthDsblLongtermFlag_low year if demMaleFlag == 0, /// + sort color(green%20) legend(label(1 "Female, SimPaths"))) /// +(line valid_healthDsblLongtermFlag year if demMaleFlag == 0, sort color(green) /// + legend(label(2 "Female, UKHLS "))) /// + (rarea sim_healthDsblLongtermFlag_high sim_healthDsblLongtermFlag_low /// + year if demMaleFlag == 1, sort color(red%20) /// + legend(label(3 "Male, SimPaths"))) /// +(line valid_healthDsblLongtermFlag year if demMaleFlag == 1, sort color(red) /// + legend(label(4 "Male, UKHLS"))), /// + title("Disabled/Long-term Sick ") /// + subtitle("Ages 18-65") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) /// + note("Notes: ", size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/disability/validation_${country}_disability_ts_18_65_male_female.jpg", /// + replace width(2560) height(1440) quality(100) + + +******************************************************************************** +* 1.3 : Mean values over time, by age group +******************************************************************************** + +* Prepare validation data +use year demAge dwt valid_healthDsblLongtermFlag demMaleFlag /// + ageGroup using "$dir_data/ukhls_validation_sample.dta", clear + +drop if ageGroup == 0 + +collapse (mean) valid_healthDsblLongtermFlag [aw=dwt], by(ageGroup year) + +drop if 
missing(ageGroup) + +* One column per age group: valid_healthDsblLongtermFlag_1 ... _8 +reshape wide valid_healthDsblLongtermFlag, i(year) j(ageGroup) + +forvalues i = 1(1)8 { + + rename valid_healthDsblLongtermFlag`i' /// + valid_healthDsblLongtermFlag_`i' + +} + +save "$dir_data/temp_valid_stats_full.dta", replace + + +* Prepare simulation data +use run year sim_healthDsblLongtermFlag ageGroup using /// + "$dir_data/simulation_sample.dta", clear + +collapse (mean) sim_healthDsblLongtermFlag, by(ageGroup run year) + +drop if missing(ageGroup) + +reshape wide sim_healthDsblLongtermFlag, i(year run) j(ageGroup) + +forvalues i = 1(1)8 { + + rename sim_healthDsblLongtermFlag`i' sim_healthDsblLongtermFlag_`i' + +} + +* Mean and sd of the run-level shares across runs, per age group +collapse (mean) sim_healthDsblLongtermFlag* /// + (sd) sd_sim_healthDsblLongtermFlag_1 = sim_healthDsblLongtermFlag_1 /// + sd_sim_healthDsblLongtermFlag_2 = sim_healthDsblLongtermFlag_2 /// + sd_sim_healthDsblLongtermFlag_3 = sim_healthDsblLongtermFlag_3 /// + sd_sim_healthDsblLongtermFlag_4 = sim_healthDsblLongtermFlag_4 /// + sd_sim_healthDsblLongtermFlag_5 = sim_healthDsblLongtermFlag_5 /// + sd_sim_healthDsblLongtermFlag_6 = sim_healthDsblLongtermFlag_6 /// + sd_sim_healthDsblLongtermFlag_7 = sim_healthDsblLongtermFlag_7 /// + sd_sim_healthDsblLongtermFlag_8 = sim_healthDsblLongtermFlag_8 /// + , by(year) + +* Approx 95% band: mean +/- 1.96 sd +forvalues i = 1(1)8 { + + gen sim_healthDsblLongtermFlag_`i'_h = /// + sim_healthDsblLongtermFlag_`i' + 1.96*sd_sim_healthDsblLongtermFlag_`i' + gen sim_healthDsblLongtermFlag_`i'_l = /// + sim_healthDsblLongtermFlag_`i' - 1.96*sd_sim_healthDsblLongtermFlag_`i' + +} + +recast double year + +merge 1:1 year using "$dir_data/temp_valid_stats_full.dta", keep(3) nogen + 
+* Plot figures: one panel per age group, named <vble>_1 ... <vble>_8 +* Panel titles, in ageGroup order 1-8 +local age_labels `""15-19" "20-24" "25-29" "30-34" "35-39" "40-59" "60-79" "80-100""' + +foreach vble in "healthDsblLongtermFlag" { + + forvalues i = 1(1)8 { + + local age_label : word `i' of `age_labels' + + twoway (rarea sim_`vble'_`i'_h sim_`vble'_`i'_l year, /// + sort color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// + (line valid_`vble'_`i' year, sort legend(label(2 "UKHLS"))), /// + title("Age `age_label'") /// + name(`vble'_`i', replace) /// + ylabel(, labsize(vsmall)) /// + xlabel(, labsize(vsmall)) /// + ytitle("Share", size(small)) /// + xtitle("Year", size(small)) /// + legend(size(small)) /// + graphregion(color(white)) + + } + +} + 
+* Combine the eight panels with a single shared legend +grc1leg healthDsblLongtermFlag_1 healthDsblLongtermFlag_2 /// + healthDsblLongtermFlag_3 healthDsblLongtermFlag_4 /// + healthDsblLongtermFlag_5 healthDsblLongtermFlag_6 /// + healthDsblLongtermFlag_7 healthDsblLongtermFlag_8, /// + title("Disabled/Long-term Sick by Age Group") /// + legendfrom(healthDsblLongtermFlag_1) /// + graphregion(color(white)) /// + ycomm /// + note("Notes:", size(vsmall)) + +graph export /// +"$dir_output_files/disability/validation_${country}_disability_ts_age_groups_both.jpg", /// + replace width(2400) height(1350) quality(100) + + +graph 
drop _all diff --git a/validation/02_simulated_output_validation/do_files/04_18_plot_social_care.do b/validation/02_simulated_output_validation/do_files/04_18_plot_social_care.do new file mode 100644 index 000000000..cf7cb9836 --- /dev/null +++ b/validation/02_simulated_output_validation/do_files/04_18_plot_social_care.do @@ -0,0 +1,1832 @@ +/******************************************************************************* +* PROJECT: SimPaths UK +* SECTION: Validation +* OBJECT: Social care +* AUTHORS: Ashley Burdett +* LAST UPDATE: Feb 2026 +* COUNTRY: UK +******************************************************************************** +* NOTES: +*******************************************************************************/ + +clear all + +******************************************************************************** +* 0: Programs +******************************************************************************** + +* make_care_plot: time series plot of a simulated band against a validation +* line, exported as a jpg. +* Expects sim_<var>_h, sim_<var>_l, valid_<var> and year in the data in memory. +* var() - stub used to build the plotted variable names +* title() - graph title +* subtitle() - graph subtitle +* saving() - file stem; written to $dir_output_files/social_care/<saving>.jpg +* note() - text placed in the graph note +* name() - optional graph name; Graph is used when it is omitted +cap program drop make_care_plot + +program define make_care_plot + + syntax, var(string) title(string) subtitle(string) saving(string) /// + note(string) [name(string)] + + * name() is optional: fall back to the default graph name so that an + * empty name(, replace) is never passed to twoway + if "`name'" == "" local name "Graph" + + twoway (rarea sim_`var'_h sim_`var'_l year, /// + sort color(green%20) legend(label(1 "SimPaths"))) /// + (line valid_`var' year, sort color(green) /// + legend(label(2 "UKHLS"))), /// + title("`title'") /// + subtitle("`subtitle'") /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + xlabel(, labsize(small)) /// + ylabel(, labsize(small)) /// + legend(size(small)) /// + name(`name', replace) /// + graphregion(color(white)) /// + note("`note'", size(vsmall)) + + graph export "$dir_output_files/social_care/`saving'.jpg", /// + replace width(2400) height(1350) quality(100) +end + + +* Program for quantile mean plots (none defined yet) + + + +******************************************************************************** +* 1: Mean values over time +******************************************************************************** + 
+******************************************************************************** +* 1.1: Mean values over time - Share need care +******************************************************************************** + +* Prepare validation data +use year idBu dwt valid_careNeedFlag demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if demAge > 64 & demAge != . + +* Compute mean +collapse (mean) valid_careNeedFlag [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year idBu sim_careNeedFlag demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample (missing ages compare as > 64 in Stata, so exclude them explicitly) +keep if demAge > 64 & demAge != . + +* Compute mean and sd: share within each run, then mean and sd across runs +collapse (mean) sim_careNeedFlag, by(year run) + +collapse (mean) sim_careNeedFlag /// + (sd) sim_careNeedFlag_sd = sim_careNeedFlag, by(year) + +* Compute 95% confidence intervals +gen sim_careNeedFlag_h = sim_careNeedFlag + 1.96*sim_careNeedFlag_sd +gen sim_careNeedFlag_l = sim_careNeedFlag - 1.96*sim_careNeedFlag_sd + +* Combine datasets (NOTE(review): unlike the age-group sections, unmatched years are kept - confirm intended) +merge 1:1 year using "$dir_data/temp_valid_stats.dta" + +* Plot figure +make_care_plot, /// + var("careNeedFlag") /// + title("In Need of Care") /// + subtitle("Ages 65+") /// + saving("validation_${country}_need_care_ts_65plus_both") /// + note(`""Notes: ""') + +graph drop _all + + +******************************************************************************** +* 1.1.1 : Mean values over time - Share need care, by gender +******************************************************************************** + +* Prepare validation data +use year idBu dwt valid_careNeedFlag demAge demMaleFlag using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if demAge > 64 & demAge != . 
+ +* Compute mean +collapse (mean) valid_careNeedFlag [aw = dwt], by(year demMaleFlag) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year idBu sim_careNeedFlag demAge demMaleFlag using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample (missing ages compare as > 64 in Stata, so exclude them explicitly) +keep if demAge > 64 & demAge != . + +* Compute mean and sd: share within each run, then mean and sd across runs +collapse (mean) sim_careNeedFlag, by(year demMaleFlag run) + +collapse (mean) sim_careNeedFlag /// + (sd) sim_careNeedFlag_sd = sim_careNeedFlag, by(year demMaleFlag) + +* Compute 95% confidence intervals +gen sim_careNeedFlag_h = sim_careNeedFlag + 1.96*sim_careNeedFlag_sd +gen sim_careNeedFlag_l = sim_careNeedFlag - 1.96*sim_careNeedFlag_sd + +* Combine datasets (NOTE(review): unlike the age-group sections, unmatched years are kept - confirm intended) +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta" + +* Plot figures + +* Females +twoway (rarea sim_careNeedFlag_h sim_careNeedFlag_l year if /// + demMaleFlag == 0, sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_careNeedFlag year if demMaleFlag == 0, sort color(green) /// + legend(label(2 "UKHLS"))), /// + subtitle("Females") /// + name(health_female, replace) /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + ylabel(0[0.1]0.5,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) + +* Males +twoway (rarea sim_careNeedFlag_h sim_careNeedFlag_l year if /// + demMaleFlag == 1, sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_careNeedFlag year if demMaleFlag == 1, sort color(green) /// + legend(label(2 "UKHLS"))), /// + subtitle("Males") /// + name(health_male, replace) /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + ylabel(0[0.1]0.5,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) + +* Combine +grc1leg health_female health_male, /// + title("In Need of Care") /// + subtitle("Ages 65+") /// + legendfrom(health_female) rows(1) /// + ycomm /// + 
graphregion(color(white)) /// + note("Notes: ", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/social_care/validation_${country}_need_care_ts_65plus_gender.jpg", /// + replace width(2400) height(1350) quality(100) + +graph drop _all + + +******************************************************************************** +* 1.1.2 : Mean values over time - Share need care, by gender and age group +******************************************************************************** + +* Prepare validation data +use year dwt demMaleFlag ageGroup valid_careNeedFlag demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Rebuild ageGroup as 5-year bands from age 65, with 90-110 as the top band +drop ageGroup +gen ageGroup = 1 if inrange(demAge,65,69) +replace ageGroup = 2 if inrange(demAge,70,74) +replace ageGroup = 3 if inrange(demAge,75,79) +replace ageGroup = 4 if inrange(demAge,80,84) +replace ageGroup = 5 if inrange(demAge,85,89) +replace ageGroup = 6 if inrange(demAge,90,110) + +* Select sample +drop if demAge < 65 + +* Extensive margin dummy: negative values are set to missing +replace valid_careNeedFlag = . 
if valid_careNeedFlag < 0 + +* Gender specific vars +gen care_m = valid_careNeedFlag if demMaleFlag == 1 +gen care_f = valid_careNeedFlag if demMaleFlag == 0 + +* Compute means +collapse (mean) care* [aw = dwt], by(ageGroup year) + +* Restructure data: one column per gender and age group +drop if missing(ageGroup) +reshape wide care*, i(year) j(ageGroup) + +forvalues i = 1(1)6 { + + rename care_f`i' care_f_`i'_valid + rename care_m`i' care_m_`i'_valid + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year demMaleFlag ageGroup sim_careNeedFlag demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Same age bands as for the validation data +drop ageGroup +gen ageGroup = 1 if inrange(demAge,65,69) +replace ageGroup = 2 if inrange(demAge,70,74) +replace ageGroup = 3 if inrange(demAge,75,79) +replace ageGroup = 4 if inrange(demAge,80,84) +replace ageGroup = 5 if inrange(demAge,85,89) +replace ageGroup = 6 if inrange(demAge,90,110) + +* Select sample +drop if demAge < 65 + +* Gender specific vars +gen care_m = sim_careNeedFlag if demMaleFlag == 1 +gen care_f = sim_careNeedFlag if demMaleFlag == 0 + +drop sim_careNeedFlag + +* Compute means within each run +collapse (mean) care*, by(ageGroup run year) + +* Restructure data +drop if missing(ageGroup) +reshape wide care*, i(year run) j(ageGroup) + +* Compute mean and sd across runs +collapse (mean) care* /// + (sd) care_m_1_sd = care_m1 /// + (sd) care_f_1_sd = care_f1 /// + (sd) care_m_2_sd = care_m2 /// + (sd) care_f_2_sd = care_f2 /// + (sd) care_m_3_sd = care_m3 /// + (sd) care_f_3_sd = care_f3 /// + (sd) care_m_4_sd = care_m4 /// + (sd) care_f_4_sd = care_f4 /// + (sd) care_m_5_sd = care_m5 /// + (sd) care_f_5_sd = care_f5 /// + (sd) care_m_6_sd = care_m6 /// + (sd) care_f_6_sd = care_f6 /// + , by(year) + + +* Approx 95% confidence interval (mean +/- 1.96 sd) +forvalues i=1(1)6 { + + gen care_f_`i'_sim_high = care_f`i' + 1.96*care_f_`i'_sd + gen care_f_`i'_sim_low = care_f`i' - 1.96*care_f_`i'_sd + gen care_m_`i'_sim_high = care_m`i' + 1.96*care_m_`i'_sd + gen care_m_`i'_sim_low = care_m`i' - 1.96*care_m_`i'_sd + +} + +* 
Combine datasets +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + +* Plot figure: one panel per gender and age band, named care_f_1 ... care_m_6 +* Panel subtitles, in ageGroup order 1-6 +local age_labels `""65-69" "70-74" "75-79" "80-84" "85-89" "90+""' + +foreach vble in "care_f" "care_m" { + + forvalues i = 1(1)6 { + + local age_label : word `i' of `age_labels' + + twoway (rarea `vble'_`i'_sim_high `vble'_`i'_sim_low year, sort /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// + (line `vble'_`i'_valid year, sort color(red) legend(label(2 "UKHLS"))), /// + subtitle("Age `age_label'") /// + name(`vble'_`i', replace) /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + ylabel(0(0.3)0.9, labsize(vsmall)) /// + xlabel(,labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) + + } + +} + 
+* Save figures: combine the six panels per gender with a single shared legend +grc1leg care_f_1 care_f_2 care_f_3 care_f_4 care_f_5 care_f_6 , /// + title("In Need of Care") /// + subtitle("Females") /// + legendfrom(care_f_1) /// + ycomm /// + graphregion(color(white)) /// +note("Notes: ", /// + size(vsmall)) + +graph export /// +"$dir_output_files/social_care/validation_${country}_need_care_ts_65plus_female.jpg", /// + replace width(2400) height(1350) quality(100) + + +grc1leg care_m_1 care_m_2 care_m_3 care_m_4 care_m_5 care_m_6, /// + title("In Need of Care") /// + subtitle("Males") /// + legendfrom(care_m_1) /// + ycomm /// + graphregion(color(white)) /// +note("Notes: ", /// + size(vsmall)) + +graph export /// +"$dir_output_files/social_care/validation_${country}_need_care_ts_65plus_male.jpg", /// + replace width(2400) height(1350) quality(100) + +graph drop _all + + +/* +* Define the subtitles in a local macro +local titles "65-69" "70-74" "75-79" "80-84" "85-89" "90+" + +foreach vble in "care_f" "care_m" { + forvalues i = 1/6 { + + local t : word `i' of `titles' + + 
twoway (rarea `vble'_`i'_sim_high `vble'_`i'_sim_low year, /// + sort color(red%20)) /// + (line `vble'_`i'_valid year, sort color(red)), /// + subtitle("Age `t'") name(`vble'_`i', replace) /// + ylabel(0(0.3)0.9, labsize(vsmall)) /// + graphregion(color(white)) legend(off) + + } +} +*/ + + +******************************************************************************** +* 1.2: Mean values over time - Share receive care +******************************************************************************** + +* Prepare validation data +use year idBu dwt valid_careReceiveFlag demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if demAge > 64 & demAge != . + +* Compute mean +collapse (mean) valid_careReceiveFlag [aw = dwt], by(year) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year idBu sim_careReceiveFlag demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample (missing ages compare as > 64 in Stata, so exclude them explicitly) +keep if demAge > 64 & demAge != . + +* Compute mean and sd: share within each run, then mean and sd across runs +collapse (mean) sim_careReceiveFlag, by(year run) + +collapse (mean) sim_careReceiveFlag /// + (sd) sim_careReceiveFlag_sd = sim_careReceiveFlag, by(year) + +* Compute 95% confidence intervals +gen sim_careReceiveFlag_h = sim_careReceiveFlag + 1.96*sim_careReceiveFlag_sd +gen sim_careReceiveFlag_l = sim_careReceiveFlag - 1.96*sim_careReceiveFlag_sd + +* Combine datasets (NOTE(review): unlike the age-group sections, unmatched years are kept - confirm intended) +merge 1:1 year using "$dir_data/temp_valid_stats.dta" + +* Plot figure +make_care_plot, /// + var("careReceiveFlag") /// + title("Receive Care") /// + subtitle("Ages 65+") /// + saving("validation_${country}_receive_care_ts_65plus_both") /// + note(`""Notes: ""') + + +******************************************************************************** +* 1.2.1: Mean values over time - Share receive care, by gender +******************************************************************************** + +* Prepare validation data +use year idBu dwt valid_careReceiveFlag demAge demMaleFlag using /// + 
"$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if demAge > 64 & demAge != . + +* Compute mean +collapse (mean) valid_careReceiveFlag [aw = dwt], by(year demMaleFlag) + +save "$dir_data/temp_valid_stats.dta", replace + +* Prepare simulated data +use run year idBu sim_careReceiveFlag demAge demMaleFlag using /// + "$dir_data/simulation_sample.dta", clear + +* Select sample (missing ages compare as > 64 in Stata, so exclude them explicitly) +keep if demAge > 64 & demAge != . + +* Compute mean and sd: share within each run, then mean and sd across runs +collapse (mean) sim_careReceiveFlag, by(year demMaleFlag run) + +collapse (mean) sim_careReceiveFlag /// + (sd) sim_careReceiveFlag_sd = sim_careReceiveFlag, by(year demMaleFlag) + +* Compute 95% confidence intervals +gen sim_careReceiveFlag_h = sim_careReceiveFlag + 1.96*sim_careReceiveFlag_sd +gen sim_careReceiveFlag_l = sim_careReceiveFlag - 1.96*sim_careReceiveFlag_sd + +* Combine datasets (NOTE(review): unlike the age-group sections, unmatched years are kept - confirm intended) +merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta" + +* Plot figures + +* Females +twoway (rarea sim_careReceiveFlag_h sim_careReceiveFlag_l year if /// + demMaleFlag == 0, sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_careReceiveFlag year if demMaleFlag == 0, sort color(green) /// + legend(label(2 "UKHLS"))), /// + subtitle("Females") /// + name(health_female, replace) /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + ylabel(0[0.1]0.5,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) + +* Males +twoway (rarea sim_careReceiveFlag_h sim_careReceiveFlag_l year if /// + demMaleFlag == 1, sort color(green%20) legend(label(1 "SimPaths"))) /// +(line valid_careReceiveFlag year if demMaleFlag == 1, sort color(green) /// + legend(label(2 "UKHLS"))), /// + subtitle("Males") /// + name(health_male, replace) /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + ylabel(0[0.1]0.5,labsize(small)) /// + xlabel(,labsize(small)) /// + legend(size(small)) /// + graphregion(color(white)) + +* Combine +grc1leg 
health_female health_male, /// + title("Receive Care") /// + subtitle("Ages 65+") /// + legendfrom(health_female) rows(1) /// + ycomm /// + graphregion(color(white)) /// + note("Notes: ", /// + size(vsmall)) + +* Save figure +graph export /// +"$dir_output_files/social_care/validation_${country}_receive_care_ts_65plus_gender.jpg", /// + replace width(2400) height(1350) quality(100) + +graph drop _all + + +******************************************************************************** +* 1.2.2: Mean values over time - Share receive care, by gender and age group +******************************************************************************** + +* Prepare validation data +use year dwt demMaleFlag ageGroup valid_careReceiveFlag demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Rebuild ageGroup as non-overlapping 5-year bands from age 65, 90-110 as the top band +drop ageGroup +gen ageGroup = 1 if inrange(demAge,65,69) +replace ageGroup = 2 if inrange(demAge,70,74) +replace ageGroup = 3 if inrange(demAge,75,79) +replace ageGroup = 4 if inrange(demAge,80,84) +replace ageGroup = 5 if inrange(demAge,85,89) +replace ageGroup = 6 if inrange(demAge,90,110) + +* Select sample +drop if demAge < 65 + +* Gender specific vars +gen care_m = valid_careReceiveFlag if demMaleFlag == 1 +gen care_f = valid_careReceiveFlag if demMaleFlag == 0 + +* Compute means +collapse (mean) care* [aw = dwt], by(ageGroup year) + +* Restructure data +drop if missing(ageGroup) +reshape wide care*, i(year) j(ageGroup) + +forvalues i = 1(1)6 { + + rename care_f`i' care_f_`i'_valid + rename care_m`i' care_m_`i'_valid + +} + +save "$dir_data/temp_valid_stats.dta", replace + + +* Prepare simulated data +use run year demMaleFlag ageGroup sim_careReceiveFlag demAge using /// + "$dir_data/simulation_sample.dta", clear + +* Same age bands as for the validation data +drop ageGroup +gen ageGroup = 1 if inrange(demAge,65,69) +replace ageGroup = 2 if inrange(demAge,70,74) +replace ageGroup = 3 if inrange(demAge,75,79) +replace ageGroup = 4 if inrange(demAge,80,84) +replace ageGroup = 5 if inrange(demAge,85,89) +replace 
ageGroup = 6 if inrange(demAge,90,110) + +* Select sample +drop if demAge < 65 + +* Gender specific vars +gen care_m = sim_careReceiveFlag if demMaleFlag == 1 +gen care_f = sim_careReceiveFlag if demMaleFlag == 0 + +drop sim_careReceiveFlag + +* Compute means within each run +collapse (mean) care*, by(ageGroup run year) + +* Restructure data +drop if missing(ageGroup) +reshape wide care*, i(year run) j(ageGroup) + +* Compute mean and sd across runs +collapse (mean) care* /// + (sd) care_m_1_sd = care_m1 /// + (sd) care_f_1_sd = care_f1 /// + (sd) care_m_2_sd = care_m2 /// + (sd) care_f_2_sd = care_f2 /// + (sd) care_m_3_sd = care_m3 /// + (sd) care_f_3_sd = care_f3 /// + (sd) care_m_4_sd = care_m4 /// + (sd) care_f_4_sd = care_f4 /// + (sd) care_m_5_sd = care_m5 /// + (sd) care_f_5_sd = care_f5 /// + (sd) care_m_6_sd = care_m6 /// + (sd) care_f_6_sd = care_f6 /// + , by(year) + + +* Approx 95% confidence interval (mean +/- 1.96 sd) +forvalues i=1(1)6 { + + gen care_f_`i'_sim_high = care_f`i' + 1.96*care_f_`i'_sd + gen care_f_`i'_sim_low = care_f`i' - 1.96*care_f_`i'_sd + gen care_m_`i'_sim_high = care_m`i' + 1.96*care_m_`i'_sd + gen care_m_`i'_sim_low = care_m`i' - 1.96*care_m_`i'_sd + +} + +* Combine datasets +merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen + 
+* Plot figure: one panel per gender and age band, named care_f_1 ... care_m_6 +* Panel subtitles, in ageGroup order 1-6 +local age_labels `""65-69" "70-74" "75-79" "80-84" "85-89" "90+""' + +foreach vble in "care_f" "care_m" { + + forvalues i = 1(1)6 { + + local age_label : word `i' of `age_labels' + + twoway (rarea `vble'_`i'_sim_high `vble'_`i'_sim_low year, sort /// + color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) /// + (line `vble'_`i'_valid year, sort color(red) legend(label(2 "UKHLS"))), /// + subtitle("Age `age_label'") /// + name(`vble'_`i', replace) /// + xtitle("Year", size(small)) /// + ytitle("Share", size(small)) /// + ylabel(0(0.3)0.9, labsize(vsmall)) /// + xlabel(,labsize(vsmall)) /// + legend(size(small)) /// + graphregion(color(white)) + + } + +} + 
+* Save figures: combine the six panels per gender with a single shared legend +grc1leg care_f_1 care_f_2 care_f_3 care_f_4 care_f_5 care_f_6 , /// + title("Receive Care") /// + subtitle("Females") /// + legendfrom(care_f_1) /// + ycomm /// + graphregion(color(white)) /// +note("Notes: ", /// + size(vsmall)) + +graph export /// +"$dir_output_files/social_care/validation_${country}_receive_care_ts_65plus_female.jpg", /// + replace width(2400) height(1350) quality(100) + + +grc1leg care_m_1 care_m_2 care_m_3 care_m_4 care_m_5 care_m_6, /// + title("Receive Care") /// + subtitle("Males") /// + legendfrom(care_m_1) /// + ycomm /// + graphregion(color(white)) /// +note("Notes: ", /// + size(vsmall)) + +graph export /// +"$dir_output_files/social_care/validation_${country}_receive_care_ts_65plus_male.jpg", /// + replace width(2400) height(1350) quality(100) + +graph drop _all + + +******************************************************************************** +* 1.3: Mean values over time - Share type of care received +******************************************************************************** + +* Prepare validation data +use year idBu dwt valid_careReceiveFlag valid_careRecFormalOnly /// + valid_careRecInformalOnly valid_careRecMix demAge using /// + "$dir_data/ukhls_validation_sample.dta", clear + +* Select sample +keep if demAge > 64 & demAge != . 
+keep if valid_careReceiveFlag == 1
+
+* Compute weighted mean share of each care type among care recipients
+collapse (mean) valid_careRecFormalOnly valid_careRecInformalOnly ///
+    valid_careRecMix [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year idBu sim_careReceiveFlag sim_careRecFormalOnly ///
+    sim_careRecInformalOnly sim_careRecMix demAge using ///
+    "$dir_data/simulation_sample.dta", clear
+
+* Select sample. Missing ages are excluded explicitly: in Stata a missing value
+* compares greater than any number, so "demAge > 64" alone would keep them.
+keep if demAge > 64 & !missing(demAge)
+keep if sim_careReceiveFlag == 1
+
+* Compute mean within each run, then mean and sd across runs
+collapse (mean) sim_careRecFormalOnly sim_careRecInformalOnly ///
+    sim_careRecMix, by(year run)
+
+collapse (mean) sim_careRecFormalOnly sim_careRecInformalOnly ///
+    sim_careRecMix ///
+    (sd) sim_careRecFormalOnly_sd = sim_careRecFormalOnly ///
+    (sd) sim_careRecInformalOnly_sd = sim_careRecInformalOnly ///
+    (sd) sim_careRecMix_sd = sim_careRecMix ///
+    , by(year)
+
+* Compute 95% confidence intervals
+foreach varname in sim_careRecFormalOnly sim_careRecInformalOnly ///
+    sim_careRecMix {
+
+    gen `varname'_h = `varname' + 1.96*`varname'_sd
+    gen `varname'_l = `varname' - 1.96*`varname'_sd
+
+}
+
+* Combine datasets
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", nogen
+
+* Plot figure
+twoway ///
+    (rarea sim_careRecFormalOnly_h sim_careRecFormalOnly_l year, ///
+        sort color(green%20) legend(label(1 "Formal care only, SimPaths"))) ///
+    (line valid_careRecFormalOnly year, sort color(green) ///
+        legend(label(2 "Formal care only, UKHLS"))) ///
+    (rarea sim_careRecInformalOnly_h sim_careRecInformalOnly_l year, ///
+        sort color(blue%20) legend(label(3 "Informal care only, SimPaths"))) ///
+    (line valid_careRecInformalOnly year, sort color(blue) ///
+        legend(label(4 "Informal care only, UKHLS"))) ///
+    (rarea sim_careRecMix_h sim_careRecMix_l year, sort color(red%20) ///
+        legend(label(5 "Mixed care, SimPaths"))) ///
+    (line valid_careRecMix year, sort color(red) ///
+        legend(label(6 "Mixed care, UKHLS"))), ///
+    title("Type of Care Received") ///
+    subtitle("Ages 65+") ///
+    xtitle("Year", size(small)) ///
+    ytitle("Share", size(small)) ///
+    xlabel(, labsize(small)) ///
+    ylabel(, labsize(small)) ///
+    graphregion(color(white)) ///
+    legend(size(small)) ///
+    note("Note: Only those receiving care included in the sample. ", ///
+    size(vsmall))
+
+graph export ///
+"$dir_output_files/social_care/validation_${country}_care_type_ts_65plus_both.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+graph drop _all
+
+
+********************************************************************************
+* 1.3.1: Mean values over time - Share type of care received, by gender
+********************************************************************************
+
+* Prepare validation data
+use year idBu dwt valid_careReceiveFlag valid_careRecFormalOnly ///
+    valid_careRecInformalOnly valid_careRecMix demAge demMaleFlag using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if demAge > 64 & demAge != .
+keep if valid_careReceiveFlag == 1
+
+* Compute weighted mean by year and gender
+collapse (mean) valid_careRecFormalOnly valid_careRecInformalOnly ///
+    valid_careRecMix [aw = dwt], by(year demMaleFlag)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year idBu sim_careReceiveFlag sim_careRecFormalOnly ///
+    sim_careRecInformalOnly sim_careRecMix demAge demMaleFlag using ///
+    "$dir_data/simulation_sample.dta", clear
+
+* Select sample (missing ages excluded, matching the validation sample)
+keep if demAge > 64 & !missing(demAge)
+keep if sim_careReceiveFlag == 1
+
+* Compute mean within each run, then mean and sd across runs
+collapse (mean) sim_careRecFormalOnly sim_careRecInformalOnly ///
+    sim_careRecMix, by(year demMaleFlag run)
+
+collapse (mean) sim_careRecFormalOnly sim_careRecInformalOnly ///
+    sim_careRecMix ///
+    (sd) sim_careRecFormalOnly_sd = sim_careRecFormalOnly ///
+    (sd) sim_careRecInformalOnly_sd = sim_careRecInformalOnly ///
+    (sd) sim_careRecMix_sd = sim_careRecMix ///
+    , by(year demMaleFlag)
+
+* Compute 95% confidence intervals
+foreach varname in sim_careRecFormalOnly sim_careRecInformalOnly ///
+    sim_careRecMix {
+
+    gen `varname'_h = `varname' + 1.96*`varname'_sd
+    gen `varname'_l = `varname' - 1.96*`varname'_sd
+
+}
+
+* Combine datasets
+merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta"
+
+* Plot figures: the same plot for males (demMaleFlag == 1) and then females
+* (demMaleFlag == 0), selected with an if-condition rather than
+* preserve/keep/restore. The "recieving" typo in the female note is fixed.
+local sex0 "female"
+local sex1 "male"
+
+foreach g in 1 0 {
+
+    twoway ///
+        (rarea sim_careRecFormalOnly_h sim_careRecFormalOnly_l year ///
+            if demMaleFlag == `g', sort color(green%20) ///
+            legend(label(1 "Formal care only, SimPaths"))) ///
+        (line valid_careRecFormalOnly year if demMaleFlag == `g', ///
+            sort color(green) legend(label(2 "Formal care only, UKHLS"))) ///
+        (rarea sim_careRecInformalOnly_h sim_careRecInformalOnly_l year ///
+            if demMaleFlag == `g', sort color(blue%20) ///
+            legend(label(3 "Informal care only, SimPaths"))) ///
+        (line valid_careRecInformalOnly year if demMaleFlag == `g', ///
+            sort color(blue) legend(label(4 "Informal care only, UKHLS"))) ///
+        (rarea sim_careRecMix_h sim_careRecMix_l year ///
+            if demMaleFlag == `g', sort color(red%20) ///
+            legend(label(5 "Mixed care, SimPaths"))) ///
+        (line valid_careRecMix year if demMaleFlag == `g', ///
+            sort color(red) legend(label(6 "Mixed care, UKHLS"))), ///
+        title("Type of Care Received") ///
+        subtitle("Ages 65+, `sex`g''s") ///
+        xtitle("Year", size(small)) ///
+        ytitle("Share", size(small)) ///
+        xlabel(, labsize(small)) ///
+        ylabel(, labsize(small)) ///
+        graphregion(color(white)) ///
+        legend(size(small)) ///
+        note("Note: Only those receiving care included in the sample. ", ///
+        size(vsmall))
+
+    graph export ///
+    "$dir_output_files/social_care/validation_${country}_care_type_ts_65plus_`sex`g''.jpg", ///
+        replace width(2400) height(1350) quality(100)
+
+}
+
+graph drop _all
+
+
+********************************************************************************
+* 1.4: Mean values over time - Average hours of care received
+********************************************************************************
+
+* Prepare validation data
+use year idBu dwt valid_careReceiveHrs valid_careReceiveFlag demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if demAge > 64 & demAge != .
+drop if valid_careReceiveFlag != 1
+
+* Weighted mean hours among care recipients, per year
+collapse (mean) valid_careReceiveHrs [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year idBu sim_careReceiveHrs sim_careReceiveFlag demAge using ///
+    "$dir_data/simulation_sample.dta", clear
+
+* Select sample: aged over 64 and receiving care
+keep if demAge > 64 & sim_careReceiveFlag == 1
+
+* Mean within each run, then mean and sd across runs
+collapse (mean) sim_careReceiveHrs, by(year run)
+
+collapse (mean) sim_careReceiveHrs ///
+    (sd) sim_careReceiveHrs_sd = sim_careReceiveHrs, by(year)
+
+* Approx 95% band: mean +/- 1.96 sd
+local hrs sim_careReceiveHrs
+gen `hrs'_h = `hrs' + 1.96*`hrs'_sd
+gen `hrs'_l = `hrs' - 1.96*`hrs'_sd
+
+* Attach the validation series
+merge 1:1 year using "$dir_data/temp_valid_stats.dta"
+
+* Plot figure (make_care_plot is defined elsewhere in this file)
+make_care_plot, ///
+    var("careReceiveHrs") ///
+    title("Hours of Care Received") ///
+    subtitle("Ages 65+") ///
+    saving("validation_${country}_care_hours_received_ts_65plus_both") ///
+    note(`""Notes: ""')
+
+graph drop _all
+
+
+********************************************************************************
+* 1.4.1: Mean values over time - Average hours of care received, by gender
+********************************************************************************
+
+* Prepare validation data
+use year idBu dwt valid_careReceiveFlag valid_careReceiveHrs demAge ///
+    demMaleFlag using "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if demAge != . & demAge > 64
+keep if valid_careReceiveFlag == 1
+
+* Compute weighted mean by year and gender
+collapse (mean) valid_careReceiveHrs [aw = dwt], by(year demMaleFlag)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year idBu sim_careReceiveFlag sim_careReceiveHrs demAge demMaleFlag ///
+    using "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+keep if demAge > 64
+keep if sim_careReceiveFlag == 1
+
+* Compute mean within each run, then mean and sd across runs
+collapse (mean) sim_careReceiveHrs, by(year demMaleFlag run)
+
+collapse (mean) sim_careReceiveHrs ///
+    (sd) sim_careReceiveHrs_sd = sim_careReceiveHrs, by(year demMaleFlag)
+
+* Compute 95% confidence intervals
+gen sim_careReceiveHrs_h = sim_careReceiveHrs + 1.96*sim_careReceiveHrs_sd
+gen sim_careReceiveHrs_l = sim_careReceiveHrs - 1.96*sim_careReceiveHrs_sd
+
+* Combine datasets
+merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta"
+
+* Plot figures: one panel per gender, named health_female / health_male.
+* The twoway command no longer ends in a dangling "///", which only worked
+* because a blank line happened to follow it.
+local sexname0 "female"
+local sexname1 "male"
+local sextitle0 "Females"
+local sextitle1 "Males"
+
+foreach g in 0 1 {
+
+    twoway (rarea sim_careReceiveHrs_h sim_careReceiveHrs_l year if ///
+        demMaleFlag == `g', sort color(green%20) legend(label(1 "SimPaths"))) ///
+    (line valid_careReceiveHrs year if demMaleFlag == `g', sort color(green) ///
+        legend(label(2 "UKHLS"))), ///
+        subtitle("`sextitle`g''") ///
+        name(health_`sexname`g'', replace) ///
+        xtitle("Year", size(small)) ///
+        ytitle("Hours per week", size(small)) ///
+        ylabel(,labsize(small)) ///
+        xlabel(,labsize(small)) ///
+        legend(size(small)) ///
+        graphregion(color(white))
+
+}
+
+* Combine
+grc1leg health_female health_male, ///
+    title("Hours of Care Received") ///
+    subtitle("Ages 65+") ///
+    legendfrom(health_female) rows(1) ///
+    ycomm ///
+    graphregion(color(white)) ///
+    note("Notes: Only those receiving care included in sample. ", ///
+    size(vsmall))
+
+* Save figure
+graph export ///
+"$dir_output_files/social_care/validation_${country}_care_hours_received_ts_65plus_gender.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+graph drop _all
+
+
+********************************************************************************
+* 1.5: Mean values over time - Quantile means of hours of care received
+********************************************************************************
+
+* Prepare validation data
+use year idBu dwt valid_careReceiveHrs valid_careReceiveFlag demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if demAge > 64 & demAge != .
+keep if valid_careReceiveFlag == 1
+
+* Compute weighted percentiles
+collapse (p25) valid_p25 = valid_careReceiveHrs ///
+    (p50) valid_p50 = valid_careReceiveHrs ///
+    (p75) valid_p75 = valid_careReceiveHrs [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year idBu sim_careReceiveHrs sim_careReceiveFlag demAge using ///
+    "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+keep if demAge > 64
+keep if sim_careReceiveFlag == 1
+
+* Compute percentiles within each year-run cell
+foreach p in 25 50 75 {
+
+    bysort year run: egen sim_p`p'_run = pctile(sim_careReceiveHrs), p(`p')
+
+}
+
+* Keep one row per year-run so the mean and sd below are taken across runs
+* rather than across person-level rows that repeat the same run value
+bysort year run: keep if _n == 1
+
+* Mean and sd across runs. The (sd) sources are spelled out in full: the
+* shortened names sim_p25 etc. only resolved through variable abbreviation.
+collapse (mean) sim_p25 = sim_p25_run ///
+    sim_p50 = sim_p50_run ///
+    sim_p75 = sim_p75_run ///
+    (sd) sim_p25_sd = sim_p25_run ///
+    sim_p50_sd = sim_p50_run ///
+    sim_p75_sd = sim_p75_run, by(year)
+
+* Approx 95% confidence intervals
+foreach p in 25 50 75 {
+
+    gen sim_p`p'_lo = sim_p`p' - 1.96*sim_p`p'_sd
+    gen sim_p`p'_hi = sim_p`p' + 1.96*sim_p`p'_sd
+
+}
+
+* Combine datasets
+merge 1:1 year using "$dir_data/temp_valid_stats.dta"
+
+* Plot figure. The note now describes this section's sample (care recipients);
+* it previously repeated the informal-care wording.
+twoway (rarea sim_p25_lo sim_p25_hi year, ///
+    sort color(green%20) legend(label(1 "SimPaths, p25"))) ///
+(line valid_p25 year, sort color(green) ///
+    legend(label(2 "UKHLS, p25"))) ///
+(rarea sim_p50_lo sim_p50_hi year, ///
+    sort color(blue%20) legend(label(3 "SimPaths, p50"))) ///
+(line valid_p50 year, sort color(blue) ///
+    legend(label(4 "UKHLS, p50"))) ///
+(rarea sim_p75_lo sim_p75_hi year, ///
+    sort color(red%20) legend(label(5 "SimPaths, p75"))) ///
+(line valid_p75 year, sort color(red) ///
+    legend(label(6 "UKHLS, p75"))), ///
+    title("Hours of Care Received ") ///
+    subtitle("Ages 65+") ///
+    xtitle("Year", size(small)) ///
+    ytitle("Hours per week", size(small)) ///
+    xlabel(, labsize(small)) ///
+    ylabel(, labsize(small)) ///
+    legend(size(small)) ///
+    graphregion(color(white)) ///
+    note("Notes: Percentiles computed on the 65+ population that report receiving care.", size(vsmall))
+
+* Save figure
+* NOTE(review): "quantilests" looks like a missing underscore ("quantiles_ts");
+* left unchanged in case the file name is referenced elsewhere - confirm.
+graph export ///
+"$dir_output_files/social_care/validation_${country}_care_hours_received_quantilests_65plus_both.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+graph drop _all
+
+
+********************************************************************************
+* 1.5.1: Mean values over time - Quantile means of hours of care
+*         received, by gender
+********************************************************************************
+
+
+
+********************************************************************************
+* 1.6: Mean values over time - Quantile means of hours of informal care received
+********************************************************************************
+
+* Prepare validation data
+use year idBu dwt valid_careHrsInformal demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if demAge > 64 & demAge != .
+keep if valid_careHrsInformal > 0
+
+* Compute weighted percentiles
+collapse (p25) valid_p25 = valid_careHrsInformal ///
+    (p50) valid_p50 = valid_careHrsInformal ///
+    (p75) valid_p75 = valid_careHrsInformal [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year idBu sim_careHrsInformal demAge using ///
+    "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+keep if demAge > 64
+keep if sim_careHrsInformal > 0
+
+* Compute percentiles within each year-run cell
+foreach p in 25 50 75 {
+
+    bysort year run: egen sim_p`p'_run = pctile(sim_careHrsInformal), p(`p')
+
+}
+
+* Keep one row per year-run so the mean and sd below are taken across runs
+* rather than across person-level rows that repeat the same run value
+bysort year run: keep if _n == 1
+
+* Mean and sd across runs. The (sd) sources are spelled out in full: the
+* shortened names sim_p25 etc. only resolved through variable abbreviation.
+collapse (mean) sim_p25 = sim_p25_run ///
+    sim_p50 = sim_p50_run ///
+    sim_p75 = sim_p75_run ///
+    (sd) sim_p25_sd = sim_p25_run ///
+    sim_p50_sd = sim_p50_run ///
+    sim_p75_sd = sim_p75_run, by(year)
+
+
+* Approx 95% confidence intervals
+foreach p in 25 50 75 {
+
+    gen sim_p`p'_lo = sim_p`p' - 1.96*sim_p`p'_sd
+    gen sim_p`p'_hi = sim_p`p' + 1.96*sim_p`p'_sd
+
+}
+
+* Combine datasets
+merge 1:1 year using "$dir_data/temp_valid_stats.dta"
+
+* Plot figure
+twoway (rarea sim_p25_lo sim_p25_hi year, ///
+    sort color(green%20) legend(label(1 "SimPaths, p25"))) ///
+(line valid_p25 year, sort color(green) ///
+    legend(label(2 "UKHLS, p25"))) ///
+(rarea sim_p50_lo sim_p50_hi year, ///
+    sort color(blue%20) legend(label(3 "SimPaths, p50"))) ///
+(line valid_p50 year, sort color(blue) ///
+    legend(label(4 "UKHLS, p50"))) ///
+(rarea sim_p75_lo sim_p75_hi year, ///
+    sort color(red%20) legend(label(5 "SimPaths, p75"))) ///
+(line valid_p75 year, sort color(red) ///
+    legend(label(6 "UKHLS, p75"))), ///
+    title("Hours of Informal Care Received ") ///
+    subtitle("Ages 65+") ///
+    xtitle("Year", size(small)) ///
+    ytitle("Hours per week", size(small)) ///
+    xlabel(, labsize(small)) ///
+    ylabel(, labsize(small)) ///
+    legend(size(small)) ///
+    graphregion(color(white)) ///
+    note("Notes: Percentiles computed on the 65+ population that report receiving positive hours of informal care.", size(vsmall))
+
+* Save figure (stray space removed from the file name: "informal_ quantiles")
+graph export ///
+"$dir_output_files/social_care/validation_${country}_care_hours_received_informal_quantiles_ts_65plus_both.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+graph drop _all
+
+********************************************************************************
+* 1.6.1: Mean values over time - Quantile means of hours of informal care
+*         received, by gender
+********************************************************************************
+
+
+
+********************************************************************************
+* 1.7: Mean values over time - Quantile means of hours of formal care received
+********************************************************************************
+
+* Prepare validation data
+use year idBu dwt valid_careHrsFormal demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if demAge > 64 & demAge != .
+keep if valid_careHrsFormal > 0
+
+* Compute weighted percentiles
+collapse (p25) valid_p25 = valid_careHrsFormal ///
+    (p50) valid_p50 = valid_careHrsFormal ///
+    (p75) valid_p75 = valid_careHrsFormal [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year idBu sim_careHrsFormal demAge using ///
+    "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+keep if demAge > 64
+keep if sim_careHrsFormal > 0
+
+* Compute percentiles within each year-run cell
+foreach p in 25 50 75 {
+
+    bysort year run: egen sim_p`p'_run = pctile(sim_careHrsFormal), p(`p')
+
+}
+
+* Keep one row per year-run so the mean and sd below are taken across runs
+* rather than across person-level rows that repeat the same run value
+bysort year run: keep if _n == 1
+
+* Mean and sd across runs. The (sd) sources are spelled out in full: the
+* shortened names sim_p25 etc. only resolved through variable abbreviation.
+collapse (mean) sim_p25 = sim_p25_run ///
+    sim_p50 = sim_p50_run ///
+    sim_p75 = sim_p75_run ///
+    (sd) sim_p25_sd = sim_p25_run ///
+    sim_p50_sd = sim_p50_run ///
+    sim_p75_sd = sim_p75_run, by(year)
+
+* Approx 95% confidence intervals
+foreach p in 25 50 75 {
+
+    gen sim_p`p'_lo = sim_p`p' - 1.96*sim_p`p'_sd
+    gen sim_p`p'_hi = sim_p`p' + 1.96*sim_p`p'_sd
+
+}
+
+* Combine datasets
+merge 1:1 year using "$dir_data/temp_valid_stats.dta"
+
+* Plot figure
+twoway (rarea sim_p25_lo sim_p25_hi year, ///
+    sort color(green%20) legend(label(1 "SimPaths, p25"))) ///
+(line valid_p25 year, sort color(green) ///
+    legend(label(2 "UKHLS, p25"))) ///
+(rarea sim_p50_lo sim_p50_hi year, ///
+    sort color(blue%20) legend(label(3 "SimPaths, p50"))) ///
+(line valid_p50 year, sort color(blue) ///
+    legend(label(4 "UKHLS, p50"))) ///
+(rarea sim_p75_lo sim_p75_hi year, ///
+    sort color(red%20) legend(label(5 "SimPaths, p75"))) ///
+(line valid_p75 year, sort color(red) ///
+    legend(label(6 "UKHLS, p75"))), ///
+    title("Hours of Formal Care Received ") ///
+    subtitle("Ages 65+") ///
+    xtitle("Year", size(small)) ///
+    ytitle("Hours per week", size(small)) ///
+    xlabel(, labsize(small)) ///
+    ylabel(, labsize(small)) ///
+    legend(size(small)) ///
+    graphregion(color(white)) ///
+    note("Notes: Percentiles computed on the 65+ population that report receiving positive hours of formal care.", size(vsmall))
+
+* Save figure (stray space removed from the file name: "formal_ quantiles")
+graph export ///
+"$dir_output_files/social_care/validation_${country}_care_hours_received_formal_quantiles_ts_65plus_both.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+graph drop _all
+
+********************************************************************************
+* 1.7.1: Mean values over time - Quantile means of hours of formal care
+*         received, by gender
+********************************************************************************
+
+
+
+********************************************************************************
+* 1.8: Mean values over time - Share provide care
+********************************************************************************
+
+* Prepare validation data
+use year idBu dwt valid_careProvideFlag demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+keep if demAge >= 16
+
+* Treat negative codes as missing, as section 1.8.2 does, so they cannot
+* enter the mean (no effect if the flag has no negative values)
+replace valid_careProvideFlag = . if valid_careProvideFlag < 0
+
+* Compute weighted mean
+collapse (mean) valid_careProvideFlag [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year idBu sim_careProvideFlag demAge using ///
+    "$dir_data/simulation_sample.dta", clear
+
+keep if demAge >= 16
+
+* Compute mean within each run, then mean and sd across runs
+collapse (mean) sim_careProvideFlag, by(year run)
+
+collapse (mean) sim_careProvideFlag ///
+    (sd) sim_careProvideFlag_sd = sim_careProvideFlag, by(year)
+
+* Compute 95% confidence intervals
+gen sim_careProvideFlag_h = sim_careProvideFlag + 1.96*sim_careProvideFlag_sd
+gen sim_careProvideFlag_l = sim_careProvideFlag - 1.96*sim_careProvideFlag_sd
+
+* Combine datasets
+merge 1:1 year using "$dir_data/temp_valid_stats.dta"
+
+* Plot figure (make_care_plot is defined elsewhere in this file)
+make_care_plot, ///
+    var("careProvideFlag") ///
+    title("Provide Care") ///
+    subtitle("Ages 16+") ///
+    saving("validation_${country}_provide_care_ts_16plus_both") ///
+    note(`""Notes: ""')
+
+graph drop _all
+
+
+********************************************************************************
+* 1.8.1: Mean values over time - Share provide care, by gender
+********************************************************************************
+
+* Prepare validation data
+use year idBu dwt valid_careProvideFlag demAge demMaleFlag using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+keep if demAge >= 16
+
+* Treat negative codes as missing, as section 1.8.2 does, so they cannot
+* enter the mean (no effect if the flag has no negative values)
+replace valid_careProvideFlag = . if valid_careProvideFlag < 0
+
+* Compute weighted mean by year and gender
+collapse (mean) valid_careProvideFlag [aw = dwt], by(year demMaleFlag)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year idBu sim_careProvideFlag demAge demMaleFlag using ///
+    "$dir_data/simulation_sample.dta", clear
+
+keep if demAge >= 16
+
+* Compute mean within each run, then mean and sd across runs
+collapse (mean) sim_careProvideFlag, by(year demMaleFlag run)
+
+collapse (mean) sim_careProvideFlag ///
+    (sd) sim_careProvideFlag_sd = sim_careProvideFlag, by(year demMaleFlag)
+
+* Compute 95% confidence intervals
+gen sim_careProvideFlag_h = sim_careProvideFlag + 1.96*sim_careProvideFlag_sd
+gen sim_careProvideFlag_l = sim_careProvideFlag - 1.96*sim_careProvideFlag_sd
+
+* Combine datasets
+merge 1:1 year demMaleFlag using "$dir_data/temp_valid_stats.dta"
+
+* Plot figures: one panel per gender, named care_female / care_male.
+* The twoway command no longer ends in a dangling "///", which only worked
+* because a blank line happened to follow it.
+local sexname0 "female"
+local sexname1 "male"
+local sextitle0 "Females"
+local sextitle1 "Males"
+
+foreach g in 0 1 {
+
+    twoway (rarea sim_careProvideFlag_h sim_careProvideFlag_l year if ///
+        demMaleFlag == `g', sort color(green%20) legend(label(1 "SimPaths"))) ///
+    (line valid_careProvideFlag year if demMaleFlag == `g', sort color(green) ///
+        legend(label(2 "UKHLS"))), ///
+        subtitle("`sextitle`g''") ///
+        name(care_`sexname`g'', replace) ///
+        xtitle("Year", size(small)) ///
+        ytitle("Share", size(small)) ///
+        ylabel(0[0.1]0.5,labsize(small)) ///
+        xlabel(,labsize(small)) ///
+        legend(size(small)) ///
+        graphregion(color(white))
+
+}
+
+* Combine
+grc1leg care_female care_male, ///
+    title("Provide Care") ///
+    subtitle("Age 16+") ///
+    legendfrom(care_female) rows(1) ///
+    ycomm ///
+    graphregion(color(white)) ///
+    note("Notes: ", ///
+    size(vsmall))
+
+* Save figure
+graph export ///
+"$dir_output_files/social_care/validation_${country}_provide_care_ts_16plus_gender.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+graph drop _all
+
+
+********************************************************************************
+* 1.8.2: Mean values over time - Share provide care, by gender and
+*         age group
+********************************************************************************
+
+* Prepare validation data
+use year dwt demMaleFlag ageGroup valid_careProvideFlag demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Rebuild ageGroup with non-overlapping bands. Age 70 previously matched both
+* band 5 (61-70) and band 6 (70-110) and the later replace moved it to band 6;
+* band 6 now starts at 71 so band 5 really covers 61-70.
+drop ageGroup
+gen ageGroup = 1 if inrange(demAge,16,20)
+replace ageGroup = 2 if inrange(demAge,21,40)
+replace ageGroup = 3 if inrange(demAge,41,50)
+replace ageGroup = 4 if inrange(demAge,51,60)
+replace ageGroup = 5 if inrange(demAge,61,70)
+replace ageGroup = 6 if inrange(demAge,71,110)
+
+* Select sample
+
+* Extensive margin dummy (negative codes treated as missing)
+replace valid_careProvideFlag = . if valid_careProvideFlag < 0
+
+gen care_m = valid_careProvideFlag if demMaleFlag == 1
+gen care_f = valid_careProvideFlag if demMaleFlag == 0
+
+* Compute weighted means
+collapse (mean) care* [aw = dwt], by(ageGroup year)
+
+* Restructure data: one row per year, one column per gender x age band
+drop if missing(ageGroup)
+reshape wide care*, i(year) j(ageGroup)
+
+forvalues i = 1/6 {
+
+    rename care_f`i' care_f_`i'_valid
+    rename care_m`i' care_m_`i'_valid
+
+}
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+
+* Prepare simulated data
+use run year demMaleFlag ageGroup sim_careProvideFlag demAge using ///
+    "$dir_data/simulation_sample.dta", clear
+
+* Same age bands as for the validation data
+drop ageGroup
+gen ageGroup = 1 if inrange(demAge,16,20)
+replace ageGroup = 2 if inrange(demAge,21,40)
+replace ageGroup = 3 if inrange(demAge,41,50)
+replace ageGroup = 4 if inrange(demAge,51,60)
+replace ageGroup = 5 if inrange(demAge,61,70)
+replace ageGroup = 6 if inrange(demAge,71,110)
+
+* Gender specific vars
+gen care_m = sim_careProvideFlag if demMaleFlag == 1
+gen care_f = sim_careProvideFlag if demMaleFlag == 0
+
+drop sim_careProvideFlag
+
+* Compute means within each run
+collapse (mean) care*, by(ageGroup run year)
+
+* Restructure data
+drop if missing(ageGroup)
+reshape wide care*, i(year run) j(ageGroup)
+
+* Compute mean and sd across runs; the (sd) targets are built in a loop
+local sdlist
+forvalues i = 1/6 {
+    local sdlist `sdlist' (sd) care_m_`i'_sd = care_m`i'
+    local sdlist `sdlist' (sd) care_f_`i'_sd = care_f`i'
+}
+
+collapse (mean) care* `sdlist', by(year)
+
+
+* Approx 95% confidence interval
+forvalues i = 1/6 {
+
+    gen care_f_`i'_sim_high = care_f`i' + 1.96*care_f_`i'_sd
+    gen care_f_`i'_sim_low = care_f`i' - 1.96*care_f_`i'_sd
+    gen care_m_`i'_sim_high = care_m`i' + 1.96*care_m_`i'_sd
+    gen care_m_`i'_sim_low = care_m`i' - 1.96*care_m_`i'_sd
+
+}
+
+* Combine datasets
+merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen
+
+* Plot figure
+* One panel per gender and age group: simulated band (mean +/- 1.96 sd) with
+* the UKHLS line on top. The panels differ only in the age-group index and
+* the subtitle, so they are drawn in a loop.
+* NOTE(review): panel 5 is labelled "Age 61-70", but the simulated ageGroup
+* built earlier in this script assigns age 70 to group 6 - confirm the label.
+local age_lab1 "Age 16-20"
+local age_lab2 "Age 21-40"
+local age_lab3 "Age 41-50"
+local age_lab4 "Age 51-60"
+local age_lab5 "Age 61-70"
+local age_lab6 "Age 70+"
+
+foreach vble in "care_f" "care_m" {
+
+    forvalues i = 1/6 {
+
+        twoway (rarea `vble'_`i'_sim_high `vble'_`i'_sim_low year, sort ///
+            color(red%20) legend(label(1 "SimPaths") position(6) rows(1))) ///
+        (line `vble'_`i'_valid year, sort color(red) legend(label(2 "UKHLS"))), ///
+            subtitle("`age_lab`i''") ///
+            name(`vble'_`i', replace) ///
+            xtitle("Year", size(small)) ///
+            ytitle("Share", size(small)) ///
+            ylabel(, labsize(vsmall)) ///
+            xlabel(,labsize(vsmall)) ///
+            legend(size(small)) ///
+            graphregion(color(white))
+
+    }
+
+}
+
+* Save figures
+grc1leg care_f_1 care_f_2 care_f_3 care_f_4 care_f_5 care_f_6 , ///
+    title("Provide Care") ///
+    subtitle("Females") ///
+    legendfrom(care_f_1) ///
+    ycomm ///
+    graphregion(color(white)) ///
+note("Notes: ", ///
+    size(vsmall))
+
+graph export ///
+"$dir_output_files/social_care/validation_${country}_provide_care_ts_all_ages_female.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+
+grc1leg care_m_1 care_m_2 care_m_3 care_m_4 care_m_5 care_m_6, ///
+    title("Provide Care") ///
+    subtitle("Males") ///
+    legendfrom(care_m_1) ///
+    ycomm ///
+    graphregion(color(white)) ///
+note("Notes: ", ///
+    size(vsmall))
+
+graph export ///
+"$dir_output_files/social_care/validation_${country}_provide_care_ts_all_ages_male.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+graph drop _all
+
+
+********************************************************************************
+* 1.9: Mean values over time - Quantile means of hours of care provided
+********************************************************************************
+
+* Prepare validation data
+use year idBu dwt valid_careProvideFlag valid_careHrsProvidedWeek demAge ///
+    using "$dir_data/ukhls_validation_sample.dta", clear
+
+keep if demAge >= 16
+keep if valid_careProvideFlag == 1
+
+* Compute weighted quartiles of weekly hours by year
+collapse (p25) valid_p25 = valid_careHrsProvidedWeek ///
+    (p50) valid_p50 = valid_careHrsProvidedWeek ///
+    (p75) valid_p75 = valid_careHrsProvidedWeek [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year idBu sim_careHrsProvidedWeek demAge using ///
+    "$dir_data/simulation_sample.dta", clear
+
+* NOTE(review): the validation sample above is restricted to age 16+ care
+* providers, but no matching restriction is applied here - confirm that
+* sim_careHrsProvidedWeek is only populated for providers aged 16+.
+
+* Quartiles within each run and year (one row per year x run), using the same
+* collapse percentile statistics as the validation side
+collapse (p25) sim_p25_run = sim_careHrsProvidedWeek ///
+    (p50) sim_p50_run = sim_careHrsProvidedWeek ///
+    (p75) sim_p75_run = sim_careHrsProvidedWeek, by(year run)
+
+* Mean and sd of the run-level quartiles across runs. The sd terms must read
+* the *_run variables: sim_p25/sim_p50/sim_p75 are created by this collapse
+* and do not exist beforehand.
+collapse (mean) sim_p25 = sim_p25_run ///
+    sim_p50 = sim_p50_run ///
+    sim_p75 = sim_p75_run ///
+    (sd) sim_p25_sd = sim_p25_run ///
+    sim_p50_sd = sim_p50_run ///
+    sim_p75_sd = sim_p75_run, by(year)
+
+* Approx 95% confidence intervals
+foreach p in 25 50 75 {
+
+    gen sim_p`p'_lo = sim_p`p' - 1.96*sim_p`p'_sd
+    gen sim_p`p'_hi = sim_p`p' + 1.96*sim_p`p'_sd
+
+}
+
+* Combine datasets
+merge 1:1 year using "$dir_data/temp_valid_stats.dta"
+
+* Plot figure
+twoway (rarea sim_p25_lo sim_p25_hi year, ///
+    sort color(green%20) legend(label(1 "SimPaths, p25"))) ///
+(line valid_p25 year, sort color(green) ///
+    legend(label(2 "UKHLS, p25"))) ///
+(rarea sim_p50_lo sim_p50_hi year, ///
+    sort color(blue%20) legend(label(3 "SimPaths, p50"))) ///
+(line valid_p50 year, sort color(blue) ///
+    legend(label(4 "UKHLS, p50"))) ///
+(rarea sim_p75_lo sim_p75_hi year, ///
+    sort color(red%20) legend(label(5 "SimPaths, p75"))) ///
+(line valid_p75 year, sort color(red) ///
+    legend(label(6 "UKHLS, p75"))), ///
+    title("Hours of Care Provided per Week ") ///
+    subtitle("All ages") ///
+    xtitle("Year", size(small)) ///
+    ytitle("Hours per week", size(small)) ///
+    xlabel(, labsize(small)) ///
+    ylabel(, labsize(small)) ///
+    legend(size(small)) ///
+    graphregion(color(white)) ///
+    note("Notes: Percentiles computed on those age 16+ and provide care.", size(vsmall))
+
+* Save figure
+graph export ///
+"$dir_output_files/social_care/validation_${country}_care_hours_provided_quantiles_ts_16plus_both.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+
+graph drop _all
+
+
+********************************************************************************
+* 1.9.1: Mean values over time - Quantile means of hours of care provided, by
+* gender
+********************************************************************************
+
+
+********************************************************************************
+* 1.10: Mean values over time - Amount spent on formal social care, those that
+* receive formal care
+********************************************************************************
+
+* Load validation data
+use year idBu dwt valid_careFormalX valid_careHrsFormal demAge using ///
+    "$dir_data/ukhls_validation_sample.dta", clear
+
+* Select sample
+keep if demAge > 64 & demAge != .
+keep if valid_careHrsFormal > 0 & valid_careHrsFormal != .
+
+* Compute mean
+collapse (mean) valid_careFormalX [aw = dwt], by(year)
+
+save "$dir_data/temp_valid_stats.dta", replace
+
+* Prepare simulated data
+use run year idBu sim_careFormalX sim_careHrsFormal demAge using ///
+    "$dir_data/simulation_sample.dta", clear
+
+* Select sample
+* Missing values compare as larger than any number, so exclude missing ages
+* explicitly, as is done for the validation sample
+keep if demAge > 64 & demAge != .
+keep if sim_careHrsFormal > 0 & sim_careHrsFormal != .
+
+* Compute mean and sd
+* Mean within each run first, then mean and sd of the run means across runs
+collapse (mean) sim_careFormalX, by(year run)
+
+collapse (mean) sim_careFormalX ///
+    (sd) sim_careFormalX_sd = sim_careFormalX, by(year)
+
+* Compute 95% confidence intervals
+gen sim_careFormalX_h = sim_careFormalX + 1.96*sim_careFormalX_sd
+gen sim_careFormalX_l = sim_careFormalX - 1.96*sim_careFormalX_sd
+
+* Combine datasets
+merge 1:1 year using "$dir_data/temp_valid_stats.dta"
+
+* Plot
+* The graph gets an explicit name: the local macro `name' previously used here
+* is not defined in this section.
+* The y axis shows the mean of the expenditure variable, not a share.
+* NOTE(review): the currency/period of careFormalX is not visible here - add
+* units to the axis title once confirmed.
+twoway (rarea sim_careFormalX_h sim_careFormalX_l year, ///
+    sort color(green%20) legend(label(1 "SimPaths"))) ///
+    (line valid_careFormalX year, sort color(green) ///
+    legend(label(2 "UKHLS"))), ///
+    title("Social Care Expenditure") ///
+    subtitle("") ///
+    xtitle("Year", size(small)) ///
+    ytitle("Mean expenditure", size(small)) ///
+    xlabel(, labsize(small)) ///
+    ylabel(, labsize(small)) ///
+    legend(size(small)) ///
+    name(formal_care_expenditure, replace) ///
+    graphregion(color(white)) ///
+    note("Notes:", size(vsmall))
+
+graph export "$dir_output_files/social_care/validation_${country}_formal_care_expenditure_both.jpg", ///
+    replace width(2400) height(1350) quality(100)
+
+
+graph drop _all
diff --git a/validation/02_simulated_output_validation/do_files/05_create_UKHLS_validation_targets.do b/validation/02_simulated_output_validation/do_files/05_create_UKHLS_validation_targets.do
deleted file mode 100644
index 49b39dd94..000000000
--- a/validation/02_simulated_output_validation/do_files/05_create_UKHLS_validation_targets.do
+++ /dev/null
@@ -1,335 +0,0 @@
-/*******************************************************************************
-* PROJECT: ESPON
-* SECTION: Validation
-* OBJECT: Validation data processing
-* AUTHORS: Ashley Burdett
-* LAST UPDATE: 9/25
-* COUNTRY: UK
-
-* DESCRIPTION: This file creates the validation target variables
-* using UKHLS initial populations data.
-
-* NOTES: The income amounts in annual terms.
-*******************************************************************************/ - -* Load initial populations data -use "${dir_data}/ukhls_pooled_all_obs_09.dta", clear - -* Restrict sample to observations up to and including specified maximum year -fre swv -keep if swv <= $max_year -gen year = stm - -* Further adjustments - -* Define age groups -gen ageGroup = . -replace ageGroup = 0 if dag >= 0 & dag <= 14 -replace ageGroup = 1 if dag >= 15 & dag <= 19 -replace ageGroup = 2 if dag >= 20 & dag <= 24 -replace ageGroup = 3 if dag >= 25 & dag <= 29 -replace ageGroup = 4 if dag >= 30 & dag <= 34 -replace ageGroup = 5 if dag >= 35 & dag <= 39 -replace ageGroup = 6 if dag >= 40 & dag <= 59 -replace ageGroup = 7 if dag >= 60 & dag <= 79 -replace ageGroup = 8 if dag >= 80 & dag <= 100 - -label def ageGrouplb /// - 0 "ageGroup_0_14" /// - 1 "ageGroup_15_19" /// - 2 "ageGroup_20_24" /// - 3 "ageGroup_25_29" /// - 4 "ageGroup_30_34" /// - 5 "ageGroup_35_39" /// - 6 "ageGroup_40_59" /// - 7 "ageGroup_60_79" /// - 8 "ageGroup_80_100" /// - -label val ageGroup ageGrouplb -fre ageGroup - -gen ageGroup2 = . -replace ageGroup2 = 0 if dag >= 16 & dag <= 24 -replace ageGroup2 = 1 if dag >= 25 & dag <= 29 -replace ageGroup2 = 2 if dag >= 30 & dag <= 34 -replace ageGroup2 = 3 if dag >= 35 & dag <= 39 -replace ageGroup2 = 4 if dag >= 40 & dag <= 44 -replace ageGroup2 = 5 if dag >= 45 & dag <= 49 -replace ageGroup2 = 6 if dag >= 50 & dag <= 54 -replace ageGroup2 = 7 if dag >= 55 & dag <= 59 -replace ageGroup2 = 8 if dag >= 60 & dag <= 65 - -label def ageGrouplb2 /// - 0 "ageGroup_16_24" /// - 1 "ageGroup_25_29" /// - 2 "ageGroup_30_34" /// - 3 "ageGroup_35_39" /// - 4 "ageGroup_40_44" /// - 5 "ageGroup_45_49" /// - 6 "ageGroup_50_54" /// - 7 "ageGroup_55_59" /// - 8 "ageGroup_60_65" /// - -label val ageGroup2 ageGrouplb2 -fre ageGroup2 - -* Sex -replace dgn = . if dgn < 0 - -* Health variables -replace dhe = . if dhe < 0 -replace dhe_mcs = . if dhe_mcs < 0 -replace dhe_pcs = . 
if dhe_pcs < 0 - -*** Income variables - all annual unless stated - -* Gross income per individual (non-benefit) -gen ypnb = sinh(ypnbihs_dv) -gen valid_y_gross_ind_yr = ypnb * 12 - -* Gross income per benefit unit (non-benefit) -bys stm idhh idbenefitunit: /// - egen valid_y_gross_bu_yr = total(valid_y_gross_ind_yr) - - -* Disposable income per individual -* Not in the initial popualtions so need to merge in -* Note the raw variable does not include deductions -merge 1:1 idperson swv using "$dir_data/ukhls_ind_dispos_inc.dta" -drop if _m == 2 -drop _m - -gen valid_y_disp_ind_yr = (fimnnet_dv/CPI) * 12 - -* Disposable income per benefit unit -bys stm idhh idbenefitunit: /// - egen valid_y_disp_bu_yr = total(valid_y_disp_ind_yr) - - -* Gross labour income per individual -// yplgrs = fimnlabgrs_dv (ind) -gen y_gross_labour_ind_yr = sinh(yplgrs_dv) * 12 - -* Gross labour income per benefit unit -bys stm idhh idbenefitunit: /// - egen valid_y_gross_labour_bu_yr = total(y_gross_labour_ind_yr) - - -* Gross private pension income -gen y_gross_pension_ind_yr = sinh(ypnoab) * 12 - -bys stm idhh idbenefitunit: /// - egen valid_y_gross_pension_bu_yr = total(y_gross_pension_ind_yr) - - -* Capital income per benefit unit -/* -ypncp = rowtotal (fimninvnet_dv inc_fm inc_oth fimnprben_dv) -investment, family payments, other reg payments, trade union, maintanence, -sickness/accident -*/ -gen y_gross_capital_ind_yr = sinh(ypncp) * 12 - -bys stm idhh idbenefitunit: /// - egen valid_y_gross_capital_bu_yr = total(y_gross_capital_ind_yr) - - -* Equivalised disposable income per benefit unit - -* Generate number of dependent children in a benefit unit -gen depChild = 1 if (dag >= 0 & dag < $age_become_responsible) -bys swv idhh idbenefitunit: egen dnc_bu = sum(depChild) - -gen depChild02 = 1 if (dag >= 0 & dag <= 2) -bys swv idhh idbenefitunit: egen dnc02_bu = sum(depChild02) - -lab var dnc02 "Number of dependent children 0 - 2" - -* Generate modified-OECD equivalence scale: 1 for the 
household head, 0.5 for -* additional adults, 0.3 for children < 14 years old -bys swv idhh idbenefitunit: gen people_in_bu = _N -cap drop child -gen child = (dag < 14) -bys swv idhh idbenefitunit: egen children_in_bu = total(child) -gen other_adults = people_in_bu - children_in_bu - 1 - // -1 for the household head - -gen equiv_factor = 1 + (0.5 * other_adults) + (0.3 * children_in_bu) - // Start with 1 because each household must have at least the head -lab var equiv_factor "OECD-modified scale equivalence factor" - -gen valid_y_eq_disp_bu_yr = valid_y_disp_bu_yr / equiv_factor - -drop child people_in_bu child children_in_bu other_adults dnc_bu dnc02_bu - - -** Annual income shares - - -** Gross income deciles (ben unit) - -/* -xtile obs_gross_income_group = valid_y_gross_bu_yr, nq(10) - This is not correct for pooled data - -Problem: if many observations have exactly the same value, xtile would group -them into a single decile, causing one or more deciles to have very few -observations. -Adding a very small random amount can help differentiate tied values enough to -distribute them more evenly across deciles without distorting the data -meaningfully. 
-*/ -gen valid_y_gross_bu_yr_jit = valid_y_gross_bu_yr + runiform() * 1e-5 - -forvalues stm = 2011/$max_year { - - xtile obs_gross_income_group_`stm' = valid_y_gross_bu_yr_jit if /// - depChild != 1 & stm == `stm', nq(10) - - bys idhh: egen temp_obs_gross_income_group_`stm' = /// - max(obs_gross_income_group_`stm') if stm == `stm' - - replace obs_gross_income_group_`stm' = /// - temp_obs_gross_income_group_`stm' if /// - missing(obs_gross_income_group_`stm') - drop temp_obs_gross_income_group_`stm' - -} - -* Unify into a single variable -egen obs_gross_income_group = rowtotal(obs_gross_income_group_2011 /// - obs_gross_income_group_2012 obs_gross_income_group_2013 /// - obs_gross_income_group_2014 obs_gross_income_group_2015 /// - obs_gross_income_group_2016 obs_gross_income_group_2017 /// - obs_gross_income_group_2018 obs_gross_income_group_2019 /// - obs_gross_income_group_2020 obs_gross_income_group_2021 /// - obs_gross_income_group_2022 obs_gross_income_group_2023) - -drop obs_gross_income_group_2* -bys stm: fre obs_gross_income_group - - -** Activity status - -* Add adoption leave -replace les_c4 = 1 if les_c4 == 15 - -* Activity dummies -gen valid_employed = (les_c4 == 1) -gen valid_student = (les_c4 == 2) -gen valid_inactive = (les_c4 == 3) -gen valid_retired = (les_c4 == 4) - -replace valid_employed = . if les_c4 < 0 | les_c4 == . -replace valid_student = . if les_c4 < 0 | les_c4 == . -replace valid_inactive = . if les_c4 < 0 | les_c4 == . -replace valid_retired = . if les_c4 < 0 | les_c4 == . - - -** Education level - -* Attainment dummies -gen valid_edu_high = (deh_c3 == 1) -gen valid_edu_med = (deh_c3 == 2) -gen valid_edu_low = (deh_c3 == 3) - -replace valid_edu_high = . if deh_c3 == . | deh_c3 < 0 -replace valid_edu_med = . if deh_c3 == . | deh_c3 < 0 -replace valid_edu_low = . if deh_c3 == . 
| deh_c3 < 0 - - -** Family - -* Partnership status -gen valid_dcpst_p = (dcpst == 1) // partnered -gen valid_dcpst_snm = (dcpst == 2) // single never married -gen valid_dcpst_prvp = (dcpst == 3) // previously partnered -gen valid_dcpst_snmprvp = (dcpst == 2 | dcpst == 3) - // single never married & previously partnered - -replace valid_dcpst_p = . if dcpst == . | dcpst < 0 -replace valid_dcpst_snm = . if dcpst == . | dcpst < 0 -replace valid_dcpst_prvp = . if dcpst == . | dcpst < 0 -replace valid_dcpst_snmprvp = . if dcpst == . | dcpst < 0 - - -* Number of children -gen children_0 = (dnc == 0) -gen children_1 = (dnc == 1) -gen children_2 = (dnc == 2) -gen children_3plus = (dnc >= 3 & dnc != .) - - -* Interaction of partnership status and number of children -foreach var1 in valid_dcpst_p valid_dcpst_snm valid_dcpst_prvp /// - valid_dcpst_snmprvp { - - foreach var2 in children_0 children_1 children_2 children_3p { - - gen `var1'_`var2' = (`var1' & `var2') - - } - -} - -** Hours worked (weekly) - -* Impose consistency with les_c4 -* Prioritize les_c4 as we did with the European models -replace lhw = . if les_c4 != 1 - -count if (lhw == 0 | lhw == .) & les_c4 == 1 -// note that 0s could be generated from missing values in rowtotal function - -gen lhw_flag = 1 if (lhw < 6 | lhw == .) & les_c4 == 1 - -replace lhw = . if lhw == 0 & les_c4 == 1 -replace lhw = . if lhw < 6 & les_c4 == 1 - -egen hours_mode = mode(lhw) -replace lhw = hours_mode if les_c4 == 1 & lhw == . - -drop hours_mode - -tab les_c4 if lhw != 0 & lhw != . - -gen hours = lhw - -* Labour supply categories -gen laboursupplyweekly_hu = "ZERO" -replace laboursupplyweekly_hu = "TEN" if hours >= 6 & hours <= 15 -replace laboursupplyweekly_hu = "TWENTY" if hours > 15 & hours <= 25 -replace laboursupplyweekly_hu = "THIRTY" if hours > 25 & hours <= 35 -replace laboursupplyweekly_hu = "FORTY" if hours > 35 & hours != . 
- - -* Hourly wages -/* -There is only a very few missing values in the raw variable therefore treat the -variable as if no missing information -Zero values are possible if report negative gross labour income (self-employed) -*/ -count if les_c4 == 1 & yplgrs_dv == 0 - -gen valid_wage_hour = (sinh(yplgrs_dv)/4.345)/hours - -* Consistency check -tab hours if les_c4 != 1 -tab valid_wage_hour if les_c4 != 1 - -tab hours if les_c4 == 1 - - -drop if dag < 0 - - -save "$dir_data/ukhls_validation_full_sample.dta", replace - - -* Restrict sample to individuals between min and max age defined in -* 00_master file -keep if dag>= $min_age & dag <= $max_age - -save "$dir_data/ukhls_validation_sample.dta", replace - - diff --git a/validation/02_simulated_output_validation/do_files/06_01_plot_activity_status.do b/validation/02_simulated_output_validation/do_files/06_01_plot_activity_status.do deleted file mode 100644 index 5150687e2..000000000 --- a/validation/02_simulated_output_validation/do_files/06_01_plot_activity_status.do +++ /dev/null @@ -1,2844 +0,0 @@ -/******************************************************************************* -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Economic Activity Status plots -* AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 -* COUNTRY: UK - -* DESCRIPTION: This do file plots validation graphs for economics activity -* status (4 cat). 
-* -* NOTES: -*******************************************************************************/ - -******************************************************************************** -* 1 : Mean values over time -******************************************************************************** -******************************************************************************** -* 1.1 : Mean values over time - Economic Activity Status -******************************************************************************** -******************************************************************************** -* 1.1.1 : Young people (17-30) -******************************************************************************** - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive dag /// - valid_retired using "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -drop if dag > 30 -drop if dag < 17 - -collapse (mean) valid_employed valid_student valid_inactive valid_retired /// - [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dag using /// - "$dir_data/simulated_data_full.dta", clear - -drop if dag > 30 -drop if dag < 17 - -collapse (mean) sim_employed sim_student sim_inactive sim_retired, /// - by(run year) - -collapse (mean) sim_employed sim_student sim_inactive sim_retired /// - (sd) sim_employed_sd = sim_employed /// - sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year) - -* Compute 95% confidence interval -foreach varname in sim_employed sim_student sim_inactive sim_retired { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -collapse (mean) sim* valid*, by(year) - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low 
year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-30") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// - "$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_30_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -** By gender - -** Male -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive dag dgn /// - valid_retired using "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -drop if dag > 30 -drop if dag < 17 -drop if dgn == 0 - -collapse (mean) valid_employed valid_student valid_inactive valid_retired /// - [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dag dgn using /// - "$dir_data/simulated_data_full.dta", clear - -drop if dag > 30 -drop if dag < 17 -drop if dgn == "Female" - -collapse (mean) sim_employed sim_student sim_inactive sim_retired, /// - by(run year) - -collapse (mean) sim_employed sim_student sim_inactive sim_retired /// - (sd) sim_employed_sd = sim_employed /// - sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year) - -* Compute 95% confidence interval -foreach varname in sim_employed sim_student sim_inactive sim_retired { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -collapse (mean) sim* valid*, by(year) - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, 
sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-30, males ") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. ", /// - size(vsmall)) - -* Save figure -graph export /// - "$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_30_male.jpg", /// - replace width(2400) height(1350) quality(100) - - -** Female - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive dag dgn /// - valid_retired using "$dir_data/ukhls_validation_sample.dta", /// - clear - -drop if dag > 30 -drop if dag < 17 -drop if dgn == 1 - -collapse (mean) valid_employed valid_student valid_inactive valid_retired /// - dgn [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dag dgn using /// - "$dir_data/simulated_data.dta", clear - -drop if dag > 30 -drop if dag < 17 -drop if dgn == "Male" - -collapse (mean) sim_employed sim_student sim_inactive sim_retired, /// - by(run year) -collapse (mean) sim_employed sim_student sim_inactive sim_retired /// - (sd) sim_employed_sd = sim_employed /// - sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year) - -* Compute 95% confidence interval -foreach varname in 
sim_employed sim_student sim_inactive sim_retired { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -collapse (mean) sim* valid*, by(year) - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-30, females ") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// - "$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_30_female.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 1.1.2 : Working age (17-65) -******************************************************************************** - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive dgn dag /// - valid_retired using "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -* Select sample -keep if inrange(dag,17,65) - -collapse (mean) valid_employed valid_student valid_inactive valid_retired /// - [aw = dwt], by(year dgn) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dgn dag using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -keep if inrange(dag,17,65) - -gen dgn_coded = . 
-replace dgn_coded = 1 if dgn == "Male" -replace dgn_coded = 0 if dgn == "Female" - -drop dgn -rename dgn_coded dgn - -collapse (mean) sim_employed sim_student sim_inactive sim_retired, /// - by(run year dgn) - -collapse (mean) sim_employed sim_student sim_inactive sim_retired /// - (sd) sim_employed_sd = sim_employed /// - sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year dgn) - -foreach varname in sim_employed sim_student sim_inactive sim_retired { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -** All - -preserve - -collapse (mean) sim* valid*, by(year) - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-${max_age}") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, 
incapacity, carers, discouraged workers etc.)" "minus students and retired. ", /// - size(vsmall)) - -* Save figure -graph export /// - "$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_${max_age}_both.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - -** By gender -* Male -keep if dgn == 1 - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-${max_age}, males") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// - "$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_${max_age}_male.jpg", /// - replace width(2400) height(1350) quality(100) - - -restore, preserve - - -* Female -keep if dgn == 0 - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-${max_age}, females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_${max_age}_female.jpg", /// - replace width(2400) height(1350) quality(100) - -restore - - -* Females ages 17-60 (before state pension age) - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive dgn dag /// - valid_retired using "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -* Select sample -keep if inrange(dag,17,60) - -collapse (mean) valid_employed valid_student valid_inactive valid_retired /// - [aw = dwt], by(year dgn) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dgn dag using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -keep if inrange(dag,17,60) - -gen dgn_coded = . -replace dgn_coded = 1 if dgn == "Male" -replace dgn_coded = 0 if dgn == "Female" - -drop dgn -rename dgn_coded dgn - -collapse (mean) sim_employed sim_student sim_inactive sim_retired, /// - by(run year dgn) -collapse (mean) sim_employed sim_student sim_inactive sim_retired /// - (sd) sim_employed_sd = sim_employed /// - sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year dgn) - -foreach varname in sim_employed sim_student sim_inactive sim_retired { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Females -keep if dgn == 0 - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line 
valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-60") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. ", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_60_female.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 1.1.2.1 : Working age by partnership status -******************************************************************************** - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive valid_retired dcpst /// - dgn dag using "$dir_data/ukhls_validation_full_sample.dta", clear - -* Select sample -keep if inrange(dag,17,65) - -collapse (mean) valid_employed valid_student valid_inactive valid_retired /// - [aw = dwt], by(year dcpst dgn) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dcpst dgn dag /// - using "$dir_data/simulated_data_full.dta", clear - -* Select sample -keep if inrange(dag,17,65) - -gen dcpst_coded = . 
-replace dcpst_coded = 1 if dcpst == "Partnered" -replace dcpst_coded = 3 if dcpst == "PreviouslyPartnered" -replace dcpst_coded = 2 if dcpst == "SingleNeverMarried" - -drop dcpst -rename dcpst_coded dcpst - -gen dgn_coded = . -replace dgn_coded = 1 if dgn == "Male" -replace dgn_coded = 0 if dgn == "Female" - -drop dgn -rename dgn_coded dgn - -collapse (mean) sim_employed sim_student sim_inactive sim_retired, /// - by(run year dcpst dgn) -collapse (mean) sim_employed sim_student sim_inactive sim_retired /// - (sd) sim_employed_sd = sim_employed /// - sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year dcpst dgn) - -foreach varname in sim_employed sim_student sim_inactive sim_retired { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - - } - -merge 1:1 year dcpst dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -** All - -preserve - -collapse (mean) sim* valid*, by(year dcpst) - -* Plot figure: dcpst == 1, partnered -keep if dcpst == 1 - -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// -title("Economic Activity Status") /// - subtitle("Ages 
17-${max_age}, partnered") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. ", /// - size(vsmall)) - -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_${max_age}_both_partnered.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - -collapse (mean) sim* valid*, by(year dcpst) - -* Plot figure: dcpst == 2, single -keep if dcpst == 2 - -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-${max_age}, single") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_${max_age}_both_single.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - -collapse (mean) sim* valid*, by(year dcpst) - -* Plot figure: dcpst == 3, previously partnered -keep if dcpst == 3 - -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-${max_age}, previously partnered") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_${max_age}_both_prev_partnered.jpg", /// - replace width(2400) height(1350) quality(100) - -restore - - -** Males - -* Plot figure: dcpst == 1, partnered -preserve - -keep if dcpst == 1 & dgn == 1 - -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-${max_age}, partnered males") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_${max_age}_male_partnered.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - -* Plot figure: dcpst == 2, single -keep if dcpst == 2 & dgn == 1 - -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-${max_age}, single males") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_${max_age}_male_single.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - -* Plot figure: dcpst == 3, previously partnered -keep if dcpst == 3 & dgn == 1 - -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-${max_age}, prevously partnered males") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_${max_age}_male_prev_partnered.jpg", /// - replace width(2400) height(1350) quality(100) - -restore - - -** Females - -* Plot figure: dcpst == 1, partnered -preserve - -keep if dcpst == 1 & dgn == 0 - -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-${max_age}, partnered females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_${max_age}_female_partnered.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - -* Plot figure: dcpst == 2, single -keep if dcpst == 2 & dgn == 0 - -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-${max_age}, single females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_${max_age}_female_single.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - -* Plot figure: dcpst == 3, previously partnered -keep if dcpst == 3 & dgn == 0 - -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17-${max_age}, previously partnered females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17_${max_age}_female_prev_partnered.jpg", /// - replace width(2400) height(1350) quality(100) - -graph drop _all - -restore - - -******************************************************************************** -* 1.1.3 : All ages -******************************************************************************** - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive dgn /// - valid_retired dag les_c4 using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -collapse (mean) valid_employed valid_student valid_inactive valid_retired /// - [aw = dwt], by(year dgn) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dgn dag /// - les_c4 using "$dir_data/simulated_data_full.dta", clear - -gen dgn_coded = . -replace dgn_coded = 1 if dgn == "Male" -replace dgn_coded = 0 if dgn == "Female" - -drop dgn -rename dgn_coded dgn - -collapse (mean) sim_employed sim_student sim_inactive sim_retired, /// - by(run year dgn) - -collapse (mean) sim_employed sim_student sim_inactive sim_retired /// - (sd) sim_employed_sd = sim_employed /// - sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year dgn) - -foreach varname in sim_employed sim_student sim_inactive sim_retired { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -** All - -preserve - -collapse (mean) sim* valid*, by(year) - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high 
sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("All ages") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_all_both.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - - -** By gender -* Males - -keep if dgn == 1 - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("All ages, males") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_all_male.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - - -* Females - -keep if dgn == 0 - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("All ages, females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_all_female.jpg", /// - replace width(2400) height(1350) quality(100) - -restore - -graph drop _all - - -******************************************************************************** -* 1.1.4 : Adult population 17+ -******************************************************************************** - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive dgn dag /// - valid_retired using /// - "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -* Select sample -drop if dag < 17 - -collapse (mean) valid_employed valid_student valid_inactive valid_retired /// - [aw = dwt], by(year dgn) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dgn dag using /// - "$dir_data/simulated_data_full.dta", clear - -drop if dag < 17 - -gen dgn_coded = . 
-replace dgn_coded = 1 if dgn == "Male" -replace dgn_coded = 0 if dgn == "Female" - -drop dgn -rename dgn_coded dgn - -collapse (mean) sim_employed sim_student sim_inactive sim_retired, /// - by(run year dgn) - -collapse (mean) sim_employed sim_student sim_inactive sim_retired /// - (sd) sim_employed_sd = sim_employed /// - sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year dgn) - -foreach varname in sim_employed sim_student sim_inactive sim_retired { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -** All - -preserve - -collapse (mean) sim* valid*, by(year) - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17+") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, 
discouraged workers etc.)" "minus students and retired. ", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17plus_both.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - - -** By gender -* Males - -keep if dgn == 1 - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17+, males") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17plus_male.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - - -* Females - -keep if dgn == 0 - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))) /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(3 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(4 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(5 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(6 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(7 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(8 "Retired, UKHLS"))), /// - title("Economic Activity Status") /// - subtitle("Ages 17+, females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_ts_17plus_female.jpg", /// - replace width(2400) height(1350) quality(100) - -restore - -graph drop _all - - -******************************************************************************** -* 1.2 : Mean values over time - Share Employed -******************************************************************************** - -******************************************************************************** -* 1.2.1 : Working age (17-65) -******************************************************************************** - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive dgn dag /// - valid_retired using "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -* Select sample -keep if inrange(dag,17,65) - -collapse (mean) valid_employed valid_student valid_inactive valid_retired /// - [aw = dwt], by(year dgn) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dgn dag using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -keep if inrange(dag,17,65) - -gen dgn_coded = . 
-replace dgn_coded = 1 if dgn == "Male" -replace dgn_coded = 0 if dgn == "Female" - -drop dgn -rename dgn_coded dgn - -collapse (mean) sim_employed sim_student sim_inactive sim_retired, /// - by(run year dgn) -collapse (mean) sim_employed sim_student sim_inactive sim_retired /// - (sd) sim_employed_sd = sim_employed /// - sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year dgn) - -foreach varname in sim_employed sim_student sim_inactive sim_retired { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - - } - -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -** All - -preserve - -collapse (mean) sim* valid*, by(year) - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))), /// - title("Share Employed") /// - subtitle("Ages 17-${max_age}") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note(Notes:, size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_employed_ts_17_${max_age}_both.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - - -** Males - -keep if dgn == 1 - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))), /// - title("Share Employed") /// - subtitle("Ages 17-${max_age}, males") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, 
labsize(small)) /// - legend(size(small)) /// - note(Notes:, size(vsmall)) - -* Save figure -graph export /// - "$dir_output_files/economic_activity/validation_${country}_employed_ts_17_${max_age}_male.jpg", /// - replace width(2400) height(1350) quality(100) - - -restore, preserve - - -** Females - -keep if dgn == 0 - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))), /// - title("Share Employed") /// - subtitle("Ages 17-${max_age}, females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note(Notes:, size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_employed_ts_17_${max_age}_female.jpg", /// - replace width(2400) height(1350) quality(100) - -restore - - -** Females ages 17-60 (before state pension age) - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive dgn dag /// - valid_retired using "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -* Select sample -keep if inrange(dag,17,60) - -collapse (mean) valid_employed valid_student valid_inactive valid_retired /// - [aw = dwt], by(year dgn) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dgn dag using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -keep if inrange(dag,17,60) - -gen dgn_coded = . 
-replace dgn_coded = 1 if dgn == "Male" -replace dgn_coded = 0 if dgn == "Female" - -drop dgn -rename dgn_coded dgn - -collapse (mean) sim_employed sim_student sim_inactive sim_retired, /// - by(run year dgn) -collapse (mean) sim_employed sim_student sim_inactive sim_retired /// - (sd) sim_employed_sd = sim_employed /// - sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year dgn) - -foreach varname in sim_employed sim_student sim_inactive sim_retired { - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd -} - -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Females -keep if dgn == 0 - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))), /// - title("Share Employed") /// - subtitle("Ages 17-60") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note(Notes:, size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_employed_ts_17_60_female.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 1.2.2 : All ages -******************************************************************************** - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive dgn /// - valid_retired using /// - "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -collapse (mean) valid_employed valid_student valid_inactive valid_retired /// - [aw = dwt], by(year dgn) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run 
year sim_employed sim_student sim_inactive sim_retired dgn using /// - "$dir_data/simulated_data_full.dta", clear - -gen dgn_coded = . -replace dgn_coded = 1 if dgn == "Male" -replace dgn_coded = 0 if dgn == "Female" - -drop dgn -rename dgn_coded dgn - -collapse (mean) sim_employed sim_student sim_inactive sim_retired, /// - by(run year dgn) - -collapse (mean) sim_employed sim_student sim_inactive sim_retired /// - (sd) sim_employed_sd = sim_employed /// - sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year dgn) - -foreach varname in sim_employed sim_student sim_inactive sim_retired { - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd -} - -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -** All - -preserve - -collapse (mean) sim* valid*, by(year) - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))), /// - title("Share Employed") /// - subtitle("All ages") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note(Notes:, size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_employed_ts_all_both.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - - -** Males - -keep if dgn == 1 - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))), /// - title("Share Employed") /// - subtitle("All ages, males") /// - xtitle("Year", size(small)) /// - 
ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note(Notes:, size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_employed_ts_all_male.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - - -** Females - -keep if dgn == 0 - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))), /// - title("Share Employed") /// - subtitle("All ages, females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note(Notes:, size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_employed_ts_all_female.jpg", /// - replace width(2400) height(1350) quality(100) - -restore - -graph drop _all - - -******************************************************************************** -* 1.2.3 : Adult population (17+) -******************************************************************************** - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive dgn dag /// - valid_retired using /// - "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -drop if dag < 17 - -collapse (mean) valid_employed valid_student valid_inactive valid_retired /// - [aw = dwt], by(year dgn) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dgn dag using /// - "$dir_data/simulated_data_full.dta", clear - -drop if dag < 17 - -gen dgn_coded = . 
-replace dgn_coded = 1 if dgn == "Male" -replace dgn_coded = 0 if dgn == "Female" - -drop dgn -rename dgn_coded dgn - -collapse (mean) sim_employed sim_student sim_inactive sim_retired, /// - by(run year dgn) - -collapse (mean) sim_employed sim_student sim_inactive sim_retired /// - (sd) sim_employed_sd = sim_employed /// - sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year dgn) - -foreach varname in sim_employed sim_student sim_inactive sim_retired { - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd -} - -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -** All - -preserve - -collapse (mean) sim* valid*, by(year) - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))), /// - title("Share Employed") /// - subtitle("Ages 17+") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note(Notes:, size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_employed_ts_17plus_both.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - - -** Males - -keep if dgn == 1 - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))), /// - title("Share Employed") /// - subtitle("Ages 17+, males") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - 
legend(size(small)) /// - note(Notes:, size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_employed_ts_17plus_male.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - - -** Females - -keep if dgn == 0 - -* Plot figure -twoway /// -(rarea sim_employed_high sim_employed_low year, sort color(green%20) /// - legend(label(1 "Employed, simulated"))) /// -(line valid_employed year, sort color(green) /// - legend(label(2 "Employed, UKHLS"))), /// - title("Share Employed") /// - subtitle("Ages 17+, females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note(Notes:, size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_employed_ts_17plus_female.jpg", /// - replace width(2400) height(1350) quality(100) - -restore - -graph drop _all - -******************************************************************************** -* 1.2.4 : By age group -******************************************************************************** - -* Prepare validation data -use year dwt dgn ageGroup valid_employed dag using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -keep if inrange(dag,16,79) - -gen employed_f = (valid_employed) if dgn == 0 -gen employed_m = (valid_employed) if dgn == 1 - -drop if ageGroup == 0 | ageGroup == 8 - -collapse (mean) employed_f employed_m [aweight=dwt], /// - by(ageGroup year) - -drop if missing(ageGroup) - -reshape wide employed_f employed_m, i(year) j(ageGroup) - -forvalues i = 1(1)7 { - - rename employed_f`i' employed_f_`i'_valid - rename employed_m`i' employed_m_`i'_valid - -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_sex ageGroup sim_employed using /// - "$dir_data/simulated_data.dta", clear - -gen employed_f = 
(sim_employed) if sim_sex == 2 -gen employed_m = (sim_employed) if sim_sex == 1 - -collapse (mean) employed_f employed_m, by(ageGroup run year) -drop if missing(ageGroup) - -reshape wide employed_f employed_m, i(year run) j(ageGroup) - -forvalues i = 1(1)7 { - - rename employed_f`i' employed_f_`i'_sim - rename employed_m`i' employed_m_`i'_sim - -} - -collapse (mean) employed* /// - (sd) sd_employed_f_1_sim=employed_f_1_sim /// - sd_employed_f_2_sim=employed_f_2_sim /// - sd_employed_f_3_sim=employed_f_3_sim /// - sd_employed_f_4_sim=employed_f_4_sim /// - sd_employed_f_5_sim=employed_f_5_sim /// - sd_employed_f_6_sim=employed_f_6_sim /// - sd_employed_f_7_sim=employed_f_7_sim /// - sd_employed_m_1_sim=employed_m_1_sim /// - sd_employed_m_2_sim=employed_m_2_sim /// - sd_employed_m_3_sim=employed_m_3_sim /// - sd_employed_m_4_sim=employed_m_4_sim /// - sd_employed_m_5_sim=employed_m_5_sim /// - sd_employed_m_6_sim=employed_m_6_sim /// - sd_employed_m_7_sim=employed_m_7_sim /// - , by(year) - - /* - sd_employed_f_8_sim=employed_f_8_sim /// - sd_employed_m_8_sim=employed_m_8_sim /// */ - -forvalues i = 1(1)7 { - - gen employed_f_`i'_sim_high = /// - employed_f_`i'_sim + 1.96*sd_employed_f_`i'_sim - gen employed_f_`i'_sim_low = /// - employed_f_`i'_sim - 1.96*sd_employed_f_`i'_sim - gen employed_m_`i'_sim_high = /// - employed_m_`i'_sim + 1.96*sd_employed_m_`i'_sim - gen employed_m_`i'_sim_low = /// - employed_m_`i'_sim - 1.96*sd_employed_m_`i'_sim - - } - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figures -foreach vble in "employed_f" "employed_m" { - - twoway (rarea `vble'_1_sim_high `vble'_1_sim_low year, /// - sort color(green%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_1_valid year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Age 16-19") /// - name(`vble'_1, replace) /// - ylabel(0.2 [0.4] 1) /// - xtitle("") /// - graphregion(color(white)) - - twoway (rarea 
`vble'_2_sim_high `vble'_2_sim_low year, /// - sort color(green%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_2_valid year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Age 20-24") /// - name(`vble'_2, replace) /// - ylabel(0.2 [0.4] 1) /// - xtitle("") /// - graphregion(color(white)) - - twoway (rarea `vble'_3_sim_high `vble'_3_sim_low year, /// - sort color(green%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_3_valid year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Age 25-29") /// - name(`vble'_3, replace) /// - ylabel(0.2 [0.4] 1) /// - xtitle("") /// - graphregion(color(white)) - - twoway (rarea `vble'_4_sim_high `vble'_4_sim_low year, /// - sort color(green%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_4_valid year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Age 30-34") /// - name(`vble'_4, replace) /// - ylabel(0.2 [0.4] 1) /// - xtitle("") /// - graphregion(color(white)) - - twoway (rarea `vble'_5_sim_high `vble'_5_sim_low year, /// - sort color(green%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_5_valid year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Age 35-39") /// - name(`vble'_5, replace) /// - ylabel(0.2 [0.4] 1) /// - xtitle("") /// - graphregion(color(white)) - - twoway (rarea `vble'_6_sim_high `vble'_6_sim_low year, /// - sort color(green%20) /// - legend(label(1 "Simulated") position(6) /// - rows(1)))(line `vble'_6_valid year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Age 40-59") /// - name(`vble'_6, replace) /// - ylabel(0.2 [0.4] 1) /// - xtitle("") /// - graphregion(color(white)) - - twoway (rarea `vble'_7_sim_high `vble'_7_sim_low year, /// - sort color(green%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_7_valid year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - 
title("Age 60-79") /// - name(`vble'_7, replace) /// - ylabel(0.2 [0.4] 1) /// - xtitle("") /// - graphregion(color(white)) - -} - -* Save figures - -* Share employed males -grc1leg employed_m_1 employed_m_2 employed_m_3 employed_m_4 employed_m_5 /// - employed_m_6 employed_m_7 , /// - title("Share Employed by Age Group") /// - subtitle("Males") /// - legendfrom(employed_m_1) /// - graphregion(color(white)) /// - note("Notes: ", size(vsmall)) - -graph export /// -"$dir_output_files/economic_activity/validation_${country}_employed_ts_all_male.jpg", /// - replace width(2400) height(1350) quality(100) - - -* Share employed females -grc1leg employed_f_1 employed_f_2 employed_f_3 employed_f_4 employed_f_5 /// - employed_f_6 employed_f_7 , /// - title("Share Employed by Age Group") /// - subtitle("Females") /// - legendfrom(employed_f_1) /// - graphregion(color(white)) /// - note("Notes: ", size(vsmall)) - -graph export /// -"$dir_output_files/economic_activity/validation_${country}_employed_ts_all_female.jpg", /// - replace width(2400) height(1350) quality(100) - -graph drop _all - - - -******************************************************************************** -* 1.3 : Mean values over time - Non-employed shares -******************************************************************************** - -******************************************************************************** -* 1.3.1 : Working age (17-65) -******************************************************************************** - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive /// - valid_retired dag using "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -* Select sample -keep if inrange(dag,17,65) - -drop if valid_employed == 1 -drop valid_employed - -collapse (mean) valid_student valid_inactive valid_retired /// - [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student 
sim_inactive sim_retired dag using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -keep if inrange(dag,17,65) - -drop if sim_employed == 1 -drop sim_employed - -collapse (mean) sim_student sim_inactive sim_retired, /// - by(run year) - -collapse (mean) sim_student sim_inactive sim_retired /// - (sd) sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year) - -foreach varname in sim_student sim_inactive sim_retired { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -** All - -preserve - -collapse (mean) sim* valid*, by(year) - -* Plot figure -twoway /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(1 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(2 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(3 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(4 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(5 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(6 "Retired, UKHLS"))), /// - title("Economic Activity of the Non-Employed") /// - subtitle("Ages 17-${max_age}") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.) minus" "students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// - "$dir_output_files/economic_activity/validation_${country}_activity_status_not_employed_ts_17_${max_age}_both.jpg", /// - replace width(2400) height(1350) quality(100) - - - -** Males - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive valid_retired /// - dgn dag using "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -* Select sample -keep if inrange(dag,17,65) - -drop if dgn == 0 -drop if valid_employed == 1 -drop valid_employed - -collapse (mean) valid_student valid_inactive valid_retired /// - [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dgn dag using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -keep if inrange(dag,17,65) - -drop if dgn == "Female" -drop if sim_employed == 1 -drop sim_employed - -collapse (mean) sim_student sim_inactive sim_retired, /// - by(run year) - -collapse (mean) sim_student sim_inactive sim_retired /// - (sd) sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year) - -foreach varname in sim_student sim_inactive sim_retired { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -* Plot figure -twoway /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(1 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(2 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(3 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(4 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - 
legend(label(5 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(6 "Retired, UKHLS"))), /// - title("Economic Activity of the Non-Employed") /// - subtitle("Ages 17-${max_age}, males") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. ", /// - size(vsmall)) - -* Save figure -graph export /// - "$dir_output_files/economic_activity/validation_${country}_activity_status_not_employed_ts_17_${max_age}_male.jpg", /// - replace width(2400) height(1350) quality(100) - - -** Females - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive /// - valid_retired dgn dag using "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -* Select sample -keep if inrange(dag,17,65) - -drop if dgn == 1 -drop if valid_employed == 1 -drop valid_employed - -collapse (mean) valid_student valid_inactive valid_retired /// - [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dgn dag using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -keep if inrange(dag,17,65) - -drop if dgn == "Male" -drop if sim_employed == 1 -drop sim_employed - -collapse (mean) sim_student sim_inactive sim_retired, /// - by(run year) - -collapse (mean) sim_student sim_inactive sim_retired /// - (sd) sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year) - -foreach varname in sim_student sim_inactive sim_retired { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - - } - -merge 1:1 year using 
"$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(1 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(2 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(3 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(4 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(5 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(6 "Retired, UKHLS"))), /// - title("Economic Activity of the Non-Employed") /// - subtitle("Ages 17-${max_age}, females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_not_employed_ts_17_${max_age}_female.jpg", /// - replace width(2400) height(1350) quality(100) - - - -** Females ages 17-60 (before state pension age) - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive dgn dag /// - valid_retired using "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -* Select sample -keep if inrange(dag,17,60) - -drop if dgn == 1 -drop if valid_employed == 1 -drop valid_employed dgn - -collapse (mean) valid_student valid_inactive valid_retired /// - [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dgn dag using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -keep if inrange(dag,17,60) - -drop if dgn == "Male" -drop if sim_employed == 1 -drop sim_employed dgn - - -collapse (mean) sim_student sim_inactive sim_retired, /// - by(run year) - -collapse (mean) sim_student sim_inactive sim_retired /// - (sd) sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year) - -foreach varname in sim_student sim_inactive sim_retired { - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -* Plot figure -twoway /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(1 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(2 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(3 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(4 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low 
year, sort color(grey%20) /// - legend(label(5 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(6 "Retired, UKHLS"))), /// - title("Economic Activity of the Non-Employed") /// - subtitle("Ages 17-60") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. ", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_not_employed_ts_17_60_female.jpg", /// - replace width(2400) height(1350) quality(100) - -restore - - -******************************************************************************** -* 1.3.2 : All ages -******************************************************************************** - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive dgn /// - valid_retired using /// - "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -drop if valid_employed == 1 -drop valid_employed - -collapse (mean) valid_student valid_inactive valid_retired /// - [aw = dwt], by(year dgn) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dgn using /// - "$dir_data/simulated_data_full.dta", clear - -drop if sim_employed == 1 -drop sim_employed - -gen dgn_coded = . 
-replace dgn_coded = 1 if dgn == "Male" -replace dgn_coded = 0 if dgn == "Female" - -drop dgn -rename dgn_coded dgn - -collapse (mean) sim_student sim_inactive sim_retired, /// - by(run year dgn) - -collapse (mean) sim_student sim_inactive sim_retired /// - (sd) sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year dgn) - -foreach varname in sim_student sim_inactive sim_retired { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - - } - -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -** All - -preserve - -collapse (mean) sim* valid*, by(year) - -* Plot figure -twoway /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(1 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(2 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(3 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(4 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(5 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(6 "Retired, UKHLS"))), /// - title("Economic Activity of the Non-Employed") /// - subtitle("All ages") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_not_employed_ts_all_both.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - - -** Males - -keep if dgn == 1 - -* Plot figure -twoway /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(1 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(2 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(3 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(4 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(5 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(6 "Retired, UKHLS"))), /// - title("Economic Activity of the Non-Employed") /// - subtitle("All ages, males") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_not_employed_ts_all_male.jpg", /// - replace width(2400) height(1350) quality(100) - - -restore, preserve - - -** Females - -keep if dgn == 0 - -* Plot figure -twoway /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(1 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(2 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(3 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(4 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(5 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(6 "Retired, UKHLS"))), /// - title("Economic Activity of the Non-Employed") /// - subtitle("All ages, females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_not_employed_ts_all_female.jpg", /// - replace width(2400) height(1350) quality(100) - -restore - - -******************************************************************************** -* 1.3.3 : Adult population 17+ -******************************************************************************** - -* Prepare validation data -use year dwt valid_employed valid_student valid_inactive dgn dag /// - valid_retired using /// - "$dir_data/ukhls_validation_full_sample.dta", /// - clear - -* Select sample -drop if dag < 17 - -drop if valid_employed == 1 -drop valid_employed - -collapse (mean) valid_student valid_inactive valid_retired /// - [aw = dwt], by(year dgn) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_employed sim_student sim_inactive sim_retired dgn dag using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -drop if dag < 17 - -drop if sim_employed == 1 -drop sim_employed - -gen dgn_coded = . 
-replace dgn_coded = 1 if dgn == "Male" -replace dgn_coded = 0 if dgn == "Female" - -drop dgn -rename dgn_coded dgn - -collapse (mean) sim_student sim_inactive sim_retired, /// - by(run year dgn) - -collapse (mean) sim_student sim_inactive sim_retired /// - (sd) sim_student_sd = sim_student /// - sim_inactive_sd = sim_inactive /// - sim_retired_sd = sim_retired /// - , by(year dgn) - -foreach varname in sim_student sim_inactive sim_retired { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -** All - -preserve - -collapse (mean) sim* valid*, by(year) - -* Plot figure -twoway /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(1 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(2 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(3 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(4 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(5 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(6 "Retired, UKHLS"))), /// - title("Economic Activity of the Non-Employed") /// - subtitle("Ages 17+") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_not_employed_ts_17plus_both.jpg", /// - replace width(2400) height(1350) quality(100) - -restore, preserve - - -** Males - -keep if dgn == 1 - -* Plot figure -twoway /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(1 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(2 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(3 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(4 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(5 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(6 "Retired, UKHLS"))), /// - title("Economic Activity of the Non-Employed") /// - subtitle("Ages 17+, males") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_not_employed_ts_17plus_male.jpg", /// - replace width(2400) height(1350) quality(100) - - -restore, preserve - - -** Females - -keep if dgn == 0 - -* Plot figure -twoway /// -(rarea sim_student_high sim_student_low year, sort color(blue%20) /// - legend(label(1 "Students, simulated"))) /// -(line valid_student year, sort color(blue) /// - legend(label(2 "Students, UKHLS"))) /// -(rarea sim_inactive_high sim_inactive_low year, sort color(red%20) /// - legend(label(3 "Non-employed, simulated"))) /// -(line valid_inactive year, sort color(red) /// - legend(label(4 "Non-employed, UKHLS"))) /// -(rarea sim_retired_high sim_retired_low year, sort color(grey%20) /// - legend(label(5 "Retired, simulated"))) /// -(line valid_retired year, sort color(grey) /// - legend(label(6 "Retired, UKHLS"))), /// - title("Economic Activity of the Non-Employed") /// - subtitle("Ages 17+, females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Non-employed includes the unemployed and inactive (homemakers, incapacity, carers, discouraged workers etc.)" "minus students and retired. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/economic_activity/validation_${country}_activity_status_not_employed_ts_17plus_female.jpg", /// - replace width(2400) height(1350) quality(100) - -restore - - -******************************************************************************** -* 1.4 Mean values over time - Share students -******************************************************************************** - -******************************************************************************** -* 1.4.1 By age group -******************************************************************************** - -* Prepare validation data -use year dwt dgn ageGroup valid_student dag using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -keep if inrange(dag,16,79) -gen student = valid_student - -drop if ageGroup == 0 | ageGroup == 8 - -collapse (mean) student [aweight=dwt], /// - by(ageGroup year) -drop if missing(ageGroup) -reshape wide student , i(year) j(ageGroup) - -forvalues i = 1(1)7 { - - rename student`i' student_`i'_valid - -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_sex ageGroup sim_student using /// - "$dir_data/simulated_data.dta", clear - -gen student = sim_student - -collapse (mean) student, by(ageGroup run year) -drop if missing(ageGroup) -reshape wide student, i(year run) j(ageGroup) - -forvalues i=1(1)7{ - - rename student`i' student_`i'_sim - -} - -collapse (mean) student* /// - (sd) sd_student_1_sim =student_1_sim /// - sd_student_2_sim = student_2_sim /// - sd_student_3_sim = student_3_sim /// - sd_student_4_sim = student_4_sim /// - sd_student_5_sim = student_5_sim /// - sd_student_6_sim = student_6_sim /// - sd_student_7_sim = student_7_sim /// - , by(year) - - /* sd_student_8_sim=student_8_sim /// - sd_employed_f_8_sim=employed_f_8_sim /// - sd_employed_m_8_sim=employed_m_8_sim /// */ - -forvalues i = 1(1)7 { - gen student_`i'_sim_high = student_`i'_sim + 
1.96*sd_student_`i'_sim - gen student_`i'_sim_low = student_`i'_sim - 1.96*sd_student_`i'_sim -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figures - -twoway (rarea student_1_sim_high student_1_sim_low year, /// - sort color(blue%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line student_1_valid year, sort color(blue) /// - legend(label(2 "UKHLS"))), /// - title("Age 16-19") /// - name(student_1, replace) /// - ylabel(0 [0.4] 0.8) /// - xtitle("") /// - graphregion(color(white)) - -twoway (rarea student_2_sim_high student_2_sim_low year, /// - sort color(blue%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line student_2_valid year, sort color(blue) /// - legend(label(2 "UKHLS"))), /// - title("Age 20-24") /// - name(student_2, replace) /// - ylabel(0 [0.4] 0.8) /// - xtitle("") /// - graphregion(color(white)) - - -twoway (rarea student_3_sim_high student_3_sim_low year, /// - sort color(blue%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line student_3_valid year, sort color(blue) /// - legend(label(2 "UKHLS"))), /// - title("Age 25-29") /// - name(student_3, replace) /// - ylabel(0 [0.4] 0.8) /// - xtitle("") /// - graphregion(color(white)) - -twoway (rarea student_4_sim_high student_4_sim_low year, /// - sort color(blue%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line student_4_valid year, sort color(blue) /// - legend(label(2 "UKHLS"))), /// - title("Age 30-34") /// - name(student_4, replace) /// - ylabel(0 [0.4] 0.8) /// - xtitle("") /// - graphregion(color(white)) - -twoway (rarea student_5_sim_high student_5_sim_low year, /// - sort color(blue%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line student_5_valid year, sort color(blue) /// - legend(label(2 "UKHLS"))), /// - title("Age 35-39") /// - name(student_5, replace) /// - ylabel(0 [0.4] 0.8) /// - xtitle("") /// - graphregion(color(white)) - -twoway (rarea 
student_6_sim_high student_6_sim_low year, /// - sort color(blue%20) /// - legend(label(1 "Simulated") position(6) /// - rows(1)))(line student_6_valid year, sort color(blue) /// - legend(label(2 "UKHLS"))), /// - title("Age 40-59") /// - name(student_6, replace) /// - ylabel(0 [0.4] 0.8) /// - xtitle("") /// - graphregion(color(white)) - -twoway (rarea student_7_sim_high student_7_sim_low year, /// - sort color(blue%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line student_7_valid year, sort color(blue) /// - legend(label(2 "UKHLS"))), /// - title("Age 60-79") /// - name(student_7, replace) /// - ylabel(0 [0.4] 0.8) /// - xtitle("") /// - graphregion(color(white)) - - -* Save figures - -* Share students -grc1leg student_1 student_2 student_3 student_4 student_5 student_6 /// - student_7 , /// - title("Share of Students by Age Group") /// - legendfrom(student_1) /// - graphregion(color(white)) /// - note("Notes: ", size(vsmall)) - -graph export /// -"$dir_output_files/economic_activity/validation_${country}_students_ts_all_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -twoway (rarea student_1_sim_high student_1_sim_low year, /// - sort color(blue%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line student_1_valid year, sort color(blue) /// - legend(label(2 "UKHLS"))), /// - title("Age 16-19") /// - name(student_1a, replace) /// - ylabel(0.4 [0.1] 0.75) /// - xtitle("") /// - graphregion(color(white)) - -twoway (rarea student_2_sim_high student_2_sim_low year, /// - sort color(blue%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line student_2_valid year, sort color(blue) /// - legend(label(2 "UKHLS"))), /// - title("Age 20-24") /// - name(student_2a, replace) /// - ylabel(0.1 [0.05] 0.3) /// - xtitle("") /// - graphregion(color(white)) - -twoway (rarea student_3_sim_high student_3_sim_low year, /// - sort color(blue%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - 
(line student_3_valid year, sort color(blue) /// - legend(label(2 "UKHLS"))), /// - title("Age 25-29") /// - name(student_3a, replace) /// - ylabel(0 [0.025] 0.1) /// - xtitle("") /// - graphregion(color(white)) - -grc1leg student_1a student_2a student_3a , /// - title("Share of Students by Age Group") /// - legendfrom(student_1a) /// - graphregion(color(white)) /// - note("Notes: ", size(vsmall)) - -graph export /// - "$dir_output_files/economic_activity/validation_${country}_students_ts_15_29_both.jpg", /// - replace width(2400) height(1350) quality(100) - -graph drop _all - - diff --git a/validation/02_simulated_output_validation/do_files/06_02_plot_education_level.do b/validation/02_simulated_output_validation/do_files/06_02_plot_education_level.do deleted file mode 100644 index d8c0edda7..000000000 --- a/validation/02_simulated_output_validation/do_files/06_02_plot_education_level.do +++ /dev/null @@ -1,637 +0,0 @@ -******************************************************************************** -* SECTION: Validation -* OBJECT: Education -* AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 -* COUNTRY: UK - -* NOTES: This do file plots simulated and UKHLS education. -* Unable to look at transitions because use X-sectional -* SILC data. 
-******************************************************************************** - -******************************************************************************** -* 1 : Mean values over time -******************************************************************************** - -******************************************************************************** -* 1.1 : Educational attainment -******************************************************************************** - -******************************************************************************** -* 1.1.1 : Educational attainment - 17-65 -******************************************************************************** - -* Prepare validation data -use year dwt valid_edu_high valid_edu_med valid_edu_low les_c4 dag using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -* Select sample -drop if les_c4 == 2 // | les_c4 == -9? -drop les_c4 -keep if inrange(dag,17,65) - -* Compute annual shares -collapse (mean) valid_edu_high valid_edu_med valid_edu_low [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_edu_high sim_edu_med sim_edu_low les_c4 dag using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -drop if les_c4 == "Student" -drop les_c4 -keep if inrange(dag,17,65) - -* Compute shares and standard deviation -collapse (mean) sim_edu_high sim_edu_med sim_edu_low, by(run year) -collapse (mean) sim_edu_high sim_edu_med sim_edu_low /// - (sd) sim_edu_high_sd = sim_edu_high /// - sim_edu_med_sd = sim_edu_med /// - sim_edu_low_sd = sim_edu_low /// - , by(year) - -* Approx 95% confidence interval -foreach varname in sim_edu_high sim_edu_med sim_edu_low { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -* Combine datasets -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway /// -(rarea sim_edu_high_high 
sim_edu_high_low year, sort color(green%20) /// - legend(label(1 "High education, simulated"))) /// -(line valid_edu_high year, sort color(green) /// - legend(label(2 "High education, UKHLS"))) /// -(rarea sim_edu_med_high sim_edu_med_low year, sort color(blue%20) /// - legend(label(3 "Medium education, simulated"))) /// -(line valid_edu_med year, sort color(blue) /// - legend(label(4 "Medium education, UKHLS"))) /// -(rarea sim_edu_low_high sim_edu_low_low year, sort color(red%20) /// - legend(label(5 "Low education, simulated"))) /// -(line valid_edu_low year, sort color(red) /// - legend(label(6 "Low education, UKHLS"))), /// - title("Educational Attainment") /// - subtitle("Ages 17-${max_age}") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Current students excluded from sample", /// - size(vsmall)) - -graph export /// -"$dir_output_files/education/validation_${country}_education_ts_17_${max_age}_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 1.1.2 : Educational attainment - 17-65, by gender -******************************************************************************** - -* Prepare validation data -use year dwt valid_edu_high valid_edu_med valid_edu_low dgn les_c4 dag using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -* Select sample -drop if les_c4 == 2 -drop les_c4 -keep if inrange(dag,17,65) - -* Compute annual shares -collapse (mean) valid_edu_high valid_edu_med valid_edu_low [aw = dwt], /// - by(year dgn) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_edu_high sim_edu_med sim_edu_low dgn les_c4 dag using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -drop if les_c4 == "Student" -keep if inrange(dag,17,65) - 
-gen dgn2 = 0 if dgn == "Female" -replace dgn2 = 1 if dgn == "Male" - -drop dgn -rename dgn2 dgn - -* Compute shares and sd -collapse (mean) sim_edu_high sim_edu_med sim_edu_low, by(run year dgn) -collapse (mean) sim_edu_high sim_edu_med sim_edu_low /// - (sd) sim_edu_high_sd = sim_edu_high /// - (sd) sim_edu_med_sd = sim_edu_med /// - (sd) sim_edu_low_sd = sim_edu_low /// - , by(year dgn ) - -* Approx 95% confidence interval -foreach varname in sim_edu_high sim_edu_med sim_edu_low { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -* Combine datasets -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure - Female -twoway /// -(rarea sim_edu_high_high sim_edu_high_low year if dgn == 0, /// - sort color(green%20) legend(label(1 "High education, simulated"))) /// -(line valid_edu_high year if dgn == 0, sort color(green) /// - legend(label(2 "High education, UKHLS"))) /// -(rarea sim_edu_med_high sim_edu_med_low year if dgn == 0, /// - sort color(blue%20) legend(label(3 "Medium education, simulated"))) /// -(line valid_edu_med year if dgn == 0, sort color(blue) /// - legend(label(4 "Medium education, UKHLS"))) /// -(rarea sim_edu_low_high sim_edu_low_low year if dgn == 0, sort color(red%20) /// - legend(label(5 "Low education, simulated"))) /// -(line valid_edu_low year if dgn == 0, sort color(red) /// - legend(label(6 "Low education, UKHLS"))), /// - title("Educational Attainment") /// - subtitle("Ages 17-${max_age}, females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Current students excluded from sample", /// - size(vsmall)) - -graph export /// -"$dir_output_files/education/validation_${country}_education_ts_17_${max_age}_female.jpg", /// - replace width(2400) height(1350) quality(100) - -* Plot figure 
- Male -twoway /// -(rarea sim_edu_high_high sim_edu_high_low year if dgn == 1, /// - sort color(green%20) legend(label(1 "High education, simulated"))) /// -(line valid_edu_high year if dgn == 1, sort color(green) /// - legend(label(2 "High education, UKHLS"))) /// -(rarea sim_edu_med_high sim_edu_med_low year if dgn == 1, /// - sort color(blue%20) legend(label(3 "Medium education, simulated"))) /// -(line valid_edu_med year if dgn == 1, sort color(blue) /// - legend(label(4 "Medium education, UKHLS"))) /// -(rarea sim_edu_low_high sim_edu_low_low year if dgn == 1, sort color(red%20) /// - legend(label(5 "Low education, simulated"))) /// -(line valid_edu_low year if dgn == 1, sort color(red) /// - legend(label(6 "Low education, UKHLS"))), /// - title("Educational Attainment") /// - subtitle("Ages 17-${max_age}, males") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Current students excluded from sample", /// - size(vsmall)) - -graph export /// -"$dir_output_files/education/validation_${country}_education_ts_17_${max_age}_male.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 1.1.3 : Educational attainment - 17-30 -******************************************************************************** - -* Prepare validation data -use year dwt valid_edu_high valid_edu_med valid_edu_low dag les_c4 dag using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -* Select sample -drop if les_c4 == 2 -drop if dag > 30 -drop if dag < 17 -drop les_c4 - -* Compute shares -collapse (mean) valid_edu_high valid_edu_med valid_edu_low [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_edu_high sim_edu_med sim_edu_low dag les_c4 using /// - 
"$dir_data/simulated_data_full.dta", clear - -* Select sample -drop if les_c4 == "Student" -drop if dag > 30 -drop if dag < 17 - -* Compute shares and sd -collapse (mean) sim_edu_high sim_edu_med sim_edu_low, by(run year) -collapse (mean) sim_edu_high sim_edu_med sim_edu_low /// - (sd) sim_edu_high_sd = sim_edu_high /// - sim_edu_med_sd = sim_edu_med /// - sim_edu_low_sd = sim_edu_low /// - , by(year) - -* Approx 95% confidence interval -foreach varname in sim_edu_high sim_edu_med sim_edu_low { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - - } - -* Combine datasets -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway /// -(rarea sim_edu_high_high sim_edu_high_low year, sort color(green%20) /// - legend(label(1 "High education, simulated"))) /// -(line valid_edu_high year, sort color(green) /// - legend(label(2 "High education, UKHLS"))) /// -(rarea sim_edu_med_high sim_edu_med_low year, sort color(blue%20) /// - legend(label(3 "Medium education, simulated"))) /// -(line valid_edu_med year, sort color(blue) /// - legend(label(4 "Medium education, UKHLS"))) /// -(rarea sim_edu_low_high sim_edu_low_low year, sort color(red%20) /// - legend(label(5 "Low education, simulated"))) /// -(line valid_edu_low year, sort color(red) /// - legend(label(6 "Low education, UKHLS"))), /// - title("Educational Attainment") /// - subtitle("Ages 17-30") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Current students excluded from sample.", /// - size(vsmall)) - -graph export /// -"$dir_output_files/education/validation_${country}_education_ts_17_30_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 1.1.4 : Educational 
attainment - 17-30, by gender -******************************************************************************** -* Prepare validation data -use year dwt valid_edu_high valid_edu_med valid_edu_low dag dgn les_c4 dag /// - using "$dir_data/ukhls_validation_full_sample.dta", clear - -* Select smaple -drop if les_c4 == 2 -drop if dag > 30 -drop if dag < 17 -drop les_c4 - -* Compute shares -collapse (mean) valid_edu_high valid_edu_med valid_edu_low [aw = dwt], /// - by(year dgn) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_edu_high sim_edu_med sim_edu_low dag dgn les_c4 using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -drop if les_c4 == "Student" -drop if dag > 30 -drop if dag < 17 - -gen dgn2 = 0 if dgn == "Female" -replace dgn2 = 1 if dgn == "Male" - -drop dgn les_c4 -rename dgn2 dgn - -* Cmpute shares and sd -collapse (mean) sim_edu_high sim_edu_med sim_edu_low, by(run year dgn) -collapse (mean) sim_edu_high sim_edu_med sim_edu_low /// - (sd) sim_edu_high_sd = sim_edu_high /// - (sd) sim_edu_med_sd = sim_edu_med /// - (sd) sim_edu_low_sd = sim_edu_low /// - , by(year dgn) - -* Approx 95% confidence interval -foreach varname in sim_edu_high sim_edu_med sim_edu_low { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -* Combine datasets -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure - female -twoway /// -(rarea sim_edu_high_high sim_edu_high_low year if dgn == 0, /// - sort color(green%20) legend(label(1 "High education, simulated"))) /// -(line valid_edu_high year if dgn == 0, sort color(green) /// - legend(label(2 "High education, UKHLS"))) /// -(rarea sim_edu_med_high sim_edu_med_low year if dgn == 0, /// - sort color(blue%20) legend(label(3 "Medium education, simulated"))) /// -(line valid_edu_med year if dgn == 0, sort color(blue) /// - legend(label(4 "Medium education, UKHLS"))) /// 
-(rarea sim_edu_low_high sim_edu_low_low year if dgn == 0, sort color(red%20) /// - legend(label(5 "Low education, simulated"))) /// -(line valid_edu_low year if dgn == 0, sort color(red) /// - legend(label(6 "Low education, UKHLS"))), /// - title("Educational Attainment") /// - subtitle("Ages 17-30, females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Current students excluded from sample", /// - size(vsmall)) - -graph export /// -"$dir_output_files/education/validation_${country}_education_ts_17_30_female.jpg", /// - replace width(2400) height(1350) quality(100) - -* Plot figure - male -twoway /// -(rarea sim_edu_high_high sim_edu_high_low year if dgn == 1, /// - sort color(green%20) legend(label(1 "High education, simulated"))) /// -(line valid_edu_high year if dgn == 1, sort color(green) /// - legend(label(2 "High education, UKHLS"))) /// -(rarea sim_edu_med_high sim_edu_med_low year if dgn == 1, /// - sort color(blue%20) legend(label(3 "Medium education, simulated"))) /// -(line valid_edu_med year if dgn == 1, sort color(blue) /// - legend(label(4 "Medium education, UKHLS"))) /// -(rarea sim_edu_low_high sim_edu_low_low year if dgn == 1, sort color(red%20) /// - legend(label(5 "Low education, simulated"))) /// -(line valid_edu_low year if dgn == 1, sort color(red) /// - legend(label(6 "Low education, UKHLS"))), /// - title("Educational Attainment") /// - subtitle("Ages 17-30, females") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: Current students excluded from sample.", /// - size(vsmall)) - -graph export /// -"$dir_output_files/education/validation_${country}_education_ts_17_30_male.jpg", /// - replace width(2400) height(1350) 
quality(100) - - -******************************************************************************** -* 1.1.5 : Educational attainment - 66-70 -******************************************************************************** - -* Prepare validation data -use year dwt valid_edu_high valid_edu_med valid_edu_low dag les_c4 using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -* Select sample -drop if les_c4 == 2 -drop if dag < 66 -drop if dag > 70 - -drop les_c4 - -* Compute shares -collapse (mean) valid_edu_high valid_edu_med valid_edu_low [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_edu_high sim_edu_med sim_edu_low dag les_c4 using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -drop if les_c4 == "Student" -drop if dag < 66 -drop if dag > 70 - -drop les_c4 - -* Compute shares and sd -collapse (mean) sim_edu_high sim_edu_med sim_edu_low, by(run year) -collapse (mean) sim_edu_high sim_edu_med sim_edu_low /// - (sd) sim_edu_high_sd = sim_edu_high /// - sim_edu_med_sd = sim_edu_med /// - sim_edu_low_sd = sim_edu_low /// - , by(year) - -* Approx 95% confidence interval -foreach varname in sim_edu_high sim_edu_med sim_edu_low { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -* Combine datasets -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway /// -(rarea sim_edu_high_high sim_edu_high_low year, sort color(green%20) /// - legend(label(1 "High education, simulated"))) /// -(line valid_edu_high year, sort color(green) /// - legend(label(2 "High education, UKHLS"))) /// -(rarea sim_edu_med_high sim_edu_med_low year, sort color(blue%20) /// - legend(label(3 "Medium education, simulated"))) /// -(line valid_edu_med year, sort color(blue) /// - legend(label(4 "Medium education, UKHLS"))) /// -(rarea sim_edu_low_high sim_edu_low_low year, sort color(red%20) /// - 
legend(label(5 "Low education, simulated"))) /// -(line valid_edu_low year, sort color(red) /// - legend(label(6 "Low education, UKHLS"))), /// - title("Educational Attainment") /// - subtitle("Ages 66-70") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note(Notes:, size(vsmall)) - -graph export /// -"$dir_output_files/education/validation_${country}_education_ts_66_70_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -** 60-70 - -* Prepare validation data -use year dwt valid_edu_high valid_edu_med valid_edu_low dag les_c4 using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -* Selec sample -drop if les_c4 == 2 -drop if dag < 60 -drop if dag > 70 -drop if valid_edu_high == 0 & valid_edu_med == 0 & valid_edu_low == 0 - -drop les_c4 - -collapse (mean) valid_edu_high valid_edu_med valid_edu_low [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_edu_high sim_edu_med sim_edu_low dag les_c4 using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -drop if les_c4 == "Student" -drop if dag < 60 -drop if dag > 70 - -drop les_c4 - -collapse (mean) sim_edu_high sim_edu_med sim_edu_low, by(run year) -collapse (mean) sim_edu_high sim_edu_med sim_edu_low /// - (sd) sim_edu_high_sd = sim_edu_high /// - sim_edu_med_sd = sim_edu_med /// - sim_edu_low_sd = sim_edu_low /// - , by(year) - -foreach varname in sim_edu_high sim_edu_med sim_edu_low { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway /// -(rarea sim_edu_high_high sim_edu_high_low year, sort color(green%20) /// - legend(label(1 "High education, simulated"))) /// -(line valid_edu_high year, sort color(green) /// 
- legend(label(2 "High education, UKHLS"))) /// -(rarea sim_edu_med_high sim_edu_med_low year, sort color(blue%20) /// - legend(label(3 "Medium education, simulated"))) /// -(line valid_edu_med year, sort color(blue) /// - legend(label(4 "Medium education, UKHLS"))) /// -(rarea sim_edu_low_high sim_edu_low_low year, sort color(red%20) /// - legend(label(5 "Low education, simulated"))) /// -(line valid_edu_low year, sort color(red) /// - legend(label(6 "Low education, UKHLS"))), /// - title("Educational Attainment") /// - subtitle("Ages 60-70") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note(Notes:, size(vsmall)) - - -******************************************************************************** -* 1.2 : Educational attainment when leave education -******************************************************************************** - -******************************************************************************** -* 1.2.1 : Educational attainment when leave education - 17-65 -******************************************************************************** - -* Prepare validation data -use year idperson dwt valid_edu_high valid_edu_med valid_edu_low deh_c3 /// - les_c4 dag using "$dir_data/ukhls_validation_full_sample.dta", clear - -* Select sample -keep if inrange(dag,16,65) - -* Select relevant observations -sort idperson year -gen left_edu = 1 if idperson == idperson[_n-1] & /// - les_c4 != 2 & les_c4[_n-1] == 2 & year == year[_n-1]+1 - -* Get rid of observations with missing values -drop if deh_c3 == -9 | deh_c3 == . 
- -* Get rid of observations with missing values -drop if valid_edu_high == 0 & valid_edu_med == 0 & valid_edu_low == 0 - -keep if left_edu == 1 - -collapse (mean) valid_edu_high valid_edu_med valid_edu_low [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year idperson sim_edu_high sim_edu_med sim_edu_low les_c4 dag /// - deh_c3 using "$dir_data/simulated_data.dta", clear - -* Select sample -keep if inrange(dag,16,65) - -* Select relevant observations -sort idperson year -gen left_edu_sim = 1 if idperson == idperson[_n-1] & /// - les_c4 != "Student" & les_c4[_n-1] == "Student" & year == year[_n-1]+1 - -keep if left_edu_sim == 1 - -collapse (mean) sim_edu_high sim_edu_med sim_edu_low, by(run year) -collapse (mean) sim_edu_high sim_edu_med sim_edu_low /// - (sd) sim_edu_high_sd = sim_edu_high /// - sim_edu_med_sd = sim_edu_med /// - sim_edu_low_sd = sim_edu_low /// - , by(year) - -foreach varname in sim_edu_high sim_edu_med sim_edu_low { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway /// -(rarea sim_edu_high_high sim_edu_high_low year, sort color(green%20) /// - legend(label(1 "High education, simulated"))) /// -(line valid_edu_high year, sort color(green) /// - legend(label(2 "High education, UKHLS"))) /// -(rarea sim_edu_med_high sim_edu_med_low year, sort color(blue%20) /// - legend(label(3 "Medium education, simulated"))) /// -(line valid_edu_med year, sort color(blue) /// - legend(label(4 "Medium education, UKHLS"))) /// -(rarea sim_edu_low_high sim_edu_low_low year, sort color(red%20) /// - legend(label(5 "Low education, simulated"))) /// -(line valid_edu_low year, sort color(red) /// - legend(label(6 "Low education, UKHLS"))), /// - title("Educational Attainment When Leave Education") /// - subtitle("Ages 17-${max_age}") /// - xtitle("Year", 
size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - note("Notes: ", /// - size(vsmall)) - -graph export /// -"$dir_output_files/education/validation_${country}_leave_education_ts_17_65_both.jpg", /// - replace width(2400) height(1350) quality(100) - -graph drop _all diff --git a/validation/02_simulated_output_validation/do_files/06_03_plot_gross_income.do b/validation/02_simulated_output_validation/do_files/06_03_plot_gross_income.do deleted file mode 100644 index d2c4cddd1..000000000 --- a/validation/02_simulated_output_validation/do_files/06_03_plot_gross_income.do +++ /dev/null @@ -1,880 +0,0 @@ -******************************************************************************** -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Gross income -* AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 -* COUNTRY: UK - -* NOTES: -******************************************************************************** - -******************************************************************************** -* 1 : Mean values over time -******************************************************************************** - -******************************************************************************** -* 1.1 : Mean values over time - Benefit unit amounts -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_gross_bu_yr using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_gross_bu_yr, d - - replace valid_y_gross_bu_yr = . 
if /// - valid_y_gross_bu_yr < r(p1) | valid_y_gross_bu_yr > r(p99) - -} - -collapse (mean) valid_y_gross_bu_yr [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_y_gross_yr_bu using "$dir_data/simulated_data.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_y_gross_yr_bu, d - - replace sim_y_gross_yr_bu = . if sim_y_gross_yr_bu < r(p1) | /// - sim_y_gross_yr_bu > r(p99) - -} - -collapse (mean) sim_y_gross_yr_bu, by(run year) -collapse (mean) sim_y_gross_yr_bu /// - (sd) sim_y_gross_yr_bu_sd = sim_y_gross_yr_bu /// - , by(year) - -foreach varname in sim_y_gross_yr_bu { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - - } - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -* Plot figure -twoway /// - (rarea sim_y_gross_yr_bu_high sim_y_gross_yr_bu_low year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// - (line valid_y_gross_bu_yr year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Gross Income") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("GBP per year", size(small)) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Series represents average benefit unit gross income without benefits through time. Statistics computed by averaging benefit" "unit-level gross income over all persons ages 18-65. Amounts in 2015 prices. 
Top and bottom percentiles trimmed.", /// - size(vsmall)) - -graph export /// -"$dir_output_files/income/gross_income/validation_${country}_gross_income_ts_${min_age}_${max_age}_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 1.2 : Mean values over time - Individual level amounts -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_gross_ind_yr using /// - "$dir_data/ukhls_validation_sample.dta", clear - - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_gross_ind_yr, d - - replace valid_y_gross_ind_yr = . if /// - valid_y_gross_ind_yr < r(p1) | /// - valid_y_gross_ind_yr > r(p99) - -} - -collapse (mean) valid_y_gross_ind_yr [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_y_gross_yr using "$dir_data/simulated_data.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - - sum sim_y_gross_yr, d - - replace sim_y_gross_yr = . 
if /// - sim_y_gross_yr < r(p1) | sim_y_gross_yr > r(p99) - - } - -collapse (mean) sim_y_gross_yr, by(run year) -collapse (mean) sim_y_gross_yr /// - (sd) sim_y_gross_yr_sd = sim_y_gross_yr /// - , by(year) - -foreach varname in sim_y_gross_yr { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - - } - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway /// -(rarea sim_y_gross_yr_high sim_y_gross_yr_low year, sort color(green%20) /// - legend(label(1 "Simulated"))) /// -(line valid_y_gross_ind_yr year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Individual Gross Income") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("GBP per year", size(small)) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Series represents average individual gross income without benefits through time. Statistics computed by averaging" "person-level gross income over all males ages 18-65. Values in 2015 prices. Top and bottom percentiles trimmed.", /// - size(vsmall)) - -graph export /// -"$dir_output_files/income/gross_income/validation_${country}_ind_gross_income_ts_${min_age}_${max_age}_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -* Male -* Prepare validation data -use year dwt valid_y_gross_ind_yr dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if dgn == 1 - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_gross_ind_yr, d - - replace valid_y_gross_ind_yr = . 
if /// - valid_y_gross_ind_yr < r(p1) | /// - valid_y_gross_ind_yr > r(p99) - -} - -collapse (mean) valid_y_gross_ind_yr [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_y_gross_yr dgn using "$dir_data/simulated_data.dta", clear - -keep if dgn == "Male" - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_y_gross_yr, d - - replace sim_y_gross_yr = . if /// - sim_y_gross_yr < r(p1) | sim_y_gross_yr > r(p99) - -} - -collapse (mean) sim_y_gross_yr, by(run year) -collapse (mean) sim_y_gross_yr /// - (sd) sim_y_gross_yr_sd = sim_y_gross_yr /// - , by(year) - -foreach varname in sim_y_gross_yr { - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway /// -(rarea sim_y_gross_yr_high sim_y_gross_yr_low year, sort color(green%20) /// - legend(label(1 "Simulated"))) /// -(line valid_y_gross_ind_yr year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Individual Gross Income") /// - subtitle("Ages 18-65, males") /// - xtitle("Year", size(small)) /// - ytitle("GBP per year", size(small)) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Series represents average individual gross income without benefits through time. Statistics computed by averaging" "person-level gross income over all females ages 18-65. Values in 2015 prices. 
Top and bottom percentiles trimmed.", /// - size(vsmall)) - -graph export /// -"$dir_output_files/income/gross_income/validation_${country}_ind_gross_income_ts_${min_age}_${max_age}_male.jpg", /// - replace width(2400) height(1350) quality(100) - - -* Female -* Prepare validation data -use year dwt valid_y_gross_ind_yr dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if dgn == 0 - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_gross_ind_yr, d - - replace valid_y_gross_ind_yr = . if /// - valid_y_gross_ind_yr < r(p1) | /// - valid_y_gross_ind_yr > r(p99) - -} - -collapse (mean) valid_y_gross_ind_yr [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_y_gross_yr dgn using "$dir_data/simulated_data.dta", clear - -keep if dgn == "Female" - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_y_gross_yr, d - - replace sim_y_gross_yr = . if /// - sim_y_gross_yr < r(p1) | sim_y_gross_yr > r(p99) - -} - -collapse (mean) sim_y_gross_yr, by(run year) -collapse (mean) sim_y_gross_yr /// - (sd) sim_y_gross_yr_sd = sim_y_gross_yr /// - , by(year) - -foreach varname in sim_y_gross_yr { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway /// -(rarea sim_y_gross_yr_high sim_y_gross_yr_low year, sort color(green%20) /// - legend(label(1 "Simulated"))) /// -(line valid_y_gross_ind_yr year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Individual Gross Income") /// - subtitle("Ages 18-65, females") /// - xtitle("Year", size(small)) /// - ytitle("GBP per year", size(small)) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Series represents average individual gross income without benefits through time. 
Statistics computed by averaging" "person-level gross income over all persons ages 18-65. Values in 2015 prices. Top and bottom percentiles trimmed.", /// - size(vsmall)) - -graph export /// -"$dir_output_files/income/gross_income/validation_${country}_ind_gross_income_ts_${min_age}_${max_age}_female.jpg", /// - replace width(2400) height(1350) quality(100) - - -/******************************************************************************* -* 2 : Histograms -*******************************************************************************/ - -/******************************************************************************* -* 2.1 : Histograms - Benefit unit gross income by year, and by category of -weekly labour supply -*******************************************************************************/ - -* Prepare validation data -use year dwt valid_y_gross_bu_yr laboursupplyweekly_hu using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_gross_bu_yr, d - - replace valid_y_gross_bu_yr = . 
if /// - valid_y_gross_bu_yr < r(p1) | valid_y_gross_bu_yr > r(p99) - -} - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = r(min) -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen valid_y_gross_bu_yr if year == `year', /// - width(750) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - - foreach ls in $ls_cat { - - twoway__histogram_gen valid_y_gross_bu_yr if /// - year == `year' & labour == "`ls'", width(750) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year'_`ls' = r(max) - - drop d_valid v2 - - } - -} - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_y_gross_yr_bu laboursupplyweekly using /// - "$dir_data/simulated_data.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_y_gross_yr_bu, d - - replace sim_y_gross_yr_bu = . if /// - sim_y_gross_yr_bu < r(p1) | sim_y_gross_yr_bu > r(p99) - -} - -keep if run == 1 - -append using "$dir_data/temp_valid_stats.dta" - -* Plot sub-figures -qui sum year -local min_year = 2011 -local max_year = r(max) - -//local year = 2010 - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_y_gross_yr if year == `year', width(750) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/3 - - - twoway (hist sim_y_gross_yr_bu if year == `year', width(750) /// - color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_y_gross_bu_yr if year == `year', width(750) /// - color(red%30) legend(label(2 "UKHLS"))) , /// - title("ALL hours") /// - name(gross_inc_`year'_all, replace) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y',labsize(vsmall)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - - foreach ls in $ls_cat { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_y_gross_yr if year == `year' & /// - laboursupplyweekly_orig == "`ls'", width(750) den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year'_`ls' if /// - max_d_valid_`year'_`ls' > max_d_sim - replace max_value = max_d_sim if max_value == . - - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/3 - - * Plot by weekly hours work - twoway (hist sim_y_gross_yr_bu if year == `year' & /// - laboursupplyweekly_orig == "`ls'", width(750) color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist valid_y_gross_bu_yr if year == `year' & /// - laboursupplyweekly_hu == "`ls'", width(750) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - title("`ls' hours") /// - name(gross_inc_`year'_`ls', replace) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y',labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - } -} - -* Combine plots by year -qui sum year -local min_year = 2011 -local max_year = r(max) - -forvalues year = `min_year'/`max_year' { - - grc1leg gross_inc_`year'_all /// - gross_inc_`year'_ZERO /// - gross_inc_`year'_TEN /// - 
gross_inc_`year'_TWENTY /// - gross_inc_`year'_THIRTY /// - gross_inc_`year'_FORTY, /// - title("Gross Income by Weekly Hours of Work") /// - subtitle("`year'") /// - legendfrom(gross_inc_`year'_ZERO) rows(2) /// - graphregion(color(white)) /// - note("Notes: Amounts in GBP per year, 2015 prices. Sample includes individuals age 18-65. Individual level data plot of benefit unit amount. Weekly" "hours worked categories: ZERO = [0,5], TEN = [6,15], TWENTY = [16,25], THIRTY = [26,34], FORTY = 36+. Top and bottom percentiles trimmed.", /// - size(vsmall)) - - graph export "$dir_output_files/income/gross_income/validation_${country}_gross_income_dist_`year'.png", /// - replace width(2400) height(1350) - -} - -graph drop _all - - -/******************************************************************************* -* 2.2 : Histograms - Individual gross income by year, and by category of weekly -labour supply -*******************************************************************************/ - -* Males - -* Prepare validation data -use year dwt valid_y_gross_ind_yr laboursupplyweekly_hu dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if dgn == 1 -drop dgn - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_gross_ind_yr, d - - replace valid_y_gross_ind_yr = . 
if /// - valid_y_gross_ind_yr < r(p1) | /// - valid_y_gross_ind_yr > r(p99) - -} - - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen valid_y_gross_ind_yr if year == `year' , /// - width(750) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - - foreach ls in $ls_cat { - - twoway__histogram_gen valid_y_gross_ind_yr if /// - year == `year' & labour == "`ls'", width(750) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year'_`ls' = r(max) - - drop d_valid v2 - - } -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_y_gross_yr laboursupplyweekly_orig dgn using /// - "$dir_data/simulated_data.dta", clear - -keep if dgn == "Male" -drop dgn - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_y_gross_yr, d - - replace sim_y_gross_yr = . if sim_y_gross_yr < r(p1) | /// - sim_y_gross_yr > r(p99) - - } - -keep if run == 1 - -append using "$dir_data/temp_valid_stats.dta" - -* Plot sub-figures -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_y_gross_yr if year == `year', width(750) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/3 - - * Plot all hours - twoway (hist sim_y_gross_yr if year == `year', width(750) /// - color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_y_gross_ind_yr if year == `year', width(750) /// - color(red%30) legend(label(2 "UKHLS"))) , /// - title("ALL hours") /// - name(ind_gross_inc_`year'_all, replace) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - foreach ls in $ls_cat { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_y_gross_yr if year == `year' & /// - laboursupplyweekly_orig == "`ls'", width(750) den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year'_`ls' if /// - max_d_valid_`year'_`ls' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/3 - - * Plot by weekly hours work - twoway (hist sim_y_gross_yr if year == `year' & /// - laboursupplyweekly_orig == "`ls'", width(750) color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist valid_y_gross_ind_yr if year == `year' & /// - laboursupplyweekly_hu == "`ls'", width(750) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - title("`ls' hours") /// - name(ind_gross_inc_`year'_`ls', replace) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - } -} - -* Combine plots by year -qui sum year -local min_year = 2011 -local max_year = r(max) - -forvalues year = `min_year'/`max_year' { - - grc1leg ind_gross_inc_`year'_all ind_gross_inc_`year'_ZERO /// - ind_gross_inc_`year'_TEN /// - ind_gross_inc_`year'_TWENTY /// - ind_gross_inc_`year'_THIRTY /// - ind_gross_inc_`year'_FORTY, /// - title("Individual Gross Income by Weekly Hours of Work") /// - subtitle("`year', males") /// - legendfrom(ind_gross_inc_`year'_ZERO) /// - rows(2) /// - graphregion(color(white)) /// - note("Notes: Amounts in GBP per year, 2015 prices. Sample includes males age 18-65. Weekly hours worked categories:"" ZERO = [0,5], TEN = [6,15], TWENTY = [16,25], THIRTY = [26,34], FORTY = 36+. 
Top and bottom percentiles trimmed.", /// - size(vsmall)) - - graph export "$dir_output_files/income/gross_income/validation_${country}_ind_gross_income_dist_`year'_male.png", /// - replace width(2400) height(1350) - -} - -graph drop _all - - -* Females - -* Prepare validation data -use year dwt valid_y_gross_ind_yr laboursupplyweekly_hu dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if dgn == 0 -drop dgn - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_gross_ind_yr, d - - replace valid_y_gross_ind_yr = . if /// - valid_y_gross_ind_yr < r(p1) | /// - valid_y_gross_ind_yr > r(p99) - -} - - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen valid_y_gross_ind_yr if year == `year' , /// - width(750) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - - foreach ls in $ls_cat { - - twoway__histogram_gen valid_y_gross_ind_yr if /// - year == `year' & labour == "`ls'", width(750) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year'_`ls' = r(max) - - drop d_valid v2 - - } -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_y_gross_yr laboursupplyweekly dgn using /// - "$dir_data/simulated_data.dta", clear - -keep if dgn == "Female" -drop dgn - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_y_gross_yr, d - - replace sim_y_gross_yr = . 
if sim_y_gross_yr < r(p1) | /// - sim_y_gross_yr > r(p99) - -} - -keep if run == 1 - - -append using "$dir_data/temp_valid_stats.dta" - -* Plot sub-figures -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_y_gross_yr if year == `year', width(750) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . - - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/3 - - * Plot all hours - twoway (hist sim_y_gross_yr if year == `year', width(750) /// - color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_y_gross_ind_yr if year == `year', width(750) /// - color(red%30) legend(label(2 "UKHLS"))) , /// - title("ALL hours") /// - name(ind_gross_inc_`year'_all, replace) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - foreach ls in $ls_cat { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_y_gross_yr if year == `year' & /// - laboursupplyweekly_orig == "`ls'", width(750) den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year'_`ls' if /// - max_d_valid_`year'_`ls' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/3 - - * Plot by weekly hours work - twoway (hist sim_y_gross_yr if year == `year' & /// - laboursupplyweekly_orig == "`ls'", width(750) color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist valid_y_gross_ind_yr if year == `year' & /// - laboursupplyweekly_hu == "`ls'", width(750) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - title("`ls' hours") /// - name(ind_gross_inc_`year'_`ls', replace) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - } -} - -* Combine plots by year -qui sum year -local min_year = 2011 -local max_year = r(max) - -forvalues year = `min_year'/`max_year' { - - grc1leg ind_gross_inc_`year'_all ind_gross_inc_`year'_ZERO /// - ind_gross_inc_`year'_TEN /// - ind_gross_inc_`year'_TWENTY /// - ind_gross_inc_`year'_THIRTY /// - ind_gross_inc_`year'_FORTY, /// - title("Individual Gross Income by Weekly Hours of Work") /// - subtitle("`year', females") /// - legendfrom(ind_gross_inc_`year'_ZERO) rows(2) /// - graphregion(color(white)) /// - note("Notes: Values in GBP per year, 2015 prices. Sample includes females ages 18-65. Weekly hours worked categories:" "ZERO = [0,5], TEN = [6,15], TWENTY = [16,25], THIRTY = [26,34], FORTY = 36+. Top and bottom percentiles trimmed.", /// - size(vsmall)) - - graph export "$dir_output_files/income/gross_income/validation_${country}_ind_gross_income_dist_`year'_female.png", /// - replace width(2400) height(1350) - -} - -graph drop _all - - - -/* - -* Investigation into who the people are with high working hours and low gross -* income -/* -Note plot ben unit observations using individual level data. - -Components of gross income. 
- -Gross personal income components -• PY010G - Gross employee cash or near cash employee income -• PY050G - Gross cash benefits or losses from self-employment - (including royalties) -• PY080G - Pensions received from individual private plans (other than those - covered under ESSPROS) - -Plus gross income components at household level -• HY040G - Income from rental of a property or land -• HY080G - Regular inter-household cash transfers received -• HY090G - Interests, dividends, profit from capital investments in - unincorporated business -• HY110G - Income received by people aged under 16 -*/ - -* Explore 2018 FIFTY hours -use "$dir_data/ukhls_validation_full_sample.dta", clear - -keep if year == 2018 & laboursupplyweekly_hu == "FIFTY" - -order idperson idbenefit lhw valid_y_gross_ind_yr /// - y_gross_labour_person valid_wage_hour /// - py010g* py050g py080g /// - hy080g_pc hy110g_pc hy040g_pc hy090g_pc missing* - -fre missing_py010g missing_py050g missing_py080g missing_hy080g /// - missing_hy110g missing_hy040g missing_hy090g missing_lhw if /// - valid_y_gross_ind_yr == 0 // none missing seems to be in the data - - diff --git a/validation/02_simulated_output_validation/do_files/06_04_plot_gross_labour_income.do b/validation/02_simulated_output_validation/do_files/06_04_plot_gross_labour_income.do deleted file mode 100644 index 1fdb4fb9d..000000000 --- a/validation/02_simulated_output_validation/do_files/06_04_plot_gross_labour_income.do +++ /dev/null @@ -1,764 +0,0 @@ -******************************************************************************** -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Gross labour income -* AUTHORS: Ashley Burdett -* LAST UPDATE: 06/2025 (AB) -* COUNTRY: UK - -* NOTES: Plotted using individual level data -* => multiple observations per ben unit. 
-******************************************************************************** - -******************************************************************************** -* 1 : Mean labour income -******************************************************************************** - -******************************************************************************** -* 1.1: Mean labour income - benefit unit -******************************************************************************** - -* Prepare validation data -use year dwt les_c4 valid_y_gross_labour_bu_yr using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Keep only employed individuals -keep if les_c4 == 1 - - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_gross_labour_bu_yr, d - - replace valid_y_gross_labour_bu_yr = . if /// - valid_y_gross_labour_bu_yr < r(p1) | valid_y_gross_labour_bu_yr > r(p99) - -} - -collapse (mean) valid_y_gross_labour_bu_yr [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year les_c4 sim_yplgrs_dv_lvl_bu using /// - "$dir_data/simulated_data.dta", clear - -* Keep only employed individuals -keep if les_c4 == "EmployedOrSelfEmployed" - - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_yplgrs_dv_lvl_bu, d - - replace sim_yplgrs_dv_lvl_bu = . 
if /// - sim_yplgrs_dv_lvl_bu < r(p1) | sim_yplgrs_dv_lvl_bu > r(p99) - -} - -collapse (mean) sim_yplgrs_dv_lvl_bu, by(run year) -collapse (mean) sim_yplgrs_dv_lvl_bu /// - (sd) sim_yplgrs_dv_lvl_bu_sd = sim_yplgrs_dv_lvl_bu /// - , by(year) - -foreach varname in sim_yplgrs_dv_lvl_bu { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway /// -(rarea sim_yplgrs_dv_lvl_bu_high sim_yplgrs_dv_lvl_bu_low year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_y_gross_labour_bu_yr year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Gross Labour Income") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("GBP per year", size(small)) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - graphregion(color(white)) /// - legend(size(small)) /// - note("Note: Amounts at the benefit unit level, individual data plotted. Statistics calculated on the sample of employed individuals" "ages 18-65. Amounts in 2015 prices. Top and bottom percentiles trimmed.", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/income/gross_labour_income/validation_${country}_gross_labour_income_ts_${min_age}_${max_age}.jpg", /// - replace width(2400) height(1350) quality(100) - -/* -* Males - -* Prepare validation data -use year dwt les_c4 valid_y_gross_labour_yr_bu dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if dgn == 1 - -* Keep only employed individuals -keep if les_c4 == 1 - - -* Trim outliers -if "$trim_outliers" == "true" { - sum valid_y_gross_labour_yr_bu, d - replace valid_y_gross_labour_yr_bu = . 
if /// - valid_y_gross_labour_yr_bu < r(p1) | valid_y_gross_labour_yr_bu > r(p99) -} - -collapse (mean) valid_y_gross_labour_yr_bu [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year les_c4 sim_yplgrs_dv_lvl_bu dgn using /// - "$dir_data/simulated_data.dta", clear - -keep if dgn == "Male" - -* Keep only employed individuals -keep if les_c4 == "EmployedOrSelfEmployed" - - -* Trim outliers -if "$trim_outliers" == "true" { - sum sim_yplgrs_dv_lvl_bu, d - replace sim_yplgrs_dv_lvl_bu = . if /// - sim_yplgrs_dv_lvl_bu < r(p1) | sim_yplgrs_dv_lvl_bu > r(p99) -} - -collapse (mean) sim_yplgrs_dv_lvl_bu, by(run year) -collapse (mean) sim_yplgrs_dv_lvl_bu /// - (sd) sim_yplgrs_dv_lvl_bu_sd = sim_yplgrs_dv_lvl_bu /// - , by(year) - -foreach varname in sim_yplgrs_dv_lvl_bu { - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway /// -(rarea sim_yplgrs_dv_lvl_bu_high sim_yplgrs_dv_lvl_bu_low year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_y_gross_labour_yr_bu year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Gross labour income") subtitle("Males") /// - xtitle("Year") /// - ytitle("€ per year") /// - ylabel(,labsize(small)) xlabel(,labsize(small)) /// - graphregion(color(white)) /// - note("Note: Statistics calculated on the sample of employed males ages 18-65. Yearly amounts. 2019 X-sectional data used in underlying" "estimation. 
Amounts in 2015 prices.", size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/income/gross_labour_income/validation_${country}_gross_labour_income_ts_${min_age}_${max_age}_male.jpg", /// - replace width(2400) height(1350) quality(100) - - -* Females - -* Prepare validation data -use year dwt les_c4 valid_y_gross_labour_yr_bu dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if dgn == 0 - -* Keep only employed individuals -keep if les_c4 == 1 - -* Trim outliers -if "$trim_outliers" == "true" { - sum valid_y_gross_labour_yr_bu, d - replace valid_y_gross_labour_yr_bu = . if /// - valid_y_gross_labour_yr_bu < r(p1) | valid_y_gross_labour_yr_bu > r(p99) -} - -collapse (mean) valid_y_gross_labour_yr_bu [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year les_c4 sim_yplgrs_dv_lvl_bu dgn using /// - "$dir_data/simulated_data.dta", clear - -keep if dgn == "Female" - -* Keep only employed individuals -keep if les_c4 == "EmployedOrSelfEmployed" - - -* Trim outliers -if "$trim_outliers" == "true" { - sum sim_yplgrs_dv_lvl_bu, d - replace sim_yplgrs_dv_lvl_bu = . 
if /// - sim_yplgrs_dv_lvl_bu < r(p1) | sim_yplgrs_dv_lvl_bu > r(p99) -} - -collapse (mean) sim_yplgrs_dv_lvl_bu, by(run year) -collapse (mean) sim_yplgrs_dv_lvl_bu /// - (sd) sim_yplgrs_dv_lvl_bu_sd = sim_yplgrs_dv_lvl_bu /// - , by(year) - -foreach varname in sim_yplgrs_dv_lvl_bu { - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway /// -(rarea sim_yplgrs_dv_lvl_bu_high sim_yplgrs_dv_lvl_bu_low year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_y_gross_labour_yr_bu year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Gross labour income") subtitle("Females") /// - xtitle("Year") /// - ytitle("€ per year") /// - ylabel(,labsize(small)) xlabel(,labsize(small)) /// - graphregion(color(white)) /// - note("Note: Statistics calculated on the sample of employed females ages 18-65. Yearly amounts. 2019 X-sectional data used in underlying" "estimation. 
Amounts in 2015 prices.", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/income/gross_labour_income/validation_${country}_gross_labour_income_ts_${min_age}_${max_age}_female.jpg", /// - replace width(2400) height(1350) quality(100) -*/ - - -******************************************************************************** -* 2 : Histograms -******************************************************************************** - -******************************************************************************** -* 2.1 : Histograms - working age -******************************************************************************** - -* Prepare validation data -use year dwt les_c4 valid_y_gross_labour_bu_yr /// - laboursupplyweekly_hu using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Keep only employed individuals -keep if les_c4 == 1 -drop les_c4 - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_gross_labour_bu_yr, d - - replace valid_y_gross_labour_bu_yr = . 
if /// - valid_y_gross_labour_bu_yr < r(p1) | valid_y_gross_labour_bu_yr > r(p99) - -} - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen valid_y_gross_labour_bu_yr if year == `year', /// - width(750) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - - foreach ls in $ls_cat { - - twoway__histogram_gen valid_y_gross_labour_bu_yr if /// - year == `year' & labour == "`ls'", width(750) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year'_`ls' = r(max) - - drop d_valid v2 - - } - -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year les_c4 sim_yplgrs_dv_lvl_bu laboursupplyweekly using /// - "$dir_data/simulated_data.dta", clear - -* Keep only employed individuals -keep if les_c4 == "EmployedOrSelfEmployed" -drop les_c4 - - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_yplgrs_dv_lvl_bu, d - - replace sim_yplgrs_dv_lvl_bu = . if /// - sim_yplgrs_dv_lvl_bu < r(p1) | sim_yplgrs_dv_lvl_bu > r(p99) - - } - -keep if run == 1 - -append using "$dir_data/temp_valid_stats.dta" - -* Plot sub-figures -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_yplgrs_dv_lvl_bu if year == `year', width(750) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - - * Plot all hours - twoway (hist sim_yplgrs_dv_lvl_bu if year == `year' , width(750) /// - color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_y_gross_labour_bu_yr if year == `year' , width(750) /// - color(red%30) legend(label(2 "UKHLS"))) , /// - subtitle("ALL hours") /// - name(gross_labour_inc_`year'_all, replace) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y',labsize(vsmall)) /// - graphregion(color(white)) - - - drop d_sim v1 max_d_sim max_value - - * Plot by weekly hours work - foreach ls in $ls_cat_labour { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_yplgrs_dv_lvl_bu if year == `year' & /// - laboursupplyweekly_orig == "`ls'", width(750) den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year'_`ls' if /// - max_d_valid_`year'_`ls' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - * Plot by weekly hours work - twoway (hist sim_yplgrs_dv_lvl_bu if year == `year' & /// - laboursupplyweekly_orig == "`ls'", width(750) color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist valid_y_gross_labour_bu_yr if year == `year' & /// - laboursupplyweekly_hu == "`ls'", width(750) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("`ls' hours") /// - name(gross_labour_inc_`year'_`ls', replace) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y',labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - } -} - -* Combine plots by year -forvalues year = `min_year'/`max_year' { - - grc1leg gross_labour_inc_`year'_all /// - gross_labour_inc_`year'_TEN /// - gross_labour_inc_`year'_TWENTY /// - gross_labour_inc_`year'_THIRTY /// - gross_labour_inc_`year'_FORTY, /// - title("Gross Labour Income by Weekly Hours of Work") /// - subtitle("`year'") /// - legendfrom(gross_labour_inc_`year'_TEN) rows(2) /// - graphregion(color(white)) /// - note("Notes: Amount at the benefit unit level, individual data plotted. Amounts in GBP per year, 2015 prices. Employed 18-65 years olds included in the sample." "Top and bottom percentiles trimmed. 
Weekly hours worked categories:" "ZERO = [0,5], TEN = [6,15], TWENTY = [16,25], THIRTY = [26,34], FORTY = 36+.", /// - size(vsmall)) - - graph export "$dir_output_files/income/gross_labour_income/validation_${country}_gross_labour_income_dist_`year'_both.png", /// - replace width(2400) height(1350) - -} - -graph drop _all - - -/* -* Males - -* Prepare validation data -use year dwt les_c4 valid_y_gross_labour_yr_bu /// - laboursupplyweekly_hu dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if dgn == 1 -drop dgn - -* Keep only employed individuals -keep if les_c4 == 1 -drop les_c4 - -* Trim outliers -if "$trim_outliers" == "true" { - sum valid_y_gross_labour_yr_bu, d - replace valid_y_gross_labour_yr_bu = . if /// - valid_y_gross_labour_yr_bu < r(p1) | valid_y_gross_labour_yr_bu > r(p99) -} - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = r(min) -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen valid_y_gross_labour_yr_bu if year == `year', /// - width(750) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - - foreach ls in $ls_cat { - - twoway__histogram_gen valid_y_gross_labour_yr_bu if /// - year == `year' & labour == "`ls'", width(750) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year'_`ls' = r(max) - - drop d_valid v2 - - } - -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year les_c4 sim_yplgrs_dv_lvl_bu laboursupplyweekly dgn using /// - "$dir_data/simulated_data.dta", clear - -keep if dgn == "Male" -drop dgn - -* Keep only employed individuals -keep if les_c4 == "EmployedOrSelfEmployed" -drop les_c4 - - -* Trim outliers -if "$trim_outliers" == "true" { - sum sim_yplgrs_dv_lvl_bu, d - replace sim_yplgrs_dv_lvl_bu = . 
if /// - sim_yplgrs_dv_lvl_bu < r(p1) | sim_yplgrs_dv_lvl_bu > r(p99) -} - -keep if run == 1 - -append using "$dir_data/temp_valid_stats.dta" - - -* Plot sub-figures -qui sum year -local min_year = r(min) // Calculate the minimum value of the 'year' variable -local max_year = r(max) // Calculate the maximum value of the 'year' variable - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_yplgrs_dv_lvl_bu if year == `year', width(750) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . - - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - - * Plot all hours - twoway (hist sim_yplgrs_dv_lvl_bu if year == `year' , width(750) /// - color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_y_gross_labour_yr_bu if year == `year' , width(750) /// - color(red%30) legend(label(2 "UKHLS"))) , /// - subtitle("ALL hours") name(gross_labour_inc_`year'_all, replace) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y',labsize(vsmall)) /// - graphregion(color(white)) - - - drop d_sim v1 max_d_sim max_value - - * Plot by weekly hours work - foreach ls in $ls_cat_labour { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_yplgrs_dv_lvl_bu if year == `year' & /// - laboursupplyweekly == "`ls'", width(750) den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year'_`ls' if /// - max_d_valid_`year'_`ls' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - * Plot by weekly hours work - twoway (hist sim_yplgrs_dv_lvl_bu if year == `year' & /// - laboursupplyweekly == "`ls'", width(750) color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist valid_y_gross_labour_yr_bu if year == `year' & /// - laboursupplyweekly_hu == "`ls'", width(750) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("`ls' hours") name(gross_labour_inc_`year'_`ls', replace) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y',labsize(vsmall)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - } -} - -* Combine plots by year -forvalues year = `min_year'/`max_year' { - - grc1leg gross_labour_inc_`year'_all /// - gross_labour_inc_`year'_TWENTY gross_labour_inc_`year'_FORTY /// - gross_labour_inc_`year'_FIFTY, /// - title("Gross labour income") /// - subtitle("`year', Males") /// - legendfrom(gross_labour_inc_`year'_all) rows(2) /// - graphregion(color(white)) /// - note("Notes: Series represents average benefit unit gross labour income through time. Statistics computed by averaging benefit unit" "level gross income for all males ages 18-65. Values in € per year, 2015 prices. Weekly hours worked categories:" "ZERO = 0, TWENTY = [1,39], FORTY = 40, FIFTY = 41+", /// - size(vsmall)) - - graph export "$dir_output_files/income/gross_labour_income/validation_${country}_gross_labour_income_dist_`year'_male.png", /// - replace width(2400) height(1350) - -} - -graph drop _all - - -* Females - -* Prepare validation data -use year dwt les_c4 valid_y_gross_labour_yr_bu /// - laboursupplyweekly_hu dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if dgn == 0 -drop dgn - -* Keep only employed individuals -keep if les_c4 == 1 -drop les_c4 - -* Trim outliers -if "$trim_outliers" == "true" { - sum valid_y_gross_labour_yr_bu, d - replace valid_y_gross_labour_yr_bu = . 
if /// - valid_y_gross_labour_yr_bu < r(p1) | valid_y_gross_labour_yr_bu > r(p99) -} - - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = r(min) -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen valid_y_gross_labour_yr_bu if year == `year', /// - width(750) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - - foreach ls in $ls_cat_labour { - - twoway__histogram_gen valid_y_gross_labour_yr_bu if /// - year == `year' & labour == "`ls'", width(750) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year'_`ls' = r(max) - - drop d_valid v2 - - } - -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year les_c4 sim_yplgrs_dv_lvl_bu laboursupplyweekly dgn using /// - "$dir_data/simulated_data.dta", clear - -keep if dgn == "Female" -drop dgn - -* Keep only employed individuals -keep if les_c4 == "EmployedOrSelfEmployed" -drop les_c4 - - -* Trim outliers -if "$trim_outliers" == "true" { - sum sim_yplgrs_dv_lvl_bu, d - replace sim_yplgrs_dv_lvl_bu = . if /// - sim_yplgrs_dv_lvl_bu < r(p1) | sim_yplgrs_dv_lvl_bu > r(p99) -} - -keep if run == 1 - - -append using "$dir_data/temp_valid_stats.dta" - - -* Plot sub-figures -qui sum year -local min_year = r(min) // Calculate the minimum value of the 'year' variable -local max_year = r(max) // Calculate the maximum value of the 'year' variable - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_yplgrs_dv_lvl_bu if year == `year', width(750) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - - * Plot all hours - twoway (hist sim_yplgrs_dv_lvl_bu if year == `year' , width(750) /// - color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_y_gross_labour_yr_bu if year == `year' , width(750) /// - color(red%30) legend(label(2 "UKHLS"))) , /// - subtitle("ALL hours") name(gross_labour_inc_`year'_all, replace) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y',labsize(vsmall)) /// - graphregion(color(white)) - - - drop d_sim v1 max_d_sim max_value - - * Plot by weekly hours work - foreach ls in $ls_cat_labour { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_yplgrs_dv_lvl_bu if year == `year' & /// - laboursupplyweekly == "`ls'", width(750) den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year'_`ls' if /// - max_d_valid_`year'_`ls' > max_d_sim & max_d_valid_`year'_`ls' - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - * Plot by weekly hours work - twoway (hist sim_yplgrs_dv_lvl_bu if year == `year' & /// - laboursupplyweekly == "`ls'", width(750) color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist valid_y_gross_labour_yr_bu if year == `year' & /// - laboursupplyweekly_hu == "`ls'", width(750) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("`ls' hours") name(gross_labour_inc_`year'_`ls', replace) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y',labsize(vsmall)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - } -} - -* Combine plots by year -forvalues year = `min_year'/`max_year' { - - grc1leg gross_labour_inc_`year'_all gross_labour_inc_`year'_TWENTY /// - gross_labour_inc_`year'_FORTY /// - gross_labour_inc_`year'_FIFTY, /// - title("Gross labour income") /// - subtitle("`year', Females") /// - legendfrom(gross_labour_inc_`year'_all) rows(2) /// - graphregion(color(white)) /// - note("Notes: Series represents average benefit unit gross labour income through time. Statistics computed by averaging benefit-unit level" "gross income for all females ages 18-65. Values in € per year, 2015 prices. 
Weekly hours worked categories:" "ZERO = 0, TWENTY = [1,39], FORTY = 40, FIFTY = 41+.", /// - size(vsmall)) - - graph export "$dir_output_files/income/gross_labour_income/validation_${country}_gross_labour_income_dist_`year'_female.png", /// - replace width(2400) height(1350) - - -} - -graph drop _all - diff --git a/validation/02_simulated_output_validation/do_files/06_05_plot_capital_income.do b/validation/02_simulated_output_validation/do_files/06_05_plot_capital_income.do deleted file mode 100644 index dad56bf01..000000000 --- a/validation/02_simulated_output_validation/do_files/06_05_plot_capital_income.do +++ /dev/null @@ -1,325 +0,0 @@ -******************************************************************************** -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Capital income -* AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 (AB) -* COUNTRY: UK - -* NOTES: This do file plots simulated and UKHLS capital income, -* per benefit unit - -******************************************************************************** - -******************************************************************************** -* 1 : Time series -******************************************************************************** - -******************************************************************************** -* 1.1 : Mean through time, adult population, bu -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_gross_capital_bu_yr using /// - "$dir_data/ukhls_validation_sample.dta", clear - - -* Trim outliers -/* -if "$trim_outliers" == "true" { - - sum valid_y_gross_capital_bu_yr, d - - replace valid_y_gross_capital_bu_yr = . 
if /// - valid_y_gross_capital_bu_yr < r(p1) | /// - valid_y_gross_capital_bu_yr > r(p99) - -} -*/ - -collapse (mean) valid_y_gross_capital_bu_yr [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_ypncp_lvl_bu using "$dir_data/simulated_data.dta", clear - - -* Trim outliers -/* -if "$trim_outliers" == "true" { - - sum sim_ypncp_lvl_bu, d - - replace sim_ypncp_lvl_bu = . if /// - sim_ypncp_lvl_bu < r(p1) | sim_ypncp_lvl_bu > r(p99) - -} -*/ - -collapse (mean) sim_ypncp_lvl_bu, by(run year) - -collapse (mean) sim_ypncp_lvl_bu /// - (sd) sim_ypncp_lvl_bu_sd = sim_ypncp_lvl_bu /// - , by(year) - -foreach varname in sim_ypncp_lvl_bu { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea sim_ypncp_lvl_bu_high sim_ypncp_lvl_bu_low year, sort /// - color(green%20) legend(label(1 "Simulated"))) /// -(line valid_y_gross_capital_bu_yr year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Capital income") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("GBP per year", size(small)) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Series represents average benefit unit capital income. Statistics computed by averaging benefit individual level gross" "capital income over all persons ages 18-65. Amounts in 2015 prices. Top and bottom percentiles trimmed. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/income/capital_income/validation_${country}_capital_income_ts_${min_age}_${max_age}_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 1.2 : Share with no capital income -******************************************************************************** - -* Share with no capital income -* Prepare validation data -use year dwt valid_y_gross_capital_bu_yr using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_gross_capital_bu_yr, d - - replace valid_y_gross_capital_bu_yr = . if /// - valid_y_gross_capital_bu_yr < r(p1) | /// - valid_y_gross_capital_bu_yr > r(p99) - -} - -gen valid_no_capital = (valid_y_gross_capital_bu_yr == 0) - -collapse (mean) valid_no_capital [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_ypncp_lvl_bu using "$dir_data/simulated_data.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_ypncp_lvl_bu, d - - replace sim_ypncp_lvl_bu = . 
if /// - sim_ypncp_lvl_bu < r(p1) | sim_ypncp_lvl_bu > r(p99) - -} - -gen sim_no_capital = (sim_ypncp_lvl_bu == 0) - -collapse (mean) sim_no_capital, by(run year) -collapse (mean) sim_no_capital /// - (sd) sim_no_capital_sd = sim_no_capital /// - , by(year) - -foreach varname in sim_no_capital { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea sim_no_capital_high sim_no_capital_low year, sort /// - color(green%20) legend(label(1 "Simulated"))) /// -(line valid_no_capital year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("No Capital Income") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Series represents share of individual that report not receiveing any capital income in their benefit unit, annual. Top and bottom" "percentiles trimmed.", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/income/capital_income/validation_${country}_no_capital_income_ts_${min_age}_${max_age}_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 2 : Histograms -******************************************************************************** - -******************************************************************************** -* 2.1 : Ages 18-65, by year -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_gross_capital_bu_yr using /// - "$dir_data/ukhls_validation_sample.dta", clear - - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_gross_capital_bu_yr, d - - replace valid_y_gross_capital_bu_yr = . 
if /// - valid_y_gross_capital_bu_yr < r(p1) | /// - valid_y_gross_capital_bu_yr > r(p99) - -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_ypncp_lvl_bu using "$dir_data/simulated_data.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_ypncp_lvl_bu, d - - replace sim_ypncp_lvl_bu = . if /// - sim_ypncp_lvl_bu < r(p1) | sim_ypncp_lvl_bu > r(p99) - -} - -append using "$dir_data/temp_valid_stats.dta" - -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway (hist sim_ypncp_lvl_bu if year == `year' & /// - sim_ypncp_lvl_bu < 100, width(1) color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist valid_y_gross_capital_bu_yr if year == `year' & /// - valid_y_gross_capital_bu_yr < 100, /// - width(1) color(red%30) legend(label(2 "UKHLS"))) , /// - title("Capital Income") /// - subtitle("`year'") /// - name(capital_inc_`year'_all, replace) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Benefit unit capital income reported, individual observations plotted. Sample includes individuals age 18-65. Amounts in GBP" "per year, 2015 prices. X axis range limited to 100. 
Top and bottom percentiles trimmed.", /// - size(vsmall)) - - graph export /// - "$dir_output_files/income/capital_income/validation_${country}_capital_income_dist_`year'.png", /// - replace width(2560) height(1440) - -} - - -******************************************************************************** -* 2.2 : Ages 18-65, positive amounts only -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_gross_capital_bu_yr using /// - "$dir_data/ukhls_validation_sample.dta", clear - - -* Trim outliers -/* -if "$trim_outliers" == "true" { - - sum valid_y_gross_capital_bu_yr, d - - replace valid_y_gross_capital_bu_yr = . if /// - valid_y_gross_capital_bu_yr < r(p1) | /// - valid_y_gross_capital_bu_yr > r(p99) - -} -*/ - -drop if valid_y_gross_capital_bu_yr == 0 - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_ypncp_lvl_bu using "$dir_data/simulated_data.dta", clear - -* Trim outliers -/* -if "$trim_outliers" == "true" { - - sum sim_ypncp_lvl_bu, d - - replace sim_ypncp_lvl_bu = . 
if /// - sim_ypncp_lvl_bu < r(p1) | sim_ypncp_lvl_bu > r(p99) - -} -*/ - -drop if sim_ypncp_lvl_bu == 0 - -append using "$dir_data/temp_valid_stats.dta" - -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway (hist sim_ypncp_lvl_bu if year == `year' & /// - sim_ypncp_lvl_bu < 4000, /// - width(25) color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_y_gross_capital_bu_yr if year == `year' & /// - valid_y_gross_capital_bu_yr < 4000, width(25) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - title("Capital Income") /// - subtitle("Positive amounts, `year'") /// - name(capital_inc_`year'_all, replace) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Benefit unit capital income reported, individual observations plotted. Sample includes individuals age 18-65. Amounts in GBP" "per year, 2015 prices. X axis range limited to 4000. 
", /// - size(vsmall)) - - graph export /// -"$dir_output_files/income/capital_income/validation_${country}_positive_capital_income_dist_`year'.png", /// - replace width(2560) height(1440) - -} - -graph drop _all diff --git a/validation/02_simulated_output_validation/do_files/06_06_plot_pension_income.do b/validation/02_simulated_output_validation/do_files/06_06_plot_pension_income.do deleted file mode 100644 index 8ac02c86d..000000000 --- a/validation/02_simulated_output_validation/do_files/06_06_plot_pension_income.do +++ /dev/null @@ -1,344 +0,0 @@ -******************************************************************************** -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Pension income -* AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 (AB) -* COUNTRY: UK - -* NOTES: This do file plots simulated and UKHLS pension income, -* per benefit unit - -******************************************************************************** - -******************************************************************************** -* 1: Time series -******************************************************************************** - -******************************************************************************** -* 1.1: Mean through time, bu -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_gross_pension_bu_yr dag using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -* Sample selection -drop if dag < 65 - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_gross_pension_bu_yr, d - - replace valid_y_gross_pension_bu_yr = . 
if /// - valid_y_gross_pension_bu_yr < r(p1) | /// - valid_y_gross_pension_bu_yr > r(p99) - -} - -collapse (mean) valid_y_gross_pension_bu_yr [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_ypnoab_lvl_bu dag using "$dir_data/simulated_data.dta", clear - -* Sample selection -drop if dag < 65 - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_ypnoab_lvl_bu, d - - replace sim_ypnoab_lvl_bu = . if /// - sim_ypnoab_lvl_bu < r(p1) | sim_ypnoab_lvl_bu > r(p99) - -} - - -collapse (mean) sim_ypnoab_lvl_bu, by(run year) -collapse (mean) sim_ypnoab_lvl_bu /// - (sd) sim_ypnoab_lvl_bu_sd = sim_ypnoab_lvl_bu /// - , by(year) - -foreach varname in sim_ypnoab_lvl_bu { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea sim_ypnoab_lvl_bu_high sim_ypnoab_lvl_bu_low year, sort /// - color(green%20) legend(label(1 "Simulated"))) /// -(line valid_y_gross_pension_bu_yr year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Private Pension Income") /// - subtitle("Ages 65+") /// - xtitle("Year", size(small)) /// - ytitle("GBP per year", size(small)) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Series represents average benefit unit private pension income. Statistics computed by averaging benefit individual level gross" "private pension income over all persons ages 65+. Amounts in 2015 prices. Top and bottom percentiles trimmed. 
", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/income/pension_income/validation_${country}_pension_income_ts_${max_age}plus_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 1.2 : Share with no pension income -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_gross_pension_bu_yr dag using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -* Sample selection -drop if dag < 65 - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_gross_pension_bu_yr, d - - replace valid_y_gross_pension_bu_yr = . if /// - valid_y_gross_pension_bu_yr < r(p1) | /// - valid_y_gross_pension_bu_yr > r(p99) - -} - -gen valid_no_pension = (valid_y_gross_pension_bu_yr == 0) - -collapse (mean) valid_no_pension [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_ypnoab_lvl_bu dag using /// - "$dir_data/simulated_data_full.dta", clear - -* Sample selection -drop if dag < 65 - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_ypnoab_lvl_bu, d - - replace sim_ypnoab_lvl_bu = . 
if /// - sim_ypnoab_lvl_bu < r(p1) | sim_ypnoab_lvl_bu > r(p99) - -} - -gen sim_no_pension = (sim_ypnoab_lvl_bu == 0) - -collapse (mean) sim_no_pension, by(run year) -collapse (mean) sim_no_pension /// - (sd) sim_no_pension_sd = sim_no_pension /// - , by(year) - -foreach varname in sim_no_pension { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea sim_no_pension_high sim_no_pension_low year, sort /// - color(green%20) legend(label(1 "Simulated"))) /// -(line valid_no_pension year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("No Private Pension Income") /// - subtitle("Ages 65+") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Series represents the share of individual who report not receiving any gross private pension income in their benefit unit, annual." 
"Top and bottom percentiles trimmed.", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/income/pension_income/validation_${country}_no_pension_income_ts_${max_age}plus_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 2 : Histograms -******************************************************************************** - -******************************************************************************** -* 2.1 : 65+, by year -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_gross_pension_bu_yr dag using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -* Sample selection -drop if dag < 65 - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_gross_pension_bu_yr, d - - replace valid_y_gross_pension_bu_yr = . if /// - valid_y_gross_pension_bu_yr < r(p1) | /// - valid_y_gross_pension_bu_yr > r(p99) - -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_ypnoab_lvl_bu dag using /// - "$dir_data/simulated_data_full.dta", clear - -* Sample selection -drop if dag < 65 - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_ypnoab_lvl_bu, d - - replace sim_ypnoab_lvl_bu = . 
if /// - sim_ypnoab_lvl_bu < r(p1) | sim_ypnoab_lvl_bu > r(p99) - -} - -append using "$dir_data/temp_valid_stats.dta" - -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway (hist sim_ypnoab_lvl_bu if year == `year' & /// - sim_ypnoab_lvl_bu < 100, width(1) color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist valid_y_gross_pension_bu_yr if year == `year' & /// - valid_y_gross_pension_bu_yr < 100, /// - width(1) color(red%30) legend(label(2 "UKHLS"))) , /// - title("Private Pension Income") /// - subtitle("`year'") /// - name(capital_inc_`year'_all, replace) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Benefit unit gross private pension income reported, individual observations plotted. Sample includes individuals age 65+." "Amounts in GBP per year, 2015 prices. X axis range limited to 100. Top and bottom percentiles trimmed.", /// - size(vsmall)) - - graph export /// - "$dir_output_files/income/pension_income/validation_${country}_pension_income_dist_`year'.png", /// - replace width(2560) height(1440) - -} - - -******************************************************************************** -* 2.2 : Ages 65+, positive amounts only -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_gross_pension_bu_yr dag using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -* Select sample -drop if dag < 65 - - -* Trim outliers -/* -if "$trim_outliers" == "true" { - - sum valid_y_gross_capital_bu_yr, d - - replace valid_y_gross_capital_bu_yr = . 
if /// - valid_y_gross_capital_bu_yr < r(p1) | /// - valid_y_gross_capital_bu_yr > r(p99) - -} -*/ - -drop if valid_y_gross_pension_bu_yr == 0 - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_ypnoab_lvl_bu dag using /// - "$dir_data/simulated_data_full.dta", clear - -* Select sample -drop if dag < 65 - -* Trim outliers -/* -if "$trim_outliers" == "true" { - - sum sim_ypncp_lvl_bu, d - - replace sim_ypncp_lvl_bu = . if /// - sim_ypncp_lvl_bu < r(p1) | sim_ypncp_lvl_bu > r(p99) - -} -*/ - -drop if sim_ypnoab_lvl_bu == 0 - -append using "$dir_data/temp_valid_stats.dta" - -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway (hist sim_ypnoab_lvl_bu if year == `year' & /// - sim_ypnoab_lvl_bu < 35000, /// - width(200) color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_y_gross_pension_bu_yr if year == `year' & /// - valid_y_gross_pension_bu_yr < 35000, width(200) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - title("Private Pension Income") /// - subtitle("Positive amounts, `year'") /// - name(capital_inc_`year'_all, replace) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Benefit unit gross private pension income reported, individual observations plotted. Sample includes individuals age 65+." "Amounts in GBP per year, 2015 prices. 
X axis range limited to 35000.", /// - size(vsmall)) - - graph export /// -"$dir_output_files/income/pension_income/validation_${country}_positive_pension_income_dist_`year'.png", /// - replace width(2560) height(1440) - -} - -graph drop _all diff --git a/validation/02_simulated_output_validation/do_files/06_07_plot_disposable_income.do b/validation/02_simulated_output_validation/do_files/06_07_plot_disposable_income.do deleted file mode 100644 index c3f137229..000000000 --- a/validation/02_simulated_output_validation/do_files/06_07_plot_disposable_income.do +++ /dev/null @@ -1,639 +0,0 @@ -******************************************************************************** -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Disposable income -* AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 -* COUNTRY: UK - -* NOTES: This do file plots simulated and UKHLS disposable income, -* per benefit unit. Individual level data plotted. -******************************************************************************** - -******************************************************************************** -* 1 : Mean values over time -******************************************************************************** - -******************************************************************************** -* 1.1 : Mean values over time - benefit uit level -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_disp_bu_yr using /// - "$dir_data/ukhls_validation_sample.dta", clear - - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_disp_bu_yr, d - - replace valid_y_disp_bu_yr = . 
if /// - valid_y_disp_bu_yr < r(p1) | valid_y_disp_bu_yr > r(p99) - -} - -collapse (mean) valid_y_disp_bu_yr [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_y_disp_yr_bu using "$dir_data/simulated_data.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_y_disp_yr_bu, d - - replace sim_y_disp_yr_bu = . if /// - sim_y_disp_yr_bu < r(p1) | sim_y_disp_yr_bu > r(p99) - -} - -collapse (mean) sim_y_disp_yr_bu, by(run year) -collapse (mean) sim_y_disp_yr_bu /// - (sd) sim_y_disp_yr_bu_sd = sim_y_disp_yr_bu /// - , by(year) - -foreach varname in sim_y_disp_yr_bu { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea sim_y_disp_yr_bu_high sim_y_disp_yr_bu_low year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_y_disp_bu_yr year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Disposable Income") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("GBP per year", size(small)) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Statistics computed using benefit unit level amounts, averaging using individual level data. Amounts in GBP" "per year, 2015 prices. Top and bottom percentiles trimmed. ", /// - size(vsmall)) - -graph export /// -"$dir_output_files/income/disposable_income/validation_${country}_disposable_income_ts_${min_age}_${max_age}_both.jpg", /// - replace //width(2560) height(1440) quality(100) - - -/* -* EUROMOD - -* Prepare EM data -use "$dir_work/${country}_EM_validation_data.dta", clear - - -* Trim outliers -if "$trim_outliers" == "true" { - sum valid_y_disp_yr_bu, d - replace valid_y_disp_yr_bu = . 
if /// - valid_y_disp_yr_bu < r(p1) | valid_y_disp_yr_bu > r(p99) -} - - -collapse (mean) valid_y_disp_yr_bu [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - - -* Prepare simulated data -use run year sim_y_disp_yr_bu using "$dir_data/simulated_data.dta", clear - - -* Trim outliers -if "$trim_outliers" == "true" { - sum sim_y_disp_yr_bu, d - replace sim_y_disp_yr_bu = . if /// - sim_y_disp_yr_bu < r(p1) | sim_y_disp_yr_bu > r(p99) -} - - -collapse (mean) sim_y_disp_yr_bu, by(run year) -collapse (mean) sim_y_disp_yr_bu /// - (sd) sim_y_disp_yr_bu_sd = sim_y_disp_yr_bu /// - , by(year) - -foreach varname in sim_y_disp_yr_bu { - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea sim_y_disp_yr_bu_high sim_y_disp_yr_bu_low year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_y_disp_yr_bu year, sort color(green) /// - legend(label(2 "UKHLS"))), /// -title("Disposable income") xtitle("Year") ytitle("€ per year (2015 prices)") /// - ylabel(,labsize(small)) xlabel(,labsize(small)) /// - graphregion(color(white)) /// - note("Notes: Statistics computed at the benefit unit level.", size(vsmall)) -*/ - - - -******************************************************************************** -* 2 : Histograms -******************************************************************************** - -******************************************************************************** -* 2.1 : Histograms - Ages 18-65, by year -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_disp_bu_yr laboursupplyweekly_hu dag /// - using "$dir_data/ukhls_validation_sample.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_disp_bu_yr, d - - replace valid_y_disp_bu_yr = . 
if /// - valid_y_disp_bu_yr < r(p1) | valid_y_disp_bu_yr > r(p99) - -} - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen valid_y_disp_bu_yr if year == `year' , /// - width(500) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - - foreach ls in $ls_cat { - - twoway__histogram_gen valid_y_disp_bu_yr if /// - year == `year' & labour == "`ls'", width(500) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year'_`ls' = r(max) - - drop d_valid v2 - - } -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_y_disp_yr_bu laboursupplyweekly_orig dag using /// - "$dir_data/simulated_data.dta", clear - - -* Trim outliers -if "$trim_outliers" == "true" { - - sum sim_y_disp_yr_bu, d - - replace sim_y_disp_yr_bu = . if /// - sim_y_disp_yr_bu < r(p1) | sim_y_disp_yr_bu > r(p99) - -} - -keep if run == 1 - -append using "$dir_data/temp_valid_stats.dta" - -* Plot sub-figures -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_y_disp_yr_bu if year == `year', width(500) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - * Plot all hours - twoway (hist sim_y_disp_yr_bu if year == `year', width(500) /// - color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_y_disp_bu_yr if year == `year', width(500) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("ALL hours") /// - name(disp_inc_`year'_all, replace) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - foreach ls in $ls_cat { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_y_disp_yr_bu if year == `year' & /// - laboursupplyweekly_orig == "`ls'", width(500) den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year'_`ls' if /// - max_d_valid_`year'_`ls' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - * Plot by weekly hours work - twoway (hist sim_y_disp_yr_bu if year == `year' & /// - laboursupplyweekly_orig == "`ls'", width(500) color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist valid_y_disp_bu_yr if year == `year' & /// - laboursupplyweekly_hu == "`ls'", width(500) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("`ls' hours") /// - name(disp_inc_`year'_`ls', replace) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - } -} - -* Combine plots by year -qui sum year -local min_year = 2011 -local max_year = r(max) - -forvalues year = `min_year'/`max_year' { - - grc1leg disp_inc_`year'_all /// - disp_inc_`year'_ZERO /// - disp_inc_`year'_TEN /// - disp_inc_`year'_TWENTY /// - disp_inc_`year'_THIRTY /// - disp_inc_`year'_FORTY, /// - title("Disposable Income by Weekly Hours of Work") /// - subtitle("`year',") /// - legendfrom(disp_inc_`year'_ZERO) rows(2) /// - graphregion(color(white)) /// - note("Notes: Amounts in GBP per year, 2015 prices. Indiviudal level data of benefit level variable plotted." "Top and bottom percentiles trimmed. Weekly hours worked categories: ZERO = 0, TWENTY = [1,39], FORTY = 40, FIFTY = 41+", /// - size(vsmall)) - - graph export /// - "$dir_output_files/income/disposable_income/validation_${country}_disposable_income_dist_`year'.png", /// - replace width(2400) height(1350) -} - - -graph drop _all - - -/* - -* Males - -* Prepare validation data -use year dwt valid_y_disp_yr_bu laboursupplyweekly_hu dag dgn /// - using "$dir_data/ukhls_validation_sample.dta", clear - -keep if dgn == 1 -drop dgn - -* Trim outliers -if "$trim_outliers" == "true" { - sum valid_y_disp_yr_bu, d - - replace valid_y_disp_yr_bu = . 
if /// - valid_y_disp_yr_bu < r(p1) | valid_y_disp_yr_bu > r(p99) -} - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = r(min) -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen valid_y_disp_yr_bu if year == `year' , /// - width(500) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - - foreach ls in $ls_cat { - - twoway__histogram_gen valid_y_disp_yr_bu if /// - year == `year' & labour == "`ls'", width(500) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year'_`ls' = r(max) - - drop d_valid v2 - - } -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_y_disp_yr_bu laboursupplyweekly dag dgn using /// - "$dir_data/simulated_data.dta", clear - -keep if dgn == "Male" -drop dgn - -* Trim outliers -if "$trim_outliers" == "true" { - sum sim_y_disp_yr_bu, d - replace sim_y_disp_yr_bu = . if /// - sim_y_disp_yr_bu < r(p1) | sim_y_disp_yr_bu > r(p99) -} - -keep if run == 1 - -append using "$dir_data/temp_valid_stats.dta" - -* Plot sub-figures -qui sum year -local min_year = r(min) -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_y_disp_yr_bu if year == `year', width(500) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - * Plot all hours - twoway (hist sim_y_disp_yr_bu if year == `year', width(500) /// - color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_y_disp_yr_bu if year == `year' , width(500) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("ALL hours") name(disp_inc_`year'_all, replace) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - foreach ls in $ls_cat { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_y_disp_yr_bu if year == `year' & /// - laboursupplyweekly == "`ls'", width(500) den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year'_`ls' if /// - max_d_valid_`year'_`ls' > max_d_sim - replace max_value = max_d_sim if max_value == . - - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - * Plot by weekly hours work - twoway (hist sim_y_disp_yr_bu if year == `year' & /// - laboursupplyweekly == "`ls'", width(500) color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist valid_y_disp_yr_bu if year == `year' & /// - laboursupplyweekly_hu == "`ls'", width(500) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("`ls' hours") name(disp_inc_`year'_`ls', replace) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - } -} - -* Combine plots by year -qui sum year -local min_year = r(min) -local max_year = r(max) - -forvalues year = `min_year'/`max_year' { - - grc1leg disp_inc_`year'_all disp_inc_`year'_ZERO disp_inc_`year'_TWENTY /// - disp_inc_`year'_FORTY /// - disp_inc_`year'_FIFTY, /// - title("Disposable income by weekly hours of work") /// - subtitle("`year', Males") /// - legendfrom(disp_inc_`year'_all) rows(2) /// - 
graphregion(color(white)) /// - note("Notes: Sample includes all males aged 18-65. Values in € per year (2015 prices). Indiviudal level data for benefit level variable. Samples" "trimmed. Weekly hours worked categories: ZERO = 0, TWENTY = [1,39], FORTY = 40, FIFTY = 41+", /// - size(vsmall)) - - graph export /// - "$dir_output_files/income/disposable_income/validation_${country}_disposable_income_dist_`year'_male.png", /// - replace width(2400) height(1350) -} - - -graph drop _all - - -* Females - -* Prepare validation data -use year dwt valid_y_disp_yr_bu laboursupplyweekly_hu dag dgn /// - using "$dir_data/ukhls_validation_sample.dta", clear - -keep if dgn == 0 -drop dgn - -* Trim outliers -if "$trim_outliers" == "true" { - sum valid_y_disp_yr_bu, d - - replace valid_y_disp_yr_bu = . if /// - valid_y_disp_yr_bu < r(p1) | valid_y_disp_yr_bu > r(p99) -} - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = r(min) -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen valid_y_disp_yr_bu if year == `year' , /// - width(500) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - - foreach ls in $ls_cat { - - twoway__histogram_gen valid_y_disp_yr_bu if /// - year == `year' & labour == "`ls'", width(500) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year'_`ls' = r(max) - - drop d_valid v2 - - } -} - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year sim_y_disp_yr_bu laboursupplyweekly dag dgn using /// - "$dir_data/simulated_data.dta", clear - -keep if dgn == "Female" -drop dgn - -* Trim outliers -if "$trim_outliers" == "true" { - sum sim_y_disp_yr_bu, d - replace sim_y_disp_yr_bu = . 
if /// - sim_y_disp_yr_bu < r(p1) | sim_y_disp_yr_bu > r(p99) -} - -keep if run == 1 - - -append using "$dir_data/temp_valid_stats.dta" - -* Plot sub-figures -qui sum year -local min_year = r(min) -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_y_disp_yr_bu if year == `year', width(500) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . - - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - * Plot all hours - twoway (hist sim_y_disp_yr_bu if year == `year' , width(500) /// - color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_y_disp_yr_bu if year == `year' , width(500) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("ALL hours") name(disp_inc_`year'_all, replace) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - foreach ls in $ls_cat { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen sim_y_disp_yr_bu if year == `year' & /// - laboursupplyweekly == "`ls'", width(500) den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year'_`ls' if /// - max_d_valid_`year'_`ls' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - * Plot by weekly hours work - twoway (hist sim_y_disp_yr_bu if year == `year' & /// - laboursupplyweekly == "`ls'", width(500) color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist valid_y_disp_yr_bu if year == `year' & /// - laboursupplyweekly_hu == "`ls'", width(500) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("`ls' hours") name(disp_inc_`year'_`ls', replace) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - } -} - -* Combine plots by year -qui sum year -local min_year = r(min) -local max_year = r(max) - -forvalues year = `min_year'/`max_year' { - - grc1leg disp_inc_`year'_all disp_inc_`year'_ZERO disp_inc_`year'_TWENTY /// - disp_inc_`year'_FORTY /// - disp_inc_`year'_FIFTY, /// - title("Disposable income by weekly hours of work") /// - subtitle("`year', Females") /// - legendfrom(disp_inc_`year'_all) rows(2) /// - graphregion(color(white)) /// - note("Notes: Sample includes all females aged 18-65. Values in € per year (2015 prices). Indiviudal level data for benefit level variable. Samples" "trimmed. 
Weekly hours worked categories: ZERO = 0, TWENTY = [1,39], FORTY = 40, FIFTY = 41+.", /// - size(vsmall)) - - graph export /// - "$dir_output_files/income/disposable_income/validation_${country}_disposable_income_dist_`year'_female.png", /// - replace width(2400) height(1350) -} - - -graph drop _all diff --git a/validation/02_simulated_output_validation/do_files/06_08_plot_equivalised_disposable_income.do b/validation/02_simulated_output_validation/do_files/06_08_plot_equivalised_disposable_income.do deleted file mode 100644 index 4b5e6203d..000000000 --- a/validation/02_simulated_output_validation/do_files/06_08_plot_equivalised_disposable_income.do +++ /dev/null @@ -1,690 +0,0 @@ -******************************************************************************** -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Equivalised disposable income -* AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 (AB) -* COUNTRY: UK - -* NOTES: This do file plots simulated and UKHLS equivalised -* disposable income, per benefit unit -******************************************************************************** - -******************************************************************************** -* 1 : Mean values over time -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_eq_disp_bu_yr using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_eq_disp_bu_yr, d - - replace valid_y_eq_disp_bu_yr = . if /// - valid_y_eq_disp_bu_yr < r(p1) | valid_y_eq_disp_bu_yr > r(p99) - -} - - -collapse (mean) valid_y_eq_disp_bu_yr [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year equivalisedincome using "$dir_data/simulated_data.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum equivalisedincome, d - - replace equivalisedincome = . 
if /// - equivalisedincome < r(p1) | equivalisedincome > r(p99) - -} - - -collapse (mean) equivalisedincome, by(run year) -collapse (mean) equivalisedincome /// - (sd) equivalisedincome_sd = equivalisedincome, by(year) - -foreach varname in equivalisedincome { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea equivalisedincome_high equivalisedincome_low year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_y_eq_disp_bu_yr year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Equivalised Disposable Income") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("GBP per year", size(small)) /// - ylabel(,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Equivalised disposable income computed by the modified OECD scale. Amount at the benefit unit level, individual data" "plotted. Top and bottom percentiles trimmed. Amounts annual, in 2015 prices.", /// - size(vsmall)) - -graph export /// -"$dir_output_files/income/equivalised_disposable_income/validation_${country}_equivalised_disposable_income_ts_${min_age}_${max_age}_both.jpg", /// - replace width(2400) height(1350) - -/* -* Males - -* Prepare validation data -use year dwt valid_y_eq_disp_yr_bu dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if dgn == 1 - -* Trim outliers -if "$trim_outliers" == "true" { - sum valid_y_eq_disp_yr_bu, d - replace valid_y_eq_disp_yr_bu = . 
if /// - valid_y_eq_disp_yr_bu < r(p1) | valid_y_eq_disp_yr_bu > r(p99) -} - -collapse (mean) valid_y_eq_disp_yr_bu [aw = dwt], by(year) - - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year equivalisedincome dgn using "$dir_data/simulated_data.dta", clear - -keep if dgn == "Male" - -* Trim outliers -if "$trim_outliers" == "true" { - sum equivalisedincome, d - replace equivalisedincome = . if /// - equivalisedincome < r(p1) | equivalisedincome > r(p99) -} - -collapse (mean) equivalisedincome, by(run year) -collapse (mean) equivalisedincome /// - (sd) equivalisedincome_sd = equivalisedincome, by(year) - -foreach varname in equivalisedincome { - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea equivalisedincome_high equivalisedincome_low year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_y_eq_disp_yr_bu year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Equivalised disposable income") subtitle("Males") /// - xtitle("Year") /// - ytitle("€ per year.") /// - ylabel(,labsize(small)) xlabel(,labsize(small)) /// - graphregion(color(white)) /// - note("Notes: Equivalised disposable income assigned to each person computed by adjusting benefit unit's disposable income by the" "modified OECD scale. Samples includes males ages 18-65. Trimmed sample. 
Amounts in 2015 prices.", /// - size(vsmall)) - -graph export /// -"$dir_output_files/income/equivalised_disposable_income/validation_${country}_equivalised_disposable_income_ts_${min_age}_${max_age}_male.jpg", /// - replace width(2400) height(1350) - - -* Females - -* Prepare validation data -use year dwt valid_y_eq_disp_yr_bu dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if dgn == 0 - -* Trim outliers -if "$trim_outliers" == "true" { - sum valid_y_eq_disp_yr_bu, d - replace valid_y_eq_disp_yr_bu = . if /// - valid_y_eq_disp_yr_bu < r(p1) | valid_y_eq_disp_yr_bu > r(p99) -} - -collapse (mean) valid_y_eq_disp_yr_bu [aw = dwt], by(year) - - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year equivalisedincome dgn using "$dir_data/simulated_data.dta", clear - -keep if dgn == "Female" - -* Trim outliers -if "$trim_outliers" == "true" { - sum equivalisedincome, d - replace equivalisedincome = . if /// - equivalisedincome < r(p1) | equivalisedincome > r(p99) -} - -collapse (mean) equivalisedincome, by(run year) -collapse (mean) equivalisedincome /// - (sd) equivalisedincome_sd = equivalisedincome, by(year) - -foreach varname in equivalisedincome { - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea equivalisedincome_high equivalisedincome_low year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_y_eq_disp_yr_bu year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Equivalised disposable income") subtitle("Females") /// - xtitle("Year") /// - ytitle("€ per year.") /// - ylabel(,labsize(small)) xlabel(,labsize(small)) /// - graphregion(color(white)) /// - note("Notes: Equivalised disposable income assigned to each person computed by adjusting benefit unit's disposable income by the" "modified OECD scale. 
Samples includes females ages 18-65. Trimmed sample. Amounts in 2015 prices.", /// - size(vsmall)) - -graph export /// -"$dir_output_files/income/equivalised_disposable_income/validation_${country}_equivalised_disposable_income_ts_${min_age}_${max_age}_female.jpg", /// - replace width(2400) height(1350) -*/ - -******************************************************************************** -* 2 : Histograms by year, and by category of weekly labour supply -******************************************************************************** - - -* Prepare validation data -use year dwt valid_y_eq_disp_bu_yr laboursupplyweekly_hu using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_eq_disp_bu_yr, d - - replace valid_y_eq_disp_bu_yr = . if /// - valid_y_eq_disp_bu_yr < r(p1) | valid_y_eq_disp_bu_yr > r(p99) - -} - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen valid_y_eq_disp_bu_yr if year == `year' , /// - width(500) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - - foreach ls in $ls_cat { - - twoway__histogram_gen valid_y_eq_disp_bu_yr if /// - year == `year' & labour == "`ls'", width(500) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year'_`ls' = r(max) - - drop d_valid v2 - - } -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year equivalisedincome laboursupplyweekly using /// - "$dir_data/simulated_data.dta", clear - - -* Trim outliers -if "$trim_outliers" == "true" { - - sum equivalisedincome, d - - replace equivalisedincome = . 
if /// - equivalisedincome < r(p1) | equivalisedincome > r(p99) - -} - -keep if run == 1 - -append using "$dir_data/temp_valid_stats.dta" - -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen equivalisedincome if year == `year', width(500) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . - - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - * Plot all hours - twoway (hist equivalisedincome if year == `year', width(500) /// - color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_y_eq_disp_bu_yr if year == `year', width(500) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("ALL hours") /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - name(eqdisp_inc_`year'_all, replace) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - foreach ls in $ls_cat { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen equivalisedincome if year == `year' & /// - laboursupplyweekly_orig == "`ls'", width(500) den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year'_`ls' if /// - max_d_valid_`year'_`ls' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - twoway (hist equivalisedincome if year == `year' & /// - laboursupplyweekly_orig == "`ls'", width(500) color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist valid_y_eq_disp_bu_yr if year == `year' & /// - laboursupplyweekly_hu == "`ls'", width(500) color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("`ls' hours") /// - name(eqdisp_inc_`year'_`ls', replace) /// - xtitle("GBP", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - } -} - -qui sum year -local min_year = 2011 -local max_year = r(max) - -forvalues year = `min_year'/`max_year' { - - grc1leg eqdisp_inc_`year'_all /// - eqdisp_inc_`year'_ZERO /// - eqdisp_inc_`year'_TEN /// - eqdisp_inc_`year'_TWENTY /// - eqdisp_inc_`year'_THIRTY /// - eqdisp_inc_`year'_FORTY, /// - title("Equivalised Disposable Income") /// - subtitle("`year'") /// - legendfrom(eqdisp_inc_`year'_all) rows(2) /// - graphregion(color(white)) /// - note("Notes: Distribution of benefit unit equivalised disposable income. Individual level data plotted 18-65 year olds included in sample. Amounts in" "GBP per year, 2015 prices. Top and bottom percentiles trimmed. 
Weekly hours worked categories:" "ZERO = [0,5], TEN = [6,15], TWENTY = [16,25], THIRTY = [26,34], FORTY = 36+.", /// - size(vsmall)) - - graph export /// - "$dir_output_files/income/equivalised_disposable_income/validation_${country}_equivalised_disposable_inc_dist_`year'.png", /// - replace width(2560) height(1440) - -} - -graph drop _all - - - -/* -* Males - -* Prepare validation data -use year dwt valid_y_eq_disp_yr_bu laboursupplyweekly_hu dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if dgn == 1 -drop dgn - - -* Trim outliers -if "$trim_outliers" == "true" { - sum valid_y_eq_disp_yr_bu, d - - replace valid_y_eq_disp_yr_bu = . if /// - valid_y_eq_disp_yr_bu < r(p1) | valid_y_eq_disp_yr_bu > r(p99) -} - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = r(min) -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen valid_y_eq_disp_yr_bu if year == `year' , /// - width(500) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - - foreach ls in $ls_cat { - - twoway__histogram_gen valid_y_eq_disp_yr_bu if /// - year == `year' & labour == "`ls'", width(500) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year'_`ls' = r(max) - - drop d_valid v2 - - } -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year equivalisedincome laboursupplyweekly dgn using /// - "$dir_data/simulated_data.dta", clear - -keep if dgn == "Male" -drop dgn - - -* Trim outliers -if "$trim_outliers" == "true" { - sum equivalisedincome, d - replace equivalisedincome = . 
if /// - equivalisedincome < r(p1) | equivalisedincome > r(p99) -} - -keep if run == 1 - -append using "$dir_data/temp_valid_stats.dta" - -qui sum year -local min_year = r(min) -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen equivalisedincome if year == `year', width(500) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . - - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - * Plot all hours - twoway (hist equivalisedincome if year == `year' , /// - color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_y_eq_disp_yr_bu if year == `year' , color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("ALL hours") name(eqdisp_inc_`year'_all, replace) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - foreach ls in $ls_cat { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen equivalisedincome if year == `year' & /// - laboursupplyweekly == "`ls'", width(500) den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year'_`ls' if /// - max_d_valid_`year'_`ls' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - twoway (hist equivalisedincome if year == `year' & /// - laboursupplyweekly == "`ls'", color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist valid_y_eq_disp_yr_bu if year == `year' & /// - laboursupplyweekly_hu == "`ls'", color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("`ls' hours") name(eqdisp_inc_`year'_`ls', replace) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - } -} - -qui sum year -local min_year = r(min) -local max_year = r(max) - -forvalues year = `min_year'/`max_year' { - - grc1leg eqdisp_inc_`year'_all eqdisp_inc_`year'_ZERO /// - eqdisp_inc_`year'_TWENTY eqdisp_inc_`year'_FORTY /// - eqdisp_inc_`year'_FIFTY, /// - title("Equivalised disposable income") /// - subtitle("`year', Males") /// - legendfrom(eqdisp_inc_`year'_all) rows(2) /// - graphregion(color(white)) /// - note("Notes: Series represents average benefit unit equivalised disposable income for all persons ages 18-65. Individual observations plotted." "Values in € per year, 2015 prices. Sample trimmed. Weekly hours worked categories:" "ZERO = 0, TWENTY = [1,39], FORTY = 40, FIFTY = 41+", /// - size(vsmall)) - - graph export /// - "$dir_output_files/income/equivalised_disposable_income/validation_${country}_equivalised_disposable_inc_dist_`year'_male.png", /// - replace width(2560) height(1440) - -} - -graph drop _all - - -* Females - -* Prepare validation data -use year dwt valid_y_eq_disp_yr_bu laboursupplyweekly_hu dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if dgn == 0 -drop dgn - - -* Trim outliers -if "$trim_outliers" == "true" { - sum valid_y_eq_disp_yr_bu, d - replace valid_y_eq_disp_yr_bu = . 
if /// - valid_y_eq_disp_yr_bu < r(p1) | valid_y_eq_disp_yr_bu > r(p99) -} - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = r(min) -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen valid_y_eq_disp_yr_bu if year == `year' , /// - width(500) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - - foreach ls in $ls_cat { - - twoway__histogram_gen valid_y_eq_disp_yr_bu if /// - year == `year' & labour == "`ls'", width(500) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year'_`ls' = r(max) - - drop d_valid v2 - - } -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year equivalisedincome laboursupplyweekly dgn using /// - "$dir_data/simulated_data.dta", clear - -keep if dgn == "Female" -drop dgn - - -* Trim outliers -if "$trim_outliers" == "true" { - sum equivalisedincome, d - replace equivalisedincome = . if /// - equivalisedincome < r(p1) | equivalisedincome > r(p99) -} - -keep if run == 1 - -append using "$dir_data/temp_valid_stats.dta" - -qui sum year -local min_year = r(min) -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen equivalisedincome if year == `year', width(500) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - * Plot all hours - twoway (hist equivalisedincome if year == `year' , /// - color(green%30) legend(label(1 "Simulated"))) /// - (hist valid_y_eq_disp_yr_bu if year == `year' , color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("ALL hours") name(eqdisp_inc_`year'_all, replace) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - foreach ls in $ls_cat { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen equivalisedincome if year == `year' & /// - laboursupplyweekly == "`ls'", width(500) den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year'_`ls' if /// - max_d_valid_`year'_`ls' > max_d_sim - replace max_value = max_d_sim if max_value == . - - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - twoway (hist equivalisedincome if year == `year' & /// - laboursupplyweekly == "`ls'", color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist valid_y_eq_disp_yr_bu if year == `year' & /// - laboursupplyweekly_hu == "`ls'", color(red%30) /// - legend(label(2 "UKHLS"))) , /// - subtitle("`ls' hours") name(eqdisp_inc_`year'_`ls', replace) /// - xlabel(,labsize(vsmall) angle(forty_five)) /// - ylabel(0(`steps')`max_y', labsize(vsmall)) /// - graphregion(color(white)) - - drop d_sim v1 max_d_sim max_value - - } -} - -qui sum year -local min_year = r(min) -local max_year = r(max) - -forvalues year = `min_year'/`max_year' { - - grc1leg eqdisp_inc_`year'_all eqdisp_inc_`year'_ZERO /// - eqdisp_inc_`year'_TWENTY eqdisp_inc_`year'_FORTY /// - eqdisp_inc_`year'_FIFTY, /// - title("Equivalised disposable income") /// - subtitle("`year', Females") /// - legendfrom(eqdisp_inc_`year'_all) rows(2) /// - graphregion(color(white)) /// - note("Notes: Series represents average benefit unit 
equivalised disposable income for all persons ages 18-65. Individual observations plotted" "Values in € per year, 2015 prices. Sample trimmed. Weekly hours worked categories:" "ZERO = 0, TWENTY = [1,39], FORTY = 40, FIFTY = 41+.", /// - size(vsmall)) - - graph export /// - "$dir_output_files/income/equivalised_disposable_income/validation_${country}_equivalised_disposable_inc_dist_`year'_female.png", /// - replace width(2560) height(1440) - -} - -graph drop _all diff --git a/validation/02_simulated_output_validation/do_files/06_10_plot_hours_worked.do b/validation/02_simulated_output_validation/do_files/06_10_plot_hours_worked.do deleted file mode 100644 index bbc371e97..000000000 --- a/validation/02_simulated_output_validation/do_files/06_10_plot_hours_worked.do +++ /dev/null @@ -1,543 +0,0 @@ -******************************************************************************** -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Hours worked -* AUTHORS: Ashley Burdett -* LAST UPDATE: 06/2025 (AB) -* COUNTRY: UK - -* NOTES: -******************************************************************************** - -******************************************************************************** -* 1 : Mean values over time -******************************************************************************** - -******************************************************************************** -* 1.1 : Mean values over time - Ages 18-65 -******************************************************************************** - -* Prepare validation data -use year dwt les_c4 lhw hours lhw_flag using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Select sample -keep if les_c4 == 1 -drop if lhw_flag == 1 - -* Censor those who work very large number of hours -replace hours = $max_hours if hours > $max_hours & hours != . 
- -* Compute mean -collapse (mean) hours [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year les_c4 hoursworkedweekly using /// - "$dir_data/simulated_data.dta", clear - -rename hoursworkedweekly lhw_sim - -* Keep only employed individuals -keep if les_c4 == "EmployedOrSelfEmployed" - -* Compute mean and sd -collapse (mean) lhw_sim, by(run year) -collapse (mean) lhw_sim /// - (sd) lhw_sim_sd = lhw_sim /// - , by(year) - -* Approx 95% confidence interval -foreach varname in lhw_sim { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -* Combine datasets -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea lhw_sim_high lhw_sim_low year, sort color(green%20) /// - legend(label(1 "Simulated"))) /// -(line hours year, sort color(green) legend(label(2 "UKHLS"))), /// - title("Average Weekly Hours Worked") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("Hours per week", size(small)) /// - ylabel(33 [2] 40 ,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Note: Statistics calculated on sample of wokring age employed and self-employed individuals.", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_${min_age}_${max_age}_both.jpg", /// - replace width(2560) height(1440) quality(100) - - -******************************************************************************** -* 1.2 : Mean values over time - Ages 18-65, by gender -******************************************************************************** - -* Males - -* Prepare validation data -use year dwt les_c4 hours dgn lhw_flag using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Select sample -keep if dgn == 1 -keep if les_c4 == 1 -drop if lhw_flag == 1 - -replace hours 
= $max_hours if hours > $max_hours & hours != . - -* Compute mean -collapse (mean) hours [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year les_c4 hoursworkedweekly dgn using /// - "$dir_data/simulated_data.dta", clear - -rename hoursworkedweekly lhw - -* Select sample -keep if dgn == "Male" -keep if les_c4 == "EmployedOrSelfEmployed" - -*Compute mean and sd -collapse (mean) lhw, by(run year) -collapse (mean) lhw /// - (sd) lhw_sd = lhw /// - , by(year) - -* Approx 95% confidence interval -foreach varname in lhw { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -* Combine datasets -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea lhw_high lhw_low year, sort color(green%20) /// - legend(label(1 "Simulated"))) /// -(line hours year, sort color(green) legend(label(2 "UKHLS"))), /// - title("Average Weekly Hours Worked") /// - subtitle("Ages 18-65, males") /// - xtitle("Year", size(small)) /// - ytitle("Hours per week", size(small)) /// - ylabel(35 [2] 43 ,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Note: Statistics calculated on sample of employed and self-employed males", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_${min_age}_${max_age}_male.jpg", /// - replace width(2560) height(1440) quality(100) - - -* Females - -* Prepare validation data -use year dwt les_c4 hours dgn lhw_flag using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Select sample -keep if dgn == 0 -keep if les_c4 == 1 -drop if lhw_flag == 1 - -replace hours = $max_hours if hours > $max_hours & hours != . 
- -* Compute mean -collapse (mean) hours [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year les_c4 hoursworkedweekly dgn using /// - "$dir_data/simulated_data.dta", clear - -rename hoursworkedweekly lhw - -* Select sample -keep if dgn == "Female" - -* Keep only employed individuals -keep if les_c4 == "EmployedOrSelfEmployed" - -* Compute mean and sd -collapse (mean) lhw, by(run year) -collapse (mean) lhw /// - (sd) lhw_sd = lhw /// - , by(year) - -* Approx 95% confidnece interval -foreach varname in lhw { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - - *Combine datasets -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea lhw_high lhw_low year, sort color(green%20) /// - legend(label(1 "Simulated"))) /// -(line hours year, sort color(green) legend(label(2 "UKHLS"))), /// - title("Average Weekly Hours Worked") /// - subtitle("Ages 18-65, females") /// - xtitle("Year", size(small)) /// - ytitle("Hours per week", size(small)) /// - ylabel(29 [1] 34 ,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Note: Statistics calculated on sample of employed and self-employed females", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/hours_worked/validation_${country}_hours_worked_ts_${min_age}_${max_age}_female.jpg", /// - replace width(2560) height(1440) quality(100) - - - -******************************************************************************** -* 2 : Histograms by year -******************************************************************************** - -******************************************************************************** -* 2.1 : Histograms by year - ages 18-65 -******************************************************************************** - -* Prepare validation data -use year dwt 
les_c4 hours lhw_flag using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Select sample -keep if les_c4 == 1 -drop if lhw_flag == 1 //remove those that - -replace hours = $max_hours if hours > $max_hours & hours != . - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen hours if year == `year' , /// - bin(60) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run idperson year les_c4 hoursworkedweekly using /// - "$dir_data/simulated_data.dta", clear - -rename hoursworkedweekly lhw - -* Select sample -keep if les_c4 == "EmployedOrSelfEmployed" - -collapse (mean) lhw, by(idperson year) - -append using "$dir_data/temp_valid_stats.dta" - -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen lhw if year == `year', bin(60) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . - - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - twoway (hist lhw if year == `year' /*& lhw <= 65*/, width(1) color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist hours if year == `year' /*& hours <= 65*/, width(1) color(red%30) /// - legend(label(2 "UKHLS"))), /// - title("Weekly Hours Worked") /// - subtitle("`year'") /// - xtitle("Hours per week", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(small)) /// - ylabel(0(`steps')`max_y', labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Note: Statistics calculated on sample of employed and self-employed individuals age 18-65. 
UKHLS hours unrestricted.", /// - size(vsmall)) - - graph export /// - "$dir_output_files/hours_worked/validation_${country}_hours_worked_hist_`year'_unrestricted.png", /// - replace width(2400) height(1350) - - drop d_sim v1 max_d_sim max_value - -} - -* Restricted - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen lhw if year == `year', bin(60) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . - - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - twoway (hist lhw if year == `year' & lhw <= 65, width(1) color(green%30) /// - legend(label(1 "Simulated"))) /// - (hist hours if year == `year' & hours <= 65, width(1) color(red%30) /// - legend(label(2 "UKHLS"))), /// - title("Weekly Hours Worked") /// - subtitle("`year'") /// - xtitle("Hours per week", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(small)) /// - ylabel(0(`steps')`max_y', labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Note: Statistics calculated on sample of employed and self-employed individuals age 18-65. 
UKHLS hours restricted to be" "at most 65 hours per week.", /// - size(vsmall)) - - graph export /// - "$dir_output_files/hours_worked/validation_${country}_hours_worked_hist_`year'.png", /// - replace width(2400) height(1350) - - drop d_sim v1 max_d_sim max_value - -} - - -******************************************************************************** -* 2.1 : Histograms by year - ages 18-65, by gender -******************************************************************************** - -* Female -* Prepare validation data -use year dwt les_c4 hours dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Select sample -keep if les_c4 == 1 -keep if dgn == 0 - -drop if lhw_flag == 1 - -replace hours = $max_hours if hours > $max_hours & hours != . - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen hours if year == `year' , /// - bin(60) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run idperson year les_c4 dgn hoursworkedweekly using /// - "$dir_data/simulated_data.dta", clear - -rename hoursworkedweekly lhw - -* Keep only employed individuals -keep if les_c4 == "EmployedOrSelfEmployed" -keep if dgn == "Female" - -collapse (mean) lhw, by(idperson year) - -append using "$dir_data/temp_valid_stats.dta" - -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - twoway__histogram_gen lhw if year == `year', bin(60) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . 
- - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - twoway (hist lhw if year == `year' /*& lhw <= 65*/, width(1) /// - color(green%30) legend(label(1 "Simulated"))) /// - (hist hours if year == `year' /*& hours <= 65*/, width(1) color(red%30) /// - legend(label(2 "UKHLS"))), /// - title("Weekly Hours Worked") /// - subtitle("`year', females") /// - xtitle("Hours per week", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(small)) /// - ylabel(0(`steps')`max_y', labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Note: Statistics calculated on sample of employed and self-employed females age 18-65. UKHLS hours unrestricted.", /// - size(vsmall)) - - graph export /// - "$dir_output_files/hours_worked/validation_${country}_hours_worked_hist_`year'_female.png", /// - replace width(2400) height(1350) - - drop d_sim v1 max_d_sim max_value - -} - - -* Male -* Prepare validation data -use year dwt les_c4 hours dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Select sample -keep if les_c4 == 1 -keep if dgn == 1 - -* Prepare info needed for dynamic y axis labels -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - twoway__histogram_gen hours if year == `year' , /// - bin(60) den gen(d_valid v2) - - qui sum d_valid - gen max_d_valid_`year' = r(max) - - drop d_valid v2 - -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run idperson year les_c4 dgn hoursworkedweekly using /// - "$dir_data/simulated_data.dta", clear - -rename hoursworkedweekly lhw - -* Select sample -keep if les_c4 == "EmployedOrSelfEmployed" -keep if dgn == "Male" - -collapse (mean) lhw, by(idperson year) - -append using "$dir_data/temp_valid_stats.dta" - -qui sum year -local min_year = 2011 -local max_year = r(max) - -forval year = `min_year'/`max_year' { - - * Prepare info needed for dynamic y axis labels - 
twoway__histogram_gen lhw if year == `year', bin(60) /// - den gen(d_sim v1) - - qui sum d_sim - gen max_d_sim = r(max) - - gen max_value = max_d_valid_`year' if max_d_valid_`year' > max_d_sim - replace max_value = max_d_sim if max_value == . - - sum max_value - local max_y = 1.25*r(max) - local steps = `max_y'/2 - - twoway (hist lhw if year == `year' /*& lhw <= 65*/, width(1) /// - color(green%30) legend(label(1 "Simulated"))) /// - (hist hours if year == `year' /*& lhw <= 65*/, width(1) color(red%30) /// - legend(label(2 "UKHLS"))), /// - title("Weekly Hours Worked") /// - subtitle("`year', males") /// - xtitle("Hours per week", size(small)) /// - ytitle("Density", size(small)) /// - xlabel(,labsize(small)) /// - ylabel(0(`steps')`max_y', labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Note: Statistics calculated on sample of employed and self-employed males age 18-65. UKHLS hours unrestricted.", /// - size(vsmall)) - - - graph export /// - "$dir_output_files/hours_worked/validation_${country}_hours_worked_hist_`year'_male.png", /// - replace width(2400) height(1350) - - drop d_sim v1 max_d_sim max_value -} - - -graph drop _all diff --git a/validation/02_simulated_output_validation/do_files/06_11_plot_income_shares.do b/validation/02_simulated_output_validation/do_files/06_11_plot_income_shares.do deleted file mode 100644 index c9e63968b..000000000 --- a/validation/02_simulated_output_validation/do_files/06_11_plot_income_shares.do +++ /dev/null @@ -1,459 +0,0 @@ -******************************************************************************** -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Income shares -* AUTHORS: Patryk Bronka, Ashley Burdett -* LAST UPDATE: 06/2025 (AB) -* COUNTRY: Greece - -* NOTES: This do file plots simulated and observed income shares and -* incomes by deciles of gross income -* Altered pension age to 65 - TO UPDATE -******************************************************************************** 
- -******************************************************************************** -* Observed -******************************************************************************** - -use year dwt max_age_in_bu gross_labour_income_share_bu /// - pension_income_share_bu capital_income_share_bu social_income_share_bu /// - net_income_share_bu gross_income_bu net_income_bu gross_labour_income_bu /// - pension_income_bu capital_income_bu social_income_bu /// - gross_income_bu_jittered using /// - "$dir_data/${country}-eusilc_validation_sample.dta", clear - -xtile obs_gross_income_group = gross_income_bu, nq(10) -//xtile obs_gross_income_group = gross_income_bu_jittered, nq(10) - - -* All ages - -* Graph income shares (gross) -graph bar (mean) gross_labour_income_share_bu (mean) /// - pension_income_share_bu (mean) capital_income_share_bu [aweight = dwt], /// - over(obs_gross_income_group) stack title(`"Observed (all ages)"', /// - size(medium)) /// - legend(order(1 "Labour" 2 "Pension" 3 "Capital") position(6) rows(1)) /// - name(observed_income_shares, replace) b1title("Decile", size(small)) /// - ytitle("Share", size(small)) ylabel(0 [0.5] 1) /// - graphregion(color(white)) - -* Graph income level (gross) -graph bar (mean) gross_labour_income_bu (mean) pension_income_bu (mean) /// - capital_income_bu [aweight = dwt], /// - over(obs_gross_income_group) stack title(`"Observed (all ages)"', /// - size(medium)) /// - legend(order(1 "Labour" 2 "Pension" 3 "Capital") position(6) rows(1)) /// - name(observed_incomes, replace) ylabel(0 [100000] 100000) /// - ytitle("€", size(small)) b1title("Decile", size(small)) /// - graphregion(color(white)) - -* Graph share of net income in gross income -graph bar (mean) net_income_share_bu [aweight = dwt], /// - over(obs_gross_income_group) stack title(`"Observed (all ages)"', /// - size(medium)) legend(order(1 "Net income") position(6) rows(1)) /// - name(observed_net_income_shares, replace) ytitle("Share", size(small)) /// - 
b1title("Decile", size(small)) /// - graphregion(color(white)) - -* Graph income level (net) -graph bar (mean) net_income_bu [aweight = dwt], /// - over(obs_gross_income_group) /// - stack title(`"Observed (all ages)"', size(medium)) /// - legend(order(1 "Net income") position(6) rows(1)) /// - name(observed_net_income, replace) ylabel(0 [80000] 80000) /// - ytitle("€", size(small)) b1title("Decile", size(small)) legend(off) /// - graphregion(color(white)) - - -* Oldest person above65 (pension age) - -preserve -drop obs_gross_income_group -keep if max_age_in_bu >= 65 -xtile obs_gross_income_group = gross_income_bu, nq(10) - -* Graph income shares (gross) -graph bar (mean) gross_labour_income_share_bu (mean) /// - pension_income_share_bu (mean) capital_income_share_bu [pweight = dwt], /// - over(obs_gross_income_group) stack title(`"Observed (oldest age >=65)"', /// - size(medium)) /// - legend(order(1 "Labour" 2 "Pension" 3 "Capital") position(6) rows(1)) /// - name(observed_income_shares_o68, replace) /// - b1title("Decile", size(small)) /// - ytitle("Share", size(small)) ylabel(0 [0.5] 1) /// - graphregion(color(white)) - -* Graph income level (gross) -graph bar (mean) gross_labour_income_bu (mean) pension_income_bu /// - (mean) capital_income_bu [pweight = dwt], /// - over(obs_gross_income_group) /// - stack title(`"Observed (oldest age >=65)"', size(medium)) /// - legend(order(1 "Labour" 2 "Pension" 3 "Capital") position(6) rows(1)) /// - name(observed_incomes_o68, replace) ylabel(0 [100000] 100000) /// - ytitle("€") b1title("Decile", size(small)) /// - graphregion(color(white)) - -* Graph share of net income in gross income -graph bar (mean) net_income_share_bu [pweight = dwt], /// - over(obs_gross_income_group) /// - stack title(`"Observed (oldest age >=65)"', size(medium)) /// - legend(order(1 "Net income") position(6) rows(1)) /// - name(observed_net_income_shares_o68, replace) /// - b1title("Decile", size(small)) ytitle("Share", size(small)) /// - 
graphregion(color(white)) - //ylabel(0 [0.5] 1) - -// Graph income level (net) -graph bar (mean) net_income_bu [pweight = dwt], /// - over(obs_gross_income_group) /// - stack title(`"Observed (oldest age >=65)"', size(medium)) /// - legend(order(1 "Net income") position(6) rows(1)) /// - name(observed_net_income_o68, replace) ylabel(0 [80000] 80000) /// - ytitle("£") b1title("Decile", size(small)) legend(off) /// - graphregion(color(white)) - - -* Oldest person below65 (pension age) - -restore -drop obs_gross_income_group -keep if max_age_in_bu <65 -xtile obs_gross_income_group = gross_income_bu, nq(10) - -* Graph income shares (gross) -graph bar (mean) gross_labour_income_share_bu (mean) /// - pension_income_share_bu (mean) capital_income_share_bu [pweight = dwt], /// - over(obs_gross_income_group) /// - stack title(`"Observed (oldest age <65)"', size(medium)) /// - legend(order(1 "Labour" 2 "Pension" 3 "Capital") position(6) rows(1)) /// - name(observed_income_shares_u68, replace) /// - b1title("Decile", size(small)) ytitle("Share", size(small)) /// - ylabel(0 [0.5] 1) /// - graphregion(color(white)) - -* Graph income level (gross) -graph bar (mean) gross_labour_income_bu (mean) pension_income_bu /// - (mean) capital_income_bu [pweight = dwt], /// - over(obs_gross_income_group) /// - stack title(`"Observed (oldest age <65)"', size(medium)) /// - legend(order(1 "Labour" 2 "Pension" 3 "Capital") position(6) rows(1)) /// - name(observed_incomes_u68, replace) ylabel(0 [100000] 100000) /// - ytitle("£") b1title("Decile", size(small)) /// - graphregion(color(white)) - -* Graph share of net income in gross income -graph bar (mean) net_income_share_bu [pweight = dwt], /// - over(obs_gross_income_group) /// - stack title(`"Observed (oldest age <65)"', size(medium)) /// - legend(order(1 "Net income") position(6) rows(1)) /// - name(observed_net_income_shares_u68, replace) /// - b1title("Decile", size(small)) ytitle("Share", size(small)) /// - graphregion(color(white)) - -* 
Graph income level (net) -graph bar (mean) net_income_bu [pweight = dwt], /// - over(obs_gross_income_group) /// - stack title(`"Observed (oldest age <65)"', size(medium)) /// - legend(order(1 "Net income") position(6) rows(1)) /// - name(observed_net_income_u68, replace) ylabel(0 [80000] 80000) /// - ytitle("€") b1title("Decile", size(small)) legend(off) /// - graphregion(color(white)) - - -******************************************************************************** -* Simulated -******************************************************************************** - -* Load simulated data -use run year idperson max_age_in_bu sim_yplgrs_dv_lvl_bu sim_ypnoab_lvl_bu /// - sim_ypncp_lvl_bu sim_y_disp_yr_bu sim_y_gross_yr_bu using /// - "$dir_data/simulated_data.dta", clear - -gen calc_bu_gross_income = sim_yplgrs_dv_lvl_bu + sim_ypncp_lvl_bu + /// - sim_ypnoab_lvl_bu - -collapse max_age_in_bu sim_yplgrs_dv_lvl_bu sim_ypnoab_lvl_bu /// - sim_ypncp_lvl_bu sim_y_disp_yr_bu sim_y_gross_yr_bu /// - calc_bu_gross_income, by(idperson year) - -* Income shares: -gen gross_labour_income_share_bu = sim_yplgrs_dv_lvl_bu / calc_bu_gross_income -gen pension_income_share_bu = sim_ypnoab_lvl_bu / calc_bu_gross_income -gen capital_income_share_bu = sim_ypncp_lvl_bu / calc_bu_gross_income -gen net_income_share_bu = sim_y_disp_yr_bu / calc_bu_gross_income -replace net_income_share_bu = . 
if net_income_share_bu >= 50 - -xtile sim_gross_income_group = calc_bu_gross_income, nq(10) - -* All ages - -* Graph income shares (gross) -graph bar (mean) gross_labour_income_share_bu (mean) pension_income_share_bu /// - (mean) capital_income_share_bu, over(sim_gross_income_group) /// - stack title(`"Simulated (all ages)"', size(medium)) /// - legend(order(1 "Labour" 2 "Pension" 3 "Capital") /// - position(6) rows(1)) name(simulated_income_shares, replace) /// - b1title("Decile", size(small)) ytitle("Share", size(small)) /// - ylabel(0 [0.5] 1) /// - graphregion(color(white)) - -* Graph income level (gross) -graph bar (mean) sim_yplgrs_dv_lvl_bu (mean) sim_ypnoab_lvl_bu /// - (mean) sim_ypncp_lvl_bu, over(sim_gross_income_group) /// - stack title(`"Simulated (all ages)"', size(medium)) /// - legend(order(1 "Labour" 2 "Pension" 3 "Capital") /// - position(6) rows(1)) name(simulated_incomes, replace) /// - ylabel(0 [100000] 100000) ytitle("€") b1title("Decile", size(small)) /// - graphregion(color(white)) - -* Graph share of net income in gross income -graph bar (mean) net_income_share_bu, over(sim_gross_income_group) /// - stack title(`"Simulated (all ages)"', size(medium)) /// - legend(order(1 "Net income") position(6) rows(1)) /// - name(simulated_net_income_shares, replace) /// - b1title("Decile", size(small)) /// - graphregion(color(white)) - -* Graph income level (net) -graph bar (mean) sim_y_disp_yr_bu, over(sim_gross_income_group) /// - stack title(`"Simulated (all ages)"', size(medium)) /// - legend(order(1 "Net income") position(6) rows(1)) /// - legend(off) name(simulated_net_income, replace) /// - ylabel(0 [80000] 80000) ytitle("€") /// - b1title("Decile", size(small)) ytitle("€") /// - graphregion(color(white)) - - -* Oldest person above65 (pension age) - -preserve -drop sim_gross_income_group -keep if max_age_in_bu >= 65 -xtile sim_gross_income_group = calc_bu_gross_income, nq(10) - -* Graph income shares (gross) -graph bar (mean) 
gross_labour_income_share_bu (mean) pension_income_share_bu /// - (mean) capital_income_share_bu, over(sim_gross_income_group) /// - stack title(`"Simulated (oldest age >=65)"', size(medium)) /// - legend(order(1 "Labour" 2 "Pension" 3 "Capital") position(6) rows(1)) /// - name(simulated_income_shares_o68, replace) /// - b1title("Decile", size(small)) ytitle("Share", size(small)) /// - ylabel(0 [0.5] 1) /// - graphregion(color(white)) - -* Graph income level (gross) -graph bar (mean) sim_yplgrs_dv_lvl_bu (mean) sim_ypnoab_lvl_bu /// - (mean) sim_ypncp_lvl_bu, over(sim_gross_income_group) /// - stack title(`"Simulated (oldest age >=65)"', size(medium)) /// - legend(order(1 "Labour" 2 "Pension" 3 "Capital") position(6) rows(1)) /// - name(simulated_incomes_o68, replace) ylabel(0 [100000] 100000) /// - ytitle("€") b1title("Decile", size(small)) /// - graphregion(color(white)) - -* Graph share of net income in gross income -graph bar (mean) net_income_share_bu, over(sim_gross_income_group) /// - stack title(`"Simulated (oldest age >=65)"', size(medium)) /// - legend(order(1 "Net income") position(6) rows(1)) /// - name(simulated_net_income_shares_o68, replace) /// - b1title("Decile", size(small)) /// - ytitle("Share", size(small)) /// - graphregion(color(white)) - -* Graph income level (net) -graph bar (mean) sim_y_disp_yr_bu, over(sim_gross_income_group) /// - stack title(`"Simulated (oldest age >=65)"', size(medium)) /// - legend(order(1 "Net income") position(6) rows(1)) /// - name(simulated_net_income_o68, replace) ylabel(0 [80000] 80000) /// - ytitle("€") b1title("Decile", size(small)) legend(off) /// - graphregion(color(white)) - - -* Oldest person below 68 (pension age) - -restore -drop sim_gross_income_group -keep if max_age_in_bu < 65 -xtile sim_gross_income_group = calc_bu_gross_income, nq(10) - -* Graph income shares (gross) -graph bar (mean) gross_labour_income_share_bu (mean) /// - pension_income_share_bu (mean) capital_income_share_bu, /// - 
over(sim_gross_income_group) /// - stack title(`"Simulated (oldest age <65)"', size(medium)) /// - legend(order(1 "Labour" 2 "Pension" 3 "Capital") /// - position(6) rows(1)) name(simulated_income_shares_u68, replace) /// - b1title("Decile", size(small)) ytitle("Share", size(small)) /// - ylabel(0 [0.5] 1) /// - graphregion(color(white)) - -* Graph income level (gross) -graph bar (mean) sim_yplgrs_dv_lvl_bu (mean) sim_ypnoab_lvl_bu /// - (mean) sim_ypncp_lvl_bu, over(sim_gross_income_group) stack /// - title(`"Observed (oldest age <65)"', size(medium)) /// - legend(order(1 "Labour" 2 "Pension" 3 "Capital") position(6) rows(1)) /// - name(simulated_incomes_u68, replace) ylabel(0 [100000] 100000) /// - ytitle("") b1title("Decile", size(small)) /// - graphregion(color(white)) - -* Graph share of net income in gross income -graph bar (mean) net_income_share_bu, over(sim_gross_income_group) stack /// - title(`"Observed (oldest age <65)"', size(medium)) /// - legend(order(1 "Net income") position(6) rows(1)) /// - name(simulated_net_income_shares_u68, replace) /// - b1title("Decile", size(small)) ytitle("Share", size(small)) /// - graphregion(color(white)) - -* Graph income level (net) -graph bar (mean) sim_y_disp_yr_bu, over(sim_gross_income_group) /// - stack title(`"Observed (oldest age <65)"', /// - size(medium)) legend(order(1 "Net income") /// - position(6) rows(1)) legend(off) name(simulated_net_income_u68, replace) /// - ylabel(0 [80000] 80000) ytitle("€") b1title("Decile", size(small)) /// - graphregion(color(white)) - - -******************************************************************************** -* Combine graphs -******************************************************************************** - -* Simulated and observed gross income shares, -* all / above pension age / below pension age -grc1leg simulated_income_shares observed_income_shares /// - simulated_income_shares_o68 observed_income_shares_o68 /// - simulated_income_shares_u68 
observed_income_shares_u68, /// - legendfrom(observed_income_shares) rows(3) /// - graphregion(color(white)) /// - title("Gross income shares") /// - subtitle("By age and benefit unit gross income decile") /// - note("Notes: Statistics computed at the benefit unit level. Based on values in € per year (2015 prices).", /// - size(vsmall)) - -graph export "$dir_output_files/income/validation_${country}_income_shares.png", /// - replace width(2400) height(1350) - - -* Simulated and observed income levels, -* all / above pension age / below pension age -grc1leg simulated_incomes observed_incomes simulated_incomes_o68 /// - observed_incomes_o68 simulated_incomes_u68 observed_incomes_u68, /// - legendfrom(observed_incomes) rows(3) /// - graphregion(color(white)) /// - subtitle("Gross income sources, by age and ben unit gross income decile") /// - note("Statistics computed at the benefit unit level. Values in € per year (2015 prices).", /// - size(vsmall)) - -graph export /// - "$dir_output_files/income/validation_${country}_combined_income_levels.png", /// - replace width(2400) height(1350) - -* Simulated and observed net income share in gross, -* all / above pension age / below pension age -grc1leg simulated_net_income_shares observed_net_income_shares /// - simulated_net_income_shares_o68 observed_net_income_shares_o68 /// - simulated_net_income_shares_u68 observed_net_income_shares_u68, /// - legendfrom(observed_net_income_shares) rows(3) /// - graphregion(color(white)) /// - subtitle("Net income shares, by age and ben unit gross income decile") /// - note("Statistics computed at the benefit unit level. 
Based on values in € per year (2015 prices).", /// - size(vsmall)) - -//graph export "$dir_output_files/combined_net_income_share.jpg", /// -// replace width(2560) height(1440) quality(100) - -// Simulated and observed net income level, -* all / above pension age / below pension age -grc1leg simulated_net_income observed_net_income simulated_net_income_o68 /// - observed_net_income_o68 simulated_net_income_u68 /// - observed_net_income_u68, rows(3) ycommon /// - subtitle("Net income, by age and ben unit gross income decile") /// - legendfrom(simulated_net_income) /// - graphregion(color(white)) /// - note("Statistics computed at the benefit unit level. Values in € per year (2015 prices).", /// - size(vsmall)) - -graph export /// - "$dir_output_files/income/validation_${country}_combined_net_income_levels.png", /// - replace width(2400) height(1350) - - -graph drop _all - - -/* -** Investigate components of capital income fpr third decile - -use "$dir_data/${country}-eusilc_validation_sample.dta", clear - -// how many BU have no gross income? 
-sum gross_income_bu -count if gross_income_bu == 0 - // 65,775 obs have no gross income from labour, capital or private pension - // 20% of bs have no gross income - -cap drop obs_gross_income_group -xtile obs_gross_income_group = gross_income_bu, nq(10) - -//26% of BU have no gross income -sum hy080g_pc if obs_gross_income_group == 3, de //inter-hh transfers 90% pop -sum hy110g_pc if obs_gross_income_group == 3, de //child income 95% pop -sum hy040g_pc if obs_gross_income_group == 3, de //property income 99% pop -sum hy090g_pc if obs_gross_income_group == 3, de //cap investments 99% -// inter hh transfer and child income are the largest sources - -sum py010g if obs_gross_income_group == 3, de //wages 90% pop -sum py050g if obs_gross_income_group == 3, de //self emp 90% pop -// most don't have labour income - -// pension income small for all - - -foreach var in hy080g_pc hy110g_pc hy040g_pc hy090g_pc py010g py050g { - - gen d_`var' = (`var' != 0) - -} - -tab d_hy080g_pc if obs_gross_income_group == 3 // 35% inter-hh transfers -tab d_hy110g_pc if obs_gross_income_group == 3 // 13% child income -tab d_hy040g_pc if obs_gross_income_group == 3 // 6% property income -tab d_hy090g_pc if obs_gross_income_group == 3 // 7% capital investments -tab d_py010g if obs_gross_income_group == 3 // 16% wages -tab d_py050g if obs_gross_income_group == 3 // 17% self employment - -gen ind_work_income = (d_py010g == 1 | d_py050g == 1) -tab ind_work_income if obs_gross_income_group == 3 - -// => 68% report no income from work - - - -tab hhsize if obs_gross_income_group == 3 - -/* - hhsize | Freq. Percent Cum. 
-------------+----------------------------------- - 1 | 747 8.93 8.93 - 2 | 1,624 19.41 28.33 - 3 | 1,848 22.08 50.42 - 4 | 1,660 19.84 70.26 - 5 | 1,168 13.96 84.21 - 6 | 739 8.83 93.04 - 7 | 343 4.10 97.14 - 8 | 130 1.55 98.70 - 9 | 44 0.53 99.22 - 10 | 47 0.56 99.78 - 11 | 12 0.14 99.93 - 12 | 4 0.05 99.98 - 15 | 2 0.02 100.00 */ - -* Age -histogram dag if obs_gross_income_group == 3 // U-shaped - -* Activity -tab les_c3 if obs_gross_income_group == 3 // 57% not employed, 19% students - - -graph drop _all - diff --git a/validation/02_simulated_output_validation/do_files/06_12_plot_partnership_status.do b/validation/02_simulated_output_validation/do_files/06_12_plot_partnership_status.do deleted file mode 100644 index e2f498a82..000000000 --- a/validation/02_simulated_output_validation/do_files/06_12_plot_partnership_status.do +++ /dev/null @@ -1,482 +0,0 @@ -******************************************************************************** -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Partnership -* AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 (AB) -* COUNTRY: UK - -* NOTES: -******************************************************************************** - -******************************************************************************** -* 1 : Mean values over time -******************************************************************************** - -******************************************************************************** -* 1.1 : Mean values over time - ages 18-65 -******************************************************************************** - -* Prepare validation data -use year dwt valid_dcpst_p valid_dcpst_snm valid_dcpst_prvp /// - valid_dcpst_snmprvp using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Compute shares -collapse (mean) valid_dcpst_p valid_dcpst_snm valid_dcpst_prvp /// - valid_dcpst_snmprvp [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_dcpst_p 
sim_dcpst_snm sim_dcpst_prvp sim_dcpst_snmprvp /// - sim_has_partner using "$dir_data/simulated_data.dta", clear - -* Compute shares and sd -collapse (mean) sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp sim_dcpst_snmprvp /// - sim_has_partner, by(run year) - -collapse (mean) sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp sim_dcpst_snmprvp /// - sim_has_partner /// - (sd) sim_dcpst_p_sd = sim_dcpst_p /// - sim_dcpst_snm_sd = sim_dcpst_snm /// - sim_dcpst_prvp_sd = sim_dcpst_prvp /// - sim_dcpst_snmprvp_sd = sim_dcpst_snmprvp /// - sim_has_partner_sd = sim_has_partner /// - , by(year) - -* Approx 95% confidence interval -foreach varname in sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp /// -sim_dcpst_snmprvp sim_has_partner { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -* Combine datasets -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Share partnered - -* Plot figure -twoway (rarea sim_dcpst_p_high sim_dcpst_p_low year, sort color(green%20) /// - legend(label(1 "Simulated"))) /// -(line valid_dcpst_p year, sort color(green) /// - legend(label(2 "UKHLS "))), /// - title("Partnered") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - xlabel(,labsize(small)) /// - ylabel(0[0.1]0.7, labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: ", size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/partnership/validation_${country}_partnered_ts_${min_age}_${max_age}_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -* Partnership status shares - -twoway (rarea sim_dcpst_p_high sim_dcpst_p_low year, sort color(green%20) /// - legend(label(1 "Partnered, simulated"))) /// -(line valid_dcpst_p year, sort color(green) /// - legend(label(2 "Partnered, UKHLS "))) /// -(rarea sim_dcpst_snm_high sim_dcpst_snm_low year, sort color(red%20) /// - legend(label(3 "Single, simulated"))) /// 
-(line valid_dcpst_snm year, sort color(red) /// - legend(label(4 "Single, UKHLS "))) /// -(rarea sim_dcpst_prvp_high sim_dcpst_prvp_low year, sort color(blue%20) /// - legend(label(5 "Prev partnered, simulated"))) /// -(line valid_dcpst_prvp year, sort color(blue) /// - legend(label(6 "Prev partnered, UKHLS "))) , /// - title("Partnership status") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - xlabel(,labsize(small)) /// - ylabel(0[0.1]0.7, labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: ", size(vsmall)) - -graph export /// -"$dir_output_files/partnership/validation_${country}_partnership_ts_${min_age}_${max_age}_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 1.2 : Mean values over time - by age group -******************************************************************************** -* Those in their 20s - -* Validation data -use year dwt valid_dcpst_p valid_dcpst_snm valid_dcpst_prvp /// - valid_dcpst_snmprvp ageGroup using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if ageGroup == 2 | ageGroup == 3 - -* Compute shares -collapse (mean) valid_dcpst_p valid_dcpst_snm valid_dcpst_prvp /// - valid_dcpst_snmprvp [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp sim_dcpst_snmprvp /// - sim_has_partner ageGroup using "$dir_data/simulated_data.dta", clear - -keep if ageGroup == 2 | ageGroup == 3 - -* Compute shares snd sd -collapse (mean) sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp sim_dcpst_snmprvp /// - sim_has_partner, by(run year) - -collapse (mean) sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp sim_dcpst_snmprvp /// - sim_has_partner /// - (sd) sim_dcpst_p_sd = sim_dcpst_p /// - sim_dcpst_snm_sd = sim_dcpst_snm /// - sim_dcpst_prvp_sd = sim_dcpst_prvp 
/// - sim_dcpst_snmprvp_sd = sim_dcpst_snmprvp /// - sim_has_partner_sd = sim_has_partner /// - , by(year) - -* Approx 95% confidence interval -foreach varname in sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp /// -sim_dcpst_snmprvp sim_has_partner { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -* Combine datasets -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea sim_dcpst_p_high sim_dcpst_p_low year, sort color(green%20) /// - legend(label(1 "Partnered, simulated"))) /// -(line valid_dcpst_p year, sort color(green) /// - legend(label(2 "Partnered, UKHLS "))) /// -(rarea sim_dcpst_snm_high sim_dcpst_snm_low year, sort color(red%20) /// - legend(label(3 "Single, simulated"))) /// -(line valid_dcpst_snm year, sort color(red) /// - legend(label(4 "Single, UKHLS "))) /// -(rarea sim_dcpst_prvp_high sim_dcpst_prvp_low year, sort color(blue%20) /// - legend(label(5 "Prev partnered, simulated"))) /// -(line valid_dcpst_prvp year, sort color(blue) /// - legend(label(6 "Prev partnered, UKHLS "))) , /// - title("Partnership status") /// - subtitle("Ages 20-29") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - xlabel(,labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: ", size(vsmall)) - -graph export /// -"$dir_output_files/partnership/validation_${country}_partnership_ts_20_29_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -* Those in their 30s - -* Validation data -use year dwt valid_dcpst_p valid_dcpst_snm valid_dcpst_prvp /// - valid_dcpst_snmprvp ageGroup using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if ageGroup == 4 | ageGroup == 5 - -* Compute shares -collapse (mean) valid_dcpst_p valid_dcpst_snm valid_dcpst_prvp /// - valid_dcpst_snmprvp [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* 
Prepare simulated data -use run year sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp sim_dcpst_snmprvp /// - sim_has_partner ageGroup using "$dir_data/simulated_data.dta", clear - -keep if ageGroup == 4 | ageGroup == 5 - -* Compute shares and sd -collapse (mean) sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp sim_dcpst_snmprvp /// - sim_has_partner, by(run year) - -collapse (mean) sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp sim_dcpst_snmprvp /// - sim_has_partner /// - (sd) sim_dcpst_p_sd = sim_dcpst_p /// - sim_dcpst_snm_sd = sim_dcpst_snm /// - sim_dcpst_prvp_sd = sim_dcpst_prvp /// - sim_dcpst_snmprvp_sd = sim_dcpst_snmprvp /// - sim_has_partner_sd = sim_has_partner /// - , by(year) - -* APprox 95% confidence interval -foreach varname in sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp /// -sim_dcpst_snmprvp sim_has_partner { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -* COmbien datasets -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea sim_dcpst_p_high sim_dcpst_p_low year, sort color(green%20) /// - legend(label(1 "Partnered, simulated"))) /// -(line valid_dcpst_p year, sort color(green) /// - legend(label(2 "Partnered, UKHLS "))) /// -(rarea sim_dcpst_snm_high sim_dcpst_snm_low year, sort color(red%20) /// - legend(label(3 "Single, simulated"))) /// -(line valid_dcpst_snm year, sort color(red) /// - legend(label(4 "Single, UKHLS "))) /// -(rarea sim_dcpst_prvp_high sim_dcpst_prvp_low year, sort color(blue%20) /// - legend(label(5 "Prev partnered, simulated"))) /// -(line valid_dcpst_prvp year, sort color(blue) /// - legend(label(6 "Prev partnered, UKHLS "))) , /// - title("Partnership status") /// - subtitle("Ages 30-39") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - xlabel(,labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: ", size(vsmall)) - - -graph export /// 
-"$dir_output_files/partnership/validation_${country}_partnership_ts_30_39_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -* Those in their 40-59 - -* Validation data -use year dwt valid_dcpst_p valid_dcpst_snm valid_dcpst_prvp /// - valid_dcpst_snmprvp ageGroup using /// - "$dir_data/ukhls_validation_sample.dta", clear - -keep if ageGroup == 6 - -* Compute shares -collapse (mean) valid_dcpst_p valid_dcpst_snm valid_dcpst_prvp /// - valid_dcpst_snmprvp [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp sim_dcpst_snmprvp /// - sim_has_partner ageGroup using "$dir_data/simulated_data.dta", clear - -keep if ageGroup == 6 - -* Compute shares and sd -collapse (mean) sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp sim_dcpst_snmprvp /// - sim_has_partner, by(run year) - -collapse (mean) sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp sim_dcpst_snmprvp /// - sim_has_partner /// - (sd) sim_dcpst_p_sd = sim_dcpst_p /// - sim_dcpst_snm_sd = sim_dcpst_snm /// - sim_dcpst_prvp_sd = sim_dcpst_prvp /// - sim_dcpst_snmprvp_sd = sim_dcpst_snmprvp /// - sim_has_partner_sd = sim_has_partner /// - , by(year) - -* Approx 95% confidence interval -foreach varname in sim_dcpst_p sim_dcpst_snm sim_dcpst_prvp /// -sim_dcpst_snmprvp sim_has_partner { - - gen `varname'_high = `varname' + 1.96*`varname'_sd - gen `varname'_low = `varname' - 1.96*`varname'_sd - -} - -* Combine datasets -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea sim_dcpst_p_high sim_dcpst_p_low year, sort color(green%20) /// - legend(label(1 "Partnered, simulated"))) /// -(line valid_dcpst_p year, sort color(green) /// - legend(label(2 "Partnered, UKHLS "))) /// -(rarea sim_dcpst_snm_high sim_dcpst_snm_low year, sort color(red%20) /// - legend(label(3 "Single, simulated"))) /// -(line valid_dcpst_snm year, sort color(red) /// - legend(label(4 "Single, UKHLS "))) 
/// -(rarea sim_dcpst_prvp_high sim_dcpst_prvp_low year, sort color(blue%20) /// - legend(label(5 "Prev partnered, simulated"))) /// -(line valid_dcpst_prvp year, sort color(blue) /// - legend(label(6 "Prev partnered, UKHLS "))) , /// - title("Partnership status") /// - subtitle("Ages 40-59") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - xlabel(,labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: ", size(vsmall)) - -graph export /// -"$dir_output_files/partnership/validation_${country}_partnership_ts_40_59_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -graph drop _all - - -******************************************************************************** -* 1.3 : Mean values over time - by children -******************************************************************************** - -* Load validation data -use year dwt valid_dcpst_p_children_0 valid_dcpst_p_children_1 /// - valid_dcpst_p_children_2 valid_dcpst_p_children_3p /// - valid_dcpst_snm_children_0 valid_dcpst_snm_children_1 /// - valid_dcpst_snm_children_2 valid_dcpst_snm_children_3p /// - valid_dcpst_prvp_children_0 valid_dcpst_prvp_children_1 /// - valid_dcpst_prvp_children_2 valid_dcpst_prvp_children_3p /// - valid_dcpst_snmprvp_children_0 valid_dcpst_snmprvp_children_1 /// - valid_dcpst_snmprvp_children_2 valid_dcpst_snmprvp_children_3p using /// - "$dir_data/ukhls_validation_sample.dta", clear - -collapse (mean) valid_dcpst_p_children_0 valid_dcpst_p_children_1 /// - valid_dcpst_p_children_2 valid_dcpst_p_children_3p /// - valid_dcpst_snm_children_0 valid_dcpst_snm_children_1 /// - valid_dcpst_snm_children_2 valid_dcpst_snm_children_3p /// - valid_dcpst_prvp_children_0 valid_dcpst_prvp_children_1 /// - valid_dcpst_prvp_children_2 valid_dcpst_prvp_children_3p /// - valid_dcpst_snmprvp_children_0 valid_dcpst_snmprvp_children_1 /// - valid_dcpst_snmprvp_children_2 
valid_dcpst_snmprvp_children_3p /// - [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Load simulated data -use run year sim_dcpst_p_children_0 sim_dcpst_p_children_1 /// - sim_dcpst_p_children_2 sim_dcpst_p_children_3p sim_dcpst_snm_children_0 /// - sim_dcpst_snm_children_1 sim_dcpst_snm_children_2 /// - sim_dcpst_snm_children_3p sim_dcpst_prvp_children_0 /// - sim_dcpst_prvp_children_1 sim_dcpst_prvp_children_2 /// - sim_dcpst_prvp_children_3p sim_dcpst_snmprvp_children_0 /// - sim_dcpst_snmprvp_children_1 sim_dcpst_snmprvp_children_2 /// - sim_dcpst_snmprvp_children_3p /// - using "$dir_data/simulated_data.dta", clear - -* Compute shares and sd -collapse (mean) sim_dcpst_p_children_0 sim_dcpst_p_children_1 /// - sim_dcpst_p_children_2 sim_dcpst_p_children_3p /// - sim_dcpst_snm_children_0 sim_dcpst_snm_children_1 /// - sim_dcpst_snm_children_2 sim_dcpst_snm_children_3p /// - sim_dcpst_prvp_children_0 sim_dcpst_prvp_children_1 /// - sim_dcpst_prvp_children_2 sim_dcpst_prvp_children_3p /// - sim_dcpst_snmprvp_children_0 sim_dcpst_snmprvp_children_1 /// - sim_dcpst_snmprvp_children_2 sim_dcpst_snmprvp_children_3p, /// - by(run year) - -collapse (mean) sim_dcpst_p_children_0 sim_dcpst_p_children_1 /// - sim_dcpst_p_children_2 sim_dcpst_p_children_3p sim_dcpst_snm_children_0 /// - sim_dcpst_snm_children_1 sim_dcpst_snm_children_2 /// - sim_dcpst_snm_children_3p sim_dcpst_prvp_children_0 /// - sim_dcpst_prvp_children_1 sim_dcpst_prvp_children_2 /// - sim_dcpst_prvp_children_3p sim_dcpst_snmprvp_children_0 /// - sim_dcpst_snmprvp_children_1 sim_dcpst_snmprvp_children_2 /// - sim_dcpst_snmprvp_children_3p /// - (sd) sim_dcpst_p_children_0_sd = sim_dcpst_p_children_0 /// - sim_dcpst_p_children_1_sd = sim_dcpst_p_children_1 /// - sim_dcpst_p_children_2_sd = sim_dcpst_p_children_2 /// - sim_dcpst_p_children_3p_sd = sim_dcpst_p_children_3p /// - sim_dcpst_snm_children_0_sd = sim_dcpst_snm_children_0 /// - sim_dcpst_snm_children_1_sd = 
sim_dcpst_snm_children_1 /// - sim_dcpst_snm_children_2_sd = sim_dcpst_snm_children_2 /// - sim_dcpst_snm_children_3p_sd = sim_dcpst_snm_children_3 /// - sim_dcpst_prvp_children_0_sd = sim_dcpst_prvp_children_0 /// - sim_dcpst_prvp_children_1_sd = sim_dcpst_prvp_children_1 /// - sim_dcpst_prvp_children_2_sd = sim_dcpst_prvp_children_2 /// - sim_dcpst_prvp_children_3p_sd = sim_dcpst_prvp_children_3p /// - sim_dcpst_snmprvp_children_0_sd = sim_dcpst_snmprvp_children_0 /// - sim_dcpst_snmprvp_children_1_sd = sim_dcpst_snmprvp_children_1 /// - sim_dcpst_snmprvp_children_2_sd = sim_dcpst_snmprvp_children_2 /// - sim_dcpst_snmprvp_children_3p_sd = sim_dcpst_snmprvp_children_3p /// - , by(year) - -* Approx 95% confidence interval -foreach varname in sim_dcpst_p_children_0 sim_dcpst_p_children_1 /// -sim_dcpst_p_children_2 sim_dcpst_p_children_3p sim_dcpst_snm_children_0 /// -sim_dcpst_snm_children_1 sim_dcpst_snm_children_2 sim_dcpst_snm_children_3p /// -sim_dcpst_prvp_children_0 sim_dcpst_prvp_children_1 /// -sim_dcpst_prvp_children_2 sim_dcpst_prvp_children_3p /// -sim_dcpst_snmprvp_children_0 sim_dcpst_snmprvp_children_1 /// -sim_dcpst_snmprvp_children_2 sim_dcpst_snmprvp_children_3p { - - gen `varname'_h = `varname' + 1.96*`varname'_sd - gen `varname'_l = `varname' - 1.96*`varname'_sd - -} - -* Combine datasets -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Label variables -label var sim_dcpst_p_children_0 "Partnered, no children" -label var sim_dcpst_p_children_1 "Partnered, 1 child" -label var sim_dcpst_p_children_2 "Partnered, 2 children" -label var sim_dcpst_p_children_3p "Partnered, 3+ children" -label var sim_dcpst_snmprvp_children_0 "Not partnered, no children" -label var sim_dcpst_snmprvp_children_1 "Not partnered, 1 child" -label var sim_dcpst_snmprvp_children_2 "Not partnered, 2 children" -label var sim_dcpst_snmprvp_children_3p "Not partnered, 3+ children" - -* Plot figures -foreach varname in dcpst_p_children_0 
dcpst_p_children_1 dcpst_p_children_2 /// -dcpst_p_children_3p { - - local vtext : variable label sim_`varname' - if `"`vtext'"' == "" local vtext "sim_`varname'" - twoway (rarea sim_`varname'_h sim_`varname'_l year, sort color(red%20) /// - legend(label(1 "Simulated") position(6) rows(1))) /// - (line valid_`varname' year, sort color(red) /// - legend(label(2 "UKHLS"))), /// - subtitle("`vtext'") /// - name(`varname', replace) /// - ytitle("Share", size(small)) /// - xtitle("") /// - ylabel(0[0.1]0.5,labsize(vsmall)) /// - xlabel(,labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - -} - -* Combine plots -grc1leg dcpst_p_children_0 dcpst_p_children_1 dcpst_p_children_2 /// - dcpst_p_children_3p , /// - title("Share Partnered and Number of Children") /// - legendfrom(dcpst_p_children_0) /// - rows(2) /// - graphregion(color(white)) /// - note("Notes: Samples contains all individual ages 18-65. ", size(vsmall)) - -graph export /// -"$dir_output_files/partnership/validation_${country}_partnership_children_ts_${min_age}_${max_age}_both.jpg", /// - replace width(2400) height(1350) quality(100) - -graph drop _all - diff --git a/validation/02_simulated_output_validation/do_files/06_15_plot_inequality.do b/validation/02_simulated_output_validation/do_files/06_15_plot_inequality.do deleted file mode 100644 index ec82c2dce..000000000 --- a/validation/02_simulated_output_validation/do_files/06_15_plot_inequality.do +++ /dev/null @@ -1,253 +0,0 @@ -******************************************************************************** -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Inequality -* AUTHORS: Ashley Burdett -* LAST UPDATE: 9/2025 (AB) -* COUNTRY: Greece - -* NOTES: Equivalized disposable income used to create ratios -******************************************************************************** - -//ssc install ineqdeco - -******************************************************************************** -* 1 : Income ratios through time 
-******************************************************************************** - -******************************************************************************** -* 1.1 : Income ratio, 90/50 -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_eq_disp_bu_yr using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_eq_disp_bu_yr, d - - replace valid_y_eq_disp_bu_yr = . if /// - valid_y_eq_disp_bu_yr < r(p1) | valid_y_eq_disp_bu_yr > r(p99) - -} - -collapse (p90) p90_disp = valid_y_eq_disp_bu_yr /// - (p50) p50_disp = valid_y_eq_disp_bu_yr /// - [aw = dwt] , by(year) - -gen p90_p50_ratio_disp_obs = p90_disp/p50_disp - -* Align reference years -gen l_p90_p50_ratio_disp_obs = p90_p50_ratio_disp_obs[_n+1] - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year equivalisedincome using "$dir_data/simulated_data.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum equivalisedincome, d - - replace equivalisedincome = . 
if /// - equivalisedincome < r(p1) | equivalisedincome > r(p99) - -} - -collapse (p90) p90_disp = equivalisedincome /// - (p50) p50_disp = equivalisedincome, by(run year) - -gen p90_p50_ratio_disp = p90_disp/p50_disp - -collapse (mean) p90_p50_ratio_disp /// - (sd) sd_p90_p50_ratio_disp = p90_p50_ratio_disp /// - , by(year) - - foreach var in p90_p50_ratio_disp { - - gen `var'_high = `var' + 1.96*sd_`var' - gen `var'_low = `var' - 1.96*sd_`var' - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -* Plot figure -twoway (rarea p90_p50_ratio_disp_high p90_p50_ratio_disp_low year, sort /// - color(green%20) legend(label(1 "Simulated") position(6) rows(1))) /// -(line p90_p50_ratio_disp_obs year, sort color(green)legend(label(2 "UKHLS"))), /// - title("P90/P50 Disposable Income Ratio") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("Ratio", size(small)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Ratios computed using individual observations of benefit unit measure of equivalized disposable income.", /// - size(vsmall)) - -* Save figure -graph export "$dir_output_files/inequality/validation_${country}_p90p50.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 1.1 : Income ratio, 90/10 -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_eq_disp_bu_yr using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_eq_disp_bu_yr, d - - replace valid_y_eq_disp_bu_yr = . 
if /// - valid_y_eq_disp_bu_yr < r(p1) | valid_y_eq_disp_bu_yr > r(p99) - -} - -collapse (p90) p90_disp = valid_y_eq_disp_bu_yr /// - (p10) p10_disp = valid_y_eq_disp_bu_yr /// - [aw = dwt], by(year) - -gen p90_p10_ratio_disp_obs = p90_disp/p10_disp - -* Align reference years -gen l_p90_p10_ratio_disp_obs = p90_p10_ratio_disp_obs[_n+1] - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year equivalisedincome using "$dir_data/simulated_data.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum equivalisedincome, d - - replace equivalisedincome = . if /// - equivalisedincome < r(p1) | equivalisedincome > r(p99) - -} - -collapse (p90) p90_disp = equivalisedincome /// - (p10) p10_disp = equivalisedincome, by(run year) - -gen p90_p10_ratio_disp = p90_disp/p10_disp - -collapse (mean) p90_p10_ratio_disp /// - (sd) sd_p90_p10_ratio_disp = p90_p10_ratio_disp /// - , by(year) - - foreach var in p90_p10_ratio_disp { - - gen `var'_high = `var' + 1.96*sd_`var' - gen `var'_low = `var' - 1.96*sd_`var' - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -* Plot figure -twoway (rarea p90_p10_ratio_disp_high p90_p10_ratio_disp_low year, sort /// - color(green%20) legend(label(1 "Simulated") position(6) rows(1))) /// -(line p90_p10_ratio_disp_obs year, sort color(green)legend(label(2 "UKHLS"))), /// - title("P90/P10 Disposable Income Ratio") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("Ratio", size(small)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Ratios computed using individual observations of benefit unit measure of equivalized disposable income.", /// - size(vsmall)) - -* Save figure -graph export "$dir_output_files/inequality/validation_${country}_p90p10.jpg", /// - replace width(2400) height(1350) quality(100) - - 
-******************************************************************************** -* 1.3 : Gini coefficeint -******************************************************************************** - -* Prepare validation data -use year dwt valid_y_eq_disp_bu_yr using /// - "$dir_data/ukhls_validation_sample.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum valid_y_eq_disp_bu_yr, d - - replace valid_y_eq_disp_bu_yr = . if /// - valid_y_eq_disp_bu_yr < r(p1) | valid_y_eq_disp_bu_yr > r(p99) - -} - -* Calulate gini for each year -statsby gini = r(gini), by(year) clear: ineqdeco valid_y_eq_disp_bu_yr [aw=dwt] - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year equivalisedincome using "$dir_data/simulated_data.dta", clear - -* Trim outliers -if "$trim_outliers" == "true" { - - sum equivalisedincome, d - - replace equivalisedincome = . if /// - equivalisedincome < r(p1) | equivalisedincome > r(p99) - -} - -* Calulate gini for each year and run -statsby gini = r(gini), by(year run) clear: ineqdeco equivalisedincome - -* Obtain the mean and standard deviation by year -collapse (mean) gini /// - (sd) gini_sd = gini, by(year) - -* Compute the 95% confidence interval -gen gini_high = gini + 1.96 * gini_sd -gen gini_low = gini - 1.96 * gini_sd - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea gini_high gini_low year, sort /// - color(green%20) legend(label(1 "Simulated") position(6) rows(1))) /// -(line gini year, sort color(green)legend(label(2 "UKHLS"))), /// - title("Gini Coefficient") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("Coefficient", size(small)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Gini coefficient computed using individual observations of benefit unit measure of equivalized disposable income.", /// - size(vsmall)) - -* 
Save figure -graph export "$dir_output_files/inequality/validation_${country}_gini.jpg", /// - replace width(2400) height(1350) quality(100) - - -graph drop _all - diff --git a/validation/02_simulated_output_validation/do_files/06_16_plot_number_children.do b/validation/02_simulated_output_validation/do_files/06_16_plot_number_children.do deleted file mode 100644 index 6359addbd..000000000 --- a/validation/02_simulated_output_validation/do_files/06_16_plot_number_children.do +++ /dev/null @@ -1,176 +0,0 @@ -******************************************************************************** -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Children -* AUTHORS: Ashley Burdett -* LAST UPDATE: 06/2025 (AB) -* COUNTRY: UK - -* NOTES: This do file plots simulated and UKHLS % of benefit units -* with a given number of children -******************************************************************************** - -******************************************************************************** -* 1 : Mean values over time -******************************************************************************** - -******************************************************************************** -* 1.1 : Mean values over time, working age (18-65), children < 18 -******************************************************************************** - -* Prepare validation data -use year idbenefitunit dwt children_* using /// - "$dir_data/ukhls_validation_sample.dta", clear - -bys year idbenefitunit: keep if _n == 1 - -* Calculate weighted share of benefit units with 0, 1, 2, 3 or more children -collapse (mean) children_* [aw = dwt], by(year) - -foreach varname in children_0 children_1 children_2 children_3p { - - rename `varname' valid_`varname' - -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year idbenefitunit children_* using /// - "$dir_data/simulated_data.dta", clear - -bys run year idbenefitunit: keep if _n == 1 - -collapse (mean) children_*, 
by(run year) - -rename children_3plus children_3p - -collapse (mean) children_* /// - (sd) children_0_sd = children_0 /// - children_1_sd = children_1 /// - children_2_sd = children_2 /// - children_3p_sd = children_3p /// - , by(year) - -foreach varname in children_0 children_1 children_2 children_3p { - - gen sim_`varname'_h = `varname' + 1.96*`varname'_sd - gen sim_`varname'_l = `varname' - 1.96*`varname'_sd - rename `varname' sim_`varname' - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figures -label var sim_children_0 "No children" -label var sim_children_1 "1 child" -label var sim_children_2 "2 children" -label var sim_children_3p "3+ children" - - -twoway (rarea sim_children_0_h sim_children_0_l year, /// - sort color(green%20) legend(label(1 "No children, simulated"))) /// -(line valid_children_0 year, sort color(green) /// - legend(label(2 "No children, UKHLS"))) /// - (rarea sim_children_1_h sim_children_1_l year, sort color(blue%20) /// - legend(label(3 "1 child, simulated"))) /// -(line valid_children_1 year, sort color(blue) /// - legend(label(4 "1 child, UKHLS"))) /// -(rarea sim_children_2_h sim_children_2_l year, sort color(red%20) /// - legend(label(5 "2 children, simulated"))) /// -(line valid_children_2 year, sort color(red) /// - legend(label(6 "2 children, UKHLS"))) /// -(rarea sim_children_3p_h sim_children_3p_l year, sort color(grey%20) /// - legend(label(7 "3+ children, simulated"))) /// -(line valid_children_3p year, sort color(grey) /// - legend(label(8 "3+ children, UKHLS"))), /// - title("Number of Children") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) ///) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Statistics computed at the benefit unit level.", size(vsmall)) - -* Save figure -graph export 
"$dir_output_files/children/validation_${country}_children_ts_18_65_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 1.2 : Mean values over time, working age (18-65), children < 3 -******************************************************************************** - -* Prepare validation data -use year idbenefitunit dwt dnc02 using /// - "$dir_data/ukhls_validation_sample.dta", clear - -gen child02 = . -replace child02 = 0 if dnc02 == 0 -replace child02 = 1 if dnc02 > 0 & dnc02 != . - -bys year idbenefitunit: keep if _n == 1 - -* Calculate weighted share of benefit units with 0, 1, 2, 3 or more children -collapse (mean) child02 [aw = dwt], by(year) - -foreach varname in child02 { - - rename `varname' valid_`varname' - -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulated data -use run year idbenefitunit sim_dnc02 using /// - "$dir_data/simulated_data.dta", clear - -gen sim_child02 = . -replace sim_child02 = 0 if sim_dnc02 == 0 -replace sim_child02 = 1 if sim_dnc02 > 0 & sim_dnc02 != . 
- -bys run year idbenefitunit: keep if _n == 1 - -collapse (mean) sim_child02, by(run year) - -collapse (mean) sim_child02 /// - (sd) sim_child02_sd = sim_child02 /// - , by(year) - -foreach varname in sim_child02 { - - gen `varname'_h = `varname' + 1.96*`varname'_sd - gen `varname'_l = `varname' - 1.96*`varname'_sd - rename `varname' sim_`varname' - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -* Plot figures -twoway (rarea sim_child02_h sim_child02_l year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_child02 year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Share With Child 0-2 Years Old") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: Statistics computed at the benefit unit level.", size(vsmall)) - -* Save figure -graph export "$dir_output_files/children/validation_${country}_young_child_ts_18_65_both.jpg", /// - replace width(2400) height(1350) quality(100) - -graph drop _all diff --git a/validation/02_simulated_output_validation/do_files/06_17_plot_disability.do b/validation/02_simulated_output_validation/do_files/06_17_plot_disability.do deleted file mode 100644 index 25b719146..000000000 --- a/validation/02_simulated_output_validation/do_files/06_17_plot_disability.do +++ /dev/null @@ -1,284 +0,0 @@ -******************************************************************************** -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Disability -* AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 (AB) -* COUNTRY: UK - -* NOTES: -******************************************************************************** - -******************************************************************************** -* 1 : Mean values over time 
-******************************************************************************** - -******************************************************************************** -* 1.1 : Mean values over time, working age (18-65) -******************************************************************************** - -* Prepare validation data -use year dwt dlltsd using /// - "$dir_data/ukhls_validation_sample.dta", clear - -collapse (mean) dlltsd [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare simulation data -use year sim_dlltsd run using "$dir_data/simulated_data.dta", clear - -collapse (mean) sim_dlltsd, by(run year) - -collapse (mean) sim_dlltsd (sd) sim_dlltsd_sd = sim_dlltsd, by(year) - -gen sim_dlltsd_high = sim_dlltsd + 1.96*sim_dlltsd_sd -gen sim_dlltsd_low = sim_dlltsd - 1.96*sim_dlltsd_sd - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -* Plot figure -twoway (rarea sim_dlltsd_high sim_dlltsd_low year, sort color(green%20) /// - legend(label(1 "Simulated "))) /// -(line dlltsd year, sort color(green) /// - legend(label(2 "UKHLS "))), /// - title("Disabled/Long-term Sick ") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: ", size(vsmall)) - -graph export /// -"$dir_output_files/disability/validation_${country}_disability_ts_${min_age}_${max_age}_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 1.2 : Mean values over time, working age (18-65), by gender -******************************************************************************** - -* Prepare validation data -use year dwt dlltsd dgn using /// - "$dir_data/ukhls_validation_sample.dta", clear - -collapse (mean) dlltsd [aw = dwt], by(year dgn) - -save 
"$dir_data/temp_valid_stats.dta", replace - -* Prepare simulation data -use year sim_dlltsd run dgn using "$dir_data/simulated_data.dta", clear - -gen dgn2 = 0 if dgn == "Female" -replace dgn2 = 1 if dgn == "Male" - -drop dgn -rename dgn2 dgn - -collapse (mean) sim_dlltsd, by(run year dgn) - -collapse (mean) sim_dlltsd (sd) sim_dlltsd_sd = sim_dlltsd, by(year dgn) - -gen sim_dlltsd_high = sim_dlltsd + 1.96*sim_dlltsd_sd -gen sim_dlltsd_low = sim_dlltsd - 1.96*sim_dlltsd_sd - -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - - -twoway (rarea sim_dlltsd_high sim_dlltsd_low year if dgn == 0, sort color(green%20) /// - legend(label(1 "Female, simulated"))) /// -(line dlltsd year if dgn == 0, sort color(green) /// - legend(label(2 "Female, UKHLS "))) /// - (rarea sim_dlltsd_high sim_dlltsd_low year if dgn == 1, sort color(red%20) /// - legend(label(3 "Male, simulated"))) /// -(line dlltsd year if dgn == 1, sort color(red) /// - legend(label(4 "Male, UKHLS"))), /// - title("Disabled/Long-term Sick ") /// - subtitle("Ages 18-65") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: ", size(vsmall)) - -graph export /// -"$dir_output_files/disability/validation_${country}_disability_ts_${min_age}_${max_age}_male_female.jpg", /// - replace width(2560) height(1440) quality(100) - - -******************************************************************************** -* 1.2 : Mean values over time, working age (18-65), by age -******************************************************************************** - -* Prepare validation data -use year dwt dlltsd dgn dag ageGroup using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -drop if ageGroup == 0 | ageGroup == 8 - -collapse (mean) dlltsd [aweight=dwt], by(ageGroup year) - -drop if missing(ageGroup) - -reshape wide dlltsd, i(year) 
j(ageGroup) - -forvalues i = 1(1)7 { - - rename dlltsd`i' dlltsd_`i'_valid - -} - -save "$dir_data/temp_valid_stats_full.dta", replace - - -* Prepare simulation data -use run year sim_dlltsd ageGroup using "$dir_data/simulated_data.dta", clear - -collapse (mean) sim_dlltsd, by(ageGroup run year) - -drop if missing(ageGroup) - -reshape wide sim_dlltsd, i(year run) j(ageGroup) - -forvalues i = 1(1)7{ - - rename sim_dlltsd`i' dlltsd_`i'_sim - -} - -collapse (mean) dlltsd* /// - (sd) sd_dlltsd_1_sim = dlltsd_1_sim /// - sd_dlltsd_2_sim = dlltsd_2_sim /// - sd_dlltsd_3_sim = dlltsd_3_sim /// - sd_dlltsd_4_sim = dlltsd_4_sim /// - sd_dlltsd_5_sim = dlltsd_5_sim /// - sd_dlltsd_6_sim = dlltsd_6_sim /// - sd_dlltsd_7_sim = dlltsd_7_sim /// - , by(year) - -forvalues i = 1(1)7 { - - gen dlltsd_`i'_sim_high = dlltsd_`i'_sim + 1.96*sd_dlltsd_`i'_sim - gen dlltsd_`i'_sim_low = dlltsd_`i'_sim - 1.96*sd_dlltsd_`i'_sim - -} - -recast double year - -merge 1:1 year using "$dir_data/temp_valid_stats_full.dta", keep(3) nogen - -* Plot figures -foreach vble in "dlltsd" { - - twoway (rarea `vble'_1_sim_high `vble'_1_sim_low year, /// - sort color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_1_valid year, sort legend(label(2 "UKHLS"))), /// - title("Age 15-19") /// - name(`vble'_1, replace) /// - ylabel(0 [0.1] 0.2, labsize(vsmall)) /// - xlabel(, labsize(vsmall)) /// - ytitle("Share", size(small)) /// - xtitle("Year", size(small)) /// - legend(size(small)) /// - graphregion(color(white)) - - twoway (rarea `vble'_2_sim_high `vble'_2_sim_low year, /// - sort color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_2_valid year, sort legend(label(2 "UKHLS"))), /// - title("Age 20-24") /// - name(`vble'_2, replace) /// - ylabel(0 [0.1] 0.2, labsize(vsmall)) /// - xlabel(, labsize(vsmall)) /// - ytitle("Share", size(small)) /// - xtitle("Year", size(small)) /// - legend(size(small)) /// - graphregion(color(white)) - - twoway (rarea 
`vble'_3_sim_high `vble'_3_sim_low year, /// - sort color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_3_valid year, sort legend(label(2 "UKHLS"))), /// - title("Age 25-29") /// - name(`vble'_3, replace) /// - ylabel(0 [0.1] 0.2, labsize(vsmall)) /// - xlabel(, labsize(vsmall)) /// - ytitle("Share", size(small)) /// - xtitle("Year", size(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - graphregion(color(white)) - - twoway (rarea `vble'_4_sim_high `vble'_4_sim_low year, /// - sort color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_4_valid year, sort legend(label(2 "UKHLS"))), /// - title("Age 30-34") /// - name(`vble'_4, replace) /// - ylabel(0 [0.1] 0.2, labsize(vsmall)) /// - xlabel(, labsize(vsmall)) /// - ytitle("Share", size(small)) /// - xtitle("Year", size(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - graphregion(color(white)) - - twoway (rarea `vble'_5_sim_high `vble'_5_sim_low year, /// - sort color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_5_valid year, sort legend(label(2 "UKHLS"))), /// - title("Age 35-39") /// - name(`vble'_5, replace) /// - ylabel(0 [0.1] 0.2, labsize(vsmall)) /// - xlabel(, labsize(vsmall)) /// - ytitle("Share", size(small)) /// - xtitle("Year", size(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - graphregion(color(white)) - - twoway (rarea `vble'_6_sim_high `vble'_6_sim_low year, /// - sort color(red%20) legend(label(1 "Simulated") position(6) /// - rows(1)))(line `vble'_6_valid year, sort /// - legend(label(2 "UKHLS"))), /// - title("Age 40-59") /// - name(`vble'_6, replace) /// - ylabel(0 [0.1] 0.2, labsize(vsmall)) /// - xlabel(, labsize(vsmall)) /// - ytitle("Share", size(small)) /// - xtitle("Year", size(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - graphregion(color(white)) - - twoway (rarea `vble'_7_sim_high `vble'_7_sim_low year, 
/// - sort color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_7_valid year, sort legend(label(2 "UKHLS"))), /// - title("Age 60-65") /// - name(`vble'_7, replace) /// - ylabel(0 [0.1] 0.2, labsize(vsmall)) /// - xlabel(, labsize(vsmall)) /// - ytitle("Share", size(small)) /// - xtitle("Year", size(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - graphregion(color(white)) - -} - -grc1leg dlltsd_1 dlltsd_2 dlltsd_3 dlltsd_4 dlltsd_5 dlltsd_6 dlltsd_7 , /// - title("Disabled/Long-term Sick by Age Group") /// - legendfrom(dlltsd_1) /// - graphregion(color(white)) /// - note("Notes:", size(vsmall)) - - -graph export /// -"$dir_output_files/disability/validation_${country}_disability_ts_all_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -graph drop _all diff --git a/validation/02_simulated_output_validation/do_files/06_18_plot_social_care.do b/validation/02_simulated_output_validation/do_files/06_18_plot_social_care.do deleted file mode 100644 index b5b68ff47..000000000 --- a/validation/02_simulated_output_validation/do_files/06_18_plot_social_care.do +++ /dev/null @@ -1,382 +0,0 @@ -******************************************************************************** -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Social care -* AUTHORS: Ashley Burdett -* LAST UPDATE: 9/25 -* COUNTRY: UK - -* NOTES: -******************************************************************************** - -******************************************************************************** -* 1 : Mean values over time -******************************************************************************** - -******************************************************************************** -* 1.1 : Mean values over time - need care -******************************************************************************** - -* Prepare validation data -use year idbenefitunit dwt need_socare dag using /// - 
"$dir_data/ukhls_validation_full_sample.dta", clear - -keep if dag > 64 & dag != . - -gen valid_need_care = 0 -replace valid_need_care = 1 if need_socare == 1 -replace valid_need_care = . if need_socare < 0 | need_socare == . - - -collapse (mean) valid_need_care [aw = dwt], by(year) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year idbenefitunit needsocialcare dag using /// - "$dir_data/simulated_data_full.dta", clear - -keep if dag > 64 - -gen sim_need_care = 0 -replace sim_need_care = 1 if needsocialcare == "True" - -collapse (mean) sim_need_care, by(year run) - -collapse (mean) sim_need_care /// - (sd) sim_need_care_sd = sim_need_care, by(year) - - -* Compute 95% confidence intervals -gen sim_need_care_h = sim_need_care + 1.96*sim_need_care_sd -gen sim_need_care_l = sim_need_care - 1.96*sim_need_care_sd - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea sim_need_care_h sim_need_care_l year, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_need_care year, sort color(green) /// - legend(label(2 "UKHLS"))), /// - title("Need Social Care") /// - subtitle("Ages 65+") /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) ///) /// - xlabel(, labsize(small)) /// - ylabel(, labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - note("Notes: ", size(vsmall)) - - -* Save figure -graph export /// -"$dir_output_files/care/validation_${country}_need_care_ts_65plus_both.jpg", /// - replace width(2400) height(1350) quality(100) - - -******************************************************************************** -* 1.2 : Mean values over time - need care, by gender -******************************************************************************** - -* Prepare validation data -use year idbenefitunit dwt dgn need_socare dag using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -keep if dag > 64 & dag != . 
- -gen valid_need_care = 0 -replace valid_need_care = 1 if need_socare == 1 -replace valid_need_care = . if need_socare < 0 | need_socare == . - -collapse (mean) valid_need_care [aw = dwt], by(year dgn) - -save "$dir_data/temp_valid_stats.dta", replace - -* Prepare simulated data -use run year idbenefitunit needsocialcare dgn dag using /// - "$dir_data/simulated_data_full.dta", clear - -keep if dag > 64 - -gen dgn2 = 0 if dgn == "Female" -replace dgn2 = 1 if dgn == "Male" - -drop dgn -rename dgn2 dgn - -gen sim_need_care = 0 -replace sim_need_care = 1 if needsocialcare == "True" - -collapse (mean) sim_need_care, by(year dgn run) - -collapse (mean) sim_need_care /// - (sd) sim_need_care_sd = sim_need_care, by(year dgn) - -* Compute 95% confidence intervals -gen sim_need_care_h = sim_need_care + 1.96*sim_need_care_sd -gen sim_need_care_l = sim_need_care - 1.96*sim_need_care_sd - -merge 1:1 year dgn using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -twoway (rarea sim_need_care_h sim_need_care_l year if dgn == 0, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_need_care year if dgn == 0, sort color(green) /// - legend(label(2 "UKHLS"))), /// - subtitle("Females") /// - name(health_female, replace) /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - ylabel(0[0.1]0.5,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - - -twoway (rarea sim_need_care_h sim_need_care_l year if dgn == 1, /// - sort color(green%20) legend(label(1 "Simulated"))) /// -(line valid_need_care year if dgn == 1, sort color(green) /// - legend(label(2 "UKHLS"))), /// - subtitle("Males") /// - name(health_male, replace) /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - ylabel(0[0.1]0.5,labsize(small)) /// - xlabel(,labsize(small)) /// - legend(size(small)) /// - graphregion(color(white)) /// - -grc1leg health_female health_male, /// - title("Need 
Social Care") /// - subtitle("Ages 65+") /// - legendfrom(health_female) rows(1) /// - graphregion(color(white)) /// -note("Notes: ", /// - size(vsmall)) - -* Save figure -graph export /// -"$dir_output_files/care/validation_${country}_need_care_ts_65plus_gender.jpg", /// - replace width(2400) height(1350) quality(100) - - -graph drop _all - - -******************************************************************************** -* 1.3 : Mean values over time - need care age group and gender -******************************************************************************** - -* Prepare validation data -use year dwt dgn ageGroup need_socare dag using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - -drop ageGroup -gen ageGroup = 1 if inrange(dag,65,69) -replace ageGroup = 2 if inrange(dag,70,74) -replace ageGroup = 3 if inrange(dag,75,79) -replace ageGroup = 4 if inrange(dag,80,85) -replace ageGroup = 5 if inrange(dag,85,89) -replace ageGroup = 6 if inrange(dag,90,100) - -drop if dag < 65 - -gen valid_need_care = 0 -replace valid_need_care = 1 if need_socare == 1 -replace valid_need_care = . if need_socare < 0 | need_socare == . 
- -gen care_m = valid_need_care if dgn == 1 -gen care_f = valid_need_care if dgn == 0 - -drop if ageGroup == 0 - -collapse (mean) care* [aw = dwt], by(ageGroup year) - -drop if missing(ageGroup) -reshape wide care*, i(year) j(ageGroup) - -forvalues i = 1(1)6 { - - rename care_f`i' care_f_`i'_valid - rename care_m`i' care_m_`i'_valid - -} - -save "$dir_data/temp_valid_stats.dta", replace - - -* Prepare Simulated data -use run year sim_sex ageGroup needsocialcare dag using /// - "$dir_data/simulated_data_full.dta", clear - -drop ageGroup -gen ageGroup = 1 if inrange(dag,65,69) -replace ageGroup = 2 if inrange(dag,70,74) -replace ageGroup = 3 if inrange(dag,75,79) -replace ageGroup = 4 if inrange(dag,80,85) -replace ageGroup = 5 if inrange(dag,85,89) -replace ageGroup = 6 if inrange(dag,90,100) - -drop if dag < 65 - -gen sim_need_care = 0 -replace sim_need_care = 1 if needsocialcare == "True" - -gen care_m = sim_need_care if sim_sex == 1 -gen care_f = sim_need_care if sim_sex == 2 - -collapse (mean) care*, by(ageGroup run year) -drop if missing(ageGroup) -reshape wide care*, i(year run) j(ageGroup) - -collapse (mean) care* /// - (sd) care_m_1_sd = care_m1 /// - (sd) care_f_1_sd = care_f1 /// - (sd) care_m_2_sd = care_m2 /// - (sd) care_f_2_sd = care_f2 /// - (sd) care_m_3_sd = care_m3 /// - (sd) care_f_3_sd = care_f3 /// - (sd) care_m_4_sd = care_m4 /// - (sd) care_f_4_sd = care_f4 /// - (sd) care_m_5_sd = care_m5 /// - (sd) care_f_5_sd = care_f5 /// - (sd) care_m_6_sd = care_m6 /// - (sd) care_f_6_sd = care_f6 /// - , by(year) - /*(sd) care_m_8_sd = care_m8 /// - *(sd) care_f_8_sd = care_f8 /// */ - -forvalues i=1(1)6 { - - gen care_f_`i'_sim_high = care_f`i' + 1.96*care_f_`i'_sd - gen care_f_`i'_sim_low = care_f`i' - 1.96*care_f_`i'_sd - gen care_m_`i'_sim_high = care_m`i' + 1.96*care_m_`i'_sd - gen care_m_`i'_sim_low = care_m`i' - 1.96*care_m_`i'_sd - -} - -merge 1:1 year using "$dir_data/temp_valid_stats.dta", keep(3) nogen - -* Plot figure -foreach vble in 
"care_f" "care_m" { - - twoway (rarea `vble'_1_sim_high `vble'_1_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_1_valid year, sort color(red) legend(label(2 "UKHLS"))), /// - subtitle("Age 65-69") /// - name(`vble'_1, replace) /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - ylabel(0[0.3]0.9, labsize(vsmall)) /// - xlabel(,labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - - twoway (rarea `vble'_2_sim_high `vble'_2_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_2_valid year, sort color(red) legend(label(2 "UKHLS"))), /// - subtitle("Age 70-74") /// - name(`vble'_2, replace) /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - ylabel(0[0.3]0.9, labsize(vsmall)) /// - xlabel(,labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - - twoway (rarea `vble'_3_sim_high `vble'_3_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_3_valid year, sort color(red) legend(label(2 "UKHLS"))), /// - subtitle("Age 75-79") /// - name(`vble'_3, replace) /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - ylabel(0[0.3]0.9, labsize(vsmall)) /// - xlabel(,labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - - twoway (rarea `vble'_4_sim_high `vble'_4_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_4_valid year, sort color(red) legend(label(2 "UKHLS"))), /// - subtitle("Age 80-84") /// - name(`vble'_4, replace) /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - ylabel(0[0.3]0.9, labsize(vsmall)) /// - xlabel(,labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - - twoway (rarea `vble'_5_sim_high `vble'_5_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") 
position(6) rows(1))) /// - (line `vble'_5_valid year, sort color(red) legend(label(2 "UKHLS"))), /// - subtitle("Age 85-89") /// - name(`vble'_5, replace) /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - ylabel(0[0.3]0.9, labsize(vsmall)) /// - xlabel(,labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - - twoway (rarea `vble'_6_sim_high `vble'_6_sim_low year, sort /// - color(red%20) legend(label(1 "Simulated") position(6) rows(1))) /// - (line `vble'_6_valid year, sort color(red) legend(label(2 "UKHLS"))), /// - subtitle("Age 90-100") /// - name(`vble'_6, replace) /// - xtitle("Year", size(small)) /// - ytitle("Share", size(small)) /// - ylabel(0[0.3]0.9, labsize(vsmall)) /// - xlabel(,labsize(vsmall)) /// - legend(size(small)) /// - graphregion(color(white)) - - -} - -* Save figures -grc1leg care_f_1 care_f_2 care_f_3 care_f_4 care_f_5 care_f_6 , /// - title("Need Social Care") /// - subtitle("Females") /// - legendfrom(care_f_1) /// - graphregion(color(white)) /// -note("Notes: ", /// - size(vsmall)) - -graph export /// -"$dir_output_files/care/validation_${country}_need_care_ts_65plus_female.jpg", /// - replace width(2400) height(1350) quality(100) - - -grc1leg care_m_1 care_m_2 care_m_3 care_m_4 care_m_5 care_m_6, /// - title("Need Social Care") /// - subtitle("Males") /// - legendfrom(care_m_1) /// - graphregion(color(white)) /// -note("Notes: ", /// - size(vsmall)) - -graph export /// -"$dir_output_files/care/validation_${country}_need_care_ts_65plus_male.jpg", /// - replace width(2400) height(1350) quality(100) - - -graph drop _all - - -/* -******************************************************************************** -* 1.4 : Mean values over time - provide care -******************************************************************************** - -* Prepare validation data -use year dwt dgn ageGroup careHours ProvidedWeekly dag using /// - "$dir_data/ukhls_validation_full_sample.dta", clear - - - - - -* 
Prepare simualted data diff --git a/validation/02_simulated_output_validation/do_files/07_01_correlations.do b/validation/02_simulated_output_validation/do_files/07_01_correlations.do deleted file mode 100644 index 56ac2fdb7..000000000 --- a/validation/02_simulated_output_validation/do_files/07_01_correlations.do +++ /dev/null @@ -1,159 +0,0 @@ -******************************************************************************** -* PROJECT: ESPON -* SECTION: Validation -* OBJECT: Correlation -* AUTHORS: Patryk Bronka, Ashley Burdett -* LAST UPDATE: 06/2025 (AB) -* COUNTRY: Greece - -* NOTES: This file calculates correlations between variables of -* interest, in observed and simulated data -* -* List of variables considered. Name : simulated : validation -* -* 1. disposable income per benefit unit : sim_y_disp_yr_bu : -* valid_y_disp_yr_bu -* 2. labour market status : sim_employed, sim_student, -* sim_inactive, sim_retired : valid_employed, -* valid_student, valid_inactive, valid_retired - - TO UPDATE -******************************************************************************** - -global sim_varlist sim_employed sim_inactive sim_retired sim_edu_high /// - sim_edu_med sim_edu_low sim_y_gross_yr_bu sim_yplgrs_dv_lvl_bu /// - sim_ypncp_lvl_bu sim_ypnoab_lvl_bu sim_y_disp_yr_bu equivalisedincome /// - /*potential_earnings_hourly*/ hoursworkedweekly sim_dcpst_snmprvp /// - sim_dcpst_p dhe - -global valid_varlist valid_employed valid_inactive valid_retired /// - valid_edu_high valid_edu_med valid_edu_low valid_y_gross_nsbc_yr_bu /// - valid_y_gross_labour_yr_bu capital_income_bu pension_income_bu /// - valid_y_disp_yr_bu valid_y_eq_disp_yr_bu /*valid_wage_hour*/ valid_lhw /// - valid_dcpst_snmprvp valid_dcpst_p dhe - -/* -Simulated correlations -*/ - -use run year ${sim_varlist} using "$dir_data/simulated_data.dta", clear - -lab var dhe "Health" -lab var sim_employed "Employed" -lab var sim_inactive "Non-employed" -lab var sim_retired "Retired" -lab var sim_edu_high "High 
education" -lab var sim_edu_med "Medium education" -lab var sim_edu_low "Low education" -lab var sim_y_disp_yr_bu "Disposable income" -lab var sim_y_gross_yr_bu "Gross income" -lab var sim_yplgrs_dv_lvl_bu "Gross labour income" -lab var sim_ypncp_lvl_bu "Capital income" -lab var sim_ypnoab_lvl_bu "Private pension income" -lab var equivalisedincome "Equivalised disposable income" -//lab var potential_earnings_hourly "Hourly wage" -lab var hoursworkedweekly "Hours worked" -lab var sim_dcpst_snmprvp "Single" -lab var sim_dcpst_p "Partnered" - -keep if run == 1 - -quietly correlate ${sim_varlist} -matrix CS = r(C) - -heatplot CS, values(format(%3.2f) size(1.1)) cuts(-1.05(.1)1.05) /// - color(hcl diverging, intensity(.6)) legend(off) aspectratio(1) /// - lower label xlabel(, angle(90) labsize(vsmall)) /// - ylabel(, labsize(vsmall)) title("Simulated") name(sim_corr, replace) /// - graphregion(color(white)) - -* Save figure -graph export /// - "$dir_output_files/correlations/validation_correlations_simulated_${min_age}_${max_age}.jpg", /// - replace width(2560) height(1440) quality(100) - -/* -Observed correlations -*/ - -use year dwt ${valid_varlist} using /// - "$dir_data/${country}-eusilc_validation_sample.dta", clear - -lab var dhe "Health" -lab var valid_employed "Employed" -lab var valid_inactive "Non-employed" -lab var valid_retired "Retired" -lab var valid_edu_high "High education" -lab var valid_edu_med "Medium education" -lab var valid_edu_low "Low education" -lab var valid_y_disp_yr_bu "Disposable income" -lab var valid_y_gross_nsbc_yr_bu "Gross income" -lab var valid_y_gross_labour_yr_bu "Gross labour income" -lab var capital_income_bu "Capital income" -lab var pension_income_bu "Private pension income" -lab var valid_y_eq_disp_yr_bu "Equivalised disposable income" -//lab var valid_wage_hour "Hourly wage" -lab var valid_lhw "Hours worked" -lab var valid_dcpst_p "Partnered" -lab var valid_dcpst_snmprvp "Single" - -replace valid_lhw = 0 if valid_inactive == 1 
| valid_retired == 1 - -quietly correlate ${valid_varlist} - -matrix CV = r(C) - -heatplot CV, values(format(%3.2f) size(1.1)) cuts(-1.05(.1)1.05) /// - color(hcl diverging, intensity(.6)) legend(off) aspectratio(1) /// - lower label xlabel(, angle(90) labsize(vsmall)) /// - ylabel(, labsize(vsmall)) title("Observed") name(obs_corr, replace) /// - graphregion(color(white)) - - -* Save figure -graph combine sim_corr obs_corr, title("Correlation coefficients") /// - note("Notes: Ages 18-65 included. ", size(vsmall)) graphregion(color(white)) - - -graph export /// -"$dir_output_files/correlations/validation_correlations_simulated_observed_${min_age}_${max_age}.png", /// - replace width(2560) height(1440) - -/* -Calculate the difference and absolute difference matrix -*/ - -matrix CDiff = CS - CV -//matewmf CDiff CDiffAbs, f(abs) - -/* -* Heatplot for the distance matrix -heatplot CDiff, values(format(%3.2f) size(tiny)) cuts(-1.05(.1)1.05) /// - color(hcl diverging, intensity(.6)) legend(off) aspectratio(1) /// - lower label xlabel(, angle(45) labsize(small)) /// - ylabel(, labsize(small)) /// - title("Distance between simulated" "and observed correlations") - -* Save figure -graph export /// - "$dir_output_files/validation_correlations_distance_${min_age}_${max_age}.jpg", /// - replace width(2560) height(1440) quality(100) -*/ - -* Heatplot for the difference matrix -heatplot CDiff, values(format(%3.2f) size(tiny)) cuts(-1.05(.1)1.05) /// - color(hcl diverging, intensity(.6)) legend(off) aspectratio(1) /// - lower label xlabel(, angle(90) labsize(vsmall)) /// - ylabel(, labsize(vsmall)) /// - title("Difference between simulated" "and observed correlations") /// - note("Positive values indicate that simulated correlation was stronger than observed", size(vsmall)) graphregion(color(white)) - -* Save figure -graph export /// -"$dir_output_files/correlations/validation_correlations_difference_${min_age}_${max_age}.jpg", /// - replace width(2560) height(1440) quality(100) 
- - -graph drop _all - - diff --git a/validation/02_simulated_output_validation/graphs/.DS_Store b/validation/02_simulated_output_validation/graphs/.DS_Store deleted file mode 100644 index 80c312e8a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/.DS_Store and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/.DS_Store b/validation/02_simulated_output_validation/graphs/20250909_run/.DS_Store deleted file mode 100644 index b226b12b1..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/.DS_Store and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/00_README b/validation/02_simulated_output_validation/graphs/20250909_run/00_README deleted file mode 100644 index d299ebe42..000000000 --- a/validation/02_simulated_output_validation/graphs/20250909_run/00_README +++ /dev/null @@ -1,2 +0,0 @@ -Date: 15/9/2025 -Current graphs produced using simulated data from Run ID "SimPaths_UK_with_ethnicity_2011_2023_50K_3runs_with_dcpst_idpartner". 
diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/children/validation_UK_children_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/children/validation_UK_children_ts_18_65_both.jpg deleted file mode 100644 index 2eece80da..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/children/validation_UK_children_ts_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/children/validation_UK_young_child_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/children/validation_UK_young_child_ts_18_65_both.jpg deleted file mode 100644 index f65c71296..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/children/validation_UK_young_child_ts_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/disability/validation_UK_disability_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/disability/validation_UK_disability_ts_18_65_both.jpg deleted file mode 100644 index 6fd16b668..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/disability/validation_UK_disability_ts_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/disability/validation_UK_disability_ts_18_65_male_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/disability/validation_UK_disability_ts_18_65_male_female.jpg deleted file mode 100644 index 7cf6ef9f4..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/disability/validation_UK_disability_ts_18_65_male_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/disability/validation_UK_disability_ts_all_both.jpg 
b/validation/02_simulated_output_validation/graphs/20250909_run/disability/validation_UK_disability_ts_all_both.jpg deleted file mode 100644 index 0e75a145b..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/disability/validation_UK_disability_ts_all_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_60_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_60_female.jpg deleted file mode 100644 index fde95c9dc..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_60_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_both.jpg deleted file mode 100644 index ac6428022..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_both_partnered.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_both_partnered.jpg deleted file mode 100644 index 8375952e8..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_both_partnered.jpg and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_both_prev_partnered.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_both_prev_partnered.jpg deleted file mode 100644 index bbf7d1712..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_both_prev_partnered.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_both_single.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_both_single.jpg deleted file mode 100644 index 89884e8b9..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_both_single.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_female.jpg deleted file mode 100644 index 31c2cc7df..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_female_partnered.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_female_partnered.jpg deleted file mode 100644 index 
080d593a6..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_female_partnered.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_female_prev_partnered.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_female_prev_partnered.jpg deleted file mode 100644 index 78a976657..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_female_prev_partnered.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_male.jpg deleted file mode 100644 index e21dd4cf1..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_male_partnered.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_male_partnered.jpg deleted file mode 100644 index ec9bcd877..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_male_partnered.jpg and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_male_prev_partnered.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_male_prev_partnered.jpg deleted file mode 100644 index 7d500b160..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_male_prev_partnered.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_male_single.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_male_single.jpg deleted file mode 100644 index 5898674cf..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17_65_male_single.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17plus_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17plus_both.jpg deleted file mode 100644 index 7e1ce6a69..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17plus_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17plus_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17plus_female.jpg deleted file mode 100644 index c5bf5f77f..000000000 Binary 
files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17plus_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17plus_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17plus_male.jpg deleted file mode 100644 index 699bb439e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_17plus_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_all_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_all_both.jpg deleted file mode 100644 index cd2b59042..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_all_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_all_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_all_female.jpg deleted file mode 100644 index 547d3f015..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_all_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_all_male.jpg 
b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_all_male.jpg deleted file mode 100644 index 7acaebbdb..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_not_employed_ts_all_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_note_employed_ts_17_65_female_single.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_note_employed_ts_17_65_female_single.jpg deleted file mode 100644 index 6a219e530..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_note_employed_ts_17_65_female_single.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_30_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_30_both.jpg deleted file mode 100644 index b1b223d90..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_30_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_30_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_30_female.jpg deleted file mode 100644 index cb0385ed8..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_30_female.jpg and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_30_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_30_male.jpg deleted file mode 100644 index 2e86fbef7..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_30_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_60_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_60_female.jpg deleted file mode 100644 index f4f74cda5..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_60_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both.jpg deleted file mode 100644 index 25236bf15..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both_partnered.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both_partnered.jpg deleted file mode 100644 index e50f949a2..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both_partnered.jpg and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both_prev_partnered.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both_prev_partnered.jpg deleted file mode 100644 index e3c9adce4..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both_prev_partnered.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both_single.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both_single.jpg deleted file mode 100644 index abb369eb1..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both_single.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both_single_com.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both_single_com.jpg deleted file mode 100644 index 82acd41db..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_both_single_com.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female.jpg deleted file mode 100644 index 4a5e4411b..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female_partnered.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female_partnered.jpg deleted file mode 100644 index 7b42ecefc..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female_partnered.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female_prev_partnered.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female_prev_partnered.jpg deleted file mode 100644 index ffc6a56d0..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female_prev_partnered.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female_single.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female_single.jpg deleted file mode 100644 index bb899e91b..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female_single.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female_single_com.jpg 
b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female_single_com.jpg deleted file mode 100644 index 26c7ca310..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_female_single_com.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male.jpg deleted file mode 100644 index be7ef4a94..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male_partnered.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male_partnered.jpg deleted file mode 100644 index edf78c640..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male_partnered.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male_prev_partnered.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male_prev_partnered.jpg deleted file mode 100644 index 3bd757041..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male_prev_partnered.jpg and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male_single.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male_single.jpg deleted file mode 100644 index 86c3d063c..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male_single.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male_single_com.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male_single_com.jpg deleted file mode 100644 index e64b4d1ca..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17_65_male_single_com.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17plus_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17plus_both.jpg deleted file mode 100644 index 6428e1daa..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17plus_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17plus_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17plus_female.jpg deleted file mode 100644 index 5c473c135..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17plus_female.jpg and /dev/null 
differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17plus_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17plus_male.jpg deleted file mode 100644 index c807a69f2..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_17plus_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_all_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_all_both.jpg deleted file mode 100644 index a2b8e605e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_all_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_all_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_all_female.jpg deleted file mode 100644 index d8b991501..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_all_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_all_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_all_male.jpg deleted file mode 100644 index 83c23569d..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_activity_status_ts_all_male.jpg and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17_60_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17_60_female.jpg deleted file mode 100644 index 8401f5dd3..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17_60_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17_65_both.jpg deleted file mode 100644 index 5840be74d..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17_65_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17_65_female.jpg deleted file mode 100644 index e0534bf90..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17_65_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17_65_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17_65_male.jpg deleted file mode 100644 index d9acfa28e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17_65_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17plus_both.jpg 
b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17plus_both.jpg deleted file mode 100644 index 8bcf4a564..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17plus_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17plus_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17plus_female.jpg deleted file mode 100644 index 7636f02fe..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17plus_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17plus_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17plus_male.jpg deleted file mode 100644 index cc7c3a501..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_17plus_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_all_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_all_both.jpg deleted file mode 100644 index a307ff58e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_all_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_all_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_all_female.jpg 
deleted file mode 100644 index d25964016..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_all_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_all_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_all_male.jpg deleted file mode 100644 index e71e3038f..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_employed_ts_all_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_students_ts_15_29_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_students_ts_15_29_both.jpg deleted file mode 100644 index 243cb2372..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_students_ts_15_29_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_students_ts_all_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_students_ts_all_both.jpg deleted file mode 100644 index 9bda794f5..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/economic_activity/validation_UK_students_ts_all_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_30_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_30_both.jpg deleted file mode 100644 index 9c6c423a9..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_30_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_30_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_30_female.jpg deleted file mode 100644 index 33ee1eb91..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_30_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_30_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_30_male.jpg deleted file mode 100644 index 85b303cac..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_30_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_65_both.jpg deleted file mode 100644 index f70d1caec..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_65_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_65_female.jpg deleted file mode 100644 index c60e679ce..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_65_female.jpg and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_65_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_65_male.jpg deleted file mode 100644 index 06cf2a6f8..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_17_65_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_66_70_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_66_70_both.jpg deleted file mode 100644 index 0643f08f8..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_education_ts_66_70_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_leave_education_ts_17_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_leave_education_ts_17_65_both.jpg deleted file mode 100644 index 2c51513d6..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/education/validation_UK_leave_education_ts_17_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_mcs_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_mcs_ts_18_65_both.jpg deleted file mode 100644 index a9287b4a4..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_mcs_ts_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_mcs_ts_18_65_gender.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_mcs_ts_18_65_gender.jpg deleted 
file mode 100644 index f12b81f45..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_mcs_ts_18_65_gender.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_mcs_ts_all_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_mcs_ts_all_female.jpg deleted file mode 100644 index 191801adc..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_mcs_ts_all_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_mcs_ts_all_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_mcs_ts_all_male.jpg deleted file mode 100644 index 07989b01a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_mcs_ts_all_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_pcs_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_pcs_ts_18_65_both.jpg deleted file mode 100644 index 4c5d31b35..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_pcs_ts_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_pcs_ts_18_65_gender.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_pcs_ts_18_65_gender.jpg deleted file mode 100644 index d8207dec8..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_pcs_ts_18_65_gender.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_pcs_ts_all_female.jpg 
b/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_pcs_ts_all_female.jpg deleted file mode 100644 index b7a112f69..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_pcs_ts_all_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_pcs_ts_all_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_pcs_ts_all_male.jpg deleted file mode 100644 index f98794e5e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_pcs_ts_all_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_hist_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_hist_18_65_both.jpg deleted file mode 100644 index e74960624..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_hist_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_hist_18_65_gender.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_hist_18_65_gender.jpg deleted file mode 100644 index 1cdd8dd78..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_hist_18_65_gender.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_ts_18_65_both.jpg deleted file mode 100644 index ac8f8c755..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_ts_18_65_both.jpg and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_ts_18_65_gender.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_ts_18_65_gender.jpg deleted file mode 100644 index ace931cb4..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_ts_18_65_gender.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_ts_all_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_ts_all_female.jpg deleted file mode 100644 index 34897d7aa..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_ts_all_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_ts_all_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_ts_all_male.jpg deleted file mode 100644 index 923d56d38..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/health/validation_UK_sf1_ts_all_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2011.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2011.png deleted file mode 100644 index da5877aad..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2011.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2011_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2011_female.png deleted file mode 100644 index 
2fac97ffc..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2011_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2011_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2011_male.png deleted file mode 100644 index bad275949..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2011_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2011_unrestricted.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2011_unrestricted.png deleted file mode 100644 index 57aa8de69..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2011_unrestricted.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2012.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2012.png deleted file mode 100644 index af93136db..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2012.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2012_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2012_female.png deleted file mode 100644 index 68244aa9f..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2012_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2012_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2012_male.png deleted file mode 100644 index e2c961796..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2012_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2012_unrestricted.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2012_unrestricted.png deleted file mode 100644 index 0b491f933..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2012_unrestricted.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2013.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2013.png deleted file mode 100644 index 6a4bbc08c..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2013.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2013_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2013_female.png deleted file mode 100644 index 66ed31014..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2013_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2013_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2013_male.png deleted file mode 100644 index d99df582e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2013_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2013_unrestricted.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2013_unrestricted.png deleted file mode 100644 index f67b4c202..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2013_unrestricted.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2014.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2014.png deleted file mode 100644 index f2916c72f..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2014.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2014_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2014_female.png deleted file mode 100644 index c1e536118..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2014_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2014_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2014_male.png deleted file mode 100644 index 55d63eeb2..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2014_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2014_unrestricted.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2014_unrestricted.png deleted file mode 100644 index bffaea76f..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2014_unrestricted.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2015.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2015.png deleted file mode 100644 index 30a7e0d9b..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2015.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2015_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2015_female.png deleted file mode 100644 index d4c8a1a3f..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2015_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2015_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2015_male.png deleted file mode 100644 index 54bf05a77..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2015_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2015_unrestricted.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2015_unrestricted.png deleted file mode 100644 index 315e423ce..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2015_unrestricted.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2016.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2016.png deleted file mode 100644 index 5c6c39f93..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2016.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2016_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2016_female.png deleted file mode 100644 index 8e9a11771..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2016_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2016_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2016_male.png deleted file mode 100644 index 1ce73ca59..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2016_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2016_unrestricted.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2016_unrestricted.png deleted file mode 100644 index 44e29b0ef..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2016_unrestricted.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2017.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2017.png deleted file mode 100644 index 16cba516a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2017.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2017_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2017_female.png deleted file mode 100644 index ddb8751a1..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2017_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2017_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2017_male.png deleted file mode 100644 index dbc6bd905..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2017_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2017_unrestricted.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2017_unrestricted.png deleted file mode 100644 index ab5505d8d..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2017_unrestricted.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2018.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2018.png deleted file mode 100644 index 6e52756a2..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2018.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2018_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2018_female.png deleted file mode 100644 index 883a8d02a..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2018_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2018_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2018_male.png deleted file mode 100644 index c6e7064e0..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2018_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2018_unrestricted.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2018_unrestricted.png deleted file mode 100644 index 8bf8978a3..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2018_unrestricted.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2019.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2019.png deleted file mode 100644 index cdfdedb2b..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2019.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2019_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2019_female.png deleted file mode 100644 index 25e5a920a..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2019_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2019_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2019_male.png deleted file mode 100644 index df5e4ec57..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2019_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2019_unrestricted.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2019_unrestricted.png deleted file mode 100644 index 56985e6e7..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2019_unrestricted.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2020.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2020.png deleted file mode 100644 index 48460b9bd..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2020.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2020_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2020_female.png deleted file mode 100644 index e8848b7ac..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2020_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2020_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2020_male.png deleted file mode 100644 index 3fefd9447..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2020_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2020_unrestricted.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2020_unrestricted.png deleted file mode 100644 index 1bd0bd47c..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2020_unrestricted.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2021.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2021.png deleted file mode 100644 index 0d0001a38..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2021.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2021_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2021_female.png deleted file mode 100644 index 10f46796a..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2021_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2021_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2021_male.png deleted file mode 100644 index a3d24c23f..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2021_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2021_unrestricted.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2021_unrestricted.png deleted file mode 100644 index 99dab5243..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2021_unrestricted.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2022.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2022.png deleted file mode 100644 index 9a3c1fb37..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2022.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2022_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2022_female.png deleted file mode 100644 index 6f2730d5a..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2022_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2022_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2022_male.png deleted file mode 100644 index fc1724e2c..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2022_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2022_unrestricted.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2022_unrestricted.png deleted file mode 100644 index 7f18a9f76..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2022_unrestricted.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2023.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2023.png deleted file mode 100644 index 300626ccc..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2023.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2023_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2023_female.png deleted file mode 100644 index e8ad77258..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2023_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2023_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2023_male.png deleted file mode 100644 index 85477a8bc..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2023_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2023_unrestricted.png b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2023_unrestricted.png deleted file mode 100644 index 451a993aa..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_hist_2023_unrestricted.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_ts_18_65_both.jpg deleted file mode 100644 index 8d789de14..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_ts_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_ts_18_65_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_ts_18_65_female.jpg deleted file mode 100644 index 53bfa6f03..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_ts_18_65_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_ts_18_65_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_ts_18_65_male.jpg deleted file mode 100644 index 4b98f7c00..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/hours_worked/validation_UK_hours_worked_ts_18_65_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/.DS_Store b/validation/02_simulated_output_validation/graphs/20250909_run/income/.DS_Store deleted file mode 100644 index 26fa95923..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/.DS_Store and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2011.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2011.png deleted file mode 100644 index 86ea0b2a4..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2011.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2012.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2012.png deleted file mode 100644 index 5253b712a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2012.png and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2013.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2013.png deleted file mode 100644 index 5b33b89a3..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2013.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2014.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2014.png deleted file mode 100644 index 647f32366..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2014.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2015.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2015.png deleted file mode 100644 index ba20e2cc0..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2015.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2016.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2016.png deleted file mode 100644 index 719f3cac1..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2016.png and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2017.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2017.png deleted file mode 100644 index e5545960b..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2017.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2018.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2018.png deleted file mode 100644 index b99b24c8d..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2018.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2019.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2019.png deleted file mode 100644 index e56b89c92..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2019.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2020.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2020.png deleted file mode 100644 index 4d96aa5e1..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2020.png and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2021.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2021.png deleted file mode 100644 index 0b60b23d8..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2021.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2022.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2022.png deleted file mode 100644 index 1a3b2d75f..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2022.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2023.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2023.png deleted file mode 100644 index 11ae1405a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_dist_2023.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_ts_18_65_both.jpg deleted file mode 100644 index 1df0321a8..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_capital_income_ts_18_65_both.jpg and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_no_capital_income_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_no_capital_income_ts_18_65_both.jpg deleted file mode 100644 index 3feff37ed..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_no_capital_income_ts_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2011.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2011.png deleted file mode 100644 index c16662acc..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2011.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2012.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2012.png deleted file mode 100644 index a1ec0fc30..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2012.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2013.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2013.png deleted file mode 100644 index a799f78a9..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2013.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2014.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2014.png deleted file mode 100644 index bf90e03d1..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2014.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2015.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2015.png deleted file mode 100644 index 11a63d425..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2015.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2016.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2016.png deleted file mode 100644 index c0ff37581..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2016.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2017.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2017.png deleted file mode 
100644 index 0340505b4..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2017.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2018.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2018.png deleted file mode 100644 index 43626b251..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2018.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2019.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2019.png deleted file mode 100644 index 9d5507c7e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2019.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2020.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2020.png deleted file mode 100644 index 336f7d586..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2020.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2021.png 
b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2021.png deleted file mode 100644 index b3bb2dc0a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2021.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2022.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2022.png deleted file mode 100644 index 335a4b9b6..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2022.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2023.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2023.png deleted file mode 100644 index 508f70e4b..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/capital_income/validation_UK_positive_capital_income_dist_2023.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2011.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2011.png deleted file mode 100644 index 05b475fd6..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2011.png and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2012.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2012.png deleted file mode 100644 index 3ff242cc7..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2012.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2013.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2013.png deleted file mode 100644 index 8b4e17f00..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2013.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2014.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2014.png deleted file mode 100644 index ef5512934..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2014.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2015.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2015.png deleted file mode 100644 index f6140d54e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2015.png and /dev/null differ 
diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2016.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2016.png deleted file mode 100644 index 3fef0ed39..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2016.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2017.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2017.png deleted file mode 100644 index 4a1f77ee6..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2017.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2018.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2018.png deleted file mode 100644 index 6136ad30d..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2018.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2019.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2019.png deleted file mode 100644 index f6ec3659e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2019.png and 
/dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2020.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2020.png deleted file mode 100644 index f99c43980..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2020.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2021.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2021.png deleted file mode 100644 index 0a08febc6..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2021.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2022.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2022.png deleted file mode 100644 index ae5815559..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2022.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2023.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2023.png deleted file mode 100644 index 3a21d2e3d..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_dist_2023.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_ts_18_65_both.jpg deleted file mode 100644 index af80a04a9..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/disposable_income/validation_UK_disposable_income_ts_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2011.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2011.png deleted file mode 100644 index 1cdc32a3d..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2011.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2012.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2012.png deleted file mode 100644 index 9ade7b622..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2012.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2013.png 
b/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2013.png deleted file mode 100644 index 24b9f6b2f..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2013.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2014.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2014.png deleted file mode 100644 index f8396c6b1..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2014.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2015.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2015.png deleted file mode 100644 index 469943c1a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2015.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2016.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2016.png deleted file mode 100644 index 13d5e7599..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2016.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2017.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2017.png deleted file mode 100644 index a45214457..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2017.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2018.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2018.png deleted file mode 100644 index 37759804f..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2018.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2019.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2019.png deleted file mode 100644 index bbb948681..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2019.png and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2020.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2020.png deleted file mode 100644 index bd5ba89d6..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2020.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2021.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2021.png deleted file mode 100644 index f619169db..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2021.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2022.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2022.png deleted file mode 100644 index d71bce8da..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2022.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2023.png 
b/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2023.png deleted file mode 100644 index 5d717236c..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_inc_dist_2023.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_income_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_income_ts_18_65_both.jpg deleted file mode 100644 index 386813e9a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/equivalised_disposable_income/validation_UK_equivalised_disposable_income_ts_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2011.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2011.png deleted file mode 100644 index bc6d47ece..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2011.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2012.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2012.png deleted file mode 100644 index 469b3776b..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2012.png and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2013.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2013.png deleted file mode 100644 index e5d6c26d4..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2013.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2014.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2014.png deleted file mode 100644 index 7e66d0402..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2014.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2015.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2015.png deleted file mode 100644 index 2908a0063..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2015.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2016.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2016.png deleted file mode 100644 index 8035a313d..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2016.png and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2017.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2017.png deleted file mode 100644 index 9f23d4aba..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2017.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2018.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2018.png deleted file mode 100644 index ebeb760d2..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2018.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2019.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2019.png deleted file mode 100644 index cd59671e4..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2019.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2020.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2020.png deleted file mode 100644 index 6f7d32651..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2020.png and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2021.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2021.png deleted file mode 100644 index 5514c8ef7..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2021.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2022.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2022.png deleted file mode 100644 index 131d1e966..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2022.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2023.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2023.png deleted file mode 100644 index b4f5e34fb..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_dist_2023.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_ts_18_65_both.jpg deleted file mode 100644 index 551f70779..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_gross_income_ts_18_65_both.jpg and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2011_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2011_female.png deleted file mode 100644 index 9e32bf688..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2011_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2011_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2011_male.png deleted file mode 100644 index eaf8cc67e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2011_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2012_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2012_female.png deleted file mode 100644 index e3597a775..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2012_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2012_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2012_male.png deleted file mode 100644 index b220351d0..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2012_male.png and /dev/null differ 
diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2013_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2013_female.png deleted file mode 100644 index 1c3aed1e3..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2013_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2013_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2013_male.png deleted file mode 100644 index f8204a474..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2013_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2014_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2014_female.png deleted file mode 100644 index 8d30f5c60..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2014_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2014_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2014_male.png deleted file mode 100644 index 88e63003f..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2014_male.png and 
/dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2015_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2015_female.png deleted file mode 100644 index 4c801b7a9..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2015_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2015_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2015_male.png deleted file mode 100644 index 7f529fe54..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2015_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2016_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2016_female.png deleted file mode 100644 index 0ac68fb40..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2016_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2016_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2016_male.png deleted file mode 100644 index ed3e36293..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2016_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2017_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2017_female.png deleted file mode 100644 index a9c30eda6..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2017_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2017_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2017_male.png deleted file mode 100644 index a865e5a63..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2017_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2018_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2018_female.png deleted file mode 100644 index 17aea8079..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2018_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2018_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2018_male.png deleted file mode 100644 index fbb81fbae..000000000 
Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2018_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2019_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2019_female.png deleted file mode 100644 index fb04bfaee..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2019_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2019_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2019_male.png deleted file mode 100644 index f0b709da8..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2019_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2020_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2020_female.png deleted file mode 100644 index 3ec85ef67..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2020_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2020_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2020_male.png deleted file mode 100644 index 
a8879ca4f..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2020_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2021_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2021_female.png deleted file mode 100644 index 54513446e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2021_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2021_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2021_male.png deleted file mode 100644 index 7d026d6bf..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2021_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2022_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2022_female.png deleted file mode 100644 index 217b8dca7..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2022_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2022_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2022_male.png deleted file mode 
100644 index dbdbbfc3e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2022_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2023_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2023_female.png deleted file mode 100644 index bd5b6c6c9..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2023_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2023_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2023_male.png deleted file mode 100644 index 13bc6fc9e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_dist_2023_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_ts_18_65_both.jpg deleted file mode 100644 index b968e1894..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_ts_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_ts_18_65_female.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_ts_18_65_female.jpg deleted file 
mode 100644 index 2705dd38b..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_ts_18_65_female.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_ts_18_65_male.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_ts_18_65_male.jpg deleted file mode 100644 index bfe5ea7c5..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_income/validation_UK_ind_gross_income_ts_18_65_male.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2011_both.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2011_both.png deleted file mode 100644 index 90d1aaa1a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2011_both.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2012_both.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2012_both.png deleted file mode 100644 index 6704d1835..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2012_both.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2013_both.png 
b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2013_both.png deleted file mode 100644 index 269e6af67..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2013_both.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2014_both.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2014_both.png deleted file mode 100644 index 953151628..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2014_both.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2015_both.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2015_both.png deleted file mode 100644 index 32b5b2b07..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2015_both.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2016_both.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2016_both.png deleted file mode 100644 index 40dd910a2..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2016_both.png and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2017_both.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2017_both.png deleted file mode 100644 index bb5608bce..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2017_both.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2018_both.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2018_both.png deleted file mode 100644 index cb8745b63..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2018_both.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2019_both.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2019_both.png deleted file mode 100644 index 029d47eb1..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2019_both.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2020_both.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2020_both.png deleted file mode 100644 index 5facf0b1d..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2020_both.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2021_both.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2021_both.png deleted file mode 100644 index 572242697..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2021_both.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2022_both.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2022_both.png deleted file mode 100644 index 282ebe641..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2022_both.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2023_both.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2023_both.png deleted file mode 100644 index e5304059a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_dist_2023_both.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_ts_18_65.jpg 
b/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_ts_18_65.jpg deleted file mode 100644 index d801c902a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/gross_labour_income/validation_UK_gross_labour_income_ts_18_65.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_no_pension_income_ts_65plus_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_no_pension_income_ts_65plus_both.jpg deleted file mode 100644 index 5694eb7be..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_no_pension_income_ts_65plus_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2011.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2011.png deleted file mode 100644 index f5e313103..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2011.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2012.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2012.png deleted file mode 100644 index 377456b5f..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2012.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2013.png 
b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2013.png deleted file mode 100644 index b93252683..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2013.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2014.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2014.png deleted file mode 100644 index 5ef13183b..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2014.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2015.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2015.png deleted file mode 100644 index e54e90b82..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2015.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2016.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2016.png deleted file mode 100644 index 6787d357e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2016.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2017.png 
b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2017.png deleted file mode 100644 index 5f9944eb0..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2017.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2018.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2018.png deleted file mode 100644 index e1c82f5e7..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2018.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2019.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2019.png deleted file mode 100644 index 3677b2068..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2019.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2020.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2020.png deleted file mode 100644 index 425eaae5e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2020.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2021.png 
b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2021.png deleted file mode 100644 index 784753d87..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2021.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2022.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2022.png deleted file mode 100644 index f93545470..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2022.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2023.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2023.png deleted file mode 100644 index e1cd84a5e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_dist_2023.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_ts_65plus_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_ts_65plus_both.jpg deleted file mode 100644 index 434261300..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_pension_income_ts_65plus_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2011.png 
b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2011.png deleted file mode 100644 index 9d9543199..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2011.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2012.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2012.png deleted file mode 100644 index a3825e382..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2012.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2013.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2013.png deleted file mode 100644 index b9de8fd3d..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2013.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2014.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2014.png deleted file mode 100644 index 3a1a46fa7..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2014.png and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2015.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2015.png deleted file mode 100644 index b714e835d..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2015.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2016.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2016.png deleted file mode 100644 index d44c06a1a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2016.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2017.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2017.png deleted file mode 100644 index 1d76df0bb..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2017.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2018.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2018.png deleted file mode 100644 index 425588f17..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2018.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2019.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2019.png deleted file mode 100644 index 57bb4f5f9..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2019.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2020.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2020.png deleted file mode 100644 index 37af93e8e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2020.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2021.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2021.png deleted file mode 100644 index 71aa76368..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2021.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2022.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2022.png deleted file mode 
100644 index 6d0a4e5f8..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2022.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2023.png b/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2023.png deleted file mode 100644 index 264b24257..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/income/pension_income/validation_UK_positive_pension_income_dist_2023.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/inequality/validation_UK_9010.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/inequality/validation_UK_9010.jpg deleted file mode 100644 index 8351ee0f6..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/inequality/validation_UK_9010.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/inequality/validation_UK_gini.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/inequality/validation_UK_gini.jpg deleted file mode 100644 index ef9f7d500..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/inequality/validation_UK_gini.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/inequality/validation_UK_p90p10.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/inequality/validation_UK_p90p10.jpg deleted file mode 100644 index 0158ab5df..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/inequality/validation_UK_p90p10.jpg and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/inequality/validation_UK_p90p50.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/inequality/validation_UK_p90p50.jpg deleted file mode 100644 index 75b58ac7b..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/inequality/validation_UK_p90p50.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnered_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnered_ts_18_65_both.jpg deleted file mode 100644 index 19796e246..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnered_ts_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_children_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_children_ts_18_65_both.jpg deleted file mode 100644 index 19afed283..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_children_ts_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_ts_18_65_both.jpg deleted file mode 100644 index cdd910889..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_ts_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_ts_20_29_both.jpg 
b/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_ts_20_29_both.jpg deleted file mode 100644 index 71f51cad0..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_ts_20_29_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_ts_30_39_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_ts_30_39_both.jpg deleted file mode 100644 index b65dff72e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_ts_30_39_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_ts_40_59_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_ts_40_59_both.jpg deleted file mode 100644 index 9c05647c0..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/partnership/validation_UK_partnership_ts_40_59_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/poverty/validation_UK_at_risk_of_poverty_18_65.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/poverty/validation_UK_at_risk_of_poverty_18_65.jpg deleted file mode 100644 index 704f7c5ec..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/poverty/validation_UK_at_risk_of_poverty_18_65.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/poverty/validation_UK_at_risk_of_poverty_18plus.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/poverty/validation_UK_at_risk_of_poverty_18plus.jpg deleted file mode 100644 index 1a8b2e3b9..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/poverty/validation_UK_at_risk_of_poverty_18plus.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2011.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2011.png deleted file mode 100644 index 7d9ff0edd..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2011.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2011_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2011_female.png deleted file mode 100644 index 513877364..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2011_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2011_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2011_male.png deleted file mode 100644 index 44d10778d..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2011_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2012.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2012.png deleted file mode 100644 index 86216b63e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2012.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2012_female.png 
b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2012_female.png deleted file mode 100644 index dec64942a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2012_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2012_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2012_male.png deleted file mode 100644 index 976a8d338..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2012_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2013.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2013.png deleted file mode 100644 index eee332a48..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2013.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2013_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2013_female.png deleted file mode 100644 index 28cc52931..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2013_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2013_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2013_male.png deleted file mode 100644 index e63276b22..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2013_male.png and /dev/null differ diff 
--git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2014.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2014.png deleted file mode 100644 index 028d08dba..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2014.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2014_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2014_female.png deleted file mode 100644 index d685e2aed..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2014_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2014_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2014_male.png deleted file mode 100644 index bf1a6cb21..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2014_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2015.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2015.png deleted file mode 100644 index 1b1e8860e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2015.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2015_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2015_female.png deleted file mode 100644 index 7d1f2e88f..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2015_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2015_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2015_male.png deleted file mode 100644 index 721117468..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2015_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2016.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2016.png deleted file mode 100644 index 5c82243e0..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2016.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2016_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2016_female.png deleted file mode 100644 index 7cd2ef4cc..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2016_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2016_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2016_male.png deleted file mode 100644 index c737ca8b5..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2016_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2017.png 
b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2017.png deleted file mode 100644 index bea5de786..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2017.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2017_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2017_female.png deleted file mode 100644 index f8dbdf1fe..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2017_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2017_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2017_male.png deleted file mode 100644 index 646b490a7..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2017_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2018.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2018.png deleted file mode 100644 index afdef0dfb..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2018.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2018_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2018_female.png deleted file mode 100644 index e3b460b17..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2018_female.png and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2018_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2018_male.png deleted file mode 100644 index 8e6eb7da9..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2018_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2019.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2019.png deleted file mode 100644 index 6e96ee515..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2019.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2019_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2019_female.png deleted file mode 100644 index ef5b1ca6f..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2019_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2019_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2019_male.png deleted file mode 100644 index 37e9516c4..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2019_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2020.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2020.png deleted file mode 100644 index 306fe7dae..000000000 Binary files 
a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2020.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2020_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2020_female.png deleted file mode 100644 index 072e2a7b3..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2020_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2020_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2020_male.png deleted file mode 100644 index 1e40a6b7b..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2020_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2021.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2021.png deleted file mode 100644 index f60db9e08..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2021.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2021_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2021_female.png deleted file mode 100644 index 47a0a73ba..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2021_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2021_male.png 
b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2021_male.png deleted file mode 100644 index 833d9f34e..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2021_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2022.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2022.png deleted file mode 100644 index 126c9b849..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2022.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2022_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2022_female.png deleted file mode 100644 index f30ebc8e2..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2022_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2022_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2022_male.png deleted file mode 100644 index f9f143592..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2022_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2023.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2023.png deleted file mode 100644 index a9dd6c3c4..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2023.png and /dev/null differ diff --git 
a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2023_female.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2023_female.png deleted file mode 100644 index 36aea78a1..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2023_female.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2023_male.png b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2023_male.png deleted file mode 100644 index ab9a84884..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_dist_2023_male.png and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_ts_18_65_both.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_ts_18_65_both.jpg deleted file mode 100644 index 2f6db112a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_ts_18_65_both.jpg and /dev/null differ diff --git a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_ts_18_65_gender.jpg b/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_ts_18_65_gender.jpg deleted file mode 100644 index 1de20036a..000000000 Binary files a/validation/02_simulated_output_validation/graphs/20250909_run/wages/validation_UK_wages_ts_18_65_gender.jpg and /dev/null differ