Set up environment

#####STEP 0-1: Reset environment #####
rm(list=ls())
knitr::opts_chunk$set(echo = TRUE)
options(repos = structure(c(CRAN = "http://cran.rstudio.com/")))

#####STEP 0-2: Install packages #####
list.of.packages <- c("grf", "metafor", "splitstackshape", "dplyr", 
                     "tidyverse", "foreach", "cowplot", "reshape2", 
                     "doParallel", "survival", "readstata13", "ggplot2", 
                     "rsample", "DiagrammeR", "e1071", "pscl", "pROC", 
                     "caret", "ModelMetrics", "MatchIt", "Hmisc", "scales",
                     "lmtest", "sandwich", "haven", "rpms", "randomForest",
                     "fabricatr", "gridExtra", "VIM", "mice",
                     "lmtest", "ivreg", "kableExtra")
new.packages <- list.of.packages[!(list.of.packages %in% 
                                 installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

lapply(list.of.packages, library, character.only = TRUE)
## Warning: package 'grf' was built under R version 4.3.3
## Warning: package 'metafor' was built under R version 4.3.3
## Warning: package 'metadat' was built under R version 4.3.3
## Warning: package 'numDeriv' was built under R version 4.3.1
## Warning: package 'splitstackshape' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## Warning: package 'foreach' was built under R version 4.3.3
## Warning: package 'cowplot' was built under R version 4.3.3
## Warning: package 'reshape2' was built under R version 4.3.3
## Warning: package 'doParallel' was built under R version 4.3.3
## Warning: package 'iterators' was built under R version 4.3.3
## Warning: package 'readstata13' was built under R version 4.3.3
## Warning: package 'rsample' was built under R version 4.3.3
## Warning: package 'DiagrammeR' was built under R version 4.3.3
## Warning: package 'e1071' was built under R version 4.3.3
## Warning: package 'pscl' was built under R version 4.3.3
## Warning: package 'pROC' was built under R version 4.3.3
## Warning: package 'caret' was built under R version 4.3.3
## Warning: package 'ModelMetrics' was built under R version 4.3.3
## Warning: package 'MatchIt' was built under R version 4.3.3
## Warning: package 'Hmisc' was built under R version 4.3.3
## Warning: package 'scales' was built under R version 4.3.3
## Warning: package 'lmtest' was built under R version 4.3.3
## Warning: package 'zoo' was built under R version 4.3.3
## Warning: package 'sandwich' was built under R version 4.3.3
## Warning: package 'haven' was built under R version 4.3.3
## Warning: package 'rpms' was built under R version 4.3.3
## Warning: package 'randomForest' was built under R version 4.3.3
## Warning: package 'fabricatr' was built under R version 4.3.3
## Warning: package 'gridExtra' was built under R version 4.3.3
## Warning: package 'VIM' was built under R version 4.3.3
## Warning: package 'colorspace' was built under R version 4.3.3
## Warning: package 'mice' was built under R version 4.3.3
## Warning in check_dep_version(): ABI version mismatch: 
## lme4 was built with Matrix ABI version 1
## Current Matrix ABI version is 0
## Please re-install lme4 from source or restore original 'Matrix' package
## Warning: package 'ivreg' was built under R version 4.3.3
## Warning: package 'kableExtra' was built under R version 4.3.3
print(paste("Version of grf package:", packageVersion("grf")))

#####STEP 0-3: Basic information #####
Sys.time()
# Get detailed R session and system information
session_info <- sessionInfo()
system_info <- Sys.info()
# Combine the output
list(session_info = session_info, system_info = system_info)

Set non-run-specific parameters

#####STEP 0-4: Set non-run-specific parameters #####
seedset <- 777
numthreadsset <- min(6, parallel::detectCores()) 
if (numthreadsset!= 6) {
  print("the results of grf vary by num.thread (publication paper used num.thread=6)")
} 

cat("number of threads (affects grf results):", numthreadsset,"\n")
## number of threads (affects grf results): 6
# Set printing options
options(digits = 4)

Set run-specific parameters

#####STEP 0-5: Set run-specific parameters #####
published_paper_run <- 0 # ANALYST FORM
if (published_paper_run == 1) {
  print("save intermediate files into Cleaned_input_data/As_published/empirical/ folders")
  warning("Changing this setting to 1 overwrites the input files required for 
          replicating on different platforms.")
} else {
  print("save intermediate files into Cleaned_input_data/Testing/empirical/ folders")
}
## [1] "save intermediate files into Cleaned_input_data/Testing/empirical/ folders"
nfolds <- 10 # Set number of folds

cw_run_name <- "med" # ANALYST FORM
cw_grid <- 5 # ANALYST FORM

# Set cW name based on cW_run_name and cW_grid
cw_name <- paste0("cw_", cw_run_name, "_", cw_grid)

# List of outcomes and their maximum C_w values
outcomes_list <- list( # ANALYST FORM
  list("sbp_neg", -117.00),
  list("hdl_level_neg", -46.08),
  list("debt_neg", 0.46),
  list("ohp_all_ever_inperson", 0), 
  list("sim_sbp_neg_alpha_5_presentation", -119.00),
  list("sim_hdl_level_neg_alpha_5_presentation", -46.08),
  list("sim_debt_neg_alpha_5_presentation", 0.43)
)

# List of outcomes that are continuous
continuous_outcomes <- c("sbp_neg",
                        "hdl_level_neg",
                        "sim_sbp_neg_alpha_5_presentation",
                        "sim_hdl_level_neg_alpha_5_presentation")

# List of outcomes that are binary
binary_outcomes <- c("ohp_all_ever_inperson",
                    "debt_neg",
                    "sim_debt_neg_alpha_5_presentation")

print("Outcomes and their maximum C_w values:")
## [1] "Outcomes and their maximum C_w values:"
for(outcome in outcomes_list) {
  print(paste("Outcome:", outcome[[1]], "Max C_w:", outcome[[2]]))
}
## [1] "Outcome: sbp_neg Max C_w: -117"
## [1] "Outcome: hdl_level_neg Max C_w: -46.08"
## [1] "Outcome: debt_neg Max C_w: 0.46"
## [1] "Outcome: ohp_all_ever_inperson Max C_w: 0"
## [1] "Outcome: sim_sbp_neg_alpha_5_presentation Max C_w: -119"
## [1] "Outcome: sim_hdl_level_neg_alpha_5_presentation Max C_w: -46.08"
## [1] "Outcome: sim_debt_neg_alpha_5_presentation Max C_w: 0.43"
print(paste("Number of grid points:", cw_grid))
## [1] "Number of grid points: 5"
print(paste("Number of folds:", nfolds))
## [1] "Number of folds: 10"

Set file paths

#####STEP 0-6: Set file paths #####
# Set the processed path based on published_paper_run
# ***Important: for file paths to work, use the drop-down menu on "Knit" to select the "Current Working Directory" under "Knit Directory." This assumes that you have cloned the repo to your local directory.***
processedpath <- if (published_paper_run == 1) {
    "PP_Full_Analysis/Cleaned_input_data/As_published/"
  } else {
    "PP_Full_Analysis/Cleaned_input_data/Testing/"
  }

# Set the results directory as cw_name subdirectory
result_dir <- paste0(processedpath, "empirical/", cw_name, "/")

# If there is not a folder with the name result_dir, create it
if (!dir.exists(result_dir)) {
  dir.create(result_dir, recursive = TRUE)
}

print(paste0("Processed data path:", processedpath, "empirical/"))
## [1] "Processed data path:PP_Full_Analysis/Cleaned_input_data/Testing/empirical/"
print(paste("Results directory:", result_dir))
## [1] "Results directory: PP_Full_Analysis/Cleaned_input_data/Testing/empirical/cw_med_5/"

Load data

Read in raw and simulated data, then combine them.

#####STEP 3-1: Read data #####
d_empirical <- read.csv(paste0(processedpath, "empirical/2_Imputed_Wide_Dataset.csv"))

print("Information on empirical dataset:")
## [1] "Information on empirical dataset:"
glimpse(d_empirical)
## Rows: 12,208
## Columns: 70
## $ numhh_list            <int> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ gender_inp            <int> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, …
## $ age_inp               <int> 60, 41, 39, 52, 51, 32, 34, 23, 43, 46, 38, 25, …
## $ hispanic_inp          <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ race_white_inp        <int> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, …
## $ race_black_inp        <int> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ race_nwother_inp      <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ast_dx_pre_lottery    <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, …
## $ dia_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hbp_dx_pre_lottery    <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ chl_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ami_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ chf_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ emp_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ kid_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cancer_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, …
## $ dep_dx_pre_lottery    <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, …
## $ charg_tot_pre_ed      <dbl> 0.0, 0.0, 1888.2, 0.0, 1715.3, 0.0, 0.0, 5743.9,…
## $ ed_charg_tot_pre_ed   <dbl> 0.0, 0.0, 1888.2, 0.0, 1006.3, 0.0, 0.0, 4542.4,…
## $ num_visit_pre_cens_ed <int> 0, 0, 1, 0, 2, 0, 0, 7, 0, 0, 0, 0, 1, 0, 0, 0, …
## $ any_depres_pre_ed     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ lessHS                <int> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ HSorGED               <int> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ X                     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ person_id             <int> 5, 8, 16, 17, 18, 23, 24, 29, 47, 57, 59, 68, 70…
## $ weight_total_inp      <dbl> 1.1504, 0.8975, 1.0000, 1.2126, 1.0000, 1.0033, …
## $ diabetes              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hypertension          <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ highcholesterol       <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression            <int> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ phq                   <int> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8, 7, 3, 2, 0, 2, …
## $ cvd_risk              <dbl> 0.1370, 0.1120, 0.0330, 0.2530, 0.1560, 0.0120, …
## $ doc_any               <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, …
## $ ed_any                <int> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ hosp_any              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ oop_spend             <int> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, …
## $ catastrophic          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ debt                  <int> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, …
## $ borrow                <int> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, …
## $ a1c                   <dbl> 5.037, 5.201, 5.854, 5.364, 5.527, 5.037, 5.446,…
## $ hdl_level             <dbl> 48.33, 51.33, 38.58, 51.33, 28.08, 31.08, 25.83,…
## $ chl_level             <dbl> 241.0, 229.9, 229.9, 235.4, 177.7, 173.8, 152.7,…
## $ bmi                   <dbl> 26.66, 35.23, 37.12, 24.81, 27.02, 26.26, 27.70,…
## $ sbp                   <int> 144, 134, 126, 168, 119, 98, 108, 125, 100, 104,…
## $ dbp                   <int> 81, 82, 94, 110, 79, 59, 63, 76, 77, 63, 84, 62,…
## $ prescriptions_any     <int> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, …
## $ prescriptions         <int> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, 4, 0, 4, 2, 1, 4,…
## $ hypertension_med      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ cholesterol_med       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ diabetes_med          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression_med        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, …
## $ household_id          <int> 100005, 102094, 140688, 100017, 100018, 115253, …
## $ eligibility           <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, …
## $ ohp_all_ever_inperson <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, …
## $ doc_num               <int> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12, 6, 0, 3, 0, 10, …
## $ ed_num                <int> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, 0, 2, 0, 0, 0, 0,…
## $ hosp_num              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, …
## $ charg_tot_ed          <dbl> 0, 2751, 15233, NA, 0, 0, 0, 8436, 0, NA, 0, 0, …
## $ ed_charg_tot_ed       <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0.0, 0.0, 7067.0, …
## $ sbp_neg               <int> -144, -134, -126, -168, -119, -98, -108, -125, -…
## $ dbp_neg               <int> -81, -82, -94, -110, -79, -59, -63, -76, -77, -6…
## $ chl_level_neg         <dbl> -241.0, -229.9, -229.9, -235.4, -177.7, -173.8, …
## $ hdl_level_neg         <dbl> -48.33, -51.33, -38.58, -51.33, -28.08, -31.08, …
## $ a1c_neg               <dbl> -5.037, -5.201, -5.854, -5.364, -5.527, -5.037, …
## $ bmi_neg               <dbl> -26.66, -35.23, -37.12, -24.81, -27.02, -26.26, …
## $ phq_neg               <int> -1, -9, -2, -13, -2, -3, -2, -14, -11, -8, -7, -…
## $ cvd_risk_neg          <dbl> -0.1370, -0.1120, -0.0330, -0.2530, -0.1560, -0.…
## $ debt_neg              <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, …
## $ borrow_neg            <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, …
## $ catastrophic_neg      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
#####STEP 3-6: Read simulated data #####
d_simulated <- read.csv(paste0(processedpath, "simulated/2b_Simulated_Outcomes.csv"))

print("Information on simulated dataset:")
## [1] "Information on simulated dataset:"
glimpse(d_simulated)
## Rows: 12,208
## Columns: 8
## $ person_id                              <int> 5, 8, 16, 17, 18, 23, 24, 29, 4…
## $ sim_sbp                                <dbl> 248.979, 160.146, 251.084, 160.…
## $ sim_debt_neg_alpha_5                   <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ sim_sbp_neg_alpha_5                    <dbl> -144.00, -134.00, -84.61, -168.…
## $ sim_hdl_level_neg_alpha_5              <dbl> -48.33, -51.33, -5.64, -51.33, …
## $ sim_debt_neg_alpha_5_presentation      <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ sim_sbp_neg_alpha_5_presentation       <dbl> -144.00, -134.00, -84.61, -168.…
## $ sim_hdl_level_neg_alpha_5_presentation <dbl> -48.33, -51.33, -5.64, -51.33, …
#####STEP 3-7: Combine empirical and simulated data #####
# Combine empirical and simulated data based on person_id
d <- merge(d_empirical, d_simulated, by = "person_id", all = TRUE)

print("Information on combined dataset:")
## [1] "Information on combined dataset:"
glimpse(d)
## Rows: 12,208
## Columns: 77
## $ person_id                              <int> 5, 8, 16, 17, 18, 23, 24, 29, 4…
## $ numhh_list                             <int> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1…
## $ gender_inp                             <int> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1…
## $ age_inp                                <int> 60, 41, 39, 52, 51, 32, 34, 23,…
## $ hispanic_inp                           <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0…
## $ race_white_inp                         <int> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1…
## $ race_black_inp                         <int> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0…
## $ race_nwother_inp                       <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ ast_dx_pre_lottery                     <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0…
## $ dia_dx_pre_lottery                     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ hbp_dx_pre_lottery                     <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1…
## $ chl_dx_pre_lottery                     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ ami_dx_pre_lottery                     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ chf_dx_pre_lottery                     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ emp_dx_pre_lottery                     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ kid_dx_pre_lottery                     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cancer_dx_pre_lottery                  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ dep_dx_pre_lottery                     <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1…
## $ charg_tot_pre_ed                       <dbl> 0.0, 0.0, 1888.2, 0.0, 1715.3, …
## $ ed_charg_tot_pre_ed                    <dbl> 0.0, 0.0, 1888.2, 0.0, 1006.3, …
## $ num_visit_pre_cens_ed                  <int> 0, 0, 1, 0, 2, 0, 0, 7, 0, 0, 0…
## $ any_depres_pre_ed                      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ lessHS                                 <int> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0…
## $ HSorGED                                <int> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0…
## $ X                                      <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, …
## $ weight_total_inp                       <dbl> 1.1504, 0.8975, 1.0000, 1.2126,…
## $ diabetes                               <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ hypertension                           <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ highcholesterol                        <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ depression                             <int> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, …
## $ phq                                    <int> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8…
## $ cvd_risk                               <dbl> 0.1370, 0.1120, 0.0330, 0.2530,…
## $ doc_any                                <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1…
## $ ed_any                                 <int> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0…
## $ hosp_any                               <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ oop_spend                              <int> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1…
## $ catastrophic                           <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ debt                                   <int> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1…
## $ borrow                                 <int> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1…
## $ a1c                                    <dbl> 5.037, 5.201, 5.854, 5.364, 5.5…
## $ hdl_level                              <dbl> 48.33, 51.33, 38.58, 51.33, 28.…
## $ chl_level                              <dbl> 241.0, 229.9, 229.9, 235.4, 177…
## $ bmi                                    <dbl> 26.66, 35.23, 37.12, 24.81, 27.…
## $ sbp                                    <int> 144, 134, 126, 168, 119, 98, 10…
## $ dbp                                    <int> 81, 82, 94, 110, 79, 59, 63, 76…
## $ prescriptions_any                      <int> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1…
## $ prescriptions                          <int> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, …
## $ hypertension_med                       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ cholesterol_med                        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ diabetes_med                           <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ depression_med                         <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ household_id                           <int> 100005, 102094, 140688, 100017,…
## $ eligibility                            <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0…
## $ ohp_all_ever_inperson                  <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0…
## $ doc_num                                <int> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12,…
## $ ed_num                                 <int> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, …
## $ hosp_num                               <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ charg_tot_ed                           <dbl> 0, 2751, 15233, NA, 0, 0, 0, 84…
## $ ed_charg_tot_ed                        <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0…
## $ sbp_neg                                <int> -144, -134, -126, -168, -119, -…
## $ dbp_neg                                <int> -81, -82, -94, -110, -79, -59, …
## $ chl_level_neg                          <dbl> -241.0, -229.9, -229.9, -235.4,…
## $ hdl_level_neg                          <dbl> -48.33, -51.33, -38.58, -51.33,…
## $ a1c_neg                                <dbl> -5.037, -5.201, -5.854, -5.364,…
## $ bmi_neg                                <dbl> -26.66, -35.23, -37.12, -24.81,…
## $ phq_neg                                <int> -1, -9, -2, -13, -2, -3, -2, -1…
## $ cvd_risk_neg                           <dbl> -0.1370, -0.1120, -0.0330, -0.2…
## $ debt_neg                               <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ borrow_neg                             <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0…
## $ catastrophic_neg                       <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ sim_sbp                                <dbl> 248.979, 160.146, 251.084, 160.…
## $ sim_debt_neg_alpha_5                   <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ sim_sbp_neg_alpha_5                    <dbl> -144.00, -134.00, -84.61, -168.…
## $ sim_hdl_level_neg_alpha_5              <dbl> -48.33, -51.33, -5.64, -51.33, …
## $ sim_debt_neg_alpha_5_presentation      <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ sim_sbp_neg_alpha_5_presentation       <dbl> -144.00, -134.00, -84.61, -168.…
## $ sim_hdl_level_neg_alpha_5_presentation <dbl> -48.33, -51.33, -5.64, -51.33, …

Create folds

#####STEP 3-2: Create and assign folds #####
set.seed(seedset)  # Set seed for reproducibility
d <- d %>%
  mutate(fold = sample(1:nfolds, n(), replace = TRUE)) # Assign folds to each row

print("Information on folds:")
## [1] "Information on folds:"
table(d$fold)
## 
##    1    2    3    4    5    6    7    8    9   10 
## 1303 1253 1179 1242 1175 1235 1214 1218 1201 1188
fold_summaries <- d %>%
  group_by(fold) %>%
  summarise(
    across(where(is.numeric), ~ mean(., na.rm = TRUE), .names = "avg_{.col}"),
    across(where(is.factor), ~ mean(as.numeric(.), na.rm = TRUE), 
           .names = "prop_{.col}")
  )

# Create a kable table
kable(fold_summaries, format = "markdown")
fold avg_person_id avg_numhh_list avg_gender_inp avg_age_inp avg_hispanic_inp avg_race_white_inp avg_race_black_inp avg_race_nwother_inp avg_ast_dx_pre_lottery avg_dia_dx_pre_lottery avg_hbp_dx_pre_lottery avg_chl_dx_pre_lottery avg_ami_dx_pre_lottery avg_chf_dx_pre_lottery avg_emp_dx_pre_lottery avg_kid_dx_pre_lottery avg_cancer_dx_pre_lottery avg_dep_dx_pre_lottery avg_charg_tot_pre_ed avg_ed_charg_tot_pre_ed avg_num_visit_pre_cens_ed avg_any_depres_pre_ed avg_lessHS avg_HSorGED avg_X avg_weight_total_inp avg_diabetes avg_hypertension avg_highcholesterol avg_depression avg_phq avg_cvd_risk avg_doc_any avg_ed_any avg_hosp_any avg_oop_spend avg_catastrophic avg_debt avg_borrow avg_a1c avg_hdl_level avg_chl_level avg_bmi avg_sbp avg_dbp avg_prescriptions_any avg_prescriptions avg_hypertension_med avg_cholesterol_med avg_diabetes_med avg_depression_med avg_household_id avg_eligibility avg_ohp_all_ever_inperson avg_doc_num avg_ed_num avg_hosp_num avg_charg_tot_ed avg_ed_charg_tot_ed avg_sbp_neg avg_dbp_neg avg_chl_level_neg avg_hdl_level_neg avg_a1c_neg avg_bmi_neg avg_phq_neg avg_cvd_risk_neg avg_debt_neg avg_borrow_neg avg_catastrophic_neg avg_sim_sbp avg_sim_debt_neg_alpha_5 avg_sim_sbp_neg_alpha_5 avg_sim_hdl_level_neg_alpha_5 avg_sim_debt_neg_alpha_5_presentation avg_sim_sbp_neg_alpha_5_presentation avg_sim_hdl_level_neg_alpha_5_presentation
1 37499 1.236 0.5664 40.63 0.1896 0.6884 0.0990 0.1589 0.2049 0.0714 0.1742 0.1282 0.0200 0.0061 0.0207 0.0169 0.0345 0.3415 2452 999.2 0.8074 0.0123 0.2218 0.4474 6104 1.244 0.0224 0.0596 0.0666 0.0496 6.584 0.0815 0.6567 0.4040 0.1413 0.5862 0.0437 0.5573 0.2235 5.348 48.20 205.7 30.03 119.4 75.95 0.5411 1.903 0.1466 0.0936 0.0760 0.1727 140365 0.5280 0.3239 5.706 1.0146 0.2121 3284 1365 -119.4 -75.95 -205.7 -48.20 -5.348 -30.03 -6.584 -0.0815 0.4427 0.7765 0.9563 197.7 0.4203 -121.5 -49.89 0.4203 -121.5 -49.89
2 36399 1.257 0.5786 40.61 0.1828 0.6784 0.1030 0.1357 0.2003 0.0670 0.1931 0.1277 0.0200 0.0112 0.0247 0.0231 0.0503 0.3272 2419 946.2 0.8244 0.0223 0.2099 0.4653 5924 1.258 0.0184 0.0565 0.0637 0.0525 6.709 0.0790 0.6896 0.4032 0.1288 0.5720 0.0544 0.5494 0.2187 5.314 48.35 206.2 29.74 118.9 75.38 0.5587 1.829 0.1325 0.0838 0.0607 0.1668 139845 0.5164 0.2889 6.410 0.9888 0.2037 3628 1725 -118.9 -75.38 -206.2 -48.35 -5.314 -29.74 -6.709 -0.0790 0.4506 0.7813 0.9456 201.2 0.4145 -122.2 -50.95 0.4145 -122.2 -50.95
3 38011 1.229 0.5810 40.78 0.1824 0.7048 0.0958 0.1187 0.1883 0.0729 0.1891 0.1374 0.0170 0.0076 0.0246 0.0153 0.0382 0.3427 1906 885.0 0.7498 0.0187 0.2078 0.4521 6189 1.246 0.0136 0.0601 0.0643 0.0478 6.925 0.0808 0.6689 0.4049 0.1197 0.5742 0.0534 0.5380 0.2351 5.340 46.79 204.5 30.20 118.8 75.58 0.5496 1.927 0.1510 0.0916 0.0780 0.1705 141094 0.5344 0.3028 5.522 0.9770 0.1811 3211 1454 -118.8 -75.58 -204.5 -46.79 -5.340 -30.20 -6.925 -0.0808 0.4620 0.7649 0.9466 198.5 0.4526 -122.1 -49.40 0.4526 -122.1 -49.40
4 37765 1.244 0.5564 40.95 0.1683 0.6997 0.1023 0.1441 0.1916 0.0676 0.1667 0.1232 0.0225 0.0137 0.0274 0.0217 0.0467 0.3341 2192 854.2 0.8172 0.0185 0.1981 0.4557 6148 1.225 0.0121 0.0576 0.0664 0.0658 6.726 0.0825 0.6820 0.4153 0.1435 0.5606 0.0575 0.5720 0.2119 5.314 47.56 205.2 29.72 119.1 75.78 0.5564 1.801 0.1272 0.0853 0.0652 0.1538 140672 0.5201 0.3132 6.075 1.0057 0.2138 3448 1342 -119.1 -75.78 -205.2 -47.56 -5.314 -29.72 -6.726 -0.0825 0.4280 0.7881 0.9425 199.8 0.4052 -122.5 -50.26 0.4052 -122.5 -50.26
5 38389 1.260 0.5532 41.44 0.1745 0.6706 0.1157 0.1481 0.2000 0.0706 0.1906 0.1226 0.0196 0.0111 0.0170 0.0170 0.0443 0.3421 2011 838.6 0.7532 0.0128 0.2034 0.4315 6250 1.236 0.0137 0.0481 0.0654 0.0551 6.893 0.0841 0.6661 0.3986 0.1261 0.5656 0.0434 0.5437 0.2383 5.335 48.68 207.4 29.74 120.1 76.34 0.5307 1.897 0.1353 0.0860 0.0723 0.1855 141058 0.5319 0.3294 5.841 0.9343 0.2022 2618 1175 -120.1 -76.34 -207.4 -48.68 -5.335 -29.74 -6.893 -0.0841 0.4563 0.7617 0.9566 200.0 0.4161 -123.6 -51.41 0.4161 -123.6 -51.41
6 37575 1.224 0.5846 40.58 0.1725 0.6802 0.1117 0.1555 0.1911 0.0607 0.1749 0.1174 0.0178 0.0089 0.0259 0.0121 0.0445 0.3441 1812 817.0 0.7150 0.0154 0.2146 0.4348 6117 1.257 0.0163 0.0781 0.0701 0.0591 7.027 0.0767 0.6778 0.3994 0.1308 0.5857 0.0463 0.5545 0.2344 5.324 48.00 206.8 30.03 118.5 75.58 0.5652 2.049 0.1466 0.0955 0.0640 0.1919 140096 0.5158 0.2939 6.192 1.0342 0.2060 3877 1399 -118.5 -75.58 -206.8 -48.00 -5.324 -30.03 -7.027 -0.0767 0.4455 0.7656 0.9537 198.0 0.4128 -121.2 -50.09 0.4128 -121.2 -50.09
7 37796 1.246 0.5535 41.04 0.1730 0.6829 0.1063 0.1524 0.1730 0.0758 0.1977 0.1310 0.0231 0.0107 0.0214 0.0198 0.0404 0.3386 2057 824.1 0.7100 0.0107 0.2158 0.4613 6153 1.227 0.0132 0.0505 0.0578 0.0399 6.970 0.0848 0.6636 0.3977 0.1245 0.5677 0.0412 0.5375 0.2244 5.349 47.23 204.0 29.81 119.6 76.00 0.5544 1.900 0.1474 0.0980 0.0725 0.1779 140679 0.4942 0.3097 5.362 0.9163 0.1800 2800 1339 -119.6 -76.00 -204.0 -47.23 -5.349 -29.81 -6.970 -0.0848 0.4625 0.7756 0.9588 200.1 0.4367 -122.3 -49.42 0.4367 -122.3 -49.42
8 37003 1.250 0.5525 40.47 0.1823 0.6929 0.0878 0.1420 0.1929 0.0788 0.1650 0.1174 0.0181 0.0123 0.0181 0.0148 0.0411 0.3374 2151 932.7 0.8112 0.0131 0.1864 0.4598 6023 1.231 0.0157 0.0624 0.0637 0.0522 6.944 0.0779 0.6604 0.4145 0.1472 0.5442 0.0397 0.5444 0.2204 5.345 47.79 205.5 29.78 118.4 75.49 0.5484 1.917 0.1388 0.0846 0.0747 0.1897 140584 0.5230 0.3136 6.401 1.0635 0.2287 3637 1616 -118.4 -75.49 -205.5 -47.79 -5.345 -29.78 -6.944 -0.0779 0.4556 0.7796 0.9603 200.3 0.4299 -120.9 -49.76 0.4299 -120.9 -49.76
9 37606 1.251 0.5737 40.99 0.1907 0.6719 0.1099 0.1507 0.1990 0.0691 0.2015 0.1324 0.0150 0.0133 0.0241 0.0225 0.0425 0.3372 1964 871.7 0.8060 0.0125 0.1948 0.4721 6122 1.236 0.0226 0.0553 0.0703 0.0472 6.670 0.0804 0.6542 0.4242 0.1125 0.5617 0.0389 0.5386 0.2132 5.348 47.65 205.9 29.86 119.8 76.40 0.5517 1.892 0.1357 0.0849 0.0683 0.1624 140770 0.5287 0.3106 5.477 1.0159 0.1548 3423 1481 -119.8 -76.40 -205.9 -47.65 -5.348 -29.86 -6.670 -0.0804 0.4614 0.7868 0.9611 198.3 0.4371 -122.2 -49.57 0.4371 -122.2 -49.57
10 37022 1.252 0.5556 40.53 0.1827 0.7003 0.1002 0.1322 0.1911 0.0774 0.1667 0.1296 0.0244 0.0160 0.0160 0.0227 0.0455 0.3628 2360 930.0 0.8359 0.0168 0.1961 0.4621 6027 1.242 0.0160 0.0538 0.0589 0.0454 6.753 0.0805 0.6765 0.3997 0.1289 0.5747 0.0393 0.5414 0.2291 5.327 46.69 203.2 29.73 118.4 75.16 0.5463 1.931 0.1380 0.0901 0.0774 0.1810 139994 0.5286 0.3241 6.438 0.9949 0.1755 3221 1430 -118.4 -75.16 -203.2 -46.69 -5.327 -29.73 -6.753 -0.0805 0.4586 0.7709 0.9607 199.1 0.4381 -121.7 -49.33 0.4381 -121.7 -49.33

Create charges variables

#####STEP 3-3: Create charges variables #####
# Print summary statistics and number of zeros of charge variables
print("Summary statistics of charg_tot_pre_ed:")
## [1] "Summary statistics of charg_tot_pre_ed:"
summary(d$charg_tot_pre_ed)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0    2136     744  180055
print("Number of zeros of charg_tot_pre_ed:")
## [1] "Number of zeros of charg_tot_pre_ed:"
sum(d$charg_tot_pre_ed == 0, na.rm = TRUE)
## [1] 8273
print("Summary statistics of ed_charg_tot_pre_ed:")
## [1] "Summary statistics of ed_charg_tot_pre_ed:"
summary(d$ed_charg_tot_pre_ed)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0     891     603   41246
print("Number of zeros of ed_charg_tot_pre_ed:")
## [1] "Number of zeros of ed_charg_tot_pre_ed:"
sum(d$ed_charg_tot_pre_ed == 0, na.rm = TRUE)
## [1] 8280
# Create categories for charge = 0
d$tot_charge_0 <- as.numeric(d$charg_tot_pre_ed == 0)
d$ed_charge_0 <- as.numeric(d$ed_charg_tot_pre_ed == 0)

# Print summary statistics of new charge variables
print("Distribution of tot_charge_0:")
## [1] "Distribution of tot_charge_0:"
table(d$tot_charge_0, useNA = "ifany")
## 
##    0    1 
## 3935 8273
print("Distribution of ed_charge_0:")
## [1] "Distribution of ed_charge_0:"
table(d$ed_charge_0, useNA = "ifany")
## 
##    0    1 
## 3928 8280

Create penalized outcomes

#####STEP 3-4: Create penalized outcomes #####
# Loop through each outcome and create penalized versions
for(outcome_info in outcomes_list) {
  outcome_name <- outcome_info[[1]]
  max_cw <- outcome_info[[2]]
  
  # Print summary statistics of the outcome
  print(paste("Summary statistics of", outcome_name, ":", sep=" "))
  print(summary(d[[outcome_name]]))

  # Define penalty values
  cw_values <- seq(0, max_cw, length.out=cw_grid)

  # For each penalty value, create new penalized outcome
  for(i in seq_along(cw_values)) {
    cw <- cw_values[i]
    new_col_name <- paste0(outcome_name, "_cw", i-1)
    
    # Check if outcome is continuous
    if(outcome_name %in% continuous_outcomes) {
      # Current continuous variable logic
      d[[new_col_name]] <- d[[outcome_name]] - cw * d$eligibility
    
    # Check if outcome is binary
    } else if(outcome_name %in% binary_outcomes) {
      # Initialize new column with original values
      d[[new_col_name]] <- d[[outcome_name]]
      
      # For rows where outcome is 1, flip to 0 with probability = cw
      ones_idx <- which(d[[outcome_name]] == 1)
      for(idx in ones_idx) {
        if(runif(1) < (cw * d$eligibility[idx])) {
          d[[new_col_name]][idx] <- 0
        }
      }
      
    } else {
      stop(paste("Error: outcome", outcome_name, "is in neither continuous nor binary list."))
    }
    
    print(paste("Summary statistics of", new_col_name, ":", sep=" "))
    print(summary(d[[new_col_name]]))
  }
}
## [1] "Summary statistics of sbp_neg :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    -229    -128    -117    -119    -108     -72      41 
## [1] "Summary statistics of sbp_neg_cw0 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    -229    -128    -117    -119    -108     -72      41 
## [1] "Summary statistics of sbp_neg_cw1 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -228.0  -118.0  -103.0  -103.8   -86.8   -46.8      41 
## [1] "Summary statistics of sbp_neg_cw2 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -228.0  -117.0   -89.5   -88.6   -57.5   -17.5      41 
## [1] "Summary statistics of sbp_neg_cw3 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -228.0  -116.0   -63.2   -73.3   -28.2    11.8      41 
## [1] "Summary statistics of sbp_neg_cw4 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    -228    -116     -34     -58       1      41      41 
## [1] "Summary statistics of hdl_level_neg :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -139.08  -55.08  -46.08  -47.70  -38.58    1.92      57 
## [1] "Summary statistics of hdl_level_neg_cw0 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -139.08  -55.08  -46.08  -47.70  -38.58    1.92      57 
## [1] "Summary statistics of hdl_level_neg_cw1 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -134.58  -50.31  -40.83  -41.69  -31.56    2.94      57 
## [1] "Summary statistics of hdl_level_neg_cw2 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -134.6   -47.6   -35.0   -35.7   -22.3    14.5      57 
## [1] "Summary statistics of hdl_level_neg_cw3 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -134.6   -46.1   -29.6   -29.7   -11.5    26.0      57 
## [1] "Summary statistics of hdl_level_neg_cw4 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -134.6   -46.1   -23.6   -23.6     0.0    37.5      57 
## [1] "Summary statistics of debt_neg :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.45    1.00    1.00     114 
## [1] "Summary statistics of debt_neg_cw0 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.45    1.00    1.00     114 
## [1] "Summary statistics of debt_neg_cw1 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.42    1.00    1.00     114 
## [1] "Summary statistics of debt_neg_cw2 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.39    1.00    1.00     114 
## [1] "Summary statistics of debt_neg_cw3 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.37    1.00    1.00     114 
## [1] "Summary statistics of debt_neg_cw4 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.34    1.00    1.00     114 
## [1] "Summary statistics of ohp_all_ever_inperson :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.311   1.000   1.000 
## [1] "Summary statistics of ohp_all_ever_inperson_cw0 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.311   1.000   1.000 
## [1] "Summary statistics of ohp_all_ever_inperson_cw1 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.311   1.000   1.000 
## [1] "Summary statistics of ohp_all_ever_inperson_cw2 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.311   1.000   1.000 
## [1] "Summary statistics of ohp_all_ever_inperson_cw3 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.311   1.000   1.000 
## [1] "Summary statistics of ohp_all_ever_inperson_cw4 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.311   1.000   1.000 
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -240.4  -137.0  -119.0  -122.0  -106.0   -30.6      41 
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw0 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -240.4  -137.0  -119.0  -122.0  -106.0   -30.6      41 
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw1 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -240.39 -126.00 -105.25 -106.47  -84.25   -9.86      41 
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw2 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -240.4  -121.0   -93.0   -90.9   -56.5    19.9      41 
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw3 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -240.4  -120.0   -71.8   -75.4   -26.8    49.6      41 
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw4 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -240.39 -120.00  -48.39  -59.86    2.91   79.39      41 
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -139.1   -62.5   -48.3   -50.0   -37.8    13.9      57 
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw0 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -139.1   -62.5   -48.3   -50.0   -37.8    13.9      57 
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw1 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -134.6   -57.1   -42.8   -44.0   -30.8    25.4      57 
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw2 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -134.6   -53.6   -37.8   -38.0   -21.3    36.9      57 
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw3 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -134.6   -51.3   -32.6   -32.0   -10.8    48.4      57 
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw4 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -134.6   -49.8   -27.3   -25.9     0.4    59.9      57 
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.43    1.00    1.00     114 
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw0 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.43    1.00    1.00     114 
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw1 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     0.0     0.0     0.0     0.4     1.0     1.0     114 
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw2 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.37    1.00    1.00     114 
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw3 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.35    1.00    1.00     114 
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw4 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.32    1.00    1.00     114

Test outcome = outcome_cw0

#####STEP 3-5: Test outcome = outcome_cw0 ##### 
# Loop through each outcome and check if outcome = outcome_cw0
for(outcome_info in outcomes_list) {
  outcome_name <- outcome_info[[1]]
  cw0_name <- paste0(outcome_name, "_cw0")
  
  # Check if values are identical
  if(!all(d[[outcome_name]] == d[[cw0_name]], na.rm=TRUE)) {
    stop(paste("Error:", outcome_name, "is not the same as", cw0_name))
  }
  
  # If we get here, they are identical so drop the original column
  d[[outcome_name]] <- NULL
  print(paste("Dropped", outcome_name, "since it equals", cw0_name))
}
## [1] "Dropped sbp_neg since it equals sbp_neg_cw0"
## [1] "Dropped hdl_level_neg since it equals hdl_level_neg_cw0"
## [1] "Dropped debt_neg since it equals debt_neg_cw0"
## [1] "Dropped ohp_all_ever_inperson since it equals ohp_all_ever_inperson_cw0"
## [1] "Dropped sim_sbp_neg_alpha_5_presentation since it equals sim_sbp_neg_alpha_5_presentation_cw0"
## [1] "Dropped sim_hdl_level_neg_alpha_5_presentation since it equals sim_hdl_level_neg_alpha_5_presentation_cw0"
## [1] "Dropped sim_debt_neg_alpha_5_presentation since it equals sim_debt_neg_alpha_5_presentation_cw0"

Write output file

#####STEP 3-6: Write output file ##### 
write.csv(d, paste0(result_dir, "3_Penalty_Outcomes_Wide_Dataset.csv"))