Set up environment

#####STEP 0-1: Reset environment #####
rm(list=ls())
knitr::opts_chunk$set(echo = TRUE)
options(repos = structure(c(CRAN = "http://cran.rstudio.com/")))

#####STEP 0-2: Install packages #####
list.of.packages <- c("grf", "metafor", "splitstackshape", "dplyr", 
                     "tidyverse", "foreach", "cowplot", "reshape2", 
                     "doParallel", "survival", "readstata13", "ggplot2", 
                     "rsample", "DiagrammeR", "e1071", "pscl", "pROC", 
                     "caret", "ModelMetrics", "MatchIt", "Hmisc", "scales",
                     "lmtest", "sandwich", "haven", "rpms", "randomForest",
                     "fabricatr", "gridExtra", "VIM", "mice",
                     "lmtest", "ivreg", "kableExtra")
new.packages <- list.of.packages[!(list.of.packages %in% 
                                 installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

lapply(list.of.packages, library, character.only = TRUE)

## Warning: package 'grf' was built under R version 4.3.3

## Warning: package 'metafor' was built under R version 4.3.3

## Warning: package 'metadat' was built under R version 4.3.3

## Warning: package 'numDeriv' was built under R version 4.3.1

## Warning: package 'splitstackshape' was built under R version 4.3.3

## Warning: package 'dplyr' was built under R version 4.3.3

## Warning: package 'tidyverse' was built under R version 4.3.3

## Warning: package 'ggplot2' was built under R version 4.3.3

## Warning: package 'tibble' was built under R version 4.3.3

## Warning: package 'tidyr' was built under R version 4.3.3

## Warning: package 'readr' was built under R version 4.3.3

## Warning: package 'purrr' was built under R version 4.3.3

## Warning: package 'stringr' was built under R version 4.3.3

## Warning: package 'forcats' was built under R version 4.3.3

## Warning: package 'lubridate' was built under R version 4.3.3

## Warning: package 'foreach' was built under R version 4.3.3

## Warning: package 'cowplot' was built under R version 4.3.3

## Warning: package 'reshape2' was built under R version 4.3.3

## Warning: package 'doParallel' was built under R version 4.3.3

## Warning: package 'iterators' was built under R version 4.3.3

## Warning: package 'readstata13' was built under R version 4.3.3

## Warning: package 'rsample' was built under R version 4.3.3

## Warning: package 'DiagrammeR' was built under R version 4.3.3

## Warning: package 'e1071' was built under R version 4.3.3

## Warning: package 'pscl' was built under R version 4.3.3

## Warning: package 'pROC' was built under R version 4.3.3

## Warning: package 'caret' was built under R version 4.3.3

## Warning: package 'ModelMetrics' was built under R version 4.3.3

## Warning: package 'MatchIt' was built under R version 4.3.3

## Warning: package 'Hmisc' was built under R version 4.3.3

## Warning: package 'scales' was built under R version 4.3.3

## Warning: package 'lmtest' was built under R version 4.3.3

## Warning: package 'zoo' was built under R version 4.3.3

## Warning: package 'sandwich' was built under R version 4.3.3

## Warning: package 'haven' was built under R version 4.3.3

## Warning: package 'rpms' was built under R version 4.3.3

## Warning: package 'randomForest' was built under R version 4.3.3

## Warning: package 'fabricatr' was built under R version 4.3.3

## Warning: package 'gridExtra' was built under R version 4.3.3

## Warning: package 'VIM' was built under R version 4.3.3

## Warning: package 'colorspace' was built under R version 4.3.3

## Warning: package 'mice' was built under R version 4.3.3

## Warning in check_dep_version(): ABI version mismatch: 
## lme4 was built with Matrix ABI version 1
## Current Matrix ABI version is 0
## Please re-install lme4 from source or restore original 'Matrix' package

## Warning: package 'ivreg' was built under R version 4.3.3

## Warning: package 'kableExtra' was built under R version 4.3.3

print(paste("Version of grf package:", packageVersion("grf")))

#####STEP 0-3: Basic information #####
Sys.time()
# Get detailed R session and system information
session_info <- sessionInfo()
system_info <- Sys.info()
# Combine the output
list(session_info = session_info, system_info = system_info)

Set non-run-specific parameters

#####STEP 0-4: Set non-run-specific parameters #####
seedset <- 777
numthreadsset <- min(6, parallel::detectCores()) 
if (numthreadsset!= 6) {
  print("the results of grf vary by num.thread (publication paper used num.thread=6)")
} 

cat("number of threads (affects grf results):", numthreadsset,"\n")

## number of threads (affects grf results): 6

# Set printing options
options(digits = 4)

Set run-specific parameters

#####STEP 0-5: Set run-specific parameters #####
published_paper_run <- 0 # ANALYST FORM
if (published_paper_run == 1) {
  print("save intermediate files into Cleaned_input_data/As_published/empirical/ folders")
  warning("Changing this setting to 1 overwrites the input files required for 
          replicating on different platforms.")
} else {
  print("save intermediate files into Cleaned_input_data/Testing/empirical/ folders")
}

## [1] "save intermediate files into Cleaned_input_data/Testing/empirical/ folders"

nfolds <- 10 # Set number of folds

cw_run_name <- "med" # ANALYST FORM
cw_grid <- 5 # ANALYST FORM

# Set cW name based on cW_run_name and cW_grid
cw_name <- paste0("cw_", cw_run_name, "_", cw_grid)

# List of outcomes and their maximum C_w values
outcomes_list <- list( # ANALYST FORM
  list("sbp_neg", -117.00),
  list("hdl_level_neg", -46.08),
  list("debt_neg", 0.46),
  list("ohp_all_ever_inperson", 0), 
  list("sim_sbp_neg_alpha_5_presentation", -119.00),
  list("sim_hdl_level_neg_alpha_5_presentation", -46.08),
  list("sim_debt_neg_alpha_5_presentation", 0.43)
)

# List of outcomes that are continuous
continuous_outcomes <- c("sbp_neg",
                        "hdl_level_neg",
                        "sim_sbp_neg_alpha_5_presentation",
                        "sim_hdl_level_neg_alpha_5_presentation")

# List of outcomes that are binary
binary_outcomes <- c("ohp_all_ever_inperson",
                    "debt_neg",
                    "sim_debt_neg_alpha_5_presentation")

print("Outcomes and their maximum C_w values:")

## [1] "Outcomes and their maximum C_w values:"

for(outcome in outcomes_list) {
  print(paste("Outcome:", outcome[[1]], "Max C_w:", outcome[[2]]))
}

## [1] "Outcome: sbp_neg Max C_w: -117"
## [1] "Outcome: hdl_level_neg Max C_w: -46.08"
## [1] "Outcome: debt_neg Max C_w: 0.46"
## [1] "Outcome: ohp_all_ever_inperson Max C_w: 0"
## [1] "Outcome: sim_sbp_neg_alpha_5_presentation Max C_w: -119"
## [1] "Outcome: sim_hdl_level_neg_alpha_5_presentation Max C_w: -46.08"
## [1] "Outcome: sim_debt_neg_alpha_5_presentation Max C_w: 0.43"

print(paste("Number of grid points:", cw_grid))

## [1] "Number of grid points: 5"

print(paste("Number of folds:", nfolds))

## [1] "Number of folds: 10"

Set file paths

#####STEP 0-6: Set file paths #####
# Set the processed path based on published_paper_run
# ***Important: for file paths to work, use the drop-down menu on "Knit" to select the "Current Working Directory" under "Knit Directory." This assumes that you have cloned the repo to your local directory.***
processedpath <- if (published_paper_run == 1) {
    "PP_Full_Analysis/Cleaned_input_data/As_published/"
  } else {
    "PP_Full_Analysis/Cleaned_input_data/Testing/"
  }

# Set the results directory as cw_name subdirectory
result_dir <- paste0(processedpath, "empirical/", cw_name, "/")

# If there is not a folder with the name result_dir, create it
if (!dir.exists(result_dir)) {
  dir.create(result_dir, recursive = TRUE)
}

print(paste0("Processed data path:", processedpath, "empirical/"))

## [1] "Processed data path:PP_Full_Analysis/Cleaned_input_data/Testing/empirical/"

print(paste("Results directory:", result_dir))

## [1] "Results directory: PP_Full_Analysis/Cleaned_input_data/Testing/empirical/cw_med_5/"

Load data

Read in raw and simulated data, then combine them.

#####STEP 3-1: Read data #####
d_empirical <- read.csv(paste0(processedpath, "empirical/2_Imputed_Wide_Dataset.csv"))

print("Information on empirical dataset:")

## [1] "Information on empirical dataset:"

glimpse(d_empirical)

## Rows: 12,208
## Columns: 70
## $ numhh_list            <int> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ gender_inp            <int> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, …
## $ age_inp               <int> 60, 41, 39, 52, 51, 32, 34, 23, 43, 46, 38, 25, …
## $ hispanic_inp          <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ race_white_inp        <int> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, …
## $ race_black_inp        <int> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ race_nwother_inp      <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ast_dx_pre_lottery    <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, …
## $ dia_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hbp_dx_pre_lottery    <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ chl_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ami_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ chf_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ emp_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ kid_dx_pre_lottery    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cancer_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, …
## $ dep_dx_pre_lottery    <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, …
## $ charg_tot_pre_ed      <dbl> 0.0, 0.0, 1888.2, 0.0, 1715.3, 0.0, 0.0, 5743.9,…
## $ ed_charg_tot_pre_ed   <dbl> 0.0, 0.0, 1888.2, 0.0, 1006.3, 0.0, 0.0, 4542.4,…
## $ num_visit_pre_cens_ed <int> 0, 0, 1, 0, 2, 0, 0, 7, 0, 0, 0, 0, 1, 0, 0, 0, …
## $ any_depres_pre_ed     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ lessHS                <int> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ HSorGED               <int> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ X                     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ person_id             <int> 5, 8, 16, 17, 18, 23, 24, 29, 47, 57, 59, 68, 70…
## $ weight_total_inp      <dbl> 1.1504, 0.8975, 1.0000, 1.2126, 1.0000, 1.0033, …
## $ diabetes              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hypertension          <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ highcholesterol       <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression            <int> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ phq                   <int> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8, 7, 3, 2, 0, 2, …
## $ cvd_risk              <dbl> 0.1370, 0.1120, 0.0330, 0.2530, 0.1560, 0.0120, …
## $ doc_any               <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, …
## $ ed_any                <int> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ hosp_any              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ oop_spend             <int> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, …
## $ catastrophic          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ debt                  <int> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, …
## $ borrow                <int> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, …
## $ a1c                   <dbl> 5.037, 5.201, 5.854, 5.364, 5.527, 5.037, 5.446,…
## $ hdl_level             <dbl> 48.33, 51.33, 38.58, 51.33, 28.08, 31.08, 25.83,…
## $ chl_level             <dbl> 241.0, 229.9, 229.9, 235.4, 177.7, 173.8, 152.7,…
## $ bmi                   <dbl> 26.66, 35.23, 37.12, 24.81, 27.02, 26.26, 27.70,…
## $ sbp                   <int> 144, 134, 126, 168, 119, 98, 108, 125, 100, 104,…
## $ dbp                   <int> 81, 82, 94, 110, 79, 59, 63, 76, 77, 63, 84, 62,…
## $ prescriptions_any     <int> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, …
## $ prescriptions         <int> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, 4, 0, 4, 2, 1, 4,…
## $ hypertension_med      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ cholesterol_med       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ diabetes_med          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression_med        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, …
## $ household_id          <int> 100005, 102094, 140688, 100017, 100018, 115253, …
## $ eligibility           <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, …
## $ ohp_all_ever_inperson <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, …
## $ doc_num               <int> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12, 6, 0, 3, 0, 10, …
## $ ed_num                <int> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, 0, 2, 0, 0, 0, 0,…
## $ hosp_num              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, …
## $ charg_tot_ed          <dbl> 0, 2751, 15233, NA, 0, 0, 0, 8436, 0, NA, 0, 0, …
## $ ed_charg_tot_ed       <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0.0, 0.0, 7067.0, …
## $ sbp_neg               <int> -144, -134, -126, -168, -119, -98, -108, -125, -…
## $ dbp_neg               <int> -81, -82, -94, -110, -79, -59, -63, -76, -77, -6…
## $ chl_level_neg         <dbl> -241.0, -229.9, -229.9, -235.4, -177.7, -173.8, …
## $ hdl_level_neg         <dbl> -48.33, -51.33, -38.58, -51.33, -28.08, -31.08, …
## $ a1c_neg               <dbl> -5.037, -5.201, -5.854, -5.364, -5.527, -5.037, …
## $ bmi_neg               <dbl> -26.66, -35.23, -37.12, -24.81, -27.02, -26.26, …
## $ phq_neg               <int> -1, -9, -2, -13, -2, -3, -2, -14, -11, -8, -7, -…
## $ cvd_risk_neg          <dbl> -0.1370, -0.1120, -0.0330, -0.2530, -0.1560, -0.…
## $ debt_neg              <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, …
## $ borrow_neg            <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, …
## $ catastrophic_neg      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …

#####STEP 3-6: Read simulated data #####
d_simulated <- read.csv(paste0(processedpath, "simulated/2b_Simulated_Outcomes.csv"))

print("Information on simulated dataset:")

## [1] "Information on simulated dataset:"

glimpse(d_simulated)

## Rows: 12,208
## Columns: 8
## $ person_id                              <int> 5, 8, 16, 17, 18, 23, 24, 29, 4…
## $ sim_sbp                                <dbl> 248.979, 160.146, 251.084, 160.…
## $ sim_debt_neg_alpha_5                   <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ sim_sbp_neg_alpha_5                    <dbl> -144.00, -134.00, -84.61, -168.…
## $ sim_hdl_level_neg_alpha_5              <dbl> -48.33, -51.33, -5.64, -51.33, …
## $ sim_debt_neg_alpha_5_presentation      <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ sim_sbp_neg_alpha_5_presentation       <dbl> -144.00, -134.00, -84.61, -168.…
## $ sim_hdl_level_neg_alpha_5_presentation <dbl> -48.33, -51.33, -5.64, -51.33, …

#####STEP 3-7: Combine empirical and simulated data #####
# Combine empirical and simulated data based on person_id
d <- merge(d_empirical, d_simulated, by = "person_id", all = TRUE)

print("Information on combined dataset:")

## [1] "Information on combined dataset:"

glimpse(d)

## Rows: 12,208
## Columns: 77
## $ person_id                              <int> 5, 8, 16, 17, 18, 23, 24, 29, 4…
## $ numhh_list                             <int> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1…
## $ gender_inp                             <int> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1…
## $ age_inp                                <int> 60, 41, 39, 52, 51, 32, 34, 23,…
## $ hispanic_inp                           <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0…
## $ race_white_inp                         <int> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1…
## $ race_black_inp                         <int> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0…
## $ race_nwother_inp                       <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ ast_dx_pre_lottery                     <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0…
## $ dia_dx_pre_lottery                     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ hbp_dx_pre_lottery                     <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1…
## $ chl_dx_pre_lottery                     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ ami_dx_pre_lottery                     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ chf_dx_pre_lottery                     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ emp_dx_pre_lottery                     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ kid_dx_pre_lottery                     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cancer_dx_pre_lottery                  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ dep_dx_pre_lottery                     <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1…
## $ charg_tot_pre_ed                       <dbl> 0.0, 0.0, 1888.2, 0.0, 1715.3, …
## $ ed_charg_tot_pre_ed                    <dbl> 0.0, 0.0, 1888.2, 0.0, 1006.3, …
## $ num_visit_pre_cens_ed                  <int> 0, 0, 1, 0, 2, 0, 0, 7, 0, 0, 0…
## $ any_depres_pre_ed                      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ lessHS                                 <int> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0…
## $ HSorGED                                <int> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0…
## $ X                                      <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, …
## $ weight_total_inp                       <dbl> 1.1504, 0.8975, 1.0000, 1.2126,…
## $ diabetes                               <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ hypertension                           <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ highcholesterol                        <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ depression                             <int> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, …
## $ phq                                    <int> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8…
## $ cvd_risk                               <dbl> 0.1370, 0.1120, 0.0330, 0.2530,…
## $ doc_any                                <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1…
## $ ed_any                                 <int> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0…
## $ hosp_any                               <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ oop_spend                              <int> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1…
## $ catastrophic                           <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ debt                                   <int> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1…
## $ borrow                                 <int> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1…
## $ a1c                                    <dbl> 5.037, 5.201, 5.854, 5.364, 5.5…
## $ hdl_level                              <dbl> 48.33, 51.33, 38.58, 51.33, 28.…
## $ chl_level                              <dbl> 241.0, 229.9, 229.9, 235.4, 177…
## $ bmi                                    <dbl> 26.66, 35.23, 37.12, 24.81, 27.…
## $ sbp                                    <int> 144, 134, 126, 168, 119, 98, 10…
## $ dbp                                    <int> 81, 82, 94, 110, 79, 59, 63, 76…
## $ prescriptions_any                      <int> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1…
## $ prescriptions                          <int> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, …
## $ hypertension_med                       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ cholesterol_med                        <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ diabetes_med                           <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ depression_med                         <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ household_id                           <int> 100005, 102094, 140688, 100017,…
## $ eligibility                            <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0…
## $ ohp_all_ever_inperson                  <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0…
## $ doc_num                                <int> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12,…
## $ ed_num                                 <int> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, …
## $ hosp_num                               <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ charg_tot_ed                           <dbl> 0, 2751, 15233, NA, 0, 0, 0, 84…
## $ ed_charg_tot_ed                        <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0…
## $ sbp_neg                                <int> -144, -134, -126, -168, -119, -…
## $ dbp_neg                                <int> -81, -82, -94, -110, -79, -59, …
## $ chl_level_neg                          <dbl> -241.0, -229.9, -229.9, -235.4,…
## $ hdl_level_neg                          <dbl> -48.33, -51.33, -38.58, -51.33,…
## $ a1c_neg                                <dbl> -5.037, -5.201, -5.854, -5.364,…
## $ bmi_neg                                <dbl> -26.66, -35.23, -37.12, -24.81,…
## $ phq_neg                                <int> -1, -9, -2, -13, -2, -3, -2, -1…
## $ cvd_risk_neg                           <dbl> -0.1370, -0.1120, -0.0330, -0.2…
## $ debt_neg                               <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ borrow_neg                             <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0…
## $ catastrophic_neg                       <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ sim_sbp                                <dbl> 248.979, 160.146, 251.084, 160.…
## $ sim_debt_neg_alpha_5                   <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ sim_sbp_neg_alpha_5                    <dbl> -144.00, -134.00, -84.61, -168.…
## $ sim_hdl_level_neg_alpha_5              <dbl> -48.33, -51.33, -5.64, -51.33, …
## $ sim_debt_neg_alpha_5_presentation      <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ sim_sbp_neg_alpha_5_presentation       <dbl> -144.00, -134.00, -84.61, -168.…
## $ sim_hdl_level_neg_alpha_5_presentation <dbl> -48.33, -51.33, -5.64, -51.33, …

Create folds

#####STEP 3-2: Create and assign folds #####
set.seed(seedset)  # Set seed for reproducibility
d <- d %>%
  mutate(fold = sample(1:nfolds, n(), replace = TRUE)) # Assign folds to each row

print("Information on folds:")

## [1] "Information on folds:"

table(d$fold)

## 
##    1    2    3    4    5    6    7    8    9   10 
## 1303 1253 1179 1242 1175 1235 1214 1218 1201 1188

fold_summaries <- d %>%
  group_by(fold) %>%
  summarise(
    across(where(is.numeric), ~ mean(., na.rm = TRUE), .names = "avg_{.col}"),
    across(where(is.factor), ~ mean(as.numeric(.), na.rm = TRUE), 
           .names = "prop_{.col}")
  )

# Create a kable table
kable(fold_summaries, format = "markdown")

fold	avg_person_id	avg_numhh_list	avg_gender_inp	avg_age_inp	avg_hispanic_inp	avg_race_white_inp	avg_race_black_inp	avg_race_nwother_inp	avg_ast_dx_pre_lottery	avg_dia_dx_pre_lottery	avg_hbp_dx_pre_lottery	avg_chl_dx_pre_lottery	avg_ami_dx_pre_lottery	avg_chf_dx_pre_lottery	avg_emp_dx_pre_lottery	avg_kid_dx_pre_lottery	avg_cancer_dx_pre_lottery	avg_dep_dx_pre_lottery	avg_charg_tot_pre_ed	avg_ed_charg_tot_pre_ed	avg_num_visit_pre_cens_ed	avg_any_depres_pre_ed	avg_lessHS	avg_HSorGED	avg_X	avg_weight_total_inp	avg_diabetes	avg_hypertension	avg_highcholesterol	avg_depression	avg_phq	avg_cvd_risk	avg_doc_any	avg_ed_any	avg_hosp_any	avg_oop_spend	avg_catastrophic	avg_debt	avg_borrow	avg_a1c	avg_hdl_level	avg_chl_level	avg_bmi	avg_sbp	avg_dbp	avg_prescriptions_any	avg_prescriptions	avg_hypertension_med	avg_cholesterol_med	avg_diabetes_med	avg_depression_med	avg_household_id	avg_eligibility	avg_ohp_all_ever_inperson	avg_doc_num	avg_ed_num	avg_hosp_num	avg_charg_tot_ed	avg_ed_charg_tot_ed	avg_sbp_neg	avg_dbp_neg	avg_chl_level_neg	avg_hdl_level_neg	avg_a1c_neg	avg_bmi_neg	avg_phq_neg	avg_cvd_risk_neg	avg_debt_neg	avg_borrow_neg	avg_catastrophic_neg	avg_sim_sbp	avg_sim_debt_neg_alpha_5	avg_sim_sbp_neg_alpha_5	avg_sim_hdl_level_neg_alpha_5	avg_sim_debt_neg_alpha_5_presentation	avg_sim_sbp_neg_alpha_5_presentation	avg_sim_hdl_level_neg_alpha_5_presentation
1	37499	1.236	0.5664	40.63	0.1896	0.6884	0.0990	0.1589	0.2049	0.0714	0.1742	0.1282	0.0200	0.0061	0.0207	0.0169	0.0345	0.3415	2452	999.2	0.8074	0.0123	0.2218	0.4474	6104	1.244	0.0224	0.0596	0.0666	0.0496	6.584	0.0815	0.6567	0.4040	0.1413	0.5862	0.0437	0.5573	0.2235	5.348	48.20	205.7	30.03	119.4	75.95	0.5411	1.903	0.1466	0.0936	0.0760	0.1727	140365	0.5280	0.3239	5.706	1.0146	0.2121	3284	1365	-119.4	-75.95	-205.7	-48.20	-5.348	-30.03	-6.584	-0.0815	0.4427	0.7765	0.9563	197.7	0.4203	-121.5	-49.89	0.4203	-121.5	-49.89
2	36399	1.257	0.5786	40.61	0.1828	0.6784	0.1030	0.1357	0.2003	0.0670	0.1931	0.1277	0.0200	0.0112	0.0247	0.0231	0.0503	0.3272	2419	946.2	0.8244	0.0223	0.2099	0.4653	5924	1.258	0.0184	0.0565	0.0637	0.0525	6.709	0.0790	0.6896	0.4032	0.1288	0.5720	0.0544	0.5494	0.2187	5.314	48.35	206.2	29.74	118.9	75.38	0.5587	1.829	0.1325	0.0838	0.0607	0.1668	139845	0.5164	0.2889	6.410	0.9888	0.2037	3628	1725	-118.9	-75.38	-206.2	-48.35	-5.314	-29.74	-6.709	-0.0790	0.4506	0.7813	0.9456	201.2	0.4145	-122.2	-50.95	0.4145	-122.2	-50.95
3	38011	1.229	0.5810	40.78	0.1824	0.7048	0.0958	0.1187	0.1883	0.0729	0.1891	0.1374	0.0170	0.0076	0.0246	0.0153	0.0382	0.3427	1906	885.0	0.7498	0.0187	0.2078	0.4521	6189	1.246	0.0136	0.0601	0.0643	0.0478	6.925	0.0808	0.6689	0.4049	0.1197	0.5742	0.0534	0.5380	0.2351	5.340	46.79	204.5	30.20	118.8	75.58	0.5496	1.927	0.1510	0.0916	0.0780	0.1705	141094	0.5344	0.3028	5.522	0.9770	0.1811	3211	1454	-118.8	-75.58	-204.5	-46.79	-5.340	-30.20	-6.925	-0.0808	0.4620	0.7649	0.9466	198.5	0.4526	-122.1	-49.40	0.4526	-122.1	-49.40
4	37765	1.244	0.5564	40.95	0.1683	0.6997	0.1023	0.1441	0.1916	0.0676	0.1667	0.1232	0.0225	0.0137	0.0274	0.0217	0.0467	0.3341	2192	854.2	0.8172	0.0185	0.1981	0.4557	6148	1.225	0.0121	0.0576	0.0664	0.0658	6.726	0.0825	0.6820	0.4153	0.1435	0.5606	0.0575	0.5720	0.2119	5.314	47.56	205.2	29.72	119.1	75.78	0.5564	1.801	0.1272	0.0853	0.0652	0.1538	140672	0.5201	0.3132	6.075	1.0057	0.2138	3448	1342	-119.1	-75.78	-205.2	-47.56	-5.314	-29.72	-6.726	-0.0825	0.4280	0.7881	0.9425	199.8	0.4052	-122.5	-50.26	0.4052	-122.5	-50.26
5	38389	1.260	0.5532	41.44	0.1745	0.6706	0.1157	0.1481	0.2000	0.0706	0.1906	0.1226	0.0196	0.0111	0.0170	0.0170	0.0443	0.3421	2011	838.6	0.7532	0.0128	0.2034	0.4315	6250	1.236	0.0137	0.0481	0.0654	0.0551	6.893	0.0841	0.6661	0.3986	0.1261	0.5656	0.0434	0.5437	0.2383	5.335	48.68	207.4	29.74	120.1	76.34	0.5307	1.897	0.1353	0.0860	0.0723	0.1855	141058	0.5319	0.3294	5.841	0.9343	0.2022	2618	1175	-120.1	-76.34	-207.4	-48.68	-5.335	-29.74	-6.893	-0.0841	0.4563	0.7617	0.9566	200.0	0.4161	-123.6	-51.41	0.4161	-123.6	-51.41
6	37575	1.224	0.5846	40.58	0.1725	0.6802	0.1117	0.1555	0.1911	0.0607	0.1749	0.1174	0.0178	0.0089	0.0259	0.0121	0.0445	0.3441	1812	817.0	0.7150	0.0154	0.2146	0.4348	6117	1.257	0.0163	0.0781	0.0701	0.0591	7.027	0.0767	0.6778	0.3994	0.1308	0.5857	0.0463	0.5545	0.2344	5.324	48.00	206.8	30.03	118.5	75.58	0.5652	2.049	0.1466	0.0955	0.0640	0.1919	140096	0.5158	0.2939	6.192	1.0342	0.2060	3877	1399	-118.5	-75.58	-206.8	-48.00	-5.324	-30.03	-7.027	-0.0767	0.4455	0.7656	0.9537	198.0	0.4128	-121.2	-50.09	0.4128	-121.2	-50.09
7	37796	1.246	0.5535	41.04	0.1730	0.6829	0.1063	0.1524	0.1730	0.0758	0.1977	0.1310	0.0231	0.0107	0.0214	0.0198	0.0404	0.3386	2057	824.1	0.7100	0.0107	0.2158	0.4613	6153	1.227	0.0132	0.0505	0.0578	0.0399	6.970	0.0848	0.6636	0.3977	0.1245	0.5677	0.0412	0.5375	0.2244	5.349	47.23	204.0	29.81	119.6	76.00	0.5544	1.900	0.1474	0.0980	0.0725	0.1779	140679	0.4942	0.3097	5.362	0.9163	0.1800	2800	1339	-119.6	-76.00	-204.0	-47.23	-5.349	-29.81	-6.970	-0.0848	0.4625	0.7756	0.9588	200.1	0.4367	-122.3	-49.42	0.4367	-122.3	-49.42
8	37003	1.250	0.5525	40.47	0.1823	0.6929	0.0878	0.1420	0.1929	0.0788	0.1650	0.1174	0.0181	0.0123	0.0181	0.0148	0.0411	0.3374	2151	932.7	0.8112	0.0131	0.1864	0.4598	6023	1.231	0.0157	0.0624	0.0637	0.0522	6.944	0.0779	0.6604	0.4145	0.1472	0.5442	0.0397	0.5444	0.2204	5.345	47.79	205.5	29.78	118.4	75.49	0.5484	1.917	0.1388	0.0846	0.0747	0.1897	140584	0.5230	0.3136	6.401	1.0635	0.2287	3637	1616	-118.4	-75.49	-205.5	-47.79	-5.345	-29.78	-6.944	-0.0779	0.4556	0.7796	0.9603	200.3	0.4299	-120.9	-49.76	0.4299	-120.9	-49.76
9	37606	1.251	0.5737	40.99	0.1907	0.6719	0.1099	0.1507	0.1990	0.0691	0.2015	0.1324	0.0150	0.0133	0.0241	0.0225	0.0425	0.3372	1964	871.7	0.8060	0.0125	0.1948	0.4721	6122	1.236	0.0226	0.0553	0.0703	0.0472	6.670	0.0804	0.6542	0.4242	0.1125	0.5617	0.0389	0.5386	0.2132	5.348	47.65	205.9	29.86	119.8	76.40	0.5517	1.892	0.1357	0.0849	0.0683	0.1624	140770	0.5287	0.3106	5.477	1.0159	0.1548	3423	1481	-119.8	-76.40	-205.9	-47.65	-5.348	-29.86	-6.670	-0.0804	0.4614	0.7868	0.9611	198.3	0.4371	-122.2	-49.57	0.4371	-122.2	-49.57
10	37022	1.252	0.5556	40.53	0.1827	0.7003	0.1002	0.1322	0.1911	0.0774	0.1667	0.1296	0.0244	0.0160	0.0160	0.0227	0.0455	0.3628	2360	930.0	0.8359	0.0168	0.1961	0.4621	6027	1.242	0.0160	0.0538	0.0589	0.0454	6.753	0.0805	0.6765	0.3997	0.1289	0.5747	0.0393	0.5414	0.2291	5.327	46.69	203.2	29.73	118.4	75.16	0.5463	1.931	0.1380	0.0901	0.0774	0.1810	139994	0.5286	0.3241	6.438	0.9949	0.1755	3221	1430	-118.4	-75.16	-203.2	-46.69	-5.327	-29.73	-6.753	-0.0805	0.4586	0.7709	0.9607	199.1	0.4381	-121.7	-49.33	0.4381	-121.7	-49.33

Create charges variables

#####STEP 3-3: Create charges variables #####
# Print summary statistics and number of zeros of charge variables
print("Summary statistics of charg_tot_pre_ed:")

## [1] "Summary statistics of charg_tot_pre_ed:"

summary(d$charg_tot_pre_ed)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0    2136     744  180055

print("Number of zeros of charg_tot_pre_ed:")

## [1] "Number of zeros of charg_tot_pre_ed:"

sum(d$charg_tot_pre_ed == 0, na.rm = TRUE)

## [1] 8273

print("Summary statistics of ed_charg_tot_pre_ed:")

## [1] "Summary statistics of ed_charg_tot_pre_ed:"

summary(d$ed_charg_tot_pre_ed)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0     891     603   41246

print("Number of zeros of ed_charg_tot_pre_ed:")

## [1] "Number of zeros of ed_charg_tot_pre_ed:"

sum(d$ed_charg_tot_pre_ed == 0, na.rm = TRUE)

## [1] 8280

# Create categories for charge = 0
d$tot_charge_0 <- as.numeric(d$charg_tot_pre_ed == 0)
d$ed_charge_0 <- as.numeric(d$ed_charg_tot_pre_ed == 0)

# Print summary statistics of new charge variables
print("Distribution of tot_charge_0:")

## [1] "Distribution of tot_charge_0:"

table(d$tot_charge_0, useNA = "ifany")

## 
##    0    1 
## 3935 8273

print("Distribution of ed_charge_0:")

## [1] "Distribution of ed_charge_0:"

table(d$ed_charge_0, useNA = "ifany")

## 
##    0    1 
## 3928 8280

Create penalized outcomes

#####STEP 3-4: Create penalized outcomes #####
# Loop through each outcome and create penalized versions
for(outcome_info in outcomes_list) {
  outcome_name <- outcome_info[[1]]
  max_cw <- outcome_info[[2]]
  
  # Print summary statistics of the outcome
  print(paste("Summary statistics of", outcome_name, ":", sep=" "))
  print(summary(d[[outcome_name]]))

  # Define penalty values
  cw_values <- seq(0, max_cw, length.out=cw_grid)

  # For each penalty value, create new penalized outcome
  for(i in seq_along(cw_values)) {
    cw <- cw_values[i]
    new_col_name <- paste0(outcome_name, "_cw", i-1)
    
    # Check if outcome is continuous
    if(outcome_name %in% continuous_outcomes) {
      # Current continuous variable logic
      d[[new_col_name]] <- d[[outcome_name]] - cw * d$eligibility
    
    # Check if outcome is binary
    } else if(outcome_name %in% binary_outcomes) {
      # Initialize new column with original values
      d[[new_col_name]] <- d[[outcome_name]]
      
      # For rows where outcome is 1, flip to 0 with probability = cw
      ones_idx <- which(d[[outcome_name]] == 1)
      for(idx in ones_idx) {
        if(runif(1) < (cw * d$eligibility[idx])) {
          d[[new_col_name]][idx] <- 0
        }
      }
      
    } else {
      stop(paste("Error: outcome", outcome_name, "is in neither continuous nor binary list."))
    }
    
    print(paste("Summary statistics of", new_col_name, ":", sep=" "))
    print(summary(d[[new_col_name]]))
  }
}

## [1] "Summary statistics of sbp_neg :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    -229    -128    -117    -119    -108     -72      41 
## [1] "Summary statistics of sbp_neg_cw0 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    -229    -128    -117    -119    -108     -72      41 
## [1] "Summary statistics of sbp_neg_cw1 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -228.0  -118.0  -103.0  -103.8   -86.8   -46.8      41 
## [1] "Summary statistics of sbp_neg_cw2 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -228.0  -117.0   -89.5   -88.6   -57.5   -17.5      41 
## [1] "Summary statistics of sbp_neg_cw3 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -228.0  -116.0   -63.2   -73.3   -28.2    11.8      41 
## [1] "Summary statistics of sbp_neg_cw4 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    -228    -116     -34     -58       1      41      41 
## [1] "Summary statistics of hdl_level_neg :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -139.08  -55.08  -46.08  -47.70  -38.58    1.92      57 
## [1] "Summary statistics of hdl_level_neg_cw0 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -139.08  -55.08  -46.08  -47.70  -38.58    1.92      57 
## [1] "Summary statistics of hdl_level_neg_cw1 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -134.58  -50.31  -40.83  -41.69  -31.56    2.94      57 
## [1] "Summary statistics of hdl_level_neg_cw2 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -134.6   -47.6   -35.0   -35.7   -22.3    14.5      57 
## [1] "Summary statistics of hdl_level_neg_cw3 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -134.6   -46.1   -29.6   -29.7   -11.5    26.0      57 
## [1] "Summary statistics of hdl_level_neg_cw4 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -134.6   -46.1   -23.6   -23.6     0.0    37.5      57 
## [1] "Summary statistics of debt_neg :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.45    1.00    1.00     114 
## [1] "Summary statistics of debt_neg_cw0 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.45    1.00    1.00     114 
## [1] "Summary statistics of debt_neg_cw1 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.42    1.00    1.00     114 
## [1] "Summary statistics of debt_neg_cw2 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.39    1.00    1.00     114 
## [1] "Summary statistics of debt_neg_cw3 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.37    1.00    1.00     114 
## [1] "Summary statistics of debt_neg_cw4 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.34    1.00    1.00     114 
## [1] "Summary statistics of ohp_all_ever_inperson :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.311   1.000   1.000 
## [1] "Summary statistics of ohp_all_ever_inperson_cw0 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.311   1.000   1.000 
## [1] "Summary statistics of ohp_all_ever_inperson_cw1 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.311   1.000   1.000 
## [1] "Summary statistics of ohp_all_ever_inperson_cw2 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.311   1.000   1.000 
## [1] "Summary statistics of ohp_all_ever_inperson_cw3 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.311   1.000   1.000 
## [1] "Summary statistics of ohp_all_ever_inperson_cw4 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.311   1.000   1.000 
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -240.4  -137.0  -119.0  -122.0  -106.0   -30.6      41 
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw0 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -240.4  -137.0  -119.0  -122.0  -106.0   -30.6      41 
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw1 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -240.39 -126.00 -105.25 -106.47  -84.25   -9.86      41 
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw2 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -240.4  -121.0   -93.0   -90.9   -56.5    19.9      41 
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw3 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -240.4  -120.0   -71.8   -75.4   -26.8    49.6      41 
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw4 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -240.39 -120.00  -48.39  -59.86    2.91   79.39      41 
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -139.1   -62.5   -48.3   -50.0   -37.8    13.9      57 
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw0 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -139.1   -62.5   -48.3   -50.0   -37.8    13.9      57 
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw1 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -134.6   -57.1   -42.8   -44.0   -30.8    25.4      57 
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw2 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -134.6   -53.6   -37.8   -38.0   -21.3    36.9      57 
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw3 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -134.6   -51.3   -32.6   -32.0   -10.8    48.4      57 
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw4 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -134.6   -49.8   -27.3   -25.9     0.4    59.9      57 
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.43    1.00    1.00     114 
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw0 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.43    1.00    1.00     114 
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw1 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     0.0     0.0     0.0     0.4     1.0     1.0     114 
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw2 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.37    1.00    1.00     114 
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw3 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.35    1.00    1.00     114 
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw4 :"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.32    1.00    1.00     114

Test outcome = outcome_cw0

#####STEP 3-5: Test outcome = outcome_cw0 ##### 
# Loop through each outcome and check if outcome = outcome_cw0
for(outcome_info in outcomes_list) {
  outcome_name <- outcome_info[[1]]
  cw0_name <- paste0(outcome_name, "_cw0")
  
  # Check if values are identical
  if(!all(d[[outcome_name]] == d[[cw0_name]], na.rm=TRUE)) {
    stop(paste("Error:", outcome_name, "is not the same as", cw0_name))
  }
  
  # If we get here, they are identical so drop the original column
  d[[outcome_name]] <- NULL
  print(paste("Dropped", outcome_name, "since it equals", cw0_name))
}

## [1] "Dropped sbp_neg since it equals sbp_neg_cw0"
## [1] "Dropped hdl_level_neg since it equals hdl_level_neg_cw0"
## [1] "Dropped debt_neg since it equals debt_neg_cw0"
## [1] "Dropped ohp_all_ever_inperson since it equals ohp_all_ever_inperson_cw0"
## [1] "Dropped sim_sbp_neg_alpha_5_presentation since it equals sim_sbp_neg_alpha_5_presentation_cw0"
## [1] "Dropped sim_hdl_level_neg_alpha_5_presentation since it equals sim_hdl_level_neg_alpha_5_presentation_cw0"
## [1] "Dropped sim_debt_neg_alpha_5_presentation since it equals sim_debt_neg_alpha_5_presentation_cw0"

Write output file

#####STEP 3-6: Write output file ##### 
write.csv(d, paste0(result_dir, "3_Penalty_Outcomes_Wide_Dataset.csv"))

3_Create_New_Outcomes

Kosuke Inoue, Revati Kapshikar, Kristine Koutout

2025-01-20

Set up environment

Set non-run-specific parameters

Set run-specific parameters

Set file paths

Load data

Create folds

Create charges variables

Create penalized outcomes

Test outcome = outcome_cw0

Write output file