#####STEP 0-1: Reset environment #####
rm(list=ls())
knitr::opts_chunk$set(echo = TRUE)
options(repos = structure(c(CRAN = "http://cran.rstudio.com/")))
#####STEP 0-2: Install packages #####
list.of.packages <- c("grf", "metafor", "splitstackshape", "dplyr",
"tidyverse", "foreach", "cowplot", "reshape2",
"doParallel", "survival", "readstata13", "ggplot2",
"rsample", "DiagrammeR", "e1071", "pscl", "pROC",
"caret", "ModelMetrics", "MatchIt", "Hmisc", "scales",
"lmtest", "sandwich", "haven", "rpms", "randomForest",
"fabricatr", "gridExtra", "VIM", "mice",
"lmtest", "ivreg", "kableExtra")
new.packages <- list.of.packages[!(list.of.packages %in%
installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
lapply(list.of.packages, library, character.only = TRUE)
## Warning: package 'grf' was built under R version 4.3.3
## Warning: package 'metafor' was built under R version 4.3.3
## Warning: package 'metadat' was built under R version 4.3.3
## Warning: package 'numDeriv' was built under R version 4.3.1
## Warning: package 'splitstackshape' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## Warning: package 'foreach' was built under R version 4.3.3
## Warning: package 'cowplot' was built under R version 4.3.3
## Warning: package 'reshape2' was built under R version 4.3.3
## Warning: package 'doParallel' was built under R version 4.3.3
## Warning: package 'iterators' was built under R version 4.3.3
## Warning: package 'readstata13' was built under R version 4.3.3
## Warning: package 'rsample' was built under R version 4.3.3
## Warning: package 'DiagrammeR' was built under R version 4.3.3
## Warning: package 'e1071' was built under R version 4.3.3
## Warning: package 'pscl' was built under R version 4.3.3
## Warning: package 'pROC' was built under R version 4.3.3
## Warning: package 'caret' was built under R version 4.3.3
## Warning: package 'ModelMetrics' was built under R version 4.3.3
## Warning: package 'MatchIt' was built under R version 4.3.3
## Warning: package 'Hmisc' was built under R version 4.3.3
## Warning: package 'scales' was built under R version 4.3.3
## Warning: package 'lmtest' was built under R version 4.3.3
## Warning: package 'zoo' was built under R version 4.3.3
## Warning: package 'sandwich' was built under R version 4.3.3
## Warning: package 'haven' was built under R version 4.3.3
## Warning: package 'rpms' was built under R version 4.3.3
## Warning: package 'randomForest' was built under R version 4.3.3
## Warning: package 'fabricatr' was built under R version 4.3.3
## Warning: package 'gridExtra' was built under R version 4.3.3
## Warning: package 'VIM' was built under R version 4.3.3
## Warning: package 'colorspace' was built under R version 4.3.3
## Warning: package 'mice' was built under R version 4.3.3
## Warning in check_dep_version(): ABI version mismatch:
## lme4 was built with Matrix ABI version 1
## Current Matrix ABI version is 0
## Please re-install lme4 from source or restore original 'Matrix' package
## Warning: package 'ivreg' was built under R version 4.3.3
## Warning: package 'kableExtra' was built under R version 4.3.3
print(paste("Version of grf package:", packageVersion("grf")))
#####STEP 0-3: Basic information #####
Sys.time()
# Get detailed R session and system information
session_info <- sessionInfo()
system_info <- Sys.info()
# Combine the output
list(session_info = session_info, system_info = system_info)
#####STEP 0-4: Set non-run-specific parameters #####
seedset <- 777
numthreadsset <- min(6, parallel::detectCores())
if (numthreadsset!= 6) {
print("the results of grf vary by num.thread (publication paper used num.thread=6)")
}
cat("number of threads (affects grf results):", numthreadsset,"\n")
## number of threads (affects grf results): 6
#####STEP 0-5: Set run-specific parameters #####
published_paper_run <- 0 # ANALYST FORM
if (published_paper_run == 1) {
print("save intermediate files into Cleaned_input_data/As_published/empirical/ folders")
warning("Changing this setting to 1 overwrites the input files required for
replicating on different platforms.")
} else {
print("save intermediate files into Cleaned_input_data/Testing/empirical/ folders")
}
## [1] "save intermediate files into Cleaned_input_data/Testing/empirical/ folders"
nfolds <- 10 # Set number of folds
cw_run_name <- "med" # ANALYST FORM
cw_grid <- 5 # ANALYST FORM
# Set cW name based on cW_run_name and cW_grid
cw_name <- paste0("cw_", cw_run_name, "_", cw_grid)
# List of outcomes and their maximum C_w values
outcomes_list <- list( # ANALYST FORM
list("sbp_neg", -117.00),
list("hdl_level_neg", -46.08),
list("debt_neg", 0.46),
list("ohp_all_ever_inperson", 0),
list("sim_sbp_neg_alpha_5_presentation", -119.00),
list("sim_hdl_level_neg_alpha_5_presentation", -46.08),
list("sim_debt_neg_alpha_5_presentation", 0.43)
)
# List of outcomes that are continuous
continuous_outcomes <- c("sbp_neg",
"hdl_level_neg",
"sim_sbp_neg_alpha_5_presentation",
"sim_hdl_level_neg_alpha_5_presentation")
# List of outcomes that are binary
binary_outcomes <- c("ohp_all_ever_inperson",
"debt_neg",
"sim_debt_neg_alpha_5_presentation")
print("Outcomes and their maximum C_w values:")
## [1] "Outcomes and their maximum C_w values:"
## [1] "Outcome: sbp_neg Max C_w: -117"
## [1] "Outcome: hdl_level_neg Max C_w: -46.08"
## [1] "Outcome: debt_neg Max C_w: 0.46"
## [1] "Outcome: ohp_all_ever_inperson Max C_w: 0"
## [1] "Outcome: sim_sbp_neg_alpha_5_presentation Max C_w: -119"
## [1] "Outcome: sim_hdl_level_neg_alpha_5_presentation Max C_w: -46.08"
## [1] "Outcome: sim_debt_neg_alpha_5_presentation Max C_w: 0.43"
## [1] "Number of grid points: 5"
## [1] "Number of folds: 10"
#####STEP 0-6: Set file paths #####
# Set the processed path based on published_paper_run
# ***Important: for file paths to work, use the drop-down menu on "Knit" to select the "Current Working Directory" under "Knit Directory." This assumes that you have cloned the repo to your local directory.***
processedpath <- if (published_paper_run == 1) {
"PP_Full_Analysis/Cleaned_input_data/As_published/"
} else {
"PP_Full_Analysis/Cleaned_input_data/Testing/"
}
# Set the results directory as cw_name subdirectory
result_dir <- paste0(processedpath, "empirical/", cw_name, "/")
# If there is not a folder with the name result_dir, create it
if (!dir.exists(result_dir)) {
dir.create(result_dir, recursive = TRUE)
}
print(paste0("Processed data path:", processedpath, "empirical/"))
## [1] "Processed data path:PP_Full_Analysis/Cleaned_input_data/Testing/empirical/"
## [1] "Results directory: PP_Full_Analysis/Cleaned_input_data/Testing/empirical/cw_med_5/"
Read in raw and simulated data, then combine them.
#####STEP 3-1: Read data #####
d_empirical <- read.csv(paste0(processedpath, "empirical/2_Imputed_Wide_Dataset.csv"))
print("Information on empirical dataset:")
## [1] "Information on empirical dataset:"
## Rows: 12,208
## Columns: 70
## $ numhh_list <int> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ gender_inp <int> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, …
## $ age_inp <int> 60, 41, 39, 52, 51, 32, 34, 23, 43, 46, 38, 25, …
## $ hispanic_inp <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ race_white_inp <int> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, …
## $ race_black_inp <int> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ race_nwother_inp <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ast_dx_pre_lottery <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, …
## $ dia_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hbp_dx_pre_lottery <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ chl_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ami_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ chf_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ emp_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ kid_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cancer_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, …
## $ dep_dx_pre_lottery <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, …
## $ charg_tot_pre_ed <dbl> 0.0, 0.0, 1888.2, 0.0, 1715.3, 0.0, 0.0, 5743.9,…
## $ ed_charg_tot_pre_ed <dbl> 0.0, 0.0, 1888.2, 0.0, 1006.3, 0.0, 0.0, 4542.4,…
## $ num_visit_pre_cens_ed <int> 0, 0, 1, 0, 2, 0, 0, 7, 0, 0, 0, 0, 1, 0, 0, 0, …
## $ any_depres_pre_ed <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ lessHS <int> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ HSorGED <int> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ X <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
## $ person_id <int> 5, 8, 16, 17, 18, 23, 24, 29, 47, 57, 59, 68, 70…
## $ weight_total_inp <dbl> 1.1504, 0.8975, 1.0000, 1.2126, 1.0000, 1.0033, …
## $ diabetes <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ hypertension <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ highcholesterol <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression <int> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ phq <int> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8, 7, 3, 2, 0, 2, …
## $ cvd_risk <dbl> 0.1370, 0.1120, 0.0330, 0.2530, 0.1560, 0.0120, …
## $ doc_any <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, …
## $ ed_any <int> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, …
## $ hosp_any <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ oop_spend <int> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, …
## $ catastrophic <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ debt <int> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, …
## $ borrow <int> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, …
## $ a1c <dbl> 5.037, 5.201, 5.854, 5.364, 5.527, 5.037, 5.446,…
## $ hdl_level <dbl> 48.33, 51.33, 38.58, 51.33, 28.08, 31.08, 25.83,…
## $ chl_level <dbl> 241.0, 229.9, 229.9, 235.4, 177.7, 173.8, 152.7,…
## $ bmi <dbl> 26.66, 35.23, 37.12, 24.81, 27.02, 26.26, 27.70,…
## $ sbp <int> 144, 134, 126, 168, 119, 98, 108, 125, 100, 104,…
## $ dbp <int> 81, 82, 94, 110, 79, 59, 63, 76, 77, 63, 84, 62,…
## $ prescriptions_any <int> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, …
## $ prescriptions <int> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, 4, 0, 4, 2, 1, 4,…
## $ hypertension_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ cholesterol_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ diabetes_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ depression_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, …
## $ household_id <int> 100005, 102094, 140688, 100017, 100018, 115253, …
## $ eligibility <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, …
## $ ohp_all_ever_inperson <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, …
## $ doc_num <int> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12, 6, 0, 3, 0, 10, …
## $ ed_num <int> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, 0, 2, 0, 0, 0, 0,…
## $ hosp_num <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, …
## $ charg_tot_ed <dbl> 0, 2751, 15233, NA, 0, 0, 0, 8436, 0, NA, 0, 0, …
## $ ed_charg_tot_ed <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0.0, 0.0, 7067.0, …
## $ sbp_neg <int> -144, -134, -126, -168, -119, -98, -108, -125, -…
## $ dbp_neg <int> -81, -82, -94, -110, -79, -59, -63, -76, -77, -6…
## $ chl_level_neg <dbl> -241.0, -229.9, -229.9, -235.4, -177.7, -173.8, …
## $ hdl_level_neg <dbl> -48.33, -51.33, -38.58, -51.33, -28.08, -31.08, …
## $ a1c_neg <dbl> -5.037, -5.201, -5.854, -5.364, -5.527, -5.037, …
## $ bmi_neg <dbl> -26.66, -35.23, -37.12, -24.81, -27.02, -26.26, …
## $ phq_neg <int> -1, -9, -2, -13, -2, -3, -2, -14, -11, -8, -7, -…
## $ cvd_risk_neg <dbl> -0.1370, -0.1120, -0.0330, -0.2530, -0.1560, -0.…
## $ debt_neg <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, …
## $ borrow_neg <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, …
## $ catastrophic_neg <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
#####STEP 3-6: Read simulated data #####
d_simulated <- read.csv(paste0(processedpath, "simulated/2b_Simulated_Outcomes.csv"))
print("Information on simulated dataset:")
## [1] "Information on simulated dataset:"
## Rows: 12,208
## Columns: 8
## $ person_id <int> 5, 8, 16, 17, 18, 23, 24, 29, 4…
## $ sim_sbp <dbl> 248.979, 160.146, 251.084, 160.…
## $ sim_debt_neg_alpha_5 <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ sim_sbp_neg_alpha_5 <dbl> -144.00, -134.00, -84.61, -168.…
## $ sim_hdl_level_neg_alpha_5 <dbl> -48.33, -51.33, -5.64, -51.33, …
## $ sim_debt_neg_alpha_5_presentation <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ sim_sbp_neg_alpha_5_presentation <dbl> -144.00, -134.00, -84.61, -168.…
## $ sim_hdl_level_neg_alpha_5_presentation <dbl> -48.33, -51.33, -5.64, -51.33, …
#####STEP 3-7: Combine empirical and simulated data #####
# Combine empirical and simulated data based on person_id
d <- merge(d_empirical, d_simulated, by = "person_id", all = TRUE)
print("Information on combined dataset:")
## [1] "Information on combined dataset:"
## Rows: 12,208
## Columns: 77
## $ person_id <int> 5, 8, 16, 17, 18, 23, 24, 29, 4…
## $ numhh_list <int> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1…
## $ gender_inp <int> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1…
## $ age_inp <int> 60, 41, 39, 52, 51, 32, 34, 23,…
## $ hispanic_inp <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0…
## $ race_white_inp <int> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1…
## $ race_black_inp <int> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0…
## $ race_nwother_inp <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ ast_dx_pre_lottery <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0…
## $ dia_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ hbp_dx_pre_lottery <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1…
## $ chl_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ ami_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ chf_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ emp_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ kid_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cancer_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ dep_dx_pre_lottery <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1…
## $ charg_tot_pre_ed <dbl> 0.0, 0.0, 1888.2, 0.0, 1715.3, …
## $ ed_charg_tot_pre_ed <dbl> 0.0, 0.0, 1888.2, 0.0, 1006.3, …
## $ num_visit_pre_cens_ed <int> 0, 0, 1, 0, 2, 0, 0, 7, 0, 0, 0…
## $ any_depres_pre_ed <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ lessHS <int> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0…
## $ HSorGED <int> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0…
## $ X <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, …
## $ weight_total_inp <dbl> 1.1504, 0.8975, 1.0000, 1.2126,…
## $ diabetes <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ hypertension <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ highcholesterol <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ depression <int> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, …
## $ phq <int> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8…
## $ cvd_risk <dbl> 0.1370, 0.1120, 0.0330, 0.2530,…
## $ doc_any <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1…
## $ ed_any <int> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0…
## $ hosp_any <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ oop_spend <int> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1…
## $ catastrophic <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ debt <int> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1…
## $ borrow <int> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1…
## $ a1c <dbl> 5.037, 5.201, 5.854, 5.364, 5.5…
## $ hdl_level <dbl> 48.33, 51.33, 38.58, 51.33, 28.…
## $ chl_level <dbl> 241.0, 229.9, 229.9, 235.4, 177…
## $ bmi <dbl> 26.66, 35.23, 37.12, 24.81, 27.…
## $ sbp <int> 144, 134, 126, 168, 119, 98, 10…
## $ dbp <int> 81, 82, 94, 110, 79, 59, 63, 76…
## $ prescriptions_any <int> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1…
## $ prescriptions <int> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, …
## $ hypertension_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ cholesterol_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ diabetes_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ depression_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ household_id <int> 100005, 102094, 140688, 100017,…
## $ eligibility <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0…
## $ ohp_all_ever_inperson <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0…
## $ doc_num <int> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12,…
## $ ed_num <int> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, …
## $ hosp_num <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ charg_tot_ed <dbl> 0, 2751, 15233, NA, 0, 0, 0, 84…
## $ ed_charg_tot_ed <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0…
## $ sbp_neg <int> -144, -134, -126, -168, -119, -…
## $ dbp_neg <int> -81, -82, -94, -110, -79, -59, …
## $ chl_level_neg <dbl> -241.0, -229.9, -229.9, -235.4,…
## $ hdl_level_neg <dbl> -48.33, -51.33, -38.58, -51.33,…
## $ a1c_neg <dbl> -5.037, -5.201, -5.854, -5.364,…
## $ bmi_neg <dbl> -26.66, -35.23, -37.12, -24.81,…
## $ phq_neg <int> -1, -9, -2, -13, -2, -3, -2, -1…
## $ cvd_risk_neg <dbl> -0.1370, -0.1120, -0.0330, -0.2…
## $ debt_neg <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ borrow_neg <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0…
## $ catastrophic_neg <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ sim_sbp <dbl> 248.979, 160.146, 251.084, 160.…
## $ sim_debt_neg_alpha_5 <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ sim_sbp_neg_alpha_5 <dbl> -144.00, -134.00, -84.61, -168.…
## $ sim_hdl_level_neg_alpha_5 <dbl> -48.33, -51.33, -5.64, -51.33, …
## $ sim_debt_neg_alpha_5_presentation <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ sim_sbp_neg_alpha_5_presentation <dbl> -144.00, -134.00, -84.61, -168.…
## $ sim_hdl_level_neg_alpha_5_presentation <dbl> -48.33, -51.33, -5.64, -51.33, …
#####STEP 3-2: Create and assign folds #####
set.seed(seedset) # Set seed for reproducibility
d <- d %>%
mutate(fold = sample(1:nfolds, n(), replace = TRUE)) # Assign folds to each row
print("Information on folds:")
## [1] "Information on folds:"
##
## 1 2 3 4 5 6 7 8 9 10
## 1303 1253 1179 1242 1175 1235 1214 1218 1201 1188
fold_summaries <- d %>%
group_by(fold) %>%
summarise(
across(where(is.numeric), ~ mean(., na.rm = TRUE), .names = "avg_{.col}"),
across(where(is.factor), ~ mean(as.numeric(.), na.rm = TRUE),
.names = "prop_{.col}")
)
# Create a kable table
kable(fold_summaries, format = "markdown")
fold | avg_person_id | avg_numhh_list | avg_gender_inp | avg_age_inp | avg_hispanic_inp | avg_race_white_inp | avg_race_black_inp | avg_race_nwother_inp | avg_ast_dx_pre_lottery | avg_dia_dx_pre_lottery | avg_hbp_dx_pre_lottery | avg_chl_dx_pre_lottery | avg_ami_dx_pre_lottery | avg_chf_dx_pre_lottery | avg_emp_dx_pre_lottery | avg_kid_dx_pre_lottery | avg_cancer_dx_pre_lottery | avg_dep_dx_pre_lottery | avg_charg_tot_pre_ed | avg_ed_charg_tot_pre_ed | avg_num_visit_pre_cens_ed | avg_any_depres_pre_ed | avg_lessHS | avg_HSorGED | avg_X | avg_weight_total_inp | avg_diabetes | avg_hypertension | avg_highcholesterol | avg_depression | avg_phq | avg_cvd_risk | avg_doc_any | avg_ed_any | avg_hosp_any | avg_oop_spend | avg_catastrophic | avg_debt | avg_borrow | avg_a1c | avg_hdl_level | avg_chl_level | avg_bmi | avg_sbp | avg_dbp | avg_prescriptions_any | avg_prescriptions | avg_hypertension_med | avg_cholesterol_med | avg_diabetes_med | avg_depression_med | avg_household_id | avg_eligibility | avg_ohp_all_ever_inperson | avg_doc_num | avg_ed_num | avg_hosp_num | avg_charg_tot_ed | avg_ed_charg_tot_ed | avg_sbp_neg | avg_dbp_neg | avg_chl_level_neg | avg_hdl_level_neg | avg_a1c_neg | avg_bmi_neg | avg_phq_neg | avg_cvd_risk_neg | avg_debt_neg | avg_borrow_neg | avg_catastrophic_neg | avg_sim_sbp | avg_sim_debt_neg_alpha_5 | avg_sim_sbp_neg_alpha_5 | avg_sim_hdl_level_neg_alpha_5 | avg_sim_debt_neg_alpha_5_presentation | avg_sim_sbp_neg_alpha_5_presentation | avg_sim_hdl_level_neg_alpha_5_presentation |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 37499 | 1.236 | 0.5664 | 40.63 | 0.1896 | 0.6884 | 0.0990 | 0.1589 | 0.2049 | 0.0714 | 0.1742 | 0.1282 | 0.0200 | 0.0061 | 0.0207 | 0.0169 | 0.0345 | 0.3415 | 2452 | 999.2 | 0.8074 | 0.0123 | 0.2218 | 0.4474 | 6104 | 1.244 | 0.0224 | 0.0596 | 0.0666 | 0.0496 | 6.584 | 0.0815 | 0.6567 | 0.4040 | 0.1413 | 0.5862 | 0.0437 | 0.5573 | 0.2235 | 5.348 | 48.20 | 205.7 | 30.03 | 119.4 | 75.95 | 0.5411 | 1.903 | 0.1466 | 0.0936 | 0.0760 | 0.1727 | 140365 | 0.5280 | 0.3239 | 5.706 | 1.0146 | 0.2121 | 3284 | 1365 | -119.4 | -75.95 | -205.7 | -48.20 | -5.348 | -30.03 | -6.584 | -0.0815 | 0.4427 | 0.7765 | 0.9563 | 197.7 | 0.4203 | -121.5 | -49.89 | 0.4203 | -121.5 | -49.89 |
2 | 36399 | 1.257 | 0.5786 | 40.61 | 0.1828 | 0.6784 | 0.1030 | 0.1357 | 0.2003 | 0.0670 | 0.1931 | 0.1277 | 0.0200 | 0.0112 | 0.0247 | 0.0231 | 0.0503 | 0.3272 | 2419 | 946.2 | 0.8244 | 0.0223 | 0.2099 | 0.4653 | 5924 | 1.258 | 0.0184 | 0.0565 | 0.0637 | 0.0525 | 6.709 | 0.0790 | 0.6896 | 0.4032 | 0.1288 | 0.5720 | 0.0544 | 0.5494 | 0.2187 | 5.314 | 48.35 | 206.2 | 29.74 | 118.9 | 75.38 | 0.5587 | 1.829 | 0.1325 | 0.0838 | 0.0607 | 0.1668 | 139845 | 0.5164 | 0.2889 | 6.410 | 0.9888 | 0.2037 | 3628 | 1725 | -118.9 | -75.38 | -206.2 | -48.35 | -5.314 | -29.74 | -6.709 | -0.0790 | 0.4506 | 0.7813 | 0.9456 | 201.2 | 0.4145 | -122.2 | -50.95 | 0.4145 | -122.2 | -50.95 |
3 | 38011 | 1.229 | 0.5810 | 40.78 | 0.1824 | 0.7048 | 0.0958 | 0.1187 | 0.1883 | 0.0729 | 0.1891 | 0.1374 | 0.0170 | 0.0076 | 0.0246 | 0.0153 | 0.0382 | 0.3427 | 1906 | 885.0 | 0.7498 | 0.0187 | 0.2078 | 0.4521 | 6189 | 1.246 | 0.0136 | 0.0601 | 0.0643 | 0.0478 | 6.925 | 0.0808 | 0.6689 | 0.4049 | 0.1197 | 0.5742 | 0.0534 | 0.5380 | 0.2351 | 5.340 | 46.79 | 204.5 | 30.20 | 118.8 | 75.58 | 0.5496 | 1.927 | 0.1510 | 0.0916 | 0.0780 | 0.1705 | 141094 | 0.5344 | 0.3028 | 5.522 | 0.9770 | 0.1811 | 3211 | 1454 | -118.8 | -75.58 | -204.5 | -46.79 | -5.340 | -30.20 | -6.925 | -0.0808 | 0.4620 | 0.7649 | 0.9466 | 198.5 | 0.4526 | -122.1 | -49.40 | 0.4526 | -122.1 | -49.40 |
4 | 37765 | 1.244 | 0.5564 | 40.95 | 0.1683 | 0.6997 | 0.1023 | 0.1441 | 0.1916 | 0.0676 | 0.1667 | 0.1232 | 0.0225 | 0.0137 | 0.0274 | 0.0217 | 0.0467 | 0.3341 | 2192 | 854.2 | 0.8172 | 0.0185 | 0.1981 | 0.4557 | 6148 | 1.225 | 0.0121 | 0.0576 | 0.0664 | 0.0658 | 6.726 | 0.0825 | 0.6820 | 0.4153 | 0.1435 | 0.5606 | 0.0575 | 0.5720 | 0.2119 | 5.314 | 47.56 | 205.2 | 29.72 | 119.1 | 75.78 | 0.5564 | 1.801 | 0.1272 | 0.0853 | 0.0652 | 0.1538 | 140672 | 0.5201 | 0.3132 | 6.075 | 1.0057 | 0.2138 | 3448 | 1342 | -119.1 | -75.78 | -205.2 | -47.56 | -5.314 | -29.72 | -6.726 | -0.0825 | 0.4280 | 0.7881 | 0.9425 | 199.8 | 0.4052 | -122.5 | -50.26 | 0.4052 | -122.5 | -50.26 |
5 | 38389 | 1.260 | 0.5532 | 41.44 | 0.1745 | 0.6706 | 0.1157 | 0.1481 | 0.2000 | 0.0706 | 0.1906 | 0.1226 | 0.0196 | 0.0111 | 0.0170 | 0.0170 | 0.0443 | 0.3421 | 2011 | 838.6 | 0.7532 | 0.0128 | 0.2034 | 0.4315 | 6250 | 1.236 | 0.0137 | 0.0481 | 0.0654 | 0.0551 | 6.893 | 0.0841 | 0.6661 | 0.3986 | 0.1261 | 0.5656 | 0.0434 | 0.5437 | 0.2383 | 5.335 | 48.68 | 207.4 | 29.74 | 120.1 | 76.34 | 0.5307 | 1.897 | 0.1353 | 0.0860 | 0.0723 | 0.1855 | 141058 | 0.5319 | 0.3294 | 5.841 | 0.9343 | 0.2022 | 2618 | 1175 | -120.1 | -76.34 | -207.4 | -48.68 | -5.335 | -29.74 | -6.893 | -0.0841 | 0.4563 | 0.7617 | 0.9566 | 200.0 | 0.4161 | -123.6 | -51.41 | 0.4161 | -123.6 | -51.41 |
6 | 37575 | 1.224 | 0.5846 | 40.58 | 0.1725 | 0.6802 | 0.1117 | 0.1555 | 0.1911 | 0.0607 | 0.1749 | 0.1174 | 0.0178 | 0.0089 | 0.0259 | 0.0121 | 0.0445 | 0.3441 | 1812 | 817.0 | 0.7150 | 0.0154 | 0.2146 | 0.4348 | 6117 | 1.257 | 0.0163 | 0.0781 | 0.0701 | 0.0591 | 7.027 | 0.0767 | 0.6778 | 0.3994 | 0.1308 | 0.5857 | 0.0463 | 0.5545 | 0.2344 | 5.324 | 48.00 | 206.8 | 30.03 | 118.5 | 75.58 | 0.5652 | 2.049 | 0.1466 | 0.0955 | 0.0640 | 0.1919 | 140096 | 0.5158 | 0.2939 | 6.192 | 1.0342 | 0.2060 | 3877 | 1399 | -118.5 | -75.58 | -206.8 | -48.00 | -5.324 | -30.03 | -7.027 | -0.0767 | 0.4455 | 0.7656 | 0.9537 | 198.0 | 0.4128 | -121.2 | -50.09 | 0.4128 | -121.2 | -50.09 |
7 | 37796 | 1.246 | 0.5535 | 41.04 | 0.1730 | 0.6829 | 0.1063 | 0.1524 | 0.1730 | 0.0758 | 0.1977 | 0.1310 | 0.0231 | 0.0107 | 0.0214 | 0.0198 | 0.0404 | 0.3386 | 2057 | 824.1 | 0.7100 | 0.0107 | 0.2158 | 0.4613 | 6153 | 1.227 | 0.0132 | 0.0505 | 0.0578 | 0.0399 | 6.970 | 0.0848 | 0.6636 | 0.3977 | 0.1245 | 0.5677 | 0.0412 | 0.5375 | 0.2244 | 5.349 | 47.23 | 204.0 | 29.81 | 119.6 | 76.00 | 0.5544 | 1.900 | 0.1474 | 0.0980 | 0.0725 | 0.1779 | 140679 | 0.4942 | 0.3097 | 5.362 | 0.9163 | 0.1800 | 2800 | 1339 | -119.6 | -76.00 | -204.0 | -47.23 | -5.349 | -29.81 | -6.970 | -0.0848 | 0.4625 | 0.7756 | 0.9588 | 200.1 | 0.4367 | -122.3 | -49.42 | 0.4367 | -122.3 | -49.42 |
8 | 37003 | 1.250 | 0.5525 | 40.47 | 0.1823 | 0.6929 | 0.0878 | 0.1420 | 0.1929 | 0.0788 | 0.1650 | 0.1174 | 0.0181 | 0.0123 | 0.0181 | 0.0148 | 0.0411 | 0.3374 | 2151 | 932.7 | 0.8112 | 0.0131 | 0.1864 | 0.4598 | 6023 | 1.231 | 0.0157 | 0.0624 | 0.0637 | 0.0522 | 6.944 | 0.0779 | 0.6604 | 0.4145 | 0.1472 | 0.5442 | 0.0397 | 0.5444 | 0.2204 | 5.345 | 47.79 | 205.5 | 29.78 | 118.4 | 75.49 | 0.5484 | 1.917 | 0.1388 | 0.0846 | 0.0747 | 0.1897 | 140584 | 0.5230 | 0.3136 | 6.401 | 1.0635 | 0.2287 | 3637 | 1616 | -118.4 | -75.49 | -205.5 | -47.79 | -5.345 | -29.78 | -6.944 | -0.0779 | 0.4556 | 0.7796 | 0.9603 | 200.3 | 0.4299 | -120.9 | -49.76 | 0.4299 | -120.9 | -49.76 |
9 | 37606 | 1.251 | 0.5737 | 40.99 | 0.1907 | 0.6719 | 0.1099 | 0.1507 | 0.1990 | 0.0691 | 0.2015 | 0.1324 | 0.0150 | 0.0133 | 0.0241 | 0.0225 | 0.0425 | 0.3372 | 1964 | 871.7 | 0.8060 | 0.0125 | 0.1948 | 0.4721 | 6122 | 1.236 | 0.0226 | 0.0553 | 0.0703 | 0.0472 | 6.670 | 0.0804 | 0.6542 | 0.4242 | 0.1125 | 0.5617 | 0.0389 | 0.5386 | 0.2132 | 5.348 | 47.65 | 205.9 | 29.86 | 119.8 | 76.40 | 0.5517 | 1.892 | 0.1357 | 0.0849 | 0.0683 | 0.1624 | 140770 | 0.5287 | 0.3106 | 5.477 | 1.0159 | 0.1548 | 3423 | 1481 | -119.8 | -76.40 | -205.9 | -47.65 | -5.348 | -29.86 | -6.670 | -0.0804 | 0.4614 | 0.7868 | 0.9611 | 198.3 | 0.4371 | -122.2 | -49.57 | 0.4371 | -122.2 | -49.57 |
10 | 37022 | 1.252 | 0.5556 | 40.53 | 0.1827 | 0.7003 | 0.1002 | 0.1322 | 0.1911 | 0.0774 | 0.1667 | 0.1296 | 0.0244 | 0.0160 | 0.0160 | 0.0227 | 0.0455 | 0.3628 | 2360 | 930.0 | 0.8359 | 0.0168 | 0.1961 | 0.4621 | 6027 | 1.242 | 0.0160 | 0.0538 | 0.0589 | 0.0454 | 6.753 | 0.0805 | 0.6765 | 0.3997 | 0.1289 | 0.5747 | 0.0393 | 0.5414 | 0.2291 | 5.327 | 46.69 | 203.2 | 29.73 | 118.4 | 75.16 | 0.5463 | 1.931 | 0.1380 | 0.0901 | 0.0774 | 0.1810 | 139994 | 0.5286 | 0.3241 | 6.438 | 0.9949 | 0.1755 | 3221 | 1430 | -118.4 | -75.16 | -203.2 | -46.69 | -5.327 | -29.73 | -6.753 | -0.0805 | 0.4586 | 0.7709 | 0.9607 | 199.1 | 0.4381 | -121.7 | -49.33 | 0.4381 | -121.7 | -49.33 |
#####STEP 3-3: Create charges variables #####
# Print summary statistics and number of zeros of charge variables
print("Summary statistics of charg_tot_pre_ed:")
## [1] "Summary statistics of charg_tot_pre_ed:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 2136 744 180055
## [1] "Number of zeros of charg_tot_pre_ed:"
## [1] 8273
## [1] "Summary statistics of ed_charg_tot_pre_ed:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 891 603 41246
## [1] "Number of zeros of ed_charg_tot_pre_ed:"
## [1] 8280
# Create categories for charge = 0
d$tot_charge_0 <- as.numeric(d$charg_tot_pre_ed == 0)
d$ed_charge_0 <- as.numeric(d$ed_charg_tot_pre_ed == 0)
# Print summary statistics of new charge variables
print("Distribution of tot_charge_0:")
## [1] "Distribution of tot_charge_0:"
##
## 0 1
## 3935 8273
## [1] "Distribution of ed_charge_0:"
##
## 0 1
## 3928 8280
#####STEP 3-4: Create penalized outcomes #####
# Loop through each outcome and create penalized versions
for(outcome_info in outcomes_list) {
outcome_name <- outcome_info[[1]]
max_cw <- outcome_info[[2]]
# Print summary statistics of the outcome
print(paste("Summary statistics of", outcome_name, ":", sep=" "))
print(summary(d[[outcome_name]]))
# Define penalty values
cw_values <- seq(0, max_cw, length.out=cw_grid)
# For each penalty value, create new penalized outcome
for(i in seq_along(cw_values)) {
cw <- cw_values[i]
new_col_name <- paste0(outcome_name, "_cw", i-1)
# Check if outcome is continuous
if(outcome_name %in% continuous_outcomes) {
# Current continuous variable logic
d[[new_col_name]] <- d[[outcome_name]] - cw * d$eligibility
# Check if outcome is binary
} else if(outcome_name %in% binary_outcomes) {
# Initialize new column with original values
d[[new_col_name]] <- d[[outcome_name]]
# For rows where outcome is 1, flip to 0 with probability = cw
ones_idx <- which(d[[outcome_name]] == 1)
for(idx in ones_idx) {
if(runif(1) < (cw * d$eligibility[idx])) {
d[[new_col_name]][idx] <- 0
}
}
} else {
stop(paste("Error: outcome", outcome_name, "is in neither continuous nor binary list."))
}
print(paste("Summary statistics of", new_col_name, ":", sep=" "))
print(summary(d[[new_col_name]]))
}
}
## [1] "Summary statistics of sbp_neg :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -229 -128 -117 -119 -108 -72 41
## [1] "Summary statistics of sbp_neg_cw0 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -229 -128 -117 -119 -108 -72 41
## [1] "Summary statistics of sbp_neg_cw1 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -228.0 -118.0 -103.0 -103.8 -86.8 -46.8 41
## [1] "Summary statistics of sbp_neg_cw2 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -228.0 -117.0 -89.5 -88.6 -57.5 -17.5 41
## [1] "Summary statistics of sbp_neg_cw3 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -228.0 -116.0 -63.2 -73.3 -28.2 11.8 41
## [1] "Summary statistics of sbp_neg_cw4 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -228 -116 -34 -58 1 41 41
## [1] "Summary statistics of hdl_level_neg :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -139.08 -55.08 -46.08 -47.70 -38.58 1.92 57
## [1] "Summary statistics of hdl_level_neg_cw0 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -139.08 -55.08 -46.08 -47.70 -38.58 1.92 57
## [1] "Summary statistics of hdl_level_neg_cw1 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -134.58 -50.31 -40.83 -41.69 -31.56 2.94 57
## [1] "Summary statistics of hdl_level_neg_cw2 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -134.6 -47.6 -35.0 -35.7 -22.3 14.5 57
## [1] "Summary statistics of hdl_level_neg_cw3 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -134.6 -46.1 -29.6 -29.7 -11.5 26.0 57
## [1] "Summary statistics of hdl_level_neg_cw4 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -134.6 -46.1 -23.6 -23.6 0.0 37.5 57
## [1] "Summary statistics of debt_neg :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 0.45 1.00 1.00 114
## [1] "Summary statistics of debt_neg_cw0 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 0.45 1.00 1.00 114
## [1] "Summary statistics of debt_neg_cw1 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 0.42 1.00 1.00 114
## [1] "Summary statistics of debt_neg_cw2 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 0.39 1.00 1.00 114
## [1] "Summary statistics of debt_neg_cw3 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 0.37 1.00 1.00 114
## [1] "Summary statistics of debt_neg_cw4 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 0.34 1.00 1.00 114
## [1] "Summary statistics of ohp_all_ever_inperson :"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 0.311 1.000 1.000
## [1] "Summary statistics of ohp_all_ever_inperson_cw0 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 0.311 1.000 1.000
## [1] "Summary statistics of ohp_all_ever_inperson_cw1 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 0.311 1.000 1.000
## [1] "Summary statistics of ohp_all_ever_inperson_cw2 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 0.311 1.000 1.000
## [1] "Summary statistics of ohp_all_ever_inperson_cw3 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 0.311 1.000 1.000
## [1] "Summary statistics of ohp_all_ever_inperson_cw4 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 0.311 1.000 1.000
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -240.4 -137.0 -119.0 -122.0 -106.0 -30.6 41
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw0 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -240.4 -137.0 -119.0 -122.0 -106.0 -30.6 41
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw1 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -240.39 -126.00 -105.25 -106.47 -84.25 -9.86 41
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw2 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -240.4 -121.0 -93.0 -90.9 -56.5 19.9 41
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw3 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -240.4 -120.0 -71.8 -75.4 -26.8 49.6 41
## [1] "Summary statistics of sim_sbp_neg_alpha_5_presentation_cw4 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -240.39 -120.00 -48.39 -59.86 2.91 79.39 41
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -139.1 -62.5 -48.3 -50.0 -37.8 13.9 57
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw0 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -139.1 -62.5 -48.3 -50.0 -37.8 13.9 57
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw1 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -134.6 -57.1 -42.8 -44.0 -30.8 25.4 57
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw2 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -134.6 -53.6 -37.8 -38.0 -21.3 36.9 57
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw3 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -134.6 -51.3 -32.6 -32.0 -10.8 48.4 57
## [1] "Summary statistics of sim_hdl_level_neg_alpha_5_presentation_cw4 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -134.6 -49.8 -27.3 -25.9 0.4 59.9 57
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 0.43 1.00 1.00 114
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw0 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 0.43 1.00 1.00 114
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw1 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0 0.0 0.0 0.4 1.0 1.0 114
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw2 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 0.37 1.00 1.00 114
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw3 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 0.35 1.00 1.00 114
## [1] "Summary statistics of sim_debt_neg_alpha_5_presentation_cw4 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 0.00 0.00 0.32 1.00 1.00 114
#####STEP 3-5: Test outcome = outcome_cw0 #####
# Loop through each outcome and check if outcome = outcome_cw0
for(outcome_info in outcomes_list) {
outcome_name <- outcome_info[[1]]
cw0_name <- paste0(outcome_name, "_cw0")
# Check if values are identical
if(!all(d[[outcome_name]] == d[[cw0_name]], na.rm=TRUE)) {
stop(paste("Error:", outcome_name, "is not the same as", cw0_name))
}
# If we get here, they are identical so drop the original column
d[[outcome_name]] <- NULL
print(paste("Dropped", outcome_name, "since it equals", cw0_name))
}
## [1] "Dropped sbp_neg since it equals sbp_neg_cw0"
## [1] "Dropped hdl_level_neg since it equals hdl_level_neg_cw0"
## [1] "Dropped debt_neg since it equals debt_neg_cw0"
## [1] "Dropped ohp_all_ever_inperson since it equals ohp_all_ever_inperson_cw0"
## [1] "Dropped sim_sbp_neg_alpha_5_presentation since it equals sim_sbp_neg_alpha_5_presentation_cw0"
## [1] "Dropped sim_hdl_level_neg_alpha_5_presentation since it equals sim_hdl_level_neg_alpha_5_presentation_cw0"
## [1] "Dropped sim_debt_neg_alpha_5_presentation since it equals sim_debt_neg_alpha_5_presentation_cw0"