#####STEP 0-1: Reset environment #####
rm(list=ls())
knitr::opts_chunk$set(echo = TRUE)
options(repos = structure(c(CRAN = "http://cran.rstudio.com/")))
#####STEP 0-2: Install packages #####
list.of.packages <- c("grf", "metafor", "splitstackshape", "dplyr",
"tidyverse", "foreach", "cowplot", "reshape2",
"doParallel", "survival", "readstata13", "ggplot2",
"rsample", "DiagrammeR", "e1071", "pscl", "pROC",
"caret", "ModelMetrics", "MatchIt", "Hmisc", "scales",
"lmtest", "sandwich", "haven", "rpms", "randomForest",
"fabricatr", "gridExtra", "VIM", "mice",
"lmtest", "ivreg", "kableExtra")
new.packages <- list.of.packages[!(list.of.packages %in%
installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
lapply(list.of.packages, library, character.only = TRUE)
## Warning: package 'grf' was built under R version 4.3.3
## Warning: package 'metafor' was built under R version 4.3.3
## Warning: package 'metadat' was built under R version 4.3.3
## Warning: package 'numDeriv' was built under R version 4.3.1
## Warning: package 'splitstackshape' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## Warning: package 'foreach' was built under R version 4.3.3
## Warning: package 'cowplot' was built under R version 4.3.3
## Warning: package 'reshape2' was built under R version 4.3.3
## Warning: package 'doParallel' was built under R version 4.3.3
## Warning: package 'iterators' was built under R version 4.3.3
## Warning: package 'readstata13' was built under R version 4.3.3
## Warning: package 'rsample' was built under R version 4.3.3
## Warning: package 'DiagrammeR' was built under R version 4.3.3
## Warning: package 'e1071' was built under R version 4.3.3
## Warning: package 'pscl' was built under R version 4.3.3
## Warning: package 'pROC' was built under R version 4.3.3
## Warning: package 'caret' was built under R version 4.3.3
## Warning: package 'ModelMetrics' was built under R version 4.3.3
## Warning: package 'MatchIt' was built under R version 4.3.3
## Warning: package 'Hmisc' was built under R version 4.3.3
## Warning: package 'scales' was built under R version 4.3.3
## Warning: package 'lmtest' was built under R version 4.3.3
## Warning: package 'zoo' was built under R version 4.3.3
## Warning: package 'sandwich' was built under R version 4.3.3
## Warning: package 'haven' was built under R version 4.3.3
## Warning: package 'rpms' was built under R version 4.3.3
## Warning: package 'randomForest' was built under R version 4.3.3
## Warning: package 'fabricatr' was built under R version 4.3.3
## Warning: package 'gridExtra' was built under R version 4.3.3
## Warning: package 'VIM' was built under R version 4.3.3
## Warning: package 'colorspace' was built under R version 4.3.3
## Warning: package 'mice' was built under R version 4.3.3
## Warning in check_dep_version(): ABI version mismatch:
## lme4 was built with Matrix ABI version 1
## Current Matrix ABI version is 0
## Please re-install lme4 from source or restore original 'Matrix' package
## Warning: package 'ivreg' was built under R version 4.3.3
## Warning: package 'kableExtra' was built under R version 4.3.3
print(paste("Version of grf package:", packageVersion("grf")))
#####STEP 0-3: Basic information #####
Sys.time()
# Get detailed R session and system information
session_info <- sessionInfo()
system_info <- Sys.info()
# Combine the output
list(session_info = session_info, system_info = system_info)
#####STEP 0-4: Set non-run-specific parameters #####
seedset <- 777
seed.ivforest <- 123456789
seed.cf <- 987654321
numthreadsset <- min(6, parallel::detectCores())
if (numthreadsset!= 6) {
print("the results of grf vary by num.thread (publication paper used num.thread=6)")
}
cat("number of threads (affects grf results):", numthreadsset,"\n")
## number of threads (affects grf results): 6
#####STEP 0-5: Set run-specific parameters #####
published_paper_run <- 0 # ANALYST FORM
if (published_paper_run == 1) {
print("save intermediate files into PP_Full_Analysis/Cleaned_input_data/As_published/empirical/ folder")
warning("Changing this setting to 1 overwrites the input files required for replicating on different platforms.")
} else {
print("save intermediate files into PP_Full_Analysis/Cleaned_input_data/Testing/empirical/ folder")
}
## [1] "save intermediate files into PP_Full_Analysis/Cleaned_input_data/Testing/empirical/ folder"
cw_run_name <- "med" # ANALYST FORM
cw_grid <- 5 # ANALYST FORM
outcomes_list <- c(("sbp_neg_cw0")) # ANALYST FORM ***Note: this outcomes_list contains only outcome names, not max_cw values like script 3_Create_New_Outcomes.Rmd***
covariates_list <- c("numhh_list", "gender_inp", "age_inp", "hispanic_inp", "race_white_inp",
"race_black_inp", "race_nwother_inp", "ast_dx_pre_lottery", "dia_dx_pre_lottery",
"hbp_dx_pre_lottery", "chl_dx_pre_lottery", "ami_dx_pre_lottery",
"chf_dx_pre_lottery", "emp_dx_pre_lottery", "kid_dx_pre_lottery",
"cancer_dx_pre_lottery", "dep_dx_pre_lottery", "lessHS", "HSorGED",
"charg_tot_pre_ed", "ed_charg_tot_pre_ed") # ANALYST FORM
lambda_length <- 5 # ANALYST FORM
# Set cW name based on cW_run_name and cW_grid
cw_name <- paste0("cw_", cw_run_name, "_", cw_grid)
# Generate lambda list
lambda_list <- seq(0, 1, length.out = lambda_length)
# Print run-specific parameters
print(paste("outcomes_list:", outcomes_list))
## [1] "outcomes_list: sbp_neg_cw0"
## [1] "covariates_list: numhh_list"
## [2] "covariates_list: gender_inp"
## [3] "covariates_list: age_inp"
## [4] "covariates_list: hispanic_inp"
## [5] "covariates_list: race_white_inp"
## [6] "covariates_list: race_black_inp"
## [7] "covariates_list: race_nwother_inp"
## [8] "covariates_list: ast_dx_pre_lottery"
## [9] "covariates_list: dia_dx_pre_lottery"
## [10] "covariates_list: hbp_dx_pre_lottery"
## [11] "covariates_list: chl_dx_pre_lottery"
## [12] "covariates_list: ami_dx_pre_lottery"
## [13] "covariates_list: chf_dx_pre_lottery"
## [14] "covariates_list: emp_dx_pre_lottery"
## [15] "covariates_list: kid_dx_pre_lottery"
## [16] "covariates_list: cancer_dx_pre_lottery"
## [17] "covariates_list: dep_dx_pre_lottery"
## [18] "covariates_list: lessHS"
## [19] "covariates_list: HSorGED"
## [20] "covariates_list: charg_tot_pre_ed"
## [21] "covariates_list: ed_charg_tot_pre_ed"
## [1] "lambda_list: 0" "lambda_list: 0.25" "lambda_list: 0.5"
## [4] "lambda_list: 0.75" "lambda_list: 1"
#####STEP 0-6: Set file path #####
# Set the processed path based on published_paper_run
# ***Important: for file paths to work, use the drop-down menu on "Knit" to select the "Current Working Directory" under "Knit Directory." This assumes that you have cloned the repo to your local directory.***
processedpath <- if (published_paper_run == 1) {
paste0("PP_Full_Analysis/Cleaned_input_data/As_published/empirical/", cw_name)
} else {
paste0("PP_Full_Analysis/Cleaned_input_data/Testing/empirical/", cw_name)
}
# Create a results_dir path based on published_paper_run that goes to Intermediate_data
results_dir <- if (published_paper_run == 1) {
paste0("PP_Full_Analysis/Intermediate_data/As_published/empirical/", cw_name)
} else {
paste0("PP_Full_Analysis/Intermediate_data/Testing/empirical/", cw_name)
}
# If a results_dir does not exist, create it
if (!dir.exists(results_dir)) {
dir.create(results_dir, recursive = TRUE)
}
# Print processed path
print(paste("Processed path:", processedpath))
## [1] "Processed path: PP_Full_Analysis/Cleaned_input_data/Testing/empirical/cw_med_5"
#####STEP 4-1: Load data #####
# ***Important: for file paths to work, use the drop-down menu on "Knit" to select the "Current Working Directory" under "Knit Directory." This assumes that you have cloned the repo to your local directory.***
data_file <- paste0(processedpath, "/3_Penalty_Outcomes_Wide_Dataset.csv")
# Read the data from the selected file
d <- read.csv(data_file)
# Print information on the data
print("Information on the data:")
## [1] "Information on the data:"
## Rows: 12,208
## Columns: 94
## $ X.1 <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, …
## $ person_id <int> 5, 8, 16, 17, 18, 23, 24, 29, 4…
## $ numhh_list <int> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1…
## $ gender_inp <int> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1…
## $ age_inp <int> 60, 41, 39, 52, 51, 32, 34, 23,…
## $ hispanic_inp <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0…
## $ race_white_inp <int> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1…
## $ race_black_inp <int> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0…
## $ race_nwother_inp <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ ast_dx_pre_lottery <int> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0…
## $ dia_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ hbp_dx_pre_lottery <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1…
## $ chl_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ ami_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ chf_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ emp_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ kid_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cancer_dx_pre_lottery <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ dep_dx_pre_lottery <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1…
## $ charg_tot_pre_ed <dbl> 0.0, 0.0, 1888.2, 0.0, 1715.3, …
## $ ed_charg_tot_pre_ed <dbl> 0.0, 0.0, 1888.2, 0.0, 1006.3, …
## $ num_visit_pre_cens_ed <int> 0, 0, 1, 0, 2, 0, 0, 7, 0, 0, 0…
## $ any_depres_pre_ed <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ lessHS <int> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0…
## $ HSorGED <int> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0…
## $ X <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, …
## $ weight_total_inp <dbl> 1.1504, 0.8975, 1.0000, 1.2126,…
## $ diabetes <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ hypertension <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ highcholesterol <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ depression <int> 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, …
## $ phq <int> 1, 9, 2, 13, 2, 3, 2, 14, 11, 8…
## $ cvd_risk <dbl> 0.1370, 0.1120, 0.0330, 0.2530,…
## $ doc_any <int> 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1…
## $ ed_any <int> 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0…
## $ hosp_any <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ oop_spend <int> 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1…
## $ catastrophic <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ debt <int> 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1…
## $ borrow <int> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1…
## $ a1c <dbl> 5.037, 5.201, 5.854, 5.364, 5.5…
## $ hdl_level <dbl> 48.33, 51.33, 38.58, 51.33, 28.…
## $ chl_level <dbl> 241.0, 229.9, 229.9, 235.4, 177…
## $ bmi <dbl> 26.66, 35.23, 37.12, 24.81, 27.…
## $ sbp <int> 144, 134, 126, 168, 119, 98, 10…
## $ dbp <int> 81, 82, 94, 110, 79, 59, 63, 76…
## $ prescriptions_any <int> 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1…
## $ prescriptions <int> 0, 2, 2, NA, 0, 0, 0, 3, 0, 3, …
## $ hypertension_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ cholesterol_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ diabetes_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ depression_med <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ household_id <int> 100005, 102094, 140688, 100017,…
## $ eligibility <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0…
## $ doc_num <int> 0, 6, 12, 0, 0, 5, 0, 5, 1, 12,…
## $ ed_num <int> 0, 2, 1, 1, 0, 0, 0, 10, 2, 6, …
## $ hosp_num <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ charg_tot_ed <dbl> 0, 2751, 15233, NA, 0, 0, 0, 84…
## $ ed_charg_tot_ed <dbl> 0.0, 2751.4, 7100.8, NA, 0.0, 0…
## $ dbp_neg <int> -81, -82, -94, -110, -79, -59, …
## $ chl_level_neg <dbl> -241.0, -229.9, -229.9, -235.4,…
## $ hdl_level_neg <dbl> -48.33, -51.33, -38.58, -51.33,…
## $ a1c_neg <dbl> -5.037, -5.201, -5.854, -5.364,…
## $ bmi_neg <dbl> -26.66, -35.23, -37.12, -24.81,…
## $ phq_neg <int> -1, -9, -2, -13, -2, -3, -2, -1…
## $ cvd_risk_neg <dbl> -0.1370, -0.1120, -0.0330, -0.2…
## $ debt_neg <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ borrow_neg <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0…
## $ catastrophic_neg <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ sim_sbp <dbl> 248.979, 160.146, 251.084, 160.…
## $ sim_debt_neg_alpha_5 <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ sim_sbp_neg_alpha_5 <dbl> -144.00, -134.00, -84.61, -168.…
## $ sim_hdl_level_neg_alpha_5 <dbl> -48.33, -51.33, -5.64, -51.33, …
## $ sim_debt_neg_alpha_5_presentation <int> 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0…
## $ sim_sbp_neg_alpha_5_presentation <dbl> -144.00, -134.00, -84.61, -168.…
## $ sim_hdl_level_neg_alpha_5_presentation <dbl> -48.33, -51.33, -5.64, -51.33, …
## $ sim_sbp_neg_alpha_5_testing <dbl> -185.39, -175.39, -43.21, -209.…
## $ sim_sbp_neg_alpha_5_testing_ed <dbl> -144.00, -134.00, -43.21, -168.…
## $ plustau <dbl> 1.000, 1.133, 2.718, 1.284, 2.7…
## $ sim_sbp_neg_alpha_5_exp_tot_dx_index <dbl> -185.39, -180.90, -13.48, -221.…
## $ perturbation_sbp_neg <dbl> 41.39, 46.90, -112.52, 53.15, 1…
## $ fold <int> 8, 1, 10, 3, 10, 9, 9, 4, 4, 10…
## $ tot_charge_0 <int> 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1…
## $ ed_charge_0 <int> 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1…
## $ sbp_neg_cw0 <int> -144, -134, -126, -168, -119, -…
## $ sbp_neg_cw1 <dbl> -114.75, -134.00, -126.00, -168…
## $ sbp_neg_cw2 <dbl> -85.5, -134.0, -126.0, -168.0, …
## $ sbp_neg_cw3 <dbl> -56.25, -134.00, -126.00, -168.…
## $ sbp_neg_cw4 <int> -27, -134, -126, -168, -119, 19…
## $ ohp_all_ever_inperson_cw0 <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0…
## $ ohp_all_ever_inperson_cw1 <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0…
## $ ohp_all_ever_inperson_cw2 <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0…
## $ ohp_all_ever_inperson_cw3 <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0…
## $ ohp_all_ever_inperson_cw4 <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0…
#####STEP 4-2: Calculate CLATE function using instrumental forests #####
calculate_clate <- function(data, outcome) {
tryCatch({
cat(sprintf("\nProcessing outcome: %s\n", outcome))
# Prepare forest inputs
newd <- data[!is.na(data[[outcome]]), ] # Remove rows with missing outcome
fmla <- formula(paste0("~ 0 + ", paste0(covariates_list, collapse="+"))) # Create formula for covariates
X <- model.matrix(fmla, newd) # Create X matrix
W <- as.integer(newd[["ohp_all_ever_inperson_cw0"]]) # Create treatment
Z <- as.integer(newd[["eligibility"]]) # Create instrument
Y <- as.numeric(newd[[outcome]]) # Create outcome
weights <- as.numeric(newd$weight_total_inp) # Create weights
folds <- as.integer(newd$fold) # Create folds
# Print the dimensions and types of the forest inputs
print(paste("fmla:", deparse(fmla)))
print(sprintf("X matrix: %d rows x %d columns (%s)",
nrow(X), ncol(X), paste(class(X), collapse="/")))
print(paste("Name of W:", "ohp_all_ever_inperson_cw0",
"Length of W:", length(W),
"Types of W:", class(W)))
print(paste("Name of Z:", "eligibility",
"Length of Z:", length(Z),
"Types of Z:", class(Z)))
print(paste("Name of Y:", outcome,
"Length of Y:", length(Y),
"Types of Y:", class(Y)))
print(paste("Length of weights:", length(weights),
"Types of weights:", class(weights)))
print(paste("Length of folds:", length(folds),
"Types of folds:", class(folds)))
# Fit instrumental forest
cat("\nFitting instrumental forest...\n")
ivforest <- instrumental_forest(X, Y, W, Z,
honesty = TRUE,
tune.parameters = "all",
sample.weights = weights,
num.threads = numthreadsset,
seed = seed.ivforest,
clusters = folds)
# Calculate CLATE predictions with standard errors
pred <- predict(ivforest, estimate.variance = TRUE)
priority.clate <- pred$predictions
priority.clate.se <- sqrt(pred$variance.estimates)
# Print the summary statistics of priority.clate and priority.clate.se
print(summary(priority.clate))
print(summary(priority.clate.se))
# Calculate CLATE AUTOC
rate.oob = rank_average_treatment_effect(ivforest, priority.clate)
autoc <- rate.oob$estimate
autoc_se <- rate.oob$std.err
# Print the AUTOC and AUTOC SE
print(paste("AUTOC:", autoc, "AUTOC SE:", autoc_se))
# Calculate t-statistic for AUTOC
t.stat.oob <- autoc / autoc_se
# Print the t-statistic for AUTOC
print(paste("t-statistic for AUTOC:", t.stat.oob))
# RANKING 5 - print statement
print("RANKING 5")
# Initialize ranking vector
ranking_5 <- rep(NA, length(folds))
# Calculate quintile rankings using cross-fitting
unique_folds <- unique(folds)
for (fold in unique_folds) {
print(paste("Processing fold:", fold))
# Calculate quintile boundaries using training data
tau_quintiles <- quantile(priority.clate[folds != fold],
probs = seq(0, 1, by = 1/5),
include.lowest = TRUE)
print(paste("Quintile boundaries:", tau_quintiles))
# Assign quintile rankings (1-5) to validation data
ranking_5[folds == fold] <- as.numeric(cut(priority.clate[folds == fold],
breaks = tau_quintiles,
labels = 1:5,
include.lowest = TRUE))
print(table(ranking_5))
}
# RANKING 20 - print statement
print("RANKING 20")
# Initialize ranking vector
ranking_20 <- rep(NA, length(folds))
# Calculate ventile rankings using cross-fitting
for (fold in unique_folds) {
print(paste("Processing fold:", fold))
# Calculate ventile boundaries using training data
tau_quintiles <- quantile(priority.clate[folds != fold],
probs = seq(0, 1, by = 1/20))
print(paste("Quintile boundaries:", tau_quintiles))
# Assign ventile rankings (1-20) to validation data
ranking_20[folds == fold] <- as.numeric(cut(priority.clate[folds == fold],
breaks = tau_quintiles,
labels = 1:20,
include.lowest = TRUE))
print(table(ranking_20))
}
# Store results
results <- list(
person_id = newd$person_id,
clate = priority.clate,
se = priority.clate.se,
ranking_5 = ranking_5,
ranking_20 = ranking_20,
autoc = autoc,
autoc_se = autoc_se,
t.stat.oob = t.stat.oob,
forest = ivforest
)
# Save results with structured naming
results_path <- file.path(results_dir,
sprintf("clate_results_%s.RData",
outcome))
# Create metadata
metadata <- list(
timestamp = Sys.time(),
seed = seed.ivforest,
num_threads = numthreadsset,
session_info = sessionInfo()
)
# Combine results and metadata
final_results <- list(
results = results,
metadata = metadata
)
# Save combined results
save(final_results, file = results_path)
# Save a dataframe with only person_id, X, Y, W, Z, weights, folds, clate, se, ranking_5, ranking_20
clate_results <- data.frame(person_id = newd$person_id,
X = X,
Y = Y,
clate_W = W,
Z = Z,
weights = weights,
folds = folds,
clate = priority.clate,
clate_se = priority.clate.se,
clate_ranking_5 = ranking_5,
clate_ranking_20 = ranking_20)
cat(sprintf("Results saved to: %s\n", results_path))
cat(sprintf("Completed %s. AUTOC: %.3f (SE: %.3f)\n",
outcome, autoc$estimate, autoc$std.err))
}, error = function(e) {
warning(sprintf("Error processing %s: %s", outcome, e$message))
return(NULL)
})
return(clate_results)
}
#####STEP 4-3: Calculate CATE function using causal forests #####
calculate_cate <- function(data, outcome) {
tryCatch({
cat(sprintf("\nProcessing outcome: %s\n", outcome))
# Prepare forest inputs
newd <- data[!is.na(data[[outcome]]), ] # Remove rows with missing outcome
fmla <- formula(paste0("~ 0 + ", paste0(covariates_list, collapse="+"))) # Create formula for covariates
X <- model.matrix(fmla, newd) # Create X matrix
W <- as.integer(newd[["eligibility"]]) # Create treatment
Y <- as.numeric(newd[[outcome]]) # Create outcome
weights <- as.numeric(newd$weight_total_inp) # Create weights
folds <- as.integer(newd$fold) # Create folds
# Print the dimensions and types of the forest inputs
print(paste("fmla:", fmla))
print(paste("Dimensions of X:", dim(X),
"Types of X:", class(X)))
print(paste("Name of W:", "eligibility",
"Dimensions of W:", length(W),
"Types of W:", class(W)))
print(paste("Name of Y:", outcome,
"Dimensions of Y:", length(Y),
"Types of Y:", class(Y)))
print(paste("Dimensions of weights:", length(weights),
"Types of weights:", class(weights)))
print(paste("Dimensions of folds:", length(folds),
"Types of folds:", class(folds)))
print("Table of folds: ")
print(table(folds))
# Fit causal forest
cat("\nFitting causal forest...\n")
cf <- causal_forest(X, Y, W,
honesty = TRUE,
tune.parameters = "all",
sample.weights = weights,
num.threads = numthreadsset,
seed = seed.cf,
clusters = folds)
# Calculate CATE predictions with standard errors
pred <- predict(cf, estimate.variance = TRUE)
priority.cate <- pred$predictions
priority.cate.se <- sqrt(pred$variance.estimates)
# Print the summary statistics of priority.cate and priority.cate.se
print(summary(priority.cate))
print(summary(priority.cate.se))
# Calculate CATE AUTOC
rate.oob = rank_average_treatment_effect(cf, priority.cate)
autoc <- rate.oob$estimate
autoc_se <- rate.oob$std.err
# Print the AUTOC and AUTOC SE
print(paste("AUTOC:", autoc, "AUTOC SE:", autoc_se))
# Calculate t-statistic for AUTOC
t.stat.oob <- autoc / autoc_se
# Print the t-statistic for AUTOC
print(paste("t-statistic for AUTOC:", t.stat.oob))
# RANKING 5 - print statement
print("RANKING 5")
# Initialize ranking vector
ranking_5 <- rep(NA, length(folds))
# Calculate quintile rankings using cross-fitting
unique_folds <- unique(folds)
for (fold in unique_folds) {
print(paste("Processing fold:", fold))
# Calculate quintile boundaries using training data
tau_quintiles <- quantile(priority.cate[folds != fold],
probs = seq(0, 1, by = 1/5),
include.lowest = TRUE)
print(paste("Quintile boundaries:", tau_quintiles))
# Assign rankings to test data
ranking_5[folds == fold] <- cut(priority.cate[folds == fold],
breaks = tau_quintiles,
include.lowest = TRUE,
labels = FALSE)
}
print("Ranking 5: ")
print(table(ranking_5))
# print average outcome and priority.cate by ranking_5
print("Average outcome by ranking 5: ")
print(tapply(Y, ranking_5, mean))
print("Average priority.cate by ranking 5: ")
print(tapply(priority.cate, ranking_5, mean))
# RANKING 20 - print statement
print("RANKING 20")
# Initialize ranking vector
ranking_20 <- rep(NA, length(folds))
# Calculate ventile rankings using cross-fitting
for (fold in unique_folds) {
print(paste("Processing fold:", fold))
# Calculate ventile boundaries using training data
tau_ventiles <- quantile(priority.cate[folds != fold],
probs = seq(0, 1, by = 1/20),
include.lowest = TRUE)
print(paste("Ventile boundaries:", tau_ventiles))
# Assign rankings to test data
ranking_20[folds == fold] <- cut(priority.cate[folds == fold],
breaks = tau_ventiles,
include.lowest = TRUE,
labels = FALSE)
}
print("Ranking 20: ")
print(table(ranking_20))
# print average outcome by ranking_20
print("Average outcome by ranking 20: ")
print(tapply(Y, ranking_20, mean))
# print average priority.cate by ranking_20
print("Average priority.cate by ranking 20: ")
print(tapply(priority.cate, ranking_20, mean))
# Create results path
results_path <- file.path(results_dir,
sprintf("cate_results_%s.RData",
outcome))
# Store results
results <- list(
person_id = newd$person_id,
cate = priority.cate,
se = priority.cate.se,
ranking_5 = ranking_5,
ranking_20 = ranking_20,
autoc = autoc,
autoc_se = autoc_se,
t.stat.oob = t.stat.oob,
forest = cf
)
# Store metadata
metadata <- list(
outcome = outcome,
timestamp = Sys.time(),
seed = seed.cf,
num_threads = numthreadsset,
session_info = sessionInfo()
)
# Combine results and metadata
final_results <- list(
results = results,
metadata = metadata
)
# Save combined results
save(final_results, file = results_path)
# Save a dataframe with only person_id, X, Y, W, weights, folds, cate, se, ranking_5, ranking_20
cate_results <- data.frame(person_id = newd$person_id,
X = X,
Y = Y,
cate_W = W,
weights = weights,
folds = folds,
cate = priority.cate,
cate_se = priority.cate.se,
cate_ranking_5 = ranking_5,
cate_ranking_20 = ranking_20)
cat(sprintf("Results saved to: %s\n", results_path))
}, error = function(e) {
warning(sprintf("Error processing %s: %s", outcome, e$message))
return(NULL)
})
return(cate_results)
}
CLATE function does not run when outcome = “ohp_all_ever_inperson_cw0.”
#####STEP 4-4: Run loop to generate CLATE & CATE results #####
# Loop over outcomes in outcomes_list
for (outcome in outcomes_list) {
print(paste("Processing outcome:", outcome))
# Run both CLATE and CATE functions if outcome != "ohp_all_ever_inperson_cw0"
if (outcome != "ohp_all_ever_inperson_cw0") {
# Get results for this outcome
clate_results <- calculate_clate(d, outcome)
cate_results <- calculate_cate(d, outcome)
# Identify shared columns
shared_cols <- intersect(names(clate_results), names(cate_results))
# Verify shared columns are identical
for (col in shared_cols) {
if (!identical(clate_results[[col]], cate_results[[col]])) {
stop(sprintf("Column %s differs between CLATE and CATE results for outcome %s",
col, outcome))
}
}
# If we get here, all shared columns are identical
# Get unique columns from CATE results
unique_cate_cols <- setdiff(names(cate_results), shared_cols)
# Combine results
combined_results <- cbind(
clate_results,
cate_results[, unique_cate_cols]
)
# Save with an outcome-specific name
assign(paste0(outcome, "_results"), combined_results)
} else {
# Run CATE function only if outcome = "ohp_all_ever_inperson_cw0"
cate_results <- calculate_cate(d, outcome)
assign(paste0(outcome, "_results"), cate_results)
}
}
## [1] "Processing outcome: sbp_neg_cw0"
##
## Processing outcome: sbp_neg_cw0
## [1] "fmla: ~0 + numhh_list + gender_inp + age_inp + hispanic_inp + race_white_inp + "
## [2] "fmla: race_black_inp + race_nwother_inp + ast_dx_pre_lottery + "
## [3] "fmla: dia_dx_pre_lottery + hbp_dx_pre_lottery + chl_dx_pre_lottery + "
## [4] "fmla: ami_dx_pre_lottery + chf_dx_pre_lottery + emp_dx_pre_lottery + "
## [5] "fmla: kid_dx_pre_lottery + cancer_dx_pre_lottery + dep_dx_pre_lottery + "
## [6] "fmla: lessHS + HSorGED + charg_tot_pre_ed + ed_charg_tot_pre_ed"
## [1] "X matrix: 12167 rows x 21 columns (matrix/array)"
## [1] "Name of W: ohp_all_ever_inperson_cw0 Length of W: 12167 Types of W: integer"
## [1] "Name of Z: eligibility Length of Z: 12167 Types of Z: integer"
## [1] "Name of Y: sbp_neg_cw0 Length of Y: 12167 Types of Y: numeric"
## [1] "Length of weights: 12167 Types of weights: numeric"
## [1] "Length of folds: 12167 Types of folds: integer"
##
## Fitting instrumental forest...
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -9.643 -0.088 1.051 0.569 1.867 5.997
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.645 1.598 2.157 2.400 2.932 8.270
## Warning in get_scores.instrumental_forest(forest, subset = subset,
## debiasing.weights = debiasing.weights, : The instrument appears to be weak,
## with some compliance scores as low as -0.0607
## [1] "AUTOC: 6.60979075367768 AUTOC SE: 9.03768523975623"
## [1] "t-statistic for AUTOC: 0.731358813493704"
## [1] "RANKING 5"
## [1] "Processing fold: 8"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -0.644252914340446"
## [3] "Quintile boundaries: 0.636454166622027"
## [4] "Quintile boundaries: 1.30676851812979"
## [5] "Quintile boundaries: 1.99337320548439"
## [6] "Quintile boundaries: 5.99684040392317"
## ranking_5
## 1 2 3 4 5
## 164 130 154 368 397
## [1] "Processing fold: 1"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -0.593243098496514"
## [3] "Quintile boundaries: 0.710379761458994"
## [4] "Quintile boundaries: 1.39329794633477"
## [5] "Quintile boundaries: 2.06529953435719"
## [6] "Quintile boundaries: 5.99684040392317"
## ranking_5
## 1 2 3 4 5
## 403 410 471 649 578
## [1] "Processing fold: 10"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -0.597861558249903"
## [3] "Quintile boundaries: 0.690358531638557"
## [4] "Quintile boundaries: 1.36847427634727"
## [5] "Quintile boundaries: 2.04287138974813"
## [6] "Quintile boundaries: 5.99684040392317"
## ranking_5
## 1 2 3 4 5
## 613 629 726 917 809
## [1] "Processing fold: 3"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -0.514730954535782"
## [3] "Quintile boundaries: 0.7360335126426"
## [4] "Quintile boundaries: 1.40160612783087"
## [5] "Quintile boundaries: 2.06072770584594"
## [6] "Quintile boundaries: 5.99684040392317"
## ranking_5
## 1 2 3 4 5
## 926 868 965 1131 979
## [1] "Processing fold: 9"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -0.599115482680688"
## [3] "Quintile boundaries: 0.666564596610817"
## [4] "Quintile boundaries: 1.32701691841324"
## [5] "Quintile boundaries: 1.96465162694608"
## [6] "Quintile boundaries: 5.99684040392317"
## ranking_5
## 1 2 3 4 5
## 1134 1026 1115 1326 1464
## [1] "Processing fold: 4"
## [1] "Quintile boundaries: -9.60043107134764"
## [2] "Quintile boundaries: -0.519259734497905"
## [3] "Quintile boundaries: 0.800182042072073"
## [4] "Quintile boundaries: 1.44632472121756"
## [5] "Quintile boundaries: 2.10067850146567"
## [6] "Quintile boundaries: 5.99684040392317"
## ranking_5
## 1 2 3 4 5
## 1452 1473 1375 1468 1536
## [1] "Processing fold: 7"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -0.580505395383313"
## [3] "Quintile boundaries: 0.688204296797965"
## [4] "Quintile boundaries: 1.37894538127402"
## [5] "Quintile boundaries: 2.07331753431179"
## [6] "Quintile boundaries: 5.99684040392317"
## ranking_5
## 1 2 3 4 5
## 1691 1666 1685 1792 1681
## [1] "Processing fold: 6"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -0.643829239047997"
## [3] "Quintile boundaries: 0.626537514364275"
## [4] "Quintile boundaries: 1.29137280823184"
## [5] "Quintile boundaries: 1.93971142302895"
## [6] "Quintile boundaries: 5.63573401463081"
## ranking_5
## 1 2 3 4 5
## 1861 1768 1820 2037 2260
## [1] "Processing fold: 5"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -0.462798220684813"
## [3] "Quintile boundaries: 0.831421140607129"
## [4] "Quintile boundaries: 1.46499745028252"
## [5] "Quintile boundaries: 2.11452644076394"
## [6] "Quintile boundaries: 5.99684040392317"
## ranking_5
## 1 2 3 4 5
## 2236 2232 2034 2132 2282
## [1] "Processing fold: 2"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -0.598358644217261"
## [3] "Quintile boundaries: 0.688201779405125"
## [4] "Quintile boundaries: 1.37956978756119"
## [5] "Quintile boundaries: 2.04743347678089"
## [6] "Quintile boundaries: 5.99684040392317"
## ranking_5
## 1 2 3 4 5
## 2457 2458 2354 2389 2507
## [1] "RANKING 20"
## [1] "Processing fold: 8"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -3.91514188034147"
## [3] "Quintile boundaries: -2.33537832696344"
## [4] "Quintile boundaries: -1.38056055599172"
## [5] "Quintile boundaries: -0.644252914340446"
## [6] "Quintile boundaries: -0.168747025145593"
## [7] "Quintile boundaries: 0.17352142177785"
## [8] "Quintile boundaries: 0.417209013077621"
## [9] "Quintile boundaries: 0.636454166622027"
## [10] "Quintile boundaries: 0.842702799224499"
## [11] "Quintile boundaries: 0.993998157382675"
## [12] "Quintile boundaries: 1.14203915094192"
## [13] "Quintile boundaries: 1.30676851812979"
## [14] "Quintile boundaries: 1.46532202206842"
## [15] "Quintile boundaries: 1.6372760370216"
## [16] "Quintile boundaries: 1.81089791927045"
## [17] "Quintile boundaries: 1.99337320548439"
## [18] "Quintile boundaries: 2.19686395658321"
## [19] "Quintile boundaries: 2.46541534441195"
## [20] "Quintile boundaries: 2.9106060532467"
## [21] "Quintile boundaries: 5.99684040392317"
## ranking_20
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 46 37 45 36 32 25 39 34 47 29 37 41 87 104 79 98 83 87 102 125
## [1] "Processing fold: 1"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -3.827430570622"
## [3] "Quintile boundaries: -2.24289429241832"
## [4] "Quintile boundaries: -1.29467904515383"
## [5] "Quintile boundaries: -0.593243098496514"
## [6] "Quintile boundaries: -0.113832858902498"
## [7] "Quintile boundaries: 0.221522817479394"
## [8] "Quintile boundaries: 0.474676806925722"
## [9] "Quintile boundaries: 0.710379761458994"
## [10] "Quintile boundaries: 0.905541368694363"
## [11] "Quintile boundaries: 1.05395272939537"
## [12] "Quintile boundaries: 1.22086260029798"
## [13] "Quintile boundaries: 1.39329794633477"
## [14] "Quintile boundaries: 1.54923540215144"
## [15] "Quintile boundaries: 1.71336937234415"
## [16] "Quintile boundaries: 1.88434557280009"
## [17] "Quintile boundaries: 2.06529953435719"
## [18] "Quintile boundaries: 2.26134190969104"
## [19] "Quintile boundaries: 2.53091306510557"
## [20] "Quintile boundaries: 3.01307416924447"
## [21] "Quintile boundaries: 5.99684040392317"
## ranking_20
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 121 97 108 77 82 94 125 109 106 106 123 136 142 166 170 171 133 127 155 163
## [1] "Processing fold: 10"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -3.92892811051461"
## [3] "Quintile boundaries: -2.26522908993374"
## [4] "Quintile boundaries: -1.327218465797"
## [5] "Quintile boundaries: -0.597861558249903"
## [6] "Quintile boundaries: -0.101793108335647"
## [7] "Quintile boundaries: 0.224202519719655"
## [8] "Quintile boundaries: 0.470127230074377"
## [9] "Quintile boundaries: 0.690358531638557"
## [10] "Quintile boundaries: 0.885941288735554"
## [11] "Quintile boundaries: 1.03421281046658"
## [12] "Quintile boundaries: 1.20247590215686"
## [13] "Quintile boundaries: 1.36847427634727"
## [14] "Quintile boundaries: 1.53116552818323"
## [15] "Quintile boundaries: 1.69934654438293"
## [16] "Quintile boundaries: 1.86720733424632"
## [17] "Quintile boundaries: 2.04287138974813"
## [18] "Quintile boundaries: 2.25581351088102"
## [19] "Quintile boundaries: 2.53537245730074"
## [20] "Quintile boundaries: 3.02553379221974"
## [21] "Quintile boundaries: 5.99684040392317"
## ranking_20
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 162 164 152 135 151 148 183 147 149 156 210 211 202 246 234 235 226 191 212 180
## [1] "Processing fold: 3"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -3.88135574557611"
## [3] "Quintile boundaries: -2.21132769705426"
## [4] "Quintile boundaries: -1.23062226807462"
## [5] "Quintile boundaries: -0.514730954535782"
## [6] "Quintile boundaries: -0.0218239520142785"
## [7] "Quintile boundaries: 0.277355359092713"
## [8] "Quintile boundaries: 0.512053765093251"
## [9] "Quintile boundaries: 0.7360335126426"
## [10] "Quintile boundaries: 0.924252311358356"
## [11] "Quintile boundaries: 1.07197089083061"
## [12] "Quintile boundaries: 1.24274250397042"
## [13] "Quintile boundaries: 1.40160612783087"
## [14] "Quintile boundaries: 1.55491083181289"
## [15] "Quintile boundaries: 1.7166876338818"
## [16] "Quintile boundaries: 1.88417505437772"
## [17] "Quintile boundaries: 2.06072770584594"
## [18] "Quintile boundaries: 2.26154582904593"
## [19] "Quintile boundaries: 2.5422305716742"
## [20] "Quintile boundaries: 3.01965162127038"
## [21] "Quintile boundaries: 5.99684040392317"
## ranking_20
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 217 250 231 228 236 211 229 192 206 225 272 262 239 301 301 290 287 237 249 206
## [1] "Processing fold: 9"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -3.93760406055076"
## [3] "Quintile boundaries: -2.28738358049764"
## [4] "Quintile boundaries: -1.32923814097495"
## [5] "Quintile boundaries: -0.599115482680688"
## [6] "Quintile boundaries: -0.120124070925415"
## [7] "Quintile boundaries: 0.210503262694214"
## [8] "Quintile boundaries: 0.443674619742238"
## [9] "Quintile boundaries: 0.666564596610817"
## [10] "Quintile boundaries: 0.86554949039976"
## [11] "Quintile boundaries: 1.01514639284775"
## [12] "Quintile boundaries: 1.16894594402644"
## [13] "Quintile boundaries: 1.32701691841324"
## [14] "Quintile boundaries: 1.47907146873248"
## [15] "Quintile boundaries: 1.6305136315796"
## [16] "Quintile boundaries: 1.79792153376821"
## [17] "Quintile boundaries: 1.96465162694608"
## [18] "Quintile boundaries: 2.16614333343942"
## [19] "Quintile boundaries: 2.42651326483745"
## [20] "Quintile boundaries: 2.88089617871942"
## [21] "Quintile boundaries: 5.99684040392317"
## ranking_20
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 257 313 281 283 280 255 271 220 251 262 314 288 280 346 351 349 372 347 383 362
## [1] "Processing fold: 4"
## [1] "Quintile boundaries: -9.60043107134764"
## [2] "Quintile boundaries: -3.78456321217256"
## [3] "Quintile boundaries: -2.1931418346342"
## [4] "Quintile boundaries: -1.23822274004709"
## [5] "Quintile boundaries: -0.519259734497905"
## [6] "Quintile boundaries: -0.0220828681666831"
## [7] "Quintile boundaries: 0.303026081628565"
## [8] "Quintile boundaries: 0.561390509499025"
## [9] "Quintile boundaries: 0.800182042072073"
## [10] "Quintile boundaries: 0.972447670617255"
## [11] "Quintile boundaries: 1.12439396052747"
## [12] "Quintile boundaries: 1.29378846937687"
## [13] "Quintile boundaries: 1.44632472121756"
## [14] "Quintile boundaries: 1.60538973761458"
## [15] "Quintile boundaries: 1.76725855572851"
## [16] "Quintile boundaries: 1.92452035922756"
## [17] "Quintile boundaries: 2.10067850146567"
## [18] "Quintile boundaries: 2.29199381672486"
## [19] "Quintile boundaries: 2.56137037897723"
## [20] "Quintile boundaries: 3.02705546077892"
## [21] "Quintile boundaries: 5.99684040392317"
## ranking_20
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 347 379 349 377 375 390 384 324 327 325 374 349 329 384 375 380 400 363 394 379
## [1] "Processing fold: 7"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -3.86971558444759"
## [3] "Quintile boundaries: -2.24563370014883"
## [4] "Quintile boundaries: -1.29772994983238"
## [5] "Quintile boundaries: -0.580505395383313"
## [6] "Quintile boundaries: -0.100682212325264"
## [7] "Quintile boundaries: 0.223205014101619"
## [8] "Quintile boundaries: 0.463818942968104"
## [9] "Quintile boundaries: 0.688204296797965"
## [10] "Quintile boundaries: 0.900249443039437"
## [11] "Quintile boundaries: 1.05243695087643"
## [12] "Quintile boundaries: 1.22088188088188"
## [13] "Quintile boundaries: 1.37894538127402"
## [14] "Quintile boundaries: 1.55354249321435"
## [15] "Quintile boundaries: 1.72550468190314"
## [16] "Quintile boundaries: 1.89805535230035"
## [17] "Quintile boundaries: 2.07331753431179"
## [18] "Quintile boundaries: 2.26571379126257"
## [19] "Quintile boundaries: 2.55353277402738"
## [20] "Quintile boundaries: 3.02214299186916"
## [21] "Quintile boundaries: 5.99684040392317"
## ranking_20
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 409 440 407 435 423 440 431 372 422 405 461 397 435 483 453 421 445 412 421 403
## [1] "Processing fold: 6"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -3.87266106164882"
## [3] "Quintile boundaries: -2.32201236477056"
## [4] "Quintile boundaries: -1.3822945067092"
## [5] "Quintile boundaries: -0.643829239047997"
## [6] "Quintile boundaries: -0.167209374841911"
## [7] "Quintile boundaries: 0.174244108730333"
## [8] "Quintile boundaries: 0.409395720565897"
## [9] "Quintile boundaries: 0.626537514364275"
## [10] "Quintile boundaries: 0.827620181264414"
## [11] "Quintile boundaries: 0.979874636412327"
## [12] "Quintile boundaries: 1.12995846560861"
## [13] "Quintile boundaries: 1.29137280823184"
## [14] "Quintile boundaries: 1.43924003890014"
## [15] "Quintile boundaries: 1.6018363741344"
## [16] "Quintile boundaries: 1.76470451032961"
## [17] "Quintile boundaries: 1.93971142302895"
## [18] "Quintile boundaries: 2.15226636920211"
## [19] "Quintile boundaries: 2.40177215175141"
## [20] "Quintile boundaries: 2.86669953148156"
## [21] "Quintile boundaries: 5.63573401463081"
## ranking_20
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 470 468 446 477 457 465 453 393 457 430 502 431 484 527 508 518 564 527 592 577
## [1] "Processing fold: 5"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -3.78082558091066"
## [3] "Quintile boundaries: -2.13953356741882"
## [4] "Quintile boundaries: -1.14803700489222"
## [5] "Quintile boundaries: -0.462798220684813"
## [6] "Quintile boundaries: 0.0415242361160807"
## [7] "Quintile boundaries: 0.345359290608124"
## [8] "Quintile boundaries: 0.592241822204554"
## [9] "Quintile boundaries: 0.831421140607129"
## [10] "Quintile boundaries: 0.996672040208078"
## [11] "Quintile boundaries: 1.15029537448286"
## [12] "Quintile boundaries: 1.31840104116191"
## [13] "Quintile boundaries: 1.46499745028252"
## [14] "Quintile boundaries: 1.61686304726776"
## [15] "Quintile boundaries: 1.78158786599882"
## [16] "Quintile boundaries: 1.93688740357094"
## [17] "Quintile boundaries: 2.11452644076394"
## [18] "Quintile boundaries: 2.29801320011639"
## [19] "Quintile boundaries: 2.57151623056251"
## [20] "Quintile boundaries: 3.03747624476094"
## [21] "Quintile boundaries: 5.99684040392317"
## ranking_20
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 560 550 544 582 588 583 551 510 530 482 556 466 516 561 526 529 573 532 595 582
## [1] "Processing fold: 2"
## [1] "Quintile boundaries: -9.64333793055109"
## [2] "Quintile boundaries: -3.906699062629"
## [3] "Quintile boundaries: -2.25549673285277"
## [4] "Quintile boundaries: -1.31202152422161"
## [5] "Quintile boundaries: -0.598358644217261"
## [6] "Quintile boundaries: -0.128696476701344"
## [7] "Quintile boundaries: 0.20183957003684"
## [8] "Quintile boundaries: 0.445350035647271"
## [9] "Quintile boundaries: 0.688201779405125"
## [10] "Quintile boundaries: 0.892397087009201"
## [11] "Quintile boundaries: 1.04894985968065"
## [12] "Quintile boundaries: 1.21238974142486"
## [13] "Quintile boundaries: 1.37956978756119"
## [14] "Quintile boundaries: 1.55253781334544"
## [15] "Quintile boundaries: 1.7149771386575"
## [16] "Quintile boundaries: 1.87521757353529"
## [17] "Quintile boundaries: 2.04743347678089"
## [18] "Quintile boundaries: 2.24389601931646"
## [19] "Quintile boundaries: 2.51163016126617"
## [20] "Quintile boundaries: 2.97589634026766"
## [21] "Quintile boundaries: 5.99684040392317"
## ranking_20
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 612 617 599 629 625 617 618 598 604 582 615 553 617 618 573 581 612 595 654 646
## Results saved to: PP_Full_Analysis/Intermediate_data/Testing/empirical/cw_med_5/clate_results_sbp_neg_cw0.RData
## Warning in value[[3L]](cond): Error processing sbp_neg_cw0: $ operator is
## invalid for atomic vectors
##
## Processing outcome: sbp_neg_cw0
## [1] "fmla: ~"
## [2] "fmla: 0 + numhh_list + gender_inp + age_inp + hispanic_inp + race_white_inp + race_black_inp + race_nwother_inp + ast_dx_pre_lottery + dia_dx_pre_lottery + hbp_dx_pre_lottery + chl_dx_pre_lottery + ami_dx_pre_lottery + chf_dx_pre_lottery + emp_dx_pre_lottery + kid_dx_pre_lottery + cancer_dx_pre_lottery + dep_dx_pre_lottery + lessHS + HSorGED + charg_tot_pre_ed + ed_charg_tot_pre_ed"
## [1] "Dimensions of X: 12167 Types of X: matrix"
## [2] "Dimensions of X: 21 Types of X: array"
## [1] "Name of W: eligibility Dimensions of W: 12167 Types of W: integer"
## [1] "Name of Y: sbp_neg_cw0 Dimensions of Y: 12167 Types of Y: numeric"
## [1] "Dimensions of weights: 12167 Types of weights: numeric"
## [1] "Dimensions of folds: 12167 Types of folds: integer"
## [1] "Table of folds: "
## folds
## 1 2 3 4 5 6 7 8 9 10
## 1298 1249 1175 1240 1170 1232 1211 1213 1196 1183
##
## Fitting causal forest...
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.9330 -0.0219 0.1934 0.1116 0.3367 0.9913
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.149 0.311 0.421 0.485 0.599 2.489
## [1] "AUTOC: -0.108843044710947 AUTOC SE: 0.256852305886902"
## [1] "t-statistic for AUTOC: -0.423757319737176"
## [1] "RANKING 5"
## [1] "Processing fold: 8"
## [1] "Quintile boundaries: -1.93301987441284"
## [2] "Quintile boundaries: -0.108243788776989"
## [3] "Quintile boundaries: 0.113279781872706"
## [4] "Quintile boundaries: 0.236028094220441"
## [5] "Quintile boundaries: 0.363584843947608"
## [6] "Quintile boundaries: 0.991290990126138"
## [1] "Processing fold: 1"
## [1] "Quintile boundaries: -1.93301987441284"
## [2] "Quintile boundaries: -0.0994391272199975"
## [3] "Quintile boundaries: 0.125889010468613"
## [4] "Quintile boundaries: 0.251685083701791"
## [5] "Quintile boundaries: 0.38497761891962"
## [6] "Quintile boundaries: 0.991290990126138"
## [1] "Processing fold: 10"
## [1] "Quintile boundaries: -1.93301987441284"
## [2] "Quintile boundaries: -0.101548743246648"
## [3] "Quintile boundaries: 0.121723331133646"
## [4] "Quintile boundaries: 0.243345131232122"
## [5] "Quintile boundaries: 0.374657849618638"
## [6] "Quintile boundaries: 0.991290990126138"
## [1] "Processing fold: 3"
## [1] "Quintile boundaries: -1.93301987441284"
## [2] "Quintile boundaries: -0.0886789285617261"
## [3] "Quintile boundaries: 0.128969910824437"
## [4] "Quintile boundaries: 0.252511090881263"
## [5] "Quintile boundaries: 0.380248551747209"
## [6] "Quintile boundaries: 0.991290990126138"
## [1] "Processing fold: 9"
## [1] "Quintile boundaries: -1.93301987441284"
## [2] "Quintile boundaries: -0.102167857798624"
## [3] "Quintile boundaries: 0.118596838164622"
## [4] "Quintile boundaries: 0.239485229614079"
## [5] "Quintile boundaries: 0.360174960729071"
## [6] "Quintile boundaries: 0.991290990126138"
## [1] "Processing fold: 4"
## [1] "Quintile boundaries: -1.85647316321698"
## [2] "Quintile boundaries: -0.0855226877296801"
## [3] "Quintile boundaries: 0.137892582011159"
## [4] "Quintile boundaries: 0.262242263087959"
## [5] "Quintile boundaries: 0.391787460261034"
## [6] "Quintile boundaries: 0.991290990126138"
## [1] "Processing fold: 7"
## [1] "Quintile boundaries: -1.93301987441284"
## [2] "Quintile boundaries: -0.0908831831873371"
## [3] "Quintile boundaries: 0.128301909306015"
## [4] "Quintile boundaries: 0.254428111271374"
## [5] "Quintile boundaries: 0.390440986722185"
## [6] "Quintile boundaries: 0.991290990126138"
## [1] "Processing fold: 6"
## [1] "Quintile boundaries: -1.93301987441284"
## [2] "Quintile boundaries: -0.106700421540492"
## [3] "Quintile boundaries: 0.112736653859025"
## [4] "Quintile boundaries: 0.236030190473006"
## [5] "Quintile boundaries: 0.352236309384544"
## [6] "Quintile boundaries: 0.9806283260347"
## [1] "Processing fold: 5"
## [1] "Quintile boundaries: -1.93301987441284"
## [2] "Quintile boundaries: -0.0585035175054851"
## [3] "Quintile boundaries: 0.150091190882994"
## [4] "Quintile boundaries: 0.267238503038912"
## [5] "Quintile boundaries: 0.392118198229431"
## [6] "Quintile boundaries: 0.991290990126138"
## [1] "Processing fold: 2"
## [1] "Quintile boundaries: -1.93301987441284"
## [2] "Quintile boundaries: -0.103939738407682"
## [3] "Quintile boundaries: 0.118631951930629"
## [4] "Quintile boundaries: 0.245213353449846"
## [5] "Quintile boundaries: 0.376591920068862"
## [6] "Quintile boundaries: 0.991290990126138"
## [1] "Ranking 5: "
## ranking_5
## 1 2 3 4 5
## 2504 2449 2363 2295 2554
## [1] "Average outcome by ranking 5: "
## 1 2 3 4 5
## -121.2 -118.1 -117.1 -118.1 -120.7
## [1] "Average priority.cate by ranking 5: "
## 1 2 3 4 5
## -0.43317 0.03689 0.19249 0.30379 0.47038
## [1] "RANKING 20"
## [1] "Processing fold: 8"
## [1] "Ventile boundaries: -1.93301987441284"
## [2] "Ventile boundaries: -0.622788524875838"
## [3] "Ventile boundaries: -0.357733964548729"
## [4] "Ventile boundaries: -0.205265248351498"
## [5] "Ventile boundaries: -0.108243788776989"
## [6] "Ventile boundaries: -0.0362015603886667"
## [7] "Ventile boundaries: 0.0224338666055155"
## [8] "Ventile boundaries: 0.0712617881748323"
## [9] "Ventile boundaries: 0.113279781872706"
## [10] "Ventile boundaries: 0.141857604832184"
## [11] "Ventile boundaries: 0.176542523502198"
## [12] "Ventile boundaries: 0.21010960560996"
## [13] "Ventile boundaries: 0.236028094220441"
## [14] "Ventile boundaries: 0.260946741954317"
## [15] "Ventile boundaries: 0.288803394897487"
## [16] "Ventile boundaries: 0.320355074187017"
## [17] "Ventile boundaries: 0.363584843947608"
## [18] "Ventile boundaries: 0.407280801611316"
## [19] "Ventile boundaries: 0.451712113883169"
## [20] "Ventile boundaries: 0.514862649546393"
## [21] "Ventile boundaries: 0.991290990126138"
## [1] "Processing fold: 1"
## [1] "Ventile boundaries: -1.93301987441284"
## [2] "Ventile boundaries: -0.611939915397532"
## [3] "Ventile boundaries: -0.348490053470197"
## [4] "Ventile boundaries: -0.195882996825787"
## [5] "Ventile boundaries: -0.0994391272199975"
## [6] "Ventile boundaries: -0.0281160525616813"
## [7] "Ventile boundaries: 0.0316360238402868"
## [8] "Ventile boundaries: 0.0833750610872936"
## [9] "Ventile boundaries: 0.125889010468613"
## [10] "Ventile boundaries: 0.159250648929932"
## [11] "Ventile boundaries: 0.194757218249485"
## [12] "Ventile boundaries: 0.223687561838814"
## [13] "Ventile boundaries: 0.251685083701791"
## [14] "Ventile boundaries: 0.281236480617924"
## [15] "Ventile boundaries: 0.30948330891535"
## [16] "Ventile boundaries: 0.346078435320774"
## [17] "Ventile boundaries: 0.38497761891962"
## [18] "Ventile boundaries: 0.418444240525373"
## [19] "Ventile boundaries: 0.46060602089172"
## [20] "Ventile boundaries: 0.520856714363221"
## [21] "Ventile boundaries: 0.991290990126138"
## [1] "Processing fold: 10"
## [1] "Ventile boundaries: -1.93301987441284"
## [2] "Ventile boundaries: -0.621530935516495"
## [3] "Ventile boundaries: -0.353127447180405"
## [4] "Ventile boundaries: -0.197084127203838"
## [5] "Ventile boundaries: -0.101548743246648"
## [6] "Ventile boundaries: -0.0305227866415122"
## [7] "Ventile boundaries: 0.0281431593893946"
## [8] "Ventile boundaries: 0.0779899659714191"
## [9] "Ventile boundaries: 0.121723331133646"
## [10] "Ventile boundaries: 0.15144971366883"
## [11] "Ventile boundaries: 0.187695614649001"
## [12] "Ventile boundaries: 0.217029569545673"
## [13] "Ventile boundaries: 0.243345131232122"
## [14] "Ventile boundaries: 0.271169378008915"
## [15] "Ventile boundaries: 0.300182951789991"
## [16] "Ventile boundaries: 0.334386199151869"
## [17] "Ventile boundaries: 0.374657849618638"
## [18] "Ventile boundaries: 0.409970332643253"
## [19] "Ventile boundaries: 0.455586268186038"
## [20] "Ventile boundaries: 0.520964894544548"
## [21] "Ventile boundaries: 0.991290990126138"
## [1] "Processing fold: 3"
## [1] "Ventile boundaries: -1.93301987441284"
## [2] "Ventile boundaries: -0.605073624044172"
## [3] "Ventile boundaries: -0.337726678473857"
## [4] "Ventile boundaries: -0.185879861641206"
## [5] "Ventile boundaries: -0.0886789285617261"
## [6] "Ventile boundaries: -0.0135956457080875"
## [7] "Ventile boundaries: 0.0442778708688986"
## [8] "Ventile boundaries: 0.0915899749612692"
## [9] "Ventile boundaries: 0.128969910824437"
## [10] "Ventile boundaries: 0.161227654229693"
## [11] "Ventile boundaries: 0.197803029525717"
## [12] "Ventile boundaries: 0.224499353944094"
## [13] "Ventile boundaries: 0.252511090881263"
## [14] "Ventile boundaries: 0.279246469752128"
## [15] "Ventile boundaries: 0.304967624576343"
## [16] "Ventile boundaries: 0.33907797431729"
## [17] "Ventile boundaries: 0.380248551747209"
## [18] "Ventile boundaries: 0.416799426439965"
## [19] "Ventile boundaries: 0.460801207777424"
## [20] "Ventile boundaries: 0.521989801322047"
## [21] "Ventile boundaries: 0.991290990126138"
## [1] "Processing fold: 9"
## [1] "Ventile boundaries: -1.93301987441284"
## [2] "Ventile boundaries: -0.622529088083222"
## [3] "Ventile boundaries: -0.354425816985857"
## [4] "Ventile boundaries: -0.200253586265673"
## [5] "Ventile boundaries: -0.102167857798624"
## [6] "Ventile boundaries: -0.0294544545033001"
## [7] "Ventile boundaries: 0.0294704123684153"
## [8] "Ventile boundaries: 0.076927391891724"
## [9] "Ventile boundaries: 0.118596838164622"
## [10] "Ventile boundaries: 0.146747862055264"
## [11] "Ventile boundaries: 0.182640688614569"
## [12] "Ventile boundaries: 0.21291888291455"
## [13] "Ventile boundaries: 0.239485229614079"
## [14] "Ventile boundaries: 0.264673383409916"
## [15] "Ventile boundaries: 0.291380761582032"
## [16] "Ventile boundaries: 0.320026835957081"
## [17] "Ventile boundaries: 0.360174960729071"
## [18] "Ventile boundaries: 0.401104900388554"
## [19] "Ventile boundaries: 0.43918684722995"
## [20] "Ventile boundaries: 0.505509913347003"
## [21] "Ventile boundaries: 0.991290990126138"
## [1] "Processing fold: 4"
## [1] "Ventile boundaries: -1.85647316321698"
## [2] "Ventile boundaries: -0.59886285127684"
## [3] "Ventile boundaries: -0.342899041550503"
## [4] "Ventile boundaries: -0.178774483738233"
## [5] "Ventile boundaries: -0.0855226877296801"
## [6] "Ventile boundaries: -0.0124572333085423"
## [7] "Ventile boundaries: 0.0513911094031834"
## [8] "Ventile boundaries: 0.102500194672404"
## [9] "Ventile boundaries: 0.137892582011159"
## [10] "Ventile boundaries: 0.176158166128517"
## [11] "Ventile boundaries: 0.210059399821669"
## [12] "Ventile boundaries: 0.237285238100885"
## [13] "Ventile boundaries: 0.262242263087959"
## [14] "Ventile boundaries: 0.289163526204913"
## [15] "Ventile boundaries: 0.317064037096731"
## [16] "Ventile boundaries: 0.355377948132279"
## [17] "Ventile boundaries: 0.391787460261034"
## [18] "Ventile boundaries: 0.423535284766643"
## [19] "Ventile boundaries: 0.465478195407889"
## [20] "Ventile boundaries: 0.524992815497549"
## [21] "Ventile boundaries: 0.991290990126138"
## [1] "Processing fold: 7"
## [1] "Ventile boundaries: -1.93301987441284"
## [2] "Ventile boundaries: -0.622519390604753"
## [3] "Ventile boundaries: -0.348463348893451"
## [4] "Ventile boundaries: -0.186078246324117"
## [5] "Ventile boundaries: -0.0908831831873371"
## [6] "Ventile boundaries: -0.0191381100610608"
## [7] "Ventile boundaries: 0.0409340643929713"
## [8] "Ventile boundaries: 0.0888191980437601"
## [9] "Ventile boundaries: 0.128301909306015"
## [10] "Ventile boundaries: 0.162757026505197"
## [11] "Ventile boundaries: 0.198651664180684"
## [12] "Ventile boundaries: 0.227638361827809"
## [13] "Ventile boundaries: 0.254428111271374"
## [14] "Ventile boundaries: 0.283527612596015"
## [15] "Ventile boundaries: 0.313183372401017"
## [16] "Ventile boundaries: 0.353774870138312"
## [17] "Ventile boundaries: 0.390440986722185"
## [18] "Ventile boundaries: 0.422804493445907"
## [19] "Ventile boundaries: 0.464559747270682"
## [20] "Ventile boundaries: 0.525041756018219"
## [21] "Ventile boundaries: 0.991290990126138"
## [1] "Processing fold: 6"
## [1] "Ventile boundaries: -1.93301987441284"
## [2] "Ventile boundaries: -0.622128396840227"
## [3] "Ventile boundaries: -0.357996271460854"
## [4] "Ventile boundaries: -0.204307235056442"
## [5] "Ventile boundaries: -0.106700421540492"
## [6] "Ventile boundaries: -0.035800154565186"
## [7] "Ventile boundaries: 0.022366976459366"
## [8] "Ventile boundaries: 0.0703955396439935"
## [9] "Ventile boundaries: 0.112736653859025"
## [10] "Ventile boundaries: 0.140674876280232"
## [11] "Ventile boundaries: 0.176078478314598"
## [12] "Ventile boundaries: 0.208939787994482"
## [13] "Ventile boundaries: 0.236030190473006"
## [14] "Ventile boundaries: 0.25984536015322"
## [15] "Ventile boundaries: 0.286043746595285"
## [16] "Ventile boundaries: 0.313681335589084"
## [17] "Ventile boundaries: 0.352236309384544"
## [18] "Ventile boundaries: 0.392796031596651"
## [19] "Ventile boundaries: 0.433603199372835"
## [20] "Ventile boundaries: 0.491173708642662"
## [21] "Ventile boundaries: 0.9806283260347"
## [1] "Processing fold: 5"
## [1] "Ventile boundaries: -1.93301987441284"
## [2] "Ventile boundaries: -0.578190931446627"
## [3] "Ventile boundaries: -0.315966001495087"
## [4] "Ventile boundaries: -0.156468553971104"
## [5] "Ventile boundaries: -0.0585035175054851"
## [6] "Ventile boundaries: 0.0150931118892233"
## [7] "Ventile boundaries: 0.071943624075206"
## [8] "Ventile boundaries: 0.117851485654704"
## [9] "Ventile boundaries: 0.150091190882994"
## [10] "Ventile boundaries: 0.188208273560978"
## [11] "Ventile boundaries: 0.217442618976878"
## [12] "Ventile boundaries: 0.24283368565962"
## [13] "Ventile boundaries: 0.267238503038912"
## [14] "Ventile boundaries: 0.292351613190232"
## [15] "Ventile boundaries: 0.32010084063056"
## [16] "Ventile boundaries: 0.35630606993303"
## [17] "Ventile boundaries: 0.392118198229431"
## [18] "Ventile boundaries: 0.423970740567959"
## [19] "Ventile boundaries: 0.46525538714646"
## [20] "Ventile boundaries: 0.524594865743772"
## [21] "Ventile boundaries: 0.991290990126138"
## [1] "Processing fold: 2"
## [1] "Ventile boundaries: -1.93301987441284"
## [2] "Ventile boundaries: -0.627528641682461"
## [3] "Ventile boundaries: -0.356135765233695"
## [4] "Ventile boundaries: -0.201700681859173"
## [5] "Ventile boundaries: -0.103939738407682"
## [6] "Ventile boundaries: -0.033663348684952"
## [7] "Ventile boundaries: 0.0263250592910331"
## [8] "Ventile boundaries: 0.0751428207934547"
## [9] "Ventile boundaries: 0.118631951930629"
## [10] "Ventile boundaries: 0.148686772111625"
## [11] "Ventile boundaries: 0.187028929106598"
## [12] "Ventile boundaries: 0.217879223135115"
## [13] "Ventile boundaries: 0.245213353449846"
## [14] "Ventile boundaries: 0.27209857337009"
## [15] "Ventile boundaries: 0.300420718111642"
## [16] "Ventile boundaries: 0.336455107470363"
## [17] "Ventile boundaries: 0.376591920068862"
## [18] "Ventile boundaries: 0.412261670960812"
## [19] "Ventile boundaries: 0.455904148057559"
## [20] "Ventile boundaries: 0.520690175246193"
## [21] "Ventile boundaries: 0.991290990126138"
## [1] "Ranking 20: "
## ranking_20
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 609 617 634 644 620 598 586 645 571 558 602 632 566 623 555 551 621 665 587 681
## [1] "Average outcome by ranking 20: "
## 1 2 3 4 5 6 7 8 9 10 11
## -122.8 -120.8 -121.5 -119.9 -119.1 -119.2 -117.2 -117.0 -118.2 -118.2 -116.3
## 12 13 14 15 16 17 18 19 20
## -116.0 -116.8 -119.1 -118.0 -118.6 -119.2 -119.2 -121.7 -122.6
## [1] "Average priority.cate by ranking 20: "
## 1 2 3 4 5 6 7 8
## -0.90324 -0.46376 -0.25957 -0.13024 -0.04882 0.01527 0.06743 0.11157
## 9 10 11 12 13 14 15 16
## 0.14344 0.17692 0.20863 0.23519 0.26163 0.28842 0.31750 0.35066
## 17 18 19 20
## 0.38859 0.42760 0.47808 0.58009
## Results saved to: PP_Full_Analysis/Intermediate_data/Testing/empirical/cw_med_5/cate_results_sbp_neg_cw0.RData
#####STEP 4-5: Create lambda outcomes #####
# Loop over outcomes to add lambda outcomes
for (outcome in outcomes_list) {
print(paste("Processing lambda outcomes for:", outcome))
# Read in the results dataframe
results_obj_name <- paste0(outcome, "_results")
results <- get(results_obj_name)
# Loop over lambda values
for (i in seq_along(lambda_list)) {
lambda <- lambda_list[i]
cate_lambda_outcome <- paste0("cate_lambda_", i-1)
# Create cate lambda outcome
results[[cate_lambda_outcome]] <- results$cate - lambda * results$cate_se
print(paste("Summary of", cate_lambda_outcome, ":"))
print(summary(results[[cate_lambda_outcome]]))
# Create quintile rankings based on lambda_outcome
results[[paste0(cate_lambda_outcome, "_ranking_5")]] <- cut(results[[cate_lambda_outcome]],
breaks = quantile(results[[cate_lambda_outcome]], probs = seq(0, 1, by = 1/5)),
include.lowest = TRUE,
labels = FALSE)
# Print information on ranking_5
print(paste("Information on", cate_lambda_outcome, "_ranking_5:"))
print(summary(results[[paste0(cate_lambda_outcome, "_ranking_5")]]))
# Create ventile rankings based on lambda_outcome
results[[paste0(cate_lambda_outcome, "_ranking_20")]] <- cut(results[[cate_lambda_outcome]],
breaks = quantile(results[[cate_lambda_outcome]], probs = seq(0, 1, by = 1/20)),
include.lowest = TRUE,
labels = FALSE)
# Print information on ranking_20
print(paste("Information on", cate_lambda_outcome, "_ranking_20:"))
print(summary(results[[paste0(cate_lambda_outcome, "_ranking_20")]]))
}
# Update the results object
assign(results_obj_name, results)
# Print information on results
print(paste("Information on:", results_obj_name))
glimpse(results)
}
## [1] "Processing lambda outcomes for: sbp_neg_cw0"
## [1] "Summary of cate_lambda_0 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.9330 -0.0219 0.1934 0.1116 0.3367 0.9913
## [1] "Information on cate_lambda_0 _ranking_5:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 2 3 3 4 5
## [1] "Information on cate_lambda_0 _ranking_20:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 5.5 10.0 10.5 15.0 20.0
## [1] "Summary of cate_lambda_1 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.4788 -0.1511 0.0867 -0.0095 0.2385 0.8203
## [1] "Information on cate_lambda_1 _ranking_5:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 2 3 3 4 5
## [1] "Information on cate_lambda_1 _ranking_20:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 5.5 10.0 10.5 15.5 20.0
## [1] "Summary of cate_lambda_2 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.1011 -0.2908 -0.0149 -0.1307 0.1464 0.7438
## [1] "Information on cate_lambda_2 _ranking_5:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 2 3 3 4 5
## [1] "Information on cate_lambda_2 _ranking_20:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 5.5 10.0 10.5 15.5 20.0
## [1] "Summary of cate_lambda_3 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.723 -0.438 -0.120 -0.252 0.054 0.672
## [1] "Information on cate_lambda_3 _ranking_5:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 2 3 3 4 5
## [1] "Information on cate_lambda_3 _ranking_20:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 5.5 10.0 10.5 15.5 20.0
## [1] "Summary of cate_lambda_4 :"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4.346 -0.590 -0.226 -0.373 -0.033 0.600
## [1] "Information on cate_lambda_4 _ranking_5:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 2 3 3 4 5
## [1] "Information on cate_lambda_4 _ranking_20:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 5.5 10.0 10.5 15.0 20.0
## [1] "Information on: sbp_neg_cw0_results"
## Rows: 12,167
## Columns: 51
## $ person_id <int> 5, 8, 16, 17, 18, 23, 24, 29, 47, 57, 59, 68,…
## $ X.numhh_list <dbl> 1, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ X.gender_inp <dbl> 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, …
## $ X.age_inp <dbl> 60, 41, 39, 52, 51, 32, 34, 23, 43, 46, 38, 2…
## $ X.hispanic_inp <dbl> 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ X.race_white_inp <dbl> 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, …
## $ X.race_black_inp <dbl> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, …
## $ X.race_nwother_inp <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ X.ast_dx_pre_lottery <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, …
## $ X.dia_dx_pre_lottery <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ X.hbp_dx_pre_lottery <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, …
## $ X.chl_dx_pre_lottery <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ X.ami_dx_pre_lottery <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ X.chf_dx_pre_lottery <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ X.emp_dx_pre_lottery <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ X.kid_dx_pre_lottery <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ X.cancer_dx_pre_lottery <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ X.dep_dx_pre_lottery <dbl> 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, …
## $ X.lessHS <dbl> 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ X.HSorGED <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, …
## $ X.charg_tot_pre_ed <dbl> 0.0, 0.0, 1888.2, 0.0, 1715.3, 0.0, 0.0, 5743…
## $ X.ed_charg_tot_pre_ed <dbl> 0.0, 0.0, 1888.2, 0.0, 1006.3, 0.0, 0.0, 4542…
## $ Y <dbl> -144, -134, -126, -168, -119, -98, -108, -125…
## $ clate_W <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, …
## $ Z <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, …
## $ weights <dbl> 1.1504, 0.8975, 1.0000, 1.2126, 1.0000, 1.003…
## $ folds <int> 8, 1, 10, 3, 10, 9, 9, 4, 4, 10, 7, 6, 5, 7, …
## $ clate <dbl> 1.0925, 2.1799, 0.9884, 0.6387, 0.4253, 1.931…
## $ clate_se <dbl> 1.7080, 4.3096, 1.3903, 2.0994, 2.0334, 1.513…
## $ clate_ranking_5 <dbl> 3, 5, 3, 2, 2, 4, 4, 1, 2, 3, 1, 5, 2, 4, 4, …
## $ clate_ranking_20 <dbl> 11, 17, 10, 8, 7, 16, 16, 2, 8, 10, 3, 19, 5,…
## $ cate_W <int> 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, …
## $ cate <dbl> 0.20748, 0.35149, 0.52744, 0.15051, -0.06743,…
## $ cate_se <dbl> 0.8420, 0.2838, 0.6358, 1.0498, 0.6368, 0.240…
## $ cate_ranking_5 <int> 3, 4, 5, 3, 2, 5, 5, 1, 2, 2, 1, 5, 1, 5, 4, …
## $ cate_ranking_20 <int> 11, 16, 20, 9, 5, 18, 20, 1, 7, 7, 3, 18, 4, …
## $ cate_lambda_0 <dbl> 0.20748, 0.35149, 0.52744, 0.15051, -0.06743,…
## $ cate_lambda_0_ranking_5 <int> 3, 4, 5, 3, 2, 5, 5, 1, 2, 2, 1, 5, 2, 5, 4, …
## $ cate_lambda_0_ranking_20 <int> 11, 16, 20, 9, 5, 18, 20, 1, 8, 7, 3, 17, 5, …
## $ cate_lambda_1 <dbl> -0.003019, 0.280529, 0.368482, -0.111937, -0.…
## $ cate_lambda_1_ranking_5 <int> 2, 5, 5, 2, 2, 5, 5, 1, 3, 2, 1, 5, 1, 5, 4, …
## $ cate_lambda_1_ranking_20 <int> 8, 17, 19, 6, 5, 19, 20, 1, 9, 8, 2, 18, 4, 1…
## $ cate_lambda_2 <dbl> -0.21352, 0.20957, 0.20953, -0.37438, -0.3858…
## $ cate_lambda_2_ranking_5 <int> 2, 5, 5, 2, 2, 5, 5, 1, 3, 3, 1, 5, 1, 5, 4, …
## $ cate_lambda_2_ranking_20 <int> 7, 17, 17, 5, 5, 19, 20, 1, 10, 9, 2, 19, 4, …
## $ cate_lambda_3 <dbl> -0.424014, 0.138614, 0.050570, -0.636826, -0.…
## $ cate_lambda_3_ranking_5 <int> 2, 5, 4, 1, 2, 5, 5, 1, 3, 3, 1, 5, 1, 5, 4, …
## $ cate_lambda_3_ranking_20 <int> 6, 17, 15, 4, 5, 19, 19, 1, 12, 10, 2, 19, 4,…
## $ cate_lambda_4 <dbl> -0.63451, 0.06766, -0.10839, -0.89927, -0.704…
## $ cate_lambda_4_ranking_5 <int> 2, 5, 4, 1, 2, 5, 5, 1, 4, 3, 1, 5, 1, 5, 4, …
## $ cate_lambda_4_ranking_20 <int> 5, 18, 13, 3, 5, 19, 19, 1, 13, 11, 2, 20, 4,…
#####STEP 4-6: Check that cate = cate_lambda_0 #####
# Loop over outcomes to check that cate = cate_lambda_0
for (outcome in outcomes_list) {
print(paste("Processing outcome:", outcome))
df <- get(paste0(outcome, "_results"))
# Check if values are identical
if(!all(df$cate == df$cate_lambda_0, na.rm=TRUE)) {
stop(paste("Error: cate is not the same as cate_lambda_0"))
} else {
print(paste("cate is the same as cate_lambda_0"))
}
}
## [1] "Processing outcome: sbp_neg_cw0"
## [1] "cate is the same as cate_lambda_0"
#####STEP 4-7: Save final results #####
# Loop over outcomes to save final results
for (outcome in outcomes_list) {
results <- get(paste0(outcome, "_results"))
# Save results
output_file <- file.path(results_dir,
sprintf("cate_clate_results_%s.csv", outcome))
write.csv(results, file = output_file, row.names = FALSE)
print(paste("Saved results for", outcome, "to:", output_file))
}
## [1] "Saved results for sbp_neg_cw0 to: PP_Full_Analysis/Intermediate_data/Testing/empirical/cw_med_5/cate_clate_results_sbp_neg_cw0.csv"