Set up environment
#####STEP 0-1: Reset environment #####
rm(list=ls())
knitr::opts_chunk$set(echo = TRUE)
options(repos = structure(c(CRAN = "http://cran.rstudio.com/")))
#####STEP 0-2: Install packages #####
list.of.packages <- c( "grf", "metafor", "splitstackshape", "dplyr", "tidyverse", "foreach", "cowplot",
"reshape2", "doParallel", "survival", "readstata13", "ggplot2", "rsample", "DiagrammeR",
"e1071", "pscl", "pROC", "caret", "ModelMetrics", "MatchIt", "Hmisc", "scales",
"lmtest", "sandwich","haven", "rpms", "randomForest", "fabricatr", "gridExtra",
"VIM", "mice", "missForest", "lmtest", "ivreg", "kableExtra", "policytree")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
lapply(list.of.packages, library, character.only = TRUE)
## Warning: package 'grf' was built under R version 4.3.3
## Warning: package 'metafor' was built under R version 4.3.3
## Warning: package 'metadat' was built under R version 4.3.3
## Warning: package 'numDeriv' was built under R version 4.3.1
## Warning: package 'splitstackshape' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## Warning: package 'foreach' was built under R version 4.3.3
## Warning: package 'cowplot' was built under R version 4.3.3
## Warning: package 'reshape2' was built under R version 4.3.3
## Warning: package 'doParallel' was built under R version 4.3.3
## Warning: package 'iterators' was built under R version 4.3.3
## Warning: package 'readstata13' was built under R version 4.3.3
## Warning: package 'rsample' was built under R version 4.3.3
## Warning: package 'DiagrammeR' was built under R version 4.3.3
## Warning: package 'e1071' was built under R version 4.3.3
## Warning: package 'pscl' was built under R version 4.3.3
## Warning: package 'pROC' was built under R version 4.3.3
## Warning: package 'caret' was built under R version 4.3.3
## Warning: package 'ModelMetrics' was built under R version 4.3.3
## Warning: package 'MatchIt' was built under R version 4.3.3
## Warning: package 'Hmisc' was built under R version 4.3.3
## Warning: package 'scales' was built under R version 4.3.3
## Warning: package 'lmtest' was built under R version 4.3.3
## Warning: package 'zoo' was built under R version 4.3.3
## Warning: package 'sandwich' was built under R version 4.3.3
## Warning: package 'haven' was built under R version 4.3.3
## Warning: package 'rpms' was built under R version 4.3.3
## Warning: package 'randomForest' was built under R version 4.3.3
## Warning: package 'fabricatr' was built under R version 4.3.3
## Warning: package 'gridExtra' was built under R version 4.3.3
## Warning: package 'VIM' was built under R version 4.3.3
## Warning: package 'colorspace' was built under R version 4.3.3
## Warning: package 'mice' was built under R version 4.3.3
## Warning in check_dep_version(): ABI version mismatch:
## lme4 was built with Matrix ABI version 1
## Current Matrix ABI version is 0
## Please re-install lme4 from source or restore original 'Matrix' package
## Warning: package 'missForest' was built under R version 4.3.3
## Warning: package 'ivreg' was built under R version 4.3.3
## Warning: package 'kableExtra' was built under R version 4.3.3
## Warning: package 'policytree' was built under R version 4.3.3
print(paste("Version of grf package:", packageVersion("grf")))
#####STEP 0-2: Basic information #####
Sys.time()
# Get detailed R session and system information
session_info <- sessionInfo()
system_info <- Sys.info()
# Combine the output
list(session_info = session_info, system_info = system_info)
Set non-run-specific parameters
#####STEP 0-3: Set non-run-specific parameters #####
seedset <- 777
seed.ivforest <- 123456789
seed.cf <- 987654321
numthreadsset <- min(6, parallel::detectCores())
if (numthreadsset!= 6) {
print("the results of grf vary by num.thread (publication paper used num.thread=6)")
}
cat("number of threads (affects grf results):", numthreadsset,"\n")
## number of threads (affects grf results): 6
# Set printing options
options(digits = 4)
Set run-specific parameters
#####STEP 0-4: Set run-specific parameters #####
published_paper_run <- 0
if (published_paper_run == 1) {
print("save intermediate files into both intdat and intdatAsPublished folders")
warning("Changing this setting to 1 overwrites the input files required for replicating on different platforms.")
} else {
print("save intermediate files into only intdat folder")
}
## [1] "save intermediate files into only intdat folder"
forest_files = c("cate_results_sim_sbp_neg_alpha_5_presentation_cw0.RData") # ANALYST FORM
cw_run_name <- "med" # ANALYST FORM
cw_grid <- 5 # ANALYST FORM
# Define cw_name
cw_name <- paste0("cw_", cw_run_name, "_", cw_grid)
Set file paths
#####STEP 0-6: Set file paths #####
# Set the processed path based on published_paper_run
processedpath <- if (published_paper_run == 1) {
paste0("PP_Full_Analysis/Intermediate_data/As_published/empirical/", cw_name)
} else {
paste0("PP_Full_Analysis/Intermediate_data/Testing/empirical/", cw_name)
}
# Set the original data path based on published_paper_run
originaldatapath <- if (published_paper_run == 1) {
paste0("PP_Full_Analysis/Cleaned_input_data/As_published/empirical/", cw_name)
} else {
paste0("PP_Full_Analysis/Cleaned_input_data/Testing/empirical/", cw_name)
}
# Set the results_dir based on published_paper_run and cw_name
results_dir <- if (published_paper_run == 1) {
paste0("PP_Full_Analysis/Saved_tables_figures/As_published/", cw_name)
} else {
paste0("PP_Full_Analysis/Saved_tables_figures/Testing/", cw_name)
}
# If results directory does not exist, create it
if (!dir.exists(results_dir)) {
dir.create(results_dir, recursive = TRUE)
}
# Print processed path and results directory
print(paste("Processed path:", processedpath))
## [1] "Processed path: PP_Full_Analysis/Intermediate_data/Testing/empirical/cw_med_5"
print(paste("Results directory:", results_dir))
## [1] "Results directory: PP_Full_Analysis/Saved_tables_figures/Testing/cw_med_5"
Main loop
# Main Loop
for (i in seq_along(forest_files)) {
# Load the RData file
load(file.path(processedpath, forest_files[i]))
print(paste("Successfully loaded:", forest_files[i]))
# Extract the forest object from final_results
if (exists("final_results")) {
forest_input <- final_results$results$forest
print("Successfully extracted forest from final_results")
# Extract file title for naming
file_title <- sub("\\.RData$", "", basename(forest_files[i]))
# Check if "clate" is present in file_title
if (grepl("clate", file_title)) {
# Do something if "clate" is found
# Replicate cate results but with IV reg instead of LM.
# You can replace this with any code you want to execute when "clate" is found
} else {
combined_df <- get_test_predictions(forest_input, file_title)
final_results <- evaluate_policy(combined_df)
# Extract the file title for naming without the cate_clate_results
results_title <- sub("cate_results_sim_sbp_neg_alpha_5_presentation_cw0", "", file_title)
# Save the final_results to a CSV file
write.csv(final_results, file.path(results_dir, paste0(results_title, "_final_results.csv")), row.names = FALSE)
}
} else {
print("Error: 'final_results' not found in loaded data")
}
}
## [1] "Successfully loaded: cate_results_sim_sbp_neg_alpha_5_presentation_cw0.RData"
## [1] "Successfully extracted forest from final_results"
## [1] "Currently processing test predictions for cate_results_sim_sbp_neg_alpha_5_presentation_cw0"
## [1] "Currently processing policy tree for cate_results_sim_sbp_neg_alpha_5_presentation_cw0"
## [1] "Policy Tree: "
## policy_tree object
## Tree depth: 2
## Actions: 1: control 2: treated
## Variable splits:
## (1) split_variable: ed_charg_tot_pre_ed split_value: 1107.95
## (2) split_variable: ed_charg_tot_pre_ed split_value: 1052.9
## (4) * action: 2
## (5) * action: 1
## (3) split_variable: ed_charg_tot_pre_ed split_value: 27182.6
## (6) * action: 2
## (7) * action: 1
## [1] "Dimensions of pi.hat: "
## [1] 12167
## [1] "Dimensions of predictions: "
## [1] 6035
## [1] "Dimensions of pi.hat: "
## [1] 12167
## [1] "Dimensions of Y: "
## [1] 12167
## [1] "Dimensions of W: "
## [1] 12167
## [1] "Information on combined_df:"
## Rows: 6,035
## Columns: 3
## $ pi.hat <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Y <dbl> -144.00, -84.61, -160.39, -98.00, -108.00, -104.00, -111.00, -1…
## $ W <int> 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, …
## [1] "Starting policy evaluation"
## [1] "Policy Evaluation Results:"
##
##
## |Metric |Value |
## |:-------------------------|:----------------|
## |Estimated Policy Value |-119.084 (0.469) |
## |Uniform Control |-124.528 (0.489) |
## |Difference from Control |5.444 (0.678) |
## |Uniform Treatment |-119.004 (0.468) |
## |Difference from Treatment |-0.079 (0.663) |