#####STEP 0-1: Reset environment #####
rm(list=ls())
knitr::opts_chunk$set(echo = TRUE)
options(repos = structure(c(CRAN = "http://cran.rstudio.com/")))
#####STEP 0-2: Install packages #####
list.of.packages <- c("grf", "metafor", "splitstackshape", "dplyr",
"tidyverse", "foreach", "cowplot", "reshape2",
"doParallel", "survival", "readstata13", "ggplot2",
"rsample", "DiagrammeR", "e1071", "pscl", "pROC",
"caret", "ModelMetrics", "MatchIt", "Hmisc", "scales",
"lmtest", "sandwich", "haven", "rpms", "randomForest",
"fabricatr", "gridExtra", "VIM", "mice",
"lmtest", "ivreg", "kableExtra", "corrplot", "psych")
new.packages <- list.of.packages[!(list.of.packages %in%
installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
lapply(list.of.packages, library, character.only = TRUE)
## Warning: package 'grf' was built under R version 4.3.3
## Warning: package 'metafor' was built under R version 4.3.3
## Warning: package 'metadat' was built under R version 4.3.3
## Warning: package 'numDeriv' was built under R version 4.3.1
## Warning: package 'splitstackshape' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## Warning: package 'foreach' was built under R version 4.3.3
## Warning: package 'cowplot' was built under R version 4.3.3
## Warning: package 'reshape2' was built under R version 4.3.3
## Warning: package 'doParallel' was built under R version 4.3.3
## Warning: package 'iterators' was built under R version 4.3.3
## Warning: package 'readstata13' was built under R version 4.3.3
## Warning: package 'rsample' was built under R version 4.3.3
## Warning: package 'DiagrammeR' was built under R version 4.3.3
## Warning: package 'e1071' was built under R version 4.3.3
## Warning: package 'pscl' was built under R version 4.3.3
## Warning: package 'pROC' was built under R version 4.3.3
## Warning: package 'caret' was built under R version 4.3.3
## Warning: package 'ModelMetrics' was built under R version 4.3.3
## Warning: package 'MatchIt' was built under R version 4.3.3
## Warning: package 'Hmisc' was built under R version 4.3.3
## Warning: package 'scales' was built under R version 4.3.3
## Warning: package 'lmtest' was built under R version 4.3.3
## Warning: package 'zoo' was built under R version 4.3.3
## Warning: package 'sandwich' was built under R version 4.3.3
## Warning: package 'haven' was built under R version 4.3.3
## Warning: package 'rpms' was built under R version 4.3.3
## Warning: package 'randomForest' was built under R version 4.3.3
## Warning: package 'fabricatr' was built under R version 4.3.3
## Warning: package 'gridExtra' was built under R version 4.3.3
## Warning: package 'VIM' was built under R version 4.3.3
## Warning: package 'colorspace' was built under R version 4.3.3
## Warning: package 'mice' was built under R version 4.3.3
## Warning in check_dep_version(): ABI version mismatch:
## lme4 was built with Matrix ABI version 1
## Current Matrix ABI version is 0
## Please re-install lme4 from source or restore original 'Matrix' package
## Warning: package 'ivreg' was built under R version 4.3.3
## Warning: package 'kableExtra' was built under R version 4.3.3
## Warning: package 'corrplot' was built under R version 4.3.3
## Warning: package 'psych' was built under R version 4.3.3
print(paste("Version of grf package:", packageVersion("grf")))
#####STEP 0-3: Basic information #####
Sys.time()
# Get detailed R session and system information
session_info <- sessionInfo()
system_info <- Sys.info()
# Combine the output
list(session_info = session_info, system_info = system_info)
#####STEP 0-4: Set non-run-specific parameters #####
seedset <- 777
numthreadsset <- min(6, parallel::detectCores())
if (numthreadsset!= 6) {
print("the results of grf vary by num.thread (publication paper used num.thread=6)")
}
cat("number of threads (affects grf results):", numthreadsset,"\n")
## number of threads (affects grf results): 6
#####STEP 0-5: Set run-specific parameters #####
published_paper_run <- 0 # ANALYST FORM
if (published_paper_run == 1) {
print("save output into PP_Full_Analysis/Saved_tables_figures/As_published/ folder")
warning("Changing this setting to 1 overwrites the input files required for replicating on different platforms.")
} else {
print("save output into PP_Full_Analysis/Saved_tables_figures/Testing/ folder")
}
## [1] "save output into PP_Full_Analysis/Saved_tables_figures/Testing/ folder"
cw_run_name <- "med" # ANALYST FORM
cw_grid <- 5 # ANALYST FORM
correlations_run_name <- "main_sim" # ANALYST FORM
# List of outcomes to analyze
outcomes_list <- c( # ANALYST FORM
"sim_sbp_neg_alpha_5_presentation_cw0_lambda_0",
"sim_hdl_level_neg_alpha_5_presentation_cw0_lambda_0",
"sim_debt_neg_alpha_5_presentation_cw0_lambda_0"
)
# Define cw_name
cw_name <- paste0("cw_", cw_run_name, "_", cw_grid)
#####STEP 0-6: Set file paths #####
# Set the processed path based on published_paper_run
processedpath <- if (published_paper_run == 1) {
paste0("PP_Full_Analysis/Intermediate_data/As_published/empirical/", cw_name)
} else {
paste0("PP_Full_Analysis/Intermediate_data/Testing/empirical/", cw_name)
}
# Set the results_dir based on published_paper_run and cw_name
results_dir <- if (published_paper_run == 1) {
paste0("PP_Full_Analysis/Saved_tables_figures/As_published/", cw_name)
} else {
paste0("PP_Full_Analysis/Saved_tables_figures/Testing/", cw_name)
}
# If results directory does not exist, create it
if (!dir.exists(results_dir)) {
dir.create(results_dir, recursive = TRUE)
}
# Print processed path and results directory
print(paste("Processed path:", processedpath))
## [1] "Processed path: PP_Full_Analysis/Intermediate_data/Testing/empirical/cw_med_5"
## [1] "Results directory: PP_Full_Analysis/Saved_tables_figures/Testing/cw_med_5"
#####STEP 8-1: Read and prepare data #####
# Lists to store individual outcome dataframes
cate_data <- list()
clate_data <- list()
# Read data for each outcome
for(outcome in outcomes_list) {
# Extract base outcome name
base_outcome <- sub("_lambda_.*", "", outcome)
# Read the data file
filename <- paste0(processedpath, "/cate_clate_results_", base_outcome, ".csv")
if(file.exists(filename)) {
data <- read.csv(filename)
# Extract lambda number
lambda_num <- sub(".*lambda_", "", outcome)
# Get CATE column
cate_col <- paste0("cate_lambda_", lambda_num, "_ranking_5")
if(cate_col %in% names(data)) {
# Create temporary dataframe with person_id and CATE value
temp_cate <- data.frame(
person_id = data$person_id,
temp_val = data[[cate_col]]
)
names(temp_cate)[2] <- outcome # Rename the value column to outcome name
cate_data[[outcome]] <- temp_cate
} else {
print(paste("CATE column not found for outcome:", outcome))
}
# Get CLATE column if it exists and outcome is not OHP
if(!"ohp_all_ever_inperson" %in% outcome && "clate" %in% names(data)) {
# Create temporary dataframe with person_id and CLATE value
temp_clate <- data.frame(
person_id = data$person_id,
temp_val = data[["clate_ranking_5"]]
)
names(temp_clate)[2] <- outcome # Rename the value column to outcome name
clate_data[[outcome]] <- temp_clate
} else {
print(paste("CLATE column not found for outcome:", outcome))
}
}
}
# Merge all CATE dataframes with full outer join to preserve all person_ids
cate_df <- Reduce(function(x, y) merge(x, y, by = "person_id", all = TRUE), cate_data)
# Merge all CLATE dataframes with full outer join to preserve all person_ids
clate_df <- Reduce(function(x, y) merge(x, y, by = "person_id", all = TRUE), clate_data)
### Step 8-2: Add ohp_all_ever_inperson to cate and clate dataframes###
# Read data for ohp_all_ever_inperson
ohp_data <- read.csv(paste0(processedpath, "/cate_clate_results_ohp_all_ever_inperson_cw0.csv"))
# Create temporary dataframe for OHP data
ohp_temp <- data.frame(
person_id = ohp_data$person_id,
ohp_all_ever_inperson = ohp_data$cate_lambda_0_ranking_5
)
# Add OHP data to CATE dataframe
cate_df <- merge(cate_df, ohp_temp, by = "person_id", all = TRUE)
# Add OHP data to CLATE dataframe if it exists
clate_df <- merge(clate_df, ohp_temp, by = "person_id", all = TRUE)
# Print information on dataframes
print("Information on cate_df:")
## [1] "Information on cate_df:"
## Rows: 12,208
## Columns: 5
## $ person_id <int> 5, 8, 16, 17, 18, …
## $ sim_sbp_neg_alpha_5_presentation_cw0_lambda_0 <int> 3, 2, 5, 1, 5, 1, …
## $ sim_hdl_level_neg_alpha_5_presentation_cw0_lambda_0 <int> 1, 2, 5, 2, 5, 3, …
## $ sim_debt_neg_alpha_5_presentation_cw0_lambda_0 <int> 5, 1, 1, 5, 2, 1, …
## $ ohp_all_ever_inperson <int> 2, 2, 4, 5, 4, 1, …
## [1] "Information on clate_df:"
## Rows: 12,208
## Columns: 5
## $ person_id <int> 5, 8, 16, 17, 18, …
## $ sim_sbp_neg_alpha_5_presentation_cw0_lambda_0 <int> 2, 2, 5, 1, 5, 2, …
## $ sim_hdl_level_neg_alpha_5_presentation_cw0_lambda_0 <int> 1, 1, 5, 2, 5, 3, …
## $ sim_debt_neg_alpha_5_presentation_cw0_lambda_0 <int> 5, 1, 1, 5, 2, 1, …
## $ ohp_all_ever_inperson <int> 2, 2, 4, 5, 4, 1, …
#####STEP 8-2: Create correlation tables #####
# Calculate correlations with standard errors for cates
cate_cors_with_se <- corr.test(cate_df[, -which(names(cate_df) == "person_id")],
use = "pairwise.complete.obs")
# Create dataframes for correlations and SEs
cate_correlations <- cate_cors_with_se$r
cate_standard_errors <- cate_cors_with_se$se
# Convert matrices to data frames
cate_correlations_df <- data.frame(
correlations = cate_correlations,
standard_errors = cate_standard_errors
)
# Function to format correlation with SE
format_corr_se <- function(corr, se) {
paste0(round(corr, 3), "<br>(", round(se, 3), ")")
}
# Create matrix to store formatted values
n <- nrow(cate_correlations)
formatted_matrix <- matrix("", n, n)
rownames(formatted_matrix) <- rownames(cate_correlations)
colnames(formatted_matrix) <- colnames(cate_correlations)
# Fill matrix with formatted values
for(i in 1:n) {
for(j in 1:n) {
formatted_matrix[i,j] <- format_corr_se(cate_correlations[i,j], cate_standard_errors[i,j])
}
}
# Convert to data frame for saving
cate_correlations_df <- as.data.frame(formatted_matrix)
kable(cate_correlations_df)
sim_sbp_neg_alpha_5_presentation_cw0_lambda_0 | sim_hdl_level_neg_alpha_5_presentation_cw0_lambda_0 | sim_debt_neg_alpha_5_presentation_cw0_lambda_0 | ohp_all_ever_inperson | |
---|---|---|---|---|
sim_sbp_neg_alpha_5_presentation_cw0_lambda_0 | 1 (0) |
0.573 (0.007) |
0.058 (0.009) |
0.497 (0.008) |
sim_hdl_level_neg_alpha_5_presentation_cw0_lambda_0 | 0.573 (0.007) |
1 (0) |
-0.081 (0.009) |
0.308 (0.009) |
sim_debt_neg_alpha_5_presentation_cw0_lambda_0 | 0.058 (0.009) |
-0.081 (0.009) |
1 (0) |
0.543 (0.008) |
ohp_all_ever_inperson | 0.497 (0.008) |
0.308 (0.009) |
0.543 (0.008) |
1 (0) |
#####STEP 8-2: Create correlation tables #####
# Calculate correlations with standard errors for clates
clate_cors_with_se <- corr.test(clate_df[, -which(names(clate_df) == "person_id")],
use = "pairwise.complete.obs")
# Create dataframes for correlations and SEs
clate_correlations <- clate_cors_with_se$r
clate_standard_errors <- clate_cors_with_se$se
# Convert matrices to data frames
clate_correlations_df <- data.frame(
correlations = clate_correlations,
standard_errors = clate_standard_errors
)
# Function to format correlation with SE
format_corr_se <- function(corr, se) {
paste0(round(corr, 3), "<br>(", round(se, 3), ")")
}
# Create matrix to store formatted values
n <- nrow(clate_correlations)
formatted_matrix <- matrix("", n, n)
rownames(formatted_matrix) <- rownames(clate_correlations)
colnames(formatted_matrix) <- colnames(clate_correlations)
# Fill matrix with formatted values
for(i in 1:n) {
for(j in 1:n) {
formatted_matrix[i,j] <- format_corr_se(clate_correlations[i,j], clate_standard_errors[i,j])
}
}
# Convert to data frame for saving
clate_correlations_df <- as.data.frame(formatted_matrix)
kable(clate_correlations_df)
sim_sbp_neg_alpha_5_presentation_cw0_lambda_0 | sim_hdl_level_neg_alpha_5_presentation_cw0_lambda_0 | sim_debt_neg_alpha_5_presentation_cw0_lambda_0 | ohp_all_ever_inperson | |
---|---|---|---|---|
sim_sbp_neg_alpha_5_presentation_cw0_lambda_0 | 1 (0) |
0.555 (0.008) |
-0.14 (0.009) |
0.351 (0.008) |
sim_hdl_level_neg_alpha_5_presentation_cw0_lambda_0 | 0.555 (0.008) |
1 (0) |
-0.219 (0.009) |
0.214 (0.009) |
sim_debt_neg_alpha_5_presentation_cw0_lambda_0 | -0.14 (0.009) |
-0.219 (0.009) |
1 (0) |
0.241 (0.009) |
ohp_all_ever_inperson | 0.351 (0.008) |
0.214 (0.009) |
0.241 (0.009) |
1 (0) |
#####STEP 8-3: Create CATE correlation plots #####
create_heatmap <- function(cor_matrix, se_matrix,title) {
# Create data frame with correlations and SEs
plot_data <- reshape2::melt(cor_matrix)
plot_data$SE <- reshape2::melt(se_matrix)$value
# Format the text labels
plot_data$label <- sprintf("%.3f\n(%.3f)", plot_data$value, plot_data$SE)
# Create heatmap
heatmap <- ggplot(data = plot_data,
aes(x = Var1, y = Var2, fill = value)) +
geom_tile() +
geom_text(aes(label = label), size = 3) + # Add text labels
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), name = "Correlation") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5)) +
labs(title = title)
return(heatmap)
}
#####STEP 8-4: Create plots #####
# Create CATE heatmap
cate_heatmap <- create_heatmap(cate_correlations, cate_standard_errors, "CATE Correlations")
print(cate_heatmap)
# Save CATE plot
ggsave(file.path(results_dir, paste0(correlations_run_name, "_cate_correlations.png")),
cate_heatmap, width = 10, height = 8)
ggsave(file.path(results_dir, paste0(correlations_run_name, "_cate_correlations.pdf")),
cate_heatmap, width = 10, height = 8)
# Create CLATE heatmap if it exists
clate_heatmap <- create_heatmap(clate_correlations, clate_standard_errors, "CLATE Correlations")
print(clate_heatmap)