Set up environment

#####STEP 0-1: Reset environment #####
rm(list=ls())
knitr::opts_chunk$set(echo = TRUE)
options(repos = structure(c(CRAN = "http://cran.rstudio.com/")))

#####STEP 0-2: Install packages #####
list.of.packages <- c( "grf", "metafor", "splitstackshape", "dplyr", "tidyverse", "foreach", "cowplot",
                       "reshape2", "doParallel", "survival", "readstata13", "ggplot2", "rsample", "DiagrammeR",
                       "e1071", "pscl", "pROC", "caret", "ModelMetrics", "MatchIt", "Hmisc", "scales",
                       "lmtest", "sandwich","haven", "rpms", "randomForest",  "fabricatr", "gridExtra", 
                       "VIM", "mice", "missForest", "lmtest", "ivreg", "kableExtra")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

lapply(list.of.packages, library, character.only = TRUE)

## Warning: package 'grf' was built under R version 4.3.3

## Warning: package 'metafor' was built under R version 4.3.3

## Warning: package 'metadat' was built under R version 4.3.3

## Warning: package 'numDeriv' was built under R version 4.3.1

## Warning: package 'splitstackshape' was built under R version 4.3.3

## Warning: package 'dplyr' was built under R version 4.3.3

## Warning: package 'tidyverse' was built under R version 4.3.3

## Warning: package 'ggplot2' was built under R version 4.3.3

## Warning: package 'tibble' was built under R version 4.3.3

## Warning: package 'tidyr' was built under R version 4.3.3

## Warning: package 'readr' was built under R version 4.3.3

## Warning: package 'purrr' was built under R version 4.3.3

## Warning: package 'stringr' was built under R version 4.3.3

## Warning: package 'forcats' was built under R version 4.3.3

## Warning: package 'lubridate' was built under R version 4.3.3

## Warning: package 'foreach' was built under R version 4.3.3

## Warning: package 'cowplot' was built under R version 4.3.3

## Warning: package 'reshape2' was built under R version 4.3.3

## Warning: package 'doParallel' was built under R version 4.3.3

## Warning: package 'iterators' was built under R version 4.3.3

## Warning: package 'readstata13' was built under R version 4.3.3

## Warning: package 'rsample' was built under R version 4.3.3

## Warning: package 'DiagrammeR' was built under R version 4.3.3

## Warning: package 'e1071' was built under R version 4.3.3

## Warning: package 'pscl' was built under R version 4.3.3

## Warning: package 'pROC' was built under R version 4.3.3

## Warning: package 'caret' was built under R version 4.3.3

## Warning: package 'ModelMetrics' was built under R version 4.3.3

## Warning: package 'MatchIt' was built under R version 4.3.3

## Warning: package 'Hmisc' was built under R version 4.3.3

## Warning: package 'scales' was built under R version 4.3.3

## Warning: package 'lmtest' was built under R version 4.3.3

## Warning: package 'zoo' was built under R version 4.3.3

## Warning: package 'sandwich' was built under R version 4.3.3

## Warning: package 'haven' was built under R version 4.3.3

## Warning: package 'rpms' was built under R version 4.3.3

## Warning: package 'randomForest' was built under R version 4.3.3

## Warning: package 'fabricatr' was built under R version 4.3.3

## Warning: package 'gridExtra' was built under R version 4.3.3

## Warning: package 'VIM' was built under R version 4.3.3

## Warning: package 'colorspace' was built under R version 4.3.3

## Warning: package 'mice' was built under R version 4.3.3

## Warning in check_dep_version(): ABI version mismatch: 
## lme4 was built with Matrix ABI version 1
## Current Matrix ABI version is 0
## Please re-install lme4 from source or restore original 'Matrix' package

## Warning: package 'missForest' was built under R version 4.3.3

## Warning: package 'ivreg' was built under R version 4.3.3

## Warning: package 'kableExtra' was built under R version 4.3.3

print(paste("Version of grf package:", packageVersion("grf")))

#####STEP 0-2: Basic information #####
Sys.time()
# Get detailed R session and system information
session_info <- sessionInfo()
system_info <- Sys.info()
# Combine the output
list(session_info = session_info, system_info = system_info)

Set non-run-specific parameters

#####STEP 0-3: Set non-run-specific parameters #####
seedset <- 777
seed.ivforest <- 123456789
seed.cf       <- 987654321
numthreadsset <- min(6, parallel::detectCores()) 
if (numthreadsset!= 6) {
  print("the results of grf vary by num.thread (publication paper used num.thread=6)")
} 

cat("number of threads (affects grf results):", numthreadsset,"\n")

## number of threads (affects grf results): 6

# Set printing options
options(digits = 4)

Set run-specific parameters

#####STEP 0-4: Set run-specific parameters #####
published_paper_run <- 0
if (published_paper_run == 1) {
  print("save intermediate files into both intdat and intdatAsPublished folders")
  warning("Changing this setting to 1 overwrites the input files required for replicating on different platforms.")
} else {
  print("save intermediate files into only intdat folder")
}

## [1] "save intermediate files into only intdat folder"

forest_files = c("cate_results_sbp_cw0.RData", "clate_results_sbp_cw0.RData") # ANALYST FORM

cw_run_name <- "sd" # ANALYST FORM
cw_grid <- 10 # ANALYST FORM

# Define cw_name
cw_name <- paste0("cw_", cw_run_name, "_", cw_grid)

Set file paths

#####STEP 0-6: Set file paths #####
# Set the processed path based on published_paper_run
processedpath <- if (published_paper_run == 1) {
  paste0("PP_Full_Analysis/Cleaned_input_data/As_published/empirical/", cw_name)
} else {
  paste0("PP_Full_Analysis/Cleaned_input_data/Testing/empirical/", cw_name)
}

# Set the results_dir based on published_paper_run and cw_name
results_dir <- if (published_paper_run == 1) {
  paste0("PP_Full_Analysis/Saved_tables_figures/As_published/", cw_name)
} else {
  paste0("PP_Full_Analysis/Saved_tables_figures/Testing/", cw_name)
}

# If results directory does not exist, create it
if (!dir.exists(results_dir)) {
  dir.create(results_dir, recursive = TRUE)
}

# Print processed path and results directory
print(paste("Processed path:", processedpath))

## [1] "Processed path: PP_Full_Analysis/Cleaned_input_data/Testing/empirical/cw_sd_10"

print(paste("Results directory:", results_dir))

## [1] "Results directory: PP_Full_Analysis/Saved_tables_figures/Testing/cw_sd_10"

Create TOC plot function

toc_plots <- function(forest_input, output_name) {  
  # Step 1: Extract predictions from the forest_input
  pred <- predict(forest_input)
  tau.hat.oob <- pred$predictions
  
  # Step 2: Compute the rate of average treatment effect
  rate.oob <- rank_average_treatment_effect(forest_input, tau.hat.oob)
  
  # Step 3: Compute t-statistic for out-of-bag predictions
  t.stat.oob <- rate.oob$estimate / rate.oob$std.err
  
  # Step 4: Compute one-sided p-values for the t-statistic
  p.val.onesided <- pnorm(t.stat.oob, lower.tail = FALSE)
  
  # Step 5: Create the dataframe with the relevant columns
  result_df <- data.frame(
    rate = rate.oob$estimate,
    t.stat = t.stat.oob,
    p.val = p.val.onesided
  )

  # Save result_df as csv in results_dir
  write.csv(result_df, file.path(results_dir, paste0("toc_", output_name, ".csv")), row.names = FALSE)

  # Create and save plot as png
  png(file.path(results_dir, paste0("toc_", output_name, ".png")), width = 10, height = 6, units = "in", res = 300)
  plot(rate.oob, main=paste0("TOC evaluated for ", output_name))
  dev.off()

  # Create and save plot as pdf
  pdf(file.path(results_dir, paste0("toc_", output_name, ".pdf")), width = 10, height = 6)
  plot(rate.oob, main=paste0("TOC evaluated for ", output_name))
  dev.off()
  
  # Return the figure
  return(result_df)
}

# Main Loop
for (i in seq_along(forest_files)) {
    # Load the RData file
    load(file.path(processedpath, forest_files[i]))
    print(paste("Successfully loaded:", forest_files[i]))
    
    # Extract the forest object from final_results
    if (exists("final_results")) {
        forest_input <- final_results$results$forest
        print("Successfully extracted forest from final_results")
        
        # Extract file title for naming
        file_title <- sub("\\.RData$", "", basename(forest_files[i]))
        
        # Create plot
        toc_plots(forest_input, file_title)
    } else {
        print("Error: 'final_results' not found in loaded data")
    }
}

## [1] "Successfully loaded: cate_results_sbp_cw0.RData"
## [1] "Successfully extracted forest from final_results"

## [1] "Successfully loaded: clate_results_sbp_cw0.RData"
## [1] "Successfully extracted forest from final_results"

## Warning in get_scores.instrumental_forest(forest, subset = subset,
## debiasing.weights = debiasing.weights, : The instrument appears to be weak,
## with some compliance scores as low as -0.048

6_TOCs

Revati Kapshikar, Kristine Koutout

2025-01-14

Set up environment

Set non-run-specific parameters

Set run-specific parameters

Set file paths

Create TOC plot function