library( gbm )
## Loading required package: survival
## Loading required package: lattice
## Loading required package: splines
## Loading required package: parallel
library( MASS ) # used to generate correlated variables
library( dplyr )
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:MASS':
##
## select
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library( Hmisc ) # used for graphing se bars
## Loading required package: grid
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## combine, src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
library( grid )
#source("~/perts_analyses/common/util.R")
## HELPER FUNCTIONS
cor_vars <- function( n=100 , r=.5, mus=c(0,0) ){
# create the covariance matrix
sigma <- matrix( rep(c(1,r,r,1)), 2,2)
# generate the variables
mat <- mvrnorm( n=n, mu=mus, sigma, empirical=T)
return( list(mat[,1], mat[,2]) )
}
# Define Dataset Parameters
n <- 2000 # size of sample (half will be treated)
es <- 2 # effect size (Cohen's d) on influeced group
cov_r <- 0 # correlation of y with covariate
# Create the moderator variable
moderator <- sample(
x=c("Woman","Man"),
size=n,
replace=TRUE
)
# simluate y and a correlated covariate
cor_list <- cor_vars( n=n, r=cov_r)
y <- cor_list[[1]]
cov <- cor_list[[2]]
treat <- rep(c(0,1),n/2)
d <- data.frame(
y=y,
cov=cov,
moderator=moderator,
treat=treat
)
# add moderated treatment effect (boost treated women by es)
influenced <- d$treat == 1 & d$moderator %in% c("Woman")
d$y[influenced] <- d$y[influenced] + es
aggregate( y ~ treat + moderator, data=d, mean)
## treat moderator y
## 1 0 Man -0.01450592
## 2 1 Man -0.01094801
## 3 0 Woman 0.01895792
## 4 1 Woman 2.00575981
# set model parameters
CV_FOLDS <- 5
GBM_NTREES <- 5000
gbm_formula <- "y ~ treat + moderator + cov" %>%
as.formula()
model <- gbm( gbm_formula ,
,data = d
,distribution = "gaussian"
#,distribution = list(name="quantile",alpha=0.5)
,n.trees = GBM_NTREES
,shrinkage = 0.01
,interaction.depth = 2
,n.minobsinnode = 50
,verbose = FALSE
,cv.folds = CV_FOLDS
)
# shows error in training and cv sets
best.iter <- gbm.perf(model, method="cv")

# Overall summary
summary(model, n.trees=best.iter)

## var rel.inf
## treat treat 50.187844
## moderator moderator 48.139219
## cov cov 1.672937
# interactions
gbm:::interact.gbm(model, data=d, i.var=c("treat", "moderator"), best.iter)
## [1] 0.5498301
gbm:::interact.gbm(model, data=d, i.var=c("treat", "cov"), best.iter)
## [1] 0.0101033
gbm:::interact.gbm(model, data=d, i.var=c("moderator", "cov"), best.iter)
## [1] 0.006432768