library(tidyverse)
## BayesTree by Hugh Chipman & Robert McCulloch
library(BayesTree)
Here is the example from the BayesTree manual.
## simulate data (example from Friedman MARS paper)
f = function(x) {
  10 * sin(pi * x[, 1] * x[, 2]) + 20 * (x[, 3] - 0.5)^2 + 10 * x[, 4] + 5 * x[, 5]
}
sigma = 1.0  # y = f(x) + sigma*z, z ~ N(0,1)
n = 100      # number of observations
set.seed(99)
x = matrix(runif(n * 10), n, 10)  # 10 variables, only first 5 matter
Ey = f(x)
y = Ey + sigma * rnorm(n)
## compare lm fit to BART later
lmFit = lm(y ~ ., data.frame(x, y))
## run BART
set.seed(99)
bartFit = bart(x, y, ndpost = 1000)
##
##
## Running BART with numeric y
##
## number of trees: 200
## Prior:
## k: 2.000000
## degrees of freedom in sigma prior: 3
## quantile in sigma prior: 0.900000
## power and base for tree prior: 2.000000 0.950000
## use quantiles for rule cut points: 0
## data:
## number of training observations: 100
## number of test observations: 0
## number of explanatory variables: 10
##
##
## Cutoff rules c in x<=c vs x>c
## Number of cutoffs: (var: number of possible c):
## (1: 100) (2: 100) (3: 100) (4: 100) (5: 100)
## (6: 100) (7: 100) (8: 100) (9: 100) (10: 100)
##
##
##
## Running mcmc loop:
## iteration: 100 (of 1100)
## iteration: 200 (of 1100)
## iteration: 300 (of 1100)
## iteration: 400 (of 1100)
## iteration: 500 (of 1100)
## iteration: 600 (of 1100)
## iteration: 700 (of 1100)
## iteration: 800 (of 1100)
## iteration: 900 (of 1100)
## iteration: 1000 (of 1100)
## iteration: 1100 (of 1100)
## time for loop: 3
##
## Tree sizes, last iteration:
## 3 2 2 3 4 4 2 4 2 2 3 2 2 2 1 2 1 3 2 3
## 2 3 3 2 2 2 2 3 3 2 2 2 2 2 3 3 2 4 3 2
## 5 2 2 4 2 3 3 2 3 2 1 3 2 2 2 2 2 2 2 2
## 2 1 4 2 2 2 4 3 2 3 5 4 4 1 2 3 2 2 4 2
## 2 2 5 4 2 2 2 3 1 3 2 1 2 2 4 3 2 2 4 2
## 2 2 3 3 3 1 2 2 2 2 3 2 2 2 2 3 2 3 2 2
## 2 2 2 2 2 3 2 2 1 3 2 3 2 2 3 2 2 2 2 3
## 2 3 2 3 2 3 2 1 2 1 2 2 2 2 1 2 2 2 2 3
## 3 2 2 3 2 3 2 3 2 3 3 1 4 3 4 2 3 1 3 3
## 3 2 1 2 2 2 3 2 2 2 2 2 4 7 2 3 3 2 2 3
## Variable Usage, last iteration (var:count):
## (1: 27) (2: 30) (3: 35) (4: 36) (5: 22)
## (6: 26) (7: 28) (8: 31) (9: 25) (10: 23)
##
## DONE BART 11-2-2014
## plot bart fit
plot(bartFit)
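The fit object also carries posterior draws of f(x). As a rough check (a sketch, assuming yhat.train is the ndpost-by-n matrix of draws described in the BayesTree manual), we can ask how often pointwise 95% posterior intervals cover the true Ey.
## pointwise 95% posterior intervals for f(x) at the training points
ci <- apply(bartFit$yhat.train, 2, quantile, probs = c(0.025, 0.975))
## proportion of training points whose interval covers the true Ey
mean(Ey >= ci[1, ] & Ey <= ci[2, ])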
## compare the BART fit to the linear model fit and to the truth Ey
fitmat = cbind(y, Ey, lmFit$fitted, bartFit$yhat.train.mean)
colnames(fitmat) = c('y', 'Ey', 'lm', 'bart')
print(cor(fitmat))
## y Ey lm bart
## y 1.0000000 0.9847984 0.8841787 0.9982987
## Ey 0.9847984 1.0000000 0.9009389 0.9887067
## lm 0.8841787 0.9009389 1.0000000 0.8981988
## bart 0.9982987 0.9887067 0.8981988 1.0000000
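Correlations ignore scale, so a quick root-mean-squared-error comparison against the truth Ey is also informative (a sketch reusing the objects above).
rmse <- function(fit, truth) sqrt(mean((fit - truth)^2))
c(lm = rmse(lmFit$fitted, Ey), bart = rmse(bartFit$yhat.train.mean, Ey))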
## https://github.com/vdorie/bartCause
## devtools::install_github("vdorie/bartCause")
library(bartCause)
Simulate a simple dataset.
## simulate covariates, a binary treatment, and a continuous outcome
n <- 100L
beta.z <- c(.75, -0.5, 0.25)
beta.y <- c(.5, 1.0, -1.5)
sigma <- 2
set.seed(725)
## Continuous Covariates
x <- matrix(rnorm(3 * n), n, 3)
## One treatment Effect
tau <- rgamma(1L, 0.25 * 16 * rgamma(1L, 1 * 32, 32), 16)
p.score <- pnorm(x %*% beta.z)
## Binary Treatment
z <- rbinom(n, 1, p.score)
mu.0 <- x %*% beta.y
mu.1 <- x %*% beta.y + tau
## Continuous Outcome
y <- mu.0 * (1 - z) + mu.1 * z + rnorm(n, 0, sigma)
tau
## [1] 0.2677603
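Because treatment assignment depends on x through p.score and x also drives the outcome, a naive comparison of means is confounded. A quick sketch of the raw contrast and of propensity-score overlap:
## naive (confounded) difference in means vs. the true effect tau
mean(y[z == 1]) - mean(y[z == 0])
## overlap of the true propensity scores across arms
summary(p.score[z == 1])
summary(p.score[z == 0])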
Here is the help page for the main function, bartc(). The only outcome type supported is continuous; a sketch of a fuller call follows the excerpt.
bartc package:bartCause R Documentation
Causal Inference using Bayesian Additive Regression Trees
Description:
Fits a collection of treatment and response models using the
Bayesian Additive Regression Trees (BART) algorithm, producing
estimates of treatment effects.
Usage:
bartc(response, treatment, confounders, data, subset, weights,
method.rsp = c("bart", "p.weight", "tmle"),
method.trt = c("none", "glm", "bart", "bart.xval"),
estimand = c("ate", "att", "atc"),
group.by = NULL,
commonSup.rule = c("none", "sd", "chisq"),
commonSup.cut = c(NA_real_, 1, 0.05),
p.scoreAsCovariate = TRUE, use.rbart = FALSE,
keepCall = TRUE, verbose = TRUE,
...)
Arguments:
response: A vector of the continuous outcome variable, or a reference
to such in the ‘data’ argument.
treatment: A vector of the binary treatment variable, or a reference to
‘data’.
confounders: A matrix or data frame of covariates to be used in
estimating the treatment and response models. Can also be the
right-hand-side of a formula (e.g. ‘x1 + x2 + ...’). The
‘data’ argument will be searched when supplied.
data: An optional data frame or named list containing the
‘response’, ‘treatment’, and ‘confounders’.
...
estimand: A character string specifying which causal effect to target.
Options are ‘"ate"’ - average treatment effect, ‘"att"’ -
average treatment effect on the treated, and ‘"atc"’ -
average treatment effect on the controls.
group.by: An optional factor that, when present, causes the treatment
effect estimate to be calculated within each group.
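For illustration, a fuller call using the arguments above could also fit a propensity score model with BART and target the effect on the treated; this is only a sketch of the interface and is not run here.
## not run: fit both treatment and response models with BART, estimand = ATT
## bartcAtt <- bartc(response = y, treatment = z, confounders = x,
##                   method.trt = "bart", estimand = "att", n.samples = 100L)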
Fit without a propensity score (PS) model (the default method.trt is "none").
bartc1 <- bartc(response = y, treatment = z, confounders = x, n.samples = 100L)
## fitting response model via method 'bart'
summary(bartc1)
## Call: bartc(response = y, treatment = z, confounders = x, n.samples = 100L)
##
## Causal inference model fit by:
## model.rsp: bart
## model.trt: none
##
## Treatment effect:
## estimate sd ci.lower ci.upper
## ate 0.574 0.5023 -0.4104 1.558
## Estimates fit from 100 total observations
## 95% credible interval calculated by: normal approximation
## Result based on 400 posterior samples across 4 chains
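Before enforcing common support it is worth looking at the fit graphically. A sketch, assuming the diagnostic helpers plot_sigma() and plot_support() exported by bartCause behave as documented:
## traceplot of the residual standard deviation across chains
plot_sigma(bartc1)
## which observations fall outside the common support region
plot_support(bartc1)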
An example of refitting under the common support rule, which excludes observations outside the region of common support.
bartc2 <- refit(bartc1, commonSup.rule = "sd")
summary(bartc2)
## Call: bartc(response = y, treatment = z, confounders = x, n.samples = 100L)
##
## Causal inference model fit by:
## model.rsp: bart
## model.trt: none
##
## Treatment effect:
## estimate sd ci.lower ci.upper
## ate 0.5777 0.5007 -0.4036 1.559
## Estimates fit from 94 total observations
## 95% credible interval calculated by: normal approximation
##
## Common support enforced by cutting using 'sd' rule, cutoff: 1
## Suppressed observations:
## trt ctl
## 5 1
##
## Result based on 400 posterior samples across 4 chains
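The interval above uses a normal approximation; the posterior draws can also be summarized directly. A sketch, assuming bartCause::extract() returns posterior samples of the ATE for a bartcFit object:
## quantile-based 95% interval from the posterior draws
ate.draws <- bartCause::extract(bartc2)
quantile(ate.draws, probs = c(0.025, 0.975))
The suppressed observations can also be passed to a new fit directly, as done next.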
bartc3 <- bartc(y, z, x, subset = bartc2$commonSup.sub, n.samples = 100L)
## fitting response model via method 'bart'
summary(bartc3)
## Call: bartc(response = y, treatment = z, confounders = x, subset = bartc2$commonSup.sub,
## n.samples = 100L)
##
## Causal inference model fit by:
## model.rsp: bart
## model.trt: none
##
## Treatment effect:
## estimate sd ci.lower ci.upper
## ate 0.6199 0.5168 -0.393 1.633
## Estimates fit from 94 total observations
## 95% credible interval calculated by: normal approximation
## Result based on 400 posterior samples across 4 chains
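Since the data are simulated, the point estimates can be compared with the true effect tau. A sketch, again assuming bartCause::extract() returns posterior draws of the estimand:
ests <- sapply(list(bartc1 = bartc1, bartc2 = bartc2, bartc3 = bartc3),
               function(fit) mean(bartCause::extract(fit)))
round(c(ests, truth = tau), 3)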