This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Install "fastshap" only when it is not already available, so that re-running
# the notebook does not download and reinstall the package every time.
if (!requireNamespace("fastshap", quietly = TRUE)) {
  utils::install.packages("fastshap")
}
Installing package into ‘C:/Users/gredy/AppData/Local/R/win-library/4.3’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.3/fastshap_0.1.0.zip'
Content type 'application/zip' length 986891 bytes (963 KB)
downloaded 963 KB
package ‘fastshap’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\gredy\AppData\Local\Temp\RtmpoRYoRa\downloaded_packages
# utils::contrib.url(repos, "source")
#install.packages("fastshap")
library(fastshap)
# Take the first element of `titanic_mice` as the working data set and
# preview its first rows.
t1 <- titanic_mice[[1L]]
head(t1)
# Passenger class makes more sense as an ordered factor.
t1[["pclass"]] <- as.ordered(t1[["pclass"]])
# Install "ranger" only when it is not already available, so that re-running
# the notebook does not download and reinstall the package every time.
if (!requireNamespace("ranger", quietly = TRUE)) {
  install.packages("ranger")
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/gredy/AppData/Local/R/win-library/4.3’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.3/ranger_0.16.0.zip'
Content type 'application/zip' length 758581 bytes (740 KB)
downloaded 740 KB
package ‘ranger’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\gredy\AppData\Local\Temp\RtmpoRYoRa\downloaded_packages
library(ranger)
# Seed the RNG so the fitted forest is reproducible.
set.seed(2053)
# Fit a probability forest of `survived` on every other column of t1,
# then print the fitted model.
rfo <- ranger(survived ~ ., data = t1, probability = TRUE)
rfo
Ranger result
Call:
ranger(survived ~ ., data = t1, probability = TRUE)
Type: Probability estimation
Number of trees: 500
Sample size: 1309
Number of independent variables: 5
Mtry: 2
Target node size: 10
Variable importance mode: none
Splitrule: gini
OOB prediction error (Brier s.): 0.1337358
# Single-row data frame describing the passenger to be explained.
# The response is deliberately left out:
#   survived = 0L  (in case you haven't seen the movie)
# NOTE(review): pclass is an integer here, while t1$pclass is converted to an
# ordered factor before training -- confirm the model treats both the same way.
jack.dawson <- data.frame(
  # third-class passenger
  pclass = 3L,
  # twenty years old
  age = 20.0,
  # male
  sex = factor("male", levels = c("female", "male")),
  # no siblings/spouses aboard
  sibsp = 0L,
  # no parents/children aboard
  parch = 0L
)
# Prediction wrapper: returns the "yes" column of the model's predictions
# for `newdata` as a plain, unnamed vector.
#
# object  - fitted model passed on to predict()
# newdata - data to predict for (forwarded as predict()'s `data` argument)
pfun <- function(object, newdata) {
  fitted <- predict(object, data = newdata)
  prob_yes <- fitted$predictions[, "yes"]
  unname(prob_yes)
}
# Jack's predicted likelihood of survival (stored, then displayed)
jack.prob <- pfun(rfo, newdata = jack.dawson)
jack.prob
[1] 0.1314723
# Mean predicted likelihood of survival over all rows of t1
baseline <- mean(pfun(rfo, newdata = t1))
baseline
[1] 0.3821045
# Gap between Jack's prediction and the average prediction
difference <- jack.prob - baseline
difference
[1] -0.2506322
# Feature columns only (the response `survived` is dropped)
X <- subset(t1, select = -survived)
# Seed the RNG for reproducibility
set.seed(2129)
# Explain Jack's prediction using 1000 simulations, then print the result
ex.jack <- explain(
  rfo,
  X = X,
  pred_wrapper = pfun,
  newdata = jack.dawson,
  nsim = 1000
)
ex.jack
pclass age sex sibsp
[1,] -0.07554003 -0.01240914 -0.1414107 0.001836116
parch
[1,] -0.01103988
attr(,"baseline")
[1] 0
attr(,"class")
[1] "explain" "matrix" "array"
# Seed the RNG for reproducibility
set.seed(2133)
# Same explanation as before, but with adjust = TRUE; print the result
ex.jack.adj <- explain(
  rfo,
  X = X,
  pred_wrapper = pfun,
  newdata = jack.dawson,
  nsim = 1000,
  adjust = TRUE
)
ex.jack.adj
pclass age sex sibsp parch
[1,] -0.0697378 -0.02354202 -0.1485205 0.003980237 -0.01281207
attr(,"baseline")
[1] 0.3821045
attr(,"class")
[1] "explain" "matrix" "array"
# Sanity check: the adjusted Shapley values should add up to the gap between
# Jack's prediction and the average prediction (`difference`). The check is
# done programmatically rather than against a hard-coded number, which goes
# stale whenever the fitted model changes.
stopifnot(isTRUE(all.equal(sum(ex.jack.adj), difference)))
sum(ex.jack.adj) # should equal `difference`
[1] -0.2506322
# Install "shapviz" only when it is not already available, so that re-running
# the notebook does not download and reinstall the package every time.
if (!requireNamespace("shapviz", quietly = TRUE)) {
  install.packages("shapviz")
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/gredy/AppData/Local/R/win-library/4.3’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.3/shapviz_0.9.2.zip'
Content type 'application/zip' length 1693973 bytes (1.6 MB)
downloaded 1.6 MB
package ‘shapviz’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\gredy\AppData\Local\Temp\RtmpoRYoRa\downloaded_packages
library(shapviz)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
# Build a shapviz object from Jack's adjusted Shapley values, his feature
# values and the average prediction.
shv <- shapviz(
  ex.jack.adj,
  X = jack.dawson,
  baseline = baseline
)
# Waterfall plot of Jack's explanation
sv_waterfall(shv)
# Force plot of Jack's explanation
sv_force(shv)
# Global explanation: explain() is called without `newdata` here
set.seed(2224)  # for reproducibility
ex.t1 <- explain(
  rfo,
  X = X,
  pred_wrapper = pfun,
  nsim = 100,
  adjust = TRUE,
  shap_only = FALSE
)
# Show the Shapley values as a tibble
tibble::as_tibble(ex.t1$shapley_values)
# Global importance plot: build the shapviz object from the explanations in
# `ex.t1` and plot that object (not `shv`, which holds only Jack's single row).
shv.global <- shapviz(ex.t1)
sv_importance(shv.global)
# SHAP dependence plot for the "age" feature, based on the global explanations
sv_dependence(object = shv.global, v = "age")
# end