0.1 Easyalluvial 0.2.1

easyalluvial allows you to build exploratory alluvial plots (Sankey diagrams) with a single line of code while automatically binning numerical variables. Release 0.2.1 ensures tidyr 1.0.0 compatibility and fixes a bug around categorical variables for model response plots.

[참고] https://www.datisticsblog.com/2019/04/visualising-model-response-with-easyalluvial


0.1.1 Packages Loading

library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------ tidyverse 1.2.1 --
## √ ggplot2 3.2.0     √ purrr   0.3.2
## √ tibble  2.1.3     √ dplyr   0.8.3
## √ tidyr   1.0.0     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.4.0
## -- Conflicts --------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(easyalluvial)

0.1.2 Data loading

# Load the titanic dataset into the workspace and print its structure.
data("titanic")
str(titanic)
## Classes 'tbl_df', 'tbl' and 'data.frame':    891 obs. of  10 variables:
##  $ Survived: Factor w/ 2 levels "no","yes": 1 2 2 2 1 1 1 1 2 2 ...
##  $ Pclass  : Ord.factor w/ 3 levels "1"<"2"<"3": 3 1 3 1 3 3 1 3 3 2 ...
##  $ Sex     : Factor w/ 2 levels "male","female": 1 2 2 2 1 1 1 1 2 2 ...
##  $ Age     : num  22 38 26 35 35 30 54 2 27 14 ...
##  $ SibSp   : int  1 1 0 1 0 0 0 3 0 1 ...
##  $ Parch   : int  0 0 0 0 0 0 0 1 2 0 ...
##  $ Fare    : num  7.25 71.28 7.92 53.1 8.05 ...
##  $ Cabin   : chr  "" "C85" "" "C123" ...
##  $ Embarked: chr  "S" "C" "S" "S" ...
##  $ title   : Factor w/ 8 levels "Mr","Mrs","Miss",..: 1 2 3 2 1 1 1 4 2 2 ...
# Keep only the factor columns of titanic; these feed the model below.
df <- select_if(titanic, is.factor)

# Show the structure of the reduced data frame.
str(df)
## Classes 'tbl_df', 'tbl' and 'data.frame':    891 obs. of  4 variables:
##  $ Survived: Factor w/ 2 levels "no","yes": 1 2 2 2 1 1 1 1 2 2 ...
##  $ Pclass  : Ord.factor w/ 3 levels "1"<"2"<"3": 3 1 3 1 3 3 1 3 3 2 ...
##  $ Sex     : Factor w/ 2 levels "male","female": 1 2 2 2 1 1 1 1 2 2 ...
##  $ title   : Factor w/ 8 levels "Mr","Mrs","Miss",..: 1 2 3 2 1 1 1 4 2 2 ...

0.1.3 Analysis

# Seed the RNG before fitting so the random forest result is repeatable.
set.seed(0)
m <- randomForest::randomForest(Survived ~ ., data = df)

# Variable importance stored on the fitted model object.
imp <- m[["importance"]]

# Generate the data space from df and the importances (degree = 3),
# then predict the response of the fitted model over that space.
dspace <- get_data_space(df, imp, degree = 3)
pred <- predict(m, newdata = dspace, type = "response")

0.1.4 Plot

# Build the model-response alluvial plot from the predictions, the data
# space and the variable importances (same degree as used for dspace).
p <- alluvial_model_response(pred, dspace, imp, degree = 3)

# Assemble the final figure in two steps: first add marginal histograms,
# then add the importance plot to that grid.
# Use TRUE/FALSE rather than T/F: T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved constants.
# NOTE(review): plot = FALSE presumably suppresses drawing of the
# intermediate grid so only the final one is rendered; confirm in the
# easyalluvial docs.
grid <- add_marginal_histograms(p, plot = FALSE, data_input = df)
grid <- add_imp_plot(grid = grid, p = p, data_input = df, plot = TRUE)