## Welcome to DALEX (version: 1.0).
## Find examples and detailed introduction at: https://pbiecek.github.io/ema/
## Additional features will be available after installation of: ggpubr.
## Use 'install_dependencies()' to get all suggested dependencies
## [1] "CRASH_NUM1" "NARRATIVE" "ACCESS_CNTL_CD"
## [4] "ALIGNMENT_CD" "HWY_TYPE_CD" "INVEST_AGENCY_CD"
## [7] "LIGHTING_CD" "LOC_TYPE_CD" "MAN_COLL_CD"
## [10] "PRI_CONTRIB_FAC_CD" "ROAD_COND_CD" "ROAD_REL_CD"
## [13] "ROAD_TYPE_CD" "SEC_CONTRIB_FAC_CD" "SEVERITY_CD"
## [16] "SURF_COND_CD" "SURF_TYPE_CD" "WEATHER_CD"
## [19] "CRASH_DATE" "CRASH_TIME" "CR_MONTH"
## [22] "CR_HOUR" "DAY_OF_WK" "INTERSECTION"
## [25] "NUM_VEH" "LAT" "LONG"
## [28] "PARISH_CD" "CITY_CD" "TIME_AMB_ARR"
## [31] "TIME_AMB_ARR_HOSP" "HIT_AND_RUN"
# Keep the response (SEVERITY_CD) together with the six predictor columns.
mn01 <- it01[, c(
  "SEVERITY_CD", "DAY_OF_WK", "LIGHTING_CD", "HWY_TYPE_CD",
  "WEATHER_CD", "CR_HOUR", "NUM_VEH"
)]

# Remove every row that has a missing value in any of the selected columns.
mn02 <- na.omit(mn01)

# The response has to be a factor so that ranger fits a classification forest.
mn02$SEVERITY_CD <- as.factor(mn02$SEVERITY_CD)

# Probability forest with 50 trees, using all remaining columns as predictors.
model_HR_ranger <- ranger(
  SEVERITY_CD ~ .,
  data = mn02,
  probability = TRUE,
  num.trees = 50
)

# Print the fitted model summary.
model_HR_ranger
## Ranger result
##
## Call:
## ranger(SEVERITY_CD ~ ., data = mn02, probability = TRUE, num.trees = 50)
##
## Type: Probability estimation
## Number of trees: 50
## Sample size: 338
## Number of independent variables: 6
## Mtry: 2
## Target node size: 10
## Variable importance mode: none
## Splitrule: gini
## OOB prediction error (Brier s.): 0.4839634
## Preparation of a new explainer is initiated
## -> model label : Ranger Multilabel Classification
## -> data : 338 rows 6 cols
## -> target variable : 338 values
## -> target variable : Please note that 'y' is a factor. ( WARNING )
## -> target variable : Consider changing the 'y' to a logical or numerical vector.
## -> target variable : Otherwise I will not be able to calculate residuals or loss function.
## -> model_info : package ranger , ver. 0.12.1 , task classification ( default )
## -> predict function : yhat.ranger will be used ( default )
## -> predicted values : predict function returns multiple columns: 5 ( WARNING ) some of functionalities may not work
## -> residual function : difference between y and yhat ( default )
## Warning in Ops.factor(y, predict_function(model, data)): '-' not meaningful for
## factors
## -> residuals : numerical, min = NA , mean = NA , max = NA
## A new explainer has been created!
## [1] 3 4 4 3 2 2 3 4 2 4
## Preparation of a new explainer is initiated
## -> model label : Ranger Multilabel Classification
## -> data : 338 rows 6 cols
## -> target variable : 338 values
## -> model_info : package ranger , ver. 0.12.1 , task classification ( default )
## -> predict function : yhat.ranger will be used ( default )
## -> predicted values : predict function returns multiple columns: 5 ( WARNING ) some of functionalities may not work
## -> residual function : difference between y and yhat ( default )
## -> residuals : numerical, min = 0.5780405 , mean = 3.711243 , max = 5
## A new explainer has been created!

## Warning in if (class(new_observation_ext) != "data.frame") {: the condition has
## length > 1 and only the first element will be used

## Warning in if (class(new_observation_ext) != "data.frame") {: the condition has
## length > 1 and only the first element will be used



# Residual for a probability classifier: one minus the probability that the
# model assigns to the true class of each observation.
#
# Arguments:
#   model  fitted model; predict(model, data, probability = TRUE) must return
#          an object whose `$predictions` element is a matrix with one row per
#          observation and one named column per class.
#   data   predictor data handed to predict().
#   y      true class labels; coerced to character and matched against the
#          column names of the prediction matrix.
#
# Returns a numeric vector with one residual per row of the prediction matrix
# (numeric(0) when the matrix has no rows). Stops with an error when a label
# is missing or does not match any class column.
residual_function <- function(model, data, y) {
  probs <- predict(model, data, probability = TRUE)$predictions
  n_obs <- nrow(probs)

  # Only the first n_obs labels are used; if y is shorter, the padding NAs
  # are rejected by the check below.
  truth <- as.character(y)[seq_len(n_obs)]
  class_col <- match(truth, colnames(probs))
  if (anyNA(class_col)) {
    stop(
      "every value of 'y' must match a column name of the predictions",
      call. = FALSE
    )
  }

  # A two-column index matrix selects probs[i, class_col[i]] for all rows at
  # once, replacing the element-by-element loop.
  1 - probs[cbind(seq_len(n_obs), class_col)]
}
# Build an explainer for the ranger model that uses the custom residual
# function instead of the default "y - yhat" difference.
explain_HR_ranger_residual <- explain(
  model_HR_ranger,
  label = "Ranger Multilabel Classification",
  data = mn02[, -1],       # all columns except the first (the response)
  y = mn02$SEVERITY_CD,
  residual_function = residual_function,
  colorize = FALSE
)
## Preparation of a new explainer is initiated
## -> model label : Ranger Multilabel Classification
## -> data : 338 rows 6 cols
## -> target variable : 338 values
## -> target variable : Please note that 'y' is a factor. ( WARNING )
## -> target variable : Consider changing the 'y' to a logical or numerical vector.
## -> target variable : Otherwise I will not be able to calculate residuals or loss function.
## -> model_info : package ranger , ver. 0.12.1 , task classification ( default )
## -> predict function : yhat.ranger will be used ( default )
## -> predicted values : predict function returns multiple columns: 5 ( WARNING ) some of functionalities may not work
## -> residual function : residual_function
## -> residuals : numerical, min = 0.2261979 , mean = 0.5522091 , max = 0.9241105
## A new explainer has been created!
## Warning in ks.test(residuals_all, residuals_sel): p-value will be approximate in
## the presence of ties

## Warning in if (class(new_observation_ext) != "data.frame") {: the condition has
## length > 1 and only the first element will be used
## Warning in if (class(new_observation_ext) != "data.frame") {: the condition has
## length > 1 and only the first element will be used
