setwd("C:/Users/subas/Syncplicity/MyProjects_IMP/MY_Papers_V2/ALL_PAPERS/Journal/Submitted/2020_2021/0_Colla/HAMSA_Collaborations/Bicycle")
library(readxl)
aa1= read_excel("Bicycle_10_16a.xlsx", sheet= "Main")
names(aa1)
## [1] "ID" "CRASH_NUM1" "VEH_NUM" "CRASH_VEH_NUM1"
## [5] "DR_COND" "DR_RACE" "DR_SEX" "DR_AGE"
## [9] "M_HARM_EV" "MOVEMENT_REASON" "PRIOR_MOVEMENT" "TRAFF_CNTL"
## [13] "VEH_LIGHTING" "VIOLATIONS" "POSTED_SPEED" "ALIGNMENT"
## [17] "HWY_TYPE" "LIGHTING" "LOC_TYPE" "MAN_COLL"
## [21] "ROAD_TYPE" "SEVERITY" "WEATHER" "YEAR"
## [25] "DAY_OF_WK" "INTERSECTION" "HIT_AND_RUN"
aa2= aa1[, -c(1, 2, 3, 4, 24)]
names(aa2)
## [1] "DR_COND" "DR_RACE" "DR_SEX" "DR_AGE"
## [5] "M_HARM_EV" "MOVEMENT_REASON" "PRIOR_MOVEMENT" "TRAFF_CNTL"
## [9] "VEH_LIGHTING" "VIOLATIONS" "POSTED_SPEED" "ALIGNMENT"
## [13] "HWY_TYPE" "LIGHTING" "LOC_TYPE" "MAN_COLL"
## [17] "ROAD_TYPE" "SEVERITY" "WEATHER" "DAY_OF_WK"
## [21] "INTERSECTION" "HIT_AND_RUN"
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
aa2 <- aa2 %>% mutate_if(is.character, as.factor)
detach("package:tidyverse", unload=TRUE)
library(fairmodels)
library(DALEX)
## Welcome to DALEX (version: 2.2.0).
## Find examples and detailed introduction at: http://ema.drwhy.ai/
##
## Attaching package: 'DALEX'
## The following object is masked from 'package:dplyr':
##
## explain
library(ranger)
# train
rf_compas <- ranger(SEVERITY ~., data = aa2, probability = TRUE)
# numeric target values
y_numeric <- as.numeric(aa2$SEVERITY)-1
# explainer
rf_explainer <- DALEX::explain(rf_compas, data = aa2[,-c(18)],
y = y_numeric, colorize = FALSE)
## Preparation of a new explainer is initiated
## -> model label : ranger ( default )
## -> data : 6136 rows 21 cols
## -> data : tibble converted into a data.frame
## -> target variable : 6136 values
## -> predict function : yhat.ranger will be used ( default )
## -> predicted values : No value for predict function target column. ( default )
## -> model_info : package ranger , ver. 0.12.1 , task classification ( default )
## -> predicted values : numerical, min = 0.00542963 , mean = 0.216089 , max = 0.9901553
## -> residual function : difference between y and yhat ( default )
## -> residuals : numerical, min = -0.822672 , mean = -0.00471348 , max = 0.7765461
## A new explainer has been created!
fobject <- fairness_check(rf_explainer, # explainer
protected = aa2$LIGHTING, # protected variable as factor
privileged = "Daylight", # level in protected variable, potentially more privileged
cutoff = 0.5, # cutoff - optional, default = 0.5
colorize = FALSE)
## Creating fairness object
## -> Privileged subgroup : character ( Ok )
## -> Protected variable : factor ( Ok )
## -> Cutoff values for explainers : 0.5 ( for all subgroups )
## -> Fairness objects : 0 objects
## -> Checking explainers : 1 in total ( compatible )
## -> Metric calculation : 10/12 metrics calculated for all models ( 2 NA created )
## Fairness object created succesfully
print(fobject, colorize = FALSE)
##
## Fairness check for models: ranger
##
## ranger passes 2/5 metrics
## Total loss: 10.20797


plot(metric_scores(fobject))

## Loaded gbm 2.1.8
rf_compas_1 <- ranger(SEVERITY ~DR_COND+TRAFF_CNTL+VIOLATIONS+MAN_COLL+LIGHTING,
data = aa2,
probability = TRUE)
lr_compas_1 <- glm(SEVERITY ~., data = aa2,
family=binomial(link="logit"))
rf_compas_2 <- ranger(SEVERITY ~DR_COND+TRAFF_CNTL+VIOLATIONS+MAN_COLL+LIGHTING+WEATHER, data = aa2, probability = TRUE)
rf_compas_3 <- ranger(SEVERITY ~DR_COND+PRIOR_MOVEMENT+TRAFF_CNTL+MAN_COLL+LIGHTING, data = aa2,
probability = TRUE)
df <- aa2
df$SEVERITY <- as.numeric(aa2$SEVERITY)-1
gbm_compas_1<- gbm(SEVERITY ~., data = df)
## Distribution not specified, assuming bernoulli ...
explainer_1 <- DALEX::explain(rf_compas_1,
data = aa2[-18], y = y_numeric)
## Preparation of a new explainer is initiated
## -> model label : ranger ( [33m default [39m )
## -> data : 6136 rows 21 cols
## -> data : tibble converted into a data.frame
## -> target variable : 6136 values
## -> predict function : yhat.ranger will be used ( [33m default [39m )
## -> predicted values : No value for predict function target column. ( [33m default [39m )
## -> model_info : package ranger , ver. 0.12.1 , task classification ( [33m default [39m )
## -> predicted values : numerical, min = 0.008890305 , mean = 0.2124075 , max = 0.7945492
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -0.6540793 , mean = -0.001032014 , max = 0.9154429
## [32m A new explainer has been created! [39m
explainer_2 <- DALEX::explain(lr_compas_1, data = aa2[,-18],
y = y_numeric)
## Preparation of a new explainer is initiated
## -> model label : lm ( [33m default [39m )
## -> data : 6136 rows 21 cols
## -> data : tibble converted into a data.frame
## -> target variable : 6136 values
## -> predict function : yhat.glm will be used ( [33m default [39m )
## -> predicted values : No value for predict function target column. ( [33m default [39m )
## -> model_info : package stats , ver. 4.0.5 , task classification ( [33m default [39m )
## -> predicted values : numerical, min = 2.02059e-07 , mean = 0.2113755 , max = 0.9999995
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -0.980677 , mean = -1.02154e-09 , max = 0.969129
## [32m A new explainer has been created! [39m
explainer_3 <- DALEX::explain(rf_compas_2, data = aa2[-18],
y = y_numeric, label = "ranger_2")
## Preparation of a new explainer is initiated
## -> model label : ranger_2
## -> data : 6136 rows 21 cols
## -> data : tibble converted into a data.frame
## -> target variable : 6136 values
## -> predict function : yhat.ranger will be used ( [33m default [39m )
## -> predicted values : No value for predict function target column. ( [33m default [39m )
## -> model_info : package ranger , ver. 0.12.1 , task classification ( [33m default [39m )
## -> predicted values : numerical, min = 0.01378684 , mean = 0.2121356 , max = 0.7762111
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -0.6697377 , mean = -0.0007601572 , max = 0.908237
## [32m A new explainer has been created! [39m
explainer_4 <- DALEX::explain(rf_compas_3, data= aa2[-18],
y = y_numeric, label = "ranger_3")
## Preparation of a new explainer is initiated
## -> model label : ranger_3
## -> data : 6136 rows 21 cols
## -> data : tibble converted into a data.frame
## -> target variable : 6136 values
## -> predict function : yhat.ranger will be used ( [33m default [39m )
## -> predicted values : No value for predict function target column. ( [33m default [39m )
## -> model_info : package ranger , ver. 0.12.1 , task classification ( [33m default [39m )
## -> predicted values : numerical, min = 0.006009169 , mean = 0.2125147 , max = 0.744479
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -0.676354 , mean = -0.001139241 , max = 0.9313859
## [32m A new explainer has been created! [39m
explainer_5 <- DALEX::explain(gbm_compas_1, data = aa2[,-18], y = y_numeric)
## Preparation of a new explainer is initiated
## -> model label : gbm ( [33m default [39m )
## -> data : 6136 rows 21 cols
## -> data : tibble converted into a data.frame
## -> target variable : 6136 values
## -> predict function : yhat.gbm will be used ( [33m default [39m )
## -> predicted values : No value for predict function target column. ( [33m default [39m )
## -> model_info : package gbm , ver. 2.1.8 , task classification ( [33m default [39m )
## -> predicted values : numerical, min = 0.05510084 , mean = 0.2110858 , max = 0.9518961
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -0.9438503 , mean = 0.0002896893 , max = 0.9198814
## [32m A new explainer has been created! [39m
fobject <- fairness_check(explainer_1, explainer_2,
explainer_3, explainer_4,
explainer_5,
protected = aa2$LIGHTING, # protected variable as factor
privileged = "Daylight",
verbose = FALSE)
fobject$parity_loss_metric_data
## TPR TNR PPV NPV FNR FPR FDR FOR
## 1 3.888013 0.03969482 1.3366940 0.2594656 1.1160549 NA NA 1.259136
## 2 1.282564 0.05313272 0.4468252 0.2392741 0.6736247 2.325905 2.076729 1.391812
## 3 4.585918 0.01978879 0.7556578 0.2552571 1.0095027 NA NA 1.195994
## 4 6.464199 0.05026887 0.6562669 0.2853394 1.2021054 NA NA 1.348803
## 5 1.283364 0.03433158 0.3695356 0.2315210 0.6695295 2.103612 1.733093 1.345561
## TS STP ACC F1
## 1 3.948120 4.434428 0.3114638 3.3224400
## 2 1.189349 1.521520 0.2479164 0.9025144
## 3 4.604500 4.152736 0.2931527 3.9763151
## 4 6.342786 5.521134 0.2928713 5.5961962
## 5 1.212648 1.404598 0.2134996 0.9196052
## $`Dark - Continuous Street Light`
## [1] 0.5
##
## $`Dark - No Street Lights`
## [1] 0.5
##
## $`Dark - Street Light At Intersection Only`
## [1] 0.5
##
## $`Dawn/Dusk`
## [1] 0.5
##
## $Daylight
## [1] 0.5
##
## $Other
## [1] 0.5
sm <- stack_metrics(fobject)
## Warning in drop_metrics_with_na(parity_loss_metric_data): Found metric with NA: FPR, omiting it

fair_pca <- fairness_pca(fobject)
## Warning in drop_metrics_with_na(data): Found metric with NA: FPR, FDR, omiting it

fheatmap <- fairness_heatmap(fobject)
plot(fheatmap, text_size = 3)
## Warning: Removed 6 rows containing missing values (geom_text).
