Fairmodel Pedestrian

setwd("C:/Users/subas/Syncplicity/MyProjects_IMP/MY_Papers_V2/ALL_PAPERS/Journal/Submitted/2020_2021/0_Colla/HAMSA_Collaborations/Bicycle")


library(readxl)
aa1= read_excel("Bicycle_10_16a.xlsx", sheet= "Main")
names(aa1)
##  [1] "ID"              "CRASH_NUM1"      "VEH_NUM"         "CRASH_VEH_NUM1" 
##  [5] "DR_COND"         "DR_RACE"         "DR_SEX"          "DR_AGE"         
##  [9] "M_HARM_EV"       "MOVEMENT_REASON" "PRIOR_MOVEMENT"  "TRAFF_CNTL"     
## [13] "VEH_LIGHTING"    "VIOLATIONS"      "POSTED_SPEED"    "ALIGNMENT"      
## [17] "HWY_TYPE"        "LIGHTING"        "LOC_TYPE"        "MAN_COLL"       
## [21] "ROAD_TYPE"       "SEVERITY"        "WEATHER"         "YEAR"           
## [25] "DAY_OF_WK"       "INTERSECTION"    "HIT_AND_RUN"
aa2= aa1[, -c(1, 2, 3, 4, 24)]
names(aa2)
##  [1] "DR_COND"         "DR_RACE"         "DR_SEX"          "DR_AGE"         
##  [5] "M_HARM_EV"       "MOVEMENT_REASON" "PRIOR_MOVEMENT"  "TRAFF_CNTL"     
##  [9] "VEH_LIGHTING"    "VIOLATIONS"      "POSTED_SPEED"    "ALIGNMENT"      
## [13] "HWY_TYPE"        "LIGHTING"        "LOC_TYPE"        "MAN_COLL"       
## [17] "ROAD_TYPE"       "SEVERITY"        "WEATHER"         "DAY_OF_WK"      
## [21] "INTERSECTION"    "HIT_AND_RUN"
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.0     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
aa2 <- aa2 %>% mutate_if(is.character, as.factor)

detach("package:tidyverse", unload=TRUE)
library(fairmodels)
library(DALEX)
## Welcome to DALEX (version: 2.2.0).
## Find examples and detailed introduction at: http://ema.drwhy.ai/
## 
## Attaching package: 'DALEX'
## The following object is masked from 'package:dplyr':
## 
##     explain
library(ranger)

# train
rf_compas <- ranger(SEVERITY ~., data = aa2, probability = TRUE)

# numeric target values
y_numeric <- as.numeric(aa2$SEVERITY)-1

# explainer
rf_explainer <- DALEX::explain(rf_compas, data = aa2[,-c(18)], 
                        y = y_numeric, colorize = FALSE)
## Preparation of a new explainer is initiated
##   -> model label       :  ranger  (  default  )
##   -> data              :  6136  rows  21  cols 
##   -> data              :  tibble converted into a data.frame 
##   -> target variable   :  6136  values 
##   -> predict function  :  yhat.ranger  will be used (  default  )
##   -> predicted values  :  No value for predict function target column. (  default  )
##   -> model_info        :  package ranger , ver. 0.12.1 , task classification (  default  ) 
##   -> predicted values  :  numerical, min =  0.00542963 , mean =  0.216089 , max =  0.9901553  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -0.822672 , mean =  -0.00471348 , max =  0.7765461  
##   A new explainer has been created!
fobject <- fairness_check(rf_explainer,                         # explainer
                          protected = aa2$LIGHTING,         # protected variable as factor
                          privileged = "Daylight",             # level in protected variable, potentially more privileged
                          cutoff = 0.5,                         # cutoff - optional, default = 0.5
                          colorize = FALSE)     
## Creating fairness object
## -> Privileged subgroup       : character ( Ok  )
## -> Protected variable        : factor ( Ok  ) 
## -> Cutoff values for explainers  : 0.5 ( for all subgroups )
## -> Fairness objects      : 0 objects 
## -> Checking explainers       : 1 in total (  compatible  )
## -> Metric calculation        : 10/12 metrics calculated for all models ( 2 NA created )
##  Fairness object created succesfully
print(fobject, colorize = FALSE)
## 
## Fairness check for models: ranger 
## 
## ranger passes 2/5 metrics
## Total loss:  10.20797
plot(fobject)

plot_density(fobject)

plot(metric_scores(fobject))

library(gbm)
## Loaded gbm 2.1.8
rf_compas_1 <- ranger(SEVERITY ~DR_COND+TRAFF_CNTL+VIOLATIONS+MAN_COLL+LIGHTING, 
                      data = aa2,
                      probability = TRUE)

lr_compas_1 <- glm(SEVERITY ~., data = aa2,
                   family=binomial(link="logit"))

rf_compas_2 <- ranger(SEVERITY ~DR_COND+TRAFF_CNTL+VIOLATIONS+MAN_COLL+LIGHTING+WEATHER, data = aa2, probability = TRUE) 
rf_compas_3 <- ranger(SEVERITY ~DR_COND+PRIOR_MOVEMENT+TRAFF_CNTL+MAN_COLL+LIGHTING, data = aa2,
                      probability = TRUE)

df <- aa2
df$SEVERITY  <- as.numeric(aa2$SEVERITY)-1
gbm_compas_1<- gbm(SEVERITY ~., data = df) 
## Distribution not specified, assuming bernoulli ...
explainer_1 <- DALEX::explain(rf_compas_1, 
                       data = aa2[-18], y = y_numeric)
## Preparation of a new explainer is initiated
##   -> model label       :  ranger  (  default  )
##   -> data              :  6136  rows  21  cols 
##   -> data              :  tibble converted into a data.frame 
##   -> target variable   :  6136  values 
##   -> predict function  :  yhat.ranger  will be used (  default  )
##   -> predicted values  :  No value for predict function target column. (  default  )
##   -> model_info        :  package ranger , ver. 0.12.1 , task classification (  default  ) 
##   -> predicted values  :  numerical, min =  0.008890305 , mean =  0.2124075 , max =  0.7945492  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -0.6540793 , mean =  -0.001032014 , max =  0.9154429  
##   A new explainer has been created! 
explainer_2 <- DALEX::explain(lr_compas_1,  data = aa2[,-18], 
                              y = y_numeric)
## Preparation of a new explainer is initiated
##   -> model label       :  lm  (  default  )
##   -> data              :  6136  rows  21  cols 
##   -> data              :  tibble converted into a data.frame 
##   -> target variable   :  6136  values 
##   -> predict function  :  yhat.glm  will be used (  default  )
##   -> predicted values  :  No value for predict function target column. (  default  )
##   -> model_info        :  package stats , ver. 4.0.5 , task classification (  default  ) 
##   -> predicted values  :  numerical, min =  2.02059e-07 , mean =  0.2113755 , max =  0.9999995  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -0.980677 , mean =  -1.02154e-09 , max =  0.969129  
##   A new explainer has been created! 
explainer_3 <- DALEX::explain(rf_compas_2,  data = aa2[-18], 
                       y = y_numeric, label = "ranger_2")
## Preparation of a new explainer is initiated
##   -> model label       :  ranger_2 
##   -> data              :  6136  rows  21  cols 
##   -> data              :  tibble converted into a data.frame 
##   -> target variable   :  6136  values 
##   -> predict function  :  yhat.ranger  will be used (  default  )
##   -> predicted values  :  No value for predict function target column. (  default  )
##   -> model_info        :  package ranger , ver. 0.12.1 , task classification (  default  ) 
##   -> predicted values  :  numerical, min =  0.01378684 , mean =  0.2121356 , max =  0.7762111  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -0.6697377 , mean =  -0.0007601572 , max =  0.908237  
##   A new explainer has been created! 
explainer_4 <- DALEX::explain(rf_compas_3,  data= aa2[-18], 
                       y = y_numeric, label = "ranger_3")
## Preparation of a new explainer is initiated
##   -> model label       :  ranger_3 
##   -> data              :  6136  rows  21  cols 
##   -> data              :  tibble converted into a data.frame 
##   -> target variable   :  6136  values 
##   -> predict function  :  yhat.ranger  will be used (  default  )
##   -> predicted values  :  No value for predict function target column. (  default  )
##   -> model_info        :  package ranger , ver. 0.12.1 , task classification (  default  ) 
##   -> predicted values  :  numerical, min =  0.006009169 , mean =  0.2125147 , max =  0.744479  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -0.676354 , mean =  -0.001139241 , max =  0.9313859  
##   A new explainer has been created! 
explainer_5 <- DALEX::explain(gbm_compas_1, data = aa2[,-18], y = y_numeric)
## Preparation of a new explainer is initiated
##   -> model label       :  gbm  (  default  )
##   -> data              :  6136  rows  21  cols 
##   -> data              :  tibble converted into a data.frame 
##   -> target variable   :  6136  values 
##   -> predict function  :  yhat.gbm  will be used (  default  )
##   -> predicted values  :  No value for predict function target column. (  default  )
##   -> model_info        :  package gbm , ver. 2.1.8 , task classification (  default  ) 
##   -> predicted values  :  numerical, min =  0.05510084 , mean =  0.2110858 , max =  0.9518961  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -0.9438503 , mean =  0.0002896893 , max =  0.9198814  
##   A new explainer has been created! 
fobject <- fairness_check(explainer_1, explainer_2,
                          explainer_3, explainer_4,
                          explainer_5,
                          protected = aa2$LIGHTING,         # protected variable as factor
                          privileged = "Daylight",  
                          verbose = FALSE) 

fobject$parity_loss_metric_data
##        TPR        TNR       PPV       NPV       FNR      FPR      FDR      FOR
## 1 3.888013 0.03969482 1.3366940 0.2594656 1.1160549       NA       NA 1.259136
## 2 1.282564 0.05313272 0.4468252 0.2392741 0.6736247 2.325905 2.076729 1.391812
## 3 4.585918 0.01978879 0.7556578 0.2552571 1.0095027       NA       NA 1.195994
## 4 6.464199 0.05026887 0.6562669 0.2853394 1.2021054       NA       NA 1.348803
## 5 1.283364 0.03433158 0.3695356 0.2315210 0.6695295 2.103612 1.733093 1.345561
##         TS      STP       ACC        F1
## 1 3.948120 4.434428 0.3114638 3.3224400
## 2 1.189349 1.521520 0.2479164 0.9025144
## 3 4.604500 4.152736 0.2931527 3.9763151
## 4 6.342786 5.521134 0.2928713 5.5961962
## 5 1.212648 1.404598 0.2134996 0.9196052
fobject$cutoff$ranger
## $`Dark - Continuous Street Light`
## [1] 0.5
## 
## $`Dark - No Street Lights`
## [1] 0.5
## 
## $`Dark - Street Light At Intersection Only`
## [1] 0.5
## 
## $`Dawn/Dusk`
## [1] 0.5
## 
## $Daylight
## [1] 0.5
## 
## $Other
## [1] 0.5
sm <- stack_metrics(fobject)
## Warning in drop_metrics_with_na(parity_loss_metric_data): Found metric with NA: FPR, omiting it
plot(sm)

fair_pca <- fairness_pca(fobject)
## Warning in drop_metrics_with_na(data): Found metric with NA: FPR, FDR, omiting it
plot(fair_pca)

fheatmap <- fairness_heatmap(fobject)
plot(fheatmap, text_size = 3)
## Warning: Removed 6 rows containing missing values (geom_text).