faimodels 01

library(fairmodels)

## Warning: package 'fairmodels' was built under R version 3.6.3

## From now on difference in fariness_check and parity_loss
## has been changed to ratios to adhere to four-fifths (80%) rule (adverse impact).
## More on that in documentation. Thank you for using fairmodels!

data("compas")
head(compas)

##   Two_yr_Recidivism Number_of_Priors Age_Above_FourtyFive Age_Below_TwentyFive
## 1                 0                0                    1                    0
## 2                 1                0                    0                    0
## 3                 1                4                    0                    1
## 4                 0                0                    0                    0
## 5                 1               14                    0                    0
## 6                 0                3                    0                    0
##   Misdemeanor        Ethnicity  Sex
## 1           0            Other Male
## 2           0 African_American Male
## 3           0 African_American Male
## 4           1            Other Male
## 5           0        Caucasian Male
## 6           0            Other Male

compas$Two_yr_Recidivism <- as.factor(ifelse(compas$Two_yr_Recidivism == '1', '0', '1'))
library(DALEX)

## Warning: package 'DALEX' was built under R version 3.6.2

## Welcome to DALEX (version: 1.0).
## Find examples and detailed introduction at: https://pbiecek.github.io/ema/

library(ranger)

## Warning: package 'ranger' was built under R version 3.6.2

# train
rf_compas <- ranger(Two_yr_Recidivism ~., data = compas, probability = TRUE)

# numeric target values
y_numeric <- as.numeric(compas$Two_yr_Recidivism)-1

# explainer
rf_explainer <- explain(rf_compas, data = compas[,-1], y = y_numeric, colorize = FALSE)

## Preparation of a new explainer is initiated
##   -> model label       :  ranger  (  default  )
##   -> data              :  6172  rows  6  cols 
##   -> target variable   :  6172  values 
##   -> model_info        :  package ranger , ver. 0.12.1 , task classification (  default  ) 
##   -> predict function  :  yhat.ranger  will be used (  default  )
##   -> predicted values  :  numerical, min =  0.1566328 , mean =  0.5452321 , max =  0.8713493  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -0.8548187 , mean =  -0.0003519728 , max =  0.7851198  
##   A new explainer has been created!

fobject <- fairness_check(rf_explainer,                         # explainer
                          protected = compas$Ethnicity,         # protected variable as factor
                          privileged = "Caucasian",             # level in protected variable, potentially more privileged
                          cutoff = 0.5,                         # cutoff - optional, default = 0.5
                          colorize = FALSE)

## Creating fairness object
## -> Privileged subgroup       : character ( Ok  )
## -> Protected variable        : factor ( Ok  ) 
## -> Cutoff values for explainers  : 0.5 ( for all subgroups )
## -> Fairness objects      : 0 objects 
## -> Checking explainers       : 1 in total (  compatible  )
## -> Metric calculation        : 12/12 metrics calculated for all models
##  Fairness object created succesfully

print(fobject, colorize = FALSE)

## 
## Fairness check for models: ranger 
## 
## ranger passes 3/5 metrics
## Total loss:  3.493797

plot(fobject)

plot_density(fobject)

plot(metric_scores(fobject))

library(gbm)

## Warning: package 'gbm' was built under R version 3.6.3

## Loaded gbm 2.1.8

rf_compas_1 <- ranger(Two_yr_Recidivism ~Number_of_Priors+Age_Below_TwentyFive,
                      data = compas,
                      probability = TRUE)

lr_compas_1 <- glm(Two_yr_Recidivism~.,
                   data=compas,
                   family=binomial(link="logit"))

rf_compas_2 <- ranger(Two_yr_Recidivism ~., data = compas, probability = TRUE) 
rf_compas_3 <- ranger(Two_yr_Recidivism ~ Age_Above_FourtyFive+Misdemeanor,
                      data = compas,
                      probability = TRUE)

df <- compas
df$Two_yr_Recidivism <- as.numeric(compas$Two_yr_Recidivism)-1
gbm_compas_1<- gbm(Two_yr_Recidivism~., data = df)

## Distribution not specified, assuming bernoulli ...

explainer_1 <- explain(rf_compas_1,  data = compas[,-1], y = y_numeric)

## Preparation of a new explainer is initiated
##   -> model label       :  ranger  ( [33m default [39m )
##   -> data              :  6172  rows  6  cols 
##   -> target variable   :  6172  values 
##   -> model_info        :  package ranger , ver. 0.12.1 , task classification ( [33m default [39m ) 
##   -> predict function  :  yhat.ranger  will be used ( [33m default [39m )
##   -> predicted values  :  numerical, min =  0.2113924 , mean =  0.5447904 , max =  0.7023442  
##   -> residual function :  difference between y and yhat ( [33m default [39m )
##   -> residuals         :  numerical, min =  -0.7023442 , mean =  8.966551e-05 , max =  0.7604288  
##  [32m A new explainer has been created! [39m

explainer_2 <- explain(lr_compas_1,  data = compas[,-1], y = y_numeric)

## Preparation of a new explainer is initiated
##   -> model label       :  lm  ( [33m default [39m )
##   -> data              :  6172  rows  6  cols 
##   -> target variable   :  6172  values 
##   -> model_info        :  package stats , ver. 3.6.0 , task regression ( [33m default [39m ) 
##   -> predict function  :  yhat.glm  will be used ( [33m default [39m )
##   -> predicted values  :  numerical, min =  0.004522979 , mean =  0.5448801 , max =  0.8855426  
##   -> residual function :  difference between y and yhat ( [33m default [39m )
##   -> residuals         :  numerical, min =  -0.8822826 , mean =  -5.075761e-13 , max =  0.9767658  
##  [32m A new explainer has been created! [39m

explainer_3 <- explain(rf_compas_2,  data = compas[,-1], y = y_numeric, label = "ranger_2")

## Preparation of a new explainer is initiated
##   -> model label       :  ranger_2 
##   -> data              :  6172  rows  6  cols 
##   -> target variable   :  6172  values 
##   -> model_info        :  package ranger , ver. 0.12.1 , task classification ( [33m default [39m ) 
##   -> predict function  :  yhat.ranger  will be used ( [33m default [39m )
##   -> predicted values  :  numerical, min =  0.1354131 , mean =  0.5448542 , max =  0.8679436  
##   -> residual function :  difference between y and yhat ( [33m default [39m )
##   -> residuals         :  numerical, min =  -0.8475469 , mean =  2.594059e-05 , max =  0.7874139  
##  [32m A new explainer has been created! [39m

explainer_4 <- explain(rf_compas_3,  data = compas[,-1], y = y_numeric, label = "ranger_3")

## Preparation of a new explainer is initiated
##   -> model label       :  ranger_3 
##   -> data              :  6172  rows  6  cols 
##   -> target variable   :  6172  values 
##   -> model_info        :  package ranger , ver. 0.12.1 , task classification ( [33m default [39m ) 
##   -> predict function  :  yhat.ranger  will be used ( [33m default [39m )
##   -> predicted values  :  numerical, min =  0.488697 , mean =  0.5455337 , max =  0.6938  
##   -> residual function :  difference between y and yhat ( [33m default [39m )
##   -> residuals         :  numerical, min =  -0.6938 , mean =  -0.0006535596 , max =  0.511303  
##  [32m A new explainer has been created! [39m

explainer_5 <- explain(gbm_compas_1, data = compas[,-1], y = y_numeric)

## Preparation of a new explainer is initiated
##   -> model label       :  gbm  ( [33m default [39m )
##   -> data              :  6172  rows  6  cols 
##   -> target variable   :  6172  values 
##   -> model_info        :  package gbm , ver. 2.1.8 , task classification ( [33m default [39m ) 
##   -> predict function  :  yhat.gbm  will be used ( [33m default [39m )
##   -> predicted values  :  numerical, min =  0.09186 , mean =  0.5429901 , max =  0.8781552  
##   -> residual function :  difference between y and yhat ( [33m default [39m )
##   -> residuals         :  numerical, min =  -0.8781552 , mean =  0.001890009 , max =  0.8686451  
##  [32m A new explainer has been created! [39m

fobject <- fairness_check(explainer_1, explainer_2,
                            explainer_3, explainer_4,
                            explainer_5,
                            protected = compas$Ethnicity,
                            privileged = "Caucasian",
                            verbose = FALSE) 

fobject$parity_loss_metric_data

##         TPR       TNR       PPV       NPV       FNR       FPR       FDR
## 1 0.3976902 1.2511537 0.4859671 0.7339807 1.6654218 1.7041164 1.3910955
## 2 0.5471779        NA 0.1630229        NA 3.2047184 1.9228318 0.3724644
## 3 0.4863510 1.2565799 0.4461356 0.8507621        NA 1.7609548 1.3719957
## 4 0.6611125 0.7401031 0.7674172 0.8209521 0.8762755 0.8532876 1.4329174
## 5 0.4450676 2.5063743 0.3608679 1.1016724 2.0684872 2.2139740 1.0365102
##         FOR        TS       STP       ACC        F1
## 1 1.3892629 0.7150599 0.8731178 0.4174524 0.4430506
## 2 1.7116794 0.5362121 1.1034842 0.1969839 0.3361082
## 3        NA 0.7552041 1.0052899 0.4591076 0.4578185
## 4 0.6729784 0.8970468 0.5742171 0.3638060 0.6304323
## 5 1.6163655 0.6441347 1.0482399 0.3439627 0.3989397

fobject$groups_data$ranger$TPR

## African_American            Asian        Caucasian         Hispanic 
##        0.6697490        0.8695652        0.8149883        0.8406250 
##  Native_American            Other 
##        0.8333333        0.8858447

fobject$cutoff$ranger

## $African_American
## [1] 0.5
## 
## $Asian
## [1] 0.5
## 
## $Caucasian
## [1] 0.5
## 
## $Hispanic
## [1] 0.5
## 
## $Native_American
## [1] 0.5
## 
## $Other
## [1] 0.5

sm <- stack_metrics(fobject)

## Warning in drop_metrics_with_na(parity_loss_metric_data): Found metric with NA: TNR, NPV, FNR, FOR, omiting it

plot(sm)

cm <- choose_metric(fobject, "TPR")
plot(cm)

fair_pca <- fairness_pca(fobject)

## Warning in drop_metrics_with_na(data): Found metric with NA: TNR, NPV, FNR, FOR, omiting it

print(fair_pca)

## Fairness PCA : 
##             PC1        PC2          PC3         PC4           PC5
## [1,]  0.2568924  1.2523889 -0.568732704 -0.19759040  0.000000e+00
## [2,] -2.8494529 -1.6650431 -0.002735714 -0.15273510  0.000000e+00
## [3,]  0.3387039  1.0850158  0.630993933 -0.15582060 -2.289835e-16
## [4,]  3.6459225 -1.2643264 -0.031416445  0.07402038  3.330669e-16
## [5,] -1.3920659  0.5919648 -0.028109070  0.43212572  6.938894e-16
## 
## Created with: 
## [1] "ranger"   "lm"       "ranger_2" "ranger_3" "gbm"     
## 
## First two components explained 97 % of variance.

plot(fair_pca)

fheatmap <- fairness_heatmap(fobject)
plot(fheatmap, text_size = 3)

## Warning: Removed 4 rows containing missing values (geom_text).

fap <- performance_and_fairness(fobject, fairness_metric = "STP")

## Performace metric is NULL, setting deafult ( accuracy )  
## 
## Creating object with: 
## Fairness metric: STP 
## Performance metric: accuracy

plot(fap)

fobject2 <- fairness_check(explainer_1,explainer_2, 
                                   protected = compas$Ethnicity,
                                   privileged = "Caucasian", 
                                    verbose = FALSE)


gm <- group_metric(fobject2, fairness_metric = "FPR")

## Performace metric not given, setting deafult ( accuracy )  
## 
## Creating object with: 
## Fairness metric:  FPR 
## Performance metric:  accuracy

plot(gm)

fradar <- fairness_radar(fobject2)

## Warning in fairness_radar(fobject2): Found metric with NA: TNR, NPV, ommiting it

plot(fradar)

ac <- all_cutoffs(fobject2)

plot(ac)

## Warning: Removed 26 row(s) containing missing values (geom_path).

cpc <- ceteris_paribus_cutoff(fobject2, subgroup = "African_American")

plot(cpc)

## Warning: Removed 15 row(s) containing missing values (geom_path).

library(equatiomatic)

## Warning: package 'equatiomatic' was built under R version 3.6.3

m <- lm(bill_length_mm ~ bill_depth_mm + flipper_length_mm, penguins)
extract_eq(m)

\[ \operatorname{bill\_length\_mm} = \alpha + \beta_{1}(\operatorname{bill\_depth\_mm}) + \beta_{2}(\operatorname{flipper\_length\_mm}) + \epsilon \]

m2 <- lm(bill_length_mm ~ bill_depth_mm*island, penguins)
extract_eq(m2, wrap = TRUE)

\[ \begin{aligned} \operatorname{bill\_length\_mm} &= \alpha + \beta_{1}(\operatorname{bill\_depth\_mm}) + \beta_{2}(\operatorname{island}_{\operatorname{Dream}}) + \beta_{3}(\operatorname{island}_{\operatorname{Torgersen}})\ + \\ &\quad \beta_{4}(\operatorname{bill\_depth\_mm} \times \operatorname{island}_{\operatorname{Dream}}) + \beta_{5}(\operatorname{bill\_depth\_mm} \times \operatorname{island}_{\operatorname{Torgersen}}) + \epsilon \end{aligned} \]