faimodels 01
library(fairmodels)## Warning: package 'fairmodels' was built under R version 3.6.3
## From now on difference in fariness_check and parity_loss
## has been changed to ratios to adhere to four-fifths (80%) rule (adverse impact).
## More on that in documentation. Thank you for using fairmodels!
data("compas")
head(compas)## Two_yr_Recidivism Number_of_Priors Age_Above_FourtyFive Age_Below_TwentyFive
## 1 0 0 1 0
## 2 1 0 0 0
## 3 1 4 0 1
## 4 0 0 0 0
## 5 1 14 0 0
## 6 0 3 0 0
## Misdemeanor Ethnicity Sex
## 1 0 Other Male
## 2 0 African_American Male
## 3 0 African_American Male
## 4 1 Other Male
## 5 0 Caucasian Male
## 6 0 Other Male
compas$Two_yr_Recidivism <- as.factor(ifelse(compas$Two_yr_Recidivism == '1', '0', '1'))
library(DALEX)## Warning: package 'DALEX' was built under R version 3.6.2
## Welcome to DALEX (version: 1.0).
## Find examples and detailed introduction at: https://pbiecek.github.io/ema/
library(ranger)## Warning: package 'ranger' was built under R version 3.6.2
# train
rf_compas <- ranger(Two_yr_Recidivism ~., data = compas, probability = TRUE)
# numeric target values
y_numeric <- as.numeric(compas$Two_yr_Recidivism)-1
# explainer
rf_explainer <- explain(rf_compas, data = compas[,-1], y = y_numeric, colorize = FALSE)## Preparation of a new explainer is initiated
## -> model label : ranger ( default )
## -> data : 6172 rows 6 cols
## -> target variable : 6172 values
## -> model_info : package ranger , ver. 0.12.1 , task classification ( default )
## -> predict function : yhat.ranger will be used ( default )
## -> predicted values : numerical, min = 0.1566328 , mean = 0.5452321 , max = 0.8713493
## -> residual function : difference between y and yhat ( default )
## -> residuals : numerical, min = -0.8548187 , mean = -0.0003519728 , max = 0.7851198
## A new explainer has been created!
fobject <- fairness_check(rf_explainer, # explainer
protected = compas$Ethnicity, # protected variable as factor
privileged = "Caucasian", # level in protected variable, potentially more privileged
cutoff = 0.5, # cutoff - optional, default = 0.5
colorize = FALSE) ## Creating fairness object
## -> Privileged subgroup : character ( Ok )
## -> Protected variable : factor ( Ok )
## -> Cutoff values for explainers : 0.5 ( for all subgroups )
## -> Fairness objects : 0 objects
## -> Checking explainers : 1 in total ( compatible )
## -> Metric calculation : 12/12 metrics calculated for all models
## Fairness object created succesfully
print(fobject, colorize = FALSE)##
## Fairness check for models: ranger
##
## ranger passes 3/5 metrics
## Total loss: 3.493797
plot(fobject)plot_density(fobject)plot(metric_scores(fobject))library(gbm)## Warning: package 'gbm' was built under R version 3.6.3
## Loaded gbm 2.1.8
rf_compas_1 <- ranger(Two_yr_Recidivism ~Number_of_Priors+Age_Below_TwentyFive,
data = compas,
probability = TRUE)
lr_compas_1 <- glm(Two_yr_Recidivism~.,
data=compas,
family=binomial(link="logit"))
rf_compas_2 <- ranger(Two_yr_Recidivism ~., data = compas, probability = TRUE)
rf_compas_3 <- ranger(Two_yr_Recidivism ~ Age_Above_FourtyFive+Misdemeanor,
data = compas,
probability = TRUE)
df <- compas
df$Two_yr_Recidivism <- as.numeric(compas$Two_yr_Recidivism)-1
gbm_compas_1<- gbm(Two_yr_Recidivism~., data = df) ## Distribution not specified, assuming bernoulli ...
explainer_1 <- explain(rf_compas_1, data = compas[,-1], y = y_numeric)## Preparation of a new explainer is initiated
## -> model label : ranger ( [33m default [39m )
## -> data : 6172 rows 6 cols
## -> target variable : 6172 values
## -> model_info : package ranger , ver. 0.12.1 , task classification ( [33m default [39m )
## -> predict function : yhat.ranger will be used ( [33m default [39m )
## -> predicted values : numerical, min = 0.2113924 , mean = 0.5447904 , max = 0.7023442
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -0.7023442 , mean = 8.966551e-05 , max = 0.7604288
## [32m A new explainer has been created! [39m
explainer_2 <- explain(lr_compas_1, data = compas[,-1], y = y_numeric)## Preparation of a new explainer is initiated
## -> model label : lm ( [33m default [39m )
## -> data : 6172 rows 6 cols
## -> target variable : 6172 values
## -> model_info : package stats , ver. 3.6.0 , task regression ( [33m default [39m )
## -> predict function : yhat.glm will be used ( [33m default [39m )
## -> predicted values : numerical, min = 0.004522979 , mean = 0.5448801 , max = 0.8855426
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -0.8822826 , mean = -5.075761e-13 , max = 0.9767658
## [32m A new explainer has been created! [39m
explainer_3 <- explain(rf_compas_2, data = compas[,-1], y = y_numeric, label = "ranger_2")## Preparation of a new explainer is initiated
## -> model label : ranger_2
## -> data : 6172 rows 6 cols
## -> target variable : 6172 values
## -> model_info : package ranger , ver. 0.12.1 , task classification ( [33m default [39m )
## -> predict function : yhat.ranger will be used ( [33m default [39m )
## -> predicted values : numerical, min = 0.1354131 , mean = 0.5448542 , max = 0.8679436
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -0.8475469 , mean = 2.594059e-05 , max = 0.7874139
## [32m A new explainer has been created! [39m
explainer_4 <- explain(rf_compas_3, data = compas[,-1], y = y_numeric, label = "ranger_3")## Preparation of a new explainer is initiated
## -> model label : ranger_3
## -> data : 6172 rows 6 cols
## -> target variable : 6172 values
## -> model_info : package ranger , ver. 0.12.1 , task classification ( [33m default [39m )
## -> predict function : yhat.ranger will be used ( [33m default [39m )
## -> predicted values : numerical, min = 0.488697 , mean = 0.5455337 , max = 0.6938
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -0.6938 , mean = -0.0006535596 , max = 0.511303
## [32m A new explainer has been created! [39m
explainer_5 <- explain(gbm_compas_1, data = compas[,-1], y = y_numeric)## Preparation of a new explainer is initiated
## -> model label : gbm ( [33m default [39m )
## -> data : 6172 rows 6 cols
## -> target variable : 6172 values
## -> model_info : package gbm , ver. 2.1.8 , task classification ( [33m default [39m )
## -> predict function : yhat.gbm will be used ( [33m default [39m )
## -> predicted values : numerical, min = 0.09186 , mean = 0.5429901 , max = 0.8781552
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -0.8781552 , mean = 0.001890009 , max = 0.8686451
## [32m A new explainer has been created! [39m
fobject <- fairness_check(explainer_1, explainer_2,
explainer_3, explainer_4,
explainer_5,
protected = compas$Ethnicity,
privileged = "Caucasian",
verbose = FALSE)
fobject$parity_loss_metric_data## TPR TNR PPV NPV FNR FPR FDR
## 1 0.3976902 1.2511537 0.4859671 0.7339807 1.6654218 1.7041164 1.3910955
## 2 0.5471779 NA 0.1630229 NA 3.2047184 1.9228318 0.3724644
## 3 0.4863510 1.2565799 0.4461356 0.8507621 NA 1.7609548 1.3719957
## 4 0.6611125 0.7401031 0.7674172 0.8209521 0.8762755 0.8532876 1.4329174
## 5 0.4450676 2.5063743 0.3608679 1.1016724 2.0684872 2.2139740 1.0365102
## FOR TS STP ACC F1
## 1 1.3892629 0.7150599 0.8731178 0.4174524 0.4430506
## 2 1.7116794 0.5362121 1.1034842 0.1969839 0.3361082
## 3 NA 0.7552041 1.0052899 0.4591076 0.4578185
## 4 0.6729784 0.8970468 0.5742171 0.3638060 0.6304323
## 5 1.6163655 0.6441347 1.0482399 0.3439627 0.3989397
fobject$groups_data$ranger$TPR## African_American Asian Caucasian Hispanic
## 0.6697490 0.8695652 0.8149883 0.8406250
## Native_American Other
## 0.8333333 0.8858447
fobject$cutoff$ranger## $African_American
## [1] 0.5
##
## $Asian
## [1] 0.5
##
## $Caucasian
## [1] 0.5
##
## $Hispanic
## [1] 0.5
##
## $Native_American
## [1] 0.5
##
## $Other
## [1] 0.5
sm <- stack_metrics(fobject)## Warning in drop_metrics_with_na(parity_loss_metric_data): Found metric with NA: TNR, NPV, FNR, FOR, omiting it
plot(sm)cm <- choose_metric(fobject, "TPR")
plot(cm)fair_pca <- fairness_pca(fobject)## Warning in drop_metrics_with_na(data): Found metric with NA: TNR, NPV, FNR, FOR, omiting it
print(fair_pca)## Fairness PCA :
## PC1 PC2 PC3 PC4 PC5
## [1,] 0.2568924 1.2523889 -0.568732704 -0.19759040 0.000000e+00
## [2,] -2.8494529 -1.6650431 -0.002735714 -0.15273510 0.000000e+00
## [3,] 0.3387039 1.0850158 0.630993933 -0.15582060 -2.289835e-16
## [4,] 3.6459225 -1.2643264 -0.031416445 0.07402038 3.330669e-16
## [5,] -1.3920659 0.5919648 -0.028109070 0.43212572 6.938894e-16
##
## Created with:
## [1] "ranger" "lm" "ranger_2" "ranger_3" "gbm"
##
## First two components explained 97 % of variance.
plot(fair_pca)fheatmap <- fairness_heatmap(fobject)
plot(fheatmap, text_size = 3)## Warning: Removed 4 rows containing missing values (geom_text).
fap <- performance_and_fairness(fobject, fairness_metric = "STP")## Performace metric is NULL, setting deafult ( accuracy )
##
## Creating object with:
## Fairness metric: STP
## Performance metric: accuracy
plot(fap)fobject2 <- fairness_check(explainer_1,explainer_2,
protected = compas$Ethnicity,
privileged = "Caucasian",
verbose = FALSE)
gm <- group_metric(fobject2, fairness_metric = "FPR")## Performace metric not given, setting deafult ( accuracy )
##
## Creating object with:
## Fairness metric: FPR
## Performance metric: accuracy
plot(gm)fradar <- fairness_radar(fobject2)## Warning in fairness_radar(fobject2): Found metric with NA: TNR, NPV, ommiting it
plot(fradar)ac <- all_cutoffs(fobject2)
plot(ac)## Warning: Removed 26 row(s) containing missing values (geom_path).
cpc <- ceteris_paribus_cutoff(fobject2, subgroup = "African_American")
plot(cpc)## Warning: Removed 15 row(s) containing missing values (geom_path).
library(equatiomatic)## Warning: package 'equatiomatic' was built under R version 3.6.3
m <- lm(bill_length_mm ~ bill_depth_mm + flipper_length_mm, penguins)
extract_eq(m)\[ \operatorname{bill\_length\_mm} = \alpha + \beta_{1}(\operatorname{bill\_depth\_mm}) + \beta_{2}(\operatorname{flipper\_length\_mm}) + \epsilon \]
m2 <- lm(bill_length_mm ~ bill_depth_mm*island, penguins)
extract_eq(m2, wrap = TRUE)\[ \begin{aligned} \operatorname{bill\_length\_mm} &= \alpha + \beta_{1}(\operatorname{bill\_depth\_mm}) + \beta_{2}(\operatorname{island}_{\operatorname{Dream}}) + \beta_{3}(\operatorname{island}_{\operatorname{Torgersen}})\ + \\ &\quad \beta_{4}(\operatorname{bill\_depth\_mm} \times \operatorname{island}_{\operatorname{Dream}}) + \beta_{5}(\operatorname{bill\_depth\_mm} \times \operatorname{island}_{\operatorname{Torgersen}}) + \epsilon \end{aligned} \]