Data Issues
library(caret)
library(tidyverse)
# load COMPAS data
compas.df <- mlba::COMPAS_clean %>%
  select(-id) %>%
  mutate(
    age_cat = factor(age_cat, levels=c("Less than 25", "25 - 45", "Greater than 45")),
    c_charge_degree = factor(c_charge_degree, levels=c("F", "M")),
    race = factor(race, levels=c("African-American", "Asian", "Caucasian", "Hispanic",
                                 "Native American", "Other")),
    sex = factor(sex, levels=c("Female", "Male")),
    two_year_recid = factor(two_year_recid, levels=c(0, 1), labels=c("No", "Yes"))
  )
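# Optional check (not part of the original workflow): a quick sketch of how the
# outcome is distributed within each race group before modeling.
compas.df %>%
  count(race, two_year_recid) %>%
  group_by(race) %>%
  mutate(prop = round(n / sum(n), 3))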
# split dataset and train models
set.seed(1)
idx <- createDataPartition(compas.df$two_year_recid, p=0.7, list=FALSE)
train.df <- compas.df[idx, ]
valid.df <- compas.df[-idx, ]
trControl <- trainControl(method="cv", number=5, allowParallel=TRUE)
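# Note: allowParallel=TRUE only takes effect if a foreach parallel backend is
# registered. A minimal sketch using the doParallel package (an assumption; it is
# not loaded in the original code). Call stopCluster(cl) once training is done.
library(doParallel)
cl <- makeCluster(2)     # two worker processes; adjust to your machine
registerDoParallel(cl)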
# logistic regression model
logreg.model <- train(two_year_recid ~ . - race, data=train.df,
                      method="glm", family="binomial", trControl=trControl)
# random forest model
rf.model <- train(two_year_recid ~ . - race, data=train.df,
                  method="rf", trControl=trControl)
# extract coefficients and calculate odds shown in table
logreg.coef <- coef(logreg.model$finalModel)
data.frame(
  coefficient=logreg.coef,
  odds=c(NA, exp(logreg.coef)[-1])
) %>% round(3)
#>                          coefficient  odds
#> (Intercept)                   -0.619    NA
#> `age_cat25 - 45`              -0.717 0.488
#> `age_catGreater than 45`      -1.525 0.218
#> c_charge_degreeM              -0.239 0.788
#> sexMale                        0.433 1.542
#> priors_count                   0.162 1.176
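# A sketch of approximate 95% confidence intervals for the odds ratios, based on
# Wald intervals from the underlying glm object (assumes confint.default is
# adequate here; not part of the original code).
round(exp(confint.default(logreg.model$finalModel)), 3)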
# calculation of model accuracy based on cross-validation results
caret::confusionMatrix(logreg.model)
#> Cross-Validated (5 fold) Confusion Matrix
#>
#> (entries are percentual average cell counts across resamples)
#>
#>           Reference
#> Prediction   No  Yes
#>        No  57.0 24.2
#>        Yes  6.4 12.4
#>
#> Accuracy (average) : 0.6947
caret::confusionMatrix(rf.model)
#> Cross-Validated (5 fold) Confusion Matrix
#>
#> (entries are percentual average cell counts across resamples)
#>
#>           Reference
#> Prediction   No  Yes
#>        No  54.8 21.6
#>        Yes  8.6 15.0
#>
#> Accuracy (average) : 0.6974
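# The matrices above are averaged over the training resamples. A sketch of the
# same comparison on the holdout set (valid.df was not used for training).
confusionMatrix(predict(logreg.model, newdata=valid.df), valid.df$two_year_recid, positive="Yes")
confusionMatrix(predict(rf.model, newdata=valid.df), valid.df$two_year_recid, positive="Yes")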
Auditing the Model
library(ROCR)
holdoutMetrics <- function(df, model) {
  result <- data.frame(obs = df$two_year_recid, pred = predict(model, newdata=df),
                       prob = predict(model, newdata=df, type="prob")$Yes)
  pred <- prediction(result$prob, result$obs)
  # compute overall performance
  perf_AUC <- performance(pred, "auc")
  AUC <- perf_AUC@y.values[[1]]
  cm <- confusionMatrix(result$pred, result$obs, positive="Yes")
  return (tibble(AUC=AUC, Accuracy = cm$overall["Accuracy"],
                 FPR = 100*(1-cm$byClass["Specificity"]),
                 FNR = 100*(1-cm$byClass["Sensitivity"])))
}
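# Example use of holdoutMetrics(): overall holdout performance for each model
# before breaking it down by race.
holdoutMetrics(valid.df, logreg.model)
holdoutMetrics(valid.df, rf.model)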
# compute performance by race
metricsByRace <- function(model) {
  metrics <- tibble()
  for (raceValue in levels(compas.df$race)) {
    df <- compas.df %>% filter(race == raceValue)
    metrics <- bind_rows(metrics, tibble(race=raceValue, holdoutMetrics(df, model)))
  }
  return (metrics)
}
# combine metrics for logistic regression and random forest; the Asian and
# Native American groups are excluded because they contain very few records
metrics <- bind_rows(
  tibble(Model="Random forest", metricsByRace(rf.model)),
  tibble(Model="Logistic regression", metricsByRace(logreg.model))
) %>% filter(!race %in% c("Asian", "Native American"))
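# Optional sketch: reshape the audit table to compare AUC by race across the two
# models side by side (tidyr::pivot_wider is part of the tidyverse).
metrics %>%
  select(Model, race, AUC) %>%
  pivot_wider(names_from=Model, values_from=AUC)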
library(gridExtra)
makeBarchart <- function(metrics, aesthetics) {
  g <- ggplot(metrics, aesthetics) +
    geom_bar(position="dodge", stat="identity") +
    geom_text(hjust=1.5, position=position_dodge(.9)) +
    coord_flip() +
    scale_x_discrete(limits=rev) +
    labs(x="Race") +
    theme_bw()
  return (g)
}
g1 <- makeBarchart(metrics, aes(x=race, y=Accuracy, fill=Model, label=round(Accuracy, 3))) +
  theme(legend.position="none")
g2 <- makeBarchart(metrics, aes(x=race, y=AUC, fill=Model, label=round(AUC, 3))) +
  theme(legend.position="bottom")
grid.arrange(g1, g2, nrow=2, heights=c(6.25, 7))

g <- arrangeGrob(g1, g2, heights=c(6.25, 7))
ggsave(file=file.path(getwd(), "figures", "chapter_22", "c22_acc_auc.pdf"),
       g, width=5, height=5, units="in")
g1 <- makeBarchart(metrics, aes(x=race, y=FPR, fill=Model, label=round(FPR, 3))) +
  theme(legend.position="none")
g2 <- makeBarchart(metrics, aes(x=race, y=FNR, fill=Model, label=round(FNR, 3))) +
  theme(legend.position="bottom")
grid.arrange(g1, g2, heights=c(6.25, 7))

g <- arrangeGrob(g1, g2, heights=c(6.25, 7))
ggsave(file=file.path(getwd(), "figures", "chapter_22", "c22_fpr_fnr.pdf"),
       g, width=5, height=5, units="in")
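# A sketch of summarizing the disparities shown in the charts as the spread of
# error rates across the plotted race groups (an equalized-odds-style gap; not
# part of the original code).
metrics %>%
  group_by(Model) %>%
  summarize(FPR_gap = max(FPR) - min(FPR),
            FNR_gap = max(FNR) - min(FNR))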
Interpretability Methods
library(iml)
predictor.rf <- Predictor$new(rf.model, data=valid.df, y=valid.df$two_year_recid)
predictor.lm <- Predictor$new(logreg.model, data=valid.df, y=valid.df$two_year_recid)
featureEffect.lm <- FeatureEffect$new(predictor.lm, feature="priors_count", method="pdp")
featureEffect.rf <- FeatureEffect$new(predictor.rf, feature="priors_count", method="pdp")
combined <- bind_rows(
  tibble(Method="Logistic regression", featureEffect.lm$results %>% filter(.class=="Yes")),
  tibble(Method="Random forest", featureEffect.rf$results %>% filter(.class=="Yes"))
)
ggplot(combined, aes(x=priors_count, y=.value, color=Method)) +
  geom_line() +
  labs(x="Feature value", y="Probability of recidivism")

ggsave(file=file.path(getwd(), "figures", "chapter_22", "c22f005.pdf"),
       last_plot() + theme_bw(), width=5, height=3, units="in")
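# Local explanation sketch (not in the original code) using iml's Shapley class:
# attribute a single holdout prediction to the features. The chosen row,
# valid.df[1, ], is arbitrary and only for illustration.
shapley.rf <- Shapley$new(predictor.rf, x.interest=valid.df[1, ])
plot(shapley.rf)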
# permutation feature importance
FeatureImp$new(predictor.lm, "ce", compare="ratio", n.repetitions=5)
#> Interpretation method: FeatureImp
#> error function: ce
#>
#> Analysed predictor:
#> Prediction task: classification
#> Classes:
#>
#> Analysed data:
#> Sampling from data.frame with 1590 rows and 6 columns.
#>
#>
#> Head of results:
#>           feature importance.05 importance importance.95 permutation.error
#> 1    priors_count     1.2871369  1.2966805      1.321577         0.3930818
#> 2         age_cat     1.0481328  1.0622407      1.090871         0.3220126
#> 3 c_charge_degree     0.9896266  1.0000000      1.012863         0.3031447
#> 4            race     1.0000000  1.0000000      1.000000         0.3031447
#> 5  two_year_recid     1.0000000  1.0000000      1.000000         0.3031447
#> 6             sex     0.9850622  0.9917012      1.001660         0.3006289
FeatureImp$new(predictor.rf, "ce", compare="ratio", n.repetitions=5)
#> Interpretation method: FeatureImp
#> error function: ce
#>
#> Analysed predictor:
#> Prediction task: classification
#> Classes:
#>
#> Analysed data:
#> Sampling from data.frame with 1590 rows and 6 columns.
#>
#>
#> Head of results:
#>           feature importance.05 importance importance.95 permutation.error
#> 1    priors_count     1.3425577  1.3773585      1.391195         0.4132075
#> 2         age_cat     1.0637317  1.0838574      1.085954         0.3251572
#> 3            race     1.0000000  1.0000000      1.000000         0.3000000
#> 4  two_year_recid     1.0000000  1.0000000      1.000000         0.3000000
#> 5             sex     0.9861635  0.9958071      1.014256         0.2987421
#> 6 c_charge_degree     0.9861635  0.9916143      1.021803         0.2974843
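# A sketch of visualizing the permutation importance instead of reading the
# table; plot() on an iml FeatureImp object returns a ggplot.
imp.rf <- FeatureImp$new(predictor.rf, "ce", compare="ratio", n.repetitions=5)
plot(imp.rf)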