if (!require(mlba)) {
  library(devtools)
  install_github("gedeck/mlba/mlba", force=TRUE)
}

Example: Applying the RDS Framework to the COMPAS Example

Data Issues

library(caret)
library(tidyverse)
# load COMPAS data
compas.df <- mlba::COMPAS_clean %>%
  select(-id) %>%
  mutate(
    age_cat = factor(age_cat, levels=c("Less than 25", "25 - 45", "Greater than 45")),
    c_charge_degree = factor(c_charge_degree, levels=c("F", "M")),
    race = factor(race, levels=c("African-American", "Asian", "Caucasian", "Hispanic",
                                 "Native American", "Other")),
    sex = factor(sex, levels=c("Female", "Male")),
    two_year_recid = factor(two_year_recid, levels=c(0, 1), labels=c("No", "Yes"))
  )

# split dataset and train models
set.seed(1)
idx <- createDataPartition(compas.df$two_year_recid, p=0.7, list=FALSE)
train.df <- compas.df[idx, ]
valid.df <- compas.df[-idx, ]
trControl <- trainControl(method="cv", number=5, allowParallel=TRUE)
# logistic regression model
logreg.model <- train(two_year_recid ~ . - race, data=train.df,
                      method="glm", family="binomial", trControl=trControl)
# random forest model
rf.model <- train(two_year_recid ~ . - race, data=train.df,
                         method="rf", trControl=trControl)

# extract coefficients and calculate odds shown in table
logreg.coef <- coef(logreg.model$finalModel)
data.frame(
  coefficient=logreg.coef,
  odds=c(NA, exp(logreg.coef)[-c(1)])
) %>% round(3)
#>                          coefficient  odds
#> (Intercept)                   -0.619    NA
#> `age_cat25 - 45`              -0.717 0.488
#> `age_catGreater than 45`      -1.525 0.218
#> c_charge_degreeM              -0.239 0.788
#> sexMale                        0.433 1.542
#> priors_count                   0.162 1.176
# calculation of model accuracy based on cross-validation results
caret::confusionMatrix(logreg.model)
#> Cross-Validated (5 fold) Confusion Matrix 
#> 
#> (entries are percentual average cell counts across resamples)
#>  
#>           Reference
#> Prediction   No  Yes
#>        No  57.0 24.2
#>        Yes  6.4 12.4
#>                             
#>  Accuracy (average) : 0.6947
caret::confusionMatrix(rf.model)
#> Cross-Validated (5 fold) Confusion Matrix 
#> 
#> (entries are percentual average cell counts across resamples)
#>  
#>           Reference
#> Prediction   No  Yes
#>        No  54.8 21.6
#>        Yes  8.6 15.0
#>                             
#>  Accuracy (average) : 0.6974

Auditing the Model

library(ROCR)
holdoutMetrics <- function(df, model) {
  result <- data.frame(obs = df$two_year_recid, pred = predict(model, newdata=df),
    prob = predict(model, newdata=df, type="prob")$Yes)
  pred <- prediction(result$prob, result$obs)
  # compute overall performance
  perf_AUC <- performance(pred, "auc")
  AUC <- perf_AUC@y.values[[1]]
  cm <- confusionMatrix(result$pred, result$obs, positive="Yes")
  return (tibble(AUC=AUC, Accuracy = cm$overall["Accuracy"],
    FPR = 100*(1-cm$byClass["Specificity"]),
    FNR = 100*(1-cm$byClass["Sensitivity"])))
}
# compute performance by race
metricsByRace <- function(model) {
  metrics <- tibble()
  for (raceValue in levels(compas.df$race)) {
    df <- compas.df %>% filter(race==raceValue)
    metrics <- bind_rows(metrics, tibble(race=raceValue, holdoutMetrics(df, model)))
  }
  return (metrics)
}
# combine metrics for logistic and random forest
metrics <- bind_rows(
  tibble(Model="Random forest", metricsByRace(rf.model)),
  tibble(Model="Logistic regression", metricsByRace(logreg.model))
) %>% filter(! race %in% c("Asian", "Native American"))
library(gridExtra)
makeBarchart <- function(metrics, aesthetics) {
  g <- ggplot(metrics, aesthetics) +
    geom_bar(position="dodge", stat="identity") +
    geom_text(hjust=1.5, position=position_dodge(.9)) +
    coord_flip() +
    scale_x_discrete(limits=rev) +
    labs(x="Race") +
    theme_bw()
  return (g)
}
g1 <- makeBarchart(metrics, aes(x=race, y=Accuracy, fill=Model, label=round(Accuracy, 3))) +
  theme(legend.position="none")
g2 <- makeBarchart(metrics, aes(x=race, y=AUC, fill=Model, label=round(AUC, 3))) +
  theme(legend.position="bottom")
grid.arrange(g1, g2, nrow=2, heights=c(6.25, 7))

g <- arrangeGrob(g1, g2, heights=c(6.25, 7))
ggsave(file=file.path(getwd(), "figures", "chapter_22", "c22_acc_auc.pdf"),
       g, width=5, height=5, units="in")

g1 <- makeBarchart(metrics, aes(x=race, y=FPR, fill=Model, label=round(FPR, 3))) +
  theme(legend.position="none")
g2 <- makeBarchart(metrics, aes(x=race, y=FNR, fill=Model, label=round(FNR, 3))) +
  theme(legend.position="bottom")
grid.arrange(g1, g2, heights=c(6.25, 7))

g <- arrangeGrob(g1, g2, heights=c(6.25, 7))
ggsave(file=file.path(getwd(), "figures", "chapter_22", "c22_fpr_fnr.pdf"),
    g, width=5, height=5, units="in")

Interpretability Methods

library(iml)
predictor.rf = Predictor$new(rf.model, data=valid.df, y=valid.df$two_year_recid)
predictor.lm = Predictor$new(logreg.model, data=valid.df, y=valid.df$two_year_recid)
featureEffect.lm = FeatureEffect$new(predictor.lm, feature='priors_count', method='pdp')
featureEffect.rf = FeatureEffect$new(predictor.rf, feature='priors_count', method='pdp')
combined <- bind_rows(
  tibble(Method="Logistic regression", featureEffect.lm$results %>% filter(.class=="Yes")),
  tibble(Method="Random forest", featureEffect.rf$results %>% filter(.class=="Yes"))
)
ggplot(combined, aes(x=priors_count, y=.value, color=Method)) +
  geom_line() +
  labs(x="Feature value", y="Probability of recidivism")

ggsave(file=file.path(getwd(), "figures", "chapter_22", "c22f005.pdf"),
  last_plot() + theme_bw(), width=5, height=3, units="in")
library(iml)
predictor.rf = Predictor$new(rf.model, data=valid.df, y=valid.df$two_year_recid)
predictor.lm = Predictor$new(logreg.model, data=valid.df, y=valid.df$two_year_recid)

# permutation feature importance
FeatureImp$new(predictor.lm, "ce", compare="ratio", n.repetitions=5)
#> Interpretation method:  FeatureImp 
#> error function: ce
#> 
#> Analysed predictor: 
#> Prediction task: classification 
#> Classes:  
#> 
#> Analysed data:
#> Sampling from data.frame with 1590 rows and 6 columns.
#> 
#> 
#> Head of results:
#>           feature importance.05 importance importance.95 permutation.error
#> 1    priors_count     1.2871369  1.2966805      1.321577         0.3930818
#> 2         age_cat     1.0481328  1.0622407      1.090871         0.3220126
#> 3 c_charge_degree     0.9896266  1.0000000      1.012863         0.3031447
#> 4            race     1.0000000  1.0000000      1.000000         0.3031447
#> 5  two_year_recid     1.0000000  1.0000000      1.000000         0.3031447
#> 6             sex     0.9850622  0.9917012      1.001660         0.3006289
FeatureImp$new(predictor.rf, "ce", compare="ratio", n.repetitions=5)
#> Interpretation method:  FeatureImp 
#> error function: ce
#> 
#> Analysed predictor: 
#> Prediction task: classification 
#> Classes:  
#> 
#> Analysed data:
#> Sampling from data.frame with 1590 rows and 6 columns.
#> 
#> 
#> Head of results:
#>           feature importance.05 importance importance.95 permutation.error
#> 1    priors_count     1.3425577  1.3773585      1.391195         0.4132075
#> 2         age_cat     1.0637317  1.0838574      1.085954         0.3251572
#> 3            race     1.0000000  1.0000000      1.000000         0.3000000
#> 4  two_year_recid     1.0000000  1.0000000      1.000000         0.3000000
#> 5             sex     0.9861635  0.9958071      1.014256         0.2987421
#> 6 c_charge_degree     0.9861635  0.9916143      1.021803         0.2974843