# Load the TidyTuesday (2021-04-27) CEO departures data from GitHub.
departures <- readr::read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-04-27/departures.csv"
)

# Names of the columns listed below (the variable name suggests they are
# meant to be treated as factors). Base subsetting is used on purpose:
# dplyr (`%>%`, `select()`) is only attached further down in this file, and
# `[` still raises an error if any of these columns is missing.
factors_vec <- names(
    departures[c(
        "departure_code", "co_per_rol", "fyear",
        "tenure_no_ceodb", "max_tenure_ceodb", "fyear_gone"
    )]
)

library(dplyr)
library(lubridate)

# Build the modelling table from the raw departures data:
#   * drop identifier / free-text / bookkeeping columns,
#   * drop the placeholder departure year 2997 (filter() also removes rows
#     where fyear_gone is NA) and rows with an unknown outcome,
#   * replace the departure timestamp by year / day-of-year / month,
#   * recode the outcome as the strings "dismissed" / "not dismissed".
data_clean <- departures %>%
    select(-c(
        interim_coceo, still_there, eight_ks, gvkey, co_per_rol, cik, fyear,
        `_merge`, notes, sources,
        # Previously converted to factor and then dropped again (the original
        # comment calls them zero-variance); drop them directly instead.
        tenure_no_ceodb, max_tenure_ceodb
    )) %>%
    # fyear_gone is numeric, so compare against a number rather than a string.
    filter(fyear_gone != 2997) %>%
    filter(!is.na(ceo_dismissal)) %>%
    mutate(
        departure_code = factor(departure_code),
        leftofc = as.Date(leftofc),   # POSIXct -> Date before extracting parts
        year = year(leftofc),
        doy = yday(leftofc),
        month = month(leftofc),
        # NA outcomes were filtered out above; any value other than 0/1 is
        # kept as its character representation, as before.
        ceo_dismissal = case_when(
            ceo_dismissal == 1 ~ "dismissed",
            ceo_dismissal == 0 ~ "not dismissed",
            TRUE ~ as.character(ceo_dismissal)
        )
    ) %>%
    select(-leftofc)                  # only the derived date parts are kept
 # Work on a 100-row random subsample. The seed makes the subsample (and
 # everything derived from it) reproducible; slice_sample() replaces the
 # superseded sample_n().
 set.seed(2021)
 data_clean <- data_clean %>% slice_sample(n = 100)

Explore data

# Per-column summary of the raw data (types, missingness, distribution).
skimr::skim(departures)
Data summary
Name departures
Number of rows 9423
Number of columns 19
_______________________
Column type frequency:
character 8
numeric 10
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
coname 0 1.00 2 30 0 3860 0
exec_fullname 0 1.00 5 790 0 8701 0
interim_coceo 9105 0.03 6 7 0 6 0
still_there 7311 0.22 3 10 0 77 0
notes 1644 0.83 5 3117 0 7755 0
sources 1475 0.84 18 1843 0 7915 0
eight_ks 4499 0.52 69 3884 0 4914 0
_merge 0 1.00 11 11 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
dismissal_dataset_id 0 1.00 5684.10 25005.46 1 2305.5 4593 6812.5 559044 ▇▁▁▁▁
gvkey 0 1.00 40132.48 53921.34 1004 7337.0 14385 60900.5 328795 ▇▁▁▁▁
fyear 0 1.00 2007.74 8.19 1987 2000.0 2008 2016.0 2020 ▁▆▅▅▇
co_per_rol 0 1.00 25580.22 18202.38 -1 8555.5 22980 39275.5 64602 ▇▆▅▃▃
departure_code 1667 0.82 5.20 1.53 1 5.0 5 7.0 9 ▁▃▇▅▁
ceo_dismissal 1813 0.81 0.20 0.40 0 0.0 0 0.0 1 ▇▁▁▁▂
tenure_no_ceodb 0 1.00 1.03 0.17 0 1.0 1 1.0 3 ▁▇▁▁▁
max_tenure_ceodb 0 1.00 1.05 0.24 1 1.0 1 1.0 4 ▇▁▁▁▁
fyear_gone 1802 0.81 2006.64 13.63 1980 2000.0 2007 2013.0 2997 ▇▁▁▁▁
cik 245 0.97 741469.17 486551.43 1750 106413.0 857323 1050375.8 1808065 ▆▁▇▂▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
leftofc 1802 0.81 1981-01-01 2998-04-27 2006-12-31 3627
# Class balance of the outcome in the sampled data.
count(data_clean, ceo_dismissal)
## # A tibble: 2 × 2
##   ceo_dismissal     n
##   <chr>         <int>
## 1 dismissed        15
## 2 not dismissed    85
# Bar chart of the outcome's class balance. The ggplot2 calls are
# namespace-qualified because, in the visible file, ggplot2 is only attached
# later by library(tidymodels).
ggplot2::ggplot(data_clean, ggplot2::aes(x = ceo_dismissal)) +
    ggplot2::geom_bar()

Correlation plot

# Step 1: binarize
# Drop the free-text name columns, then turn every remaining column into 0/1
# indicator columns. binarize() is namespace-qualified (as
# plot_correlation_funnel() already is below) because correlationfunnel is
# never attached in the visible file.
data_binarized <- data_clean %>%
    select(-exec_fullname, -coname) %>%
    correlationfunnel::binarize()

# Inspect the indicator columns produced by the binarization step.
glimpse(data_binarized)
## Rows: 100
## Columns: 26
## $ `dismissal_dataset_id__-Inf_2479.75` <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ dismissal_dataset_id__2479.75_5027   <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, …
## $ dismissal_dataset_id__5027_6993.5    <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, …
## $ dismissal_dataset_id__6993.5_Inf     <dbl> 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ departure_code__3                    <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code__5                    <dbl> 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, …
## $ departure_code__6                    <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ departure_code__7                    <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, …
## $ ceo_dismissal__dismissed             <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ceo_dismissal__not_dismissed         <dbl> 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ `fyear_gone__-Inf_2000.75`           <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, …
## $ fyear_gone__2000.75_2008             <dbl> 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, …
## $ fyear_gone__2008_2015                <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, …
## $ fyear_gone__2015_Inf                 <dbl> 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, …
## $ `year__-Inf_2000.75`                 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, …
## $ year__2000.75_2008                   <dbl> 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, …
## $ year__2008_2015                      <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ year__2015_Inf                       <dbl> 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, …
## $ `doy__-Inf_71`                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ doy__71_154.5                        <dbl> 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, …
## $ doy__154.5_278.25                    <dbl> 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ doy__278.25_Inf                      <dbl> 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, …
## $ `month__-Inf_3`                      <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, …
## $ month__3_6                           <dbl> 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, …
## $ month__6_10                          <dbl> 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, …
## $ month__10_Inf                        <dbl> 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
# Step 2: correlation
# Correlate every indicator column with the "dismissed" indicator.
# correlate() is namespace-qualified because correlationfunnel is never
# attached in the visible file.
data_correlation <- data_binarized %>%
    correlationfunnel::correlate(ceo_dismissal__dismissed)

data_correlation
## # A tibble: 26 × 3
##    feature              bin           correlation
##    <fct>                <chr>               <dbl>
##  1 departure_code       3                   1    
##  2 ceo_dismissal        dismissed           1    
##  3 ceo_dismissal        not_dismissed      -1    
##  4 departure_code       5                  -0.484
##  5 departure_code       7                  -0.249
##  6 dismissal_dataset_id -Inf_2479.75        0.210
##  7 month                10_Inf             -0.210
##  8 doy                  71_154.5            0.146
##  9 month                3_6                 0.123
## 10 dismissal_dataset_id 5027_6993.5        -0.113
## # ℹ 16 more rows
# Step 3: plot
# Draw the correlation funnel from the correlation table computed above.
correlationfunnel::plot_correlation_funnel(data_correlation)

Model building

Split data

library(tidymodels)
## Warning: package 'broom' was built under R version 4.3.3
## Warning: package 'modeldata' was built under R version 4.3.3
## Warning: package 'recipes' was built under R version 4.3.3
# Seed the RNG so the split and the folds below are reproducible.
set.seed(1234)
#data_clean <- data_clean %>% sample_n(100)

# Train/test split stratified on the outcome (default proportion).
data_split <- data_clean %>% initial_split(strata = ceo_dismissal)
data_test <- testing(data_split)
data_train <- training(data_split)

# Stratified cross-validation folds built from the training portion.
data_cv <- data_train %>% rsample::vfold_cv(strata = ceo_dismissal)
data_cv
## #  10-fold cross-validation using stratification 
## # A tibble: 10 × 2
##    splits         id    
##    <list>         <chr> 
##  1 <split [65/9]> Fold01
##  2 <split [66/8]> Fold02
##  3 <split [66/8]> Fold03
##  4 <split [67/7]> Fold04
##  5 <split [67/7]> Fold05
##  6 <split [67/7]> Fold06
##  7 <split [67/7]> Fold07
##  8 <split [67/7]> Fold08
##  9 <split [67/7]> Fold09
## 10 <split [67/7]> Fold10
# Collapse training rows that share the same id / executive / departure-year
# key, keeping the first value of every column within each key.
# NOTE(review): data_split and data_cv were created before this step, so the
# resamples and the final fit presumably still see the un-collapsed rows.
# Confirm whether this should happen before the split instead.
data_train <- summarise(
  group_by(
    mutate(
      data_train,
      unique_id = paste(dismissal_dataset_id, exec_fullname, year, sep = "_")
    ),
    unique_id
  ),
  across(everything(), first)
)

Preprocess data

library(themis)
library(recipes)

# The helper key is only needed for de-duplication; drop it so it cannot
# enter the recipe as a predictor.
data_train_cleaned <- select(data_train, -unique_id)

# Create the recipe using the cleaned dataset.
#
# * dismissal_dataset_id, exec_fullname and coname look like row identifiers
#   / free-text names (nearly one distinct value per row), so they are kept in
#   the data with an "id" role instead of being dummy-encoded into well over a
#   hundred one-off indicator columns.
#   NOTE(review): confirm dismissal_dataset_id carries no real signal.
# * departure_code is removed: the correlation table in this file reports a
#   correlation of 1 between departure_code 3 and the "dismissed" class, i.e.
#   it appears to encode the outcome and leaks the target into the model.
# * step_dummy() is kept as a safeguard for any remaining nominal predictor;
#   all_nominal_predictors() already excludes the outcome.
# * step_smote() over-samples the minority class of the outcome.
xgboost_rec <- recipe(ceo_dismissal ~ ., data = data_train_cleaned) %>%
  update_role(dismissal_dataset_id, exec_fullname, coname, new_role = "id") %>%
  step_rm(departure_code) %>%
  step_dummy(all_nominal_predictors()) %>%
  step_smote(ceo_dismissal)

# Prepare (estimate) the recipe on its training data and inspect the
# processed training set. bake(new_data = NULL) returns the same data as the
# superseded juice().
xgboost_rec_prep <- prep(xgboost_rec)
data_prepped <- xgboost_rec_prep %>%
  bake(new_data = NULL) %>%
  glimpse()
## Rows: 126
## Columns: 156
## $ dismissal_dataset_id                    <dbl> 1024, 1167, 1276, 1433, 1457, …
## $ fyear_gone                              <dbl> 2020, 2009, 2015, 2003, 2017, …
## $ year                                    <dbl> 2020, 2009, 2015, 2003, 2017, …
## $ doy                                     <dbl> 245, 274, 293, 344, 152, 65, 1…
## $ month                                   <dbl> 9, 10, 10, 12, 6, 3, 5, 9, 7, …
## $ coname_AEROJET.ROCKETDYNE.HOLDINGS      <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ coname_ALTABA.INC                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_ANCHOR.GAMING                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_APPLIED.INDUSTRIAL.TECH.INC      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_AVON.PRODUCTS                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_BANK.OF.HAWAII.CORP              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_BEST.BUY.CO.INC                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_BIOLASE.INC                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_BIOMATRIX.INC                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_BUFFETS.HOLDINGS.INC             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_CAPELLA.EDUCATION.CO             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_CARE.COM.INC                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_CATALYST.HEALTH.SOLUTIONS        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_CDW.CORP                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_CEC.ENTERTAINMENT.INC            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_CEDAR.REALTY.TRUST.INC           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_DANAHER.CORP                     <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_DIPLOMAT.PHARMACY.INC            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_ENERSYS                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_EOG.RESOURCES.INC                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_EXAR.CORP                        <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_FIRSTENERGY.CORP                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_FLOWSERVE.CORP                   <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_FLUOR.DANIEL.GTI.INC             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_FREEPORT.MCMORAN.INC             <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ coname_FREESCALE.SEMICONDUCTOR.INC      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_FRONTIER.INSURANCE.GROUP.INC     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_HARLEY.DAVIDSON.INC              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_HNI.CORP                         <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, …
## $ coname_INGRAM.MICRO.INC                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_INTEGRAL.SYSTEMS.INC             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_INTERGRAPH.CORP                  <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ coname_MARRIOTT.INTL.INC                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_MARTEK.BIOSCIENCES.CORP          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_MASCO.CORP                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ coname_MBIA.INC                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_MENTOR.CORP                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ coname_MOLSON.COORS.BREWING.CO          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_MOORE.WALLACE.INC                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_MULTIMEDIA.GAMES.HOLDING.CO      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_NATIONAL.RETAIL.PROPERTIES       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_NATURES.SUNSHINE.PRODS.INC       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_NAUTILUS.INC                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_NCI.BUILDING.SYSTEMS.INC         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_ORBITAL.ATK.INC                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_PANERA.BREAD.CO                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_PEPSI.BOTTLING.GROUP.INC         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_POTLATCHDELTIC.CORP              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_QLOGIC.CORP                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_QRS.CORP                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_RCSB.FINANCIAL.INC               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_ROADRUNNER.TRANS.SYSTEMS.INC     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_RUBICON.TECHNOLOGY.INC           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_SABRE.HOLDINGS.CORP..CL.A        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_SAFETY.KLEEN.CORP.OLD            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_SEAHAWK.DRILLING.INC             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_SRA.INTERNATIONAL.INC            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_STAGE.STORES.INC                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_SWK.HOLDINGS.CORP                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_TEGNA.INC                        <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ coname_TEMPUR.SEALY.INTL.INC            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_TERADATA.CORP                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_TRICORD.SYSTEMS.INC              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_U.S.ROBOTICS.CORP                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_UNION.PLANTERS.CORP              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_VIEWLOGIC.SYSTEMS.INC            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_VOLT.INFO.SCIENCES.INC           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_WEBSTER.FINANCIAL.CORP           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_WEC.ENERGY.GROUP.INC             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_WPX.ENERGY.INC                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_XL.GROUP.LTD                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Alain.Moni                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Alan.D..Kennedy           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Anthony.J..Alexander      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Anthony.M..Sanfilippo     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Bradbury.H..Anderson      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Brian.R..Cook             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Casey.G..Cowell           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Christopher.J..Conway     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ exec_fullname_Craig.E..Weatherup        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Craig.Macnab              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Daniel.J..Murphy.Jr.      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_David.H..Elliott          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_David.T..Blair            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Derrick.R..Meyer          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Donald.R..Riley           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Donald.W..Brinckman       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Endre.A..Balazs           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Eric.J..Foss              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Ernst.Volgenau            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Forrest.E..Hoglund        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Gracia.Catherine.Martore  <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ exec_fullname_H..Thomas.Bryant          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Harry.W..Rhulen           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Henry.Linsert.Jr.         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Jack.D..Michaels          <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, …
## $ exec_fullname_Jackson.W..Moore.Jr.      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_James.Copenhaver.Smith    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_James.R..Moffett.Jr.      <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_James.R..Scarborough      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Jean.Hu                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_John.C..Dannemiller       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_John.D..Craig             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_John.F..Nemelka           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_John.J..Mitcham           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_John.S..Simon             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_John.Willard.Marriott.Jr. <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Lawrence.M..Johnson       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Leo.S..Ullman             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Leonard.S..Simon          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Lewis.Mark.Kling          <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Louis.DiNardo.BA          <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Mark.A..DiBlasi           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Michael.D..Rumbolz        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Michael.J..Covey          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Michael.P..Krasny         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Michael.S..McGavick       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Michael.Sam.Gilliland     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Peter.H..Coors            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Peter.J..Gaffney          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Philip.R..Hagerman        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_R..Halsey.Wise            <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ exec_fullname_R..Michael.Andrews..Jr.   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Ralph.A..Hill             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Randall.D..Stilley        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Reto.Braun                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Richard.A..Abdoo          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Richard.A..Manoogian      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ exec_fullname_Richard.F..Teerlink       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Richard.M..Beyer          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Richard.M..Frank          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Robert.E..Grant           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Ronald.Kochman            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Sheila.Lirio.Marcelo      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Sherilyn.S..D..McCoy      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Stephen.G..Shank          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Terry.L..Hall.CPA.PFS     <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ exec_fullname_Thomas.P..Joyce.Jr.       <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Timothy.R..Morse          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Victor.Lynn.Lund          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Walter.C..Barber          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_William.F..Weissman       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_William.K..Coors          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_William.W..Moreton        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code_X2                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code_X3                       <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code_X4                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code_X5                       <dbl> 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, …
## $ departure_code_X6                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code_X7                       <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ ceo_dismissal                           <fct> not dismissed, not dismissed, …

Specify model

# Boosted-tree classifier on the xgboost engine; every hyperparameter listed
# here is left as tune() so it is chosen during grid search.
xgboost_spec <- boost_tree(
  trees = tune(),
  min_n = tune(),
  tree_depth = tune(),
  learn_rate = tune(),
  loss_reduction = tune(),
  sample_size = tune()
) %>%
  set_engine("xgboost") %>%
  set_mode("classification")

# Bundle the model specification and the preprocessing recipe into one
# workflow.
xgboost_workflow <- workflow() %>%
  add_model(xgboost_spec) %>%
  add_recipe(xgboost_rec)

Tune hyperparameters

# Register a parallel backend for the resampling work.
doParallel::registerDoParallel()

# Evaluate 5 candidate hyperparameter sets on the CV folds, keeping the
# out-of-fold predictions for the ROC curves drawn later.
set.seed(65743)
xgboost_tune <- xgboost_workflow %>%
  tune_grid(
    resamples = data_cv,
    grid = 5,
    control = control_grid(save_pred = TRUE)
  )
## Warning: package 'xgboost' was built under R version 4.3.3

Model Evaluation

Identify Optimal Values for Hyperparameters

# Resampled performance of each candidate hyperparameter set.
xgboost_tune %>% collect_metrics()
## # A tibble: 15 × 12
##    trees min_n tree_depth learn_rate loss_reduction sample_size .metric    
##    <int> <int>      <int>      <dbl>          <dbl>       <dbl> <chr>      
##  1   725     3          5    0.0276   0.00000000248       0.841 accuracy   
##  2   725     3          5    0.0276   0.00000000248       0.841 brier_class
##  3   725     3          5    0.0276   0.00000000248       0.841 roc_auc    
##  4  1354    17         13    0.00495  0.00402             0.424 accuracy   
##  5  1354    17         13    0.00495  0.00402             0.424 brier_class
##  6  1354    17         13    0.00495  0.00402             0.424 roc_auc    
##  7   941    24          9    0.266    0.000675            0.637 accuracy   
##  8   941    24          9    0.266    0.000675            0.637 brier_class
##  9   941    24          9    0.266    0.000675            0.637 roc_auc    
## 10    85    27          1    0.00212  0.277               0.187 accuracy   
## 11    85    27          1    0.00212  0.277               0.187 brier_class
## 12    85    27          1    0.00212  0.277               0.187 roc_auc    
## 13  1712    38         12    0.0446   0.0000000705        0.718 accuracy   
## 14  1712    38         12    0.0446   0.0000000705        0.718 brier_class
## 15  1712    38         12    0.0446   0.0000000705        0.718 roc_auc    
## # ℹ 5 more variables: .estimator <chr>, mean <dbl>, n <int>, std_err <dbl>,
## #   .config <chr>
# One ROC curve per CV fold, from the saved out-of-fold predictions.
fold_predictions <- group_by(collect_predictions(xgboost_tune), id)
autoplot(roc_curve(fold_predictions, ceo_dismissal, .pred_dismissed))

Fit the Model for the Last Time

# Finalize the workflow with the best hyperparameters and fit it once on the
# full training set, evaluating on the held-out test set.
# The candidate is chosen by ROC AUC rather than accuracy: the class counts
# shown earlier are 15 dismissed vs 85 not dismissed, so a model that always
# predicts "not dismissed" would already score about 85% accuracy.
best_params <- select_best(xgboost_tune, metric = "roc_auc")

xgboost_last <- xgboost_workflow %>%
    finalize_workflow(best_params) %>%
    last_fit(data_split)

# Test-set metrics of the final fit.
xgboost_last %>% collect_metrics()
## # A tibble: 3 × 4
##   .metric     .estimator .estimate .config             
##   <chr>       <chr>          <dbl> <chr>               
## 1 accuracy    binary       1       Preprocessor1_Model1
## 2 roc_auc     binary       1       Preprocessor1_Model1
## 3 brier_class binary       0.00294 Preprocessor1_Model1
# Confusion matrix of the final fit on the test set.
yardstick::conf_mat(
    collect_predictions(xgboost_last),
    truth = ceo_dismissal,
    estimate = .pred_class
)
##                Truth
## Prediction      dismissed not dismissed
##   dismissed             4             0
##   not dismissed         0            22

Variable Importance

library(vip)

# Variable-importance plot for the underlying engine fit of the final model.
final_engine_fit <- workflows::extract_fit_engine(xgboost_last)
vip(final_engine_fit)