The goal is to predict CEO departure (ceo_dismissal).

Import Data

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(correlationfunnel)
## ══ correlationfunnel Tip #1 ════════════════════════════════════════════════════
## Make sure your data is not overly imbalanced prior to using `correlate()`.
## If less than 5% imbalance, consider sampling. :)
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──
## ✔ broom        1.0.6     ✔ rsample      1.2.1
## ✔ dials        1.3.0     ✔ tune         1.2.1
## ✔ infer        1.0.7     ✔ workflows    1.1.4
## ✔ modeldata    1.4.0     ✔ workflowsets 1.1.0
## ✔ parsnip      1.2.1     ✔ yardstick    1.3.2
## ✔ recipes      1.1.1     
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ recipes::fixed()  masks stringr::fixed()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step()   masks stats::step()
## • Dig deeper into tidy modeling with R at https://www.tmwr.org
library(textrecipes)
library(tidytext)
library(usemodels)
library(xgboost)
## 
## Attaching package: 'xgboost'
## 
## The following object is masked from 'package:dplyr':
## 
##     slice
library(themis)
library(doParallel)
## Loading required package: foreach
## 
## Attaching package: 'foreach'
## 
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
## 
## Loading required package: iterators
## Loading required package: parallel
# Read the TidyTuesday (2021-04-27) CEO departures CSV straight from GitHub.
data <- readr::read_csv(
    file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2021/2021-04-27/departures.csv"
)
## Rows: 9423 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (8): coname, exec_fullname, interim_coceo, still_there, notes, sources...
## dbl  (10): dismissal_dataset_id, gvkey, fyear, co_per_rol, departure_code, c...
## dttm  (1): leftofc
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a per-column summary (types, missingness, distributions) of the raw
# data. Kept as a pipe: the rendered summary labels its input "Piped data".
data %>% skimr::skim()
Data summary
Name Piped data
Number of rows 9423
Number of columns 19
_______________________
Column type frequency:
character 8
numeric 10
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
coname 0 1.00 2 30 0 3860 0
exec_fullname 0 1.00 5 790 0 8701 0
interim_coceo 9105 0.03 6 7 0 6 0
still_there 7311 0.22 3 10 0 77 0
notes 1644 0.83 5 3117 0 7755 0
sources 1475 0.84 18 1843 0 7915 0
eight_ks 4499 0.52 69 3884 0 4914 0
_merge 0 1.00 11 11 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
dismissal_dataset_id 0 1.00 5684.10 25005.46 1 2305.5 4593 6812.5 559044 ▇▁▁▁▁
gvkey 0 1.00 40132.48 53921.34 1004 7337.0 14385 60900.5 328795 ▇▁▁▁▁
fyear 0 1.00 2007.74 8.19 1987 2000.0 2008 2016.0 2020 ▁▆▅▅▇
co_per_rol 0 1.00 25580.22 18202.38 -1 8555.5 22980 39275.5 64602 ▇▆▅▃▃
departure_code 1667 0.82 5.20 1.53 1 5.0 5 7.0 9 ▁▃▇▅▁
ceo_dismissal 1813 0.81 0.20 0.40 0 0.0 0 0.0 1 ▇▁▁▁▂
tenure_no_ceodb 0 1.00 1.03 0.17 0 1.0 1 1.0 3 ▁▇▁▁▁
max_tenure_ceodb 0 1.00 1.05 0.24 1 1.0 1 1.0 4 ▇▁▁▁▁
fyear_gone 1802 0.81 2006.64 13.63 1980 2000.0 2007 2013.0 2997 ▇▁▁▁▁
cik 245 0.97 741469.17 486551.43 1750 106413.0 857323 1050375.8 1808065 ▆▁▇▂▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
leftofc 1802 0.81 1981-01-01 2998-04-27 2006-12-31 3627

Clean Dataset

# Build the modelling table from the raw departures data.
data_clean <- data %>%

    # Target: keep labelled rows only, then recode 1 / other as a two-level factor
    filter(!is.na(ceo_dismissal)) %>%
    mutate(
        ceo_dismissal = as.factor(
            if_else(ceo_dismissal == 1, "dismissed", "not_dis")
        )
    ) %>%

    # Drop every column that is not carried into modelling, in one pass
    select(
        # too many missing values
        -still_there, -interim_coceo, -eight_ks, -notes,
        # irrelevant
        -`_merge`, -sources,
        # can't be used
        -departure_code,
        # redundant
        -cik, -gvkey, -co_per_rol, -fyear, -leftofc
    ) %>%

    # One row per dismissal_dataset_id (the first occurrence is kept)
    distinct(dismissal_dataset_id, .keep_all = TRUE) %>%

    # Keep departure years before 2025; the raw column contains 2997
    filter(fyear_gone < 2025) %>%

    # Factor conversion: the two numeric columns named here are treated as
    # categorical, and every remaining character column becomes a factor too
    mutate(
        across(
            c(tenure_no_ceodb, fyear_gone, where(is.character)),
            as.factor
        )
    ) %>%

    # Drop any row that still has a missing value
    na.omit()

# Summarise the cleaned table
data_clean %>% skimr::skim()
Data summary
Name Piped data
Number of rows 7475
Number of columns 7
_______________________
Column type frequency:
factor 5
numeric 2
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
coname 0 1 FALSE 3427 BAR: 8, CLA: 8, FED: 8, GRE: 8
exec_fullname 0 1 FALSE 6975 Joh: 4, Mel: 4, Alb: 3, Ami: 3
ceo_dismissal 0 1 FALSE 2 not: 5992, dis: 1483
tenure_no_ceodb 0 1 FALSE 3 1: 7289, 2: 179, 3: 7
fyear_gone 0 1 FALSE 34 200: 379, 199: 351, 200: 334, 200: 321

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
dismissal_dataset_id 0 1 5570.32 25757.33 1 2175.5 4326 6579.5 559044 ▇▁▁▁▁
max_tenure_ceodb 0 1 1.05 0.23 1 1.0 1 1.0 4 ▇▁▁▁▁

Explore Data

# Class balance of the target
count(data_clean, ceo_dismissal)
## # A tibble: 2 × 2
##   ceo_dismissal     n
##   <fct>         <int>
## 1 dismissed      1483
## 2 not_dis        5992
# Bar chart of the number of rows in each target class
ggplot(data_clean, aes(x = ceo_dismissal)) +
    geom_bar()

ceo_dismissal vs. fyear_gone

# Boxplot of departure year, one box per target class.
# NOTE(review): fyear_gone was converted to a factor during cleaning, so the
# box statistics are presumably computed on factor level positions rather
# than on calendar years, and no axis identifies the two groups - confirm
# this is the intended plot.
ggplot(data_clean, aes(x = fyear_gone, group = ceo_dismissal)) +
    geom_boxplot()

Correlation Plot

# Step 1: Binarize
# Drop the id column and any incomplete rows, then turn every remaining
# column (target included) into 0/1 indicator columns.
data_binarized <- binarize(
    na.omit(
        select(data_clean, -dismissal_dataset_id)
    )
)

glimpse(data_binarized)
## Rows: 7,475
## Columns: 40
## $ coname__BARRICK_GOLD_CORP   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `coname__-OTHER`            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ exec_fullname__John_W._Rowe <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `exec_fullname__-OTHER`     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ceo_dismissal__dismissed    <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ ceo_dismissal__not_dis      <dbl> 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, …
## $ tenure_no_ceodb__1          <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ tenure_no_ceodb__2          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `tenure_no_ceodb__-OTHER`   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ max_tenure_ceodb__1         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ max_tenure_ceodb__2         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `max_tenure_ceodb__-OTHER`  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__1993            <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, …
## $ fyear_gone__1994            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__1995            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ fyear_gone__1996            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__1997            <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__1998            <dbl> 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__1999            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2000            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2001            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, …
## $ fyear_gone__2002            <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2003            <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2004            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2005            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2006            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2007            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, …
## $ fyear_gone__2008            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2009            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2010            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2011            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2012            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2013            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2014            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2015            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2016            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2017            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2018            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear_gone__2019            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `fyear_gone__-OTHER`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
# Step 2: Correlation
# Correlate every indicator column with the "dismissed" indicator of the target.
data_correlation <- correlate(data_binarized, ceo_dismissal__dismissed)

data_correlation
## # A tibble: 40 × 3
##    feature          bin       correlation
##    <fct>            <chr>           <dbl>
##  1 ceo_dismissal    dismissed      1     
##  2 ceo_dismissal    not_dis       -1     
##  3 max_tenure_ceodb 1              0.0580
##  4 max_tenure_ceodb 2             -0.0536
##  5 fyear_gone       1999          -0.0391
##  6 fyear_gone       2002           0.0374
##  7 fyear_gone       2003           0.0296
##  8 fyear_gone       2009           0.0289
##  9 tenure_no_ceodb  1              0.0256
## 10 fyear_gone       2008           0.0256
## # ℹ 30 more rows
# Step 3: Plot
# Draw the correlation funnel from the correlation table.
correlationfunnel::plot_correlation_funnel(data_correlation)
## Warning: ggrepel: 28 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Model Building

Split Data

# Fix the RNG state so the subsample and the splits below are reproducible.
set.seed(1234)

# From here on, work with a random 100-row subsample (overwrites data_clean).
# NOTE(review): sample_n() is superseded by slice_sample() in current dplyr;
# it is left as-is so the seeded draw does not change.
data_clean <- sample_n(data_clean, size = 100)

# Train/test split stratified on the target
data_split <- rsample::initial_split(data_clean, strata = ceo_dismissal)
data_train <- rsample::training(data_split)
data_test  <- rsample::testing(data_split)

# Stratified cross-validation folds built from the training set only
data_cv <- vfold_cv(data_train, strata = ceo_dismissal)
data_cv
## #  10-fold cross-validation using stratification 
## # A tibble: 10 × 2
##    splits         id    
##    <list>         <chr> 
##  1 <split [66/8]> Fold01
##  2 <split [66/8]> Fold02
##  3 <split [66/8]> Fold03
##  4 <split [66/8]> Fold04
##  5 <split [67/7]> Fold05
##  6 <split [67/7]> Fold06
##  7 <split [67/7]> Fold07
##  8 <split [67/7]> Fold08
##  9 <split [67/7]> Fold09
## 10 <split [67/7]> Fold10

Preprocess Data

# Preprocessing recipe for the xgboost model, defined on the training set.
xgboost_rec <- recipes::recipe(ceo_dismissal ~ ., data = data_train) %>%
    # Keep the row identifier in the data but give it a non-predictor role
    update_role(dismissal_dataset_id, new_role = "ID") %>%
    # Pool executive / company levels below the 1% frequency threshold
    # into a single "other" level
    step_other(exec_fullname, coname, threshold = 0.01) %>%
    # Indicator-encode every nominal predictor
    step_dummy(all_nominal_predictors())

# Inspect the processed training set. bake(new_data = NULL) returns the
# training data retained by prep(); it replaces the superseded juice().
xgboost_rec %>%
    prep() %>%
    bake(new_data = NULL) %>%
    glimpse()
## Rows: 74
## Columns: 186
## $ dismissal_dataset_id                     <dbl> 8187, 6361, 2197, 2276, 4131,…
## $ max_tenure_ceodb                         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ ceo_dismissal                            <fct> dismissed, dismissed, dismiss…
## $ coname_ALLTEL.CORP                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_AMCOL.INTERNATIONAL.CORP          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_ANADARKO.PETROLEUM.CORP           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_ATLAS.AIR.WORLDWIDE.HLDG.INC      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_ATMOS.ENERGY.CORP                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_BALLY.ENTERTAINMENT.CORP          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_BELL.INDUSTRIES.INC               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_CAESARS.ENTERTAINMENT.CORP        <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,…
## $ coname_CARBO.CERAMICS.INC                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_CBS.INC                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_CENTRAL.PACIFIC.FINANCIAL.CP      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_CNO.FINANCIAL.GROUP.INC           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_CONVEX.COMPUTER.CORP              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_COVENTRY.HEALTH.CARE.INC          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_CPI.CORP                          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,…
## $ coname_CREDENCE.SYSTEMS.CORP             <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_DPL.INC                           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_EOG.RESOURCES.INC                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_EPICOR.SOFTWARE.CORP..OLD         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_EQUITY.ONE.INC                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_ESSENDANT.INC                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_FACTORY.2.U.STORES.INC            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_FRANKLIN.RESOURCES.INC            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_GENERAL.RE.CORP                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_GIBSON.GREETINGS.INC              <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,…
## $ coname_GRANCARE.INC                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_HANDLEMAN.CO                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_HAWAIIAN.HOLDINGS.INC             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_HILL.ROM.HOLDINGS.INC             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_JACOBS.ENGINEERING.GROUP.INC      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_KELLWOOD.CO                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_KENNAMETAL.INC                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_KULICKE...SOFFA.INDUSTRIES        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_LEGGETT...PLATT.INC               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_MACERICH.CO                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_MAGNETEK.INC                      <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,…
## $ coname_MATERIAL.SCIENCES.CORP            <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_MCKESSON.CORP                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_MEADE.INSTRUMENTS.CORP            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_MERCK...CO                        <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,…
## $ coname_MONDELEZ.INTERNATIONAL.INC        <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_NEW.ENGLAND.BUSINESS.SVC.INC      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_NEW.YORK.TIMES.CO..CL.A           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,…
## $ coname_NEWMONT.GOLDCORP.CORP             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_PENFORD.CORP                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_PEPCO.HOLDINGS.INC                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_PERFORMANCE.FOOD.GROUP.CO         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_PERSEPTIVE.BIOSYSTEMS.INC         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_PLIANT.SYSTEMS.INC                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_PROCTER...GAMBLE.CO               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_PROVIDENT.BANKSHARES.CORP         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_QUAKER.CHEMICAL.CORP              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_RED.HAT.INC                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_REGIONS.FINANCIAL.CORP            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_REGIS.CORP.MN                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_ROCKWELL.AUTOMATION               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_RURAL.METRO.CORP                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_SANTA.FE.PACIFIC.CORP             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_SCOTT.TECHNOLOGIES.INC            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_SERVICE.CORP.INTERNATIONAL        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_SOUTHERN.CO                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_SOUTHERN.NEW.ENG.TELECOMM         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_STEWART...STEVENSON.SERVICES      <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,…
## $ coname_SUNTRUST.BANKS.INC                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_SYNEOS.HEALTH.INC                 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_SYNNEX.CORP                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_TILE.SHOP.HOLDINGS.INC            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_TRANSOCEAN.LTD                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_TXU.GAS.CO                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_ULTA.BEAUTY.INC                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_VIACOM.INC                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_WILHELMINA.INTERNATIONAL.INC      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_WYNDHAM.DESTINATIONS.INC          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ coname_other                             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Anli.Ngo                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Arthur.M..Coppola.CPA      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Benjamin.J..Sottile        <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,…
## $ exec_fullname_C..Mark.Pearson            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_C..Michael.Gray            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_C..Scott.Kulicke           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Carl.E..Jones.Jr.          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Carl.S..Rubin              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Carl.W..Stearn             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Chris.R..Homeister         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_D..Jamie.MacDonald         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Daniel.J..Miglio           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_David.A..Ranhoff           <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_David.L..Mahoney           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_David.R..Lukes             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_David.S..Haffner           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Donahue.L..Wildman         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Donald.R..Beall            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Edward.Franklin.Mitchell   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Edward.L..Addison          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Gary.C..Wendt              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Gary.William.Loveman       <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,…
## $ exec_fullname_Gene.E..Burleson           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Gerald.G..Nadig            <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Glen.W..Lindemann          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Gordon.Rae.Parker          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Irene.B..Rosenfeld         <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_J..David.Pierson           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,…
## $ exec_fullname_Jack.E..Brucker            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_James.B..Williams          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_James.M..Whitehurst        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_James.T..Hackett           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Janet.L..Robinson          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,…
## $ exec_fullname_Joel.D..Spungin            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_John.E..Pepper             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_John.Hughes                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Joseph.J..Jacobs           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Kevin.Masazo.Murai         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_L..George.Klaus            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Laurence.Alan.Tisch        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Mark.B..Dunkerley          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Mark.G..Papa               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Martin.L..Flanagan.CFA.CPA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Michael.L..Grimes          <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,…
## $ exec_fullname_Michael.M..Searles         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Noubar.B..Afeyan           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Parris.H..Holmes.Jr.       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Paul.D..Finkelstein        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Paul.L..Sagan              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Peter.A..Benoliel          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Peter.H..Forster           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Raymond.V..Gilmartin       <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Richard.H..Shuyler         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Robert.D..Krebs            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Robert.J..Murray           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Robert.J..Paluck           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Robert.L..McGeehan         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Robert.L..Waltrip          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Robert.W..Best             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Ronald.E..Ferguson         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Salim.A..L..Bhatia         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Scott.T..Ford              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Stephen.P..Holmes          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Stephen.Strome             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Steven.G..Murdock          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Sumner.M..Redstone         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Thomas.D..Malkoski         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_Thomas.G..Boren            <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,…
## $ exec_fullname_Tracy.A..Edwards           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_W..August.Hillenbrand      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_William.C..McCord          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_William.C..O.Malley        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_William.J..McKenna         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ exec_fullname_other                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tenure_no_ceodb_X2                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tenure_no_ceodb_X3                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X1988                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X1990                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X1991                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X1992                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X1993                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X1994                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X1995                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X1996                         <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,…
## $ fyear_gone_X1997                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X1998                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X1999                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2000                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2001                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2002                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2003                         <dbl> 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,…
## $ fyear_gone_X2004                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,…
## $ fyear_gone_X2005                         <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2006                         <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2007                         <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2008                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2009                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2010                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2011                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,…
## $ fyear_gone_X2012                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2013                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2014                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2015                         <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,…
## $ fyear_gone_X2016                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2017                         <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2018                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2019                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2020                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ fyear_gone_X2021                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…

Specify Model

# Boosted-tree classifier on the xgboost engine; the number of trees is left
# as a placeholder to be filled in by tuning.
xgboost_spec <- boost_tree(trees = tune()) %>%
  set_engine("xgboost") %>%
  set_mode("classification")

# Bundle the preprocessing recipe and the model spec into one workflow.
xgboost_workflow <- workflow(
  preprocessor = xgboost_rec,
  spec = xgboost_spec
)

Tune Hyperparameters

# Register a parallel backend with doParallel's default settings.
doParallel::registerDoParallel()

# Seed the RNG, then evaluate 5 automatically generated candidate values
# for the tuned parameter across the cross-validation folds.
set.seed(17375)
xgboost_tune <- tune_grid(
  object    = xgboost_workflow,
  resamples = data_cv,
  grid      = 5
)