# Load the TidyTuesday 2021-04-27 CEO departures data straight from GitHub.
departures_url <- 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-04-27/departures.csv'
departures <- readr::read_csv(file = departures_url)

# Print a per-column summary (types, missingness, distributions) of the raw data.
skimr::skim(departures)
Data summary
Name departures
Number of rows 9423
Number of columns 19
_______________________
Column type frequency:
character 8
numeric 10
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
coname 0 1.00 2 30 0 3860 0
exec_fullname 0 1.00 5 790 0 8701 0
interim_coceo 9105 0.03 6 7 0 6 0
still_there 7311 0.22 3 10 0 77 0
notes 1644 0.83 5 3117 0 7755 0
sources 1475 0.84 18 1843 0 7915 0
eight_ks 4499 0.52 69 3884 0 4914 0
_merge 0 1.00 11 11 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
dismissal_dataset_id 0 1.00 5684.10 25005.46 1 2305.5 4593 6812.5 559044 ▇▁▁▁▁
gvkey 0 1.00 40132.48 53921.34 1004 7337.0 14385 60900.5 328795 ▇▁▁▁▁
fyear 0 1.00 2007.74 8.19 1987 2000.0 2008 2016.0 2020 ▁▆▅▅▇
co_per_rol 0 1.00 25580.22 18202.38 -1 8555.5 22980 39275.5 64602 ▇▆▅▃▃
departure_code 1667 0.82 5.20 1.53 1 5.0 5 7.0 9 ▁▃▇▅▁
ceo_dismissal 1813 0.81 0.20 0.40 0 0.0 0 0.0 1 ▇▁▁▁▂
tenure_no_ceodb 0 1.00 1.03 0.17 0 1.0 1 1.0 3 ▁▇▁▁▁
max_tenure_ceodb 0 1.00 1.05 0.24 1 1.0 1 1.0 4 ▇▁▁▁▁
fyear_gone 1802 0.81 2006.64 13.63 1980 2000.0 2007 2013.0 2997 ▇▁▁▁▁
cik 245 0.97 741469.17 486551.43 1750 106413.0 857323 1050375.8 1808065 ▆▁▇▂▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
leftofc 1802 0.81 1981-01-01 2998-04-27 2006-12-31 3627
# Select relevant columns
# Names of the candidate factor columns in the raw data. Going through select()
# means a missing or misspelled column fails here rather than later.
factors_vec <- departures %>%
    select(
        departure_code, co_per_rol, fyear,
        tenure_no_ceodb, max_tenure_ceodb, fyear_gone
    ) %>%
    names()

# Clean the data and derive calendar features (year, doy, month) from the
# departure date, in a single pipeline.
data_clean <- departures %>%
    # Drop the columns that are not used in the rest of the analysis.
    select(
        -interim_coceo, -still_there, -eight_ks, -gvkey, -co_per_rol, -cik,
        -fyear, -`_merge`, -notes, -sources, -departure_code
    ) %>%
    # fyear_gone is numeric, so compare against a number instead of relying on
    # implicit number-to-string coercion. 2997 is presumably a data-entry error.
    # filter() also drops rows where the condition is NA, i.e. rows with a
    # missing fyear_gone.
    filter(fyear_gone != 2997) %>%
    filter(!is.na(ceo_dismissal)) %>%
    mutate(
        tenure_no_ceodb = factor(tenure_no_ceodb),
        max_tenure_ceodb = factor(max_tenure_ceodb),
        ceo_dismissal = factor(ceo_dismissal),
        # mutate() evaluates sequentially, so the date parts below are
        # computed from the already-converted Date column.
        leftofc = as.Date(leftofc),
        year = lubridate::year(leftofc),
        doy = lubridate::yday(leftofc),
        month = lubridate::month(leftofc)
    ) %>%
    # The raw date is no longer needed once year/doy/month exist.
    select(-leftofc)

Explore data

# Summarise the raw (uncleaned) departures data: column types, missingness and distributions.
skimr::skim(departures)
Data summary
Name departures
Number of rows 9423
Number of columns 19
_______________________
Column type frequency:
character 8
numeric 10
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
coname 0 1.00 2 30 0 3860 0
exec_fullname 0 1.00 5 790 0 8701 0
interim_coceo 9105 0.03 6 7 0 6 0
still_there 7311 0.22 3 10 0 77 0
notes 1644 0.83 5 3117 0 7755 0
sources 1475 0.84 18 1843 0 7915 0
eight_ks 4499 0.52 69 3884 0 4914 0
_merge 0 1.00 11 11 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
dismissal_dataset_id 0 1.00 5684.10 25005.46 1 2305.5 4593 6812.5 559044 ▇▁▁▁▁
gvkey 0 1.00 40132.48 53921.34 1004 7337.0 14385 60900.5 328795 ▇▁▁▁▁
fyear 0 1.00 2007.74 8.19 1987 2000.0 2008 2016.0 2020 ▁▆▅▅▇
co_per_rol 0 1.00 25580.22 18202.38 -1 8555.5 22980 39275.5 64602 ▇▆▅▃▃
departure_code 1667 0.82 5.20 1.53 1 5.0 5 7.0 9 ▁▃▇▅▁
ceo_dismissal 1813 0.81 0.20 0.40 0 0.0 0 0.0 1 ▇▁▁▁▂
tenure_no_ceodb 0 1.00 1.03 0.17 0 1.0 1 1.0 3 ▁▇▁▁▁
max_tenure_ceodb 0 1.00 1.05 0.24 1 1.0 1 1.0 4 ▇▁▁▁▁
fyear_gone 1802 0.81 2006.64 13.63 1980 2000.0 2007 2013.0 2997 ▇▁▁▁▁
cik 245 0.97 741469.17 486551.43 1750 106413.0 857323 1050375.8 1808065 ▆▁▇▂▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
leftofc 1802 0.81 1981-01-01 2998-04-27 2006-12-31 3627
# Class balance of the outcome: number of rows per ceo_dismissal level.
count(data_clean, ceo_dismissal)
## # A tibble: 2 × 2
##   ceo_dismissal     n
##   <fct>         <int>
## 1 0              5993
## 2 1              1484
# Bar chart of the number of rows per ceo_dismissal level.
ggplot(data = data_clean, mapping = aes(x = ceo_dismissal)) +
    geom_bar()

ceo_dismissal vs. max tenure

#data_clean %>%
    #ggplot(aes(max_tenure_ceodb)) +
    #geom_boxplot()
# Disabled: this boxplot doesn't represent the data well in my case.

correlation plot

# Step 1: binarize
# Drop the two name columns, then turn every remaining feature into 0/1
# indicator columns (bins for numeric features, one column per factor level).
data_binarized <- correlationfunnel::binarize(
    select(data_clean, -c(exec_fullname, coname))
)

glimpse(data_binarized)
## Rows: 7,477
## Columns: 28
## $ `dismissal_dataset_id__-Inf_2176` <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ dismissal_dataset_id__2176_4326   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dismissal_dataset_id__4326_6580   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dismissal_dataset_id__6580_Inf    <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ceo_dismissal__0                  <dbl> 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
## $ ceo_dismissal__1                  <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ tenure_no_ceodb__1                <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ tenure_no_ceodb__2                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `tenure_no_ceodb__-OTHER`         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ max_tenure_ceodb__1               <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ max_tenure_ceodb__2               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `max_tenure_ceodb__-OTHER`        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `fyear_gone__-Inf_2000`           <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, …
## $ fyear_gone__2000_2006             <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, …
## $ fyear_gone__2006_2013             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ fyear_gone__2013_Inf              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `year__-Inf_2000`                 <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, …
## $ year__2000_2006                   <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, …
## $ year__2006_2013                   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ year__2013_Inf                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `doy__-Inf_87`                    <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ doy__87_180                       <dbl> 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, …
## $ doy__180_274                      <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, …
## $ doy__274_Inf                      <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, …
## $ `month__-Inf_3`                   <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ month__3_6                        <dbl> 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, …
## $ month__6_10                       <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, …
## $ month__10_Inf                     <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, …
# Step 2: correlation
# Indicator columns excluded from the correlation table. Each name is listed
# exactly once (the previous select() repeated two of them).
dropped_bins <- c(
    "tenure_no_ceodb__-OTHER",
    "max_tenure_ceodb__-OTHER",
    "dismissal_dataset_id__-Inf_2176",
    "fyear_gone__-Inf_2000"
)

# Correlate every remaining indicator with the "dismissed" indicator.
# correlate() is namespace-qualified so another attached package exporting a
# function of the same name cannot mask it.
data_correlation <- data_binarized %>%
    select(-all_of(dropped_bins)) %>%
    correlationfunnel::correlate(target = ceo_dismissal__1)

data_correlation
## # A tibble: 24 × 3
##    feature          bin       correlation
##    <fct>            <chr>           <dbl>
##  1 ceo_dismissal    0             -1     
##  2 ceo_dismissal    1              1     
##  3 year             -Inf_2000     -0.0616
##  4 max_tenure_ceodb 1              0.0582
##  5 max_tenure_ceodb 2             -0.0539
##  6 year             2000_2006      0.0316
##  7 fyear_gone       2000_2006      0.0288
##  8 month            6_10           0.0274
##  9 month            3_6           -0.0264
## 10 tenure_no_ceodb  1              0.0260
## # ℹ 14 more rows
# Step 3: plot
# Draw the correlation funnel from the feature/bin/correlation table.
correlationfunnel::plot_correlation_funnel(data_correlation)

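There is a moderate correlation between departure codes and CEO dismissals, so some departure codes are more indicative of CEO dismissals than others. Note, however, that `departure_code` is dropped during cleaning and therefore does not appear in the correlation funnel above, where the strongest listed feature correlations with dismissal are only about ±0.06.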

Model Building

Split Data

library(dplyr)
library(rsample)

# Fix the RNG state so the sample, the split and the folds are reproducible.
set.seed(1234)

# Down-sample to 100 rows. NOTE: this overwrites data_clean, so every later
# use of data_clean sees only this 100-row sample.
data_clean <- sample_n(data_clean, 100)

# Train/test split, stratified on the outcome.
data_split <- initial_split(data_clean, strata = ceo_dismissal)
data_train <- training(data_split)
data_test <- testing(data_split)

# Stratified cross-validation folds built from the training set only.
data_cv <- rsample::vfold_cv(data = data_train, strata = ceo_dismissal)
data_cv
## #  10-fold cross-validation using stratification 
## # A tibble: 10 × 2
##    splits         id    
##    <list>         <chr> 
##  1 <split [66/9]> Fold01
##  2 <split [66/9]> Fold02
##  3 <split [67/8]> Fold03
##  4 <split [68/7]> Fold04
##  5 <split [68/7]> Fold05
##  6 <split [68/7]> Fold06
##  7 <split [68/7]> Fold07
##  8 <split [68/7]> Fold08
##  9 <split [68/7]> Fold09
## 10 <split [68/7]> Fold10
# Disabled: would have derived a numeric column from departure_code, but that
# column is dropped from data_clean during cleaning.
#data_train$ceo_dismissal_numeric <- as.numeric(as.character(data_train$departure_code))

# Disabled: would have added a labelled factor version of ceo_dismissal.
#data_train$ceo_dismissal_factor <- factor(data_train$ceo_dismissal, levels = c(0, 1), labels = c("Not Dismissed", "Dismissed"))

# Inspect the structure of the training set (column types and factor levels).
str(data_train)
## tibble [75 × 10] (S3: tbl_df/tbl/data.frame)
##  $ dismissal_dataset_id: num [1:75] 1994 3192 6735 4562 7051 ...
##  $ coname              : chr [1:75] "KELLY SERVICES INC -CL A" "EDISON INTERNATIONAL" "AZZURRA HOLDING CORP" "POLARIS INDUSTRIES INC" ...
##  $ exec_fullname       : chr [1:75] "Terence E. Adderley" "John E. Bryson" "George P. Roberts" "W. Hall Wendel Jr." ...
##  $ ceo_dismissal       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ tenure_no_ceodb     : Factor w/ 3 levels "1","2","3": 1 1 2 1 2 1 1 1 1 1 ...
##  $ max_tenure_ceodb    : Factor w/ 4 levels "1","2","3","4": 1 1 2 1 2 1 1 1 1 1 ...
##  $ fyear_gone          : num [1:75] 2006 2008 2003 1999 2015 ...
##  $ year                : num [1:75] 2006 2008 2003 1999 2015 ...
##  $ doy                 : num [1:75] 40 213 245 141 1 288 91 152 203 151 ...
##  $ month               : num [1:75] 2 7 9 5 1 10 3 6 7 5 ...

Preprocess Data

library(dplyr)
library(recipes)

# Create the recipe for modeling.
# - The template is the training split, so prep() below estimates its
#   statistics (dummy levels, zero-variance checks, means/sds) without looking
#   at the held-out test rows.
# - dismissal_dataset_id, coname and exec_fullname are kept in the data under
#   an "ID" role instead of being used as predictors. The two name columns are
#   free-text identifiers that would otherwise be expanded into one dummy
#   column per distinct company/executive; they are also excluded from the
#   correlation analysis earlier in this file.
xgboost_rec <- recipe(ceo_dismissal ~ ., data = data_train) %>%
    update_role(
        dismissal_dataset_id, coname, exec_fullname,
        new_role = "ID"
    ) %>%
    step_dummy(all_nominal_predictors()) %>%
    step_zv(all_predictors()) %>%
    step_normalize(all_numeric_predictors())

# Prep the recipe on its template data and inspect the processed training set.
# bake(new_data = NULL) returns the processed template data and replaces the
# superseded juice().
xgboost_rec_prep <- xgboost_rec %>% prep()
xgboost_rec_prep %>% bake(new_data = NULL) %>% glimpse()
## Rows: 100
## Columns: 206
## $ dismissal_dataset_id                                    <dbl> 4116, 8749, 44…
## $ fyear_gone                                              <dbl> 1.4750294, 0.8…
## $ year                                                    <dbl> 1.4801796, 0.8…
## $ doy                                                     <dbl> 0.37704630, 0.…
## $ month                                                   <dbl> 0.37584272, 0.…
## $ ceo_dismissal                                           <fct> 1, 1, 0, 0, 0,…
## $ coname_AKAMAI.TECHNOLOGIES.INC                          <dbl> -0.1, -0.1, -0…
## $ coname_ALERIS.CORP                                      <dbl> -0.1, -0.1, -0…
## $ coname_ALLIED.WASTE.INDUSTRIES.INC                      <dbl> -0.1, -0.1, -0…
## $ coname_AMEREN.CORP                                      <dbl> -0.1, -0.1, -0…
## $ coname_ARROW.ELECTRONICS.INC                            <dbl> -0.1, -0.1, -0…
## $ coname_ATLAS.AIR.WORLDWIDE.HLDG.INC                     <dbl> -0.1, -0.1, -0…
## $ coname_AZZURRA.HOLDING.CORP                             <dbl> -0.1, -0.1, -0…
## $ coname_BALLY.ENTERTAINMENT.CORP                         <dbl> -0.1, -0.1, -0…
## $ coname_BANDAG.INC                                       <dbl> -0.1, -0.1, -0…
## $ coname_BELL.INDUSTRIES.INC                              <dbl> -0.1, -0.1, -0…
## $ coname_BIOMATRIX.INC                                    <dbl> -0.1, -0.1, -0…
## $ coname_BIRMINGHAM.STEEL.CORP                            <dbl> -0.1, -0.1, -0…
## $ coname_BONANZA.CREEK.ENERGY.INC                         <dbl> -0.1, -0.1, -0…
## $ coname_BORDERS.GROUP.INC                                <dbl> -0.1, -0.1, -0…
## $ coname_BORGWARNER.INC                                   <dbl> -0.1, -0.1, -0…
## $ coname_CAMBRIDGE.TECHNOLOGY.PARTNER                     <dbl> -0.1, -0.1, -0…
## $ coname_CARBO.CERAMICS.INC                               <dbl> -0.1, -0.1, -0…
## $ coname_CAREFUSION.CORP                                  <dbl> -0.1, -0.1, -0…
## $ coname_CAREINSITE.INC                                   <dbl> -0.1, -0.1, -0…
## $ coname_CBS.INC                                          <dbl> -0.1, -0.1, -0…
## $ coname_CENTURY.ALUMINUM.CO                              <dbl> -0.1, -0.1, -0…
## $ coname_CILCORP.INC                                      <dbl> -0.1, -0.1, -0…
## $ coname_CONCERTO.SOFTWARE.INC                            <dbl> -0.1, -0.1, -0…
## $ coname_CONECTIV.INC                                     <dbl> -0.1, -0.1, -0…
## $ coname_CORECIVIC.INC                                    <dbl> -0.1, -0.1, -0…
## $ coname_COTY.INC                                         <dbl> -0.1, -0.1, -0…
## $ coname_COVENTRY.HEALTH.CARE.INC                         <dbl> -0.1, -0.1, -0…
## $ coname_CREDENCE.SYSTEMS.CORP                            <dbl> -0.1, -0.1, -0…
## $ coname_DELTA.WOODSIDE.INDUSTRIES                        <dbl> -0.1, -0.1, -0…
## $ coname_DENTSPLY.SIRONA.INC                              <dbl> -0.1, -0.1, 9.…
## $ coname_DIGITAL.INSIGHT.CORP                             <dbl> -0.1, -0.1, -0…
## $ coname_DIRECTV                                          <dbl> -0.1, -0.1, -0…
## $ coname_DOWNEY.FINANCIAL.CORP                            <dbl> -0.1, -0.1, -0…
## $ coname_EDISON.INTERNATIONAL                             <dbl> -0.1, -0.1, -0…
## $ coname_ENERGEN.CORP                                     <dbl> -0.1, -0.1, -0…
## $ coname_EOG.RESOURCES.INC                                <dbl> -0.1, -0.1, -0…
## $ coname_EPICOR.SOFTWARE.CORP..OLD                        <dbl> -0.1, -0.1, -0…
## $ coname_FIRST.COMMERCIAL.CORP                            <dbl> -0.1, -0.1, -0…
## $ coname_FISERV.INC                                       <dbl> -0.1, -0.1, -0…
## $ coname_FRANKLIN.RESOURCES.INC                           <dbl> -0.1, -0.1, -0…
## $ coname_GENERAL.RE.CORP                                  <dbl> -0.1, -0.1, -0…
## $ coname_GENWORTH.FINANCIAL.INC                           <dbl> -0.1, -0.1, -0…
## $ coname_GILLETTE.CO                                      <dbl> -0.1, -0.1, -0…
## $ coname_GLOBAL.CROSSING.LTD                              <dbl> -0.1, -0.1, -0…
## $ coname_HANDLEMAN.CO                                     <dbl> -0.1, -0.1, -0…
## $ coname_HANDY...HARMAN.LTD                               <dbl> -0.1, -0.1, -0…
## $ coname_HYPERION.SOFTWARE.CORP                           <dbl> -0.1, -0.1, -0…
## $ coname_IMPAX.LABORATORIES.INC                           <dbl> -0.1, -0.1, -0…
## $ coname_INTEGRATED.CIRCUIT.SYSTEMS                       <dbl> -0.1, -0.1, -0…
## $ coname_INTL.GAME.TECHNOLOGY                             <dbl> -0.1, -0.1, -0…
## $ coname_IOWA.ILLINOIS.GAS...ELEC                         <dbl> -0.1, -0.1, -0…
## $ coname_IPAYMENT.INC                                     <dbl> -0.1, -0.1, -0…
## $ coname_JONES.PHARMA.INC                                 <dbl> -0.1, -0.1, -0…
## $ coname_JUST.FOR.FEET.INC                                <dbl> -0.1, -0.1, -0…
## $ coname_KELLY.SERVICES.INC..CL.A                         <dbl> -0.1, -0.1, -0…
## $ coname_LABORATORY.CP.OF.AMER.HLDGS                      <dbl> -0.1, -0.1, -0…
## $ coname_LEGGETT...PLATT.INC                              <dbl> -0.1, -0.1, -0…
## $ coname_MARINER.HEALTH.GROUP.INC                         <dbl> -0.1, -0.1, -0…
## $ coname_MATERIAL.SCIENCES.CORP                           <dbl> -0.1, -0.1, -0…
## $ coname_MCCAW.CELLULAR.COMM..CL.A                        <dbl> -0.1, -0.1, -0…
## $ coname_MCKESSON.CORP                                    <dbl> -0.1, -0.1, -0…
## $ coname_MEAD.CORP                                        <dbl> -0.1, -0.1, -0…
## $ coname_MEADE.INSTRUMENTS.CORP                           <dbl> -0.1, -0.1, -0…
## $ coname_MILLIPORE.CORP                                   <dbl> -0.1, -0.1, -0…
## $ coname_MONDELEZ.INTERNATIONAL.INC                       <dbl> -0.1, -0.1, -0…
## $ coname_NEWMONT.GOLDCORP.CORP                            <dbl> -0.1, -0.1, -0…
## $ coname_ORGANOGENESIS.INC                                <dbl> -0.1, -0.1, -0…
## $ coname_PERFORMANCE.FOOD.GROUP.CO                        <dbl> -0.1, -0.1, -0…
## $ coname_POLARIS.INDUSTRIES.INC                           <dbl> -0.1, -0.1, -0…
## $ coname_PORTLAND.GENERAL.CORP                            <dbl> -0.1, -0.1, -0…
## $ coname_PRICE..T..ROWE..GROUP                            <dbl> -0.1, -0.1, -0…
## $ coname_PROCTER...GAMBLE.CO                              <dbl> -0.1, -0.1, -0…
## $ coname_QUAKER.CHEMICAL.CORP                             <dbl> -0.1, -0.1, -0…
## $ coname_REGIONS.FINANCIAL.CORP                           <dbl> -0.1, -0.1, -0…
## $ coname_REGIS.CORP.MN                                    <dbl> -0.1, -0.1, -0…
## $ coname_REYNOLDS.METALS.CO                               <dbl> -0.1, -0.1, -0…
## $ coname_ROCKWELL.AUTOMATION                              <dbl> -0.1, -0.1, -0…
## $ coname_RS.LEGACY.CORP                                   <dbl> -0.1, -0.1, -0…
## $ coname_RURAL.METRO.CORP                                 <dbl> -0.1, -0.1, -0…
## $ coname_SANTA.FE.SNYDER.CORP                             <dbl> -0.1, -0.1, -0…
## $ coname_SANTANDER.HOLDINGS.USA.INC                       <dbl> -0.1, -0.1, -0…
## $ coname_SEARS.ROEBUCK...CO                               <dbl> -0.1, -0.1, -0…
## $ coname_SOUTHERN.NEW.ENG.TELECOMM                        <dbl> -0.1, -0.1, -0…
## $ coname_SPORTS.AUTHORITY.INC.OLD                         <dbl> -0.1, -0.1, -0…
## $ coname_SRA.INTERNATIONAL.INC                            <dbl> -0.1, -0.1, -0…
## $ coname_ST.JOE.CO                                        <dbl> 9.9, -0.1, -0.…
## $ coname_STERIS.PLC                                       <dbl> -0.1, -0.1, -0…
## $ coname_STRATUS.COMPUTER.INC                             <dbl> -0.1, -0.1, -0…
## $ coname_SUN.MICROSYSTEMS.INC                             <dbl> -0.1, -0.1, -0…
## $ coname_SUNOCO.INC                                       <dbl> -0.1, -0.1, -0…
## $ coname_TIME.WARNER.INC                                  <dbl> -0.1, -0.1, -0…
## $ coname_TRAVELERS.COS.INC                                <dbl> -0.1, -0.1, -0…
## $ coname_TXU.GAS.CO                                       <dbl> -0.1, -0.1, -0…
## $ coname_U.S.TRUST.CORP                                   <dbl> -0.1, -0.1, -0…
## $ coname_ULTA.BEAUTY.INC                                  <dbl> -0.1, 9.9, -0.…
## $ coname_VERTEX.PHARMACEUTICALS.INC                       <dbl> -0.1, -0.1, -0…
## $ coname_WASHINGTON.GROUP.INTL.INC                        <dbl> -0.1, -0.1, -0…
## $ coname_WINDSTREAM.HOLDINGS.INC                          <dbl> -0.1, -0.1, -0…
## $ coname_XTO.ENERGY.INC                                   <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Arthur.C..Martinez                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Arthur.W..Stratton                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Barnett.Grace                             <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Bernd.Erich.Beetz                         <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Bill.R..Sanford                           <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Bob.R..Simpson                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Carl.A..Grimstad                          <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Charles.B..Johnson                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Craig.A..Davis                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Craig.O..McCaw                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Daniel.D..Rosenthal                       <dbl> -0.1, -0.1, -0…
## $ exec_fullname_David.L..Mahoney                          <dbl> -0.1, -0.1, -0…
## $ exec_fullname_David.L..Schlotterbeck.B.S..M.S.          <dbl> -0.1, -0.1, -0…
## $ exec_fullname_David.P..King                             <dbl> -0.1, -0.1, -0…
## $ exec_fullname_David.W..Sear                             <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Dennis.M..Jones                           <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Donahue.L..Wildman                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Donald.R..Beall                           <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Endre.A..Balazs                           <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Ernst.Volgenau                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Forrest.E..Hoglund                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Francis.J..Lunger                         <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Francis.M..Scricco                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_G..Robert.Evans                           <dbl> -0.1, -0.1, -0…
## $ exec_fullname_George.Frederick.Wilkinson                <dbl> -0.1, -0.1, -0…
## $ exec_fullname_George.H..Conrades                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_George.P..Roberts                         <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Gordon.Rae.Parker                         <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Gregory.P..Josefowicz                     <dbl> -0.1, -0.1, -0…
## $ exec_fullname_H..Marshall.Schwarz                       <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Harold.Ruttenberg                         <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Harry.M..Cornell.Jr.                      <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Howard.E..Cosgrove                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_James.A..Perakis                          <dbl> -0.1, -0.1, -0…
## $ exec_fullname_James.Aloysius.Charles.Kennedy.C.F.A..CFA <dbl> -0.1, -0.1, -0…
## $ exec_fullname_James.D..Foy                              <dbl> -0.1, -0.1, -0…
## $ exec_fullname_James.K..Sims                             <dbl> -0.1, -0.1, -0…
## $ exec_fullname_James.L..Payne                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_James.Stanley.Mackin                      <dbl> -0.1, -0.1, -0…
## $ exec_fullname_James.T..McManus.II                       <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Jay.S..Fishman                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Jeffery.R..Gardner                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Jeffrey.L..Bewkes                         <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Jeffrey.T..Slovin                         <dbl> -0.1, -0.1, 9.…
## $ exec_fullname_Jeremiah.J..Sheehan                       <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Jerome.F..Tatar                           <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Jesse.P..Orsini                           <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.B..Furman                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.B..Yasinsky                          <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.C..Diebel                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.C..Dorman                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.D..Ferguson                          <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.E..Bryson                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.E..Pepper                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.H..Wimberly                          <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.J..Zillmer                           <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Jonathan.I..Schwartz                      <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Joseph.C..Magnacca                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Joseph.P..Campanelli                      <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Ken.L..Harrison                           <dbl> -0.1, -0.1, -0…
## $ exec_fullname_L..George.Klaus                           <dbl> -0.1, -0.1, -0…
## $ exec_fullname_L..Park.Brady..Jr.                        <dbl> 9.9, -0.1, -0.…
## $ exec_fullname_Laurence.Alan.Tisch                       <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Leo.J..Hindery.Jr.                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Leslie.M..Muma                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Lyn.Kirby                                 <dbl> -0.1, 9.9, -0.…
## $ exec_fullname_Martin.E..Hanaka                          <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Martin.G..Carver                          <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Marvin.M..Chronister                      <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Matthew.W..Emmens                         <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Michael.A..Chowdry                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Michael.C..Hawley                         <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Michael.D..White                          <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Michael.Desmond.Fraizer                   <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Michael.L..Sabolinski                     <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Myron.Kunin                               <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Neil.D..Arnold                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Paul.C..Suthern                           <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Ralph.L..Cheek                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Robert.A..Garvey                          <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Robert.C..Sledd                           <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Robert.H..Campbell                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Robert.O..Viets                           <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Roger.K..Deromedi                         <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Ronald.E..Ferguson                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Sigismundus.W.W..Lubsen                   <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Stanley.J..Bright                         <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Stephen.Strome                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Terence.E..Adderley                       <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Theodore.Williams                         <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Thomas.J..Matthews                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Thomas.R..Voss                            <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Timothy.M..Manganello                     <dbl> -0.1, -0.1, -0…
## $ exec_fullname_W..Hall.Wendel.Jr.                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Walter.H..Monteith.Jr.                    <dbl> -0.1, -0.1, -0…
## $ exec_fullname_William.C..McCord                         <dbl> -0.1, -0.1, -0…
## $ exec_fullname_William.E..Foster                         <dbl> -0.1, -0.1, -0…
## $ exec_fullname_William.F..Garrett                        <dbl> -0.1, -0.1, -0…
## $ exec_fullname_William.G..Howard..Jr.                    <dbl> -0.1, -0.1, -0…
## $ tenure_no_ceodb_X2                                      <dbl> -0.2282658, -0…
## $ max_tenure_ceodb_X2                                     <dbl> -0.2729764, -0…

Specify Model

library(usemodels)

# Print boilerplate tidymodels code (recipe, spec, workflow, tuning call) for
# an xgboost model of ceo_dismissal; the printed code is adapted by hand below.
usemodels::use_xgboost(formula = ceo_dismissal ~ ., data = data_train)
## xgboost_recipe <- 
##   recipe(formula = ceo_dismissal ~ ., data = data_train) %>% 
##   step_zv(all_predictors()) 
## 
## xgboost_spec <- 
##   boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(), 
##     loss_reduction = tune(), sample_size = tune()) %>% 
##   set_mode("classification") %>% 
##   set_engine("xgboost") 
## 
## xgboost_workflow <- 
##   workflow() %>% 
##   add_recipe(xgboost_recipe) %>% 
##   add_model(xgboost_spec) 
## 
## set.seed(64987)
## xgboost_tune <-
##   tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
library(workflows)
library(parsnip)

# Boosted-tree classifier on the xgboost engine. Every hyperparameter is left
# as a tune() placeholder, to be filled in during tuning.
xgboost_spec <- boost_tree(
  trees = tune(),
  min_n = tune(),
  tree_depth = tune(),
  learn_rate = tune(),
  loss_reduction = tune(),
  sample_size = tune()
) %>%
  set_mode("classification") %>%
  set_engine("xgboost")

# Bundle the preprocessing recipe and the model specification into one workflow.
xgboost_workflow <- workflow() %>%
  add_recipe(xgboost_rec) %>%
  add_model(xgboost_spec)

Tune hyperparameters

library(tune)

# Register a parallel backend before tuning.
doParallel::registerDoParallel()

# Seed the RNG so the generated candidate grid is reproducible, then evaluate
# 5 candidate hyperparameter combinations on the cross-validation folds.
set.seed(17375)
xgboost_tune <- tune_grid(
  object = xgboost_workflow,
  resamples = data_cv,
  grid = 5
)
## Warning: package 'xgboost' was built under R version 4.3.3