departures <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-04-27/departures.csv')
factors_vec <- departures %>%
select(departure_code, co_per_rol, fyear, tenure_no_ceodb, max_tenure_ceodb, fyear_gone) %>%
names()
library(dplyr)
library(lubridate)
data_clean <- departures %>%
select(-c(interim_coceo, still_there, eight_ks, gvkey, co_per_rol, cik, fyear, '_merge', notes, sources)) %>%
filter(fyear_gone != "2997") %>%
filter(!is.na(ceo_dismissal)) %>%
mutate(
departure_code = factor(departure_code),
tenure_no_ceodb = factor(tenure_no_ceodb),
max_tenure_ceodb = factor(max_tenure_ceodb),
ceo_dismissal = factor(ceo_dismissal),
leftofc = as.Date(leftofc), # Ensure leftofc is a Date
year = year(leftofc), # Create year directly
doy = yday(leftofc), # Create day of the year directly
month = month(leftofc) # Create month directly
) %>%
select(-leftofc) %>% # Remove leftofc as it's no longer needed
# Drop zero-variance variables
select(-c(tenure_no_ceodb, max_tenure_ceodb)) %>%
# Ensure ceo_dismissal is character, then recode
mutate(ceo_dismissal = if_else(ceo_dismissal == "1", "dismissed",
if_else(ceo_dismissal == "0", "not dismissed",
as.character(ceo_dismissal)))) # Handle NA implicitly
data_clean <- data_clean %>% sample_n(100)
skimr::skim(departures)
Name | departures |
Number of rows | 9423 |
Number of columns | 19 |
_______________________ | |
Column type frequency: | |
character | 8 |
numeric | 10 |
POSIXct | 1 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
coname | 0 | 1.00 | 2 | 30 | 0 | 3860 | 0 |
exec_fullname | 0 | 1.00 | 5 | 790 | 0 | 8701 | 0 |
interim_coceo | 9105 | 0.03 | 6 | 7 | 0 | 6 | 0 |
still_there | 7311 | 0.22 | 3 | 10 | 0 | 77 | 0 |
notes | 1644 | 0.83 | 5 | 3117 | 0 | 7755 | 0 |
sources | 1475 | 0.84 | 18 | 1843 | 0 | 7915 | 0 |
eight_ks | 4499 | 0.52 | 69 | 3884 | 0 | 4914 | 0 |
_merge | 0 | 1.00 | 11 | 11 | 0 | 1 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
dismissal_dataset_id | 0 | 1.00 | 5684.10 | 25005.46 | 1 | 2305.5 | 4593 | 6812.5 | 559044 | ▇▁▁▁▁ |
gvkey | 0 | 1.00 | 40132.48 | 53921.34 | 1004 | 7337.0 | 14385 | 60900.5 | 328795 | ▇▁▁▁▁ |
fyear | 0 | 1.00 | 2007.74 | 8.19 | 1987 | 2000.0 | 2008 | 2016.0 | 2020 | ▁▆▅▅▇ |
co_per_rol | 0 | 1.00 | 25580.22 | 18202.38 | -1 | 8555.5 | 22980 | 39275.5 | 64602 | ▇▆▅▃▃ |
departure_code | 1667 | 0.82 | 5.20 | 1.53 | 1 | 5.0 | 5 | 7.0 | 9 | ▁▃▇▅▁ |
ceo_dismissal | 1813 | 0.81 | 0.20 | 0.40 | 0 | 0.0 | 0 | 0.0 | 1 | ▇▁▁▁▂ |
tenure_no_ceodb | 0 | 1.00 | 1.03 | 0.17 | 0 | 1.0 | 1 | 1.0 | 3 | ▁▇▁▁▁ |
max_tenure_ceodb | 0 | 1.00 | 1.05 | 0.24 | 1 | 1.0 | 1 | 1.0 | 4 | ▇▁▁▁▁ |
fyear_gone | 1802 | 0.81 | 2006.64 | 13.63 | 1980 | 2000.0 | 2007 | 2013.0 | 2997 | ▇▁▁▁▁ |
cik | 245 | 0.97 | 741469.17 | 486551.43 | 1750 | 106413.0 | 857323 | 1050375.8 | 1808065 | ▆▁▇▂▁ |
Variable type: POSIXct
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
leftofc | 1802 | 0.81 | 1981-01-01 | 2998-04-27 | 2006-12-31 | 3627 |
data_clean %>% count(ceo_dismissal)
## # A tibble: 2 × 2
## ceo_dismissal n
## <chr> <int>
## 1 dismissed 15
## 2 not dismissed 85
data_clean %>%
ggplot(aes(ceo_dismissal)) +
geom_bar()
correlation plot
# Step 1: binarize
data_binarized <- data_clean %>%
select(-exec_fullname, -coname) %>%
binarize()
data_binarized %>% glimpse()
## Rows: 100
## Columns: 26
## $ `dismissal_dataset_id__-Inf_2479.75` <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ dismissal_dataset_id__2479.75_5027 <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, …
## $ dismissal_dataset_id__5027_6993.5 <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, …
## $ dismissal_dataset_id__6993.5_Inf <dbl> 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ departure_code__3 <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code__5 <dbl> 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, …
## $ departure_code__6 <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ departure_code__7 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, …
## $ ceo_dismissal__dismissed <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ceo_dismissal__not_dismissed <dbl> 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ `fyear_gone__-Inf_2000.75` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, …
## $ fyear_gone__2000.75_2008 <dbl> 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, …
## $ fyear_gone__2008_2015 <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, …
## $ fyear_gone__2015_Inf <dbl> 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, …
## $ `year__-Inf_2000.75` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, …
## $ year__2000.75_2008 <dbl> 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, …
## $ year__2008_2015 <dbl> 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ year__2015_Inf <dbl> 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, …
## $ `doy__-Inf_71` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ doy__71_154.5 <dbl> 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, …
## $ doy__154.5_278.25 <dbl> 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, …
## $ doy__278.25_Inf <dbl> 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, …
## $ `month__-Inf_3` <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, …
## $ month__3_6 <dbl> 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, …
## $ month__6_10 <dbl> 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, …
## $ month__10_Inf <dbl> 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
# Step 2: correlation
data_correlation <- data_binarized %>%
correlate(ceo_dismissal__dismissed)
data_correlation
## # A tibble: 26 × 3
## feature bin correlation
## <fct> <chr> <dbl>
## 1 departure_code 3 1
## 2 ceo_dismissal dismissed 1
## 3 ceo_dismissal not_dismissed -1
## 4 departure_code 5 -0.484
## 5 departure_code 7 -0.249
## 6 dismissal_dataset_id -Inf_2479.75 0.210
## 7 month 10_Inf -0.210
## 8 doy 71_154.5 0.146
## 9 month 3_6 0.123
## 10 dismissal_dataset_id 5027_6993.5 -0.113
## # ℹ 16 more rows
# Step 3: plot
data_correlation %>%
correlationfunnel::plot_correlation_funnel()
library(tidymodels)
## Warning: package 'broom' was built under R version 4.3.3
## Warning: package 'modeldata' was built under R version 4.3.3
## Warning: package 'recipes' was built under R version 4.3.3
set.seed(1234)
#data_clean <- data_clean %>% sample_n(100)
data_split <- initial_split(data_clean, strata = ceo_dismissal)
data_train <- training(data_split)
data_test <- testing(data_split)
data_cv <- rsample::vfold_cv(data_train, strata = ceo_dismissal)
data_cv
## # 10-fold cross-validation using stratification
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [65/9]> Fold01
## 2 <split [66/8]> Fold02
## 3 <split [66/8]> Fold03
## 4 <split [67/7]> Fold04
## 5 <split [67/7]> Fold05
## 6 <split [67/7]> Fold06
## 7 <split [67/7]> Fold07
## 8 <split [67/7]> Fold08
## 9 <split [67/7]> Fold09
## 10 <split [67/7]> Fold10
data_train <- data_train %>%
mutate(unique_id = paste(dismissal_dataset_id, exec_fullname, year, sep = "_")) %>%
group_by(unique_id) %>%
summarize(across(everything(), first)) # Aggregating duplicate entries
library(themis)
library(recipes)
# Remove unique_id from data_train before creating the recipe
data_train_cleaned <- data_train %>%
select(-unique_id)
# Create the recipe using the cleaned dataset
xgboost_rec <- recipe(ceo_dismissal ~ ., data = data_train_cleaned) %>%
step_dummy(all_nominal_predictors(), -all_outcomes()) %>%
step_smote(ceo_dismissal)
# Prepare and check the recipe
xgboost_rec_prep <- xgboost_rec %>% prep()
data_prepped <- xgboost_rec_prep %>% juice() %>% glimpse()
## Rows: 126
## Columns: 156
## $ dismissal_dataset_id <dbl> 1024, 1167, 1276, 1433, 1457, …
## $ fyear_gone <dbl> 2020, 2009, 2015, 2003, 2017, …
## $ year <dbl> 2020, 2009, 2015, 2003, 2017, …
## $ doy <dbl> 245, 274, 293, 344, 152, 65, 1…
## $ month <dbl> 9, 10, 10, 12, 6, 3, 5, 9, 7, …
## $ coname_AEROJET.ROCKETDYNE.HOLDINGS <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ coname_ALTABA.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_ANCHOR.GAMING <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_APPLIED.INDUSTRIAL.TECH.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_AVON.PRODUCTS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_BANK.OF.HAWAII.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_BEST.BUY.CO.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_BIOLASE.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_BIOMATRIX.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_BUFFETS.HOLDINGS.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_CAPELLA.EDUCATION.CO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_CARE.COM.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_CATALYST.HEALTH.SOLUTIONS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_CDW.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_CEC.ENTERTAINMENT.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_CEDAR.REALTY.TRUST.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_DANAHER.CORP <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_DIPLOMAT.PHARMACY.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_ENERSYS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_EOG.RESOURCES.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_EXAR.CORP <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_FIRSTENERGY.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_FLOWSERVE.CORP <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_FLUOR.DANIEL.GTI.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_FREEPORT.MCMORAN.INC <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ coname_FREESCALE.SEMICONDUCTOR.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_FRONTIER.INSURANCE.GROUP.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_HARLEY.DAVIDSON.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_HNI.CORP <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, …
## $ coname_INGRAM.MICRO.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_INTEGRAL.SYSTEMS.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_INTERGRAPH.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ coname_MARRIOTT.INTL.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_MARTEK.BIOSCIENCES.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_MASCO.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ coname_MBIA.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_MENTOR.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ coname_MOLSON.COORS.BREWING.CO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_MOORE.WALLACE.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_MULTIMEDIA.GAMES.HOLDING.CO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_NATIONAL.RETAIL.PROPERTIES <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_NATURES.SUNSHINE.PRODS.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_NAUTILUS.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_NCI.BUILDING.SYSTEMS.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_ORBITAL.ATK.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_PANERA.BREAD.CO <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_PEPSI.BOTTLING.GROUP.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_POTLATCHDELTIC.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_QLOGIC.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_QRS.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_RCSB.FINANCIAL.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_ROADRUNNER.TRANS.SYSTEMS.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_RUBICON.TECHNOLOGY.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_SABRE.HOLDINGS.CORP..CL.A <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_SAFETY.KLEEN.CORP.OLD <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_SEAHAWK.DRILLING.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_SRA.INTERNATIONAL.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_STAGE.STORES.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_SWK.HOLDINGS.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_TEGNA.INC <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ coname_TEMPUR.SEALY.INTL.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_TERADATA.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_TRICORD.SYSTEMS.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_U.S.ROBOTICS.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_UNION.PLANTERS.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_VIEWLOGIC.SYSTEMS.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_VOLT.INFO.SCIENCES.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_WEBSTER.FINANCIAL.CORP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_WEC.ENERGY.GROUP.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_WPX.ENERGY.INC <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ coname_XL.GROUP.LTD <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Alain.Moni <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Alan.D..Kennedy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Anthony.J..Alexander <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Anthony.M..Sanfilippo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Bradbury.H..Anderson <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Brian.R..Cook <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Casey.G..Cowell <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Christopher.J..Conway <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ exec_fullname_Craig.E..Weatherup <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Craig.Macnab <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Daniel.J..Murphy.Jr. <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_David.H..Elliott <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_David.T..Blair <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Derrick.R..Meyer <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Donald.R..Riley <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Donald.W..Brinckman <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Endre.A..Balazs <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Eric.J..Foss <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Ernst.Volgenau <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Forrest.E..Hoglund <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Gracia.Catherine.Martore <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ exec_fullname_H..Thomas.Bryant <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Harry.W..Rhulen <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Henry.Linsert.Jr. <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Jack.D..Michaels <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, …
## $ exec_fullname_Jackson.W..Moore.Jr. <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_James.Copenhaver.Smith <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_James.R..Moffett.Jr. <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_James.R..Scarborough <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Jean.Hu <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_John.C..Dannemiller <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_John.D..Craig <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_John.F..Nemelka <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_John.J..Mitcham <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_John.S..Simon <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_John.Willard.Marriott.Jr. <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Lawrence.M..Johnson <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Leo.S..Ullman <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Leonard.S..Simon <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Lewis.Mark.Kling <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Louis.DiNardo.BA <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Mark.A..DiBlasi <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Michael.D..Rumbolz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Michael.J..Covey <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Michael.P..Krasny <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Michael.S..McGavick <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Michael.Sam.Gilliland <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Peter.H..Coors <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Peter.J..Gaffney <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Philip.R..Hagerman <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_R..Halsey.Wise <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ exec_fullname_R..Michael.Andrews..Jr. <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Ralph.A..Hill <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Randall.D..Stilley <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Reto.Braun <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Richard.A..Abdoo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Richard.A..Manoogian <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ exec_fullname_Richard.F..Teerlink <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Richard.M..Beyer <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Richard.M..Frank <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Robert.E..Grant <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Ronald.Kochman <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Sheila.Lirio.Marcelo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Sherilyn.S..D..McCoy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Stephen.G..Shank <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Terry.L..Hall.CPA.PFS <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ exec_fullname_Thomas.P..Joyce.Jr. <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Timothy.R..Morse <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Victor.Lynn.Lund <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_Walter.C..Barber <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_William.F..Weissman <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_William.K..Coors <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ exec_fullname_William.W..Moreton <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code_X2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code_X3 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code_X4 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code_X5 <dbl> 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, …
## $ departure_code_X6 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code_X7 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ ceo_dismissal <fct> not dismissed, not dismissed, …
xgboost_spec <-
boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
loss_reduction = tune(), sample_size = tune()) %>%
set_mode("classification") %>%
set_engine("xgboost")
xgboost_workflow <-
workflow() %>%
add_recipe(xgboost_rec) %>%
add_model(xgboost_spec)
doParallel::registerDoParallel()
set.seed(65743)
xgboost_tune <-
tune_grid(xgboost_workflow,
resamples = data_cv,
grid = 5,
control = control_grid(save_pred = TRUE))
## Warning: package 'xgboost' was built under R version 4.3.3
collect_metrics(xgboost_tune)
## # A tibble: 15 × 12
## trees min_n tree_depth learn_rate loss_reduction sample_size .metric
## <int> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 725 3 5 0.0276 0.00000000248 0.841 accuracy
## 2 725 3 5 0.0276 0.00000000248 0.841 brier_class
## 3 725 3 5 0.0276 0.00000000248 0.841 roc_auc
## 4 1354 17 13 0.00495 0.00402 0.424 accuracy
## 5 1354 17 13 0.00495 0.00402 0.424 brier_class
## 6 1354 17 13 0.00495 0.00402 0.424 roc_auc
## 7 941 24 9 0.266 0.000675 0.637 accuracy
## 8 941 24 9 0.266 0.000675 0.637 brier_class
## 9 941 24 9 0.266 0.000675 0.637 roc_auc
## 10 85 27 1 0.00212 0.277 0.187 accuracy
## 11 85 27 1 0.00212 0.277 0.187 brier_class
## 12 85 27 1 0.00212 0.277 0.187 roc_auc
## 13 1712 38 12 0.0446 0.0000000705 0.718 accuracy
## 14 1712 38 12 0.0446 0.0000000705 0.718 brier_class
## 15 1712 38 12 0.0446 0.0000000705 0.718 roc_auc
## # ℹ 5 more variables: .estimator <chr>, mean <dbl>, n <int>, std_err <dbl>,
## # .config <chr>
collect_predictions(xgboost_tune) %>%
group_by(id) %>%
roc_curve(ceo_dismissal, .pred_dismissed) %>%
autoplot()
xgboost_last <- xgboost_workflow %>%
finalize_workflow(select_best(xgboost_tune, metric = "accuracy")) %>%
last_fit(data_split)
collect_metrics(xgboost_last)
## # A tibble: 3 × 4
## .metric .estimator .estimate .config
## <chr> <chr> <dbl> <chr>
## 1 accuracy binary 1 Preprocessor1_Model1
## 2 roc_auc binary 1 Preprocessor1_Model1
## 3 brier_class binary 0.00294 Preprocessor1_Model1
collect_predictions(xgboost_last) %>%
yardstick::conf_mat(ceo_dismissal, .pred_class)
## Truth
## Prediction dismissed not dismissed
## dismissed 4 0
## not dismissed 0 22
library(vip)
xgboost_last %>%
workflows::extract_fit_engine() %>%
vip()