# Download the TidyTuesday CEO departures table and print a per-column summary.
departures <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-04-27/departures.csv"
)
skimr::skim(data = departures)
Name | departures |
Number of rows | 9423 |
Number of columns | 19 |
_______________________ | |
Column type frequency: | |
character | 8 |
numeric | 10 |
POSIXct | 1 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
coname | 0 | 1.00 | 2 | 30 | 0 | 3860 | 0 |
exec_fullname | 0 | 1.00 | 5 | 790 | 0 | 8701 | 0 |
interim_coceo | 9105 | 0.03 | 6 | 7 | 0 | 6 | 0 |
still_there | 7311 | 0.22 | 3 | 10 | 0 | 77 | 0 |
notes | 1644 | 0.83 | 5 | 3117 | 0 | 7755 | 0 |
sources | 1475 | 0.84 | 18 | 1843 | 0 | 7915 | 0 |
eight_ks | 4499 | 0.52 | 69 | 3884 | 0 | 4914 | 0 |
_merge | 0 | 1.00 | 11 | 11 | 0 | 1 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
dismissal_dataset_id | 0 | 1.00 | 5684.10 | 25005.46 | 1 | 2305.5 | 4593 | 6812.5 | 559044 | ▇▁▁▁▁ |
gvkey | 0 | 1.00 | 40132.48 | 53921.34 | 1004 | 7337.0 | 14385 | 60900.5 | 328795 | ▇▁▁▁▁ |
fyear | 0 | 1.00 | 2007.74 | 8.19 | 1987 | 2000.0 | 2008 | 2016.0 | 2020 | ▁▆▅▅▇ |
co_per_rol | 0 | 1.00 | 25580.22 | 18202.38 | -1 | 8555.5 | 22980 | 39275.5 | 64602 | ▇▆▅▃▃ |
departure_code | 1667 | 0.82 | 5.20 | 1.53 | 1 | 5.0 | 5 | 7.0 | 9 | ▁▃▇▅▁ |
ceo_dismissal | 1813 | 0.81 | 0.20 | 0.40 | 0 | 0.0 | 0 | 0.0 | 1 | ▇▁▁▁▂ |
tenure_no_ceodb | 0 | 1.00 | 1.03 | 0.17 | 0 | 1.0 | 1 | 1.0 | 3 | ▁▇▁▁▁ |
max_tenure_ceodb | 0 | 1.00 | 1.05 | 0.24 | 1 | 1.0 | 1 | 1.0 | 4 | ▇▁▁▁▁ |
fyear_gone | 1802 | 0.81 | 2006.64 | 13.63 | 1980 | 2000.0 | 2007 | 2013.0 | 2997 | ▇▁▁▁▁ |
cik | 245 | 0.97 | 741469.17 | 486551.43 | 1750 | 106413.0 | 857323 | 1050375.8 | 1808065 | ▆▁▇▂▁ |
Variable type: POSIXct
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
leftofc | 1802 | 0.81 | 1981-01-01 | 2998-04-27 | 2006-12-31 | 3627 |
# Names of the columns of interest, in the order they are listed here
factors_vec <- names(
  select(
    departures,
    departure_code, co_per_rol, fyear, tenure_no_ceodb, max_tenure_ceodb, fyear_gone
  )
)
library(dplyr)
library(lubridate)
# Build the modelling table from the raw departures data.
#
# Steps:
#   1. Drop columns that are not used downstream, including the two
#      near-constant tenure counters (no need to convert them first).
#   2. Keep only rows with a known, plausible departure year and a known
#      outcome. `fyear_gone` is numeric, so it is compared to the number 2997
#      (a data-entry error visible in the skim summary), not to a string.
#      The explicit `!is.na(fyear_gone)` keeps the original behaviour, where
#      `filter()` silently dropped rows with a missing year.
#   3. Recode the 0/1 outcome into readable labels and derive calendar
#      features from the departure date.
#   4. Draw a reproducible 100-row subsample.

# Fix the RNG state so the subsample (and everything built on it) is repeatable.
set.seed(1234)

data_clean <- departures %>%
  select(-c(
    interim_coceo, still_there, eight_ks, gvkey, co_per_rol, cik, fyear,
    "_merge", notes, sources, tenure_no_ceodb, max_tenure_ceodb
  )) %>%
  filter(
    !is.na(fyear_gone),
    fyear_gone != 2997,
    !is.na(ceo_dismissal)
  ) %>%
  mutate(
    departure_code = factor(departure_code),
    # Any value other than 0/1 is kept as its character form, as before.
    ceo_dismissal = case_when(
      ceo_dismissal == 1 ~ "dismissed",
      ceo_dismissal == 0 ~ "not dismissed",
      TRUE ~ as.character(ceo_dismissal)
    ),
    leftofc = as.Date(leftofc), # POSIXct -> Date
    year = year(leftofc),
    doy = yday(leftofc), # day of the year
    month = month(leftofc)
  ) %>%
  select(-leftofc) %>% # only the derived date parts are kept
  slice_sample(n = 100)
# NOTE(review): this repeats the summary of the raw `departures` table that was
# already printed right after loading; if a summary of the cleaned sample was
# intended, `data_clean` would be the argument -- confirm.
skimr::skim(departures)
Name | departures |
Number of rows | 9423 |
Number of columns | 19 |
_______________________ | |
Column type frequency: | |
character | 8 |
numeric | 10 |
POSIXct | 1 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
coname | 0 | 1.00 | 2 | 30 | 0 | 3860 | 0 |
exec_fullname | 0 | 1.00 | 5 | 790 | 0 | 8701 | 0 |
interim_coceo | 9105 | 0.03 | 6 | 7 | 0 | 6 | 0 |
still_there | 7311 | 0.22 | 3 | 10 | 0 | 77 | 0 |
notes | 1644 | 0.83 | 5 | 3117 | 0 | 7755 | 0 |
sources | 1475 | 0.84 | 18 | 1843 | 0 | 7915 | 0 |
eight_ks | 4499 | 0.52 | 69 | 3884 | 0 | 4914 | 0 |
_merge | 0 | 1.00 | 11 | 11 | 0 | 1 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
dismissal_dataset_id | 0 | 1.00 | 5684.10 | 25005.46 | 1 | 2305.5 | 4593 | 6812.5 | 559044 | ▇▁▁▁▁ |
gvkey | 0 | 1.00 | 40132.48 | 53921.34 | 1004 | 7337.0 | 14385 | 60900.5 | 328795 | ▇▁▁▁▁ |
fyear | 0 | 1.00 | 2007.74 | 8.19 | 1987 | 2000.0 | 2008 | 2016.0 | 2020 | ▁▆▅▅▇ |
co_per_rol | 0 | 1.00 | 25580.22 | 18202.38 | -1 | 8555.5 | 22980 | 39275.5 | 64602 | ▇▆▅▃▃ |
departure_code | 1667 | 0.82 | 5.20 | 1.53 | 1 | 5.0 | 5 | 7.0 | 9 | ▁▃▇▅▁ |
ceo_dismissal | 1813 | 0.81 | 0.20 | 0.40 | 0 | 0.0 | 0 | 0.0 | 1 | ▇▁▁▁▂ |
tenure_no_ceodb | 0 | 1.00 | 1.03 | 0.17 | 0 | 1.0 | 1 | 1.0 | 3 | ▁▇▁▁▁ |
max_tenure_ceodb | 0 | 1.00 | 1.05 | 0.24 | 1 | 1.0 | 1 | 1.0 | 4 | ▇▁▁▁▁ |
fyear_gone | 1802 | 0.81 | 2006.64 | 13.63 | 1980 | 2000.0 | 2007 | 2013.0 | 2997 | ▇▁▁▁▁ |
cik | 245 | 0.97 | 741469.17 | 486551.43 | 1750 | 106413.0 | 857323 | 1050375.8 | 1808065 | ▆▁▇▂▁ |
Variable type: POSIXct
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
leftofc | 1802 | 0.81 | 1981-01-01 | 2998-04-27 | 2006-12-31 | 3627 |
# Class balance of the outcome in the sampled data
count(data_clean, ceo_dismissal)
## # A tibble: 2 × 2
## ceo_dismissal n
## <chr> <int>
## 1 dismissed 25
## 2 not dismissed 75
# Bar chart of the outcome classes
ggplot(data_clean, aes(x = ceo_dismissal)) +
  geom_bar()
ceo_dismissal vs. max tenure
#data_clean %>%
#ggplot(aes(max_tenure_ceodb)) +
#geom_boxplot()
# Doesn't represent the data well in my case
correlation plot
# Step 1: binarize every column except the two name identifiers
data_binarized <- binarize(
  select(data_clean, -exec_fullname, -coname)
)
glimpse(data_binarized)
## Rows: 100
## Columns: 29
## $ `dismissal_dataset_id__-Inf_1616.75` <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, …
## $ dismissal_dataset_id__1616.75_3793.5 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, …
## $ dismissal_dataset_id__3793.5_5810.75 <dbl> 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ dismissal_dataset_id__5810.75_Inf <dbl> 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ departure_code__1 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code__2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ departure_code__3 <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, …
## $ departure_code__4 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code__5 <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, …
## $ departure_code__6 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ departure_code__7 <dbl> 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, …
## $ ceo_dismissal__dismissed <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, …
## $ ceo_dismissal__not_dismissed <dbl> 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, …
## $ `fyear_gone__-Inf_1999` <dbl> 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, …
## $ fyear_gone__1999_2004 <dbl> 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, …
## $ fyear_gone__2004_2010 <dbl> 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ fyear_gone__2010_Inf <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `year__-Inf_1999` <dbl> 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, …
## $ year__1999_2004 <dbl> 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, …
## $ year__2004_2010 <dbl> 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, …
## $ year__2010_Inf <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `doy__-Inf_94` <dbl> 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ doy__94_167.5 <dbl> 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ doy__167.5_274.5 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, …
## $ doy__274.5_Inf <dbl> 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, …
## $ `month__-Inf_4` <dbl> 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ month__4_6 <dbl> 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, …
## $ month__6_9.25 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ month__9.25_Inf <dbl> 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, …
# Step 2: correlate every binary feature with the "dismissed" indicator
data_correlation <- correlate(data_binarized, ceo_dismissal__dismissed)
data_correlation
## # A tibble: 29 × 3
## feature bin correlation
## <fct> <chr> <dbl>
## 1 ceo_dismissal dismissed 1
## 2 ceo_dismissal not_dismissed -1
## 3 departure_code 3 0.947
## 4 departure_code 5 -0.522
## 5 departure_code 7 -0.307
## 6 dismissal_dataset_id 5810.75_Inf -0.173
## 7 month 4_6 -0.173
## 8 dismissal_dataset_id -Inf_1616.75 0.147
## 9 dismissal_dataset_id 3793.5_5810.75 0.147
## 10 doy 274.5_Inf 0.147
## # ℹ 19 more rows
# Step 3: draw the correlation funnel
correlationfunnel::plot_correlation_funnel(data_correlation)
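Departure codes are strongly associated with CEO dismissals: departure code 3 has a correlation of 0.947 with being dismissed, while codes 5 and 7 are negatively correlated with it (-0.522 and -0.307), so some departure codes are far more indicative of a CEO dismissal than others.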
library(dplyr)
library(rsample)
# Stratified train/test split followed by stratified cross-validation folds
# on the training portion; the seed makes both resampling steps repeatable.
set.seed(1234)
#data_clean <- data_clean %>% sample_n(100)
data_split <- data_clean %>% initial_split(strata = ceo_dismissal)
data_train <- data_split %>% training()
data_test <- data_split %>% testing()
data_cv <- data_train %>% rsample::vfold_cv(strata = ceo_dismissal)
data_cv
## # 10-fold cross-validation using stratification
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [66/8]> Fold01
## 2 <split [66/8]> Fold02
## 3 <split [66/8]> Fold03
## 4 <split [66/8]> Fold04
## 5 <split [66/8]> Fold05
## 6 <split [66/8]> Fold06
## 7 <split [67/7]> Fold07
## 8 <split [67/7]> Fold08
## 9 <split [68/6]> Fold09
## 10 <split [68/6]> Fold10
# Collapse the training rows to one per (dataset id, executive, year) key,
# keeping the first value of every column within each key.
# NOTE(review): this runs after `data_cv` has been created from `data_train`,
# so the folds appear to be built on the rows before de-duplication -- confirm
# that this is intended.
data_train <- data_train %>%
  group_by(
    unique_id = paste(dismissal_dataset_id, exec_fullname, year, sep = "_")
  ) %>%
  summarize(across(everything(), first))
library(themis)
library(recipes)
# Template data for the recipe. Columns left out here are unknown to the
# recipe and therefore never become predictors (this is how departure_code
# was already being kept out of the model):
#   * unique_id      - helper key created during de-duplication
#   * departure_code - presumably dropped because it almost determines the
#                      outcome (code 3 correlates 0.947 with dismissal)
#   * dismissal_dataset_id, coname, exec_fullname - row, company and person
#     identifiers. Dummy-encoding them produced roughly 150 near-unique
#     indicator columns on ~74 training rows, which the model can only
#     memorise and which do not exist for unseen companies or executives.
data_train_cleaned <- data_train %>%
  select(
    -unique_id, -departure_code,
    -dismissal_dataset_id, -coname, -exec_fullname
  )

# Preprocessing: dummy-encode any remaining nominal predictors, centre and
# scale the numeric ones, then oversample the minority class with SMOTE.
# all_nominal_predictors() already excludes the outcome.
xgboost_rec <- recipe(ceo_dismissal ~ ., data = data_train_cleaned) %>%
  step_dummy(all_nominal_predictors()) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_smote(ceo_dismissal)

# Estimate the recipe on the template data and inspect the processed result.
# bake(new_data = NULL) is the current replacement for juice().
xgboost_rec_prep <- xgboost_rec %>% prep()
data_prepped <- xgboost_rec_prep %>% bake(new_data = NULL) %>% glimpse()
## Rows: 112
## Columns: 152
## $ dismissal_dataset_id <dbl> -1.0912647, -1.0202653, -0.9921104…
## $ fyear_gone <dbl> -0.67737819, 1.11776731, 0.0130623…
## $ year <dbl> -0.67368413, 1.12156277, 0.0167954…
## $ doy <dbl> 1.06546612, 1.25201408, -1.6861162…
## $ month <dbl> 0.9880798, 1.2805514, -1.6441648, …
## $ coname_ADVENT.SOFTWARE.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_AGILENT.TECHNOLOGIES.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_ALLEGHENY.ENERGY.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_ALLEGHENY.TECHNOLOGIES.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_AMBAC.FINANCIAL.GROUP.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_APRIA.HEALTHCARE.GROUP.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_ARMCO.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_AT.T.WIRELESS.SERVICES.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_BANKBOSTON.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_BARRETT.RESOURCES.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_BAXTER.INTERNATIONAL.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_BLANCH.E.W.HOLDINGS.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_BMC.STOCK.HOLDINGS.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_BOWNE...CO.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_CAMBREX.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_CATALINA.MARKETING.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_CATALYST.HEALTH.SOLUTIONS <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_CATO.CORP..CL.A <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_CBEYOND.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_CERADYNE.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_CIT.GROUP.INC.OLD <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_CORUS.BANKSHARES.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_CPI.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_CURATIVE.HEALTH.SERVICES.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_DSP.COMMUNICATIONS.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_DUN...BRADSTREET.CORP <dbl> 8.4860776, -0.1162476, -0.1162476,…
## $ coname_EPRESENCE.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_EQUITY.OFFICE.PROPERTIES.TR <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_FEDERAL.HOME.LOAN.MORTG.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_FERRO.CORP <dbl> -0.1162476, 8.4860776, -0.1162476,…
## $ coname_FLOWERS.FOODS.INC <dbl> -0.1162476, -0.1162476, 8.4860776,…
## $ coname_FULLER..H..B...CO <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_GENERAL.DYNAMICS.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_GLOBAL.INDUSTRIAL.TECH.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_GREAT.ELM.CAPITAL.GROUP.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_GREEN.PLAINS.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_HEXION.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_HUTCHINSON.TECHNOLOGY.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_ICONIX.BRAND.GROUP.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_IKON.OFFICE.SOLUTIONS <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_INFORMATION.RESOURCES.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_INTERMET.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_INTERSTATE.BAKERIES.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_INVACARE.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_JARDEN.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_KEMET.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_LEGATO.SYSTEMS.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_LUBYS.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_MANOR.CARE.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_MARSHALL.INDUSTRIES <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_MAYTAG.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_MBNA.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_NASH.FINCH.CO <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_NATURES.SUNSHINE.PRODS.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_NISOURCE.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_OFFSHORE.PIPELINES.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_ORBITAL.ATK.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_OVERSEAS.SHIPHOLDING.GROUP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_QUANTUM.HEALTH.RESOURCES.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_RH <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_ROBBINS...MYERS.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_ROCKWELL.AUTOMATION <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_SHERWIN.WILLIAMS.CO <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_STATE.STREET.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_STRATEX.NETWORKS.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_SYSCO.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_TNP.ENTERPRISES.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_TREDEGAR.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_TRIBUNE.MEDIA.CO <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_WELLMAN.INC <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_WHITNEY.HOLDING.CORP <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_WILLAMETTE.INDUSTRIES <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ coname_XL.GROUP.LTD <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Albert.R..Gamper..Jr. <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Amos.R..McMullian <dbl> -0.1162476, -0.1162476, 8.4860776,…
## $ exec_fullname_Barry.A..Ellsworth <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Barry.J..C..Parker <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Brian.Michael.O.Hara <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Bruce.L..Hammonds <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Charles.A..Sullivan <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Charles.D..Kissner <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Charles.H..Cotros <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Charles.T..Brumback <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Christopher.M..Connor <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Craig.O..Morrison <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Daniel.P..Howells <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Daniel.W..Duval <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_David.A..Spina <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_David.B..Wright <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_David.K..Laniak <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_David.T..Blair <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_David.W..Thompson <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_David.William.Wallis <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Don.H..Davis.Jr. <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Donald.J..Listwin <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Douglas.H..Stickney <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Dwight.R..Spurlock <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Edgar.W..Blanch.Jr. <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Edward.W..Barnholt <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Frank.C..Wade <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Gary.G..Friedman <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_George.W..Off <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Gordon.S..Marshall <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Harold.B..Finch.Jr. <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Ira.Stepanian <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_James.A..Mack <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_James.E..Lillie <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_James.F..Geiger <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_James.F..Kirsch <dbl> -0.1162476, 8.4860776, -0.1162476,…
## $ exec_fullname_James.G..Andress <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Jeffrey.W..Green <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Jeremy.M..Jones <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Joel.P..Moskowitz <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_John.C..Hope..III <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_John.D..Gottwald <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_John.D..Zeglis <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_John.Doddridge <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_John.E..Stuart <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_John.N..Haugh <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_John.Vakoutis <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Klaus.Bergman <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Leland.C..Brendsel <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Leonard.A..Hadley <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Michele.Volpi <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Morton.P..Hyman <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Nathan.Hod <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Paul.A..Ormond <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Per.Olof.Loof <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Peter.C..Alexander <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Peter.M..Caswell <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Rawles.Fulgham <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Renato.Cataldo.Jr. <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Richard.H..Koontz <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Robert.C..Skaggs.Jr. <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Robert.J..Glickman <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Robert.L..Purdum <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Samuel.Zell.J.D. <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Thomas.A..Corcoran <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Thomas.M..Duff <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Vernon.R..Loucks.Jr. <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_Volney.Taylor <dbl> 8.4860776, -0.1162476, -0.1162476,…
## $ exec_fullname_Wayland.H..Cato.Jr. <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_William.A..Anders <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_William.J..Barrett <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_William.P..Ferry <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ exec_fullname_William.Swindells <dbl> -0.1162476, -0.1162476, -0.1162476…
## $ ceo_dismissal <fct> dismissed, dismissed, not dismisse…
library(usemodels)
# Print a suggested xgboost recipe / model spec / workflow / tuning template
# for this formula and training data (the printed code is a starting point
# and is not executed).
usemodels::use_xgboost(ceo_dismissal ~ ., data = data_train)
## xgboost_recipe <-
## recipe(formula = ceo_dismissal ~ ., data = data_train) %>%
## step_zv(all_predictors())
##
## xgboost_spec <-
## boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
## loss_reduction = tune(), sample_size = tune()) %>%
## set_mode("classification") %>%
## set_engine("xgboost")
##
## xgboost_workflow <-
## workflow() %>%
## add_recipe(xgboost_recipe) %>%
## add_model(xgboost_spec)
##
## set.seed(6993)
## xgboost_tune <-
## tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
library(workflows)
library(parsnip)
# Boosted-tree classifier on the xgboost engine; the number of trees and the
# tree depth are left open for tuning (loss_reduction and sample_size tuning
# is currently switched off).
xgboost_spec <- boost_tree(trees = tune(), tree_depth = tune())
xgboost_spec <- set_mode(xgboost_spec, "classification")
xgboost_spec <- set_engine(xgboost_spec, "xgboost")

# Bundle the model with the preprocessing recipe.
xgboost_workflow <- workflow() %>%
  add_model(xgboost_spec) %>%
  add_recipe(xgboost_rec)
library(tune)
library(ggplot2)
library(dials)
# Regular 5 x 5 grid over the two tuned parameters (number of trees and
# tree depth), using the default ranges of the dials parameter objects.
tree_grid <- grid_regular(
  trees(),
  tree_depth(),
  levels = 5
)

# Register a parallel backend for the resampling loop.
doParallel::registerDoParallel()

# Evaluate every grid candidate on the cross-validation folds.
# The grid built above is now actually passed to tune_grid(); previously it
# was created but ignored in favour of `grid = 5` (five automatically
# generated candidates). Out-of-fold predictions are saved for the ROC curves.
set.seed(17375)
xgboost_tune <-
  tune_grid(
    xgboost_workflow,
    resamples = data_cv,
    grid = tree_grid,
    control = control_grid(save_pred = TRUE)
  )
## Warning: package 'xgboost' was built under R version 4.3.3
library(yardstick)
# Resampled performance of every tuning candidate
xgboost_tune %>% collect_metrics()
## # A tibble: 15 × 8
## trees tree_depth .metric .estimator mean n std_err .config
## <int> <int> <chr> <chr> <dbl> <int> <dbl> <chr>
## 1 277 3 accuracy binary 0.576 10 0.0773 Preprocessor1_Mo…
## 2 277 3 brier_class binary 0.309 10 0.0516 Preprocessor1_Mo…
## 3 277 3 roc_auc binary 0.647 10 0.0770 Preprocessor1_Mo…
## 4 427 4 accuracy binary 0.590 10 0.0725 Preprocessor1_Mo…
## 5 427 4 brier_class binary 0.322 10 0.0522 Preprocessor1_Mo…
## 6 427 4 roc_auc binary 0.638 10 0.0772 Preprocessor1_Mo…
## 7 1816 7 accuracy binary 0.611 10 0.0522 Preprocessor1_Mo…
## 8 1816 7 brier_class binary 0.327 10 0.0472 Preprocessor1_Mo…
## 9 1816 7 roc_auc binary 0.59 10 0.0993 Preprocessor1_Mo…
## 10 1483 11 accuracy binary 0.611 10 0.0522 Preprocessor1_Mo…
## 11 1483 11 brier_class binary 0.310 10 0.0443 Preprocessor1_Mo…
## 12 1483 11 roc_auc binary 0.653 10 0.0829 Preprocessor1_Mo…
## 13 910 15 accuracy binary 0.611 10 0.0522 Preprocessor1_Mo…
## 14 910 15 brier_class binary 0.306 10 0.0428 Preprocessor1_Mo…
## 15 910 15 roc_auc binary 0.653 10 0.0829 Preprocessor1_Mo…
# One ROC curve per cross-validation fold, from the saved out-of-fold predictions
autoplot(
  roc_curve(
    group_by(collect_predictions(xgboost_tune), id),
    ceo_dismissal,
    .pred_dismissed
  )
)
# Plug the candidate with the best resampled accuracy into the workflow,
# fit it on the training portion of the split and score it on the test portion.
xgboost_last <- last_fit(
  finalize_workflow(
    xgboost_workflow,
    select_best(xgboost_tune, metric = "accuracy")
  ),
  data_split
)
xgboost_last %>% collect_metrics()
## # A tibble: 3 × 4
## .metric .estimator .estimate .config
## <chr> <chr> <dbl> <chr>
## 1 accuracy binary 0.654 Preprocessor1_Model1
## 2 roc_auc binary 0.421 Preprocessor1_Model1
## 3 brier_class binary 0.303 Preprocessor1_Model1
# Confusion matrix of the final model's test-set predictions
yardstick::conf_mat(
  collect_predictions(xgboost_last),
  truth = ceo_dismissal,
  estimate = .pred_class
)
## Truth
## Prediction dismissed not dismissed
## dismissed 2 4
## not dismissed 5 15
library(vip)
# Variable-importance plot for the fitted xgboost engine object
vip(workflows::extract_fit_engine(xgboost_last))
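The previous model had an accuracy of 0.56 and an AUC of 0.395; this model reaches an accuracy of 0.654 and an AUC of 0.421 on the test set. Accuracy improved, but an AUC below 0.5 means the predicted probabilities still rank dismissals worse than chance.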