# Load the TidyTuesday (2021-04-27) CEO departures data directly from GitHub.
departures <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-04-27/departures.csv"
)
# Print a per-column summary (types, missingness, distributions) of the raw data.
skimr::skim(departures)
Name | departures |
Number of rows | 9423 |
Number of columns | 19 |
_______________________ | |
Column type frequency: | |
character | 8 |
numeric | 10 |
POSIXct | 1 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
coname | 0 | 1.00 | 2 | 30 | 0 | 3860 | 0 |
exec_fullname | 0 | 1.00 | 5 | 790 | 0 | 8701 | 0 |
interim_coceo | 9105 | 0.03 | 6 | 7 | 0 | 6 | 0 |
still_there | 7311 | 0.22 | 3 | 10 | 0 | 77 | 0 |
notes | 1644 | 0.83 | 5 | 3117 | 0 | 7755 | 0 |
sources | 1475 | 0.84 | 18 | 1843 | 0 | 7915 | 0 |
eight_ks | 4499 | 0.52 | 69 | 3884 | 0 | 4914 | 0 |
_merge | 0 | 1.00 | 11 | 11 | 0 | 1 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
dismissal_dataset_id | 0 | 1.00 | 5684.10 | 25005.46 | 1 | 2305.5 | 4593 | 6812.5 | 559044 | ▇▁▁▁▁ |
gvkey | 0 | 1.00 | 40132.48 | 53921.34 | 1004 | 7337.0 | 14385 | 60900.5 | 328795 | ▇▁▁▁▁ |
fyear | 0 | 1.00 | 2007.74 | 8.19 | 1987 | 2000.0 | 2008 | 2016.0 | 2020 | ▁▆▅▅▇ |
co_per_rol | 0 | 1.00 | 25580.22 | 18202.38 | -1 | 8555.5 | 22980 | 39275.5 | 64602 | ▇▆▅▃▃ |
departure_code | 1667 | 0.82 | 5.20 | 1.53 | 1 | 5.0 | 5 | 7.0 | 9 | ▁▃▇▅▁ |
ceo_dismissal | 1813 | 0.81 | 0.20 | 0.40 | 0 | 0.0 | 0 | 0.0 | 1 | ▇▁▁▁▂ |
tenure_no_ceodb | 0 | 1.00 | 1.03 | 0.17 | 0 | 1.0 | 1 | 1.0 | 3 | ▁▇▁▁▁ |
max_tenure_ceodb | 0 | 1.00 | 1.05 | 0.24 | 1 | 1.0 | 1 | 1.0 | 4 | ▇▁▁▁▁ |
fyear_gone | 1802 | 0.81 | 2006.64 | 13.63 | 1980 | 2000.0 | 2007 | 2013.0 | 2997 | ▇▁▁▁▁ |
cik | 245 | 0.97 | 741469.17 | 486551.43 | 1750 | 106413.0 | 857323 | 1050375.8 | 1808065 | ▆▁▇▂▁ |
Variable type: POSIXct
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
leftofc | 1802 | 0.81 | 1981-01-01 | 2998-04-27 | 2006-12-31 | 3627 |
# Names of the columns picked out as relevant, stored as a character vector
factors_vec <- names(
  select(
    departures,
    departure_code, co_per_rol, fyear,
    tenure_no_ceodb, max_tenure_ceodb, fyear_gone
  )
)
# Clean the data
# Drop the columns that are not used downstream, remove rows that cannot be
# modelled, and convert the categorical fields to factors.
data_clean <- departures %>%
  select(
    -c(
      interim_coceo, still_there, eight_ks, gvkey, co_per_rol, cik, fyear,
      "_merge", notes, sources, departure_code
    )
  ) %>%
  # fyear_gone is numeric, so compare against a number rather than a string.
  # 2997 is an implausible year value. Rows where fyear_gone is NA are dropped
  # here as well, because filter() only keeps rows where the test is TRUE.
  filter(fyear_gone != 2997) %>%
  # The outcome must be known for every remaining row.
  filter(!is.na(ceo_dismissal)) %>%
  mutate(
    tenure_no_ceodb = factor(tenure_no_ceodb),
    max_tenure_ceodb = factor(max_tenure_ceodb),
    ceo_dismissal = factor(ceo_dismissal),
    # Keep only the calendar date of the departure timestamp.
    leftofc = as.Date(leftofc)
  )
# Create year, doy, and month
data_clean <- data_clean %>%
  mutate(
    year = lubridate::year(leftofc),
    doy = lubridate::yday(leftofc),
    month = lubridate::month(leftofc)
  ) %>%
  # The raw date is no longer needed once its components have been extracted.
  select(-leftofc)
# NOTE(review): this summarises the raw `departures` again, not `data_clean` -
# confirm that is intended.
skimr::skim(departures)
Name | departures |
Number of rows | 9423 |
Number of columns | 19 |
_______________________ | |
Column type frequency: | |
character | 8 |
numeric | 10 |
POSIXct | 1 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
coname | 0 | 1.00 | 2 | 30 | 0 | 3860 | 0 |
exec_fullname | 0 | 1.00 | 5 | 790 | 0 | 8701 | 0 |
interim_coceo | 9105 | 0.03 | 6 | 7 | 0 | 6 | 0 |
still_there | 7311 | 0.22 | 3 | 10 | 0 | 77 | 0 |
notes | 1644 | 0.83 | 5 | 3117 | 0 | 7755 | 0 |
sources | 1475 | 0.84 | 18 | 1843 | 0 | 7915 | 0 |
eight_ks | 4499 | 0.52 | 69 | 3884 | 0 | 4914 | 0 |
_merge | 0 | 1.00 | 11 | 11 | 0 | 1 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
dismissal_dataset_id | 0 | 1.00 | 5684.10 | 25005.46 | 1 | 2305.5 | 4593 | 6812.5 | 559044 | ▇▁▁▁▁ |
gvkey | 0 | 1.00 | 40132.48 | 53921.34 | 1004 | 7337.0 | 14385 | 60900.5 | 328795 | ▇▁▁▁▁ |
fyear | 0 | 1.00 | 2007.74 | 8.19 | 1987 | 2000.0 | 2008 | 2016.0 | 2020 | ▁▆▅▅▇ |
co_per_rol | 0 | 1.00 | 25580.22 | 18202.38 | -1 | 8555.5 | 22980 | 39275.5 | 64602 | ▇▆▅▃▃ |
departure_code | 1667 | 0.82 | 5.20 | 1.53 | 1 | 5.0 | 5 | 7.0 | 9 | ▁▃▇▅▁ |
ceo_dismissal | 1813 | 0.81 | 0.20 | 0.40 | 0 | 0.0 | 0 | 0.0 | 1 | ▇▁▁▁▂ |
tenure_no_ceodb | 0 | 1.00 | 1.03 | 0.17 | 0 | 1.0 | 1 | 1.0 | 3 | ▁▇▁▁▁ |
max_tenure_ceodb | 0 | 1.00 | 1.05 | 0.24 | 1 | 1.0 | 1 | 1.0 | 4 | ▇▁▁▁▁ |
fyear_gone | 1802 | 0.81 | 2006.64 | 13.63 | 1980 | 2000.0 | 2007 | 2013.0 | 2997 | ▇▁▁▁▁ |
cik | 245 | 0.97 | 741469.17 | 486551.43 | 1750 | 106413.0 | 857323 | 1050375.8 | 1808065 | ▆▁▇▂▁ |
Variable type: POSIXct
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
leftofc | 1802 | 0.81 | 1981-01-01 | 2998-04-27 | 2006-12-31 | 3627 |
# Class balance of the outcome: number of rows per ceo_dismissal level
count(data_clean, ceo_dismissal)
## # A tibble: 2 × 2
## ceo_dismissal n
## <fct> <int>
## 1 0 5993
## 2 1 1484
# Bar chart of the outcome counts
ggplot(data_clean, aes(ceo_dismissal)) +
  geom_bar()
ceo_dismissal vs. max tenure
#data_clean %>%
#ggplot(aes(max_tenure_ceodb)) +
#geom_boxplot()
# Doesn't represent the data well in my case
correlation plot
# Step 1: binarize
# Leave out the two name columns, then convert every remaining column into
# 0/1 indicator columns.
data_binarized <- binarize(
  select(data_clean, -exec_fullname, -coname)
)
glimpse(data_binarized)
## Rows: 7,477
## Columns: 28
## $ `dismissal_dataset_id__-Inf_2176` <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ dismissal_dataset_id__2176_4326 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dismissal_dataset_id__4326_6580 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dismissal_dataset_id__6580_Inf <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ceo_dismissal__0 <dbl> 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
## $ ceo_dismissal__1 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ tenure_no_ceodb__1 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ tenure_no_ceodb__2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `tenure_no_ceodb__-OTHER` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ max_tenure_ceodb__1 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ max_tenure_ceodb__2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `max_tenure_ceodb__-OTHER` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `fyear_gone__-Inf_2000` <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, …
## $ fyear_gone__2000_2006 <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, …
## $ fyear_gone__2006_2013 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, …
## $ fyear_gone__2013_Inf <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `year__-Inf_2000` <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, …
## $ year__2000_2006 <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, …
## $ year__2006_2013 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ year__2013_Inf <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `doy__-Inf_87` <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ doy__87_180 <dbl> 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, …
## $ doy__180_274 <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, …
## $ doy__274_Inf <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, …
## $ `month__-Inf_3` <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ month__3_6 <dbl> 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, …
## $ month__6_10 <dbl> 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, …
## $ month__10_Inf <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, …
# Step 2: correlation
# Drop a handful of indicator columns (each listed once), then correlate every
# remaining indicator with the "dismissed" outcome column.
data_correlation <- data_binarized %>%
  select(
    -c(
      `tenure_no_ceodb__-OTHER`,
      `max_tenure_ceodb__-OTHER`,
      `dismissal_dataset_id__-Inf_2176`,
      `fyear_gone__-Inf_2000`
    )
  ) %>%
  correlate(ceo_dismissal__1)
data_correlation
## # A tibble: 24 × 3
## feature bin correlation
## <fct> <chr> <dbl>
## 1 ceo_dismissal 0 -1
## 2 ceo_dismissal 1 1
## 3 year -Inf_2000 -0.0616
## 4 max_tenure_ceodb 1 0.0582
## 5 max_tenure_ceodb 2 -0.0539
## 6 year 2000_2006 0.0316
## 7 fyear_gone 2000_2006 0.0288
## 8 month 6_10 0.0274
## 9 month 3_6 -0.0264
## 10 tenure_no_ceodb 1 0.0260
## # ℹ 14 more rows
# Step 3: plot
# Draw the correlation funnel from the correlations computed above.
correlationfunnel::plot_correlation_funnel(data_correlation)
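There is a moderate correlation between departure codes and CEO dismissals, so some departure codes are more indicative of CEO dismissals than others. (Note that `departure_code` was removed during cleaning, so it does not appear in the funnel above; the features shown there correlate only weakly with dismissal.)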
library(dplyr)
library(rsample)
# Fix the RNG so the subsample, the split and the folds are reproducible.
set.seed(1234)
# Continue with a random subsample of 100 rows (this overwrites data_clean).
data_clean <- sample_n(data_clean, size = 100)
# Train/test split stratified on the outcome.
data_split <- initial_split(data_clean, strata = ceo_dismissal)
data_train <- training(data_split)
data_test <- testing(data_split)
# Stratified cross-validation folds built from the training rows only.
data_cv <- vfold_cv(data_train, strata = ceo_dismissal)
data_cv
## # 10-fold cross-validation using stratification
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [66/9]> Fold01
## 2 <split [66/9]> Fold02
## 3 <split [67/8]> Fold03
## 4 <split [68/7]> Fold04
## 5 <split [68/7]> Fold05
## 6 <split [68/7]> Fold06
## 7 <split [68/7]> Fold07
## 8 <split [68/7]> Fold08
## 9 <split [68/7]> Fold09
## 10 <split [68/7]> Fold10
# Disabled experiment: a numeric helper column on data_train.
# NOTE(review): the line below reads departure_code, which was dropped when
# data_clean was built, so it would need changing before being re-enabled.
#data_train$ceo_dismissal_numeric <- as.numeric(as.character(data_train$departure_code))
# Disabled experiment: a labelled factor version of ceo_dismissal
#data_train$ceo_dismissal_factor <- factor(data_train$ceo_dismissal, levels = c(0, 1), labels = c("Not Dismissed", "Dismissed"))
# Inspect the structure of the training set. Both helper columns above are
# commented out, so neither is created before this call.
str(data_train)
## tibble [75 × 10] (S3: tbl_df/tbl/data.frame)
## $ dismissal_dataset_id: num [1:75] 1994 3192 6735 4562 7051 ...
## $ coname : chr [1:75] "KELLY SERVICES INC -CL A" "EDISON INTERNATIONAL" "AZZURRA HOLDING CORP" "POLARIS INDUSTRIES INC" ...
## $ exec_fullname : chr [1:75] "Terence E. Adderley" "John E. Bryson" "George P. Roberts" "W. Hall Wendel Jr." ...
## $ ceo_dismissal : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ tenure_no_ceodb : Factor w/ 3 levels "1","2","3": 1 1 2 1 2 1 1 1 1 1 ...
## $ max_tenure_ceodb : Factor w/ 4 levels "1","2","3","4": 1 1 2 1 2 1 1 1 1 1 ...
## $ fyear_gone : num [1:75] 2006 2008 2003 1999 2015 ...
## $ year : num [1:75] 2006 2008 2003 1999 2015 ...
## $ doy : num [1:75] 40 213 245 141 1 288 91 152 203 151 ...
## $ month : num [1:75] 2 7 9 5 1 10 3 6 7 5 ...
library(dplyr)
library(recipes)
# Create the recipe for modeling
# The recipe is specified on the training set only, so the held-out test rows
# never inform the preprocessing.
xgboost_rec <- recipe(ceo_dismissal ~ ., data = data_train) %>%
  # Keep the row identifier in the data but out of the predictor set.
  update_role(dismissal_dataset_id, new_role = "ID") %>%
  # Company and executive names are near-unique text labels; dummy-encoding
  # them produces one column per name, which cannot carry over to rows whose
  # names were not seen during training.
  step_rm(coname, exec_fullname) %>%
  step_dummy(all_nominal_predictors()) %>%
  step_zv(all_predictors()) %>%
  step_normalize(all_numeric_predictors())
# Prep the recipe
xgboost_rec_prep <- xgboost_rec %>% prep()
# bake(new_data = NULL) returns the processed training data; it replaces the
# superseded juice().
xgboost_rec_prep %>% bake(new_data = NULL) %>% glimpse()
## Rows: 100
## Columns: 206
## $ dismissal_dataset_id <dbl> 4116, 8749, 44…
## $ fyear_gone <dbl> 1.4750294, 0.8…
## $ year <dbl> 1.4801796, 0.8…
## $ doy <dbl> 0.37704630, 0.…
## $ month <dbl> 0.37584272, 0.…
## $ ceo_dismissal <fct> 1, 1, 0, 0, 0,…
## $ coname_AKAMAI.TECHNOLOGIES.INC <dbl> -0.1, -0.1, -0…
## $ coname_ALERIS.CORP <dbl> -0.1, -0.1, -0…
## $ coname_ALLIED.WASTE.INDUSTRIES.INC <dbl> -0.1, -0.1, -0…
## $ coname_AMEREN.CORP <dbl> -0.1, -0.1, -0…
## $ coname_ARROW.ELECTRONICS.INC <dbl> -0.1, -0.1, -0…
## $ coname_ATLAS.AIR.WORLDWIDE.HLDG.INC <dbl> -0.1, -0.1, -0…
## $ coname_AZZURRA.HOLDING.CORP <dbl> -0.1, -0.1, -0…
## $ coname_BALLY.ENTERTAINMENT.CORP <dbl> -0.1, -0.1, -0…
## $ coname_BANDAG.INC <dbl> -0.1, -0.1, -0…
## $ coname_BELL.INDUSTRIES.INC <dbl> -0.1, -0.1, -0…
## $ coname_BIOMATRIX.INC <dbl> -0.1, -0.1, -0…
## $ coname_BIRMINGHAM.STEEL.CORP <dbl> -0.1, -0.1, -0…
## $ coname_BONANZA.CREEK.ENERGY.INC <dbl> -0.1, -0.1, -0…
## $ coname_BORDERS.GROUP.INC <dbl> -0.1, -0.1, -0…
## $ coname_BORGWARNER.INC <dbl> -0.1, -0.1, -0…
## $ coname_CAMBRIDGE.TECHNOLOGY.PARTNER <dbl> -0.1, -0.1, -0…
## $ coname_CARBO.CERAMICS.INC <dbl> -0.1, -0.1, -0…
## $ coname_CAREFUSION.CORP <dbl> -0.1, -0.1, -0…
## $ coname_CAREINSITE.INC <dbl> -0.1, -0.1, -0…
## $ coname_CBS.INC <dbl> -0.1, -0.1, -0…
## $ coname_CENTURY.ALUMINUM.CO <dbl> -0.1, -0.1, -0…
## $ coname_CILCORP.INC <dbl> -0.1, -0.1, -0…
## $ coname_CONCERTO.SOFTWARE.INC <dbl> -0.1, -0.1, -0…
## $ coname_CONECTIV.INC <dbl> -0.1, -0.1, -0…
## $ coname_CORECIVIC.INC <dbl> -0.1, -0.1, -0…
## $ coname_COTY.INC <dbl> -0.1, -0.1, -0…
## $ coname_COVENTRY.HEALTH.CARE.INC <dbl> -0.1, -0.1, -0…
## $ coname_CREDENCE.SYSTEMS.CORP <dbl> -0.1, -0.1, -0…
## $ coname_DELTA.WOODSIDE.INDUSTRIES <dbl> -0.1, -0.1, -0…
## $ coname_DENTSPLY.SIRONA.INC <dbl> -0.1, -0.1, 9.…
## $ coname_DIGITAL.INSIGHT.CORP <dbl> -0.1, -0.1, -0…
## $ coname_DIRECTV <dbl> -0.1, -0.1, -0…
## $ coname_DOWNEY.FINANCIAL.CORP <dbl> -0.1, -0.1, -0…
## $ coname_EDISON.INTERNATIONAL <dbl> -0.1, -0.1, -0…
## $ coname_ENERGEN.CORP <dbl> -0.1, -0.1, -0…
## $ coname_EOG.RESOURCES.INC <dbl> -0.1, -0.1, -0…
## $ coname_EPICOR.SOFTWARE.CORP..OLD <dbl> -0.1, -0.1, -0…
## $ coname_FIRST.COMMERCIAL.CORP <dbl> -0.1, -0.1, -0…
## $ coname_FISERV.INC <dbl> -0.1, -0.1, -0…
## $ coname_FRANKLIN.RESOURCES.INC <dbl> -0.1, -0.1, -0…
## $ coname_GENERAL.RE.CORP <dbl> -0.1, -0.1, -0…
## $ coname_GENWORTH.FINANCIAL.INC <dbl> -0.1, -0.1, -0…
## $ coname_GILLETTE.CO <dbl> -0.1, -0.1, -0…
## $ coname_GLOBAL.CROSSING.LTD <dbl> -0.1, -0.1, -0…
## $ coname_HANDLEMAN.CO <dbl> -0.1, -0.1, -0…
## $ coname_HANDY...HARMAN.LTD <dbl> -0.1, -0.1, -0…
## $ coname_HYPERION.SOFTWARE.CORP <dbl> -0.1, -0.1, -0…
## $ coname_IMPAX.LABORATORIES.INC <dbl> -0.1, -0.1, -0…
## $ coname_INTEGRATED.CIRCUIT.SYSTEMS <dbl> -0.1, -0.1, -0…
## $ coname_INTL.GAME.TECHNOLOGY <dbl> -0.1, -0.1, -0…
## $ coname_IOWA.ILLINOIS.GAS...ELEC <dbl> -0.1, -0.1, -0…
## $ coname_IPAYMENT.INC <dbl> -0.1, -0.1, -0…
## $ coname_JONES.PHARMA.INC <dbl> -0.1, -0.1, -0…
## $ coname_JUST.FOR.FEET.INC <dbl> -0.1, -0.1, -0…
## $ coname_KELLY.SERVICES.INC..CL.A <dbl> -0.1, -0.1, -0…
## $ coname_LABORATORY.CP.OF.AMER.HLDGS <dbl> -0.1, -0.1, -0…
## $ coname_LEGGETT...PLATT.INC <dbl> -0.1, -0.1, -0…
## $ coname_MARINER.HEALTH.GROUP.INC <dbl> -0.1, -0.1, -0…
## $ coname_MATERIAL.SCIENCES.CORP <dbl> -0.1, -0.1, -0…
## $ coname_MCCAW.CELLULAR.COMM..CL.A <dbl> -0.1, -0.1, -0…
## $ coname_MCKESSON.CORP <dbl> -0.1, -0.1, -0…
## $ coname_MEAD.CORP <dbl> -0.1, -0.1, -0…
## $ coname_MEADE.INSTRUMENTS.CORP <dbl> -0.1, -0.1, -0…
## $ coname_MILLIPORE.CORP <dbl> -0.1, -0.1, -0…
## $ coname_MONDELEZ.INTERNATIONAL.INC <dbl> -0.1, -0.1, -0…
## $ coname_NEWMONT.GOLDCORP.CORP <dbl> -0.1, -0.1, -0…
## $ coname_ORGANOGENESIS.INC <dbl> -0.1, -0.1, -0…
## $ coname_PERFORMANCE.FOOD.GROUP.CO <dbl> -0.1, -0.1, -0…
## $ coname_POLARIS.INDUSTRIES.INC <dbl> -0.1, -0.1, -0…
## $ coname_PORTLAND.GENERAL.CORP <dbl> -0.1, -0.1, -0…
## $ coname_PRICE..T..ROWE..GROUP <dbl> -0.1, -0.1, -0…
## $ coname_PROCTER...GAMBLE.CO <dbl> -0.1, -0.1, -0…
## $ coname_QUAKER.CHEMICAL.CORP <dbl> -0.1, -0.1, -0…
## $ coname_REGIONS.FINANCIAL.CORP <dbl> -0.1, -0.1, -0…
## $ coname_REGIS.CORP.MN <dbl> -0.1, -0.1, -0…
## $ coname_REYNOLDS.METALS.CO <dbl> -0.1, -0.1, -0…
## $ coname_ROCKWELL.AUTOMATION <dbl> -0.1, -0.1, -0…
## $ coname_RS.LEGACY.CORP <dbl> -0.1, -0.1, -0…
## $ coname_RURAL.METRO.CORP <dbl> -0.1, -0.1, -0…
## $ coname_SANTA.FE.SNYDER.CORP <dbl> -0.1, -0.1, -0…
## $ coname_SANTANDER.HOLDINGS.USA.INC <dbl> -0.1, -0.1, -0…
## $ coname_SEARS.ROEBUCK...CO <dbl> -0.1, -0.1, -0…
## $ coname_SOUTHERN.NEW.ENG.TELECOMM <dbl> -0.1, -0.1, -0…
## $ coname_SPORTS.AUTHORITY.INC.OLD <dbl> -0.1, -0.1, -0…
## $ coname_SRA.INTERNATIONAL.INC <dbl> -0.1, -0.1, -0…
## $ coname_ST.JOE.CO <dbl> 9.9, -0.1, -0.…
## $ coname_STERIS.PLC <dbl> -0.1, -0.1, -0…
## $ coname_STRATUS.COMPUTER.INC <dbl> -0.1, -0.1, -0…
## $ coname_SUN.MICROSYSTEMS.INC <dbl> -0.1, -0.1, -0…
## $ coname_SUNOCO.INC <dbl> -0.1, -0.1, -0…
## $ coname_TIME.WARNER.INC <dbl> -0.1, -0.1, -0…
## $ coname_TRAVELERS.COS.INC <dbl> -0.1, -0.1, -0…
## $ coname_TXU.GAS.CO <dbl> -0.1, -0.1, -0…
## $ coname_U.S.TRUST.CORP <dbl> -0.1, -0.1, -0…
## $ coname_ULTA.BEAUTY.INC <dbl> -0.1, 9.9, -0.…
## $ coname_VERTEX.PHARMACEUTICALS.INC <dbl> -0.1, -0.1, -0…
## $ coname_WASHINGTON.GROUP.INTL.INC <dbl> -0.1, -0.1, -0…
## $ coname_WINDSTREAM.HOLDINGS.INC <dbl> -0.1, -0.1, -0…
## $ coname_XTO.ENERGY.INC <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Arthur.C..Martinez <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Arthur.W..Stratton <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Barnett.Grace <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Bernd.Erich.Beetz <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Bill.R..Sanford <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Bob.R..Simpson <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Carl.A..Grimstad <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Charles.B..Johnson <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Craig.A..Davis <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Craig.O..McCaw <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Daniel.D..Rosenthal <dbl> -0.1, -0.1, -0…
## $ exec_fullname_David.L..Mahoney <dbl> -0.1, -0.1, -0…
## $ exec_fullname_David.L..Schlotterbeck.B.S..M.S. <dbl> -0.1, -0.1, -0…
## $ exec_fullname_David.P..King <dbl> -0.1, -0.1, -0…
## $ exec_fullname_David.W..Sear <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Dennis.M..Jones <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Donahue.L..Wildman <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Donald.R..Beall <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Endre.A..Balazs <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Ernst.Volgenau <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Forrest.E..Hoglund <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Francis.J..Lunger <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Francis.M..Scricco <dbl> -0.1, -0.1, -0…
## $ exec_fullname_G..Robert.Evans <dbl> -0.1, -0.1, -0…
## $ exec_fullname_George.Frederick.Wilkinson <dbl> -0.1, -0.1, -0…
## $ exec_fullname_George.H..Conrades <dbl> -0.1, -0.1, -0…
## $ exec_fullname_George.P..Roberts <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Gordon.Rae.Parker <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Gregory.P..Josefowicz <dbl> -0.1, -0.1, -0…
## $ exec_fullname_H..Marshall.Schwarz <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Harold.Ruttenberg <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Harry.M..Cornell.Jr. <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Howard.E..Cosgrove <dbl> -0.1, -0.1, -0…
## $ exec_fullname_James.A..Perakis <dbl> -0.1, -0.1, -0…
## $ exec_fullname_James.Aloysius.Charles.Kennedy.C.F.A..CFA <dbl> -0.1, -0.1, -0…
## $ exec_fullname_James.D..Foy <dbl> -0.1, -0.1, -0…
## $ exec_fullname_James.K..Sims <dbl> -0.1, -0.1, -0…
## $ exec_fullname_James.L..Payne <dbl> -0.1, -0.1, -0…
## $ exec_fullname_James.Stanley.Mackin <dbl> -0.1, -0.1, -0…
## $ exec_fullname_James.T..McManus.II <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Jay.S..Fishman <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Jeffery.R..Gardner <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Jeffrey.L..Bewkes <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Jeffrey.T..Slovin <dbl> -0.1, -0.1, 9.…
## $ exec_fullname_Jeremiah.J..Sheehan <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Jerome.F..Tatar <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Jesse.P..Orsini <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.B..Furman <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.B..Yasinsky <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.C..Diebel <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.C..Dorman <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.D..Ferguson <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.E..Bryson <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.E..Pepper <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.H..Wimberly <dbl> -0.1, -0.1, -0…
## $ exec_fullname_John.J..Zillmer <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Jonathan.I..Schwartz <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Joseph.C..Magnacca <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Joseph.P..Campanelli <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Ken.L..Harrison <dbl> -0.1, -0.1, -0…
## $ exec_fullname_L..George.Klaus <dbl> -0.1, -0.1, -0…
## $ exec_fullname_L..Park.Brady..Jr. <dbl> 9.9, -0.1, -0.…
## $ exec_fullname_Laurence.Alan.Tisch <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Leo.J..Hindery.Jr. <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Leslie.M..Muma <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Lyn.Kirby <dbl> -0.1, 9.9, -0.…
## $ exec_fullname_Martin.E..Hanaka <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Martin.G..Carver <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Marvin.M..Chronister <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Matthew.W..Emmens <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Michael.A..Chowdry <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Michael.C..Hawley <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Michael.D..White <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Michael.Desmond.Fraizer <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Michael.L..Sabolinski <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Myron.Kunin <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Neil.D..Arnold <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Paul.C..Suthern <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Ralph.L..Cheek <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Robert.A..Garvey <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Robert.C..Sledd <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Robert.H..Campbell <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Robert.O..Viets <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Roger.K..Deromedi <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Ronald.E..Ferguson <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Sigismundus.W.W..Lubsen <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Stanley.J..Bright <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Stephen.Strome <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Terence.E..Adderley <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Theodore.Williams <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Thomas.J..Matthews <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Thomas.R..Voss <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Timothy.M..Manganello <dbl> -0.1, -0.1, -0…
## $ exec_fullname_W..Hall.Wendel.Jr. <dbl> -0.1, -0.1, -0…
## $ exec_fullname_Walter.H..Monteith.Jr. <dbl> -0.1, -0.1, -0…
## $ exec_fullname_William.C..McCord <dbl> -0.1, -0.1, -0…
## $ exec_fullname_William.E..Foster <dbl> -0.1, -0.1, -0…
## $ exec_fullname_William.F..Garrett <dbl> -0.1, -0.1, -0…
## $ exec_fullname_William.G..Howard..Jr. <dbl> -0.1, -0.1, -0…
## $ tenure_no_ceodb_X2 <dbl> -0.2282658, -0…
## $ max_tenure_ceodb_X2 <dbl> -0.2729764, -0…
library(usemodels)
# Print template tidymodels code (recipe, model spec, workflow, tuning call)
# for an xgboost model of ceo_dismissal on the training data.
usemodels::use_xgboost(
  formula = ceo_dismissal ~ .,
  data = data_train
)
## xgboost_recipe <-
## recipe(formula = ceo_dismissal ~ ., data = data_train) %>%
## step_zv(all_predictors())
##
## xgboost_spec <-
## boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
## loss_reduction = tune(), sample_size = tune()) %>%
## set_mode("classification") %>%
## set_engine("xgboost")
##
## xgboost_workflow <-
## workflow() %>%
## add_recipe(xgboost_recipe) %>%
## add_model(xgboost_spec)
##
## set.seed(64987)
## xgboost_tune <-
## tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
library(workflows)
library(parsnip)
# Boosted-tree classifier on the xgboost engine; every listed hyperparameter
# is marked with tune() so it is chosen during tuning.
xgboost_spec <- boost_tree(
  trees = tune(),
  min_n = tune(),
  tree_depth = tune(),
  learn_rate = tune(),
  loss_reduction = tune(),
  sample_size = tune()
) %>%
  set_mode("classification") %>%
  set_engine("xgboost")

# Bundle the model specification with the preprocessing recipe.
xgboost_workflow <- workflow() %>%
  add_model(xgboost_spec) %>%
  add_recipe(xgboost_rec)
library(tune)
# Register a parallel backend before tuning.
doParallel::registerDoParallel()
# Seed the RNG so the candidate grid is reproducible.
set.seed(17375)
# Evaluate 5 candidate hyperparameter sets across the cross-validation folds.
xgboost_tune <- xgboost_workflow %>%
  tune_grid(resamples = data_cv, grid = 5)
## Warning: package 'xgboost' was built under R version 4.3.3