# Download the TidyTuesday 2021-04-27 departures CSV from GitHub and read it
# into `departures`.
departures <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-04-27/departures.csv"
)

# Print skimr's per-column summary (missing values, ranges, distributions).
skimr::skim(departures)
Data summary
Name departures
Number of rows 9423
Number of columns 19
_______________________
Column type frequency:
character 8
numeric 10
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
coname 0 1.00 2 30 0 3860 0
exec_fullname 0 1.00 5 790 0 8701 0
interim_coceo 9105 0.03 6 7 0 6 0
still_there 7311 0.22 3 10 0 77 0
notes 1644 0.83 5 3117 0 7755 0
sources 1475 0.84 18 1843 0 7915 0
eight_ks 4499 0.52 69 3884 0 4914 0
_merge 0 1.00 11 11 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
dismissal_dataset_id 0 1.00 5684.10 25005.46 1 2305.5 4593 6812.5 559044 ▇▁▁▁▁
gvkey 0 1.00 40132.48 53921.34 1004 7337.0 14385 60900.5 328795 ▇▁▁▁▁
fyear 0 1.00 2007.74 8.19 1987 2000.0 2008 2016.0 2020 ▁▆▅▅▇
co_per_rol 0 1.00 25580.22 18202.38 -1 8555.5 22980 39275.5 64602 ▇▆▅▃▃
departure_code 1667 0.82 5.20 1.53 1 5.0 5 7.0 9 ▁▃▇▅▁
ceo_dismissal 1813 0.81 0.20 0.40 0 0.0 0 0.0 1 ▇▁▁▁▂
tenure_no_ceodb 0 1.00 1.03 0.17 0 1.0 1 1.0 3 ▁▇▁▁▁
max_tenure_ceodb 0 1.00 1.05 0.24 1 1.0 1 1.0 4 ▇▁▁▁▁
fyear_gone 1802 0.81 2006.64 13.63 1980 2000.0 2007 2013.0 2997 ▇▁▁▁▁
cik 245 0.97 741469.17 486551.43 1750 106413.0 857323 1050375.8 1808065 ▆▁▇▂▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
leftofc 1802 0.81 1981-01-01 2998-04-27 2006-12-31 3627
# Names of a handful of columns, captured as a character vector.
factors_vec <- names(
  select(departures, leftofc, departure_code, co_per_rol, fyear, coname)
)

# Working copy of the data: keep only rows that have a departure code and drop
# the two tenure columns plus the `_merge` column.
data_clean <- departures %>%
  filter(!is.na(departure_code)) %>%
  select(-tenure_no_ceodb, -max_tenure_ceodb, -`_merge`)

Explore data

# Column-by-column summary of the raw `departures` data (the same call that
# follows the CSV load above), repeated here to open the exploration section.
skimr::skim(departures)
Data summary
Name departures
Number of rows 9423
Number of columns 19
_______________________
Column type frequency:
character 8
numeric 10
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
coname 0 1.00 2 30 0 3860 0
exec_fullname 0 1.00 5 790 0 8701 0
interim_coceo 9105 0.03 6 7 0 6 0
still_there 7311 0.22 3 10 0 77 0
notes 1644 0.83 5 3117 0 7755 0
sources 1475 0.84 18 1843 0 7915 0
eight_ks 4499 0.52 69 3884 0 4914 0
_merge 0 1.00 11 11 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
dismissal_dataset_id 0 1.00 5684.10 25005.46 1 2305.5 4593 6812.5 559044 ▇▁▁▁▁
gvkey 0 1.00 40132.48 53921.34 1004 7337.0 14385 60900.5 328795 ▇▁▁▁▁
fyear 0 1.00 2007.74 8.19 1987 2000.0 2008 2016.0 2020 ▁▆▅▅▇
co_per_rol 0 1.00 25580.22 18202.38 -1 8555.5 22980 39275.5 64602 ▇▆▅▃▃
departure_code 1667 0.82 5.20 1.53 1 5.0 5 7.0 9 ▁▃▇▅▁
ceo_dismissal 1813 0.81 0.20 0.40 0 0.0 0 0.0 1 ▇▁▁▁▂
tenure_no_ceodb 0 1.00 1.03 0.17 0 1.0 1 1.0 3 ▁▇▁▁▁
max_tenure_ceodb 0 1.00 1.05 0.24 1 1.0 1 1.0 4 ▇▁▁▁▁
fyear_gone 1802 0.81 2006.64 13.63 1980 2000.0 2007 2013.0 2997 ▇▁▁▁▁
cik 245 0.97 741469.17 486551.43 1750 106413.0 857323 1050375.8 1808065 ▆▁▇▂▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
leftofc 1802 0.81 1981-01-01 2998-04-27 2006-12-31 3627
# Number of rows in the cleaned data for each departure code.
count(data_clean, departure_code)
## # A tibble: 9 × 2
##   departure_code     n
##            <dbl> <int>
## 1              1    84
## 2              2    97
## 3              3  1320
## 4              4   195
## 5              5  3598
## 6              6   183
## 7              7  2133
## 8              8    53
## 9              9    93
# Bar chart of how many rows carry each departure code.
ggplot(data = data_clean, mapping = aes(x = departure_code)) +
  geom_bar()

departure_code vs. year event happened

# Distribution of `fyear` for each departure code, one box per code.
# `departure_code` is stored as a number, so it is converted to a factor here:
# with a continuous x and no `group`, geom_boxplot() collapses everything into
# a single box and warns "Continuous x aesthetic".
data_clean %>%
  ggplot(aes(x = factor(departure_code), y = fyear)) +
  geom_boxplot() +
  # Keep the plain column name on the axis instead of "factor(departure_code)".
  labs(x = "departure_code")

correlation plot

# Step 1: binarize
# Drop the columns listed below, then let binarize() turn whatever is left
# into 0/1 indicator columns (one per bin).
data_binarized <- data_clean %>%
  select(
    -c(
      exec_fullname, coname, leftofc, notes, sources, eight_ks,
      still_there, interim_coceo, ceo_dismissal, fyear_gone, cik,
      departure_code, co_per_rol
    )
  ) %>%
  binarize()

# Peek at the binarized columns and their first few values.
glimpse(data_binarized)
## Rows: 7,756
## Columns: 12
## $ `dismissal_dataset_id__-Inf_2197.75` <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ dismissal_dataset_id__2197.75_4359.5 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dismissal_dataset_id__4359.5_6654.25 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dismissal_dataset_id__6654.25_Inf    <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `gvkey__-Inf_7086`                   <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ gvkey__7086_13348                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ gvkey__13348_30612                   <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ gvkey__30612_Inf                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `fyear__-Inf_1999`                   <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, …
## $ fyear__1999_2006                     <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, …
## $ fyear__2006_2012                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear__2012_Inf                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
# Step 2: correlation
# Correlate every binarized column against the `fyear__-Inf_1999` bin.
# NOTE(review): the target is a fiscal-year bin, and departure_code /
# ceo_dismissal were dropped before binarizing, so these values describe
# association with early fiscal years rather than with a departure outcome.
# Confirm this is the intended target.
data_correlation <- correlate(data_binarized, `fyear__-Inf_1999`)

data_correlation
## # A tibble: 12 × 3
##    feature              bin            correlation
##    <fct>                <chr>                <dbl>
##  1 fyear                -Inf_1999           1     
##  2 fyear                1999_2006          -0.366 
##  3 fyear                2012_Inf           -0.335 
##  4 fyear                2006_2012          -0.334 
##  5 gvkey                30612_Inf          -0.252 
##  6 dismissal_dataset_id 6654.25_Inf        -0.251 
##  7 gvkey                7086_13348          0.127 
##  8 dismissal_dataset_id 2197.75_4359.5      0.126 
##  9 dismissal_dataset_id -Inf_2197.75        0.105 
## 10 gvkey                -Inf_7086           0.104 
## 11 gvkey                13348_30612         0.0213
## 12 dismissal_dataset_id 4359.5_6654.25      0.0194
# Step 3: plot
# Draw the correlation funnel for the table computed in step 2.
correlationfunnel::plot_correlation_funnel(data_correlation)

Earlier values of `fyear` (1999 and before) show some correlation with CEO departure; in more recent years, however, there is very little correlation between CEO departure and the year they started.

High company codes (`gvkey`) show only a weak correlation, but may be worth looking into. I think the dataset could benefit from additional information, such as salary, years at the company, whether the executive has a family, etc.