# Load the TidyTuesday (2021-04-27) CEO departures data straight from GitHub.
departures <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-04-27/departures.csv"
)

# Summarise every column: type, missingness and basic distribution stats.
skimr::skim(departures)
Name | departures |
Number of rows | 9423 |
Number of columns | 19 |
_______________________ | |
Column type frequency: | |
character | 8 |
numeric | 10 |
POSIXct | 1 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
coname | 0 | 1.00 | 2 | 30 | 0 | 3860 | 0 |
exec_fullname | 0 | 1.00 | 5 | 790 | 0 | 8701 | 0 |
interim_coceo | 9105 | 0.03 | 6 | 7 | 0 | 6 | 0 |
still_there | 7311 | 0.22 | 3 | 10 | 0 | 77 | 0 |
notes | 1644 | 0.83 | 5 | 3117 | 0 | 7755 | 0 |
sources | 1475 | 0.84 | 18 | 1843 | 0 | 7915 | 0 |
eight_ks | 4499 | 0.52 | 69 | 3884 | 0 | 4914 | 0 |
_merge | 0 | 1.00 | 11 | 11 | 0 | 1 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
dismissal_dataset_id | 0 | 1.00 | 5684.10 | 25005.46 | 1 | 2305.5 | 4593 | 6812.5 | 559044 | ▇▁▁▁▁ |
gvkey | 0 | 1.00 | 40132.48 | 53921.34 | 1004 | 7337.0 | 14385 | 60900.5 | 328795 | ▇▁▁▁▁ |
fyear | 0 | 1.00 | 2007.74 | 8.19 | 1987 | 2000.0 | 2008 | 2016.0 | 2020 | ▁▆▅▅▇ |
co_per_rol | 0 | 1.00 | 25580.22 | 18202.38 | -1 | 8555.5 | 22980 | 39275.5 | 64602 | ▇▆▅▃▃ |
departure_code | 1667 | 0.82 | 5.20 | 1.53 | 1 | 5.0 | 5 | 7.0 | 9 | ▁▃▇▅▁ |
ceo_dismissal | 1813 | 0.81 | 0.20 | 0.40 | 0 | 0.0 | 0 | 0.0 | 1 | ▇▁▁▁▂ |
tenure_no_ceodb | 0 | 1.00 | 1.03 | 0.17 | 0 | 1.0 | 1 | 1.0 | 3 | ▁▇▁▁▁ |
max_tenure_ceodb | 0 | 1.00 | 1.05 | 0.24 | 1 | 1.0 | 1 | 1.0 | 4 | ▇▁▁▁▁ |
fyear_gone | 1802 | 0.81 | 2006.64 | 13.63 | 1980 | 2000.0 | 2007 | 2013.0 | 2997 | ▇▁▁▁▁ |
cik | 245 | 0.97 | 741469.17 | 486551.43 | 1750 | 106413.0 | 857323 | 1050375.8 | 1808065 | ▆▁▇▂▁ |
Variable type: POSIXct
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
leftofc | 1802 | 0.81 | 1981-01-01 | 2998-04-27 | 2006-12-31 | 3627 |
# Names of the five columns picked out as factor candidates.
factors_vec <- names(
  select(departures, leftofc, departure_code, co_per_rol, fyear, coname)
)

# Keep only rows that have a departure code, and drop the two tenure counters
# and the `_merge` flag.
data_clean <- departures %>%
  filter(!is.na(departure_code)) %>%
  select(-c(tenure_no_ceodb, max_tenure_ceodb, `_merge`))

# NOTE(review): this summarises the raw `departures` again, not `data_clean`,
# so the printed summary is identical to the first one. Presumably
# `data_clean` was intended here -- confirm.
skimr::skim(departures)
Name | departures |
Number of rows | 9423 |
Number of columns | 19 |
_______________________ | |
Column type frequency: | |
character | 8 |
numeric | 10 |
POSIXct | 1 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
coname | 0 | 1.00 | 2 | 30 | 0 | 3860 | 0 |
exec_fullname | 0 | 1.00 | 5 | 790 | 0 | 8701 | 0 |
interim_coceo | 9105 | 0.03 | 6 | 7 | 0 | 6 | 0 |
still_there | 7311 | 0.22 | 3 | 10 | 0 | 77 | 0 |
notes | 1644 | 0.83 | 5 | 3117 | 0 | 7755 | 0 |
sources | 1475 | 0.84 | 18 | 1843 | 0 | 7915 | 0 |
eight_ks | 4499 | 0.52 | 69 | 3884 | 0 | 4914 | 0 |
_merge | 0 | 1.00 | 11 | 11 | 0 | 1 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
dismissal_dataset_id | 0 | 1.00 | 5684.10 | 25005.46 | 1 | 2305.5 | 4593 | 6812.5 | 559044 | ▇▁▁▁▁ |
gvkey | 0 | 1.00 | 40132.48 | 53921.34 | 1004 | 7337.0 | 14385 | 60900.5 | 328795 | ▇▁▁▁▁ |
fyear | 0 | 1.00 | 2007.74 | 8.19 | 1987 | 2000.0 | 2008 | 2016.0 | 2020 | ▁▆▅▅▇ |
co_per_rol | 0 | 1.00 | 25580.22 | 18202.38 | -1 | 8555.5 | 22980 | 39275.5 | 64602 | ▇▆▅▃▃ |
departure_code | 1667 | 0.82 | 5.20 | 1.53 | 1 | 5.0 | 5 | 7.0 | 9 | ▁▃▇▅▁ |
ceo_dismissal | 1813 | 0.81 | 0.20 | 0.40 | 0 | 0.0 | 0 | 0.0 | 1 | ▇▁▁▁▂ |
tenure_no_ceodb | 0 | 1.00 | 1.03 | 0.17 | 0 | 1.0 | 1 | 1.0 | 3 | ▁▇▁▁▁ |
max_tenure_ceodb | 0 | 1.00 | 1.05 | 0.24 | 1 | 1.0 | 1 | 1.0 | 4 | ▇▁▁▁▁ |
fyear_gone | 1802 | 0.81 | 2006.64 | 13.63 | 1980 | 2000.0 | 2007 | 2013.0 | 2997 | ▇▁▁▁▁ |
cik | 245 | 0.97 | 741469.17 | 486551.43 | 1750 | 106413.0 | 857323 | 1050375.8 | 1808065 | ▆▁▇▂▁ |
Variable type: POSIXct
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
leftofc | 1802 | 0.81 | 1981-01-01 | 2998-04-27 | 2006-12-31 | 3627 |
# Tally how many rows carry each departure code.
count(data_clean, departure_code)
## # A tibble: 9 × 2
## departure_code n
## <dbl> <int>
## 1 1 84
## 2 2 97
## 3 3 1320
## 4 4 195
## 5 5 3598
## 6 6 183
## 7 7 2133
## 8 8 53
## 9 9 93
# Bar chart of how often each departure code occurs.
ggplot(data_clean, aes(x = departure_code)) +
  geom_bar()
departure_code vs. the year the event happened
# Distribution of fyear for each departure code.
# departure_code is numeric, so without an explicit group ggplot2 draws a
# single box for all rows and warns "Continuous x aesthetic -- did you forget
# `aes(group = ...)`?". Grouping by the code draws one box per code and
# removes that warning.
data_clean %>%
  ggplot(aes(departure_code, fyear, group = departure_code)) +
  geom_boxplot()
correlation plot
# Step 1: binarize
# After the drops below only dismissal_dataset_id, gvkey and fyear remain;
# binarize() turns each of them into 0/1 indicator columns, one per bin.
# NOTE(review): departure_code and ceo_dismissal are dropped here, so nothing
# derived from this table can be related to departures -- confirm that this
# is intended.
data_binarized <- data_clean %>%
  select(
    -c(
      exec_fullname, coname, leftofc, notes, sources, eight_ks,
      still_there, interim_coceo, ceo_dismissal, fyear_gone, cik,
      departure_code, co_per_rol
    )
  ) %>%
  binarize()

# Quick structural check of the indicator columns.
glimpse(data_binarized)
## Rows: 7,756
## Columns: 12
## $ `dismissal_dataset_id__-Inf_2197.75` <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ dismissal_dataset_id__2197.75_4359.5 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dismissal_dataset_id__4359.5_6654.25 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dismissal_dataset_id__6654.25_Inf <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `gvkey__-Inf_7086` <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ gvkey__7086_13348 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ gvkey__13348_30612 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ gvkey__30612_Inf <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `fyear__-Inf_1999` <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, …
## $ fyear__1999_2006 <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, …
## $ fyear__2006_2012 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear__2012_Inf <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
# Step 2: correlation
# Correlate every indicator column against the lowest fyear bin
# (`fyear__-Inf_1999`), which serves as the target.
data_correlation <- correlate(data_binarized, `fyear__-Inf_1999`)

data_correlation
## # A tibble: 12 × 3
## feature bin correlation
## <fct> <chr> <dbl>
## 1 fyear -Inf_1999 1
## 2 fyear 1999_2006 -0.366
## 3 fyear 2012_Inf -0.335
## 4 fyear 2006_2012 -0.334
## 5 gvkey 30612_Inf -0.252
## 6 dismissal_dataset_id 6654.25_Inf -0.251
## 7 gvkey 7086_13348 0.127
## 8 dismissal_dataset_id 2197.75_4359.5 0.126
## 9 dismissal_dataset_id -Inf_2197.75 0.105
## 10 gvkey -Inf_7086 0.104
## 11 gvkey 13348_30612 0.0213
## 12 dismissal_dataset_id 4359.5_6654.25 0.0194
# Step 3: plot
# Draw the correlation funnel from the correlation table built in step 2.
correlationfunnel::plot_correlation_funnel(data_correlation)
Earlier years (the bin up to 1999) show some correlation with CEO departure; however, in more recent years there is very little correlation between CEO departure and the year they started.
High company codes show only a weak correlation, but they may be worth looking into. I think the dataset would benefit from additional information, such as salary, years at the company, and whether the CEO has a family.