Explore data

# Print a per-column overview (missingness, ranges, distributions) of the raw
# departures table. Called directly rather than through a pipe so the summary
# is labelled with the object's own name.
skimr::skim(data = departures)

Data summary
Name	departures
Number of rows	9423
Number of columns	19
_______________________
Column type frequency:
character	8
numeric	10
POSIXct	1
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	n_unique
coname	0	1.00	2	30	3860
exec_fullname	0	1.00	5	790	8701
interim_coceo	9105	0.03	6	7	6
still_there	7311	0.22	3	10	77
notes	1644	0.83	5	3117	7755
sources	1475	0.84	18	1843	7915
eight_ks	4499	0.52	69	3884	4914
_merge	0	1.00	11	11	1

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
dismissal_dataset_id	0	1.00	5684.10	25005.46	1	2305.5	4593	6812.5	559044	▇▁▁▁▁
gvkey	0	1.00	40132.48	53921.34	1004	7337.0	14385	60900.5	328795	▇▁▁▁▁
fyear	0	1.00	2007.74	8.19	1987	2000.0	2008	2016.0	2020	▁▆▅▅▇
co_per_rol	0	1.00	25580.22	18202.38	-1	8555.5	22980	39275.5	64602	▇▆▅▃▃
departure_code	1667	0.82	5.20	1.53	1	5.0	5	7.0	9	▁▃▇▅▁
ceo_dismissal	1813	0.81	0.20	0.40	0	0.0	0	0.0	1	▇▁▁▁▂
tenure_no_ceodb	0	1.00	1.03	0.17	0	1.0	1	1.0	3	▁▇▁▁▁
max_tenure_ceodb	0	1.00	1.05	0.24	1	1.0	1	1.0	4	▇▁▁▁▁
fyear_gone	1802	0.81	2006.64	13.63	1980	2000.0	2007	2013.0	2997	▇▁▁▁▁
cik	245	0.97	741469.17	486551.43	1750	106413.0	857323	1050375.8	1808065	▆▁▇▂▁

Variable type: POSIXct

skim_variable	n_missing	complete_rate	min	max	median	n_unique
leftofc	1802	0.81	1981-01-01	2998-04-27	2006-12-31	3627

# Tally how many rows fall under each departure code.
count(data_clean, departure_code)

## # A tibble: 9 × 2
##   departure_code     n
##            <dbl> <int>
## 1              1    84
## 2              2    97
## 3              3  1320
## 4              4   195
## 5              5  3598
## 6              6   183
## 7              7  2133
## 8              8    53
## 9              9    93

# Bar chart of the number of rows recorded for each departure code.
ggplot(data_clean, aes(x = departure_code)) +
  geom_bar()

departure_code vs. the year the event happened

# Compare the distribution of fiscal years across departure codes.
# departure_code is stored as a number, so it is converted to a factor here:
# with a continuous x, geom_boxplot() pools every row into a single box and
# emits the "Continuous x aesthetic" warning instead of drawing one box per
# code.
data_clean %>%
    ggplot(aes(factor(departure_code), fyear)) +
    geom_boxplot() +
    # Keep the plain column name on the axis rather than "factor(departure_code)".
    labs(x = "departure_code")

## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?

correlation plot

# Step 1: binarize
# Remove the listed columns, then let binarize() turn whatever remains into
# 0/1 indicator columns (one per bin), and preview the result.
data_binarized <- data_clean %>%
  select(
    -c(
      exec_fullname, coname, leftofc, notes, sources, eight_ks,
      still_there, interim_coceo, ceo_dismissal, fyear_gone, cik,
      departure_code, co_per_rol
    )
  ) %>%
  binarize()

glimpse(data_binarized)

## Rows: 7,756
## Columns: 12
## $ `dismissal_dataset_id__-Inf_2197.75` <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ dismissal_dataset_id__2197.75_4359.5 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dismissal_dataset_id__4359.5_6654.25 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ dismissal_dataset_id__6654.25_Inf    <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `gvkey__-Inf_7086`                   <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ gvkey__7086_13348                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ gvkey__13348_30612                   <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ gvkey__30612_Inf                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `fyear__-Inf_1999`                   <dbl> 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, …
## $ fyear__1999_2006                     <dbl> 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, …
## $ fyear__2006_2012                     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ fyear__2012_Inf                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …

# Step 2: correlation
# Correlate every binarized feature against the earliest fiscal-year bin.
# NOTE(review): the target is an fyear bin, and departure_code / ceo_dismissal
# were removed by the select() ahead of binarize(), so this table does not
# measure association with the departure outcome itself -- confirm intent.
data_correlation <- correlate(data_binarized, target = `fyear__-Inf_1999`)

data_correlation

## # A tibble: 12 × 3
##    feature              bin            correlation
##    <fct>                <chr>                <dbl>
##  1 fyear                -Inf_1999           1     
##  2 fyear                1999_2006          -0.366 
##  3 fyear                2012_Inf           -0.335 
##  4 fyear                2006_2012          -0.334 
##  5 gvkey                30612_Inf          -0.252 
##  6 dismissal_dataset_id 6654.25_Inf        -0.251 
##  7 gvkey                7086_13348          0.127 
##  8 dismissal_dataset_id 2197.75_4359.5      0.126 
##  9 dismissal_dataset_id -Inf_2197.75        0.105 
## 10 gvkey                -Inf_7086           0.104 
## 11 gvkey                13348_30612         0.0213
## 12 dismissal_dataset_id 4359.5_6654.25      0.0194

# Step 3: plot
# Draw the correlation funnel from the feature/bin/correlation table.
correlationfunnel::plot_correlation_funnel(data_correlation)

Earlier fiscal years, such as those up to 1999, show some correlation with CEO departure; in more recent years, however, there is very little correlation between CEO departure and the year they started.

High company codes (the top `gvkey` bin) show only a weak correlation, but they may be worth looking into. I think the dataset could benefit from additional information, such as salary, years at the company, whether the executive has a family, etc.

CApply 5: CEO Departures

Erin McEvoy

2024-10-10

Explore data