The dataset documents the reasons for CEO departure in S&P 1500 firms from 2000 through 2018. Goal is to predict CEO departure (ceo_dismissal) by using the departures dataset.

Import Data

library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.3.1

## Warning: package 'tidyr' was built under R version 4.3.1

## Warning: package 'dplyr' was built under R version 4.3.1

## Warning: package 'stringr' was built under R version 4.3.1

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(tidytext)

## Warning: package 'tidytext' was built under R version 4.3.1

library(correlationfunnel)

## ══ correlationfunnel Tip #2 ════════════════════════════════════════════════════
## Clean your NA's prior to using `binarize()`.
## Missing values and cleaning data are critical to getting great correlations. :)

data <- read.csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-04-27/departures.csv")

Explore Data

skimr::skim(data)

Data summary
Name	data
Number of rows	9423
Number of columns	19
_______________________
Column type frequency:
character	9
numeric	10
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	n_unique
coname	0	1.00	2	30	3860
exec_fullname	0	1.00	5	790	8701
interim_coceo	9105	0.03	6	7	6
leftofc	1802	0.81	20	20	3627
still_there	7311	0.22	3	10	77
notes	1644	0.83	5	3117	7755
sources	1475	0.84	18	1843	7915
eight_ks	4499	0.52	69	3884	4914
X_merge	0	1.00	11	11	1

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
dismissal_dataset_id	0	1.00	5684.10	25005.46	1	2305.5	4593	6812.5	559044	▇▁▁▁▁
gvkey	0	1.00	40132.48	53921.34	1004	7337.0	14385	60900.5	328795	▇▁▁▁▁
fyear	0	1.00	2007.74	8.19	1987	2000.0	2008	2016.0	2020	▁▆▅▅▇
co_per_rol	0	1.00	25580.22	18202.38	-1	8555.5	22980	39275.5	64602	▇▆▅▃▃
departure_code	1667	0.82	5.20	1.53	1	5.0	5	7.0	9	▁▃▇▅▁
ceo_dismissal	1813	0.81	0.20	0.40	0	0.0	0	0.0	1	▇▁▁▁▂
tenure_no_ceodb	0	1.00	1.03	0.17	0	1.0	1	1.0	3	▁▇▁▁▁
max_tenure_ceodb	0	1.00	1.05	0.24	1	1.0	1	1.0	4	▇▁▁▁▁
fyear_gone	1802	0.81	2006.64	13.63	1980	2000.0	2007	2013.0	2997	▇▁▁▁▁
cik	245	0.97	741469.17	486551.43	1750	106413.0	857323	1050375.8	1808065	▆▁▇▂▁

Issues with the data:

Missing Values:

* interim_coceo (91% missing)
* eight_ks (52% missing)

Factors or Numeric Variables:

* departure_code (categorical but currently numeric)
* interim_coceo (needs to be a factor)
* leftofc (needs to be a factor)
* still_there (needs to be a factor)

Zero Variance Variables:

* X-merge

Character Variables:

* coname 
* exec_fullname
* sources

Unbalanced Target Variable:

* ceo_dismissal

ID Variable:

* dismissal_dataset_id
* gvkey
* cik

Clean Data

# Identify factor variables
factors_vec <- c("departure_code", "leftofc", "still_there")

# Clean the data and ensure ceo_dismissal is a factor
data_clean <- data %>%
   
    # Convert ceo_dismissal and factors to proper types
    mutate(ceo_dismissal = as.factor(ceo_dismissal),
           across(all_of(factors_vec), as.factor)) %>%
    
    # Remove rows with missing values in key columns
    drop_na(ceo_dismissal, tenure_no_ceodb, fyear_gone, departure_code, leftofc, still_there) %>%
    
    # Drop zero-variance and irrelevant columns
    select(-c(X_merge, interim_coceo, eight_ks, dismissal_dataset_id, gvkey, cik, coname, exec_fullname, sources))

skimr::skim(data_clean)

Data summary
Name	data_clean
Number of rows	501
Number of columns	10
_______________________
Column type frequency:
character	1
factor	4
numeric	5
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
notes	0	1	109	2013	0	500	0

Variable type: factor

skim_variable	complete_rate	ordered	n_unique	top_counts
departure_code	1	FALSE	3	7: 496, 3: 4, 5: 1, 1: 0
ceo_dismissal	1	FALSE	2	0: 497, 1: 4
leftofc	1	FALSE	465	199: 3, 200: 3, 199: 2, 199: 2
still_there	1	FALSE	44	01a: 33, 29d: 28, 01j: 26, 01m: 26

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
fyear	1	2001.63	6.93	1993	1996	1999	2006	2018	▇▅▃▁▂
co_per_rol	1	13989.14	13410.46	900	4323	7354	20346	63294	▇▂▁▁▁
tenure_no_ceodb	1	1.01	0.13	1	1	1	1	3	▇▁▁▁▁
max_tenure_ceodb	1	1.02	0.21	1	1	1	1	4	▇▁▁▁▁
fyear_gone	1	2004.94	44.96	1990	1998	2000	2007	2997	▇▁▁▁▁

Explore Data

# Bar plot for CEO Dismissal
data_clean %>%
    ggplot(aes(ceo_dismissal)) + 
    geom_bar() +
    labs(title = "CEO Dismissal Count", x = "CEO Dismissal", y = "Count")

data_clean %>%    
    ggplot(aes(x = ceo_dismissal, y = tenure_no_ceodb)) + 
    geom_boxplot() +
    labs(title = "CEO Dismissal vs. Tenure", x = "CEO Dismissal", y = "CEO Tenure")

# Step 1: Binarize the data
data_binarized <- data_clean %>%
    binarize()

data_binarized %>% glimpse()

## Rows: 501
## Columns: 55
## $ `fyear__-Inf_1996`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      <dbl> …
## $ fyear__1996_1999                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear__1999_2006                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear__2006_Inf                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <dbl> …
## $ `co_per_rol__-Inf_4323`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 <dbl> …
## $ co_per_rol__4323_7354                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   <dbl> …
## $ co_per_rol__7354_20346                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ co_per_rol__20346_Inf                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   <dbl> …
## $ departure_code__7                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       <dbl> …
## $ `departure_code__-OTHER`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <dbl> …
## $ ceo_dismissal__0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ `ceo_dismissal__-OTHER`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 <dbl> …
## $ tenure_no_ceodb__1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      <dbl> …
## $ `tenure_no_ceodb__-OTHER`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               <dbl> …
## $ max_tenure_ceodb__1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     <dbl> …
## $ `max_tenure_ceodb__-OTHER`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              <dbl> …
## $ `fyear_gone__-Inf_1998`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 <dbl> …
## $ fyear_gone__1998_2000                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   <dbl> …
## $ fyear_gone__2000_2007                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   <dbl> …
## $ fyear_gone__2007_Inf                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <dbl> …
## $ `leftofc__1996-08-01T00:00:00Z`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <dbl> …
## $ `leftofc__-OTHER`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       <dbl> …
## $ still_there__01apr2021                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__01feb2021                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__01jan2021                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__01jul2021                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__01jun2021                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__01mar2021                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__01may2021                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__12apr2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__12aug2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__12dec2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__12jul2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__12jun2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__12mar2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__12nov2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__12oct2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__12sep2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__13dec2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__14dec2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__14jan2021                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__17dec2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__20dec2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__21dec2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__24dec2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__25dec2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__26dec2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__27dec2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__28dec2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__29dec2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__30dec2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ still_there__31dec2020                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ `still_there__-OTHER`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   <dbl> …
## $ `notes__Constantine_S._Macricostas_is_Chairman_of_the_Board_and_founder_of_the_Company._Mr._Macricostas_was_Executive_Chairman_of_the_Company_until_January_20,_2018._Mr._Macricostas_previously_served_as_Chief_Executive_Officer_of_the_Company_on_three_different_occasions_from_1974_until_August_1997,_from_February_2004_to_June_2005,_and_from_April_2009_until_May_2015._Mr._Macricostas_is_a_former_director_of_RagingWire_Data_Centers,_Inc.,_(“RagingWire”)._Mr._Macricostas_is_the_father_of_George_Macricostas._Mr._Macricostas’_knowledge_of_the_Company_and_its_operations,_as_well_as,_the_industry_is_invaluable_to_the_Board_of_Directors_in_evaluating_and_directing_the_Company’s_future._Through_his_long_service_to_the_Company_and_experience_in_the_photomask_industry,_he_has_developed_extensive_knowledge_in_the_areas_of_leadership,_safety,_risk_oversight,_management,_and_corporate_governance,_each_of_which_provides_great_value_to_the_Board_of_Directors._Mr._Macricostas_is_a_member_of_the_Cyber_Security_Committee_of_the_Board.` <dbl> …
## $ `notes__-OTHER`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <dbl> …

# Correlation for both categories of ceo_dismissal
data_correlation_0 <- data_binarized %>%
    correlate(`ceo_dismissal__0`)  # Correlation for one class

data_correlation_other <- data_binarized %>%
    correlate(`ceo_dismissal__-OTHER`)  # Correlation for the other class

## Warning: correlate(): [Data Imbalance Detected] Consider sampling to balance the classes more than 5%
##   Column with imbalance: ceo_dismissal__-OTHER

# Display the correlation results
data_correlation_0

## # A tibble: 55 × 3
##    feature        bin                  correlation
##    <fct>          <chr>                      <dbl>
##  1 ceo_dismissal  0                         1     
##  2 ceo_dismissal  -OTHER                   -1     
##  3 departure_code 7                         0.894 
##  4 departure_code -OTHER                   -0.894 
##  5 leftofc        1996-08-01T00:00:00Z     -0.284 
##  6 leftofc        -OTHER                    0.284 
##  7 still_there    12jul2020                -0.268 
##  8 fyear_gone     -Inf_1998                -0.130 
##  9 co_per_rol     4323_7354                -0.104 
## 10 still_there    12sep2020                -0.0996
## # ℹ 45 more rows

data_correlation_other

## # A tibble: 55 × 3
##    feature        bin                  correlation
##    <fct>          <chr>                      <dbl>
##  1 ceo_dismissal  0                        -1     
##  2 ceo_dismissal  -OTHER                    1     
##  3 departure_code 7                        -0.894 
##  4 departure_code -OTHER                    0.894 
##  5 leftofc        1996-08-01T00:00:00Z      0.284 
##  6 leftofc        -OTHER                   -0.284 
##  7 still_there    12jul2020                 0.268 
##  8 fyear_gone     -Inf_1998                 0.130 
##  9 co_per_rol     4323_7354                 0.104 
## 10 still_there    12sep2020                 0.0996
## # ℹ 45 more rows

# Step 3: Plot the correlation funnel
data_correlation_0 %>% 
    correlationfunnel::plot_correlation_funnel() +
    labs(title = "Correlation Funnel for CEO Dismissal (Class 0)")

## Warning: ggrepel: 30 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

data_correlation_other %>% 
    correlationfunnel::plot_correlation_funnel() +
    labs(title = "Correlation Funnel for CEO Dismissal (Class Other)")

## Warning: ggrepel: 30 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Apply it to Your Data 5: CEO Departures

Job Boonstoppel

2024-10-16

Import Data

Explore Data

Issues with the data:

Missing Values:

Factors or Numeric Variables:

Zero Variance Variables:

Character Variables:

Unbalanced Target Variable:

ID Variable:

Clean Data

Explore Data