Import data

departures <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2021/2021-04-27/departures.csv')
## Rows: 9423 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (8): coname, exec_fullname, interim_coceo, still_there, notes, sources...
## dbl  (10): dismissal_dataset_id, gvkey, fyear, co_per_rol, departure_code, c...
## dttm  (1): leftofc
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Explore Data

skimr::skim(departures)
Data summary
Name departures
Number of rows 9423
Number of columns 19
_______________________
Column type frequency:
character 8
numeric 10
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
coname 0 1.00 2 30 0 3860 0
exec_fullname 0 1.00 5 790 0 8701 0
interim_coceo 9105 0.03 6 7 0 6 0
still_there 7311 0.22 3 10 0 77 0
notes 1644 0.83 5 3117 0 7755 0
sources 1475 0.84 18 1843 0 7915 0
eight_ks 4499 0.52 69 3884 0 4914 0
_merge 0 1.00 11 11 0 1 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
dismissal_dataset_id 0 1.00 5684.10 25005.46 1 2305.5 4593 6812.5 559044 ▇▁▁▁▁
gvkey 0 1.00 40132.48 53921.34 1004 7337.0 14385 60900.5 328795 ▇▁▁▁▁
fyear 0 1.00 2007.74 8.19 1987 2000.0 2008 2016.0 2020 ▁▆▅▅▇
co_per_rol 0 1.00 25580.22 18202.38 -1 8555.5 22980 39275.5 64602 ▇▆▅▃▃
departure_code 1667 0.82 5.20 1.53 1 5.0 5 7.0 9 ▁▃▇▅▁
ceo_dismissal 1813 0.81 0.20 0.40 0 0.0 0 0.0 1 ▇▁▁▁▂
tenure_no_ceodb 0 1.00 1.03 0.17 0 1.0 1 1.0 3 ▁▇▁▁▁
max_tenure_ceodb 0 1.00 1.05 0.24 1 1.0 1 1.0 4 ▇▁▁▁▁
fyear_gone 1802 0.81 2006.64 13.63 1980 2000.0 2007 2013.0 2997 ▇▁▁▁▁
cik 245 0.97 741469.17 486551.43 1750 106413.0 857323 1050375.8 1808065 ▆▁▇▂▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
leftofc 1802 0.81 1981-01-01 2998-04-27 2006-12-31 3627

Issues with the data

Missing Values *interim_coceo, still_there, notes (string variable), sources, eight_ks, departure_code, ceo_dismissal, fyear_gone, cik

Converting Numeric Variables to Factors * tenure_no_ceodb, max_tenure_ceodb, fyear_gone

Zero variance variables * _merge, gvkey, dismissal_dataset_id

Character Variables * coname, exec_fullname, sources, eight_ks, notes (string variable and not factor)

Unbalanced Target Variable ceo_dismissal #adress in recipe section step_smoke function

Handling ID Variables * dismissal_dataset_id #dataset primary key

Explore data

# Clean data
departures_clean <- departures %>%
    
    # Clean the target variable
    filter(!is.na(ceo_dismissal)) %>%
    mutate(ceo_dismissal = if_else(ceo_dismissal == 1, "dismissed", "not_dis")) %>%
    mutate(ceo_dismissal = as.factor(ceo_dismissal)) %>%
    
    # Remove variables with too many missing values
    select(-c(interim_coceo, still_there, eight_ks))%>%
    
    # Remove irrelevant variables
    select(-`_merge`, -sources) %>%
               
    # Remove variables with info that only becomes 
    select(-departure_code) %>%
    
    # Remove redundant variables 
    select(-c(gvkey, cik, co_per_rol, leftofc, fyear)) %>%
    
    #Remove duplicated in dismissal_dataset_id our id variable
    distinct(dismissal_dataset_id, .keep_all = TRUE) %>%
    
    #Remove 2997 in fyear_gone
    filter(fyear_gone < 2025) %>%
    
    # Convert factors that are incorrectly imported as numeric variables
    mutate(across(c(tenure_no_ceodb, max_tenure_ceodb, fyear_gone), as.factor)) %>%
    
    mutate(across(where(is.character), as.factor)) %>%
    
    mutate(notes = as.character(notes))

skimr::skim(departures_clean) 
Data summary
Name departures_clean
Number of rows 7475
Number of columns 8
_______________________
Column type frequency:
character 1
factor 6
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
notes 17 1 5 3117 0 7448 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
coname 0 1 FALSE 3427 BAR: 8, CLA: 8, FED: 8, GRE: 8
exec_fullname 0 1 FALSE 6975 Joh: 4, Mel: 4, Alb: 3, Ami: 3
ceo_dismissal 0 1 FALSE 2 not: 5992, dis: 1483
tenure_no_ceodb 0 1 FALSE 3 1: 7289, 2: 179, 3: 7
max_tenure_ceodb 0 1 FALSE 4 1: 7138, 2: 319, 3: 15, 4: 3
fyear_gone 0 1 FALSE 34 200: 379, 199: 351, 200: 334, 200: 321

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
dismissal_dataset_id 0 1 5570.32 25757.33 1 2175.5 4326 6579.5 559044 ▇▁▁▁▁

Explore data

departures_clean %>% count(ceo_dismissal)
## # A tibble: 2 × 2
##   ceo_dismissal     n
##   <fct>         <int>
## 1 dismissed      1483
## 2 not_dis        5992
departures_clean %>%
    ggplot(aes(ceo_dismissal)) + 
    geom_bar() +
    labs(title = "CEO Dismissal Count", x = "CEO Dismissal", y = "Count")

CEO Dismissal or Tenure

departures_clean %>%
  count(ceo_dismissal, tenure_no_ceodb) %>%
  ggplot(mapping = aes(x = ceo_dismissal, y = tenure_no_ceodb)) +
  geom_tile(mapping = aes(fill = n)) +
  labs(title = "CEO Dismissal or Tenure", x = "CEO Dismissal", y = "CEO Tenure")

Correlation Plot

departures_clean <- departures_clean %>%
    drop_na(notes)

# Step 1: Binarize the data
data_binarized <- departures_clean %>%
    binarize()

data_binarized %>% glimpse()
## Rows: 7,458
## Columns: 46
## $ `dismissal_dataset_id__-Inf_2170.25`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <dbl> …
## $ dismissal_dataset_id__2170.25_4321.5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <dbl> …
## $ dismissal_dataset_id__4321.5_6575.75                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <dbl> …
## $ dismissal_dataset_id__6575.75_Inf                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       <dbl> …
## $ coname__BARRICK_GOLD_CORP                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               <dbl> …
## $ `coname__-OTHER`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ exec_fullname__John_W._Rowe                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             <dbl> …
## $ `exec_fullname__-OTHER`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 <dbl> …
## $ ceo_dismissal__dismissed                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <dbl> …
## $ ceo_dismissal__not_dis                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  <dbl> …
## $ tenure_no_ceodb__1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      <dbl> …
## $ tenure_no_ceodb__2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      <dbl> …
## $ `tenure_no_ceodb__-OTHER`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               <dbl> …
## $ max_tenure_ceodb__1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     <dbl> …
## $ max_tenure_ceodb__2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     <dbl> …
## $ `max_tenure_ceodb__-OTHER`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              <dbl> …
## $ fyear_gone__1993                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__1994                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__1995                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__1996                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__1997                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__1998                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__1999                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2000                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2001                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2002                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2003                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2004                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2005                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2006                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2007                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2008                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2009                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2010                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2011                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2012                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2013                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2014                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2015                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2016                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2017                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2018                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ fyear_gone__2019                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <dbl> …
## $ `fyear_gone__-OTHER`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <dbl> …
## $ `notes__Constantine_S._Macricostas_is_Chairman_of_the_Board_and_founder_of_the_Company._Mr._Macricostas_was_Executive_Chairman_of_the_Company_until_January_20,_2018._Mr._Macricostas_previously_served_as_Chief_Executive_Officer_of_the_Company_on_three_different_occasions_from_1974_until_August_1997,_from_February_2004_to_June_2005,_and_from_April_2009_until_May_2015._Mr._Macricostas_is_a_former_director_of_RagingWire_Data_Centers,_Inc.,_(“RagingWire”)._Mr._Macricostas_is_the_father_of_George_Macricostas._Mr._Macricostas’_knowledge_of_the_Company_and_its_operations,_as_well_as,_the_industry_is_invaluable_to_the_Board_of_Directors_in_evaluating_and_directing_the_Company’s_future._Through_his_long_service_to_the_Company_and_experience_in_the_photomask_industry,_he_has_developed_extensive_knowledge_in_the_areas_of_leadership,_safety,_risk_oversight,_management,_and_corporate_governance,_each_of_which_provides_great_value_to_the_Board_of_Directors._Mr._Macricostas_is_a_member_of_the_Cyber_Security_Committee_of_the_Board.` <dbl> …
## $ `notes__-OTHER`                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         <dbl> …
# Step 2: Correlation
data_correlation_dismissed <- data_binarized %>%
    correlate(ceo_dismissal__dismissed)

data_correlation_notdis <- data_binarized %>%
    correlate(ceo_dismissal__not_dis)

data_correlation_dismissed
## # A tibble: 46 × 3
##    feature          bin       correlation
##    <fct>            <chr>           <dbl>
##  1 ceo_dismissal    dismissed      1     
##  2 ceo_dismissal    not_dis       -1     
##  3 max_tenure_ceodb 1              0.0577
##  4 max_tenure_ceodb 2             -0.0533
##  5 fyear_gone       1999          -0.0390
##  6 fyear_gone       2002           0.0378
##  7 fyear_gone       2003           0.0303
##  8 fyear_gone       2009           0.0292
##  9 fyear_gone       2008           0.0261
## 10 fyear_gone       1997          -0.0255
## # ℹ 36 more rows
# Step 3: Plot
data_correlation_dismissed %>% 
    correlationfunnel::plot_correlation_funnel() +
    labs(title = "Correlation Funnel for CEO Dismissal")
## Warning: ggrepel: 28 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

data_correlation_notdis %>% 
    correlationfunnel::plot_correlation_funnel() +
    labs(title = "Correlation Funnel for CEO not Dismissal")
## Warning: ggrepel: 28 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps