load and clean data
data <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2021/2021-04-27/departures.csv')
## Rows: 9423 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): coname, exec_fullname, interim_coceo, still_there, notes, sources...
## dbl (10): dismissal_dataset_id, gvkey, fyear, co_per_rol, departure_code, c...
## dttm (1): leftofc
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data_clean <- data %>%
select(-c(`_merge`, still_there, sources, eight_ks)) %>%
na.omit() %>%
mutate(across(c(departure_code, ceo_dismissal), as.factor))
h2o.init()
## Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 8 days 19 hours
## H2O cluster timezone: America/New_York
## H2O data parsing timezone: UTC
## H2O cluster version: 3.44.0.3
## H2O cluster version age: 1 year, 4 months and 11 days
## H2O cluster name: H2O_started_from_R_owner_dit581
## H2O cluster total nodes: 1
## H2O cluster total memory: 1.29 GB
## H2O cluster total cores: 8
## H2O cluster allowed cores: 8
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## H2O Internal Security: FALSE
## R Version: R version 4.3.1 (2023-06-16)
## Warning in h2o.clusterInfo():
## Your H2O cluster version is (1 year, 4 months and 11 days) old. There may be a newer version available.
## Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
split data
set.seed(123)
data_split <- initial_split(data_clean, strata = ceo_dismissal)
train_tbl <- training(data_split)
test_tbl <- testing(data_split)
pre with recipes
ceo_recipe <- recipe(ceo_dismissal ~ ., data = train_tbl) %>%
update_role(dismissal_dataset_id, new_role = "ID") %>%
step_date(leftofc, keep_original_cols = FALSE) %>%
step_other(coname, exec_fullname) %>%
step_dummy(all_nominal_predictors()) %>%
step_normalize(all_numeric_predictors()) %>%
step_zv(all_predictors()) %>%
prep()
train_prepped <- bake(ceo_recipe, new_data = train_tbl)
test_prepped <- bake(ceo_recipe, new_data = test_tbl)
## Warning: ! There are new levels in `notes`: "Prior to his appointment as Executive Vice
## President of Administration on February 26, 2007, Mr. Hann had served as
## Interim President and Chief Executive Officer from March 27, 2006 through
## February 26, 2007. He quit and retired from his Vp spot 3 months after the
## new person came in", "George J. Harad relinquished the Chief Executive
## position at Boise in October 2004, when Boise sold its forest product assets
## and changed its name to OfficeMax .", "MELVYN J. ESTRIN, age 56, has served
## as a director of the Corporation since 1990. Mr. Estrin has also served as
## Co-Chairman of the Board of the Corporation since March 1991 and was
## appointed Co-Chief Executive Officer of the Corporation in October 1991.
## Dallas, Texas—December 11, 2002—Avatex Corporation (OTCBB: AVAT) today
## announced that it and five subsidiaries each filed a voluntary petition for
## relief under chapter 11 of the Bankruptcy Code in the United States
## Bankruptcy Court for the Northern District of Texas, Dallas Division. . . .
## The Board also terminated without cause the employment of all of company's
## other officers, including its Co-Chief Executive Officers, Abbey J. Butler
## and Melvyn J. Estrin.", "The company said Chairman James V. Napier will be
## interim chief executive officer while a search is under way for a permanent
## replacement.", "He was there to find a replacement after the company fired
## Thoman. He was part of a settlement with the SEC for accounting fraud in
## 2003", "Cline will retire as chairman of Airborne's board on April 30, along
## with Robert G. Brazier, the vice chairman. During his time, Airborne saw its
## financial performance sag. Excluding federal assistance of $5.2 million in
## the fourth quarter and a one-time sale, the company lost money in every
## quarter last year.", "Mr. Smucker, 68, has been a Director since October
## 1973. He has been the Company’s Chairman of the Board since 1987 and served
## as Co-Chief Executive Officer from February 2001 through August 2011.", "Nov
## 19, 2009-- In a move that signals Tyson Foods Inc. believes its chicken
## segment is again profitable, the world's largest meat maker has named a new
## CEO to replace interim head Leland Tollett, who had been tapped in January to
## help weather an industry downturn. Tollett, who had also been interim
## president since January, will assist the 50-year-old Smith during the
## transition period.", "Cumenal, 57, who had run the company since April 2015,
## is being succeeded on an interim basis by chairman and former CEO Michael
## Kowalski, Tiffany said on Sunday afternoon. The shake-up follows the
## departure of the jeweler’s top designer three weeks ago and weak holiday
## sales that sent the stock tumbling. February 6, 2017, Tiffany & Co. abruptly
## replaced Chief Executive Officer Frederic Cumenal after disappointing
## financial results, just hours before the jewelry chain introduced a new
## campaign with the first Super Bowl ad in its history.", "Metz succeeds
## Christopher Twomey, the company’s chairman and longtime CEO who returned to
## serve as an interim CEO after the company fired its previous CEO, Claude
## Jordan, on June 2. Twomey will remain chairman of the board. “I am excited to
## lead a terrific team at Arctic Cat.” Christopher Metz, Arctic Cat’s new chief
## executive, will start Dec. 3.", "Mr. Leonard Joseph Feinstein is a Co-Founder
## of Bed Bath & Beyond, Inc. and has been its Co-Chairman since 1999. Mr.
## Feinstein served as Co-Chief Executive Officer of Bed Bath & Beyond Inc. from
## 1971 to April 2003 and President from 1992 to 1999. He has been a Director of
## Bed Bath & Beyond, Inc. since 1971.", "At the age of 81, Mr. Shaw passed away
## in his home that morning.", "Mr. Houdaille ceased to be an executive officer
## of the Company as of November 1996. Mr. Houdaille served as President and
## Chief Executive Officer of the company from May 1996 to November 1996. SEC
## filing said that he was filling the roll they could meet as a board to
## discuss", "Gibson Greetings Fires CEO: Gibson Greetings Inc. said Benjamin J.
## Sottile will no longer head the Cincinnati-based company. Albert R. Pezzillo,
## chairman of Gibson’s executive committee, was named interim chairman and CEO
## while the company searches for a successor. GIBSON GREETINGS INC.,
## Cincinnati, named Frank O'Connell president and chief executive. Mr.
## O'Connell succeeds Albert R. Pezzillo, who will continue as chairman.",
## "Christian Wilhelm Erich Haub or CH is and has been a member of the Board of
## Directors of the Company (the “Board”) since December 3, 1991, has served as
## Chairman of the Board since May 1, 2001 and has served as Chair of the
## Executive Committee of the Board since August 15, 2005. In addition, CH
## served as Interim President and Chief Executive Officer of the Company from
## October 20, 2009 through February 8, 2010, Chief Executive Officer of the
## Company from May 1, 1998 through August 15, 2005 and President of the Company
## from December 7, 1993 through February 24, 2002, and from November 4, 2002
## through November 15, 2004.", "Restaurant operator Luby's Inc. has named
## Christopher J. Pappas president and CEO and Harris J. Pappas as chief
## operating officer. The company said David Daviss resigned as acting chief
## executive and chairman. He'll remain a member of the board. Board member
## Robert T. Herres has been elected to the chairman post.", "ROBERT v.d. LUFT
## [|] Age 63 [|] Director Since 1992 [|] Chadds Ford, Pennsylvania [|] -Acting
## Chief Executive Officer of Entergy, May-December, 1998", "Retired once the
## issues with the company were solved", …, "On March 8, 2017, the Board
## appointed Jeffrey L. Rutherford, age 56, as Interim President and Interim
## Chief Executive Officer, effective immediately. The Board also elected Mr.
## Rutherford as Chairman of the Board, effective immediately. Mr. Rutherford
## joined the Board on February 16, 2017.", and "Irvine’s Opus Bank (Nasdaq:
## OPB), the third largest bank based in Orange County, named Paul W. Taylor as
## chief executive officer and president, effective today, May 1, 2019. Taylor
## succeeds Paul G. Greig, chairman of Opus’ board, who served as interim CEO
## after the departure last November of founder Stephen Gordon.".
## ℹ Consider using step_novel() (`?recipes::step_novel()`) before `step_dummy()`
## to handle unseen values.
convert h20 to frames
train_h2o <- as.h2o(train_prepped)
##
|
| | 0%
|
|======================================================================| 100%
test_h2o <- as.h2o(test_prepped)
##
|
| | 0%
|
|======================================================================| 100%
train H20 model
h2o_model <- h2o.gbm(
x = setdiff(names(train_h2o), "ceo_dismissal"),
y = "ceo_dismissal",
training_frame = train_h2o,
model_id = "ceo_dismissal_gbm_model",
seed = 1234
)
##
|
| | 0%
|
|======================================================================| 100%
evaluate model
perf <- h2o.performance(h2o_model, newdata = test_h2o)
print(perf)
## H2OBinomialMetrics: gbm
##
## MSE: 0.001510004
## RMSE: 0.03885877
## LogLoss: 0.009715358
## Mean Per-Class Error: 0
## AUC: 1
## AUCPR: 1
## Gini: 1
## R^2: 0.9471041
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## 0 1 Error Rate
## 0 66 0 0.000000 =0/66
## 1 0 2 0.000000 =0/2
## Totals 66 2 0.000000 =0/68
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.984863 1.000000 1
## 2 max f2 0.984863 1.000000 1
## 3 max f0point5 0.984863 1.000000 1
## 4 max accuracy 0.984863 1.000000 1
## 5 max precision 0.995013 1.000000 0
## 6 max recall 0.984863 1.000000 1
## 7 max specificity 0.995013 1.000000 0
## 8 max absolute_mcc 0.984863 1.000000 1
## 9 max min_per_class_accuracy 0.984863 1.000000 1
## 10 max mean_per_class_accuracy 0.984863 1.000000 1
## 11 max tns 0.995013 66.000000 0
## 12 max fns 0.995013 1.000000 0
## 13 max fps 0.000461 66.000000 42
## 14 max tps 0.984863 2.000000 1
## 15 max tnr 0.995013 1.000000 0
## 16 max fnr 0.995013 0.500000 0
## 17 max fpr 0.000461 1.000000 42
## 18 max tpr 0.984863 1.000000 1
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
h2o.auc(perf)
## [1] 1
h2o.confusionMatrix(perf)
## Confusion Matrix (vertical: actual; across: predicted) for max f1 @ threshold = 0.984862555298816:
## 0 1 Error Rate
## 0 66 0 0.000000 =0/66
## 1 0 2 0.000000 =0/2
## Totals 66 2 0.000000 =0/68
plot(perf, type = "roc")

predict on test data
predictions <- h2o.predict(h2o_model, test_h2o)
##
|
| | 0%
|
|======================================================================| 100%
head(predictions)
## predict p0 p1
## 1 0 0.688898615 0.3111013851
## 2 0 0.979731526 0.0202684744
## 3 1 0.004986509 0.9950134909
## 4 0 0.999535375 0.0004646254
## 5 0 0.999535375 0.0004646253
## 6 1 0.015137445 0.9848625553