library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
departures <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-04-27/departures.csv')
## Rows: 9423 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): coname, exec_fullname, interim_coceo, still_there, notes, sources...
## dbl (10): dismissal_dataset_id, gvkey, fyear, co_per_rol, departure_code, c...
## dttm (1): leftofc
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
departures %>% distinct(coname)
## # A tibble: 3,860 × 1
## coname
## <chr>
## 1 SONICBLUE INC
## 2 AMERICAN AIRLINES GROUP INC
## 3 ABBOTT LABORATORIES
## 4 ADVANCED MICRO DEVICES
## 5 AETNA INC
## 6 AHMANSON (H F) & CO
## 7 AIR PRODUCTS & CHEMICALS INC
## 8 ALBERTO-CULVER CO
## 9 ALBERTSON'S INC
## 10 ALCAN INC
## # ℹ 3,850 more rows
departures %>% distinct(notes)
## # A tibble: 7,756 × 1
## notes
## <chr>
## 1 "Ballard took over when the outgoing CEO said that the board has given itsel…
## 2 "Mr. Crandall, the chairman and chief executive, announced that he would ret…
## 3 "Donald Carty resigned as the head of American Airlines today, but an abrupt…
## 4 "The top two executives of Abbott Laboratories took Wall Street and the comp…
## 5 "Advanced Micro Devices founder and chairman Walter Sanders is to step down …
## 6 "Joseph T. Sebastianelli resigned as Aestna's president and chief executive …
## 7 "Richard H. Deihl has retired as chairman of H.F. Ahmanson & Co., the parent…
## 8 "In March, Ahmanson had agreed to be acquired. Within weeks, it would exist …
## 9 "DeKruif came up as an insurance salesman with H.F. Ahmanson & Co. and rose …
## 10 "Air Products and Chemicals (AP) said Thursday its board has elected AP pres…
## # ℹ 7,746 more rows
departures %>% distinct(coname, exec_fullname)
## # A tibble: 9,203 × 2
## coname exec_fullname
## <chr> <chr>
## 1 SONICBLUE INC L. Gregory Ballard
## 2 AMERICAN AIRLINES GROUP INC Robert L. Crandall
## 3 AMERICAN AIRLINES GROUP INC Donald J. Carty
## 4 ABBOTT LABORATORIES Duane L. Burnham
## 5 ADVANCED MICRO DEVICES Walter Jeremiah Sanders III
## 6 AETNA INC Ronald Edward Compton
## 7 AHMANSON (H F) & CO Richard H. Deihl
## 8 AHMANSON (H F) & CO Charles R. Rinehart
## 9 AHMANSON (H F) & CO Robert M. De Kruif
## 10 AIR PRODUCTS & CHEMICALS INC Harold A. Wagner
## # ℹ 9,193 more rows
library(tidytext)
tidy_departures <-
departures %>%
mutate(notes = str_replace_all(notes, "\x92", "'")) %>%
unnest_tokens(word, notes) %>%
anti_join(get_stopwords())
## Joining with `by = join_by(word)`
tidy_departures %>%
count(word, sort = TRUE)
## # A tibble: 25,553 × 2
## word n
## <chr> <int>
## 1 company 6713
## 2 executive 5447
## 3 chief 4946
## 4 chairman 4814
## 5 ceo 4744
## 6 board 4352
## 7 officer 4213
## 8 president 4042
## 9 mr 3845
## 10 inc 2945
## # ℹ 25,543 more rows
tidy_departures %>%
count(word, word, sort = TRUE)
## # A tibble: 25,553 × 2
## word n
## <chr> <int>
## 1 company 6713
## 2 executive 5447
## 3 chief 4946
## 4 chairman 4814
## 5 ceo 4744
## 6 board 4352
## 7 officer 4213
## 8 president 4042
## 9 mr 3845
## 10 inc 2945
## # ℹ 25,543 more rows
departures_sparse <-
tidy_departures %>%
count(exec_fullname, word) %>%
bind_tf_idf(word, exec_fullname, n) %>%
slice_max(order_by = tf_idf, n = 1000) %>%
cast_sparse(exec_fullname, word, n)
dim(departures_sparse)
## [1] 651 923
library(stm)
## stm v1.3.6.1 successfully loaded. See ?stm for help.
## Papers, resources, and other materials at structuraltopicmodel.com
set.seed(123)
topic_model <- stm(departures_sparse, K = 3, verbose = FALSE)
summary(topic_model)
## A topic model with 3 topics, 651 documents and a 923 word dictionary.
## Topic 1 Top Words:
## Highest Prob: regarding, gallagher, planned, web, exec, callon, dismissal
## FREX: planned, regarding, gallagher, web, information, old, exec
## Lift: 75yrs, 49, bailey's, old, planned, advance, orderly
## Score: regarding, planned, gallagher, exec, web, callon, dismissal
## Topic 2 Top Words:
## Highest Prob: checkfree, tcby, barker, roadway, circus, justin, motorola
## FREX: checkfree, tcby, barker, roadway, circus, justin, motorola
## Lift: checkfree, premeditated, tcby, barker, roadway, circus, justin
## Score: checkfree, tcby, barker, roadway, circus, justin, motorola
## Topic 3 Top Words:
## Highest Prob: execucomp, shaw, sold, heart, age, interim, appear
## FREX: shaw, heart, age, died, attack, goes, casey’s
## Lift: rusty, chip, goes, covering, accused, paased, away
## Score: execucomp, shaw, age, heart, interim, sold, died
summary(topic_model)
## A topic model with 3 topics, 651 documents and a 923 word dictionary.
## Topic 1 Top Words:
## Highest Prob: regarding, gallagher, planned, web, exec, callon, dismissal
## FREX: planned, regarding, gallagher, web, information, old, exec
## Lift: 75yrs, 49, bailey's, old, planned, advance, orderly
## Score: regarding, planned, gallagher, exec, web, callon, dismissal
## Topic 2 Top Words:
## Highest Prob: checkfree, tcby, barker, roadway, circus, justin, motorola
## FREX: checkfree, tcby, barker, roadway, circus, justin, motorola
## Lift: checkfree, premeditated, tcby, barker, roadway, circus, justin
## Score: checkfree, tcby, barker, roadway, circus, justin, motorola
## Topic 3 Top Words:
## Highest Prob: execucomp, shaw, sold, heart, age, interim, appear
## FREX: shaw, heart, age, died, attack, goes, casey’s
## Lift: rusty, chip, goes, covering, accused, paased, away
## Score: execucomp, shaw, age, heart, interim, sold, died
word_topics <- tidy(topic_model, matrix = "beta")
word_topics
## # A tibble: 2,769 × 3
## topic term beta
## <int> <chr> <dbl>
## 1 1 info 1.43e- 3
## 2 2 info 2.75e- 9
## 3 3 info 8.54e- 7
## 4 1 fraud 2.86e- 3
## 5 2 fraud 6.34e- 9
## 6 3 fraud 2.56e- 6
## 7 1 75yrs 1.43e- 3
## 8 2 75yrs 1.38e-16
## 9 3 75yrs 1.31e-13
## 10 1 premeditated 4.48e-14
## # ℹ 2,759 more rows
word_topics %>%
group_by(topic) %>%
slice_max(beta, n = 10) %>%
ungroup() %>%
mutate(topic = paste("Topic", topic)) %>%
ggplot(aes(beta, reorder_within(term, beta, topic), fill = topic)) +
geom_col(show.legend = FALSE) +
facet_wrap(vars(topic), scales = "free_y") +
scale_x_continuous(expand = c(0, 0)) +
scale_y_reordered() +
labs(x = expression(beta), y = NULL)
departures_topics <- tidy(topic_model,
matrix = "gamma",
document_names = rownames(departures_sparse)
)
departures_topics
## # A tibble: 1,953 × 3
## document topic gamma
## <chr> <int> <dbl>
## 1 Larry Stephen Smith 1 0.852
## 2 Anu D. Saad 1 0.852
## 3 Thomas G. Deranek 1 0.894
## 4 Wayne R. Weidner 1 0.217
## 5 C. Lee Cox 1 0.137
## 6 John R. Frantz 1 0.184
## 7 Clifford W. Illig 1 0.852
## 8 Russell A. Childs 1 0.184
## 9 Arif Shakeel 1 0.893
## 10 Christopher William Eccleshare 1 0.893
## # ℹ 1,943 more rows
departures_topics %>%
group_by(topic) %>%
slice_max(order_by = gamma, n = 5) %>%
ungroup() %>%
mutate(
notes = fct_reorder(document, gamma),
topic = factor(topic)
) %>%
ggplot(aes(gamma, topic, fill = topic)) +
geom_col(show.legend = FALSE) +
facet_wrap(vars(notes), ncol = 3) +
scale_x_continuous(expand = c(0, 0)) +
labs(x = expression(gamma), y = "Topic")
ChatGPT 1.1 The goal of this analysis is to explore and understand the patterns in executive departures across various companies using topic modeling. By analyzing the notes on these departures, the analysis aims to uncover common themes or reasons behind these executive changes.
1.2 The dataset includes several key variables:
coname: The name of the company. exec_fullname: Full name of the departing executive. notes: Descriptive notes about the executive’s departure. Additional details like dismissal dataset ID, fiscal year, departure codes, etc.
1.3 coname and exec_fullname are categorical, identifying specific companies and individuals. notes is a text variable providing qualitative insights into each departure. Other quantitative variables provide contextual information about each departure.
2.1 In preparation for modeling:
Text in notes is cleaned and split into individual words (tokenization). Irrelevant common words (stop words) are removed to focus on significant terms. Data is transformed into a sparse matrix, emphasizing key terms associated with executives, essential for topic modeling.
3.1 The analysis employs a Structural Topic Model (STM), a specialized type of topic model that can handle metadata and textual data effectively.
3.2 The process to determine the optimal number of topics (k) isn’t detailed in the provided script. Generally, this involves methods like evaluating model fit, coherence scores, or relying on domain expertise. In this analysis, k is set to 3, but the rationale behind this choice isn’t specified.
4 The analysis identifies three main topics within the executive departure notes. Each topic is defined by a set of words with the highest probability of occurrence, suggesting different contexts or reasons for these departures. The topics likely represent various themes such as organizational changes, personal reasons for leaving, or industry-specific issues influencing executive turnover. This provides a nuanced understanding of the dynamics behind executive departures in different companies.