Purpose

This script is created to merge the the SDC mergers dataset with the Compustat, CRSP, and Execucomp datasets to replicate the results of Furfine and Rosen (2011) on the impact of mergers on default risk. The script will also create a dataset that can be used for further analysis.

Loading the dataset

# Load the SDC mergers dataset

sdc_clean <- read_csv("data/processed/sdc_clean.csv")
n_sdc <- as.character(nrow(sdc_clean))

There are currently 3805 rows in the SDC mergers dataset. This data was filtered in the cleaning script. In short, the filter we used were as follows:

1. Completed merger and acquistion transactions
2. Acquiring more than 50\% of the target. Acquirer's post-merger ownership is greater than 90\%
3. Excluding banks and utility companies based on SIC code
4. Time elapsed between date announced and date completed less than 365 days
5. Acquirer's asset to deal size between 5\% and 150\%

Identifier

We now merge the clean SDC merger data set with the Execucomp data. Michael Ewens, a Finance Professor from Columbia has created a table that maps SDC deal number to target gvkey tgvkey and acquirer’s gvkey agvkey.

deal_identier <- read_csv("data/dealnum_to_gvkey.csv")

# rename DealNumber to deal_number

deal_identier <- deal_identier %>%
  rename(deal_number = DealNumber)

names(deal_identier )
## [1] "deal_number"      "agvkey"           "tgvkey"           "updated2024"     
## [5] "multiple_targets"

We now performing the merge using deal_number.

# Before merging, let make sure that deal_number is a character variable in both datasets

sdc_clean <- sdc_clean %>%
  mutate(deal_number = as.character(deal_number))

deal_identier <- deal_identier %>%
  mutate(deal_number = as.character(deal_number))

sdc_clean <- sdc_clean %>%
  left_join(deal_identier, by = "deal_number")

I define successful matches as those observations where either tgvkey or agvkey is not missing.

# count the number of successful matches

sdc_clean %>%
  filter(!is.na(tgvkey) | !is.na(agvkey)) %>%
  summarise(n_matches = n()) %>%
  kable("html", caption = "Number of Successful Matches")%>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Number of Successful Matches
n_matches
1914

Looking at transactions, where we have a agvkey identifier.

sdc_clean %>%
  filter(!is.na(agvkey)) %>%
  summarise(n_agvkey = n()) %>%
  kable("html", caption = "Number of Transactions with Acquirer GVKEY")%>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Number of Transactions with Acquirer GVKEY
n_agvkey
1758

We will only keep observations we have agvkey as we are only interested in the acquirer.

sdc_clean <- sdc_clean %>%
  filter(!is.na(agvkey))

n_sdc_clean <- as.character(nrow(sdc_clean))

After filtering, we have 1758 rows in the SDC mergers dataset with acquirer gvkey.

Execucomp Data

The Execucomp data was downloaded on 07/27/2029. gvkey is the main identifier

# Load the Execucomp dataset

execucomp_raw <- read_csv("data/raw/execucomp.csv")

names(execucomp_raw)
## [1] "gvkey"         "year"          "execid"        "exec_fullname"
## [5] "co_per_rol"    "pceo"          "total_alt1"

In short the variables are defined as follows:

- `gvkey`: Unique identifier for the company
- `year`: Year of the observation
- `total_alt1`: Total Value of Stock/Options Compensation
- `pceo`: current CEO
- `execid`: Executive ID
- `exec_fullname`: Full name of the excecutive

We now filter the Execucomp dataset to only include the current CEO.

# Show count of executive levels

execucomp_raw %>% 
  group_by(year, pceo) %>%
  summarise(n_executives = n()) %>%
  kable("html", col.names = c("Year", "CEO", "Count") ,caption = "Count of Executive Levels") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Count of Executive Levels
Year CEO Count
1992 CEO 578
1992 NA 7467
1993 CEO 669
1993 NA 9152
1994 CEO 713
1994 NA 9976
1995 CEO 782
1995 NA 10365
1996 CEO 855
1996 NA 10841
1997 CEO 856
1997 NA 11207
1998 CEO 869
1998 NA 11787
1999 CEO 780
1999 NA 11434
2000 CEO 707
2000 NA 10835
2001 CEO 692
2001 NA 10692
2002 CEO 740
2002 NA 10819
2003 CEO 788
2003 NA 11029
2004 CEO 768
2004 NA 10136
2005 CEO 699
2005 NA 8689
2006 CEO 819
2006 NA 10152
2007 CEO 1046
2007 NA 12653
2008 CEO 1069
2008 NA 12156
2009 CEO 1089
2009 NA 11596
2010 CEO 1124
2010 NA 11282
2011 CEO 1138
2011 NA 11139
2012 CEO 1180
2012 NA 10970
2013 CEO 1259
2013 NA 10847
2014 CEO 1332
2014 NA 10631
2015 CEO 1373
2015 NA 10340
2016 CEO 1405
2016 NA 9840
2017 CEO 1458
2017 NA 9462
2018 CEO 1513
2018 NA 9141
2019 CEO 1577
2019 NA 8784
2020 CEO 1641
2020 NA 8554
2021 CEO 1678
2021 NA 8256
2022 CEO 1726
2022 NA 8077
2023 CEO 1728
2023 NA 7708
2024 CEO 1627
2024 NA 6809
# We now filter the Execucomp dataset to only include the current CEO

execucomp_raw %>%
  filter(pceo == 1) %>%
  select(gvkey, year, total_alt1, execid, exec_fullname)
## # A tibble: 0 × 5
## # ℹ 5 variables: gvkey <chr>, year <dbl>, total_alt1 <dbl>, execid <chr>,
## #   exec_fullname <chr>
# Fiter to only include CEOs 

execucomp_ceo <- execucomp_raw %>%
  filter(pceo == "CEO")

# export this list as csv

write_csv(execucomp_ceo, "data/processed/execucomp_ceo.csv")

We now look at the distribution of the value of option-based award. The variable total_alt1 is the total value of stock/options compensation, which we will use as a proxy for option-based award.

# Show distribution

execucomp_ceo %>%
  ggplot(aes(x = total_alt1)) +
  geom_histogram(bins = 10000, fill = "blue", color = "black") +
  labs(title = "Histogram of Option-Based Award Value",
       x = "Option-Based Award Value",
       y = "Frequency") +
  theme_minimal()

# Show summary statistics of the value of option-based award

execucomp_ceo %>%
  summarise(
    mean_option_awards_blk_value = mean(total_alt1, na.rm = TRUE),
    median_option_awards_blk_value = median(total_alt1, na.rm = TRUE),
    sd_option_awards_blk_value = sd(total_alt1, na.rm = TRUE)
  ) %>%
  kable("html", caption = "Summary Statistics of Option-Based Award Value") %>%
   kable_styling(bootstrap_options = c("striped", "hover"))
Summary Statistics of Option-Based Award Value
mean_option_awards_blk_value median_option_awards_blk_value sd_option_awards_blk_value
6558.36 3859.697 32556.69

Merging SDC with Execucomp

# Merging the SDC dataset with the Execucomp dataset using gvkey and year

sdc_clean <- sdc_clean %>%
  mutate(year = year(de))

# Make sure gvkey and year are character variables in both datasets

sdc_clean <- sdc_clean %>%
  mutate(agvkey = as.character(agvkey),
         year = as.character(year))

execucomp_ceo <- execucomp_ceo %>%
  mutate(gvkey = as.character(gvkey),
         year = as.character(year))

sdc_merge_ceo <- sdc_clean %>%
  left_join(execucomp_ceo, by = c("agvkey" = "gvkey", "year" = "year"))
# Count the number of rows in the merged dataset

n_sdc_merge_ceo <- as.character(nrow(sdc_merge_ceo))

Merging the SDC dataset with the Execucomp dataset has resulted in 1758 rows in the dataset with CEO information.

# Show the number of CEOs by year in the merged dataset

sdc_merge_ceo %>%
  group_by(year) %>%
  summarise(n_ceos = n_distinct(execid)) %>%
  kable("html", col.names = c("Year", "Number of CEOs"), caption = "Number of CEOs by Year in Merged Dataset") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Number of CEOs by Year in Merged Dataset
Year Number of CEOs
1994 1
1995 1
1996 1
1997 1
1998 3
1999 6
2000 7
2001 4
2002 3
2003 1
2004 6
2005 4
2006 4
# Show distribution of option-based award value in the merged dataset

sdc_merge_ceo %>%
  ggplot(aes(x = total_alt1)) +
  geom_histogram(bins = 10000, fill = "blue", color = "black") +
  labs(title = "Histogram of Option-Based Award Value in Merged Dataset",
       x = "Option-Based Award Value",
       y = "Frequency") +
  theme_minimal()