Loading the dataset

The data files are downloaded from LSEG terminal and stored locally. Due to large size, the data is split into multiple files. The following code reads the files and combines them into a single dataset.

The sample period is from 07/01/1996 to

# Read the data files stored as xlsx, 

sdc_raw_one <- read_csv("sdc_1996_97_csv.csv")
sdc_raw_two <- read_csv("sdc_1998_00_csv.csv")
sdc_raw_three <- read_csv("sdc_2001_2003_csv.csv")
sdc_raw_four <- read_csv("sdc_2004_2006_csv.csv")

# Append these dataset into one dataset

sdc_raw <- bind_rows(sdc_raw_one, sdc_raw_two, sdc_raw_three, sdc_raw_four)

# Save as csv

write_csv(sdc_raw, "sdc_raw_full.csv")

n<-as.character(nrow(sdc_raw))

The raw data contains 367048 observations. We now rename the variables for easier access.

DA: Date Announced
DOA: Date Orginally Announced
DE: Date Effective
IDV: Implied Deal Value
DV: Deal Value
ACQ_MACRO_IND: Macro Industry Code of the Acquirer
TAR_MACRO_IND: Macro Industry Code of the Target
TAR_CUSIP: Target's CUSIP
ACQ_TOTALASSET_12: Acquirer's Total Assets (12 months prior to the deal)
ACQ_CUSIP: Acquirer's CUSIP
ACQ_INDUSTRY: Acquirer's Industry Group
ACQ_SECTOR: Acquirer's Sector
# Renaming the variables. First uncapitalize the names, remove the dots, and then rename them, remove space,

sdc <- sdc_raw %>%
  rename_with(~ tolower(.x), everything()) %>%
  rename_with(~ str_replace_all(.x, "\\.", "-"), everything())



# Drop acquiror total assets last 12 months (usd millions)---15 (duplication)

sdc <- sdc %>%
  select(-"acquiror total assets last 12 months (usd millions)---15",
         -"seller 6-digit cusip",
         -"seller primary sic")

names(sdc)
##  [1] "date announced"                                         
##  [2] "date originally announced"                              
##  [3] "date effective"                                         
##  [4] "implied deal value (usd millions)"                      
##  [5] "deal value (usd millions)"                              
##  [6] "acquiror macro industry"                                
##  [7] "target macro industry"                                  
##  [8] "acquiror total assets last 12 months (usd millions)---8"
##  [9] "acquiror 6-digit cusip"                                 
## [10] "acquiror industry group"                                
## [11] "acquiror industry sector"                               
## [12] "target 6-digit cusip"
# renaming variable
sdc <- sdc %>%
  rename(
    da="date announced",
    doa="date originally announced",
    de="date effective",
    idv="implied deal value (usd millions)",
    dv="deal value (usd millions)",
    acq_macro_ind = "acquiror macro industry",
    tar_macro_ind = "target macro industry",
    acq_totalasset= "acquiror total assets last 12 months (usd millions)---8",
    acq_cusip = "acquiror 6-digit cusip",
    tar_cusip = "target 6-digit cusip",
    acq_ind = "acquiror industry group",
    acq_sec = "acquiror industry sector"
  )

names(sdc)
##  [1] "da"             "doa"            "de"             "idv"           
##  [5] "dv"             "acq_macro_ind"  "tar_macro_ind"  "acq_totalasset"
##  [9] "acq_cusip"      "acq_ind"        "acq_sec"        "tar_cusip"

The first filter is to drop mergers where the time elapsed between the date announced and the date completion is greater than one year. Before the drop, we have 367048 mergers in the data. Variable time_duration_orginal represents the time duration between the date originally announced and the date completion. Variable time_duration represents the time duration between the date announced and the date completion. In short the differences between date orginally announced and date announced can be explained as follows:

- DA: The date the deal was most recently announced publicly
- DOA: The date the deal was first ever announced, even if it was later withdrawn, renegotiated, or restructured.
# create a variable to measure the time duration (in days) between the date announced and the date completion

sdc <- sdc %>%
  mutate(da = as.Date(da, format = "%m/%d/%Y"),
         de = as.Date(de, format = "%m/%d/%Y"),
         doa = as.Date(doa, format = "%m/%d/%Y"),
         time_duration_orginal = as.numeric(difftime(de, doa, units = "days")),
         time_duration = as.numeric(difftime(de, da, units = "days")))

The article uses DA, but for completeness I am reporting the summary statistics for both the time duration.

# Summary statistics for time_duration_orginal and time_duration
# Dropped missing values
sdc %>%
  filter(!is.na(time_duration)) %>%
  summarise(
    mean_time_duration = mean(time_duration, na.rm = TRUE),
    median_time_duration = median(time_duration, na.rm = TRUE),
    sd_time_duration = sd(time_duration, na.rm = TRUE),
    min_time_duration = min(time_duration, na.rm = TRUE),
    max_time_duration = max(time_duration, na.rm = TRUE)
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  kable("html", col.names = c("Mean Time Duration", "Median Time Duration", "SD Time Duration", "Min Time Duration", "Max Time Duration")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Mean Time Duration Median Time Duration SD Time Duration Min Time Duration Max Time Duration
42.199 0 152.457 0 7595
sdc %>%
  filter(!is.na( time_duration_orginal)) %>%
  summarise(
    mean_time_duration = mean(time_duration_orginal, na.rm = TRUE),
    median_time_duration = median(time_duration_orginal, na.rm = TRUE),
    sd_time_duration = sd(time_duration_orginal, na.rm = TRUE),
    min_time_duration = min(time_duration_orginal, na.rm = TRUE),
    max_time_duration = max(time_duration_orginal, na.rm = TRUE)
  ) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3))) %>%
  kable("html", col.names = c("Mean Time Duration Orginal", "Median Time Duration Orginal", "SD Time Duration Orginal", "Min Time Duration Orginal", "Max Time Duration Orginal")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Mean Time Duration Orginal Median Time Duration Orginal SD Time Duration Orginal Min Time Duration Orginal Max Time Duration Orginal
48.177 0 178.786 -7263 7595

Dropping time duration greater than 365 days

sdc_drop_one <- sdc %>%
  filter(time_duration <= 365)

n_drop_one <- as.character(nrow(sdc_drop_one))

After dropping the time duration greater than 365 days, we have 275599 observations. Drawing a distribution of mergers that are announced and completed within zero day. We now show a distribution of mergers in a histogram

sdc_drop_one %>%
  ggplot(aes(x = time_duration)) +
  geom_histogram(binwidth = 10, fill = "blue", color = "black") +
  labs(title = "Distribution of Mergers Completed on the Same Day as Announced",
       x = "Time Duration (Days)",
       y = "Count") +
  theme_minimal()

From the histogram we see that there is spike at 0 suggesting there are multiple mergers that announced and completed in less than a day, which does not make sense. We drop these mergers.

sdc_drop_two <- sdc_drop_one %>%
  filter(time_duration > 0)

n_drop_two <- as.character(nrow(sdc_drop_two))

sdc_drop_two %>%
  ggplot(aes(x = time_duration)) +
  geom_histogram(binwidth = 10, fill = "blue", color = "black") +
  labs(title = "Distribution of Mergers Completed on the Same Day as Announced",
       x = "Time Duration (Days)",
       y = "Count") +
  theme_minimal()

n_drop_two <- as.character(nrow(sdc_drop_two))

After dropping the mergers that announced and completed in less than a day, we have 92340 observations. We now look at industry, dropping acquirers and targets in banks or utilities company.

sdc_drop_two %>%
  count(acq_macro_ind, name = "count") %>%
  mutate(sample_percentage = count / sum(count)) %>%
  kable("html", col.names = c("Acquiror Macro Industry", "Count", "%")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Acquiror Macro Industry Count %
Consumer Products and Services 5574 0.0603639
Consumer Staples 4984 0.0539744
Energy and Power 6262 0.0678146
Financials 27801 0.3010721
Government and Agencies 239 0.0025883
Healthcare 4244 0.0459606
High Technology 10543 0.1141759
Industrials 9252 0.1001949
Materials 7630 0.0826294
Media and Entertainment 5436 0.0588694
Real Estate 3433 0.0371778
Retail 3389 0.0367013
Telecommunications 3553 0.0384774
sdc_drop_two %>%
  count(tar_macro_ind, name = "count") %>%
  mutate(sample_percentage = count / sum(count)) %>%
  kable("html", col.names = c("Target Macro Industry", "Count", "%")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Target Macro Industry Count %
Consumer Products and Services 7811 0.0845896
Consumer Staples 5997 0.0649448
Energy and Power 7026 0.0760884
Financials 14372 0.1556422
Government and Agencies 81 0.0008772
Healthcare 4976 0.0538878
High Technology 12483 0.1351852
Industrials 11210 0.1213992
Materials 8998 0.0974442
Media and Entertainment 7236 0.0783626
Real Estate 3737 0.0404700
Retail 4549 0.0492636
Telecommunications 3864 0.0418454