Purpose

The purpose of this script is to show the steps and decisions taken to complete Homework 2 for FINA 9310.
The data used are downloaded from Dropbox and saved locally in the same folder as this script.

library(dplyr)
library(tidyverse)
library(data.table)
library(ggplot2)
library(kableExtra)
library(xtable)
library(TTR)
library(zoo)
library(MASS)
library(DBI)
library(coin)

Data

# Read the stock data from the CSV file stored next to this script.

data <- read.csv(file = "data_stock.csv")

Part 1: Data Download

The data is downloaded from the CRSP database, filtering to firms in the energy and oil industry (SIC codes 1200-1399)

# Keep only the rows whose SIC code lies in the 1200-1399 range (inclusive).

df <- filter(data, SICCD >= 1200, SICCD <= 1399)

# Number of distinct firms (PERMNO identifiers) left after the SIC filter.

n_firms <- as.numeric(nrow(distinct(df, PERMNO)))

After filtering, there are 296 firms in the energy and oil industry.

Part 2:

Examining cumulative returns when the legislation was announced on April 21, 2016

a) Filtering

Filtering out tickers with missing returns around the time of the legislation announcement.

# Drop every row whose return (RET) is missing.
# NOTE(review): this removes individual firm-day rows over the whole sample,
# not entire firms that have gaps around the announcement - confirm that this
# is the intended filter.

df <- filter(df, !is.na(RET))

b) CAR for the (0, +1) and (-2,+2) windows.

# Convert the date column from "YYYY-MM-DD" text to Date so that the event
# windows for the (0, +1) and (-2, +2) CARs can be selected by date.

df$date <- as.Date(df$date, format = "%Y-%m-%d")

# (-2, +2) event window around the 2016-04-21 announcement:
# 2016-04-19 through 2016-04-25, inclusive.
#
# cumsum() accumulates in row order, so the rows are sorted by firm and date
# first instead of relying on the order of the input file.

df_win_m2_p2 <- df %>%
  filter(date >= as.Date("2016-04-19"), date <= as.Date("2016-04-25")) %>%
  arrange(PERMNO, date)

# Average, across firms, of each firm's cumulative RET on every date.
# NOTE(review): RET is accumulated as-is, with no benchmark subtracted -
# confirm that this is the intended definition of "CAR".

df_car <- df_win_m2_p2 %>%
  group_by(PERMNO) %>%
  mutate(CAR = cumsum(RET)) %>%
  group_by(date) %>%
  summarise(CAR = mean(CAR, na.rm = TRUE))

# Same calculation on vwretx (the value-weighted series).

df_vwcar <- df_win_m2_p2 %>%
  group_by(PERMNO) %>%
  mutate(VWCAR = cumsum(vwretx)) %>%
  group_by(date) %>%
  summarise(VWCAR = mean(VWCAR, na.rm = TRUE))

# Same calculation on ewretx (the equal-weighted series).

df_ewcar <- df_win_m2_p2 %>%
  group_by(PERMNO) %>%
  mutate(EWCAR = cumsum(ewretx)) %>%
  group_by(date) %>%
  summarise(EWCAR = mean(EWCAR, na.rm = TRUE))

# Combine the three date-level series into one data frame keyed by date.

df_allcar <- merge(df_car, df_vwcar, by = "date") %>%
  merge(df_ewcar, by = "date")


# (0, +1) event window: the announcement date 2016-04-21 and 2016-04-22.
#
# cumsum() accumulates in row order, so the rows are sorted by firm and date
# first instead of relying on the order of the input file.

df_win_0_p1 <- df %>%
  filter(date >= as.Date("2016-04-21"), date <= as.Date("2016-04-22")) %>%
  arrange(PERMNO, date)

# Average, across firms, of each firm's cumulative RET on every date.

df_car_0_1 <- df_win_0_p1 %>%
  group_by(PERMNO) %>%
  mutate(CAR = cumsum(RET)) %>%
  group_by(date) %>%
  summarise(CAR = mean(CAR, na.rm = TRUE))

# Same calculation on vwretx (the value-weighted series).

df_vwcar_0_1 <- df_win_0_p1 %>%
  group_by(PERMNO) %>%
  mutate(VWCAR = cumsum(vwretx)) %>%
  group_by(date) %>%
  summarise(VWCAR = mean(VWCAR, na.rm = TRUE))

# Same calculation on ewretx (the equal-weighted series).

df_ewcar_0_1 <- df_win_0_p1 %>%
  group_by(PERMNO) %>%
  mutate(EWCAR = cumsum(ewretx)) %>%
  group_by(date) %>%
  summarise(EWCAR = mean(EWCAR, na.rm = TRUE))

Plotting the CARs for the (-2, +2) window

# Plot the three average cumulative series for the (-2, +2) window and mark
# the announcement date with a dashed vertical line.
# The label is added with annotate() so that it is drawn once; geom_text()
# with constant aesthetics would redraw it for every row of df_allcar.

ggplot(df_allcar, aes(x = date)) +
  geom_line(aes(y = CAR, color = "CAR")) +
  geom_line(aes(y = VWCAR, color = "VWCAR")) +
  geom_line(aes(y = EWCAR, color = "EWCAR")) +
  labs(title = "Average CAR for the (-2, +2) window", x = "Date", y = "CAR") +
  geom_vline(xintercept = as.Date("2016-04-21"),
             linetype = "dashed", color = "blue") +
  annotate("text", x = as.Date("2016-04-21"), y = 0.1,
           label = "Legislation Announcement April 21, 2016",
           color = "blue", angle = 0, vjust = -0.1)

We test whether the average CAR is significantly different from zero for the (-2, +2) and (0, +1) windows. A significant CAR in the (-2, +2) window, which includes the two days before the announcement, would mean that market participants were able to anticipate the legislation before it was announced. This would imply some form of information leakage and insider trading.

# One-sample t-tests of the average CAR against zero for the (-2, +2) and
# (0, +1) windows.
# NOTE(review): the sample passed to t.test() is the date-level averages
# (one value per date in the window), not one CAR per firm - confirm that this
# is the intended test.

# (-2, +2) window

t.test(x = df_allcar$CAR, mu = 0, alternative = "two.sided", conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  df_allcar$CAR
## t = 9.2651, df = 4, p-value = 0.0007547
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.0395410 0.0733795
## sample estimates:
##  mean of x 
## 0.05646025

# (0, +1) window: one-sample t-test of the date-level average CAR against zero.
# NOTE(review): df_car_0_1 holds one value per date of the two-day window -
# confirm that a test on those values is intended.

t.test(x = df_car_0_1$CAR, mu = 0, alternative = "two.sided", conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  df_car_0_1$CAR
## t = 0.43458, df = 1, p-value = 0.739
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1877937  0.2010945
## sample estimates:
##   mean of x 
## 0.006650394

Running t-tests we found that the average CAR is significantly different from zero for the (-2, +2) window but not for the (0, +1) window.

c) Plotting (-20, 20) window

# (-20, +20) window, measured in trading days around the announcement.
# The window is built from the trading dates present in df, because a fixed
# calendar range does not hold 20 trading days on each side of the event.

event_date_20 <- as.Date("2016-04-21")
trading_days_20 <- sort(unique(df$date))
event_pos_20 <- match(event_date_20, trading_days_20)

# The announcement date must itself be a trading date in the data.
stopifnot(!is.na(event_pos_20))

# Clamp the window to the available dates in case the sample is shorter than
# 20 trading days on either side.
window_days_20 <- trading_days_20[
  max(1L, event_pos_20 - 20L):min(length(trading_days_20), event_pos_20 + 20L)
]

# Average, across firms, of each firm's cumulative RET on every date.
# Rows are sorted by firm and date because cumsum() accumulates in row order.

df_car_20 <- df %>%
  filter(date >= min(window_days_20), date <= max(window_days_20)) %>%
  arrange(PERMNO, date) %>%
  group_by(PERMNO) %>%
  mutate(CAR = cumsum(RET)) %>%
  group_by(date) %>%
  summarise(CAR = mean(CAR, na.rm = TRUE))

# Plot the average CAR path and mark the announcement date. The label is added
# with annotate() so that it is drawn once rather than once per row.

ggplot(df_car_20, aes(x = date)) +
  geom_line(aes(y = CAR)) +
  labs(title = "Average CAR for the (-20, +20) window", x = "Date", y = "CAR") +
  geom_vline(xintercept = event_date_20, linetype = "dashed", color = "blue") +
  annotate("text", x = event_date_20, y = 0.1,
           label = "Legislation Announcement April 21, 2016",
           color = "blue", angle = 0, hjust = 0, vjust = 2.5, size = 3.5) +
  scale_x_date(date_breaks = "5 days", date_labels = "%b %d")

d) The (0, +1) window

Calculate percentage of firms that had negative CARs

# Share of firms with a negative CAR over the (0, +1) window.

# Firm-level cumulative RET on each day of the window (one row per firm-day,
# still grouped by PERMNO). Rows are sorted by firm and date because cumsum()
# accumulates in row order.
df_firms <- df %>%
  filter(date >= as.Date("2016-04-21"), date <= as.Date("2016-04-22")) %>%
  arrange(PERMNO, date) %>%
  group_by(PERMNO) %>%
  mutate(CAR = cumsum(RET))

# Number of distinct firms observed in the window.
n_firms <- as.numeric(n_distinct(df_firms$PERMNO))

# Number of firms whose CAR over the whole window is negative.
# df_firms has up to two rows per firm, so counting rows with CAR < 0 would
# count firm-days rather than firms; each firm is collapsed to a single
# window CAR before counting.
n_firms_neg <- df_firms %>%
  summarise(CAR = sum(RET)) %>%
  filter(CAR < 0) %>%
  nrow() %>%
  as.numeric()

# Fraction (between 0 and 1) of firms with a negative window CAR.
percent_neg <- n_firms_neg / n_firms

The percentage of firms that had negative CARs for the (0, +1) window is 0.8984772.

We now conduct a Wilcoxon signed-rank test to check if the median of CAR is different from 0

# Two-sided Wilcoxon signed-rank test of the CAR values in df_firms against 0.
# NOTE(review): df_firms holds one cumulative value per firm per day of the
# window, so a firm can contribute more than one observation - confirm.
wilcox.test(x = df_firms$CAR, alternative = "two.sided", mu = 0)

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  df_firms$CAR
## V = 42390, p-value = 0.005255
## alternative hypothesis: true location is not equal to 0

The Wilcoxon signed-rank test results indicate that the median cumulative abnormal return (CAR) is significantly different from zero. The test statistic (V) is 42,390, and the p-value is 0.005255, which is well below the conventional 0.05 significance level. This allows us to reject the null hypothesis, which assumes that the median CAR is equal to zero. In practical terms, this suggests that the observed abnormal returns are not purely random and that there is a statistically significant effect present.

e)

The main advantages of using the Wilcoxon signed-rank test are that it does not require the data to be normally distributed and that it is more robust to outliers. This makes it a good choice for analyzing CAR, which often has non-normal distributions and outliers.

Part 3

a) Calculate average CAR for 1 year and 3 year horizons

# Average CAR paths for the 1 year and 3 year horizons, both starting on the
# announcement date (2016-04-21).
#
# cumsum() accumulates in row order, so the rows are sorted by firm and date
# first instead of relying on the order of the input file.

df_post_event <- df %>%
  filter(date >= as.Date("2016-04-21"), date <= as.Date("2019-04-21")) %>%
  arrange(PERMNO, date)

# 1 year horizon: 2016-04-21 through 2017-04-21.

df_car_1y <- df_post_event %>%
  filter(date <= as.Date("2017-04-21")) %>%
  group_by(PERMNO) %>%
  mutate(CAR = cumsum(RET)) %>%
  group_by(date) %>%
  summarise(CAR = mean(CAR, na.rm = TRUE))

# 3 year horizon: 2016-04-21 through 2019-04-21.

df_car_3y <- df_post_event %>%
  group_by(PERMNO) %>%
  mutate(CAR = cumsum(RET)) %>%
  group_by(date) %>%
  summarise(CAR = mean(CAR, na.rm = TRUE))

# Plot the average CAR path for the 1 year and 3 year horizons and mark the
# announcement date with a dashed vertical line.
# - The label is added with annotate() so that it is drawn once; geom_text()
#   with constant aesthetics would redraw it for every row of the data.
# - Axis labels include the year, because both windows span more than one
#   calendar year and month/day labels alone would be ambiguous.

ggplot(df_car_1y, aes(x = date)) +
  geom_line(aes(y = CAR)) +
  labs(title = "Average CAR for the 1 year horizon", x = "Date", y = "CAR") +
  geom_vline(xintercept = as.Date("2016-04-21"),
             linetype = "dashed", color = "blue") +
  annotate("text", x = as.Date("2016-04-21"), y = 0.1,
           label = "Legislation Announcement April 21, 2016",
           color = "blue", angle = 90, hjust = 0, vjust = 2.5, size = 2.5) +
  scale_x_date(date_breaks = "30 days", date_labels = "%b %Y")

ggplot(df_car_3y, aes(x = date)) +
  geom_line(aes(y = CAR)) +
  labs(title = "Average CAR for the 3 year horizon", x = "Date", y = "CAR") +
  geom_vline(xintercept = as.Date("2016-04-21"),
             linetype = "dashed", color = "blue") +
  annotate("text", x = as.Date("2016-04-21"), y = 0.1,
           label = "Legislation Announcement April 21, 2016",
           color = "blue", angle = 90, hjust = 0, vjust = 2.5, size = 2.5) +
  scale_x_date(date_breaks = "70 days", date_labels = "%b %Y")

b) Calculate BHAR for 1 year and 3 year horizons

# Buy-and-hold abnormal return (BHAR) paths for the 1 year and 3 year horizons,
# both starting on the announcement date (2016-04-21).
#
# A buy-and-hold return compounds the daily returns, so for each firm
#   BHAR_t = prod(1 + RET) - prod(1 + vwretx)
# taken over all days up to t. A difference of cumulative sums would be a
# cumulative abnormal return (CAR), not a BHAR.
#
# cumprod() accumulates in row order, so the rows are sorted by firm and date.

df_bhar_window <- df %>%
  filter(date >= as.Date("2016-04-21"), date <= as.Date("2019-04-21")) %>%
  arrange(PERMNO, date)

# 1 year horizon: 2016-04-21 through 2017-04-21.

df_bhar_1y <- df_bhar_window %>%
  filter(date <= as.Date("2017-04-21")) %>%
  group_by(PERMNO) %>%
  mutate(BHAR = cumprod(1 + RET) - cumprod(1 + vwretx)) %>%
  group_by(date) %>%
  summarise(BHAR = mean(BHAR, na.rm = TRUE))

# 3 year horizon: 2016-04-21 through 2019-04-21.

df_bhar_3y <- df_bhar_window %>%
  group_by(PERMNO) %>%
  mutate(BHAR = cumprod(1 + RET) - cumprod(1 + vwretx)) %>%
  group_by(date) %>%
  summarise(BHAR = mean(BHAR, na.rm = TRUE))

Plotting the BHAR for the 1 year and 3 year horizons

# Plot the average BHAR path for the 1 year and 3 year horizons and mark the
# announcement date with a dashed vertical line.
# - The label is added with annotate() so that it is drawn once; geom_text()
#   with constant aesthetics would redraw it for every row of the data.
# - Axis labels include the year, because both windows span more than one
#   calendar year and month/day labels alone would be ambiguous.

ggplot(df_bhar_1y, aes(x = date)) +
  geom_line(aes(y = BHAR)) +
  labs(title = "Average BHAR for the 1 year horizon", x = "Date", y = "BHAR") +
  geom_vline(xintercept = as.Date("2016-04-21"),
             linetype = "dashed", color = "blue") +
  annotate("text", x = as.Date("2016-04-21"), y = 0.1,
           label = "Legislation Announcement April 21, 2016",
           color = "blue", angle = 90, hjust = 0, vjust = 2.5, size = 2.5) +
  scale_x_date(date_breaks = "30 days", date_labels = "%b %Y")

ggplot(df_bhar_3y, aes(x = date)) +
  geom_line(aes(y = BHAR)) +
  labs(title = "Average BHAR for the 3 year horizon", x = "Date", y = "BHAR") +
  geom_vline(xintercept = as.Date("2016-04-21"),
             linetype = "dashed", color = "blue") +
  annotate("text", x = as.Date("2016-04-21"), y = 0.1,
           label = "Legislation Announcement April 21, 2016",
           color = "blue", angle = 90, hjust = 0, vjust = 2.5, size = 2.5) +
  scale_x_date(date_breaks = "70 days", date_labels = "%b %Y")

c) Better method?

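The BHAR is a better method for measuring abnormal returns over long horizons because it compounds both the firm's return and the market return over the same holding period, so it reflects what an investor who bought and held the stock would actually have earned relative to the market. This allows us to isolate the abnormal returns due to the event of interest rather than the overall market performance. The CAR, on the other hand, adds up daily abnormal returns without compounding, so over long horizons it can drift away from the return an investor actually realizes and is more heavily influenced by day-to-day market movements and noise.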

Part 4

We now create a variable for market capitalization to compare small vs large firms. Firms above sample median market capitalization are classified as large firms, while firms below the median are classified as small firms.

# Market capitalization per row: shares outstanding (SHROUT) times the absolute
# price (PRC), stored as a numeric column.
# NOTE(review): abs() is presumably there because some PRC values carry a
# negative sign in the source data - confirm.
df$MarketCap <- as.numeric(abs(df$SHROUT * abs(df$PRC)))

# Median market capitalization over all rows of df, ignoring missing values.
median_marketcap <- median(df$MarketCap, na.rm = TRUE)

# Label each row "Large" when its market cap is above the median, else "Small".
# NOTE(review): the label is assigned per row (firm-day) against a median taken
# over all rows, so one firm can carry both labels on different dates - confirm
# that this is the intended classification.
df$Size <- ifelse(df$MarketCap > median_marketcap, "Large", "Small")

Repeat part 2(b) for large and small firms. For the (-2, +2) window, we create the average CAR plot for both large and small firms

# Average CAR paths over the (-2, +2) window (2016-04-19 through 2016-04-25),
# computed separately for rows labelled "Large" and "Small".
#
# cumsum() accumulates in row order, so the rows are sorted by firm and date
# first instead of relying on the order of the input file.

df_win_m2_p2_size <- df %>%
  filter(date >= as.Date("2016-04-19"), date <= as.Date("2016-04-25")) %>%
  arrange(PERMNO, date)

df_car_large <- df_win_m2_p2_size %>%
  filter(Size == "Large") %>%
  group_by(PERMNO) %>%
  mutate(CAR = cumsum(RET)) %>%
  group_by(date) %>%
  summarise(CAR = mean(CAR, na.rm = TRUE))

df_car_small <- df_win_m2_p2_size %>%
  filter(Size == "Small") %>%
  group_by(PERMNO) %>%
  mutate(CAR = cumsum(RET)) %>%
  group_by(date) %>%
  summarise(CAR = mean(CAR, na.rm = TRUE))

# Join the two series by date; merge() names the columns CAR.x (large firms)
# and CAR.y (small firms).

df_car_all <- merge(df_car_large, df_car_small, by = "date")

# Plot the average CAR over the (-2, +2) window for large (CAR.x) and small
# (CAR.y) firms on one graph and mark the announcement date.
# The label is added with annotate() so that it is drawn once; geom_text()
# with constant aesthetics would redraw it for every row of df_car_all.

ggplot(df_car_all, aes(x = date)) +
  geom_line(aes(y = CAR.x, color = "Large")) +
  geom_line(aes(y = CAR.y, color = "Small")) +
  labs(title = "Average CAR for the (-2, +2) window for Large and Small Firms",
       x = "Date", y = "CAR") +
  geom_vline(xintercept = as.Date("2016-04-21"),
             linetype = "dashed", color = "blue") +
  annotate("text", x = as.Date("2016-04-21"), y = 0.1,
           label = "Legislation Announcement April 21, 2016",
           color = "blue", angle = 0, hjust = 0, vjust = 2.5, size = 2.5) +
  scale_x_date(date_breaks = "1 days", date_labels = "%b %d")

For the (0, +1) window, we repeat the same process and plot the average CAR for large and small firms

# Average CAR paths over the (0, +1) window (2016-04-21 and 2016-04-22),
# computed separately for rows labelled "Large" and "Small".
#
# cumsum() accumulates in row order, so the rows are sorted by firm and date
# first instead of relying on the order of the input file.

df_win_0_p1_size <- df %>%
  filter(date >= as.Date("2016-04-21"), date <= as.Date("2016-04-22")) %>%
  arrange(PERMNO, date)

df_car_large_0_1 <- df_win_0_p1_size %>%
  filter(Size == "Large") %>%
  group_by(PERMNO) %>%
  mutate(CAR = cumsum(RET)) %>%
  group_by(date) %>%
  summarise(CAR = mean(CAR, na.rm = TRUE))

df_car_small_0_1 <- df_win_0_p1_size %>%
  filter(Size == "Small") %>%
  group_by(PERMNO) %>%
  mutate(CAR = cumsum(RET)) %>%
  group_by(date) %>%
  summarise(CAR = mean(CAR, na.rm = TRUE))

# Join the two series by date; merge() names the columns CAR.x (large firms)
# and CAR.y (small firms).
df_car_all_0_1 <- merge(df_car_large_0_1, df_car_small_0_1, by = "date")

# Plot the average CAR over the (0, +1) window for large (CAR.x) and small
# (CAR.y) firms on one graph and mark the announcement date.
# The label is added with annotate() so that it is drawn once; geom_text()
# with constant aesthetics would redraw it for every row of df_car_all_0_1.

ggplot(df_car_all_0_1, aes(x = date)) +
  geom_line(aes(y = CAR.x, color = "Large")) +
  geom_line(aes(y = CAR.y, color = "Small")) +
  labs(title = "Average CAR for the (0, +1) window for Large and Small Firms",
       x = "Date", y = "CAR") +
  geom_vline(xintercept = as.Date("2016-04-21"),
             linetype = "dashed", color = "blue") +
  annotate("text", x = as.Date("2016-04-21"), y = 0.1,
           label = "Legislation Announcement April 21, 2016",
           color = "blue", angle = 0, hjust = 0, vjust = 2.5, size = 2.5) +
  scale_x_date(date_breaks = "1 days", date_labels = "%b %d")

For large firms, test whether the average CAR is significantly different from zero for the (-2, +2) window and (0, +1) window.

# Large firms: one-sample t-test of the average CAR against zero.
# NOTE(review): the sample is the date-level averages in df_car_large (one
# value per date), not one CAR per firm - confirm that this is intended.

# (-2, +2) window

t.test(x = df_car_large$CAR, mu = 0, alternative = "two.sided", conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  df_car_large$CAR
## t = 8.6579, df = 4, p-value = 0.0009791
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.03586109 0.06971901
## sample estimates:
##  mean of x 
## 0.05279005

# Large firms, (0, +1) window: one-sample t-test of the date-level average CAR
# against zero.

t.test(x = df_car_large_0_1$CAR, mu = 0, alternative = "two.sided", conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  df_car_large_0_1$CAR
## t = 0.26795, df = 1, p-value = 0.8333
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2106369  0.2197123
## sample estimates:
## mean of x 
## 0.0045377

The result of the t-test for large firms indicates that the average CAR is significantly different from zero for the (-2, +2) window but not for the (0, +1) window.

For small firms, test whether the average CAR is significantly different from zero for the (-2, +2) window and (0, +1) window.

# Small firms: one-sample t-test of the average CAR against zero.
# NOTE(review): the sample is the date-level averages in df_car_small (one
# value per date), not one CAR per firm - confirm that this is intended.

# (-2, +2) window

t.test(x = df_car_small$CAR, mu = 0, alternative = "two.sided", conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  df_car_small$CAR
## t = 9.4011, df = 4, p-value = 0.0007135
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.04121223 0.07575705
## sample estimates:
##  mean of x 
## 0.05848464

# Small firms, (0, +1) window: one-sample t-test of the date-level average CAR
# against zero.

t.test(x = df_car_small_0_1$CAR, mu = 0, alternative = "two.sided", conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  df_car_small_0_1$CAR
## t = 0.59141, df = 1, p-value = 0.66
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1699802  0.1865760
## sample estimates:
##   mean of x 
## 0.008297908

The result of the t-test for small firms indicates that the average CAR is significantly different from zero for the (-2, +2) window but not for the (0, +1) window.

We now test whether the average CARs of large and small firms are statistically different for the (-2, +2) and (0, +1) windows.

# Two-sample, two-sided Wilcoxon rank-sum tests comparing the average CAR
# series of large and small firms for the (-2, +2) and (0, +1) windows.

# (-2, +2) window

wilcox.test(x = df_car_large$CAR, y = df_car_small$CAR, alternative = "two.sided")

## 
##  Wilcoxon rank sum exact test
## 
## data:  df_car_large$CAR and df_car_small$CAR
## W = 8, p-value = 0.4206
## alternative hypothesis: true location shift is not equal to 0

# (0, +1) window: two-sided Wilcoxon rank-sum test, large vs small firms.

wilcox.test(x = df_car_large_0_1$CAR, y = df_car_small_0_1$CAR, alternative = "two.sided")

## 
##  Wilcoxon rank sum exact test
## 
## data:  df_car_large_0_1$CAR and df_car_small_0_1$CAR
## W = 1, p-value = 0.6667
## alternative hypothesis: true location shift is not equal to 0

The results of the Wilcoxon rank-sum tests indicate that the average CAR is not significantly different between large and small firms for either the (-2, +2) window or the (0, +1) window.

Repeat part 3(b), calculate BHAR for large and small firms for the 1 year and 3 year horizons.

# Buy-and-hold abnormal return (BHAR) paths for the 1 year and 3 year horizons,
# computed separately for rows labelled "Large" and "Small".
#
# A buy-and-hold return compounds the daily returns, so for each firm
#   BHAR_t = prod(1 + RET) - prod(1 + vwretx)
# taken over all days up to t. A difference of cumulative sums would be a
# cumulative abnormal return (CAR), not a BHAR. Statistics recorded from runs
# that used the additive version need to be regenerated.

# Average BHAR per date for one size group, from the announcement date
# (2016-04-21) through `window_end`.
#   returns     data frame with date, PERMNO, RET, vwretx and Size columns
#   window_end  last date of the horizon, as "YYYY-MM-DD"
#   size_label  value of Size to keep ("Large" or "Small")
# Returns a data frame with one row per date and the columns date and BHAR.
avg_bhar_by_date <- function(returns, window_end, size_label) {
  returns %>%
    filter(
      date >= as.Date("2016-04-21"),
      date <= as.Date(window_end),
      Size == size_label
    ) %>%
    # cumprod() accumulates in row order, so sort by firm and date first.
    arrange(PERMNO, date) %>%
    group_by(PERMNO) %>%
    mutate(BHAR = cumprod(1 + RET) - cumprod(1 + vwretx)) %>%
    group_by(date) %>%
    summarise(BHAR = mean(BHAR, na.rm = TRUE))
}

# 1 year horizon

df_bhar_large_1y <- avg_bhar_by_date(df, "2017-04-21", "Large")

df_bhar_small_1y <- avg_bhar_by_date(df, "2017-04-21", "Small")

# Join by date; merge() names the columns BHAR.x (large) and BHAR.y (small).

df_bhar_all_1y <- merge(df_bhar_large_1y, df_bhar_small_1y, by = "date")


# 3 year horizon

df_bhar_large_3y <- avg_bhar_by_date(df, "2019-04-21", "Large")

df_bhar_small_3y <- avg_bhar_by_date(df, "2019-04-21", "Small")

# Join by date; merge() names the columns BHAR.x (large) and BHAR.y (small).

df_bhar_all_3y <- merge(df_bhar_large_3y, df_bhar_small_3y, by = "date")

# Plot the average BHAR for large (BHAR.x) and small (BHAR.y) firms over the
# 1 year and 3 year horizons and mark the announcement date.
# - The label is added with annotate() so that it is drawn once; geom_text()
#   with constant aesthetics would redraw it for every row of the data.
# - Axis labels include the year, because both windows span more than one
#   calendar year and month/day labels alone would be ambiguous.

ggplot(df_bhar_all_1y, aes(x = date)) +
  geom_line(aes(y = BHAR.x, color = "Large")) +
  geom_line(aes(y = BHAR.y, color = "Small")) +
  labs(title = "Average BHAR for the 1 year horizon for Large and Small Firms",
       x = "Date", y = "BHAR") +
  geom_vline(xintercept = as.Date("2016-04-21"),
             linetype = "dashed", color = "blue") +
  annotate("text", x = as.Date("2016-04-21"), y = 0.1,
           label = "Legislation Announcement April 21, 2016",
           color = "blue", angle = 0, hjust = 0, vjust = 2.5, size = 2.5) +
  scale_x_date(date_breaks = "30 days", date_labels = "%b %Y")

ggplot(df_bhar_all_3y, aes(x = date)) +
  geom_line(aes(y = BHAR.x, color = "Large")) +
  geom_line(aes(y = BHAR.y, color = "Small")) +
  labs(title = "Average BHAR for the 3 year horizon for Large and Small Firms",
       x = "Date", y = "BHAR") +
  geom_vline(xintercept = as.Date("2016-04-21"),
             linetype = "dashed", color = "blue") +
  annotate("text", x = as.Date("2016-04-21"), y = 0.1,
           label = "Legislation Announcement April 21, 2016",
           color = "blue", angle = 0, hjust = 0, vjust = 2.5, size = 2.5) +
  scale_x_date(date_breaks = "70 days", date_labels = "%b %Y")

Test whether the BHAR is significantly different between large and small firms:

# Two-sample, two-sided Wilcoxon rank-sum tests comparing the average BHAR
# series of large and small firms for the 1 year and 3 year horizons.
# NOTE(review): the samples are date-level averages (one value per date) -
# confirm that a test on those values is intended.

# 1 year horizon

wilcox.test(x = df_bhar_large_1y$BHAR, y = df_bhar_small_1y$BHAR, alternative = "two.sided")

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  df_bhar_large_1y$BHAR and df_bhar_small_1y$BHAR
## W = 18051, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

# 3 year horizon: two-sided Wilcoxon rank-sum test, large vs small firms.

wilcox.test(x = df_bhar_large_3y$BHAR, y = df_bhar_small_3y$BHAR, alternative = "two.sided")

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  df_bhar_large_3y$BHAR and df_bhar_small_3y$BHAR
## W = 172815, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

The results of the Wilcoxon rank-sum tests indicate that the average BHAR is significantly different between large and small firms for both the 1 year and 3 year horizons. This implies that the market reaction to the legislation announcement was different for large and small firms over these horizons.

FINA 9310 - Homework 2

Kiet Le

2024-08-17