Purpose
The purpose of this script is to show the steps and decisions
taken to complete the pre-assignment 2 for FINA 9210.
The data used are downloaded from Dropbox and saved locally in
the same folder as this script.
library(dplyr)
library(tidyverse)
library(data.table)
library(ggplot2)
library(kableExtra)
library(xtable)
library(TTR)
library(zoo)
library(MASS)
library(DBI)
library(PerformanceAnalytics)
Data
The data is first downloaded from WRDS and stored locally, which
will then used for analysis.
Data between Jan 2021 to Dec 2023
data <-read.csv("data.csv")
Part a
- For each month, report the number of stocks
data$year <- as.numeric(format(as.Date(data$date), "%Y"))
data$month <- as.factor(format(as.Date(data$date), "%m"))
data %>%
group_by(year,month) %>%
summarise(n_stocks = n_distinct(TICKER))%>%
kable(digits = 3, col.names = c("Year", "Month", "Number of Stocks")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Year
|
Month
|
Number of Stocks
|
2021
|
01
|
8046
|
2021
|
02
|
8139
|
2021
|
03
|
8328
|
2021
|
04
|
8476
|
2021
|
05
|
8573
|
2021
|
06
|
8689
|
2021
|
07
|
8820
|
2021
|
08
|
8884
|
2021
|
09
|
8990
|
2021
|
10
|
9101
|
2021
|
11
|
9185
|
2021
|
12
|
9314
|
2022
|
01
|
9388
|
2022
|
02
|
9454
|
2022
|
03
|
9500
|
2022
|
04
|
9540
|
2022
|
05
|
9550
|
2022
|
06
|
9567
|
2022
|
07
|
9571
|
2022
|
08
|
9602
|
2022
|
09
|
9620
|
2022
|
10
|
9629
|
2022
|
11
|
9608
|
2022
|
12
|
9609
|
2023
|
01
|
9520
|
2023
|
02
|
9505
|
2023
|
03
|
9502
|
2023
|
04
|
9439
|
2023
|
05
|
9429
|
2023
|
06
|
9442
|
2023
|
07
|
9419
|
2023
|
08
|
9415
|
2023
|
09
|
9452
|
2023
|
10
|
9503
|
2023
|
11
|
9485
|
2023
|
12
|
9478
|
Part b
- For each month, report the raw return
data$RET <- as.numeric(data$RET)
data %>%
group_by(year,month) %>%
summarise(raw_return = mean(RET, na.rm = TRUE))%>%
kable(digits = 3, col.names = c("Year", "Month", "Raw Return")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Year
|
Month
|
Raw Return
|
2021
|
01
|
0.070
|
2021
|
02
|
0.065
|
2021
|
03
|
0.013
|
2021
|
04
|
0.017
|
2021
|
05
|
0.013
|
2021
|
06
|
0.021
|
2021
|
07
|
-0.032
|
2021
|
08
|
0.019
|
2021
|
09
|
-0.028
|
2021
|
10
|
0.027
|
2021
|
11
|
-0.045
|
2021
|
12
|
0.000
|
2022
|
01
|
-0.067
|
2022
|
02
|
-0.008
|
2022
|
03
|
0.015
|
2022
|
04
|
-0.088
|
2022
|
05
|
-0.015
|
2022
|
06
|
-0.068
|
2022
|
07
|
0.065
|
2022
|
08
|
-0.013
|
2022
|
09
|
-0.098
|
2022
|
10
|
0.050
|
2022
|
11
|
0.030
|
2022
|
12
|
-0.046
|
2023
|
01
|
0.122
|
2023
|
02
|
-0.034
|
2023
|
03
|
-0.033
|
2023
|
04
|
-0.009
|
2023
|
05
|
-0.009
|
2023
|
06
|
0.052
|
2023
|
07
|
0.042
|
2023
|
08
|
-0.050
|
2023
|
09
|
-0.054
|
2023
|
10
|
-0.058
|
2023
|
11
|
0.079
|
2023
|
12
|
0.081
|
Graphing average raw returns
graph_returns <- data %>%
group_by(year,month) %>%
summarise(raw_return = mean(RET, na.rm = TRUE))
# create another column for date
graph_returns$date <- as.Date(paste(graph_returns$year, graph_returns$month, "01", sep = "-"))
# graph
ggplot(graph_returns, aes(x = date, y = raw_return)) +
geom_line() +
labs(title = "Average Raw Returns by Month", x = "Date", y = "Average Raw Return") +
theme_minimal()

Part c
- For each month, report the total market capitalization
data$mktcap <- as.numeric(data$PRC) * as.numeric(data$SHROUT)
data %>%
group_by(year,month) %>%
summarise(mktcap = sum(mktcap, na.rm = TRUE))%>%
kable(digits = 3, col.names = c("Year", "Month", "Market Capitalization")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Year
|
Month
|
Market Capitalization
|
2021
|
01
|
51902378967
|
2021
|
02
|
53555109644
|
2021
|
03
|
55421424919
|
2021
|
04
|
58227078177
|
2021
|
05
|
58688835704
|
2021
|
06
|
60304574800
|
2021
|
07
|
60927963589
|
2021
|
08
|
62567408312
|
2021
|
09
|
60024943877
|
2021
|
10
|
63932741495
|
2021
|
11
|
62792794887
|
2021
|
12
|
64863624233
|
2022
|
01
|
61047842678
|
2022
|
02
|
59621755623
|
2022
|
03
|
61279592338
|
2022
|
04
|
55728517854
|
2022
|
05
|
55564548569
|
2022
|
06
|
50896478856
|
2022
|
07
|
55237331252
|
2022
|
08
|
53146923252
|
2022
|
09
|
48147152663
|
2022
|
10
|
51644599360
|
2022
|
11
|
54412243029
|
2022
|
12
|
51197347473
|
2023
|
01
|
54885518207
|
2023
|
02
|
53342415333
|
2023
|
03
|
54441593731
|
2023
|
04
|
54835852335
|
2023
|
05
|
54757632331
|
2023
|
06
|
58280185550
|
2023
|
07
|
60404446467
|
2023
|
08
|
59034704677
|
2023
|
09
|
56231996101
|
2023
|
10
|
54418452710
|
2023
|
11
|
59307215187
|
2023
|
12
|
62426648543
|
- For each month, report the average market capitalization
data %>%
group_by(year,month) %>%
summarise(avg_mktcap = mean(mktcap, na.rm = TRUE))%>%
kable(digits = 3, col.names = c("Year", "Month", "Average Market Capitalization")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Year
|
Month
|
Average Market Capitalization
|
2021
|
01
|
6452310
|
2021
|
02
|
6580060
|
2021
|
03
|
6676476
|
2021
|
04
|
6887518
|
2021
|
05
|
6849771
|
2021
|
06
|
6949934
|
2021
|
07
|
6914204
|
2021
|
08
|
7057005
|
2021
|
09
|
6685781
|
2021
|
10
|
7051924
|
2021
|
11
|
6848380
|
2021
|
12
|
6986603
|
2022
|
01
|
6513159
|
2022
|
02
|
6317872
|
2022
|
03
|
6466821
|
2022
|
04
|
5858759
|
2022
|
05
|
5834774
|
2022
|
06
|
5332825
|
2022
|
07
|
5787044
|
2022
|
08
|
5561047
|
2022
|
09
|
5022653
|
2022
|
10
|
5398766
|
2022
|
11
|
5685710
|
2022
|
12
|
5392600
|
2023
|
01
|
5795113
|
2023
|
02
|
5644700
|
2023
|
03
|
5789811
|
2023
|
04
|
5837327
|
2023
|
05
|
5831484
|
2023
|
06
|
6214564
|
2023
|
07
|
6453467
|
2023
|
08
|
6305106
|
2023
|
09
|
5971962
|
2023
|
10
|
5779360
|
2023
|
11
|
6295883
|
2023
|
12
|
6641133
|
Part d
- For each month, report the total trade volume
data$VOL <- as.numeric(data$VOL)
data %>%
group_by(year,month) %>%
summarise(total_volume = sum(VOL, na.rm = TRUE))%>%
kable(digits = 3, col.names = c("Year", "Month", "Total Trade Volume")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Year
|
Month
|
Total Trade Volume
|
2021
|
01
|
2867731618
|
2021
|
02
|
2770588408
|
2021
|
03
|
2976303882
|
2021
|
04
|
2022042340
|
2021
|
05
|
2080067959
|
2021
|
06
|
2391876889
|
2021
|
07
|
2000361974
|
2021
|
08
|
1932969399
|
2021
|
09
|
2168697670
|
2021
|
10
|
2123197567
|
2021
|
11
|
2267063870
|
2021
|
12
|
2321601283
|
2022
|
01
|
2403657990
|
2022
|
02
|
2244584120
|
2022
|
03
|
3136482920
|
2022
|
04
|
2303785238
|
2022
|
05
|
2720314008
|
2022
|
06
|
2628773273
|
2022
|
07
|
2104310835
|
2022
|
08
|
2383966278
|
2022
|
09
|
2345135568
|
2022
|
10
|
2353901824
|
2022
|
11
|
2303717424
|
2022
|
12
|
2218075353
|
2023
|
01
|
2222773687
|
2023
|
02
|
2125989155
|
2023
|
03
|
2824535615
|
2023
|
04
|
1930726487
|
2023
|
05
|
2303103999
|
2023
|
06
|
2335292440
|
2023
|
07
|
2057683519
|
2023
|
08
|
2375469953
|
2023
|
09
|
2042122423
|
2023
|
10
|
2310597118
|
2023
|
11
|
2180462750
|
2023
|
12
|
2378536853
|
Part e
- Find the average length of individual returns / I am not sure why
this question is asking
Part f
- Tabulate the percentage of returns that are missing, report, total
number of returns, and the number of missing returns
data %>%
group_by(year,month) %>%
summarise(missing_return = sum(is.na(RET))/n(),
total_missing = sum(is.na(RET)),
total = n())%>%
kable(digits = 3, col.names = c("Year", "Month", "Percentage of Missing Returns", "Total Missing", "N")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Year
|
Month
|
Percentage of Missing Returns
|
Total Missing
|
N
|
2021
|
01
|
0.034
|
275
|
8208
|
2021
|
02
|
0.044
|
368
|
8393
|
2021
|
03
|
0.054
|
464
|
8558
|
2021
|
04
|
0.045
|
392
|
8653
|
2021
|
05
|
0.035
|
307
|
8738
|
2021
|
06
|
0.039
|
346
|
8883
|
2021
|
07
|
0.033
|
292
|
8944
|
2021
|
08
|
0.030
|
274
|
9051
|
2021
|
09
|
0.036
|
334
|
9169
|
2021
|
10
|
0.039
|
359
|
9278
|
2021
|
11
|
0.037
|
344
|
9375
|
2021
|
12
|
0.035
|
330
|
9452
|
2022
|
01
|
0.027
|
257
|
9513
|
2022
|
02
|
0.023
|
223
|
9560
|
2022
|
03
|
0.022
|
207
|
9605
|
2022
|
04
|
0.019
|
180
|
9615
|
2022
|
05
|
0.017
|
162
|
9632
|
2022
|
06
|
0.016
|
153
|
9636
|
2022
|
07
|
0.017
|
163
|
9666
|
2022
|
08
|
0.022
|
213
|
9696
|
2022
|
09
|
0.019
|
187
|
9697
|
2022
|
10
|
0.020
|
196
|
9703
|
2022
|
11
|
0.017
|
163
|
9678
|
2022
|
12
|
0.024
|
229
|
9670
|
2023
|
01
|
0.016
|
154
|
9584
|
2023
|
02
|
0.019
|
182
|
9583
|
2023
|
03
|
0.024
|
233
|
9570
|
2023
|
04
|
0.018
|
168
|
9511
|
2023
|
05
|
0.018
|
174
|
9511
|
2023
|
06
|
0.022
|
206
|
9515
|
2023
|
07
|
0.021
|
202
|
9505
|
2023
|
08
|
0.025
|
236
|
9530
|
2023
|
09
|
0.026
|
251
|
9565
|
2023
|
10
|
0.029
|
281
|
9596
|
2023
|
11
|
0.025
|
236
|
9575
|
2023
|
12
|
0.018
|
171
|
9498
|
Part g
- Find the number of firms that delisted during this period (DLSTCD is
not NA and not 100)
delist <- data %>%
filter(!is.na(DLSTCD)) %>%
filter(DLSTCD != 100)
delist %>%
group_by(year,month) %>%
summarise(n_delist = n_distinct(TICKER))%>%
kable(digits = 3, col.names = c("Year", "Month", "Number of Delisted Firms")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Year
|
Month
|
Number of Delisted Firms
|
2021
|
01
|
23
|
2021
|
02
|
18
|
2021
|
03
|
49
|
2021
|
04
|
37
|
2021
|
05
|
20
|
2021
|
06
|
31
|
2021
|
07
|
23
|
2021
|
08
|
36
|
2021
|
09
|
37
|
2021
|
10
|
56
|
2021
|
11
|
33
|
2021
|
12
|
45
|
2022
|
01
|
31
|
2022
|
02
|
32
|
2022
|
03
|
42
|
2022
|
04
|
38
|
2022
|
05
|
41
|
2022
|
06
|
44
|
2022
|
07
|
36
|
2022
|
08
|
62
|
2022
|
09
|
49
|
2022
|
10
|
78
|
2022
|
11
|
55
|
2022
|
12
|
129
|
2023
|
01
|
65
|
2023
|
02
|
64
|
2023
|
03
|
111
|
2023
|
04
|
65
|
2023
|
05
|
52
|
2023
|
06
|
81
|
2023
|
07
|
75
|
2023
|
08
|
61
|
2023
|
09
|
48
|
2023
|
10
|
96
|
2023
|
11
|
85
|
2023
|
12
|
85
|
Part h
- Calculate average delisting return
delist$DLRET <- as.numeric(delist$DLRET)
delist %>%
group_by(year,month) %>%
summarise(avg_delist_return = mean(DLRET, na.rm = TRUE))%>%
kable(digits = 3, col.names = c("Year", "Month", "Average Delisting Return")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Year
|
Month
|
Average Delisting Return
|
2021
|
01
|
-0.047
|
2021
|
02
|
-0.006
|
2021
|
03
|
-0.019
|
2021
|
04
|
0.018
|
2021
|
05
|
-0.001
|
2021
|
06
|
0.013
|
2021
|
07
|
-0.067
|
2021
|
08
|
0.018
|
2021
|
09
|
-0.056
|
2021
|
10
|
-0.007
|
2021
|
11
|
-0.008
|
2021
|
12
|
-0.005
|
2022
|
01
|
-0.015
|
2022
|
02
|
-0.060
|
2022
|
03
|
-0.099
|
2022
|
04
|
-0.066
|
2022
|
05
|
-0.132
|
2022
|
06
|
-0.139
|
2022
|
07
|
-0.004
|
2022
|
08
|
-0.014
|
2022
|
09
|
-0.120
|
2022
|
10
|
0.063
|
2022
|
11
|
-0.109
|
2022
|
12
|
-0.050
|
2023
|
01
|
-0.040
|
2023
|
02
|
-0.089
|
2023
|
03
|
-0.066
|
2023
|
04
|
-0.146
|
2023
|
05
|
-0.081
|
2023
|
06
|
-0.097
|
2023
|
07
|
-0.088
|
2023
|
08
|
-0.134
|
2023
|
09
|
0.024
|
2023
|
10
|
-0.138
|
2023
|
11
|
-0.109
|
2023
|
12
|
-0.133
|
Part i
Delisting bias occurs in financial databases like CRSP when a stock
is delisted from an exchange, and the final returns of the delisted
stock are not properly accounted for. This can lead to overstated or
understated average stock returns, causing biases in empirical research.
If we are not careful, we may find ourselves drawing incorrect
conclusions from our analysis.
Part j
Surviorship bias is the tendency to focus on the firms that have
survived and are still in the sample, while ignoring the firms that have
delisted or gone bankrupt. This can lead to an overestimation of the
average stock returns, as the firms that have delisted or gone bankrupt
are likely to have lower returns than the firms that have survived. If
we are not careful, we may find ourselves drawing incorrect conclusions
from our analysis.
Let us now compare the means, medians and standard deviations of the
raw returns with and without delisted firms.
# creating a variable for firms that were delisted or not
data$delisted <- ifelse(data$DLSTCD == 100 | is.na(data$DLSTCD), 0, 1)
# calculate the means, medians and standard deviations of the raw returns with and without delisted firms
data %>%
group_by(delisted, year, month) %>%
summarise(mean_return = mean(RET, na.rm = TRUE),
median_return = median(RET, na.rm = TRUE),
sd_return = sd(RET, na.rm = TRUE),
n = n())%>%
kable(digits = 3, col.names = c("Delisted", "Year", "Month", "Mean Return", "Median Return", "Standard Deviation", "N")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Delisted
|
Year
|
Month
|
Mean Return
|
Median Return
|
Standard Deviation
|
N
|
0
|
2021
|
01
|
0.070
|
0.007
|
0.377
|
8185
|
0
|
2021
|
02
|
0.065
|
0.031
|
0.194
|
8375
|
0
|
2021
|
03
|
0.013
|
0.016
|
0.178
|
8509
|
0
|
2021
|
04
|
0.017
|
0.020
|
0.176
|
8616
|
0
|
2021
|
05
|
0.013
|
0.007
|
0.130
|
8718
|
0
|
2021
|
06
|
0.021
|
0.005
|
0.144
|
8852
|
0
|
2021
|
07
|
-0.032
|
-0.005
|
0.125
|
8921
|
0
|
2021
|
08
|
0.019
|
0.011
|
0.138
|
9015
|
0
|
2021
|
09
|
-0.028
|
-0.031
|
0.132
|
9132
|
0
|
2021
|
10
|
0.027
|
0.021
|
0.143
|
9222
|
0
|
2021
|
11
|
-0.045
|
-0.027
|
0.148
|
9342
|
0
|
2021
|
12
|
0.000
|
0.014
|
0.141
|
9407
|
0
|
2022
|
01
|
-0.067
|
-0.048
|
0.131
|
9482
|
0
|
2022
|
02
|
-0.008
|
-0.010
|
0.140
|
9528
|
0
|
2022
|
03
|
0.015
|
0.005
|
0.167
|
9563
|
0
|
2022
|
04
|
-0.088
|
-0.067
|
0.132
|
9577
|
0
|
2022
|
05
|
-0.015
|
0.000
|
0.140
|
9591
|
0
|
2022
|
06
|
-0.068
|
-0.070
|
0.166
|
9592
|
0
|
2022
|
07
|
0.065
|
0.054
|
0.161
|
9630
|
0
|
2022
|
08
|
-0.013
|
-0.024
|
0.153
|
9634
|
0
|
2022
|
09
|
-0.098
|
-0.091
|
0.137
|
9648
|
0
|
2022
|
10
|
0.049
|
0.042
|
0.174
|
9625
|
0
|
2022
|
11
|
0.030
|
0.038
|
0.183
|
9623
|
0
|
2022
|
12
|
-0.046
|
-0.040
|
0.190
|
9541
|
0
|
2023
|
01
|
0.122
|
0.074
|
0.288
|
9519
|
0
|
2023
|
02
|
-0.034
|
-0.026
|
0.134
|
9519
|
0
|
2023
|
03
|
-0.033
|
-0.005
|
0.162
|
9459
|
0
|
2023
|
04
|
-0.009
|
0.001
|
0.245
|
9446
|
0
|
2023
|
05
|
-0.009
|
-0.017
|
0.183
|
9459
|
0
|
2023
|
06
|
0.052
|
0.045
|
0.160
|
9434
|
0
|
2023
|
07
|
0.042
|
0.028
|
0.151
|
9430
|
0
|
2023
|
08
|
-0.050
|
-0.031
|
0.164
|
9469
|
0
|
2023
|
09
|
-0.054
|
-0.044
|
0.150
|
9517
|
0
|
2023
|
10
|
-0.058
|
-0.036
|
0.196
|
9500
|
0
|
2023
|
11
|
0.079
|
0.072
|
0.224
|
9490
|
0
|
2023
|
12
|
0.081
|
0.052
|
0.236
|
9413
|
1
|
2021
|
01
|
0.003
|
0.002
|
0.005
|
23
|
1
|
2021
|
02
|
0.036
|
0.026
|
0.036
|
18
|
1
|
2021
|
03
|
0.074
|
0.028
|
0.129
|
49
|
1
|
2021
|
04
|
-0.013
|
0.005
|
0.110
|
37
|
1
|
2021
|
05
|
0.013
|
0.012
|
0.004
|
20
|
1
|
2021
|
06
|
-0.022
|
-0.033
|
0.024
|
31
|
1
|
2021
|
07
|
-0.003
|
-0.003
|
NA
|
23
|
1
|
2021
|
08
|
0.011
|
0.011
|
0.010
|
36
|
1
|
2021
|
09
|
0.021
|
0.002
|
0.179
|
37
|
1
|
2021
|
10
|
0.034
|
0.073
|
0.110
|
56
|
1
|
2021
|
11
|
0.036
|
0.000
|
0.115
|
33
|
1
|
2021
|
12
|
0.031
|
0.017
|
0.031
|
45
|
1
|
2022
|
01
|
-0.015
|
0.003
|
0.051
|
31
|
1
|
2022
|
02
|
0.031
|
0.013
|
0.037
|
32
|
1
|
2022
|
03
|
0.000
|
-0.015
|
0.047
|
42
|
1
|
2022
|
04
|
0.002
|
0.002
|
0.001
|
38
|
1
|
2022
|
05
|
0.007
|
0.007
|
0.008
|
41
|
1
|
2022
|
06
|
-0.047
|
-0.007
|
0.094
|
44
|
1
|
2022
|
07
|
0.017
|
0.017
|
0.012
|
36
|
1
|
2022
|
08
|
0.013
|
0.009
|
0.044
|
62
|
1
|
2022
|
09
|
-0.018
|
-0.008
|
0.121
|
49
|
1
|
2022
|
10
|
0.126
|
0.018
|
0.224
|
78
|
1
|
2022
|
11
|
0.004
|
0.004
|
0.039
|
55
|
1
|
2022
|
12
|
-0.249
|
-0.047
|
0.343
|
129
|
1
|
2023
|
01
|
0.023
|
0.024
|
0.013
|
65
|
1
|
2023
|
02
|
-0.173
|
-0.012
|
0.289
|
64
|
1
|
2023
|
03
|
-0.142
|
-0.012
|
0.374
|
111
|
1
|
2023
|
04
|
-0.043
|
0.001
|
0.314
|
65
|
1
|
2023
|
05
|
-0.020
|
0.001
|
0.081
|
52
|
1
|
2023
|
06
|
-0.156
|
0.004
|
0.330
|
81
|
1
|
2023
|
07
|
0.000
|
0.004
|
0.041
|
75
|
1
|
2023
|
08
|
-0.036
|
-0.028
|
0.055
|
61
|
1
|
2023
|
09
|
0.034
|
0.051
|
0.077
|
48
|
1
|
2023
|
10
|
0.513
|
0.513
|
0.660
|
96
|
1
|
2023
|
11
|
0.048
|
0.064
|
0.064
|
85
|
1
|
2023
|
12
|
NaN
|
NA
|
NA
|
85
|
data %>%
group_by(delisted, year) %>%
summarise(mean_return = mean(RET, na.rm = TRUE),
median_return = median(RET, na.rm = TRUE),
sd_return = sd(RET, na.rm = TRUE),
n = n())%>%
kable(digits = 3, col.names = c("Delisted", "Year", "Mean Return", "Median Return", "Standard Deviation", "N")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Delisted
|
Year
|
Mean Return
|
Median Return
|
Standard Deviation
|
N
|
0
|
2021
|
0.010
|
0.004
|
0.182
|
106294
|
0
|
2022
|
-0.020
|
-0.014
|
0.166
|
115034
|
0
|
2023
|
0.011
|
0.004
|
0.205
|
113655
|
1
|
2021
|
0.025
|
0.010
|
0.099
|
408
|
1
|
2022
|
-0.037
|
0.002
|
0.187
|
637
|
1
|
2023
|
-0.027
|
0.006
|
0.242
|
888
|
We now compare the entire sample with firms who survived to the end
of 2023
Total Sample
data %>%
group_by(year, month) %>%
summarise(mean_return = mean(RET, na.rm = TRUE),
median_return = median(RET, na.rm = TRUE),
sd_return = sd(RET, na.rm = TRUE),
n = n())%>%
kable(digits = 3, col.names = c("Year", "Month", "Mean Return", "Median Return", "Standard Deviation", "N")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Year
|
Month
|
Mean Return
|
Median Return
|
Standard Deviation
|
N
|
2021
|
01
|
0.070
|
0.007
|
0.377
|
8208
|
2021
|
02
|
0.065
|
0.031
|
0.194
|
8393
|
2021
|
03
|
0.013
|
0.016
|
0.178
|
8558
|
2021
|
04
|
0.017
|
0.020
|
0.176
|
8653
|
2021
|
05
|
0.013
|
0.008
|
0.130
|
8738
|
2021
|
06
|
0.021
|
0.005
|
0.144
|
8883
|
2021
|
07
|
-0.032
|
-0.005
|
0.125
|
8944
|
2021
|
08
|
0.019
|
0.011
|
0.138
|
9051
|
2021
|
09
|
-0.028
|
-0.031
|
0.132
|
9169
|
2021
|
10
|
0.027
|
0.021
|
0.143
|
9278
|
2021
|
11
|
-0.045
|
-0.027
|
0.148
|
9375
|
2021
|
12
|
0.000
|
0.014
|
0.141
|
9452
|
2022
|
01
|
-0.067
|
-0.048
|
0.131
|
9513
|
2022
|
02
|
-0.008
|
-0.010
|
0.140
|
9560
|
2022
|
03
|
0.015
|
0.005
|
0.167
|
9605
|
2022
|
04
|
-0.088
|
-0.067
|
0.132
|
9615
|
2022
|
05
|
-0.015
|
0.000
|
0.140
|
9632
|
2022
|
06
|
-0.068
|
-0.070
|
0.166
|
9636
|
2022
|
07
|
0.065
|
0.054
|
0.161
|
9666
|
2022
|
08
|
-0.013
|
-0.024
|
0.153
|
9696
|
2022
|
09
|
-0.098
|
-0.091
|
0.137
|
9697
|
2022
|
10
|
0.050
|
0.042
|
0.174
|
9703
|
2022
|
11
|
0.030
|
0.037
|
0.183
|
9678
|
2022
|
12
|
-0.046
|
-0.040
|
0.190
|
9670
|
2023
|
01
|
0.122
|
0.074
|
0.288
|
9584
|
2023
|
02
|
-0.034
|
-0.026
|
0.134
|
9583
|
2023
|
03
|
-0.033
|
-0.005
|
0.162
|
9570
|
2023
|
04
|
-0.009
|
0.001
|
0.245
|
9511
|
2023
|
05
|
-0.009
|
-0.017
|
0.182
|
9511
|
2023
|
06
|
0.052
|
0.045
|
0.160
|
9515
|
2023
|
07
|
0.042
|
0.028
|
0.151
|
9505
|
2023
|
08
|
-0.050
|
-0.031
|
0.164
|
9530
|
2023
|
09
|
-0.054
|
-0.044
|
0.150
|
9565
|
2023
|
10
|
-0.058
|
-0.036
|
0.196
|
9596
|
2023
|
11
|
0.079
|
0.072
|
0.224
|
9575
|
2023
|
12
|
0.081
|
0.052
|
0.236
|
9498
|
Firms that survived
data %>%
filter(delisted == 0) %>%
group_by(year, month) %>%
summarise(mean_return = mean(RET, na.rm = TRUE),
median_return = median(RET, na.rm = TRUE),
sd_return = sd(RET, na.rm = TRUE),
n = n())%>%
kable(digits = 3, col.names = c("Year", "Month", "Mean Return", "Median Return", "Standard Deviation", "N")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Year
|
Month
|
Mean Return
|
Median Return
|
Standard Deviation
|
N
|
2021
|
01
|
0.070
|
0.007
|
0.377
|
8185
|
2021
|
02
|
0.065
|
0.031
|
0.194
|
8375
|
2021
|
03
|
0.013
|
0.016
|
0.178
|
8509
|
2021
|
04
|
0.017
|
0.020
|
0.176
|
8616
|
2021
|
05
|
0.013
|
0.007
|
0.130
|
8718
|
2021
|
06
|
0.021
|
0.005
|
0.144
|
8852
|
2021
|
07
|
-0.032
|
-0.005
|
0.125
|
8921
|
2021
|
08
|
0.019
|
0.011
|
0.138
|
9015
|
2021
|
09
|
-0.028
|
-0.031
|
0.132
|
9132
|
2021
|
10
|
0.027
|
0.021
|
0.143
|
9222
|
2021
|
11
|
-0.045
|
-0.027
|
0.148
|
9342
|
2021
|
12
|
0.000
|
0.014
|
0.141
|
9407
|
2022
|
01
|
-0.067
|
-0.048
|
0.131
|
9482
|
2022
|
02
|
-0.008
|
-0.010
|
0.140
|
9528
|
2022
|
03
|
0.015
|
0.005
|
0.167
|
9563
|
2022
|
04
|
-0.088
|
-0.067
|
0.132
|
9577
|
2022
|
05
|
-0.015
|
0.000
|
0.140
|
9591
|
2022
|
06
|
-0.068
|
-0.070
|
0.166
|
9592
|
2022
|
07
|
0.065
|
0.054
|
0.161
|
9630
|
2022
|
08
|
-0.013
|
-0.024
|
0.153
|
9634
|
2022
|
09
|
-0.098
|
-0.091
|
0.137
|
9648
|
2022
|
10
|
0.049
|
0.042
|
0.174
|
9625
|
2022
|
11
|
0.030
|
0.038
|
0.183
|
9623
|
2022
|
12
|
-0.046
|
-0.040
|
0.190
|
9541
|
2023
|
01
|
0.122
|
0.074
|
0.288
|
9519
|
2023
|
02
|
-0.034
|
-0.026
|
0.134
|
9519
|
2023
|
03
|
-0.033
|
-0.005
|
0.162
|
9459
|
2023
|
04
|
-0.009
|
0.001
|
0.245
|
9446
|
2023
|
05
|
-0.009
|
-0.017
|
0.183
|
9459
|
2023
|
06
|
0.052
|
0.045
|
0.160
|
9434
|
2023
|
07
|
0.042
|
0.028
|
0.151
|
9430
|
2023
|
08
|
-0.050
|
-0.031
|
0.164
|
9469
|
2023
|
09
|
-0.054
|
-0.044
|
0.150
|
9517
|
2023
|
10
|
-0.058
|
-0.036
|
0.196
|
9500
|
2023
|
11
|
0.079
|
0.072
|
0.224
|
9490
|
2023
|
12
|
0.081
|
0.052
|
0.236
|
9413
|
Part k
Test whether the differences are significantly different from each
other for firms that surved through Dec 204 versus others. I found no
statistical difference between the two groups.
# create a variable for firms that survived through Dec 2023
data$survived <- ifelse(data$year == 2023 & data$month == 12, 1, 0)
# test whether the differences are significantly different from each other for firms that survived through Dec 2023 versus others
data %>%
group_by(survived) %>%
summarise(mean_return = mean(RET, na.rm = TRUE),
median_return = median(RET, na.rm = TRUE),
sd_return = sd(RET, na.rm = TRUE),
n = n())%>%
kable(digits = 3, col.names = c("Survived", "Mean Return", "Median Return", "Standard Deviation", "N")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Survived
|
Mean Return
|
Median Return
|
Standard Deviation
|
N
|
0
|
-0.002
|
-0.001
|
0.183
|
327418
|
1
|
0.081
|
0.052
|
0.236
|
9498
|
# testing difference in means
t.test(RET ~ survived, data = data)
##
## Welch Two Sample t-test
##
## data: RET by survived
## t = -33.782, df = 9658.1, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -0.08812558 -0.07845954
## sample estimates:
## mean in group 0 mean in group 1
## -0.002386573 0.080905986
Part l
- From Compustat, we downloaded data with FIC to merge with
PERMNO
fic <- read.csv("foreign.csv")
# Only keep unique PERMNOS
fic <- fic %>%
dplyr:: select(PERMNO, fic) %>%
distinct(PERMNO, .keep_all = TRUE)
number_foreign <- data %>%
distinct(PERMNO, .keep_all = TRUE) %>%
left_join(fic, by = "PERMNO")
Counting the number of each country
number_foreign %>%
group_by(fic) %>%
summarise(n = n_distinct(PERMNO)) %>%
kable(digits = 3, col.names = c("Country", "Number of Firms")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Country
|
Number of Firms
|
ARG
|
14
|
ATG
|
1
|
AUS
|
22
|
BEL
|
8
|
BHS
|
2
|
BMU
|
69
|
BRA
|
22
|
CAN
|
285
|
CHE
|
25
|
CHL
|
8
|
CHN
|
9
|
COL
|
2
|
CUW
|
1
|
CYM
|
417
|
CYP
|
4
|
DEU
|
12
|
DNK
|
8
|
ESP
|
5
|
FIN
|
1
|
FRA
|
17
|
GBR
|
80
|
GGY
|
7
|
HKG
|
2
|
IDN
|
1
|
IMN
|
2
|
IND
|
8
|
IRL
|
45
|
ISR
|
112
|
ITA
|
4
|
JEY
|
18
|
JPN
|
17
|
KOR
|
9
|
LBR
|
1
|
LUX
|
22
|
MEX
|
14
|
MHL
|
43
|
MUS
|
2
|
NLD
|
41
|
NOR
|
2
|
PAN
|
4
|
PER
|
3
|
PHL
|
1
|
RUS
|
3
|
SGP
|
12
|
SWE
|
5
|
TUR
|
2
|
TWN
|
6
|
USA
|
8963
|
VGB
|
69
|
VIR
|
1
|
ZAF
|
6
|
NA
|
905
|
Total number of non-US firms, excluding NAs
number_foreign %>%
filter(!is.na(fic)) %>%
filter(! fic == "USA") %>%
summarise(n = n_distinct(PERMNO)) %>%
kable(digits = 3, col.names = c("Number of Non-US Firms")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Number of Non-US Firms
|
1474
|