Purpose

  • The purpose of this script is to show the steps and decisions taken to complete the pre-assignment 2 for FINA 9210.

  • The data used are downloaded from Dropbox and saved locally in the same folder as this script.

library(dplyr)
library(tidyverse)
library(data.table)
library(ggplot2)
library(kableExtra)
library(xtable)
library(TTR)
library(zoo)
library(MASS)
library(DBI)
library(PerformanceAnalytics)

Data

  • The data is first downloaded from WRDS and stored locally, which will then used for analysis.

  • Data between Jan 2021 to Dec 2023

data <-read.csv("data.csv")

Part a

  • For each month, report the number of stocks
data$year <- as.numeric(format(as.Date(data$date), "%Y"))
data$month <- as.factor(format(as.Date(data$date), "%m"))

data %>%
  group_by(year,month) %>%
  summarise(n_stocks = n_distinct(TICKER))%>%
  kable(digits = 3, col.names = c("Year", "Month", "Number of Stocks")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Year Month Number of Stocks
2021 01 8046
2021 02 8139
2021 03 8328
2021 04 8476
2021 05 8573
2021 06 8689
2021 07 8820
2021 08 8884
2021 09 8990
2021 10 9101
2021 11 9185
2021 12 9314
2022 01 9388
2022 02 9454
2022 03 9500
2022 04 9540
2022 05 9550
2022 06 9567
2022 07 9571
2022 08 9602
2022 09 9620
2022 10 9629
2022 11 9608
2022 12 9609
2023 01 9520
2023 02 9505
2023 03 9502
2023 04 9439
2023 05 9429
2023 06 9442
2023 07 9419
2023 08 9415
2023 09 9452
2023 10 9503
2023 11 9485
2023 12 9478

Part b

  • For each month, report the raw return
data$RET <- as.numeric(data$RET)
data %>%
  group_by(year,month) %>%
  summarise(raw_return = mean(RET, na.rm = TRUE))%>%
  kable(digits = 3, col.names = c("Year", "Month", "Raw Return")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Year Month Raw Return
2021 01 0.070
2021 02 0.065
2021 03 0.013
2021 04 0.017
2021 05 0.013
2021 06 0.021
2021 07 -0.032
2021 08 0.019
2021 09 -0.028
2021 10 0.027
2021 11 -0.045
2021 12 0.000
2022 01 -0.067
2022 02 -0.008
2022 03 0.015
2022 04 -0.088
2022 05 -0.015
2022 06 -0.068
2022 07 0.065
2022 08 -0.013
2022 09 -0.098
2022 10 0.050
2022 11 0.030
2022 12 -0.046
2023 01 0.122
2023 02 -0.034
2023 03 -0.033
2023 04 -0.009
2023 05 -0.009
2023 06 0.052
2023 07 0.042
2023 08 -0.050
2023 09 -0.054
2023 10 -0.058
2023 11 0.079
2023 12 0.081

Graphing average raw returns

graph_returns <- data %>%
  group_by(year,month) %>%
  summarise(raw_return = mean(RET, na.rm = TRUE))

# create another column for date
graph_returns$date <- as.Date(paste(graph_returns$year, graph_returns$month, "01", sep = "-"))

# graph
ggplot(graph_returns, aes(x = date, y = raw_return)) +
  geom_line() +
  labs(title = "Average Raw Returns by Month", x = "Date", y = "Average Raw Return") +
  theme_minimal()

Part c

  • For each month, report the total market capitalization
data$mktcap <- as.numeric(data$PRC) * as.numeric(data$SHROUT)

data %>%
  group_by(year,month) %>%
  summarise(mktcap = sum(mktcap, na.rm = TRUE))%>%
  kable(digits = 3, col.names = c("Year", "Month", "Market Capitalization")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Year Month Market Capitalization
2021 01 51902378967
2021 02 53555109644
2021 03 55421424919
2021 04 58227078177
2021 05 58688835704
2021 06 60304574800
2021 07 60927963589
2021 08 62567408312
2021 09 60024943877
2021 10 63932741495
2021 11 62792794887
2021 12 64863624233
2022 01 61047842678
2022 02 59621755623
2022 03 61279592338
2022 04 55728517854
2022 05 55564548569
2022 06 50896478856
2022 07 55237331252
2022 08 53146923252
2022 09 48147152663
2022 10 51644599360
2022 11 54412243029
2022 12 51197347473
2023 01 54885518207
2023 02 53342415333
2023 03 54441593731
2023 04 54835852335
2023 05 54757632331
2023 06 58280185550
2023 07 60404446467
2023 08 59034704677
2023 09 56231996101
2023 10 54418452710
2023 11 59307215187
2023 12 62426648543
  • For each month, report the average market capitalization
data %>%
  group_by(year,month) %>%
  summarise(avg_mktcap = mean(mktcap, na.rm = TRUE))%>%
  kable(digits = 3, col.names = c("Year", "Month", "Average Market Capitalization")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Year Month Average Market Capitalization
2021 01 6452310
2021 02 6580060
2021 03 6676476
2021 04 6887518
2021 05 6849771
2021 06 6949934
2021 07 6914204
2021 08 7057005
2021 09 6685781
2021 10 7051924
2021 11 6848380
2021 12 6986603
2022 01 6513159
2022 02 6317872
2022 03 6466821
2022 04 5858759
2022 05 5834774
2022 06 5332825
2022 07 5787044
2022 08 5561047
2022 09 5022653
2022 10 5398766
2022 11 5685710
2022 12 5392600
2023 01 5795113
2023 02 5644700
2023 03 5789811
2023 04 5837327
2023 05 5831484
2023 06 6214564
2023 07 6453467
2023 08 6305106
2023 09 5971962
2023 10 5779360
2023 11 6295883
2023 12 6641133

Part d

  • For each month, report the total trade volume
data$VOL <- as.numeric(data$VOL)

data %>%
  group_by(year,month) %>%
  summarise(total_volume = sum(VOL, na.rm = TRUE))%>%
  kable(digits = 3, col.names = c("Year", "Month", "Total Trade Volume")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Year Month Total Trade Volume
2021 01 2867731618
2021 02 2770588408
2021 03 2976303882
2021 04 2022042340
2021 05 2080067959
2021 06 2391876889
2021 07 2000361974
2021 08 1932969399
2021 09 2168697670
2021 10 2123197567
2021 11 2267063870
2021 12 2321601283
2022 01 2403657990
2022 02 2244584120
2022 03 3136482920
2022 04 2303785238
2022 05 2720314008
2022 06 2628773273
2022 07 2104310835
2022 08 2383966278
2022 09 2345135568
2022 10 2353901824
2022 11 2303717424
2022 12 2218075353
2023 01 2222773687
2023 02 2125989155
2023 03 2824535615
2023 04 1930726487
2023 05 2303103999
2023 06 2335292440
2023 07 2057683519
2023 08 2375469953
2023 09 2042122423
2023 10 2310597118
2023 11 2180462750
2023 12 2378536853

Part e

  • Find the average length of individual returns / I am not sure why this question is asking

Part f

  • Tabulate the percentage of returns that are missing, report, total number of returns, and the number of missing returns
data %>%
  group_by(year,month) %>%
  summarise(missing_return = sum(is.na(RET))/n(), 
            total_missing = sum(is.na(RET)), 
            total = n())%>%
  kable(digits = 3, col.names = c("Year", "Month", "Percentage of Missing Returns", "Total Missing", "N")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Year Month Percentage of Missing Returns Total Missing N
2021 01 0.034 275 8208
2021 02 0.044 368 8393
2021 03 0.054 464 8558
2021 04 0.045 392 8653
2021 05 0.035 307 8738
2021 06 0.039 346 8883
2021 07 0.033 292 8944
2021 08 0.030 274 9051
2021 09 0.036 334 9169
2021 10 0.039 359 9278
2021 11 0.037 344 9375
2021 12 0.035 330 9452
2022 01 0.027 257 9513
2022 02 0.023 223 9560
2022 03 0.022 207 9605
2022 04 0.019 180 9615
2022 05 0.017 162 9632
2022 06 0.016 153 9636
2022 07 0.017 163 9666
2022 08 0.022 213 9696
2022 09 0.019 187 9697
2022 10 0.020 196 9703
2022 11 0.017 163 9678
2022 12 0.024 229 9670
2023 01 0.016 154 9584
2023 02 0.019 182 9583
2023 03 0.024 233 9570
2023 04 0.018 168 9511
2023 05 0.018 174 9511
2023 06 0.022 206 9515
2023 07 0.021 202 9505
2023 08 0.025 236 9530
2023 09 0.026 251 9565
2023 10 0.029 281 9596
2023 11 0.025 236 9575
2023 12 0.018 171 9498

Part g

  • Find the number of firms that delisted during this period (DLSTCD is not NA and not 100)
delist <- data %>%
  filter(!is.na(DLSTCD)) %>%
  filter(DLSTCD != 100)

delist %>%
  group_by(year,month) %>%
  summarise(n_delist = n_distinct(TICKER))%>%
  kable(digits = 3, col.names = c("Year", "Month", "Number of Delisted Firms")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Year Month Number of Delisted Firms
2021 01 23
2021 02 18
2021 03 49
2021 04 37
2021 05 20
2021 06 31
2021 07 23
2021 08 36
2021 09 37
2021 10 56
2021 11 33
2021 12 45
2022 01 31
2022 02 32
2022 03 42
2022 04 38
2022 05 41
2022 06 44
2022 07 36
2022 08 62
2022 09 49
2022 10 78
2022 11 55
2022 12 129
2023 01 65
2023 02 64
2023 03 111
2023 04 65
2023 05 52
2023 06 81
2023 07 75
2023 08 61
2023 09 48
2023 10 96
2023 11 85
2023 12 85

Part h

  • Calculate average delisting return
delist$DLRET <- as.numeric(delist$DLRET)

delist %>%
  group_by(year,month) %>%
  summarise(avg_delist_return = mean(DLRET, na.rm = TRUE))%>%
  kable(digits = 3, col.names = c("Year", "Month", "Average Delisting Return")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Year Month Average Delisting Return
2021 01 -0.047
2021 02 -0.006
2021 03 -0.019
2021 04 0.018
2021 05 -0.001
2021 06 0.013
2021 07 -0.067
2021 08 0.018
2021 09 -0.056
2021 10 -0.007
2021 11 -0.008
2021 12 -0.005
2022 01 -0.015
2022 02 -0.060
2022 03 -0.099
2022 04 -0.066
2022 05 -0.132
2022 06 -0.139
2022 07 -0.004
2022 08 -0.014
2022 09 -0.120
2022 10 0.063
2022 11 -0.109
2022 12 -0.050
2023 01 -0.040
2023 02 -0.089
2023 03 -0.066
2023 04 -0.146
2023 05 -0.081
2023 06 -0.097
2023 07 -0.088
2023 08 -0.134
2023 09 0.024
2023 10 -0.138
2023 11 -0.109
2023 12 -0.133

Part i

Delisting bias occurs in financial databases like CRSP when a stock is delisted from an exchange, and the final returns of the delisted stock are not properly accounted for. This can lead to overstated or understated average stock returns, causing biases in empirical research. If we are not careful, we may find ourselves drawing incorrect conclusions from our analysis.

Part j

Surviorship bias is the tendency to focus on the firms that have survived and are still in the sample, while ignoring the firms that have delisted or gone bankrupt. This can lead to an overestimation of the average stock returns, as the firms that have delisted or gone bankrupt are likely to have lower returns than the firms that have survived. If we are not careful, we may find ourselves drawing incorrect conclusions from our analysis.

Let us now compare the means, medians and standard deviations of the raw returns with and without delisted firms.

# creating a variable for firms that were delisted or not

data$delisted <- ifelse(data$DLSTCD == 100 | is.na(data$DLSTCD), 0, 1)

# calculate the means, medians and standard deviations of the raw returns with and without delisted firms

data %>%
  group_by(delisted, year, month) %>%
  summarise(mean_return = mean(RET, na.rm = TRUE),
            median_return = median(RET, na.rm = TRUE),
            sd_return = sd(RET, na.rm = TRUE), 
            n = n())%>%
  kable(digits = 3, col.names = c("Delisted", "Year", "Month", "Mean Return", "Median Return", "Standard Deviation", "N")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Delisted Year Month Mean Return Median Return Standard Deviation N
0 2021 01 0.070 0.007 0.377 8185
0 2021 02 0.065 0.031 0.194 8375
0 2021 03 0.013 0.016 0.178 8509
0 2021 04 0.017 0.020 0.176 8616
0 2021 05 0.013 0.007 0.130 8718
0 2021 06 0.021 0.005 0.144 8852
0 2021 07 -0.032 -0.005 0.125 8921
0 2021 08 0.019 0.011 0.138 9015
0 2021 09 -0.028 -0.031 0.132 9132
0 2021 10 0.027 0.021 0.143 9222
0 2021 11 -0.045 -0.027 0.148 9342
0 2021 12 0.000 0.014 0.141 9407
0 2022 01 -0.067 -0.048 0.131 9482
0 2022 02 -0.008 -0.010 0.140 9528
0 2022 03 0.015 0.005 0.167 9563
0 2022 04 -0.088 -0.067 0.132 9577
0 2022 05 -0.015 0.000 0.140 9591
0 2022 06 -0.068 -0.070 0.166 9592
0 2022 07 0.065 0.054 0.161 9630
0 2022 08 -0.013 -0.024 0.153 9634
0 2022 09 -0.098 -0.091 0.137 9648
0 2022 10 0.049 0.042 0.174 9625
0 2022 11 0.030 0.038 0.183 9623
0 2022 12 -0.046 -0.040 0.190 9541
0 2023 01 0.122 0.074 0.288 9519
0 2023 02 -0.034 -0.026 0.134 9519
0 2023 03 -0.033 -0.005 0.162 9459
0 2023 04 -0.009 0.001 0.245 9446
0 2023 05 -0.009 -0.017 0.183 9459
0 2023 06 0.052 0.045 0.160 9434
0 2023 07 0.042 0.028 0.151 9430
0 2023 08 -0.050 -0.031 0.164 9469
0 2023 09 -0.054 -0.044 0.150 9517
0 2023 10 -0.058 -0.036 0.196 9500
0 2023 11 0.079 0.072 0.224 9490
0 2023 12 0.081 0.052 0.236 9413
1 2021 01 0.003 0.002 0.005 23
1 2021 02 0.036 0.026 0.036 18
1 2021 03 0.074 0.028 0.129 49
1 2021 04 -0.013 0.005 0.110 37
1 2021 05 0.013 0.012 0.004 20
1 2021 06 -0.022 -0.033 0.024 31
1 2021 07 -0.003 -0.003 NA 23
1 2021 08 0.011 0.011 0.010 36
1 2021 09 0.021 0.002 0.179 37
1 2021 10 0.034 0.073 0.110 56
1 2021 11 0.036 0.000 0.115 33
1 2021 12 0.031 0.017 0.031 45
1 2022 01 -0.015 0.003 0.051 31
1 2022 02 0.031 0.013 0.037 32
1 2022 03 0.000 -0.015 0.047 42
1 2022 04 0.002 0.002 0.001 38
1 2022 05 0.007 0.007 0.008 41
1 2022 06 -0.047 -0.007 0.094 44
1 2022 07 0.017 0.017 0.012 36
1 2022 08 0.013 0.009 0.044 62
1 2022 09 -0.018 -0.008 0.121 49
1 2022 10 0.126 0.018 0.224 78
1 2022 11 0.004 0.004 0.039 55
1 2022 12 -0.249 -0.047 0.343 129
1 2023 01 0.023 0.024 0.013 65
1 2023 02 -0.173 -0.012 0.289 64
1 2023 03 -0.142 -0.012 0.374 111
1 2023 04 -0.043 0.001 0.314 65
1 2023 05 -0.020 0.001 0.081 52
1 2023 06 -0.156 0.004 0.330 81
1 2023 07 0.000 0.004 0.041 75
1 2023 08 -0.036 -0.028 0.055 61
1 2023 09 0.034 0.051 0.077 48
1 2023 10 0.513 0.513 0.660 96
1 2023 11 0.048 0.064 0.064 85
1 2023 12 NaN NA NA 85
data %>%
  group_by(delisted, year) %>%
  summarise(mean_return = mean(RET, na.rm = TRUE),
            median_return = median(RET, na.rm = TRUE),
            sd_return = sd(RET, na.rm = TRUE),
            n = n())%>%
  kable(digits = 3, col.names = c("Delisted", "Year", "Mean Return", "Median Return", "Standard Deviation", "N")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Delisted Year Mean Return Median Return Standard Deviation N
0 2021 0.010 0.004 0.182 106294
0 2022 -0.020 -0.014 0.166 115034
0 2023 0.011 0.004 0.205 113655
1 2021 0.025 0.010 0.099 408
1 2022 -0.037 0.002 0.187 637
1 2023 -0.027 0.006 0.242 888

We now compare the entire sample with firms who survived to the end of 2023

Total Sample

data %>%
  group_by(year, month) %>%
  summarise(mean_return = mean(RET, na.rm = TRUE),
            median_return = median(RET, na.rm = TRUE),
            sd_return = sd(RET, na.rm = TRUE), 
            n = n())%>%
  kable(digits = 3, col.names = c("Year", "Month", "Mean Return", "Median Return", "Standard Deviation", "N")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Year Month Mean Return Median Return Standard Deviation N
2021 01 0.070 0.007 0.377 8208
2021 02 0.065 0.031 0.194 8393
2021 03 0.013 0.016 0.178 8558
2021 04 0.017 0.020 0.176 8653
2021 05 0.013 0.008 0.130 8738
2021 06 0.021 0.005 0.144 8883
2021 07 -0.032 -0.005 0.125 8944
2021 08 0.019 0.011 0.138 9051
2021 09 -0.028 -0.031 0.132 9169
2021 10 0.027 0.021 0.143 9278
2021 11 -0.045 -0.027 0.148 9375
2021 12 0.000 0.014 0.141 9452
2022 01 -0.067 -0.048 0.131 9513
2022 02 -0.008 -0.010 0.140 9560
2022 03 0.015 0.005 0.167 9605
2022 04 -0.088 -0.067 0.132 9615
2022 05 -0.015 0.000 0.140 9632
2022 06 -0.068 -0.070 0.166 9636
2022 07 0.065 0.054 0.161 9666
2022 08 -0.013 -0.024 0.153 9696
2022 09 -0.098 -0.091 0.137 9697
2022 10 0.050 0.042 0.174 9703
2022 11 0.030 0.037 0.183 9678
2022 12 -0.046 -0.040 0.190 9670
2023 01 0.122 0.074 0.288 9584
2023 02 -0.034 -0.026 0.134 9583
2023 03 -0.033 -0.005 0.162 9570
2023 04 -0.009 0.001 0.245 9511
2023 05 -0.009 -0.017 0.182 9511
2023 06 0.052 0.045 0.160 9515
2023 07 0.042 0.028 0.151 9505
2023 08 -0.050 -0.031 0.164 9530
2023 09 -0.054 -0.044 0.150 9565
2023 10 -0.058 -0.036 0.196 9596
2023 11 0.079 0.072 0.224 9575
2023 12 0.081 0.052 0.236 9498

Firms that survived

data %>%
  filter(delisted == 0) %>%
  group_by(year, month) %>%
  summarise(mean_return = mean(RET, na.rm = TRUE),
            median_return = median(RET, na.rm = TRUE),
            sd_return = sd(RET, na.rm = TRUE), 
            n = n())%>%
  kable(digits = 3, col.names = c("Year", "Month", "Mean Return", "Median Return", "Standard Deviation", "N")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Year Month Mean Return Median Return Standard Deviation N
2021 01 0.070 0.007 0.377 8185
2021 02 0.065 0.031 0.194 8375
2021 03 0.013 0.016 0.178 8509
2021 04 0.017 0.020 0.176 8616
2021 05 0.013 0.007 0.130 8718
2021 06 0.021 0.005 0.144 8852
2021 07 -0.032 -0.005 0.125 8921
2021 08 0.019 0.011 0.138 9015
2021 09 -0.028 -0.031 0.132 9132
2021 10 0.027 0.021 0.143 9222
2021 11 -0.045 -0.027 0.148 9342
2021 12 0.000 0.014 0.141 9407
2022 01 -0.067 -0.048 0.131 9482
2022 02 -0.008 -0.010 0.140 9528
2022 03 0.015 0.005 0.167 9563
2022 04 -0.088 -0.067 0.132 9577
2022 05 -0.015 0.000 0.140 9591
2022 06 -0.068 -0.070 0.166 9592
2022 07 0.065 0.054 0.161 9630
2022 08 -0.013 -0.024 0.153 9634
2022 09 -0.098 -0.091 0.137 9648
2022 10 0.049 0.042 0.174 9625
2022 11 0.030 0.038 0.183 9623
2022 12 -0.046 -0.040 0.190 9541
2023 01 0.122 0.074 0.288 9519
2023 02 -0.034 -0.026 0.134 9519
2023 03 -0.033 -0.005 0.162 9459
2023 04 -0.009 0.001 0.245 9446
2023 05 -0.009 -0.017 0.183 9459
2023 06 0.052 0.045 0.160 9434
2023 07 0.042 0.028 0.151 9430
2023 08 -0.050 -0.031 0.164 9469
2023 09 -0.054 -0.044 0.150 9517
2023 10 -0.058 -0.036 0.196 9500
2023 11 0.079 0.072 0.224 9490
2023 12 0.081 0.052 0.236 9413

Part k

Test whether the differences are significantly different from each other for firms that surved through Dec 204 versus others. I found no statistical difference between the two groups.

# create a variable for firms that survived through Dec 2023
data$survived <- ifelse(data$year == 2023 & data$month == 12, 1, 0)

# test whether the differences are significantly different from each other for firms that survived through Dec 2023 versus others

data %>%
  group_by(survived) %>%
  summarise(mean_return = mean(RET, na.rm = TRUE),
            median_return = median(RET, na.rm = TRUE),
            sd_return = sd(RET, na.rm = TRUE),
            n = n())%>%
  kable(digits = 3, col.names = c("Survived", "Mean Return", "Median Return", "Standard Deviation", "N")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Survived Mean Return Median Return Standard Deviation N
0 -0.002 -0.001 0.183 327418
1 0.081 0.052 0.236 9498
# testing difference in means 

t.test(RET ~ survived, data = data)
## 
##  Welch Two Sample t-test
## 
## data:  RET by survived
## t = -33.782, df = 9658.1, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -0.08812558 -0.07845954
## sample estimates:
## mean in group 0 mean in group 1 
##    -0.002386573     0.080905986

Part l

  • From Compustat, we downloaded data with FIC to merge with PERMNO
fic <- read.csv("foreign.csv")

# Only keep unique PERMNOS

fic <- fic %>%
  dplyr:: select(PERMNO, fic) %>%
  distinct(PERMNO, .keep_all = TRUE)
number_foreign <- data %>%
  distinct(PERMNO, .keep_all = TRUE) %>% 
  left_join(fic, by = "PERMNO")

Counting the number of each country

number_foreign %>%
  group_by(fic) %>%
  summarise(n = n_distinct(PERMNO)) %>%
  kable(digits = 3, col.names = c("Country", "Number of Firms")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Country Number of Firms
ARG 14
ATG 1
AUS 22
BEL 8
BHS 2
BMU 69
BRA 22
CAN 285
CHE 25
CHL 8
CHN 9
COL 2
CUW 1
CYM 417
CYP 4
DEU 12
DNK 8
ESP 5
FIN 1
FRA 17
GBR 80
GGY 7
HKG 2
IDN 1
IMN 2
IND 8
IRL 45
ISR 112
ITA 4
JEY 18
JPN 17
KOR 9
LBR 1
LUX 22
MEX 14
MHL 43
MUS 2
NLD 41
NOR 2
PAN 4
PER 3
PHL 1
RUS 3
SGP 12
SWE 5
TUR 2
TWN 6
USA 8963
VGB 69
VIR 1
ZAF 6
NA 905

Total number of non-US firms, excluding NAs

number_foreign %>%
  filter(!is.na(fic)) %>%
  filter(! fic == "USA") %>%
  summarise(n = n_distinct(PERMNO)) %>%
  kable(digits = 3, col.names = c("Number of Non-US Firms")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Number of Non-US Firms
1474