This document analyzes Taiwan stock market data from the Taiwan Economic Journal (TEJ) database covering the period from January 2, 2024 to June 30, 2025.
# Clear environment
rm(list=ls())
# Function to install and load packages
install_and_load <- function(packages) {
for (pkg in packages) {
if (!require(pkg, character.only = TRUE, quietly = TRUE)) {
install.packages(pkg, dependencies = TRUE)
library(pkg, character.only = TRUE)
}
}
}
# Install and load required packages
required_packages <- c("tidyverse", "timetk", "zoo", "knitr", "DT",
"PerformanceAnalytics", "xts")
install_and_load(required_packages)# Try different import functions
# data_csv <- read_csv("tej_day_price_2024_20250630.txt")
# data_tsv <- read_tsv("tej_day_price_2024_20250630.txt")
data_delim <- read_delim("tej_day_price_2024_20250630.txt", delim = "\t")
# Show results using glimpse()
glimpse(data_delim)## Rows: 337,347
## Columns: 12
## $ CO_ID <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID` <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector` <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name` <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)` <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)` <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)` <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)` <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)` <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)` <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…
Replace columns 2, 3, 5, 9, and 12 with new names: “date”, “id”, “name”, “price”, “cap”.
data <- data_delim %>%
rename(date = 2, # Date
id = 3, # TSE ID
name = 5, # English Short Name
price = 9, # Close(NTD)
cap = 12) %>% # Market Cap.(NTD MN)
select(id, name, date, price, cap)
glimpse(data)## Rows: 337,347
## Columns: 5
## $ id <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203, 1210, 1213…
## $ name <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSINGTA", "Tuna Cem…
## $ date <dbl> 20240102, 20240102, 20240102, 20240102, 20240102, 20240102, 2024…
## $ price <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.1807, 18.3336, 1…
## $ cap <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754, 9640, 13992, 52…
Select id, date, price columns, convert id to text and date to date format, then pivot to wide format.
data_wide <- data %>%
select(id, date, price) %>%
mutate(id = as.character(id),
date = ymd(date)) %>%
spread(key = id, value = price)
# Show first 10 rows and 10 columns
data_wide[1:10, 1:11] %>%
kable(caption = "Wide Format Data (First 10 rows, 10 stocks)")| date | 1101 | 1102 | 1103 | 1104 | 1108 | 1109 | 1110 | 1201 | 1203 | 1210 |
|---|---|---|---|---|---|---|---|---|---|---|
| 2024-01-02 | 32.3972 | 37.0392 | 17.6421 | 26.0628 | 14.0788 | 16.1807 | 18.3336 | 18.1893 | 55.3475 | 53.9934 |
| 2024-01-03 | 31.9304 | 36.5892 | 17.5017 | 25.8873 | 14.0343 | 16.0918 | 18.1873 | 18.1893 | 54.6830 | 53.9017 |
| 2024-01-04 | 31.9304 | 37.0392 | 17.5017 | 25.5363 | 14.0788 | 16.1362 | 18.3823 | 18.1416 | 54.1134 | 53.3517 |
| 2024-01-05 | 32.0704 | 36.9942 | 17.5485 | 25.7995 | 14.0788 | 16.1362 | 18.3336 | 18.1416 | 54.8729 | 52.8017 |
| 2024-01-08 | 31.9771 | 37.1742 | 17.5953 | 25.7118 | 14.1679 | 16.0918 | 18.3336 | 18.1416 | 54.5881 | 52.9850 |
| 2024-01-09 | 31.7903 | 36.9042 | 17.5485 | 25.3607 | 13.8115 | 16.0473 | 18.4311 | 18.0939 | 54.5881 | 52.3433 |
| 2024-01-10 | 31.4636 | 36.6342 | 17.4081 | 25.3607 | 13.6778 | 16.0473 | 18.4799 | 17.9984 | 53.4488 | 52.2516 |
| 2024-01-11 | 31.4636 | 36.7242 | 17.5017 | 25.5801 | 13.7670 | 16.0918 | 18.4799 | 17.9984 | 55.0627 | 52.0683 |
| 2024-01-12 | 31.5103 | 36.5892 | 17.5017 | 25.6240 | 13.8115 | 16.0473 | 18.4311 | 17.9984 | 54.3982 | 52.2516 |
| 2024-01-15 | 31.4169 | 36.5892 | 17.4081 | 25.4485 | 13.7670 | 16.0029 | 18.3336 | 18.0461 | 53.8286 | 53.1683 |
na_counts <- data_wide %>%
select(-date) %>%
summarise(across(everything(), ~sum(is.na(.)))) %>%
pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
filter(value > 0) %>%
arrange(key)
kable(na_counts, caption = "Stocks with NA Values and Their Counts")| key | value |
|---|---|
| 3716 | 160 |
| 4585 | 177 |
| 7722 | 18 |
| 7732 | 43 |
| 7736 | 53 |
| 7750 | 108 |
| 7765 | 151 |
| 7780 | 196 |
| 7788 | 198 |
| 7799 | 217 |
There are 10 stocks with missing values.
Replace NA values with the closest available stock prices using
na.locf().
data_filled <- data_wide %>%
mutate(across(-date, ~na.locf(., na.rm = FALSE)))
# Show first 10 rows and 10 columns
data_filled[1:10, 1:11] %>%
kable(caption = "Data with NA Values Filled (First 10 rows, 10 stocks)")| date | 1101 | 1102 | 1103 | 1104 | 1108 | 1109 | 1110 | 1201 | 1203 | 1210 |
|---|---|---|---|---|---|---|---|---|---|---|
| 2024-01-02 | 32.3972 | 37.0392 | 17.6421 | 26.0628 | 14.0788 | 16.1807 | 18.3336 | 18.1893 | 55.3475 | 53.9934 |
| 2024-01-03 | 31.9304 | 36.5892 | 17.5017 | 25.8873 | 14.0343 | 16.0918 | 18.1873 | 18.1893 | 54.6830 | 53.9017 |
| 2024-01-04 | 31.9304 | 37.0392 | 17.5017 | 25.5363 | 14.0788 | 16.1362 | 18.3823 | 18.1416 | 54.1134 | 53.3517 |
| 2024-01-05 | 32.0704 | 36.9942 | 17.5485 | 25.7995 | 14.0788 | 16.1362 | 18.3336 | 18.1416 | 54.8729 | 52.8017 |
| 2024-01-08 | 31.9771 | 37.1742 | 17.5953 | 25.7118 | 14.1679 | 16.0918 | 18.3336 | 18.1416 | 54.5881 | 52.9850 |
| 2024-01-09 | 31.7903 | 36.9042 | 17.5485 | 25.3607 | 13.8115 | 16.0473 | 18.4311 | 18.0939 | 54.5881 | 52.3433 |
| 2024-01-10 | 31.4636 | 36.6342 | 17.4081 | 25.3607 | 13.6778 | 16.0473 | 18.4799 | 17.9984 | 53.4488 | 52.2516 |
| 2024-01-11 | 31.4636 | 36.7242 | 17.5017 | 25.5801 | 13.7670 | 16.0918 | 18.4799 | 17.9984 | 55.0627 | 52.0683 |
| 2024-01-12 | 31.5103 | 36.5892 | 17.5017 | 25.6240 | 13.8115 | 16.0473 | 18.4311 | 17.9984 | 54.3982 | 52.2516 |
| 2024-01-15 | 31.4169 | 36.5892 | 17.4081 | 25.4485 | 13.7670 | 16.0029 | 18.3336 | 18.0461 | 53.8286 | 53.1683 |
Delete stocks that contained NA values from the original data.
stocks_with_na <- na_counts$key
data_clean <- data_wide %>%
select(-all_of(stocks_with_na))
cat("Updated dimensions after removing stocks with NA:\n")## Updated dimensions after removing stocks with NA:
## Rows: 358
## Columns: 937
Convert to xts time series and calculate daily discrete returns.
# Convert to xts
data_xts <- data_clean %>%
tk_xts(date_var = date)
# Calculate daily returns using PerformanceAnalytics
daily_returns <- Return.calculate(data_xts, method = "discrete")
# Remove first row and show first 5 stocks, first 5 days
daily_returns[-1, 1:5] %>%
head(5) %>%
kable(caption = "Daily Returns (First 5 stocks, First 5 days)", digits = 6)| 1101 | 1102 | 1103 | 1104 | 1108 |
|---|---|---|---|---|
| -0.014408653 | -0.012149290 | -0.007958236 | -0.006733735 | -0.003160781 |
| 0.000000000 | 0.012298711 | 0.000000000 | -0.013558772 | 0.003170803 |
| 0.004384536 | -0.001214929 | 0.002674026 | 0.010306896 | 0.000000000 |
| -0.002909225 | 0.004865628 | 0.002666895 | -0.003399291 | 0.006328664 |
| -0.005841680 | -0.007263102 | -0.002659801 | -0.013655209 | -0.025155457 |
# Convert to monthly data using xts::to.period
monthly_data <- to.period(data_xts, period = "months", OHLC = FALSE)
# Calculate monthly returns
monthly_returns <- Return.calculate(monthly_data, method = "discrete")
# Remove first row and show first 5 stocks, first 5 months
monthly_returns[-1, 1:5] %>%
head(5) %>%
kable(caption = "Monthly Returns (First 5 stocks, First 5 months)", digits = 6)| 1101 | 1102 | 1103 | 1104 | 1108 |
|---|---|---|---|---|
| 0.006272034 | 0.01760804 | -0.008404066 | 0.01887014 | 0.036185231 |
| 0.001554899 | 0.02101398 | -0.016950585 | 0.06397241 | 0.003170803 |
| -0.003108301 | 0.05811288 | 0.057470064 | 0.11234002 | 0.072790295 |
| 0.029639309 | -0.04920108 | 0.032611536 | -0.03271487 | 0.000000000 |
| 0.036364817 | 0.05535680 | -0.036845213 | 0.04852830 | -0.011805133 |
Find the 20 largest market cap firms at year-end 2024 and mid-year 2025.
top_20_firms <- data %>%
mutate(date = ymd(date),
year1 = year(date),
id = as.character(id)) %>%
filter((date == "2024-12-31") | (date == "2025-06-30")) %>%
group_by(year1) %>%
arrange(desc(cap)) %>%
slice(1:20) %>%
ungroup() %>%
mutate(cap_formatted = scales::dollar(cap, prefix = "$", suffix = "",
big.mark = ",", accuracy = 1)) %>%
select(date, year1, id, name, cap, cap_formatted)
# Display using datatable for interactivity
datatable(top_20_firms,
caption = "Top 20 Largest Market Cap Firms (2024 & 2025)",
options = list(pageLength = 20))top_20_firms %>%
group_by(year1) %>%
summarise(
`Number of Firms` = n(),
`Total Market Cap (NTD MN)` = sum(cap),
`Average Market Cap (NTD MN)` = mean(cap),
`Largest Firm` = name[which.max(cap)],
`Largest Cap (NTD MN)` = max(cap)
) %>%
kable(caption = "Summary of Top 20 Firms by Year",
format.args = list(big.mark = ","))| year1 | Number of Firms | Total Market Cap (NTD MN) | Average Market Cap (NTD MN) | Largest Firm | Largest Cap (NTD MN) |
|---|---|---|---|---|---|
| 2,024 | 20 | 44,336,828 | 2,216,841 | TSMC | 27,877,688 |
| 2,025 | 20 | 43,341,582 | 2,167,079 | TSMC | 27,488,572 |
This analysis covered:
The dataset contains 358 trading days and 936 stocks after cleaning.
Report generated on 2025-10-29 21:12:49.743957