Introduction

This document analyzes Taiwan stock market data from the Taiwan Economic Journal (TEJ) database covering the period from January 2, 2024 to June 30, 2025.

Question 1: Load Packages and Import Data

# Clear environment
rm(list=ls())

# Function to install and load packages
install_and_load <- function(packages) {
  for (pkg in packages) {
    if (!require(pkg, character.only = TRUE, quietly = TRUE)) {
      install.packages(pkg, dependencies = TRUE)
      library(pkg, character.only = TRUE)
    }
  }
}

# Install and load required packages
required_packages <- c("tidyverse", "timetk", "zoo", "knitr", "DT", 
                       "PerformanceAnalytics", "xts")
install_and_load(required_packages)
# Try different import functions
# data_csv <- read_csv("tej_day_price_2024_20250630.txt")
# data_tsv <- read_tsv("tej_day_price_2024_20250630.txt")
data_delim <- read_delim("tej_day_price_2024_20250630.txt", delim = "\t")

# Show results using glimpse()
glimpse(data_delim)
## Rows: 337,347
## Columns: 12
## $ CO_ID                 <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date                  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID`              <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector`          <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name`  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)`           <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)`           <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)`            <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)`          <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)`       <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)`     <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…

Question 2: Rename Columns

Replace columns 2, 3, 5, 9, and 12 with new names: “date”, “id”, “name”, “price”, “cap”.

data <- data_delim %>%
  rename(date = 2,      # Date
         id = 3,        # TSE ID
         name = 5,      # English Short Name
         price = 9,     # Close(NTD)
         cap = 12) %>%  # Market Cap.(NTD MN)
  select(id, name, date, price, cap)

glimpse(data)
## Rows: 337,347
## Columns: 5
## $ id    <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203, 1210, 1213…
## $ name  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSINGTA", "Tuna Cem…
## $ date  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102, 20240102, 2024…
## $ price <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.1807, 18.3336, 1…
## $ cap   <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754, 9640, 13992, 52…

Question 3: Convert to Wide Format

Select id, date, price columns, convert id to text and date to date format, then pivot to wide format.

data_wide <- data %>%
  select(id, date, price) %>%
  mutate(id = as.character(id),
         date = ymd(date)) %>%
  spread(key = id, value = price)

# Show first 10 rows and 10 columns
data_wide[1:10, 1:11] %>%
  kable(caption = "Wide Format Data (First 10 rows, 10 stocks)")
Wide Format Data (First 10 rows, 10 stocks)
date 1101 1102 1103 1104 1108 1109 1110 1201 1203 1210
2024-01-02 32.3972 37.0392 17.6421 26.0628 14.0788 16.1807 18.3336 18.1893 55.3475 53.9934
2024-01-03 31.9304 36.5892 17.5017 25.8873 14.0343 16.0918 18.1873 18.1893 54.6830 53.9017
2024-01-04 31.9304 37.0392 17.5017 25.5363 14.0788 16.1362 18.3823 18.1416 54.1134 53.3517
2024-01-05 32.0704 36.9942 17.5485 25.7995 14.0788 16.1362 18.3336 18.1416 54.8729 52.8017
2024-01-08 31.9771 37.1742 17.5953 25.7118 14.1679 16.0918 18.3336 18.1416 54.5881 52.9850
2024-01-09 31.7903 36.9042 17.5485 25.3607 13.8115 16.0473 18.4311 18.0939 54.5881 52.3433
2024-01-10 31.4636 36.6342 17.4081 25.3607 13.6778 16.0473 18.4799 17.9984 53.4488 52.2516
2024-01-11 31.4636 36.7242 17.5017 25.5801 13.7670 16.0918 18.4799 17.9984 55.0627 52.0683
2024-01-12 31.5103 36.5892 17.5017 25.6240 13.8115 16.0473 18.4311 17.9984 54.3982 52.2516
2024-01-15 31.4169 36.5892 17.4081 25.4485 13.7670 16.0029 18.3336 18.0461 53.8286 53.1683

Question 4: Identify Stocks with NA Values

na_counts <- data_wide %>%
  select(-date) %>%
  summarise(across(everything(), ~sum(is.na(.)))) %>%
  pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
  filter(value > 0) %>%
  arrange(key)

kable(na_counts, caption = "Stocks with NA Values and Their Counts")
Stocks with NA Values and Their Counts
key value
3716 160
4585 177
7722 18
7732 43
7736 53
7750 108
7765 151
7780 196
7788 198
7799 217

There are 10 stocks with missing values.

Question 5: Fill NA Values

Replace NA values with the closest available stock prices using na.locf().

data_filled <- data_wide %>%
  mutate(across(-date, ~na.locf(., na.rm = FALSE)))

# Show first 10 rows and 10 columns
data_filled[1:10, 1:11] %>%
  kable(caption = "Data with NA Values Filled (First 10 rows, 10 stocks)")
Data with NA Values Filled (First 10 rows, 10 stocks)
date 1101 1102 1103 1104 1108 1109 1110 1201 1203 1210
2024-01-02 32.3972 37.0392 17.6421 26.0628 14.0788 16.1807 18.3336 18.1893 55.3475 53.9934
2024-01-03 31.9304 36.5892 17.5017 25.8873 14.0343 16.0918 18.1873 18.1893 54.6830 53.9017
2024-01-04 31.9304 37.0392 17.5017 25.5363 14.0788 16.1362 18.3823 18.1416 54.1134 53.3517
2024-01-05 32.0704 36.9942 17.5485 25.7995 14.0788 16.1362 18.3336 18.1416 54.8729 52.8017
2024-01-08 31.9771 37.1742 17.5953 25.7118 14.1679 16.0918 18.3336 18.1416 54.5881 52.9850
2024-01-09 31.7903 36.9042 17.5485 25.3607 13.8115 16.0473 18.4311 18.0939 54.5881 52.3433
2024-01-10 31.4636 36.6342 17.4081 25.3607 13.6778 16.0473 18.4799 17.9984 53.4488 52.2516
2024-01-11 31.4636 36.7242 17.5017 25.5801 13.7670 16.0918 18.4799 17.9984 55.0627 52.0683
2024-01-12 31.5103 36.5892 17.5017 25.6240 13.8115 16.0473 18.4311 17.9984 54.3982 52.2516
2024-01-15 31.4169 36.5892 17.4081 25.4485 13.7670 16.0029 18.3336 18.0461 53.8286 53.1683

Question 6: Remove Stocks with NA Values

Delete stocks that contained NA values from the original data.

stocks_with_na <- na_counts$key
data_clean <- data_wide %>%
  select(-all_of(stocks_with_na))

cat("Updated dimensions after removing stocks with NA:\n")
## Updated dimensions after removing stocks with NA:
cat("Rows:", nrow(data_clean), "\n")
## Rows: 358
cat("Columns:", ncol(data_clean), "\n")
## Columns: 937

Question 7: Calculate Daily Returns

Convert to xts time series and calculate daily discrete returns.

# Convert to xts
data_xts <- data_clean %>%
  tk_xts(date_var = date)

# Calculate daily returns using PerformanceAnalytics
daily_returns <- Return.calculate(data_xts, method = "discrete")

# Remove first row and show first 5 stocks, first 5 days
daily_returns[-1, 1:5] %>% 
  head(5) %>%
  kable(caption = "Daily Returns (First 5 stocks, First 5 days)", digits = 6)
Daily Returns (First 5 stocks, First 5 days)
1101 1102 1103 1104 1108
-0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
0.000000000 0.012298711 0.000000000 -0.013558772 0.003170803
0.004384536 -0.001214929 0.002674026 0.010306896 0.000000000
-0.002909225 0.004865628 0.002666895 -0.003399291 0.006328664
-0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457

Question 8: Calculate Monthly Returns

# Convert to monthly data using xts::to.period
monthly_data <- to.period(data_xts, period = "months", OHLC = FALSE)

# Calculate monthly returns
monthly_returns <- Return.calculate(monthly_data, method = "discrete")

# Remove first row and show first 5 stocks, first 5 months
monthly_returns[-1, 1:5] %>% 
  head(5) %>%
  kable(caption = "Monthly Returns (First 5 stocks, First 5 months)", digits = 6)
Monthly Returns (First 5 stocks, First 5 months)
1101 1102 1103 1104 1108
0.006272034 0.01760804 -0.008404066 0.01887014 0.036185231
0.001554899 0.02101398 -0.016950585 0.06397241 0.003170803
-0.003108301 0.05811288 0.057470064 0.11234002 0.072790295
0.029639309 -0.04920108 0.032611536 -0.03271487 0.000000000
0.036364817 0.05535680 -0.036845213 0.04852830 -0.011805133

Question 9: Top 20 Largest Market Cap Firms

Find the 20 largest market cap firms at year-end 2024 and mid-year 2025.

top_20_firms <- data %>%
  mutate(date = ymd(date),
         year1 = year(date),
         id = as.character(id)) %>%
  filter((date == "2024-12-31") | (date == "2025-06-30")) %>%
  group_by(year1) %>%
  arrange(desc(cap)) %>%
  slice(1:20) %>%
  ungroup() %>%
  mutate(cap_formatted = scales::dollar(cap, prefix = "$", suffix = "", 
                                        big.mark = ",", accuracy = 1)) %>%
  select(date, year1, id, name, cap, cap_formatted)

# Display using datatable for interactivity
datatable(top_20_firms, 
          caption = "Top 20 Largest Market Cap Firms (2024 & 2025)",
          options = list(pageLength = 20))

Summary by Year

top_20_firms %>%
  group_by(year1) %>%
  summarise(
    `Number of Firms` = n(),
    `Total Market Cap (NTD MN)` = sum(cap),
    `Average Market Cap (NTD MN)` = mean(cap),
    `Largest Firm` = name[which.max(cap)],
    `Largest Cap (NTD MN)` = max(cap)
  ) %>%
  kable(caption = "Summary of Top 20 Firms by Year", 
        format.args = list(big.mark = ","))
Summary of Top 20 Firms by Year
year1 Number of Firms Total Market Cap (NTD MN) Average Market Cap (NTD MN) Largest Firm Largest Cap (NTD MN)
2,024 20 44,336,828 2,216,841 TSMC 27,877,688
2,025 20 43,341,582 2,167,079 TSMC 27,488,572

Conclusion

This analysis covered:

  1. Data import and exploration
  2. Data cleaning and transformation
  3. Handling missing values
  4. Time series conversion
  5. Return calculations (daily and monthly)
  6. Market capitalization analysis

The dataset contains 358 trading days and 936 stocks after cleaning.


Report generated on 2025-10-29 21:12:49.743957