Introduction

This document analyzes Taiwan stock market data from the Taiwan Economic Journal (TEJ) database covering the period from January 2, 2024 to June 30, 2025.

Question 1: Load Packages and Import Data

# Clear environment
rm(list=ls())

# Function to install and load packages
install_and_load <- function(packages) {
  for (pkg in packages) {
    if (!require(pkg, character.only = TRUE, quietly = TRUE)) {
      install.packages(pkg, dependencies = TRUE)
      library(pkg, character.only = TRUE)
    }
  }
}

# Install and load required packages
required_packages <- c("tidyverse", "timetk", "zoo", "knitr", "DT", 
                       "PerformanceAnalytics", "xts")
install_and_load(required_packages)

# Try different import functions
# data_csv <- read_csv("tej_day_price_2024_20250630.txt")
# data_tsv <- read_tsv("tej_day_price_2024_20250630.txt")
data_delim <- read_delim("tej_day_price_2024_20250630.txt", delim = "\t")

# Show results using glimpse()
glimpse(data_delim)

## Rows: 337,347
## Columns: 12
## $ CO_ID                 <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date                  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID`              <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector`          <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name`  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)`           <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)`           <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)`            <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)`          <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)`       <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)`     <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…

Question 2: Rename Columns

Replace columns 2, 3, 5, 9, and 12 with new names: “date”, “id”, “name”, “price”, “cap”.

data <- data_delim %>%
  rename(date = 2,      # Date
         id = 3,        # TSE ID
         name = 5,      # English Short Name
         price = 9,     # Close(NTD)
         cap = 12) %>%  # Market Cap.(NTD MN)
  select(id, name, date, price, cap)

glimpse(data)

## Rows: 337,347
## Columns: 5
## $ id    <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203, 1210, 1213…
## $ name  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSINGTA", "Tuna Cem…
## $ date  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102, 20240102, 2024…
## $ price <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.1807, 18.3336, 1…
## $ cap   <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754, 9640, 13992, 52…

Question 3: Convert to Wide Format

Select id, date, price columns, convert id to text and date to date format, then pivot to wide format.

data_wide <- data %>%
  select(id, date, price) %>%
  mutate(id = as.character(id),
         date = ymd(date)) %>%
  spread(key = id, value = price)

# Show first 10 rows and 10 columns
data_wide[1:10, 1:11] %>%
  kable(caption = "Wide Format Data (First 10 rows, 10 stocks)")

Wide Format Data (First 10 rows, 10 stocks)
date	1101	1102	1103	1104	1108	1109	1110	1201	1203	1210
2024-01-02	32.3972	37.0392	17.6421	26.0628	14.0788	16.1807	18.3336	18.1893	55.3475	53.9934
2024-01-03	31.9304	36.5892	17.5017	25.8873	14.0343	16.0918	18.1873	18.1893	54.6830	53.9017
2024-01-04	31.9304	37.0392	17.5017	25.5363	14.0788	16.1362	18.3823	18.1416	54.1134	53.3517
2024-01-05	32.0704	36.9942	17.5485	25.7995	14.0788	16.1362	18.3336	18.1416	54.8729	52.8017
2024-01-08	31.9771	37.1742	17.5953	25.7118	14.1679	16.0918	18.3336	18.1416	54.5881	52.9850
2024-01-09	31.7903	36.9042	17.5485	25.3607	13.8115	16.0473	18.4311	18.0939	54.5881	52.3433
2024-01-10	31.4636	36.6342	17.4081	25.3607	13.6778	16.0473	18.4799	17.9984	53.4488	52.2516
2024-01-11	31.4636	36.7242	17.5017	25.5801	13.7670	16.0918	18.4799	17.9984	55.0627	52.0683
2024-01-12	31.5103	36.5892	17.5017	25.6240	13.8115	16.0473	18.4311	17.9984	54.3982	52.2516
2024-01-15	31.4169	36.5892	17.4081	25.4485	13.7670	16.0029	18.3336	18.0461	53.8286	53.1683

Question 4: Identify Stocks with NA Values

na_counts <- data_wide %>%
  select(-date) %>%
  summarise(across(everything(), ~sum(is.na(.)))) %>%
  pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
  filter(value > 0) %>%
  arrange(key)

kable(na_counts, caption = "Stocks with NA Values and Their Counts")

Stocks with NA Values and Their Counts
key	value
3716	160
4585	177
7722	18
7732	43
7736	53
7750	108
7765	151
7780	196
7788	198
7799	217

There are 10 stocks with missing values.

Question 5: Fill NA Values

Replace NA values with the closest available stock prices using na.locf().

data_filled <- data_wide %>%
  mutate(across(-date, ~na.locf(., na.rm = FALSE)))

# Show first 10 rows and 10 columns
data_filled[1:10, 1:11] %>%
  kable(caption = "Data with NA Values Filled (First 10 rows, 10 stocks)")

Data with NA Values Filled (First 10 rows, 10 stocks)
date	1101	1102	1103	1104	1108	1109	1110	1201	1203	1210
2024-01-02	32.3972	37.0392	17.6421	26.0628	14.0788	16.1807	18.3336	18.1893	55.3475	53.9934
2024-01-03	31.9304	36.5892	17.5017	25.8873	14.0343	16.0918	18.1873	18.1893	54.6830	53.9017
2024-01-04	31.9304	37.0392	17.5017	25.5363	14.0788	16.1362	18.3823	18.1416	54.1134	53.3517
2024-01-05	32.0704	36.9942	17.5485	25.7995	14.0788	16.1362	18.3336	18.1416	54.8729	52.8017
2024-01-08	31.9771	37.1742	17.5953	25.7118	14.1679	16.0918	18.3336	18.1416	54.5881	52.9850
2024-01-09	31.7903	36.9042	17.5485	25.3607	13.8115	16.0473	18.4311	18.0939	54.5881	52.3433
2024-01-10	31.4636	36.6342	17.4081	25.3607	13.6778	16.0473	18.4799	17.9984	53.4488	52.2516
2024-01-11	31.4636	36.7242	17.5017	25.5801	13.7670	16.0918	18.4799	17.9984	55.0627	52.0683
2024-01-12	31.5103	36.5892	17.5017	25.6240	13.8115	16.0473	18.4311	17.9984	54.3982	52.2516
2024-01-15	31.4169	36.5892	17.4081	25.4485	13.7670	16.0029	18.3336	18.0461	53.8286	53.1683

Question 6: Remove Stocks with NA Values

Delete stocks that contained NA values from the original data.

stocks_with_na <- na_counts$key
data_clean <- data_wide %>%
  select(-all_of(stocks_with_na))

cat("Updated dimensions after removing stocks with NA:\n")

## Updated dimensions after removing stocks with NA:

cat("Rows:", nrow(data_clean), "\n")

## Rows: 358

cat("Columns:", ncol(data_clean), "\n")

## Columns: 937

Question 7: Calculate Daily Returns

Convert to xts time series and calculate daily discrete returns.

# Convert to xts
data_xts <- data_clean %>%
  tk_xts(date_var = date)

# Calculate daily returns using PerformanceAnalytics
daily_returns <- Return.calculate(data_xts, method = "discrete")

# Remove first row and show first 5 stocks, first 5 days
daily_returns[-1, 1:5] %>% 
  head(5) %>%
  kable(caption = "Daily Returns (First 5 stocks, First 5 days)", digits = 6)

Daily Returns (First 5 stocks, First 5 days)
1101	1102	1103	1104	1108
-0.014408653	-0.012149290	-0.007958236	-0.006733735	-0.003160781
0.000000000	0.012298711	0.000000000	-0.013558772	0.003170803
0.004384536	-0.001214929	0.002674026	0.010306896	0.000000000
-0.002909225	0.004865628	0.002666895	-0.003399291	0.006328664
-0.005841680	-0.007263102	-0.002659801	-0.013655209	-0.025155457

Question 8: Calculate Monthly Returns

# Convert to monthly data using xts::to.period
monthly_data <- to.period(data_xts, period = "months", OHLC = FALSE)

# Calculate monthly returns
monthly_returns <- Return.calculate(monthly_data, method = "discrete")

# Remove first row and show first 5 stocks, first 5 months
monthly_returns[-1, 1:5] %>% 
  head(5) %>%
  kable(caption = "Monthly Returns (First 5 stocks, First 5 months)", digits = 6)

Monthly Returns (First 5 stocks, First 5 months)
1101	1102	1103	1104	1108
0.006272034	0.01760804	-0.008404066	0.01887014	0.036185231
0.001554899	0.02101398	-0.016950585	0.06397241	0.003170803
-0.003108301	0.05811288	0.057470064	0.11234002	0.072790295
0.029639309	-0.04920108	0.032611536	-0.03271487	0.000000000
0.036364817	0.05535680	-0.036845213	0.04852830	-0.011805133

Question 9: Top 20 Largest Market Cap Firms

Find the 20 largest market cap firms at year-end 2024 and mid-year 2025.

top_20_firms <- data %>%
  mutate(date = ymd(date),
         year1 = year(date),
         id = as.character(id)) %>%
  filter((date == "2024-12-31") | (date == "2025-06-30")) %>%
  group_by(year1) %>%
  arrange(desc(cap)) %>%
  slice(1:20) %>%
  ungroup() %>%
  mutate(cap_formatted = scales::dollar(cap, prefix = "$", suffix = "", 
                                        big.mark = ",", accuracy = 1)) %>%
  select(date, year1, id, name, cap, cap_formatted)

# Display using datatable for interactivity
datatable(top_20_firms, 
          caption = "Top 20 Largest Market Cap Firms (2024 & 2025)",
          options = list(pageLength = 20))

Summary by Year

top_20_firms %>%
  group_by(year1) %>%
  summarise(
    `Number of Firms` = n(),
    `Total Market Cap (NTD MN)` = sum(cap),
    `Average Market Cap (NTD MN)` = mean(cap),
    `Largest Firm` = name[which.max(cap)],
    `Largest Cap (NTD MN)` = max(cap)
  ) %>%
  kable(caption = "Summary of Top 20 Firms by Year", 
        format.args = list(big.mark = ","))

Summary of Top 20 Firms by Year
year1	Number of Firms	Total Market Cap (NTD MN)	Average Market Cap (NTD MN)	Largest Firm	Largest Cap (NTD MN)
2,024	20	44,336,828	2,216,841	TSMC	27,877,688
2,025	20	43,341,582	2,167,079	TSMC	27,488,572

Conclusion

This analysis covered:

Data import and exploration
Data cleaning and transformation
Handling missing values
Time series conversion
Return calculations (daily and monthly)
Market capitalization analysis

The dataset contains 358 trading days and 936 stocks after cleaning.

Report generated on 2025-10-29 21:12:49.743957

TEJ Stock Data Analysis (2024-2025)

Khongorzul Erkhembayar

2025-10-29