Load Packages and Data

Question 1: Import Data

# Load required packages
library(tidyquant)
library(tidyverse)
library(timetk)
library(zoo)
# Import data using read_tsv
data <- read_tsv("tej_day_price_2024_20250630.txt")

# Show results
glimpse(data)
## Rows: 337,347
## Columns: 12
## $ CO_ID                 <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date                  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID`              <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector`          <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name`  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)`           <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)`           <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)`            <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)`          <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)`       <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)`     <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…

Data Preparation

Question 2: Rename Columns

Replace columns 2, 3, 5, 9, and 12 with new names.

# Rename columns
data <- data %>%
  rename(
    date = 2,
    id = 3,
    name = 5,
    price = 9,
    cap = 12
  )

glimpse(data)
## Rows: 337,347
## Columns: 12
## $ CO_ID             <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", "110…
## $ date              <dbl> 20240102, 20240102, 20240102, 20240102, 20240102, 20…
## $ id                <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203…
## $ `TSE Sector`      <chr> "01", "01", "01", "01", "01", "01", "01", "02", "02"…
## $ name              <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSINGTA…
## $ `Open(NTD)`       <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.1807…
## $ `High(NTD)`       <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.2696…
## $ `Low(NTD)`        <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.1362…
## $ price             <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.1807…
## $ `Volume(1000S)`   <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 1849, …
## $ `Amount(NTD1000)` <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 2409, …
## $ cap               <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754, 964…

Question 3: Select, Convert, and Reshape

# Select columns and convert formats
data_selected <- data %>%
  select(id, date, price) %>%
  mutate(
    id = as.character(id),
    date = ymd(date)
  )

# Convert to wide format
data_wide <- data_selected %>%
  pivot_wider(
    names_from = id,
    values_from = price
  )

head(data_wide[, 1:6])
## # A tibble: 6 × 6
##   date       `1101` `1102` `1103` `1104` `1108`
##   <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 2024-01-02   32.4   37.0   17.6   26.1   14.1
## 2 2024-01-03   31.9   36.6   17.5   25.9   14.0
## 3 2024-01-04   31.9   37.0   17.5   25.5   14.1
## 4 2024-01-05   32.1   37.0   17.5   25.8   14.1
## 5 2024-01-08   32.0   37.2   17.6   25.7   14.2
## 6 2024-01-09   31.8   36.9   17.5   25.4   13.8

Dimensions: 358 rows × 947 columns


Handle Missing Data

Question 4: Identify Stocks with NA Values

# Count NAs for each stock
na_counts <- data_wide %>%
  select(-date) %>%
  summarise(across(everything(), ~sum(is.na(.)))) %>%
  pivot_longer(cols = everything(), 
               names_to = "stock_id", 
               values_to = "na_count") %>%
  filter(na_count > 0) %>%
  arrange(desc(na_count))

print(na_counts, n = 20)
## # A tibble: 10 × 2
##    stock_id na_count
##    <chr>       <int>
##  1 7799          217
##  2 7788          198
##  3 7780          196
##  4 4585          177
##  5 3716          160
##  6 7765          151
##  7 7750          108
##  8 7736           53
##  9 7732           43
## 10 7722           18

Total stocks with NAs: 10


Question 5: Replace NA Values

# Replace NAs with closest available prices
data_filled <- data_wide %>%
  mutate(across(-date, ~na.locf(., na.rm = FALSE)))

# Check remaining NAs
remaining_nas <- data_filled %>%
  select(-date) %>%
  summarise(across(everything(), ~sum(is.na(.)))) %>%
  sum()

Remaining NAs after filling: 1321


Question 6: Remove Stocks with NAs

# Get stock IDs with NAs
stocks_with_na <- na_counts$stock_id

# Remove those columns
data_clean <- data_wide %>%
  select(-any_of(stocks_with_na))

Original dimensions: 358 rows × 947 columns
After cleaning: 358 rows × 937 columns
Stocks removed: 10


Returns Analysis

Question 7: Daily Returns

# Convert to xts
data_xts <- data_clean %>%
  tk_xts(date_var = date)

# Calculate daily returns
daily_returns <- Return.calculate(data_xts, method = "discrete")

# Remove first row
daily_returns <- daily_returns[-1, ]

# Show first 5 stocks with first 5 days
print(daily_returns[1:5, 1:5])
##                    1101         1102         1103         1104         1108
## 2024-01-03 -0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
## 2024-01-04  0.000000000  0.012298711  0.000000000 -0.013558772  0.003170803
## 2024-01-05  0.004384536 -0.001214929  0.002674026  0.010306896  0.000000000
## 2024-01-08 -0.002909225  0.004865628  0.002666895 -0.003399291  0.006328664
## 2024-01-09 -0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457

Question 8: Monthly Returns

# Convert to monthly prices
monthly_prices <- to.monthly(data_xts, OHLC = FALSE)

# Calculate monthly returns
monthly_returns <- Return.calculate(monthly_prices, method = "discrete")

# Remove first row
monthly_returns <- monthly_returns[-1, ]

# Show first 5 stocks with first 5 months
print(monthly_returns[1:5, 1:5])
##                  1101        1102         1103        1104         1108
## Feb 2024  0.006272034  0.01760804 -0.008404066  0.01887014  0.036185231
## Mar 2024  0.001554899  0.02101398 -0.016950585  0.06397241  0.003170803
## Apr 2024 -0.003108301  0.05811288  0.057470064  0.11234002  0.072790295
## May 2024  0.029639309 -0.04920108  0.032611536 -0.03271487  0.000000000
## Jun 2024  0.036364817  0.05535680 -0.036845213  0.04852830 -0.011805133

Top Firms Analysis

Question 9: Top 20 Largest Cap Firms

# Prepare data with cap
data_with_cap <- data %>%
  select(id, date, name, cap) %>%
  mutate(
    id = as.character(id),
    date = ymd(date),
    year = year(date)
  )

# Top 20 for 2024 year-end
top20_2024 <- data_with_cap %>%
  filter(year == 2024) %>%
  group_by(id) %>%
  filter(date == max(date)) %>%
  ungroup() %>%
  arrange(desc(cap)) %>%
  slice(1:20) %>%
  select(id, name, date, cap)

# Top 20 for 2025 year-end
top20_2025 <- data_with_cap %>%
  filter(year == 2025) %>%
  group_by(id) %>%
  filter(date == max(date)) %>%
  ungroup() %>%
  arrange(desc(cap)) %>%
  slice(1:20) %>%
  select(id, name, date, cap)

Top 20 - Year End 2024

print(top20_2024, n = 20)
## # A tibble: 20 × 4
##    id    name            date            cap
##    <chr> <chr>           <date>        <dbl>
##  1 2330  TSMC            2024-12-31 27877688
##  2 2317  Hon Hai         2024-12-31  2556073
##  3 2454  MediaTek        2024-12-31  2266389
##  4 2881  Fubon Financial 2024-12-31  1234015
##  5 2308  DELTA           2024-12-31  1118242
##  6 2382  QCI             2024-12-31  1108574
##  7 2882  CATHAY FHC      2024-12-31  1001907
##  8 2412  CHT             2024-12-31   958045
##  9 2891  CTBC Holding    2024-12-31   767154
## 10 3711  ASEH            2024-12-31   715219
## 11 2886  Mega FHC        2024-12-31   574052
## 12 2303  UMC             2024-12-31   540739
## 13 2603  EMC             2024-12-31   487135
## 14 6669  Wiwynn          2024-12-31   486903
## 15 1216  Uni-President   2024-12-31   459675
## 16 2357  Asustek         2024-12-31   457540
## 17 2885  Yuanta Group    2024-12-31   440057
## 18 2345  Accton          2024-12-31   433744
## 19 2884  E.S.F.H         2024-12-31   431087
## 20 3045  TWM             2024-12-31   422590

Top 20 - Year End 2025

print(top20_2025, n = 20)
## # A tibble: 20 × 4
##    id    name            date            cap
##    <chr> <chr>           <date>        <dbl>
##  1 2330  TSMC            2025-06-30 27488572
##  2 2317  Hon Hai         2025-06-30  2236570
##  3 2454  MediaTek        2025-06-30  2002073
##  4 2881  Fubon Financial 2025-06-30  1193018
##  5 2308  DELTA           2025-06-30  1072785
##  6 2382  QCI             2025-06-30  1060291
##  7 2412  CHT             2025-06-30  1047255
##  8 2882  CATHAY FHC      2025-06-30   921226
##  9 2891  CTBC Holding    2025-06-30   858722
## 10 3711  ASEH            2025-06-30   652308
## 11 2886  Mega FHC        2025-06-30   608910
## 12 2303  UMC             2025-06-30   554998
## 13 2884  E.S.F.H         2025-06-30   525462
## 14 2357  Asustek         2025-06-30   478338
## 15 6669  Wiwynn          2025-06-30   470177
## 16 1216  Uni-President   2025-06-30   460243
## 17 2885  Yuanta Group    2025-06-30   441999
## 18 2603  EMC             2025-06-30   430844
## 19 3045  TWM             2025-06-30   428175
## 20 2345  Accton          2025-06-30   409616