Load Required Libraries

library(tidyverse)
library(tidyquant)
library(timetk)
library(zoo)
library(PerformanceAnalytics)

Question 1: Import Data

Import the TEJ daily price data file from the Downloads folder.

# Import the file from Downloads folder
data <- read_tsv("C:/Users/User/Downloads/tej_day_price_2024_20250630.txt")

# Show the results
glimpse(data)
## Rows: 337,347
## Columns: 12
## $ CO_ID                 <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date                  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID`              <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector`          <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name`  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)`           <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)`           <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)`            <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)`          <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)`       <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)`     <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…

Question 2: Replace Column Names and Select Columns

Replace specific column names by position and select only the 5 required columns.

# First, let's see original column names
colnames(data)
##  [1] "CO_ID"               "Date"                "TSE ID"             
##  [4] "TSE Sector"          "English Short Name"  "Open(NTD)"          
##  [7] "High(NTD)"           "Low(NTD)"            "Close(NTD)"         
## [10] "Volume(1000S)"       "Amount(NTD1000)"     "Market Cap.(NTD MN)"
# Replace column names by POSITION
names(data)[2] <- "date"
names(data)[3] <- "id" 
names(data)[5] <- "name"
names(data)[9] <- "price"
names(data)[12] <- "cap"

# NOW select only these 5 columns
data <- data %>% select(id, name, date, price, cap)

# Show results
glimpse(data)
## Rows: 337,347
## Columns: 5
## $ id    <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203, 1210, 1213…
## $ name  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSINGTA", "Tuna Cem…
## $ date  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102, 20240102, 2024…
## $ price <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.1807, 18.3336, 1…
## $ cap   <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754, 9640, 13992, 52…

Question 3: Change Formats and Reshape to Wide

Select columns, convert data types, and reshape from long to wide format.

# Select id, date, price columns
data_selected <- data %>%
  select(id, date, price)

# Change id to text (character)
data_selected <- data_selected %>%
  mutate(id = as.character(id))

# Change date to Date format
data_selected <- data_selected %>%
  mutate(date = as.Date(as.character(date), format = "%Y%m%d"))

# Convert from long to wide format using spread()
data_wide <- data_selected %>%
  spread(key = id, value = price)

# Show results
head(data_wide, 10)
## # A tibble: 10 × 947
##    date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##    <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 2024-01-02   32.4   37.0   17.6   26.1   14.1   16.2   18.3   18.2   55.3
##  2 2024-01-03   31.9   36.6   17.5   25.9   14.0   16.1   18.2   18.2   54.7
##  3 2024-01-04   31.9   37.0   17.5   25.5   14.1   16.1   18.4   18.1   54.1
##  4 2024-01-05   32.1   37.0   17.5   25.8   14.1   16.1   18.3   18.1   54.9
##  5 2024-01-08   32.0   37.2   17.6   25.7   14.2   16.1   18.3   18.1   54.6
##  6 2024-01-09   31.8   36.9   17.5   25.4   13.8   16.0   18.4   18.1   54.6
##  7 2024-01-10   31.5   36.6   17.4   25.4   13.7   16.0   18.5   18.0   53.4
##  8 2024-01-11   31.5   36.7   17.5   25.6   13.8   16.1   18.5   18.0   55.1
##  9 2024-01-12   31.5   36.6   17.5   25.6   13.8   16.0   18.4   18.0   54.4
## 10 2024-01-15   31.4   36.6   17.4   25.4   13.8   16.0   18.3   18.0   53.8
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## #   `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## #   `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## #   `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## #   `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## #   `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## #   `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …

Question 4: Identify and Count NA Values

Show which stock IDs have NA values and count how many.

# Count NA values for each stock (column)
na_counts <- data_wide %>%
  select(-date) %>%  # Exclude date column
  summarise(across(everything(), ~sum(is.na(.)))) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  filter(value > 0) %>%
  arrange(desc(value))

# Show results
print(na_counts)
## # A tibble: 10 × 2
##    key   value
##    <chr> <int>
##  1 7799    217
##  2 7788    198
##  3 7780    196
##  4 4585    177
##  5 3716    160
##  6 7765    151
##  7 7750    108
##  8 7736     53
##  9 7732     43
## 10 7722     18

Question 5: Fill NA Values

Replace NA values with the closest available prices using na.locf() (Last Observation Carried Forward).

# Apply na.locf to all columns except date
data_filled <- data_wide %>%
  mutate(across(-date, ~na.locf(., na.rm = FALSE)))

# Show results
head(data_filled, 10)
## # A tibble: 10 × 947
##    date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##    <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 2024-01-02   32.4   37.0   17.6   26.1   14.1   16.2   18.3   18.2   55.3
##  2 2024-01-03   31.9   36.6   17.5   25.9   14.0   16.1   18.2   18.2   54.7
##  3 2024-01-04   31.9   37.0   17.5   25.5   14.1   16.1   18.4   18.1   54.1
##  4 2024-01-05   32.1   37.0   17.5   25.8   14.1   16.1   18.3   18.1   54.9
##  5 2024-01-08   32.0   37.2   17.6   25.7   14.2   16.1   18.3   18.1   54.6
##  6 2024-01-09   31.8   36.9   17.5   25.4   13.8   16.0   18.4   18.1   54.6
##  7 2024-01-10   31.5   36.6   17.4   25.4   13.7   16.0   18.5   18.0   53.4
##  8 2024-01-11   31.5   36.7   17.5   25.6   13.8   16.1   18.5   18.0   55.1
##  9 2024-01-12   31.5   36.6   17.5   25.6   13.8   16.0   18.4   18.0   54.4
## 10 2024-01-15   31.4   36.6   17.4   25.4   13.8   16.0   18.3   18.0   53.8
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## #   `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## #   `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## #   `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## #   `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## #   `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## #   `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …

Question 6: Remove Stocks with NA Values

Delete stocks that had any NA values from the dataset.

# Get list of stocks that had NA values
stocks_with_na <- na_counts$key

# Remove those stocks from data_wide (before filling)
data_clean <- data_wide %>%
  select(-any_of(stocks_with_na))

# Show updated dimensions
dim(data_clean)
## [1] 358 937
cat("Dimensions:", dim(data_clean), "\n")
## Dimensions: 358 937

Question 7: Calculate Daily Returns

Convert to xts format and calculate daily returns.

# Convert to xts format - date column is automatically used as index
data_xts <- data_clean %>%
  tk_xts(date_var = date, silent = TRUE)

# Calculate daily returns (discrete returns)
daily_returns <- Return.calculate(data_xts, method = "discrete")

# Delete first row (which contains NA)
daily_returns <- daily_returns[-1, ]

# Show first 5 stocks with first 5 days
print(daily_returns[1:5, 1:5])
##                    1101         1102         1103         1104         1108
## 2024-01-03 -0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
## 2024-01-04  0.000000000  0.012298711  0.000000000 -0.013558772  0.003170803
## 2024-01-05  0.004384536 -0.001214929  0.002674026  0.010306896  0.000000000
## 2024-01-08 -0.002909225  0.004865628  0.002666895 -0.003399291  0.006328664
## 2024-01-09 -0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457

Question 8: Calculate Monthly Returns

Convert daily prices to monthly prices and compute monthly returns.

# Convert daily prices to monthly prices (last day of month)
monthly_prices <- to.monthly(data_xts, indexAt = "lastof", OHLC = FALSE)

# Calculate monthly returns
monthly_returns <- Return.calculate(monthly_prices, method = "discrete")

# Delete first row (which contains NA)
monthly_returns <- monthly_returns[-1, ]

# Show first 5 stocks with first 5 months
monthly_returns[1:5, 1:5]
##                    1101        1102         1103        1104         1108
## 2024-02-29  0.006272034  0.01760804 -0.008404066  0.01887014  0.036185231
## 2024-03-31  0.001554899  0.02101398 -0.016950585  0.06397241  0.003170803
## 2024-04-30 -0.003108301  0.05811288  0.057470064  0.11234002  0.072790295
## 2024-05-31  0.029639309 -0.04920108  0.032611536 -0.03271487  0.000000000
## 2024-06-30  0.036364817  0.05535680 -0.036845213  0.04852830 -0.011805133

Question 9: Largest Market Cap Firms

Find the 20 largest market capitalization firms at year-end 2024 and 2025.

# Back to original data with cap information
largest_cap <- data %>%
  mutate(date = as.Date(as.character(date), format = "%Y%m%d")) %>%
  mutate(year1 = year(date)) %>%
  # Filter for December 31st of 2024 and June 30th of 2025
  filter(
    (year1 == 2024 & month(date) == 12 & day(date) == 31) |
      (year1 == 2025 & month(date) == 6 & day(date) == 30)
  ) %>%
  group_by(year1) %>%
  arrange(desc(cap)) %>%
  slice(1:20) %>%
  ungroup() %>%
  mutate(cap1 = paste0("$", format(cap, big.mark = ",", scientific = FALSE))) %>%
  select(date, year1, cap, cap1, id, name)

# Show results
print(largest_cap, n = 40)
## # A tibble: 40 × 6
##    date       year1      cap cap1           id name           
##    <date>     <dbl>    <dbl> <chr>       <dbl> <chr>          
##  1 2024-12-31  2024 27877688 $27,877,688  2330 TSMC           
##  2 2024-12-31  2024  2556073 $ 2,556,073  2317 Hon Hai        
##  3 2024-12-31  2024  2266389 $ 2,266,389  2454 MediaTek       
##  4 2024-12-31  2024  1234015 $ 1,234,015  2881 Fubon Financial
##  5 2024-12-31  2024  1118242 $ 1,118,242  2308 DELTA          
##  6 2024-12-31  2024  1108574 $ 1,108,574  2382 QCI            
##  7 2024-12-31  2024  1001907 $ 1,001,907  2882 CATHAY FHC     
##  8 2024-12-31  2024   958045 $   958,045  2412 CHT            
##  9 2024-12-31  2024   767154 $   767,154  2891 CTBC Holding   
## 10 2024-12-31  2024   715219 $   715,219  3711 ASEH           
## 11 2024-12-31  2024   574052 $   574,052  2886 Mega FHC       
## 12 2024-12-31  2024   540739 $   540,739  2303 UMC            
## 13 2024-12-31  2024   487135 $   487,135  2603 EMC            
## 14 2024-12-31  2024   486903 $   486,903  6669 Wiwynn         
## 15 2024-12-31  2024   459675 $   459,675  1216 Uni-President  
## 16 2024-12-31  2024   457540 $   457,540  2357 Asustek        
## 17 2024-12-31  2024   440057 $   440,057  2885 Yuanta Group   
## 18 2024-12-31  2024   433744 $   433,744  2345 Accton         
## 19 2024-12-31  2024   431087 $   431,087  2884 E.S.F.H        
## 20 2024-12-31  2024   422590 $   422,590  3045 TWM            
## 21 2025-06-30  2025 27488572 $27,488,572  2330 TSMC           
## 22 2025-06-30  2025  2236570 $ 2,236,570  2317 Hon Hai        
## 23 2025-06-30  2025  2002073 $ 2,002,073  2454 MediaTek       
## 24 2025-06-30  2025  1193018 $ 1,193,018  2881 Fubon Financial
## 25 2025-06-30  2025  1072785 $ 1,072,785  2308 DELTA          
## 26 2025-06-30  2025  1060291 $ 1,060,291  2382 QCI            
## 27 2025-06-30  2025  1047255 $ 1,047,255  2412 CHT            
## 28 2025-06-30  2025   921226 $   921,226  2882 CATHAY FHC     
## 29 2025-06-30  2025   858722 $   858,722  2891 CTBC Holding   
## 30 2025-06-30  2025   652308 $   652,308  3711 ASEH           
## 31 2025-06-30  2025   608910 $   608,910  2886 Mega FHC       
## 32 2025-06-30  2025   554998 $   554,998  2303 UMC            
## 33 2025-06-30  2025   525462 $   525,462  2884 E.S.F.H        
## 34 2025-06-30  2025   478338 $   478,338  2357 Asustek        
## 35 2025-06-30  2025   470177 $   470,177  6669 Wiwynn         
## 36 2025-06-30  2025   460243 $   460,243  1216 Uni-President  
## 37 2025-06-30  2025   441999 $   441,999  2885 Yuanta Group   
## 38 2025-06-30  2025   430844 $   430,844  2603 EMC            
## 39 2025-06-30  2025   428175 $   428,175  3045 TWM            
## 40 2025-06-30  2025   409616 $   409,616  2345 Accton

Year End 2024

largest_cap %>%
  filter(year1 == 2024) %>%
  knitr::kable(caption = "20 Largest Cap Firms - Year End 2024")
20 Largest Cap Firms - Year End 2024
date year1 cap cap1 id name
2024-12-31 2024 27877688 \(27,877,688 | 2330|TSMC | |2024-12-31 | 2024| 2556073|\) 2,556,073 2317 Hon Hai
2024-12-31 2024 2266389 $ 2,266,389 2454 MediaTek
2024-12-31 2024 1234015 $ 1,234,015 2881 Fubon Financial
2024-12-31 2024 1118242 $ 1,118,242 2308 DELTA
2024-12-31 2024 1108574 $ 1,108,574 2382 QCI
2024-12-31 2024 1001907 $ 1,001,907 2882 CATHAY FHC
2024-12-31 2024 958045 $ 958,045 2412 CHT
2024-12-31 2024 767154 $ 767,154 2891 CTBC Holding
2024-12-31 2024 715219 $ 715,219 3711 ASEH
2024-12-31 2024 574052 $ 574,052 2886 Mega FHC
2024-12-31 2024 540739 $ 540,739 2303 UMC
2024-12-31 2024 487135 $ 487,135 2603 EMC
2024-12-31 2024 486903 $ 486,903 6669 Wiwynn
2024-12-31 2024 459675 $ 459,675 1216 Uni-President
2024-12-31 2024 457540 $ 457,540 2357 Asustek
2024-12-31 2024 440057 $ 440,057 2885 Yuanta Group
2024-12-31 2024 433744 $ 433,744 2345 Accton
2024-12-31 2024 431087 $ 431,087 2884 E.S.F.H
2024-12-31 2024 422590 $ 422,590 3045 TWM

Year End 2025

largest_cap %>%
  filter(year1 == 2025) %>%
  knitr::kable(caption = "20 Largest Cap Firms - Year End 2025")
20 Largest Cap Firms - Year End 2025
date year1 cap cap1 id name
2025-06-30 2025 27488572 \(27,488,572 | 2330|TSMC | |2025-06-30 | 2025| 2236570|\) 2,236,570 2317 Hon Hai
2025-06-30 2025 2002073 $ 2,002,073 2454 MediaTek
2025-06-30 2025 1193018 $ 1,193,018 2881 Fubon Financial
2025-06-30 2025 1072785 $ 1,072,785 2308 DELTA
2025-06-30 2025 1060291 $ 1,060,291 2382 QCI
2025-06-30 2025 1047255 $ 1,047,255 2412 CHT
2025-06-30 2025 921226 $ 921,226 2882 CATHAY FHC
2025-06-30 2025 858722 $ 858,722 2891 CTBC Holding
2025-06-30 2025 652308 $ 652,308 3711 ASEH
2025-06-30 2025 608910 $ 608,910 2886 Mega FHC
2025-06-30 2025 554998 $ 554,998 2303 UMC
2025-06-30 2025 525462 $ 525,462 2884 E.S.F.H
2025-06-30 2025 478338 $ 478,338 2357 Asustek
2025-06-30 2025 470177 $ 470,177 6669 Wiwynn
2025-06-30 2025 460243 $ 460,243 1216 Uni-President
2025-06-30 2025 441999 $ 441,999 2885 Yuanta Group
2025-06-30 2025 430844 $ 430,844 2603 EMC
2025-06-30 2025 428175 $ 428,175 3045 TWM
2025-06-30 2025 409616 $ 409,616 2345 Accton

Summary

This analysis covers:

  1. Data import and exploration
  2. Column renaming and selection
  3. Data transformation (long to wide format)
  4. Missing value analysis and treatment
  5. Daily and monthly return calculations
  6. Identification of largest market cap firms

End of Analysis