Loading Required Packages

library(tidyquant)
library(tidyverse)
library(timetk)
library(knitr)
library(dplyr)
library(lubridate)
library(scales)

Introduction

This analysis examines stock price data from 2024-2025, including data cleaning, transformation, and return calculations.

Question 1: Import Data and Show Structure

# Option 1: Use file.choose() to manually select the file (interactive)
# data <- read_tsv(file.choose())

# Option 2: Specify full file path (update this to your actual path)
# data <- read_tsv("C:/Users/dell/OneDrive/Desktop/tej_day_price_2024_20250630.txt")

# Option 3: Use the file if it's in the same directory as the .Rmd file
data <- read_tsv("tej_day_price_2024_20250630.txt")

# Show data structure
glimpse(data)
## Rows: 337,347
## Columns: 12
## $ CO_ID                 <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date                  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID`              <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector`          <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name`  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)`           <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)`           <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)`            <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)`          <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)`       <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)`     <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…

Question 2: Replace Column Names

# Replace specific column names
colnames(data)[c(2, 3, 5, 9, 12)] <- c("date", "id", "name", "price", "cap")
head(data)
## # A tibble: 6 × 12
##   CO_ID   date    id `TSE Sector` name  `Open(NTD)` `High(NTD)` `Low(NTD)` price
##   <chr>  <dbl> <dbl> <chr>        <chr>       <dbl>       <dbl>      <dbl> <dbl>
## 1 1101… 2.02e7  1101 01           TCC          32.5        32.5       32.3  32.4
## 2 1102… 2.02e7  1102 01           ACC          37.3        37.4       36.9  37.0
## 3 1103… 2.02e7  1103 01           CHC          17.8        17.8       17.6  17.6
## 4 1104… 2.02e7  1104 01           UCC          26.1        26.2       26.0  26.1
## 5 1108… 2.02e7  1108 01           Luck…        14.2        14.2       14.0  14.1
## 6 1109… 2.02e7  1109 01           HSIN…        16.2        16.3       16.1  16.2
## # ℹ 3 more variables: `Volume(1000S)` <dbl>, `Amount(NTD1000)` <dbl>, cap <dbl>

Question 3: Select Columns and Change Formats

# Select relevant columns and transform to wide format
data_selected <- data %>%
  select(id, date, price) %>%
  mutate(
    id = as.character(id),
    date = ymd(date),
    price = as.numeric(price)
  ) %>%
  pivot_wider(names_from = id, values_from = price)
head(data_selected)
## # A tibble: 6 × 947
##   date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##   <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 2024-01-02   32.4   37.0   17.6   26.1   14.1   16.2   18.3   18.2   55.3
## 2 2024-01-03   31.9   36.6   17.5   25.9   14.0   16.1   18.2   18.2   54.7
## 3 2024-01-04   31.9   37.0   17.5   25.5   14.1   16.1   18.4   18.1   54.1
## 4 2024-01-05   32.1   37.0   17.5   25.8   14.1   16.1   18.3   18.1   54.9
## 5 2024-01-08   32.0   37.2   17.6   25.7   14.2   16.1   18.3   18.1   54.6
## 6 2024-01-09   31.8   36.9   17.5   25.4   13.8   16.0   18.4   18.1   54.6
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## #   `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## #   `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## #   `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## #   `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## #   `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## #   `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …

Question 4: Identify Stocks with NA Values

# Summarize NA values by stock
na_summary <- data_selected %>%
  summarise(across(everything(), ~sum(is.na(.)))) %>%
  pivot_longer(everything(), names_to = "stock_id", values_to = "na_count") %>%
  filter(na_count > 0) %>% 
  arrange(desc(stock_id))

print(na_summary)
## # A tibble: 10 × 2
##    stock_id na_count
##    <chr>       <int>
##  1 7799          217
##  2 7788          198
##  3 7780          196
##  4 7765          151
##  5 7750          108
##  6 7736           53
##  7 7732           43
##  8 7722           18
##  9 4585          177
## 10 3716          160

Total stocks with NA values: 10

Question 5: Replace NA Values with Forward Fill

# Replace NA values with last observation carried forward
data_filled <- data_selected %>%
  mutate(across(-date, ~na.locf(., na.rm = FALSE)))

head(data_filled)
## # A tibble: 6 × 947
##   date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##   <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 2024-01-02   32.4   37.0   17.6   26.1   14.1   16.2   18.3   18.2   55.3
## 2 2024-01-03   31.9   36.6   17.5   25.9   14.0   16.1   18.2   18.2   54.7
## 3 2024-01-04   31.9   37.0   17.5   25.5   14.1   16.1   18.4   18.1   54.1
## 4 2024-01-05   32.1   37.0   17.5   25.8   14.1   16.1   18.3   18.1   54.9
## 5 2024-01-08   32.0   37.2   17.6   25.7   14.2   16.1   18.3   18.1   54.6
## 6 2024-01-09   31.8   36.9   17.5   25.4   13.8   16.0   18.4   18.1   54.6
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## #   `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## #   `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## #   `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## #   `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## #   `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## #   `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …

Question 6: Remove Stocks with NA Values

# Remove stocks that had NA values
stocks_with_na <- na_summary$stock_id
data_clean <- data_selected %>%
  select(-any_of(stocks_with_na))

cat("Updated dimensions after removing stocks with NA:\n")
## Updated dimensions after removing stocks with NA:
cat("Rows:", nrow(data_clean), "\n")
## Rows: 358
cat("Columns:", ncol(data_clean), "\n")
## Columns: 937

Final dataset: 358 rows × 937 columns

Question 7: Calculate Daily Returns

# Ensure date is properly formatted
data_clean <- data_clean %>%
  mutate(date = as.Date(date))

# Convert to xts object
data_xts <- xts(data_clean[, -1], order.by = data_clean$date)

# Calculate daily returns
daily_returns <- Return.calculate(data_xts, method = "discrete")

# Remove first row (NA) and show first 5 stocks with first 5 days
daily_returns <- daily_returns[-1, ]
head(daily_returns[, 1:5], 5)
##                    1101         1102         1103         1104         1108
## 2024-01-03 -0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
## 2024-01-04  0.000000000  0.012298711  0.000000000 -0.013558772  0.003170803
## 2024-01-05  0.004384536 -0.001214929  0.002674026  0.010306896  0.000000000
## 2024-01-08 -0.002909225  0.004865628  0.002666895 -0.003399291  0.006328664
## 2024-01-09 -0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457

Question 8: Compute Monthly Returns

# Get last day of each month
monthly_prices <- apply.monthly(data_xts, last)

# Calculate monthly returns
monthly_returns <- Return.calculate(monthly_prices, method = "discrete")

# Remove first row (NA) and show first 5 stocks with first 5 months
monthly_returns <- monthly_returns[-1, ]
head(monthly_returns[, 1:5], 5)
##                    1101        1102         1103        1104         1108
## 2024-02-29  0.006272034  0.01760804 -0.008404066  0.01887014  0.036185231
## 2024-03-29  0.001554899  0.02101398 -0.016950585  0.06397241  0.003170803
## 2024-04-30 -0.003108301  0.05811288  0.057470064  0.11234002  0.072790295
## 2024-05-31  0.029639309 -0.04920108  0.032611536 -0.03271487  0.000000000
## 2024-06-28  0.036364817  0.05535680 -0.036845213  0.04852830 -0.011805133

Question 9: Top 20 Largest Firms by Market Cap /two years/

Year End 2024 and 2025

# Convert and clean the data
data_long <- data %>%
  mutate(date = as.Date(as.character(date), format = "%Y%m%d")) %>%  # Proper conversion
  filter(!is.na(date), !is.na(cap))

# Year-end 2024
top20_2024 <- data_long %>%
  filter(year(date) == 2024) %>%
  group_by(id, name) %>%
  slice_max(date, n = 1) %>%
  ungroup() %>%
  arrange(desc(cap)) %>%
  slice(1:20) %>%
  select(date, cap, id, name)

print(top20_2024)
## # A tibble: 20 × 4
##    date            cap    id name           
##    <date>        <dbl> <dbl> <chr>          
##  1 2024-12-31 27877688  2330 TSMC           
##  2 2024-12-31  2556073  2317 Hon Hai        
##  3 2024-12-31  2266389  2454 MediaTek       
##  4 2024-12-31  1234015  2881 Fubon Financial
##  5 2024-12-31  1118242  2308 DELTA          
##  6 2024-12-31  1108574  2382 QCI            
##  7 2024-12-31  1001907  2882 CATHAY FHC     
##  8 2024-12-31   958045  2412 CHT            
##  9 2024-12-31   767154  2891 CTBC Holding   
## 10 2024-12-31   715219  3711 ASEH           
## 11 2024-12-31   574052  2886 Mega FHC       
## 12 2024-12-31   540739  2303 UMC            
## 13 2024-12-31   487135  2603 EMC            
## 14 2024-12-31   486903  6669 Wiwynn         
## 15 2024-12-31   459675  1216 Uni-President  
## 16 2024-12-31   457540  2357 Asustek        
## 17 2024-12-31   440057  2885 Yuanta Group   
## 18 2024-12-31   433744  2345 Accton         
## 19 2024-12-31   431087  2884 E.S.F.H        
## 20 2024-12-31   422590  3045 TWM
# Year-end 2025
top20_2025 <- data_long %>%
  filter(year(date) == 2025) %>%
  group_by(id, name) %>%
  slice_max(date, n = 1) %>%
  ungroup() %>%
  arrange(desc(cap)) %>%
  slice(1:20) %>%
  select(date, cap, id, name)

print(top20_2025)
## # A tibble: 20 × 4
##    date            cap    id name           
##    <date>        <dbl> <dbl> <chr>          
##  1 2025-06-30 27488572  2330 TSMC           
##  2 2025-06-30  2236570  2317 Hon Hai        
##  3 2025-06-30  2002073  2454 MediaTek       
##  4 2025-06-30  1193018  2881 Fubon Financial
##  5 2025-06-30  1072785  2308 DELTA          
##  6 2025-06-30  1060291  2382 QCI            
##  7 2025-06-30  1047255  2412 CHT            
##  8 2025-06-30   921226  2882 CATHAY FHC     
##  9 2025-06-30   858722  2891 CTBC Holding   
## 10 2025-06-30   652308  3711 ASEH           
## 11 2025-06-30   608910  2886 Mega FHC       
## 12 2025-06-30   554998  2303 UMC            
## 13 2025-06-30   525462  2884 E.S.F.H        
## 14 2025-06-30   478338  2357 Asustek        
## 15 2025-06-30   470177  6669 Wiwynn         
## 16 2025-06-30   460243  1216 Uni-President  
## 17 2025-06-30   441999  2885 Yuanta Group   
## 18 2025-06-30   430844  2603 EMC            
## 19 2025-06-30   428175  3045 TWM            
## 20 2025-06-30   409616  2345 Accton

Conclusion

This analysis successfully cleaned and transformed the stock data, calculated returns, and identified the largest firms by market capitalization.