rm(list=ls())
library(tidyverse)
library(tidyquant)
library(timetk)
library(PerformanceAnalytics)
library(DT)

1. Import data file

# Import data using read_tsv()
data1 <- read_tsv("tej_day_price_2024_20250630.txt")

# Show results
glimpse(data1)
## Rows: 337,347
## Columns: 12
## $ CO_ID                 <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date                  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID`              <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector`          <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name`  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)`           <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)`           <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)`            <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)`          <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)`       <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)`     <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…

2. Replace column names

data2 <- data1 %>%
  rename(
    date = 2,      # Date column
    id = 3,        # TSE ID column
    name = 5,      # English Short Name column
    price = 9,     # Close(NTD) column
    cap = 12       # Market Cap.(NTD MN) column
  ) %>%
  select(id, name, date, price, cap)

glimpse(data2)
## Rows: 337,347
## Columns: 5
## $ id    <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203, 1210, 1213…
## $ name  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSINGTA", "Tuna Cem…
## $ date  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102, 20240102, 2024…
## $ price <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.1807, 18.3336, 1…
## $ cap   <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754, 9640, 13992, 52…

3. Convert to wide format

data3 <- data2 %>%
  select(id, date, price) %>%
  mutate(
    id = as.character(id),           # Convert id to text
    date = ymd(as.character(date))   # Convert date to date format
  ) %>%
  spread(key = id, value = price)    # Change from long to wide format

# Show results
datatable(data3, options = list(pageLength = 10, scrollX = TRUE))

4. Find stocks with NA values

na_counts <- data3 %>%
  select(-date) %>%
  summarise(across(everything(), ~sum(is.na(.)))) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  filter(value > 0) %>%
  arrange(value)

datatable(na_counts, options = list(pageLength = 10))

5. Replace NA values

# Replace NA with closest available stock prices
data5 <- data3 %>%
  tk_xts(date_var = date) %>%
  na.locf() %>%                      # Replace NA with last observation carried forward
  tk_tbl(preserve_index = TRUE, rename_index = "date")

# Show results
datatable(data5, options = list(pageLength = 10, scrollX = TRUE))

6. Delete stocks with NA

# Get list of stocks with NA from Question 4
stocks_with_na <- na_counts$key

# Remove those stocks
data6 <- data3 %>%
  select(-any_of(stocks_with_na))

# Show dimensions
dim(data6)
## [1] 358 937

7. Calculate daily returns

# Convert to xts
data7_xts <- data6 %>%
  tk_xts(date_var = date)

# Calculate daily returns (discrete returns)
daily_returns <- Return.calculate(data7_xts, method = "discrete")

# Remove first row (NA) and show first 5 stocks, first 5 days
daily_returns_clean <- daily_returns[-1, ]
daily_returns_clean[1:5, 1:5]
##                    1101         1102         1103         1104         1108
## 2024-01-03 -0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
## 2024-01-04  0.000000000  0.012298711  0.000000000 -0.013558772  0.003170803
## 2024-01-05  0.004384536 -0.001214929  0.002674026  0.010306896  0.000000000
## 2024-01-08 -0.002909225  0.004865628  0.002666895 -0.003399291  0.006328664
## 2024-01-09 -0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457

8. Calculate monthly returns

# Convert to monthly and calculate returns
monthly_prices <- to.monthly(data7_xts, OHLC = FALSE)
monthly_returns <- Return.calculate(monthly_prices, method = "discrete")

# Remove first row and show first 5 stocks, first 5 months
monthly_returns_clean <- monthly_returns[-1, ]
monthly_returns_clean[1:5, 1:5]
##                  1101        1102         1103        1104         1108
## Feb 2024  0.006272034  0.01760804 -0.008404066  0.01887014  0.036185231
## Mar 2024  0.001554899  0.02101398 -0.016950585  0.06397241  0.003170803
## Apr 2024 -0.003108301  0.05811288  0.057470064  0.11234002  0.072790295
## May 2024  0.029639309 -0.04920108  0.032611536 -0.03271487  0.000000000
## Jun 2024  0.036364817  0.05535680 -0.036845213  0.04852830 -0.011805133

9. Find largest cap firms

# Get the original data with cap information
data9 <- data2 %>%
  mutate(date = ymd(as.character(date))) %>%
  mutate(year1 = year(date))

# Find the 20 largest cap firms at year end of 2024 and 2025
top20_firms <- data9 %>%
  filter(date == as.Date("2024-12-31") | date == as.Date("2025-06-30")) %>%
  group_by(year1) %>%
  arrange(desc(cap)) %>%
  slice(1:20) %>%
  ungroup() %>%
  mutate(cap1 = paste0("$", format(cap, big.mark = ","))) %>%
  select(date, year1, cap, cap1, id, name)

# Show results
datatable(top20_firms, options = list(pageLength = 10))