# Import data using read_tsv
data <- read_tsv("tej_day_price_2024_20250630.txt")
# Show results
glimpse(data)## Rows: 337,347
## Columns: 12
## $ CO_ID <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID` <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector` <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name` <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)` <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)` <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)` <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)` <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)` <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)` <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…
Replace columns 2, 3, 5, 9, and 12 with new names.
# Rename columns
data <- data %>%
rename(
date = 2,
id = 3,
name = 5,
price = 9,
cap = 12
)
glimpse(data)## Rows: 337,347
## Columns: 12
## $ CO_ID <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", "110…
## $ date <dbl> 20240102, 20240102, 20240102, 20240102, 20240102, 20…
## $ id <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203…
## $ `TSE Sector` <chr> "01", "01", "01", "01", "01", "01", "01", "02", "02"…
## $ name <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSINGTA…
## $ `Open(NTD)` <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.1807…
## $ `High(NTD)` <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.2696…
## $ `Low(NTD)` <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.1362…
## $ price <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.1807…
## $ `Volume(1000S)` <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 1849, …
## $ `Amount(NTD1000)` <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 2409, …
## $ cap <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754, 964…
# Select columns and convert formats
data_selected <- data %>%
select(id, date, price) %>%
mutate(
id = as.character(id),
date = ymd(date)
)
# Convert to wide format
data_wide <- data_selected %>%
pivot_wider(
names_from = id,
values_from = price
)
head(data_wide[, 1:6])## # A tibble: 6 × 6
## date `1101` `1102` `1103` `1104` `1108`
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2024-01-02 32.4 37.0 17.6 26.1 14.1
## 2 2024-01-03 31.9 36.6 17.5 25.9 14.0
## 3 2024-01-04 31.9 37.0 17.5 25.5 14.1
## 4 2024-01-05 32.1 37.0 17.5 25.8 14.1
## 5 2024-01-08 32.0 37.2 17.6 25.7 14.2
## 6 2024-01-09 31.8 36.9 17.5 25.4 13.8
Dimensions: 358 rows × 947 columns
# Count NAs for each stock
na_counts <- data_wide %>%
select(-date) %>%
summarise(across(everything(), ~sum(is.na(.)))) %>%
pivot_longer(cols = everything(),
names_to = "stock_id",
values_to = "na_count") %>%
filter(na_count > 0) %>%
arrange(desc(na_count))
print(na_counts, n = 20)## # A tibble: 10 × 2
## stock_id na_count
## <chr> <int>
## 1 7799 217
## 2 7788 198
## 3 7780 196
## 4 4585 177
## 5 3716 160
## 6 7765 151
## 7 7750 108
## 8 7736 53
## 9 7732 43
## 10 7722 18
Total stocks with NAs: 10
# Replace NAs with closest available prices
data_filled <- data_wide %>%
mutate(across(-date, ~na.locf(., na.rm = FALSE)))
# Check remaining NAs
remaining_nas <- data_filled %>%
select(-date) %>%
summarise(across(everything(), ~sum(is.na(.)))) %>%
sum()Remaining NAs after filling: 1321
# Convert to xts
data_xts <- data_clean %>%
tk_xts(date_var = date)
# Calculate daily returns
daily_returns <- Return.calculate(data_xts, method = "discrete")
# Remove first row
daily_returns <- daily_returns[-1, ]
# Show first 5 stocks with first 5 days
print(daily_returns[1:5, 1:5])## 1101 1102 1103 1104 1108
## 2024-01-03 -0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
## 2024-01-04 0.000000000 0.012298711 0.000000000 -0.013558772 0.003170803
## 2024-01-05 0.004384536 -0.001214929 0.002674026 0.010306896 0.000000000
## 2024-01-08 -0.002909225 0.004865628 0.002666895 -0.003399291 0.006328664
## 2024-01-09 -0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457
# Convert to monthly prices
monthly_prices <- to.monthly(data_xts, OHLC = FALSE)
# Calculate monthly returns
monthly_returns <- Return.calculate(monthly_prices, method = "discrete")
# Remove first row
monthly_returns <- monthly_returns[-1, ]
# Show first 5 stocks with first 5 months
print(monthly_returns[1:5, 1:5])## 1101 1102 1103 1104 1108
## Feb 2024 0.006272034 0.01760804 -0.008404066 0.01887014 0.036185231
## Mar 2024 0.001554899 0.02101398 -0.016950585 0.06397241 0.003170803
## Apr 2024 -0.003108301 0.05811288 0.057470064 0.11234002 0.072790295
## May 2024 0.029639309 -0.04920108 0.032611536 -0.03271487 0.000000000
## Jun 2024 0.036364817 0.05535680 -0.036845213 0.04852830 -0.011805133
# Prepare data with cap
data_with_cap <- data %>%
select(id, date, name, cap) %>%
mutate(
id = as.character(id),
date = ymd(date),
year = year(date)
)
# Top 20 for 2024 year-end
top20_2024 <- data_with_cap %>%
filter(year == 2024) %>%
group_by(id) %>%
filter(date == max(date)) %>%
ungroup() %>%
arrange(desc(cap)) %>%
slice(1:20) %>%
select(id, name, date, cap)
# Top 20 for 2025 year-end
top20_2025 <- data_with_cap %>%
filter(year == 2025) %>%
group_by(id) %>%
filter(date == max(date)) %>%
ungroup() %>%
arrange(desc(cap)) %>%
slice(1:20) %>%
select(id, name, date, cap)## # A tibble: 20 × 4
## id name date cap
## <chr> <chr> <date> <dbl>
## 1 2330 TSMC 2024-12-31 27877688
## 2 2317 Hon Hai 2024-12-31 2556073
## 3 2454 MediaTek 2024-12-31 2266389
## 4 2881 Fubon Financial 2024-12-31 1234015
## 5 2308 DELTA 2024-12-31 1118242
## 6 2382 QCI 2024-12-31 1108574
## 7 2882 CATHAY FHC 2024-12-31 1001907
## 8 2412 CHT 2024-12-31 958045
## 9 2891 CTBC Holding 2024-12-31 767154
## 10 3711 ASEH 2024-12-31 715219
## 11 2886 Mega FHC 2024-12-31 574052
## 12 2303 UMC 2024-12-31 540739
## 13 2603 EMC 2024-12-31 487135
## 14 6669 Wiwynn 2024-12-31 486903
## 15 1216 Uni-President 2024-12-31 459675
## 16 2357 Asustek 2024-12-31 457540
## 17 2885 Yuanta Group 2024-12-31 440057
## 18 2345 Accton 2024-12-31 433744
## 19 2884 E.S.F.H 2024-12-31 431087
## 20 3045 TWM 2024-12-31 422590
## # A tibble: 20 × 4
## id name date cap
## <chr> <chr> <date> <dbl>
## 1 2330 TSMC 2025-06-30 27488572
## 2 2317 Hon Hai 2025-06-30 2236570
## 3 2454 MediaTek 2025-06-30 2002073
## 4 2881 Fubon Financial 2025-06-30 1193018
## 5 2308 DELTA 2025-06-30 1072785
## 6 2382 QCI 2025-06-30 1060291
## 7 2412 CHT 2025-06-30 1047255
## 8 2882 CATHAY FHC 2025-06-30 921226
## 9 2891 CTBC Holding 2025-06-30 858722
## 10 3711 ASEH 2025-06-30 652308
## 11 2886 Mega FHC 2025-06-30 608910
## 12 2303 UMC 2025-06-30 554998
## 13 2884 E.S.F.H 2025-06-30 525462
## 14 2357 Asustek 2025-06-30 478338
## 15 6669 Wiwynn 2025-06-30 470177
## 16 1216 Uni-President 2025-06-30 460243
## 17 2885 Yuanta Group 2025-06-30 441999
## 18 2603 EMC 2025-06-30 430844
## 19 3045 TWM 2025-06-30 428175
## 20 2345 Accton 2025-06-30 409616