# Load required packages
library(tidyverse)
library(tidyquant)
library(timetk)
# Set working directory (adjust if needed)
setwd("/Users/macstore/midterm")
# Verify the file exists
list.files(pattern = "\\.txt$")
## [1] "tej_day_price_2024_20250630.txt"
Question 1: Import data
# Try different import functions
# Option 1: read_delim() - most flexible (RECOMMENDED)
tej_data <- read_delim("tej_day_price_2024_20250630.txt",
delim = "\t",
show_col_types = FALSE)
# Option 2: read_tsv() - specifically for tab-separated
# tej_data <- read_tsv("tej_day_price_2024_20250630.txt")
# Option 3: read_csv() won't work for tab-separated files
# tej_data <- read_csv("tej_day_price_2024_20250630.txt")
# Show results
glimpse(tej_data)
## Rows: 337,347
## Columns: 12
## $ CO_ID <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID` <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector` <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name` <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)` <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)` <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)` <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)` <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)` <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)` <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…
Question 2: Rename columns
tej_renamed <- tej_data %>%
select(id = `TSE ID`, # Column 3
name = `English Short Name`, # Column 5
date = Date, # Column 2
price = `Close(NTD)`, # Column 9
cap = `Market Cap.(NTD MN)`) # Column 12
glimpse(tej_renamed)
## Rows: 337,347
## Columns: 5
## $ id <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203, 1210, 1213…
## $ name <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSINGTA", "Tuna Cem…
## $ date <dbl> 20240102, 20240102, 20240102, 20240102, 20240102, 20240102, 2024…
## $ price <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.1807, 18.3336, 1…
## $ cap <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754, 9640, 13992, 52…
Question 3: Convert to wide format
tej_wide <- tej_renamed %>%
select(id, date, price) %>%
mutate(id = as.character(id), # Convert id to text
date = ymd(date)) %>% # Convert date to date format
pivot_wider(names_from = id, # Spread data
values_from = price)
head(tej_wide, 10)
## # A tibble: 10 × 947
## date `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2024-01-02 32.4 37.0 17.6 26.1 14.1 16.2 18.3 18.2 55.3
## 2 2024-01-03 31.9 36.6 17.5 25.9 14.0 16.1 18.2 18.2 54.7
## 3 2024-01-04 31.9 37.0 17.5 25.5 14.1 16.1 18.4 18.1 54.1
## 4 2024-01-05 32.1 37.0 17.5 25.8 14.1 16.1 18.3 18.1 54.9
## 5 2024-01-08 32.0 37.2 17.6 25.7 14.2 16.1 18.3 18.1 54.6
## 6 2024-01-09 31.8 36.9 17.5 25.4 13.8 16.0 18.4 18.1 54.6
## 7 2024-01-10 31.5 36.6 17.4 25.4 13.7 16.0 18.5 18.0 53.4
## 8 2024-01-11 31.5 36.7 17.5 25.6 13.8 16.1 18.5 18.0 55.1
## 9 2024-01-12 31.5 36.6 17.5 25.6 13.8 16.0 18.4 18.0 54.4
## 10 2024-01-15 31.4 36.6 17.4 25.4 13.8 16.0 18.3 18.0 53.8
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## # `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## # `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## # `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## # `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## # `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## # `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …
Question 4: Find stocks with NA values
na_counts <- tej_wide %>%
pivot_longer(-date, names_to = "key", values_to = "value") %>%
group_by(key) %>%
summarise(value = sum(is.na(value))) %>%
filter(value > 0) %>%
arrange(key)
print(na_counts)
## # A tibble: 10 × 2
## key value
## <chr> <int>
## 1 3716 160
## 2 4585 177
## 3 7722 18
## 4 7732 43
## 5 7736 53
## 6 7750 108
## 7 7765 151
## 8 7780 196
## 9 7788 198
## 10 7799 217
Question 5: Replace NA with closest available prices
tej_filled <- tej_wide %>%
tk_xts(date_var = date) %>% # Convert to xts
na.locf() %>% # Fill NA forward
tk_tbl(preserve_index = TRUE, # Convert back to tibble
rename_index = "date")
head(tej_filled, 10)
## # A tibble: 10 × 947
## date `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2024-01-02 32.4 37.0 17.6 26.1 14.1 16.2 18.3 18.2 55.3
## 2 2024-01-03 31.9 36.6 17.5 25.9 14.0 16.1 18.2 18.2 54.7
## 3 2024-01-04 31.9 37.0 17.5 25.5 14.1 16.1 18.4 18.1 54.1
## 4 2024-01-05 32.1 37.0 17.5 25.8 14.1 16.1 18.3 18.1 54.9
## 5 2024-01-08 32.0 37.2 17.6 25.7 14.2 16.1 18.3 18.1 54.6
## 6 2024-01-09 31.8 36.9 17.5 25.4 13.8 16.0 18.4 18.1 54.6
## 7 2024-01-10 31.5 36.6 17.4 25.4 13.7 16.0 18.5 18.0 53.4
## 8 2024-01-11 31.5 36.7 17.5 25.6 13.8 16.1 18.5 18.0 55.1
## 9 2024-01-12 31.5 36.6 17.5 25.6 13.8 16.0 18.4 18.0 54.4
## 10 2024-01-15 31.4 36.6 17.4 25.4 13.8 16.0 18.3 18.0 53.8
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## # `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## # `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## # `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## # `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## # `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## # `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …
Question 6: Delete stocks with NA values
# Get stock ids to remove
stocks_to_remove <- na_counts$key
# Filter out those stocks
tej_clean <- tej_wide %>%
select(-any_of(stocks_to_remove))
# Show dimensions
dim(tej_clean)
## [1] 358 937
Question 7: Calculate daily returns
# Convert to xts
tej_xts <- tej_clean %>%
tk_xts(date_var = date)
# Calculate daily returns
daily_returns <- Return.calculate(tej_xts, method = "discrete")
# Remove first row (NA) and show first 5 stocks, first 5 days
daily_returns_clean <- daily_returns[-1, ]
daily_returns_clean[1:5, 1:5]
## 1101 1102 1103 1104 1108
## 2024-01-03 -0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
## 2024-01-04 0.000000000 0.012298711 0.000000000 -0.013558772 0.003170803
## 2024-01-05 0.004384536 -0.001214929 0.002674026 0.010306896 0.000000000
## 2024-01-08 -0.002909225 0.004865628 0.002666895 -0.003399291 0.006328664
## 2024-01-09 -0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457
Question 8: Calculate monthly returns
# Convert to monthly
monthly_prices <- to.monthly(tej_xts, OHLC = FALSE)
# Calculate monthly returns
monthly_returns <- Return.calculate(monthly_prices, method = "discrete")
# Remove first row and show first 5 stocks, first 5 months
monthly_returns_clean <- monthly_returns[-1, ]
monthly_returns_clean[1:5, 1:5]
## 1101 1102 1103 1104 1108
## Feb 2024 0.006272034 0.01760804 -0.008404066 0.01887014 0.036185231
## Mar 2024 0.001554899 0.02101398 -0.016950585 0.06397241 0.003170803
## Apr 2024 -0.003108301 0.05811288 0.057470064 0.11234002 0.072790295
## May 2024 0.029639309 -0.04920108 0.032611536 -0.03271487 0.000000000
## Jun 2024 0.036364817 0.05535680 -0.036845213 0.04852830 -0.011805133
Question 9: Find top 20 largest cap firms
top20_firms <- tej_renamed %>%
mutate(date = ymd(date),
year1 = year(date)) %>%
filter(date == ymd("2024-12-31") | date == ymd("2025-06-30")) %>%
group_by(year1) %>%
arrange(desc(cap)) %>%
slice(1:20) %>%
ungroup() %>%
mutate(cap1 = scales::dollar(cap)) %>%
select(date, year1, cap, cap1, id, name)
print(top20_firms)
## # A tibble: 40 × 6
## date year1 cap cap1 id name
## <date> <dbl> <dbl> <chr> <dbl> <chr>
## 1 2024-12-31 2024 27877688 $27,877,688 2330 TSMC
## 2 2024-12-31 2024 2556073 $2,556,073 2317 Hon Hai
## 3 2024-12-31 2024 2266389 $2,266,389 2454 MediaTek
## 4 2024-12-31 2024 1234015 $1,234,015 2881 Fubon Financial
## 5 2024-12-31 2024 1118242 $1,118,242 2308 DELTA
## 6 2024-12-31 2024 1108574 $1,108,574 2382 QCI
## 7 2024-12-31 2024 1001907 $1,001,907 2882 CATHAY FHC
## 8 2024-12-31 2024 958045 $958,045 2412 CHT
## 9 2024-12-31 2024 767154 $767,154 2891 CTBC Holding
## 10 2024-12-31 2024 715219 $715,219 3711 ASEH
## # ℹ 30 more rows
Alternative: Show both years separately
top20_2024 <- tej_renamed %>%
mutate(date = ymd(date)) %>%
filter(date == ymd("2024-12-31")) %>%
arrange(desc(cap)) %>%
slice(1:20)
top20_2025 <- tej_renamed %>%
mutate(date = ymd(date)) %>%
filter(date == ymd("2025-06-30")) %>%
arrange(desc(cap)) %>%
slice(1:20)
print("Top 20 in 2024:")
## [1] "Top 20 in 2024:"
print(top20_2024)
## # A tibble: 20 × 5
## id name date price cap
## <dbl> <chr> <date> <dbl> <dbl>
## 1 2330 TSMC 2024-12-31 1061. 27877688
## 2 2317 Hon Hai 2024-12-31 178. 2556073
## 3 2454 MediaTek 2024-12-31 1359. 2266389
## 4 2881 Fubon Financial 2024-12-31 83.8 1234015
## 5 2308 DELTA 2024-12-31 423. 1118242
## 6 2382 QCI 2024-12-31 274. 1108574
## 7 2882 CATHAY FHC 2024-12-31 64.8 1001907
## 8 2412 CHT 2024-12-31 119. 958045
## 9 2891 CTBC Holding 2024-12-31 37.0 767154
## 10 3711 ASEH 2024-12-31 156. 715219
## 11 2886 Mega FHC 2024-12-31 37.3 574052
## 12 2303 UMC 2024-12-31 40.4 540739
## 13 2603 EMC 2024-12-31 195. 487135
## 14 6669 Wiwynn 2024-12-31 2544. 486903
## 15 1216 Uni-President 2024-12-31 78.0 459675
## 16 2357 Asustek 2024-12-31 586. 457540
## 17 2885 Yuanta Group 2024-12-31 31.5 440057
## 18 2345 Accton 2024-12-31 763. 433744
## 19 2884 E.S.F.H 2024-12-31 25.7 431087
## 20 3045 TWM 2024-12-31 109. 422590
print("Top 20 in 2025:")
## [1] "Top 20 in 2025:"
print(top20_2025)
## # A tibble: 20 × 5
## id name date price cap
## <dbl> <chr> <date> <dbl> <dbl>
## 1 2330 TSMC 2025-06-30 1056. 27488572
## 2 2317 Hon Hai 2025-06-30 155. 2236570
## 3 2454 MediaTek 2025-06-30 1226. 2002073
## 4 2881 Fubon Financial 2025-06-30 81.0 1193018
## 5 2308 DELTA 2025-06-30 413 1072785
## 6 2382 QCI 2025-06-30 274. 1060291
## 7 2412 CHT 2025-06-30 130. 1047255
## 8 2882 CATHAY FHC 2025-06-30 62.8 921226
## 9 2891 CTBC Holding 2025-06-30 41.4 858722
## 10 3711 ASEH 2025-06-30 142. 652308
## 11 2886 Mega FHC 2025-06-30 39.5 608910
## 12 2303 UMC 2025-06-30 44.2 554998
## 13 2884 E.S.F.H 2025-06-30 31.4 525462
## 14 2357 Asustek 2025-06-30 644 478338
## 15 6669 Wiwynn 2025-06-30 2530 470177
## 16 1216 Uni-President 2025-06-30 78.1 460243
## 17 2885 Yuanta Group 2025-06-30 31.6 441999
## 18 2603 EMC 2025-06-30 199 430844
## 19 3045 TWM 2025-06-30 110. 428175
## 20 2345 Accton 2025-06-30 721. 409616