# Load required packages
library(tidyverse)
library(tidyquant)
library(timetk)
# Set working directory (adjust if needed)
setwd("/Users/macstore/midterm")

# Verify the file exists
list.files(pattern = "\\.txt$")
## [1] "tej_day_price_2024_20250630.txt"

Question 1: Import data

# Try different import functions
# Option 1: read_delim() - most flexible (RECOMMENDED)
tej_data <- read_delim("tej_day_price_2024_20250630.txt", 
                       delim = "\t",
                       show_col_types = FALSE)

# Option 2: read_tsv() - specifically for tab-separated
# tej_data <- read_tsv("tej_day_price_2024_20250630.txt")

# Option 3: read_csv() won't work for tab-separated files
# tej_data <- read_csv("tej_day_price_2024_20250630.txt")

# Show results
glimpse(tej_data)
## Rows: 337,347
## Columns: 12
## $ CO_ID                 <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date                  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID`              <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector`          <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name`  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)`           <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)`           <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)`            <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)`          <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)`       <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)`     <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…

Question 2: Rename columns

tej_renamed <- tej_data %>%
  select(id = `TSE ID`,                  # Column 3
         name = `English Short Name`,    # Column 5
         date = Date,                    # Column 2
         price = `Close(NTD)`,           # Column 9
         cap = `Market Cap.(NTD MN)`)    # Column 12

glimpse(tej_renamed)
## Rows: 337,347
## Columns: 5
## $ id    <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203, 1210, 1213…
## $ name  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSINGTA", "Tuna Cem…
## $ date  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102, 20240102, 2024…
## $ price <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.1807, 18.3336, 1…
## $ cap   <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754, 9640, 13992, 52…

Question 3: Convert to wide format

tej_wide <- tej_renamed %>%
  select(id, date, price) %>%
  mutate(id = as.character(id),           # Convert id to text
         date = ymd(date)) %>%            # Convert date to date format
  pivot_wider(names_from = id,            # Spread data
              values_from = price)

head(tej_wide, 10)
## # A tibble: 10 × 947
##    date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##    <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 2024-01-02   32.4   37.0   17.6   26.1   14.1   16.2   18.3   18.2   55.3
##  2 2024-01-03   31.9   36.6   17.5   25.9   14.0   16.1   18.2   18.2   54.7
##  3 2024-01-04   31.9   37.0   17.5   25.5   14.1   16.1   18.4   18.1   54.1
##  4 2024-01-05   32.1   37.0   17.5   25.8   14.1   16.1   18.3   18.1   54.9
##  5 2024-01-08   32.0   37.2   17.6   25.7   14.2   16.1   18.3   18.1   54.6
##  6 2024-01-09   31.8   36.9   17.5   25.4   13.8   16.0   18.4   18.1   54.6
##  7 2024-01-10   31.5   36.6   17.4   25.4   13.7   16.0   18.5   18.0   53.4
##  8 2024-01-11   31.5   36.7   17.5   25.6   13.8   16.1   18.5   18.0   55.1
##  9 2024-01-12   31.5   36.6   17.5   25.6   13.8   16.0   18.4   18.0   54.4
## 10 2024-01-15   31.4   36.6   17.4   25.4   13.8   16.0   18.3   18.0   53.8
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## #   `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## #   `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## #   `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## #   `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## #   `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## #   `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …

Question 4: Find stocks with NA values

na_counts <- tej_wide %>%
  pivot_longer(-date, names_to = "key", values_to = "value") %>%
  group_by(key) %>%
  summarise(value = sum(is.na(value))) %>%
  filter(value > 0) %>%
  arrange(key)

print(na_counts)
## # A tibble: 10 × 2
##    key   value
##    <chr> <int>
##  1 3716    160
##  2 4585    177
##  3 7722     18
##  4 7732     43
##  5 7736     53
##  6 7750    108
##  7 7765    151
##  8 7780    196
##  9 7788    198
## 10 7799    217

Question 5: Replace NA with closest available prices

tej_filled <- tej_wide %>%
  tk_xts(date_var = date) %>%          # Convert to xts
  na.locf() %>%                        # Fill NA forward
  tk_tbl(preserve_index = TRUE,        # Convert back to tibble
         rename_index = "date")

head(tej_filled, 10)
## # A tibble: 10 × 947
##    date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##    <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 2024-01-02   32.4   37.0   17.6   26.1   14.1   16.2   18.3   18.2   55.3
##  2 2024-01-03   31.9   36.6   17.5   25.9   14.0   16.1   18.2   18.2   54.7
##  3 2024-01-04   31.9   37.0   17.5   25.5   14.1   16.1   18.4   18.1   54.1
##  4 2024-01-05   32.1   37.0   17.5   25.8   14.1   16.1   18.3   18.1   54.9
##  5 2024-01-08   32.0   37.2   17.6   25.7   14.2   16.1   18.3   18.1   54.6
##  6 2024-01-09   31.8   36.9   17.5   25.4   13.8   16.0   18.4   18.1   54.6
##  7 2024-01-10   31.5   36.6   17.4   25.4   13.7   16.0   18.5   18.0   53.4
##  8 2024-01-11   31.5   36.7   17.5   25.6   13.8   16.1   18.5   18.0   55.1
##  9 2024-01-12   31.5   36.6   17.5   25.6   13.8   16.0   18.4   18.0   54.4
## 10 2024-01-15   31.4   36.6   17.4   25.4   13.8   16.0   18.3   18.0   53.8
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## #   `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## #   `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## #   `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## #   `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## #   `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## #   `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …

Question 6: Delete stocks with NA values

# Get stock ids to remove
stocks_to_remove <- na_counts$key

# Filter out those stocks
tej_clean <- tej_wide %>%
  select(-any_of(stocks_to_remove))

# Show dimensions
dim(tej_clean)
## [1] 358 937

Question 7: Calculate daily returns

# Convert to xts
tej_xts <- tej_clean %>%
  tk_xts(date_var = date)

# Calculate daily returns
daily_returns <- Return.calculate(tej_xts, method = "discrete")

# Remove first row (NA) and show first 5 stocks, first 5 days
daily_returns_clean <- daily_returns[-1, ]
daily_returns_clean[1:5, 1:5]
##                    1101         1102         1103         1104         1108
## 2024-01-03 -0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
## 2024-01-04  0.000000000  0.012298711  0.000000000 -0.013558772  0.003170803
## 2024-01-05  0.004384536 -0.001214929  0.002674026  0.010306896  0.000000000
## 2024-01-08 -0.002909225  0.004865628  0.002666895 -0.003399291  0.006328664
## 2024-01-09 -0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457

Question 8: Calculate monthly returns

# Convert to monthly
monthly_prices <- to.monthly(tej_xts, OHLC = FALSE)

# Calculate monthly returns
monthly_returns <- Return.calculate(monthly_prices, method = "discrete")

# Remove first row and show first 5 stocks, first 5 months
monthly_returns_clean <- monthly_returns[-1, ]
monthly_returns_clean[1:5, 1:5]
##                  1101        1102         1103        1104         1108
## Feb 2024  0.006272034  0.01760804 -0.008404066  0.01887014  0.036185231
## Mar 2024  0.001554899  0.02101398 -0.016950585  0.06397241  0.003170803
## Apr 2024 -0.003108301  0.05811288  0.057470064  0.11234002  0.072790295
## May 2024  0.029639309 -0.04920108  0.032611536 -0.03271487  0.000000000
## Jun 2024  0.036364817  0.05535680 -0.036845213  0.04852830 -0.011805133

Question 9: Find top 20 largest cap firms

top20_firms <- tej_renamed %>%
  mutate(date = ymd(date),
         year1 = year(date)) %>%
  filter(date == ymd("2024-12-31") | date == ymd("2025-06-30")) %>%
  group_by(year1) %>%
  arrange(desc(cap)) %>%
  slice(1:20) %>%
  ungroup() %>%
  mutate(cap1 = scales::dollar(cap)) %>%
  select(date, year1, cap, cap1, id, name)

print(top20_firms)
## # A tibble: 40 × 6
##    date       year1      cap cap1           id name           
##    <date>     <dbl>    <dbl> <chr>       <dbl> <chr>          
##  1 2024-12-31  2024 27877688 $27,877,688  2330 TSMC           
##  2 2024-12-31  2024  2556073 $2,556,073   2317 Hon Hai        
##  3 2024-12-31  2024  2266389 $2,266,389   2454 MediaTek       
##  4 2024-12-31  2024  1234015 $1,234,015   2881 Fubon Financial
##  5 2024-12-31  2024  1118242 $1,118,242   2308 DELTA          
##  6 2024-12-31  2024  1108574 $1,108,574   2382 QCI            
##  7 2024-12-31  2024  1001907 $1,001,907   2882 CATHAY FHC     
##  8 2024-12-31  2024   958045 $958,045     2412 CHT            
##  9 2024-12-31  2024   767154 $767,154     2891 CTBC Holding   
## 10 2024-12-31  2024   715219 $715,219     3711 ASEH           
## # ℹ 30 more rows

Alternative: Show both years separately

top20_2024 <- tej_renamed %>%
  mutate(date = ymd(date)) %>%
  filter(date == ymd("2024-12-31")) %>%
  arrange(desc(cap)) %>%
  slice(1:20)

top20_2025 <- tej_renamed %>%
  mutate(date = ymd(date)) %>%
  filter(date == ymd("2025-06-30")) %>%
  arrange(desc(cap)) %>%
  slice(1:20)

print("Top 20 in 2024:")
## [1] "Top 20 in 2024:"
print(top20_2024)
## # A tibble: 20 × 5
##       id name            date        price      cap
##    <dbl> <chr>           <date>      <dbl>    <dbl>
##  1  2330 TSMC            2024-12-31 1061.  27877688
##  2  2317 Hon Hai         2024-12-31  178.   2556073
##  3  2454 MediaTek        2024-12-31 1359.   2266389
##  4  2881 Fubon Financial 2024-12-31   83.8  1234015
##  5  2308 DELTA           2024-12-31  423.   1118242
##  6  2382 QCI             2024-12-31  274.   1108574
##  7  2882 CATHAY FHC      2024-12-31   64.8  1001907
##  8  2412 CHT             2024-12-31  119.    958045
##  9  2891 CTBC Holding    2024-12-31   37.0   767154
## 10  3711 ASEH            2024-12-31  156.    715219
## 11  2886 Mega FHC        2024-12-31   37.3   574052
## 12  2303 UMC             2024-12-31   40.4   540739
## 13  2603 EMC             2024-12-31  195.    487135
## 14  6669 Wiwynn          2024-12-31 2544.    486903
## 15  1216 Uni-President   2024-12-31   78.0   459675
## 16  2357 Asustek         2024-12-31  586.    457540
## 17  2885 Yuanta Group    2024-12-31   31.5   440057
## 18  2345 Accton          2024-12-31  763.    433744
## 19  2884 E.S.F.H         2024-12-31   25.7   431087
## 20  3045 TWM             2024-12-31  109.    422590
print("Top 20 in 2025:")
## [1] "Top 20 in 2025:"
print(top20_2025)
## # A tibble: 20 × 5
##       id name            date        price      cap
##    <dbl> <chr>           <date>      <dbl>    <dbl>
##  1  2330 TSMC            2025-06-30 1056.  27488572
##  2  2317 Hon Hai         2025-06-30  155.   2236570
##  3  2454 MediaTek        2025-06-30 1226.   2002073
##  4  2881 Fubon Financial 2025-06-30   81.0  1193018
##  5  2308 DELTA           2025-06-30  413    1072785
##  6  2382 QCI             2025-06-30  274.   1060291
##  7  2412 CHT             2025-06-30  130.   1047255
##  8  2882 CATHAY FHC      2025-06-30   62.8   921226
##  9  2891 CTBC Holding    2025-06-30   41.4   858722
## 10  3711 ASEH            2025-06-30  142.    652308
## 11  2886 Mega FHC        2025-06-30   39.5   608910
## 12  2303 UMC             2025-06-30   44.2   554998
## 13  2884 E.S.F.H         2025-06-30   31.4   525462
## 14  2357 Asustek         2025-06-30  644     478338
## 15  6669 Wiwynn          2025-06-30 2530     470177
## 16  1216 Uni-President   2025-06-30   78.1   460243
## 17  2885 Yuanta Group    2025-06-30   31.6   441999
## 18  2603 EMC             2025-06-30  199     430844
## 19  3045 TWM             2025-06-30  110.    428175
## 20  2345 Accton          2025-06-30  721.    409616