Setup

Clear environment and load required packages:

# Clear environment
rm(list=ls())

# Load required packages
library(tidyverse)
library(tidyquant)
library(timetk)
library(zoo)
library(knitr)
library(DT)

Question 1: Import Data

Task: Please load up packages tidyquant, tidyverse and timetk. Import the data file from Tronclass: tej_day_price_2024_20250630.txt. Try using functions read_csv(), read_tsv() and read_delim() to import data. Show me the imported results of by using one of the three functions: glimpse(), head() or str() to show the results.

# Method 1: read_csv()
data_csv <- read_csv("tej_day_price_2024_20250630.txt")

# Method 2: read_tsv() - recommended for tab-separated files
data_tsv <- read_tsv("tej_day_price_2024_20250630.txt")

# Method 3: read_delim() with explicit delimiter
data_delim <- read_delim("tej_day_price_2024_20250630.txt", delim = "\t")

# Show imported results using glimpse()
glimpse(data_tsv)
## Rows: 337,347
## Columns: 12
## $ CO_ID                 <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date                  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID`              <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector`          <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name`  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)`           <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)`           <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)`            <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)`          <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)`       <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)`     <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…
# Use data_tsv for subsequent analysis
data <- data_tsv

Question 2: Replace Column Names

Task: Replace column 2, 3, 5, 9 and 12 with new column names: “date”, “id”, “name”, “price”, “cap”. Show your results.

# Replace column 2, 3, 5, 9, 12 with new names
# Column 2: Date → date
# Column 3: TSE ID → id  
# Column 5: English Short Name → name
# Column 9: Close(NTD) → price
# Column 12: Market Cap.(NTD MN) → cap

data_q2 <- data %>%
  select(
    date = 2,      # Date
    id = 3,        # TSE ID
    name = 5,      # English Short Name
    price = 9,     # Close(NTD)
    cap = 12       # Market Cap.(NTD MN)
  )

glimpse(data_q2)
## Rows: 337,347
## Columns: 5
## $ date  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102, 20240102, 2024…
## $ id    <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203, 1210, 1213…
## $ name  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSINGTA", "Tuna Cem…
## $ price <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.1807, 18.3336, 1…
## $ cap   <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754, 9640, 13992, 52…

Question 3: Convert to Wide Format

Task: Select column id, date, price, and change id format to text, date format to date. Also change the data format from long to wide and show your results.

# Select id, date, price and convert formats
data_q3 <- data_q2 %>%
  select(id, date, price) %>%
  mutate(
    id = as.character(id),           # Change id to text
    date = ymd(date)                 # Change date to Date format
  ) %>%
  pivot_wider(                       # Convert long to wide
    names_from = id,
    values_from = price
  )

# Show results
head(data_q3, 10)
## # A tibble: 10 × 947
##    date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##    <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 2024-01-02   32.4   37.0   17.6   26.1   14.1   16.2   18.3   18.2   55.3
##  2 2024-01-03   31.9   36.6   17.5   25.9   14.0   16.1   18.2   18.2   54.7
##  3 2024-01-04   31.9   37.0   17.5   25.5   14.1   16.1   18.4   18.1   54.1
##  4 2024-01-05   32.1   37.0   17.5   25.8   14.1   16.1   18.3   18.1   54.9
##  5 2024-01-08   32.0   37.2   17.6   25.7   14.2   16.1   18.3   18.1   54.6
##  6 2024-01-09   31.8   36.9   17.5   25.4   13.8   16.0   18.4   18.1   54.6
##  7 2024-01-10   31.5   36.6   17.4   25.4   13.7   16.0   18.5   18.0   53.4
##  8 2024-01-11   31.5   36.7   17.5   25.6   13.8   16.1   18.5   18.0   55.1
##  9 2024-01-12   31.5   36.6   17.5   25.6   13.8   16.0   18.4   18.0   54.4
## 10 2024-01-15   31.4   36.6   17.4   25.4   13.8   16.0   18.3   18.0   53.8
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## #   `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## #   `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## #   `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## #   `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## #   `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## #   `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …

Question 4: Identify Stocks with NA Values

Task: Show the stock ids with NA values and compute the number of NA for each stock.

# Count NA values for each stock
na_summary <- data_q3 %>%
  select(-date) %>%                          # Exclude date column
  summarise(across(everything(), 
                   ~sum(is.na(.)))) %>%      # Count NAs per column
  pivot_longer(everything(),                 # Convert to long format
               names_to = "key",
               values_to = "value") %>%
  filter(value > 0) %>%                      # Keep only stocks with NA
  arrange(value)                             # Sort by number of NAs

# Display as interactive table
datatable(na_summary, 
          caption = "Stocks with NA Values",
          options = list(pageLength = 10))

Summary: There are 10 stocks with NA values.


Question 5: Fill NA Values

Task: Replace NA values with the closest available stock prices (using na.locf()).

# Fill NA values with last observation carried forward
data_q5 <- data_q3 %>%
  mutate(across(-date, ~na.locf(., na.rm = FALSE)))

# Show results
head(data_q5, 10)
## # A tibble: 10 × 947
##    date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##    <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 2024-01-02   32.4   37.0   17.6   26.1   14.1   16.2   18.3   18.2   55.3
##  2 2024-01-03   31.9   36.6   17.5   25.9   14.0   16.1   18.2   18.2   54.7
##  3 2024-01-04   31.9   37.0   17.5   25.5   14.1   16.1   18.4   18.1   54.1
##  4 2024-01-05   32.1   37.0   17.5   25.8   14.1   16.1   18.3   18.1   54.9
##  5 2024-01-08   32.0   37.2   17.6   25.7   14.2   16.1   18.3   18.1   54.6
##  6 2024-01-09   31.8   36.9   17.5   25.4   13.8   16.0   18.4   18.1   54.6
##  7 2024-01-10   31.5   36.6   17.4   25.4   13.7   16.0   18.5   18.0   53.4
##  8 2024-01-11   31.5   36.7   17.5   25.6   13.8   16.1   18.5   18.0   55.1
##  9 2024-01-12   31.5   36.6   17.5   25.6   13.8   16.0   18.4   18.0   54.4
## 10 2024-01-15   31.4   36.6   17.4   25.4   13.8   16.0   18.3   18.0   53.8
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## #   `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## #   `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## #   `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## #   `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## #   `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## #   `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …

Question 6: Remove Stocks with NA

Task: Delete the stock which contains prices of NA in your data in question 4. Show the updated number of rows and columns in your filtered data.

# Get list of stock IDs that have NA values
stocks_with_na <- na_summary$key

# Remove these stocks from the dataset
data_q6 <- data_q3 %>%
  select(-all_of(stocks_with_na))

# Show updated dimensions
cat("Number of rows:", nrow(data_q6), "\n")
## Number of rows: 358
cat("Number of columns:", ncol(data_q6), "\n")
## Number of columns: 937

Results: After removing stocks with NA values: - Rows: 358 - Columns: 937 (1 date column + 936 stock columns) - Stocks removed: 10


Question 7: Calculate Daily Returns

Task: Convert data in Question 6 into time series data (xts). And calculate daily rate of returns (compute discrete returns). Delete the first row and show the first five stocks with first five days of returns.

# Convert to xts time series data
data_q7_xts <- data_q6 %>%
  tk_xts(date_var = date)

# Calculate daily rate of returns (discrete)
returns_daily <- Return.calculate(data_q7_xts, method = "discrete")

# Delete the first row (NA values from return calculation)
returns_daily <- returns_daily[-1, ]

# Show first 5 stocks with first 5 days of returns
head(returns_daily[, 1:5], 5)
##                    1101         1102         1103         1104         1108
## 2024-01-03 -0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
## 2024-01-04  0.000000000  0.012298711  0.000000000 -0.013558772  0.003170803
## 2024-01-05  0.004384536 -0.001214929  0.002674026  0.010306896  0.000000000
## 2024-01-08 -0.002909225  0.004865628  0.002666895 -0.003399291  0.006328664
## 2024-01-09 -0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457

Question 8: Calculate Monthly Returns

Task: Compute monthly returns. Delete the first row and show the first five stocks with first five months of returns.

# Convert daily data to monthly using to.monthly()
data_monthly <- to.monthly(data_q7_xts, OHLC = FALSE)

# Calculate monthly returns (discrete)
returns_monthly <- Return.calculate(data_monthly, method = "discrete")

# Delete the first row
returns_monthly <- returns_monthly[-1, ]

# Show first 5 stocks with first 5 months of returns
head(returns_monthly[, 1:5], 5)
##                  1101        1102         1103        1104         1108
## Feb 2024  0.006272034  0.01760804 -0.008404066  0.01887014  0.036185231
## Mar 2024  0.001554899  0.02101398 -0.016950585  0.06397241  0.003170803
## Apr 2024 -0.003108301  0.05811288  0.057470064  0.11234002  0.072790295
## May 2024  0.029639309 -0.04920108  0.032611536 -0.03271487  0.000000000
## Jun 2024  0.036364817  0.05535680 -0.036845213  0.04852830 -0.011805133

Question 9: Top 20 Largest Cap Firms

Task: Find the 20 largest cap firms in the year end of 2024 and 2025. Show the results.

# Find 20 largest cap firms for year-end 2024 and 2025
largest_caps <- data_q2 %>%
  mutate(
    date = ymd(date),                        # Convert to date
    year1 = year(date),                      # Extract year
    id = as.character(id)                    # Convert id to text
  ) %>%
  filter(
    date == as.Date("2024-12-31") |          # Year-end 2024
    date == as.Date("2025-06-30")            # Year-end 2025 (last date)
  ) %>%
  group_by(year1) %>%                        # Group by year
  arrange(desc(cap)) %>%                     # Sort by cap descending
  slice(1:20) %>%                            # Take top 20
  ungroup() %>%                              # Remove grouping
  mutate(cap1 = scales::dollar(cap,          # Format cap with dollar sign
                               prefix = "$", 
                               big.mark = ",", 
                               accuracy = 1)) %>%
  select(date, year1, cap, cap1, id, name)   # Select relevant columns

# Display as interactive table
datatable(largest_caps, 
          caption = "Top 20 Largest Cap Firms (2024 & 2025)",
          options = list(pageLength = 20))

Top 10 Firms in 2024

largest_caps %>%
  filter(year1 == 2024) %>%
  head(10) %>%
  kable(caption = "Top 10 Largest Market Cap Firms - Year End 2024")
Top 10 Largest Market Cap Firms - Year End 2024
date year1 cap cap1 id name
2024-12-31 2024 27877688 $27,877,688 2330 TSMC
2024-12-31 2024 2556073 $2,556,073 2317 Hon Hai
2024-12-31 2024 2266389 $2,266,389 2454 MediaTek
2024-12-31 2024 1234015 $1,234,015 2881 Fubon Financial
2024-12-31 2024 1118242 $1,118,242 2308 DELTA
2024-12-31 2024 1108574 $1,108,574 2382 QCI
2024-12-31 2024 1001907 $1,001,907 2882 CATHAY FHC
2024-12-31 2024 958045 $958,045 2412 CHT
2024-12-31 2024 767154 $767,154 2891 CTBC Holding
2024-12-31 2024 715219 $715,219 3711 ASEH

Top 10 Firms in 2025

largest_caps %>%
  filter(year1 == 2025) %>%
  head(10) %>%
  kable(caption = "Top 10 Largest Market Cap Firms - Year End 2025")
Top 10 Largest Market Cap Firms - Year End 2025
date year1 cap cap1 id name
2025-06-30 2025 27488572 $27,488,572 2330 TSMC
2025-06-30 2025 2236570 $2,236,570 2317 Hon Hai
2025-06-30 2025 2002073 $2,002,073 2454 MediaTek
2025-06-30 2025 1193018 $1,193,018 2881 Fubon Financial
2025-06-30 2025 1072785 $1,072,785 2308 DELTA
2025-06-30 2025 1060291 $1,060,291 2382 QCI
2025-06-30 2025 1047255 $1,047,255 2412 CHT
2025-06-30 2025 921226 $921,226 2882 CATHAY FHC
2025-06-30 2025 858722 $858,722 2891 CTBC Holding
2025-06-30 2025 652308 $652,308 3711 ASEH

Summary Statistics

summary_stats <- data.frame(
  Metric = c("Original data rows", 
             "Original data columns",
             "After cleaning rows",
             "After cleaning columns",
             "Stocks removed",
             "Daily returns rows",
             "Daily returns columns",
             "Monthly returns rows",
             "Monthly returns columns"),
  Value = c(nrow(data),
            ncol(data),
            nrow(data_q6),
            ncol(data_q6),
            length(stocks_with_na),
            nrow(returns_daily),
            ncol(returns_daily),
            nrow(returns_monthly),
            ncol(returns_monthly))
)

kable(summary_stats, caption = "Analysis Summary")
Analysis Summary
Metric Value
Original data rows 337347
Original data columns 12
After cleaning rows 358
After cleaning columns 937
Stocks removed 10
Daily returns rows 357
Daily returns columns 936
Monthly returns rows 17
Monthly returns columns 936

Conclusion

This analysis successfully:

  1. ✓ Imported and processed Taiwan stock market data
  2. ✓ Cleaned data by handling NA values
  3. ✓ Converted data formats (long to wide, regular to xts)
  4. ✓ Calculated daily and monthly returns
  5. ✓ Identified top firms by market capitalization

Total stocks analyzed: 936 stocks
Time period: 2024-01-02 to 2025-06-30
Total trading days: 358


Session Information

sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sonoma 14.7.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Asia/Taipei
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] DT_0.34.0                  knitr_1.50                
##  [3] timetk_2.9.1               PerformanceAnalytics_2.0.8
##  [5] quantmod_0.4.28            TTR_0.24.4                
##  [7] xts_0.14.1                 zoo_1.8-14                
##  [9] tidyquant_1.0.11           lubridate_1.9.4           
## [11] forcats_1.0.1              stringr_1.5.2             
## [13] dplyr_1.1.4                purrr_1.1.0               
## [15] readr_2.1.5                tidyr_1.3.1               
## [17] tibble_3.3.0               ggplot2_4.0.0             
## [19] tidyverse_2.0.0           
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1    timeDate_4041.110   farver_2.1.2       
##  [4] S7_0.2.0            fastmap_1.2.0       digest_0.6.37      
##  [7] rpart_4.1.24        timechange_0.3.0    lifecycle_1.0.4    
## [10] survival_3.8-3      magrittr_2.0.3      compiler_4.5.1     
## [13] rlang_1.1.6         sass_0.4.10         tools_4.5.1        
## [16] yaml_2.3.10         data.table_1.17.8   htmlwidgets_1.6.4  
## [19] bit_4.6.0           curl_7.0.0          RColorBrewer_1.1-3 
## [22] withr_3.0.2         nnet_7.3-20         grid_4.5.1         
## [25] future_1.67.0       globals_0.18.0      scales_1.4.0       
## [28] MASS_7.3-65         cli_3.6.5           crayon_1.5.3       
## [31] rmarkdown_2.29      generics_0.1.4      rstudioapi_0.17.1  
## [34] future.apply_1.20.0 tzdb_0.5.0          cachem_1.1.0       
## [37] splines_4.5.1       parallel_4.5.1      vctrs_0.6.5        
## [40] hardhat_1.4.2       Matrix_1.7-3        jsonlite_2.0.0     
## [43] hms_1.1.3           bit64_4.6.0-1       RobStatTM_1.0.11   
## [46] listenv_0.9.1       crosstalk_1.2.2     gower_1.0.2        
## [49] jquerylib_0.1.4     recipes_1.3.1       glue_1.8.0         
## [52] parallelly_1.45.1   codetools_0.2-20    rsample_1.3.1      
## [55] stringi_1.8.7       gtable_0.3.6        quadprog_1.5-8     
## [58] pillar_1.11.1       furrr_0.3.1         htmltools_0.5.8.1  
## [61] ipred_0.9-15        lava_1.8.1          R6_2.6.1           
## [64] vroom_1.6.6         evaluate_1.0.5      lattice_0.22-7     
## [67] bslib_0.9.0         class_7.3-23        Rcpp_1.1.0         
## [70] prodlim_2025.04.28  xfun_0.53           pkgconfig_2.0.3