Clear environment and load required packages:
# Clear environment
rm(list=ls())
# Load required packages
library(tidyverse)
library(tidyquant)
library(timetk)
library(zoo)
library(knitr)
library(DT)Task: Please load up packages
tidyquant, tidyverse and timetk.
Import the data file from Tronclass:
tej_day_price_2024_20250630.txt. Try using functions
read_csv(), read_tsv() and
read_delim() to import data. Show me the imported results
of by using one of the three functions: glimpse(),
head() or str() to show the results.
# Method 1: read_csv()
data_csv <- read_csv("tej_day_price_2024_20250630.txt")
# Method 2: read_tsv() - recommended for tab-separated files
data_tsv <- read_tsv("tej_day_price_2024_20250630.txt")
# Method 3: read_delim() with explicit delimiter
data_delim <- read_delim("tej_day_price_2024_20250630.txt", delim = "\t")
# Show imported results using glimpse()
glimpse(data_tsv)## Rows: 337,347
## Columns: 12
## $ CO_ID <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID` <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector` <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name` <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)` <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)` <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)` <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)` <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)` <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)` <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…
Task: Replace column 2, 3, 5, 9 and 12 with new column names: “date”, “id”, “name”, “price”, “cap”. Show your results.
# Replace column 2, 3, 5, 9, 12 with new names
# Column 2: Date → date
# Column 3: TSE ID → id
# Column 5: English Short Name → name
# Column 9: Close(NTD) → price
# Column 12: Market Cap.(NTD MN) → cap
data_q2 <- data %>%
select(
date = 2, # Date
id = 3, # TSE ID
name = 5, # English Short Name
price = 9, # Close(NTD)
cap = 12 # Market Cap.(NTD MN)
)
glimpse(data_q2)## Rows: 337,347
## Columns: 5
## $ date <dbl> 20240102, 20240102, 20240102, 20240102, 20240102, 20240102, 2024…
## $ id <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203, 1210, 1213…
## $ name <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSINGTA", "Tuna Cem…
## $ price <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.1807, 18.3336, 1…
## $ cap <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754, 9640, 13992, 52…
Task: Select column id,
date, price, and change id format
to text, date format to date. Also change the data format
from long to wide and show your results.
# Select id, date, price and convert formats
data_q3 <- data_q2 %>%
select(id, date, price) %>%
mutate(
id = as.character(id), # Change id to text
date = ymd(date) # Change date to Date format
) %>%
pivot_wider( # Convert long to wide
names_from = id,
values_from = price
)
# Show results
head(data_q3, 10)## # A tibble: 10 × 947
## date `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2024-01-02 32.4 37.0 17.6 26.1 14.1 16.2 18.3 18.2 55.3
## 2 2024-01-03 31.9 36.6 17.5 25.9 14.0 16.1 18.2 18.2 54.7
## 3 2024-01-04 31.9 37.0 17.5 25.5 14.1 16.1 18.4 18.1 54.1
## 4 2024-01-05 32.1 37.0 17.5 25.8 14.1 16.1 18.3 18.1 54.9
## 5 2024-01-08 32.0 37.2 17.6 25.7 14.2 16.1 18.3 18.1 54.6
## 6 2024-01-09 31.8 36.9 17.5 25.4 13.8 16.0 18.4 18.1 54.6
## 7 2024-01-10 31.5 36.6 17.4 25.4 13.7 16.0 18.5 18.0 53.4
## 8 2024-01-11 31.5 36.7 17.5 25.6 13.8 16.1 18.5 18.0 55.1
## 9 2024-01-12 31.5 36.6 17.5 25.6 13.8 16.0 18.4 18.0 54.4
## 10 2024-01-15 31.4 36.6 17.4 25.4 13.8 16.0 18.3 18.0 53.8
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## # `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## # `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## # `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## # `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## # `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## # `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …
Task: Show the stock ids with NA values
and compute the number of NA for each stock.
# Count NA values for each stock
na_summary <- data_q3 %>%
select(-date) %>% # Exclude date column
summarise(across(everything(),
~sum(is.na(.)))) %>% # Count NAs per column
pivot_longer(everything(), # Convert to long format
names_to = "key",
values_to = "value") %>%
filter(value > 0) %>% # Keep only stocks with NA
arrange(value) # Sort by number of NAs
# Display as interactive table
datatable(na_summary,
caption = "Stocks with NA Values",
options = list(pageLength = 10))Summary: There are 10 stocks with NA values.
Task: Replace NA values with the
closest available stock prices (using na.locf()).
# Fill NA values with last observation carried forward
data_q5 <- data_q3 %>%
mutate(across(-date, ~na.locf(., na.rm = FALSE)))
# Show results
head(data_q5, 10)## # A tibble: 10 × 947
## date `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2024-01-02 32.4 37.0 17.6 26.1 14.1 16.2 18.3 18.2 55.3
## 2 2024-01-03 31.9 36.6 17.5 25.9 14.0 16.1 18.2 18.2 54.7
## 3 2024-01-04 31.9 37.0 17.5 25.5 14.1 16.1 18.4 18.1 54.1
## 4 2024-01-05 32.1 37.0 17.5 25.8 14.1 16.1 18.3 18.1 54.9
## 5 2024-01-08 32.0 37.2 17.6 25.7 14.2 16.1 18.3 18.1 54.6
## 6 2024-01-09 31.8 36.9 17.5 25.4 13.8 16.0 18.4 18.1 54.6
## 7 2024-01-10 31.5 36.6 17.4 25.4 13.7 16.0 18.5 18.0 53.4
## 8 2024-01-11 31.5 36.7 17.5 25.6 13.8 16.1 18.5 18.0 55.1
## 9 2024-01-12 31.5 36.6 17.5 25.6 13.8 16.0 18.4 18.0 54.4
## 10 2024-01-15 31.4 36.6 17.4 25.4 13.8 16.0 18.3 18.0 53.8
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## # `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## # `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## # `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## # `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## # `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## # `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …
Task: Delete the stock which contains prices of
NA in your data in question 4. Show the updated number of
rows and columns in your filtered data.
# Get list of stock IDs that have NA values
stocks_with_na <- na_summary$key
# Remove these stocks from the dataset
data_q6 <- data_q3 %>%
select(-all_of(stocks_with_na))
# Show updated dimensions
cat("Number of rows:", nrow(data_q6), "\n")## Number of rows: 358
## Number of columns: 937
Results: After removing stocks with NA values: - Rows: 358 - Columns: 937 (1 date column + 936 stock columns) - Stocks removed: 10
Task: Convert data in Question 6 into time series
data (xts). And calculate daily rate of returns (compute
discrete returns). Delete the first row and show the first
five stocks with first five days of returns.
# Convert to xts time series data
data_q7_xts <- data_q6 %>%
tk_xts(date_var = date)
# Calculate daily rate of returns (discrete)
returns_daily <- Return.calculate(data_q7_xts, method = "discrete")
# Delete the first row (NA values from return calculation)
returns_daily <- returns_daily[-1, ]
# Show first 5 stocks with first 5 days of returns
head(returns_daily[, 1:5], 5)## 1101 1102 1103 1104 1108
## 2024-01-03 -0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
## 2024-01-04 0.000000000 0.012298711 0.000000000 -0.013558772 0.003170803
## 2024-01-05 0.004384536 -0.001214929 0.002674026 0.010306896 0.000000000
## 2024-01-08 -0.002909225 0.004865628 0.002666895 -0.003399291 0.006328664
## 2024-01-09 -0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457
Task: Compute monthly returns. Delete the first row and show the first five stocks with first five months of returns.
# Convert daily data to monthly using to.monthly()
data_monthly <- to.monthly(data_q7_xts, OHLC = FALSE)
# Calculate monthly returns (discrete)
returns_monthly <- Return.calculate(data_monthly, method = "discrete")
# Delete the first row
returns_monthly <- returns_monthly[-1, ]
# Show first 5 stocks with first 5 months of returns
head(returns_monthly[, 1:5], 5)## 1101 1102 1103 1104 1108
## Feb 2024 0.006272034 0.01760804 -0.008404066 0.01887014 0.036185231
## Mar 2024 0.001554899 0.02101398 -0.016950585 0.06397241 0.003170803
## Apr 2024 -0.003108301 0.05811288 0.057470064 0.11234002 0.072790295
## May 2024 0.029639309 -0.04920108 0.032611536 -0.03271487 0.000000000
## Jun 2024 0.036364817 0.05535680 -0.036845213 0.04852830 -0.011805133
Task: Find the 20 largest cap firms in the year end of 2024 and 2025. Show the results.
# Find 20 largest cap firms for year-end 2024 and 2025
largest_caps <- data_q2 %>%
mutate(
date = ymd(date), # Convert to date
year1 = year(date), # Extract year
id = as.character(id) # Convert id to text
) %>%
filter(
date == as.Date("2024-12-31") | # Year-end 2024
date == as.Date("2025-06-30") # Year-end 2025 (last date)
) %>%
group_by(year1) %>% # Group by year
arrange(desc(cap)) %>% # Sort by cap descending
slice(1:20) %>% # Take top 20
ungroup() %>% # Remove grouping
mutate(cap1 = scales::dollar(cap, # Format cap with dollar sign
prefix = "$",
big.mark = ",",
accuracy = 1)) %>%
select(date, year1, cap, cap1, id, name) # Select relevant columns
# Display as interactive table
datatable(largest_caps,
caption = "Top 20 Largest Cap Firms (2024 & 2025)",
options = list(pageLength = 20))largest_caps %>%
filter(year1 == 2024) %>%
head(10) %>%
kable(caption = "Top 10 Largest Market Cap Firms - Year End 2024")| date | year1 | cap | cap1 | id | name |
|---|---|---|---|---|---|
| 2024-12-31 | 2024 | 27877688 | $27,877,688 | 2330 | TSMC |
| 2024-12-31 | 2024 | 2556073 | $2,556,073 | 2317 | Hon Hai |
| 2024-12-31 | 2024 | 2266389 | $2,266,389 | 2454 | MediaTek |
| 2024-12-31 | 2024 | 1234015 | $1,234,015 | 2881 | Fubon Financial |
| 2024-12-31 | 2024 | 1118242 | $1,118,242 | 2308 | DELTA |
| 2024-12-31 | 2024 | 1108574 | $1,108,574 | 2382 | QCI |
| 2024-12-31 | 2024 | 1001907 | $1,001,907 | 2882 | CATHAY FHC |
| 2024-12-31 | 2024 | 958045 | $958,045 | 2412 | CHT |
| 2024-12-31 | 2024 | 767154 | $767,154 | 2891 | CTBC Holding |
| 2024-12-31 | 2024 | 715219 | $715,219 | 3711 | ASEH |
largest_caps %>%
filter(year1 == 2025) %>%
head(10) %>%
kable(caption = "Top 10 Largest Market Cap Firms - Year End 2025")| date | year1 | cap | cap1 | id | name |
|---|---|---|---|---|---|
| 2025-06-30 | 2025 | 27488572 | $27,488,572 | 2330 | TSMC |
| 2025-06-30 | 2025 | 2236570 | $2,236,570 | 2317 | Hon Hai |
| 2025-06-30 | 2025 | 2002073 | $2,002,073 | 2454 | MediaTek |
| 2025-06-30 | 2025 | 1193018 | $1,193,018 | 2881 | Fubon Financial |
| 2025-06-30 | 2025 | 1072785 | $1,072,785 | 2308 | DELTA |
| 2025-06-30 | 2025 | 1060291 | $1,060,291 | 2382 | QCI |
| 2025-06-30 | 2025 | 1047255 | $1,047,255 | 2412 | CHT |
| 2025-06-30 | 2025 | 921226 | $921,226 | 2882 | CATHAY FHC |
| 2025-06-30 | 2025 | 858722 | $858,722 | 2891 | CTBC Holding |
| 2025-06-30 | 2025 | 652308 | $652,308 | 3711 | ASEH |
summary_stats <- data.frame(
Metric = c("Original data rows",
"Original data columns",
"After cleaning rows",
"After cleaning columns",
"Stocks removed",
"Daily returns rows",
"Daily returns columns",
"Monthly returns rows",
"Monthly returns columns"),
Value = c(nrow(data),
ncol(data),
nrow(data_q6),
ncol(data_q6),
length(stocks_with_na),
nrow(returns_daily),
ncol(returns_daily),
nrow(returns_monthly),
ncol(returns_monthly))
)
kable(summary_stats, caption = "Analysis Summary")| Metric | Value |
|---|---|
| Original data rows | 337347 |
| Original data columns | 12 |
| After cleaning rows | 358 |
| After cleaning columns | 937 |
| Stocks removed | 10 |
| Daily returns rows | 357 |
| Daily returns columns | 936 |
| Monthly returns rows | 17 |
| Monthly returns columns | 936 |
This analysis successfully:
Total stocks analyzed: 936 stocks
Time period: 2024-01-02 to 2025-06-30
Total trading days: 358
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sonoma 14.7.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Asia/Taipei
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] DT_0.34.0 knitr_1.50
## [3] timetk_2.9.1 PerformanceAnalytics_2.0.8
## [5] quantmod_0.4.28 TTR_0.24.4
## [7] xts_0.14.1 zoo_1.8-14
## [9] tidyquant_1.0.11 lubridate_1.9.4
## [11] forcats_1.0.1 stringr_1.5.2
## [13] dplyr_1.1.4 purrr_1.1.0
## [15] readr_2.1.5 tidyr_1.3.1
## [17] tibble_3.3.0 ggplot2_4.0.0
## [19] tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 timeDate_4041.110 farver_2.1.2
## [4] S7_0.2.0 fastmap_1.2.0 digest_0.6.37
## [7] rpart_4.1.24 timechange_0.3.0 lifecycle_1.0.4
## [10] survival_3.8-3 magrittr_2.0.3 compiler_4.5.1
## [13] rlang_1.1.6 sass_0.4.10 tools_4.5.1
## [16] yaml_2.3.10 data.table_1.17.8 htmlwidgets_1.6.4
## [19] bit_4.6.0 curl_7.0.0 RColorBrewer_1.1-3
## [22] withr_3.0.2 nnet_7.3-20 grid_4.5.1
## [25] future_1.67.0 globals_0.18.0 scales_1.4.0
## [28] MASS_7.3-65 cli_3.6.5 crayon_1.5.3
## [31] rmarkdown_2.29 generics_0.1.4 rstudioapi_0.17.1
## [34] future.apply_1.20.0 tzdb_0.5.0 cachem_1.1.0
## [37] splines_4.5.1 parallel_4.5.1 vctrs_0.6.5
## [40] hardhat_1.4.2 Matrix_1.7-3 jsonlite_2.0.0
## [43] hms_1.1.3 bit64_4.6.0-1 RobStatTM_1.0.11
## [46] listenv_0.9.1 crosstalk_1.2.2 gower_1.0.2
## [49] jquerylib_0.1.4 recipes_1.3.1 glue_1.8.0
## [52] parallelly_1.45.1 codetools_0.2-20 rsample_1.3.1
## [55] stringi_1.8.7 gtable_0.3.6 quadprog_1.5-8
## [58] pillar_1.11.1 furrr_0.3.1 htmltools_0.5.8.1
## [61] ipred_0.9-15 lava_1.8.1 R6_2.6.1
## [64] vroom_1.6.6 evaluate_1.0.5 lattice_0.22-7
## [67] bslib_0.9.0 class_7.3-23 Rcpp_1.1.0
## [70] prodlim_2025.04.28 xfun_0.53 pkgconfig_2.0.3