library(tidyverse)
library(tidyquant)
library(timetk)
library(zoo)
library(xts)
library(lubridate)
library(knitr)
library(DT)
setwd("C:/Users/Suvderdene/Downloads")
cat("Current working directory:", getwd(), "\n")
## Current working directory: C:/Users/Suvderdene/Downloads
# Import data using read_delim for tab-separated file
data <- read_delim("tej_day_price_2024_20250630.txt", delim = "\t")
cat("Data imported successfully\n")
## Data imported successfully
cat("Dimensions:", nrow(data), "rows ×", ncol(data), "columns\n")
## Dimensions: 337347 rows × 12 columns
glimpse(data)
## Rows: 337,347
## Columns: 12
## $ CO_ID <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID` <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector` <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name` <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)` <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)` <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)` <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)` <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)` <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)` <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…
# Replace columns 2, 3, 5, 9, and 12 with new names
colnames(data)[c(2, 3, 5, 9, 12)] <- c("date", "id", "name", "price", "cap")
cat("Column names updated:\n")
## Column names updated:
print(colnames(data))
## [1] "CO_ID" "date" "id" "TSE Sector"
## [5] "name" "Open(NTD)" "High(NTD)" "Low(NTD)"
## [9] "price" "Volume(1000S)" "Amount(NTD1000)" "cap"
data_wide <- data %>%
select(id, date, price) %>%
mutate(
id = as.character(id),
date = as.Date(date)
) %>%
pivot_wider(names_from = id, values_from = price)
cat("Data converted to wide format:\n")
## Data converted to wide format:
cat("Dimensions:", nrow(data_wide), "rows ×", ncol(data_wide), "columns\n")
## Dimensions: 358 rows × 947 columns
# Display first few rows and columns
kable(data_wide[1:5, 1:6], caption = "First 5 rows and 5 stocks")
| date | 1101 | 1102 | 1103 | 1104 | 1108 |
|---|---|---|---|---|---|
| 57385-07-08 | 32.3972 | 37.0392 | 17.6421 | 26.0628 | 14.0788 |
| 57385-07-09 | 31.9304 | 36.5892 | 17.5017 | 25.8873 | 14.0343 |
| 57385-07-10 | 31.9304 | 37.0392 | 17.5017 | 25.5363 | 14.0788 |
| 57385-07-11 | 32.0704 | 36.9942 | 17.5485 | 25.7995 | 14.0788 |
| 57385-07-14 | 31.9771 | 37.1742 | 17.5953 | 25.7118 | 14.1679 |
na_summary <- data_wide %>%
summarise(across(-date, ~sum(is.na(.)))) %>%
pivot_longer(everything(), names_to = "id", values_to = "na_count") %>%
filter(na_count > 0) %>%
arrange(desc(na_count))
cat("Total stocks with NA values:", nrow(na_summary), "\n")
## Total stocks with NA values: 10
# Display NA summary table
datatable(
na_summary,
caption = "Stocks with Missing Values",
options = list(pageLength = 10)
)
data_filled <- data_wide %>%
mutate(across(-date, ~na.locf(., na.rm = FALSE)))
# Check remaining NAs
remaining_nas <- data_filled %>%
summarise(across(-date, ~sum(is.na(.)))) %>%
pivot_longer(everything(), names_to = "id", values_to = "na_count") %>%
filter(na_count > 0)
cat("Remaining NA values after forward fill:", nrow(remaining_nas), "\n")
## Remaining NA values after forward fill: 10
# Get stock IDs that had NA values
stocks_with_na <- na_summary$id
# Remove those columns
data_clean <- data_wide %>%
select(-all_of(stocks_with_na))
cat("Original dimensions:", nrow(data_wide), "×", ncol(data_wide), "\n")
## Original dimensions: 358 × 947
cat("After removing NA stocks:", nrow(data_clean), "×", ncol(data_clean), "\n")
## After removing NA stocks: 358 × 937
cat("Stocks removed:", length(stocks_with_na), "\n")
## Stocks removed: 10
# Convert to xts
data_xts <- data_clean %>%
tk_xts(date_var = date)
cat("Converted to xts format\n")
## Converted to xts format
cat("Dimensions:", nrow(data_xts), "×", ncol(data_xts), "\n")
## Dimensions: 358 × 936
cat("Date range:", as.character(range(index(data_xts))), "\n")
## Date range: 57385-07-08 57414-05-06
# Calculate daily discrete returns
daily_returns <- Return.calculate(data_xts, method = "discrete")
# Remove first row (NA from calculation)
daily_returns_clean <- daily_returns[-1, ]
cat("Daily returns calculated\n")
## Daily returns calculated
cat("Dimensions:", nrow(daily_returns_clean), "×", ncol(daily_returns_clean), "\n")
## Dimensions: 357 × 936
# Display first 5 stocks and 5 days
n_display <- min(5, ncol(daily_returns_clean))
kable(
daily_returns_clean[1:5, 1:n_display],
caption = "First 5 Days of Daily Returns (First 5 Stocks)",
digits = 6
)
| 1101 | 1102 | 1103 | 1104 | 1108 |
|---|---|---|---|---|
| -0.014408653 | -0.012149290 | -0.007958236 | -0.006733735 | -0.003160781 |
| 0.000000000 | 0.012298711 | 0.000000000 | -0.013558772 | 0.003170803 |
| 0.004384536 | -0.001214929 | 0.002674026 | 0.010306896 | 0.000000000 |
| -0.002909225 | 0.004865628 | 0.002666895 | -0.003399291 | 0.006328664 |
| -0.005841680 | -0.007263102 | -0.002659801 | -0.013655209 | -0.025155457 |
# Manual monthly aggregation (taking last price of each month)
data_monthly <- data_clean %>%
mutate(year_month = floor_date(date, "month")) %>%
group_by(year_month) %>%
summarise(across(where(is.numeric), last, .names = "{.col}")) %>%
ungroup() %>%
tk_xts(date_var = year_month)
cat("Monthly data created\n")
## Monthly data created
cat("Dimensions:", nrow(data_monthly), "×", ncol(data_monthly), "\n")
## Dimensions: 34 × 936
cat("Date range:", as.character(range(index(data_monthly))), "\n")
## Date range: 57385-07-01 57414-05-01
# Calculate monthly returns
monthly_returns <- Return.calculate(data_monthly, method = "discrete")
monthly_returns_clean <- monthly_returns[-1, ]
cat("Monthly returns calculated\n")
## Monthly returns calculated
cat("Dimensions:", nrow(monthly_returns_clean), "×", ncol(monthly_returns_clean), "\n")
## Dimensions: 33 × 936
# Safe display
n_rows <- min(5, nrow(monthly_returns_clean))
n_cols <- min(5, ncol(monthly_returns_clean))
if (n_rows > 0 && n_cols > 0) {
kable(
monthly_returns_clean[1:n_rows, 1:n_cols],
caption = paste("First", n_rows, "Months of Returns (First", n_cols, "Stocks)"),
digits = 6
)
}
| 1101 | 1102 | 1103 | 1104 | 1108 |
|---|---|---|---|---|
| -0.024464541 | -0.006249288 | -0.011081251 | 0.006908198 | -0.003274780 |
| 0.000000000 | -0.003775947 | 0.000000000 | -0.005144624 | 0.006578462 |
| 0.006272034 | 0.021465043 | -0.008404066 | 0.024138947 | 0.029413275 |
| -0.007791176 | -0.001235955 | -0.022600780 | 0.045451755 | -0.006348731 |
| 0.009419464 | 0.022277466 | 0.005780847 | 0.017715452 | 0.009580357 |
top20_2024 <- data %>%
select(id, date, name, cap) %>%
filter(date == as.Date("2024-12-31")) %>%
group_by(id) %>%
slice(1) %>%
ungroup() %>%
arrange(desc(cap)) %>%
slice(1:20)
datatable(
top20_2024,
caption = "Top 20 Largest Market Cap Firms (Dec 31, 2024)",
options = list(pageLength = 20)
)
top20_2025 <- data %>%
select(id, date, name, cap) %>%
filter(date == as.Date("2025-06-30")) %>%
group_by(id) %>%
slice(1) %>%
ungroup() %>%
arrange(desc(cap)) %>%
slice(1:20)
datatable(
top20_2025,
caption = "Top 20 Largest Market Cap Firms (Jun 30, 2025)",
options = list(pageLength = 20)
)
# Daily returns summary
daily_summary <- data.frame(
Metric = c("Mean", "Median", "Std Dev", "Min", "Max"),
Value = c(
mean(daily_returns_clean, na.rm = TRUE),
median(daily_returns_clean, na.rm = TRUE),
sd(daily_returns_clean, na.rm = TRUE),
min(daily_returns_clean, na.rm = TRUE),
max(daily_returns_clean, na.rm = TRUE)
)
)
kable(daily_summary,
caption = "Daily Returns Summary Statistics",
digits = 6)
| Metric | Value |
|---|---|
| Mean | 0.000209 |
| Median | 0.000000 |
| Std Dev | 0.024961 |
| Min | -0.354955 |
| Max | 0.466552 |
This analysis covered:
Key Findings: