This report analyzes TEJ stock price data from 2024 to June 2025. The analysis includes data import, cleaning, transformation, and calculation of stock returns.
We’ll try different import functions to read the TEJ data file.
# Try read_delim() - auto-detect delimiter (recommended)
data_delim <- read_delim("tej_day_price_2024_20250630.txt")
# Show structure of imported data
glimpse(data_delim)## Rows: 337,347
## Columns: 12
## $ CO_ID <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID` <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector` <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name` <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)` <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)` <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)` <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)` <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)` <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)` <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…
Summary: The data has been successfully imported with 337347 rows and 12 columns.
Replace columns 2, 3, 5, 9, and 12 with meaningful names.
data <- data_delim %>%
rename(
date = 2,
id = 3,
name = 5,
price = 9,
cap = 12
)
# Display first few rows
kable(head(data), caption = "Data with Renamed Columns")| CO_ID | date | id | TSE Sector | name | Open(NTD) | High(NTD) | Low(NTD) | price | Volume(1000S) | Amount(NTD1000) | cap |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1101 TCC | 20240102 | 1101 | 01 | TCC | 32.5373 | 32.5373 | 32.3038 | 32.3972 | 14937 | 518751 | 262026 |
| 1102 ACC | 20240102 | 1102 | 01 | ACC | 37.2642 | 37.4442 | 36.9492 | 37.0392 | 6223 | 256522 | 145941 |
| 1103 CHC | 20240102 | 1103 | 01 | CHC | 17.7825 | 17.7825 | 17.5953 | 17.6421 | 171 | 3240 | 14896 |
| 1104 UCC | 20240102 | 1104 | 01 | UCC | 26.0628 | 26.1505 | 25.9750 | 26.0628 | 260 | 7736 | 19995 |
| 1108 Lucky Cement | 20240102 | 1108 | 01 | Lucky Cement | 14.1679 | 14.1679 | 14.0343 | 14.0788 | 442 | 6992 | 6395 |
| 1109 HSINGTA | 20240102 | 1109 | 01 | HSINGTA | 16.1807 | 16.2696 | 16.1362 | 16.1807 | 228 | 4159 | 6209 |
Select relevant columns, convert data types, and reshape from long to wide format.
## Sample dates before conversion:
## [1] 20240102 20240102 20240102
data_wide <- data %>%
select(id, date, price) %>%
mutate(
id = as.character(id),
# Handle different date formats
date = case_when(
# If date is numeric like 20240101
!grepl("[^0-9]", as.character(date)) ~ ymd(as.character(date)),
# If date has slashes
grepl("/", as.character(date)) ~ as.Date(date, format = "%Y/%m/%d"),
# If date has dashes
grepl("-", as.character(date)) ~ as.Date(date, format = "%Y-%m-%d"),
# Default
TRUE ~ ymd(as.character(date))
)
) %>%
pivot_wider(
names_from = id,
values_from = price
)
# Show results
kable(head(data_wide[, 1:6]), caption = "Wide Format Data (First 5 Stocks)")| date | 1101 | 1102 | 1103 | 1104 | 1108 |
|---|---|---|---|---|---|
| 2024-01-02 | 32.3972 | 37.0392 | 17.6421 | 26.0628 | 14.0788 |
| 2024-01-03 | 31.9304 | 36.5892 | 17.5017 | 25.8873 | 14.0343 |
| 2024-01-04 | 31.9304 | 37.0392 | 17.5017 | 25.5363 | 14.0788 |
| 2024-01-05 | 32.0704 | 36.9942 | 17.5485 | 25.7995 | 14.0788 |
| 2024-01-08 | 31.9771 | 37.1742 | 17.5953 | 25.7118 | 14.1679 |
| 2024-01-09 | 31.7903 | 36.9042 | 17.5485 | 25.3607 | 13.8115 |
Data Dimensions: 358 rows × 947 columns
na_summary <- data_wide %>%
summarise(across(-date, ~sum(is.na(.)))) %>%
pivot_longer(everything(), names_to = "stock_id", values_to = "na_count") %>%
filter(na_count > 0) %>%
arrange(desc(na_count))
if(nrow(na_summary) > 0) {
datatable(na_summary,
caption = "Stocks with Missing Values",
options = list(pageLength = 10))
} else {
cat("No missing values found in the dataset.")
}Total stocks with NA values: 10
Replace NA values with the closest available stock prices using
na.locf().
data_filled <- data_wide %>%
mutate(across(-date, ~na.locf(., na.rm = FALSE)))
# Check remaining NAs
remaining_na <- data_filled %>%
summarise(across(-date, ~sum(is.na(.)))) %>%
sum()
cat("Remaining NA values after filling:", remaining_na, "\n")## Remaining NA values after filling: 1321
# Identify which columns still have NAs
cols_with_na <- data_wide %>%
summarise(across(-date, ~any(is.na(.)))) %>%
pivot_longer(everything(), names_to = "stock_id", values_to = "has_na") %>%
filter(has_na) %>%
pull(stock_id)
# Remove those columns
data_clean <- data_wide %>%
select(-all_of(cols_with_na))
# Show dimensions
cat("Original dimensions:", nrow(data_wide), "rows ×", ncol(data_wide), "columns\n")## Original dimensions: 358 rows × 947 columns
## Cleaned dimensions: 358 rows × 937 columns
## Stocks removed: 10
Convert data to time series format (xts) and calculate daily returns.
# Convert to xts
data_xts <- data_clean %>%
tk_xts(date_var = date)
# Calculate daily returns (discrete)
daily_returns <- Return.calculate(data_xts, method = "discrete")
# Delete first row and show first 5 stocks, first 5 days
daily_returns_clean <- daily_returns[-1, ]
kable(daily_returns_clean[1:5, 1:5],
caption = "Daily Returns: First 5 Stocks, First 5 Days",
digits = 6)| 1101 | 1102 | 1103 | 1104 | 1108 |
|---|---|---|---|---|
| -0.014408653 | -0.012149290 | -0.007958236 | -0.006733735 | -0.003160781 |
| 0.000000000 | 0.012298711 | 0.000000000 | -0.013558772 | 0.003170803 |
| 0.004384536 | -0.001214929 | 0.002674026 | 0.010306896 | 0.000000000 |
| -0.002909225 | 0.004865628 | 0.002666895 | -0.003399291 | 0.006328664 |
| -0.005841680 | -0.007263102 | -0.002659801 | -0.013655209 | -0.025155457 |
Daily Returns Summary:
# Calculate summary statistics
daily_stats <- data.frame(
Statistic = c("Mean", "Median", "Std Dev", "Min", "Max"),
Value = c(
mean(daily_returns_clean, na.rm = TRUE),
median(daily_returns_clean, na.rm = TRUE),
sd(daily_returns_clean, na.rm = TRUE),
min(daily_returns_clean, na.rm = TRUE),
max(daily_returns_clean, na.rm = TRUE)
)
)
kable(daily_stats, digits = 6, caption = "Daily Returns Summary Statistics")| Statistic | Value |
|---|---|
| Mean | 0.000209 |
| Median | 0.000000 |
| Std Dev | 0.024961 |
| Min | -0.354955 |
| Max | 0.466552 |
Calculate monthly returns from the daily price data.
# Convert to monthly prices
monthly_prices <- to.monthly(data_xts, OHLC = FALSE)
# Calculate monthly returns
monthly_returns <- Return.calculate(monthly_prices, method = "discrete")
# Delete first row and show first 5 stocks, first 5 months
monthly_returns_clean <- monthly_returns[-1, ]
kable(monthly_returns_clean[1:5, 1:5],
caption = "Monthly Returns: First 5 Stocks, First 5 Months",
digits = 6)| 1101 | 1102 | 1103 | 1104 | 1108 |
|---|---|---|---|---|
| 0.006272034 | 0.01760804 | -0.008404066 | 0.01887014 | 0.036185231 |
| 0.001554899 | 0.02101398 | -0.016950585 | 0.06397241 | 0.003170803 |
| -0.003108301 | 0.05811288 | 0.057470064 | 0.11234002 | 0.072790295 |
| 0.029639309 | -0.04920108 | 0.032611536 | -0.03271487 | 0.000000000 |
| 0.036364817 | 0.05535680 | -0.036845213 | 0.04852830 | -0.011805133 |
Monthly Returns Summary:
# Calculate summary statistics
monthly_stats <- data.frame(
Statistic = c("Mean", "Median", "Std Dev", "Min", "Max"),
Value = c(
mean(monthly_returns_clean, na.rm = TRUE),
median(monthly_returns_clean, na.rm = TRUE),
sd(monthly_returns_clean, na.rm = TRUE),
min(monthly_returns_clean, na.rm = TRUE),
max(monthly_returns_clean, na.rm = TRUE)
)
)
kable(monthly_stats, digits = 6, caption = "Monthly Returns Summary Statistics")| Statistic | Value |
|---|---|
| Mean | 0.004060 |
| Median | -0.005655 |
| Std Dev | 0.109047 |
| Min | -0.636090 |
| Max | 1.868217 |
Find the 20 largest firms by market cap at year-end 2024 and 2025.
# Get the last date of each year in the dataset
year_ends <- data %>%
mutate(
date = case_when(
!grepl("[^0-9]", as.character(date)) ~ ymd(as.character(date)),
grepl("/", as.character(date)) ~ as.Date(date, format = "%Y/%m/%d"),
grepl("-", as.character(date)) ~ as.Date(date, format = "%Y-%m-%d"),
TRUE ~ ymd(as.character(date))
)
) %>%
group_by(year = year(date)) %>%
summarise(last_date = max(date)) %>%
filter(year %in% c(2024, 2025))
# Find top 20 firms for each year-end
top_firms <- data %>%
select(id, date, name, cap) %>%
mutate(
date = case_when(
!grepl("[^0-9]", as.character(date)) ~ ymd(as.character(date)),
grepl("/", as.character(date)) ~ as.Date(date, format = "%Y/%m/%d"),
grepl("-", as.character(date)) ~ as.Date(date, format = "%Y-%m-%d"),
TRUE ~ ymd(as.character(date))
)
) %>%
filter(date %in% year_ends$last_date) %>%
group_by(date) %>%
arrange(desc(cap)) %>%
slice(1:20) %>%
ungroup()
# Show combined results
datatable(top_firms,
caption = "Top 20 Firms by Market Cap (2024 & 2025)",
options = list(pageLength = 20))This analysis has successfully:
Key Findings:
Report generated on 2025-10-28