1 Introduction

This report analyzes TEJ stock price data from 2024 to June 2025. The analysis includes data import, cleaning, transformation, and calculation of stock returns.

2 Setup

2.1 Load Required Packages

# Clean environment
rm(list=ls())

# Load packages
library(tidyverse)
library(tidyquant)
library(timetk)
library(zoo)
library(xts)
library(PerformanceAnalytics)
library(knitr)
library(DT)

3 Data Import and Preparation

3.1 Question 1: Import the Data File

We’ll try different import functions to read the TEJ data file.

# Try read_delim() - auto-detect delimiter (recommended)
data_delim <- read_delim("tej_day_price_2024_20250630.txt")

# Show structure of imported data
glimpse(data_delim)
## Rows: 337,347
## Columns: 12
## $ CO_ID                 <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date                  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID`              <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector`          <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name`  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)`           <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)`           <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)`            <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)`          <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)`       <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)`     <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…

Summary: The data has been successfully imported with 337347 rows and 12 columns.

3.2 Question 2: Rename Columns

Replace columns 2, 3, 5, 9, and 12 with meaningful names.

data <- data_delim %>%
  rename(
    date = 2,
    id = 3,
    name = 5,
    price = 9,
    cap = 12
  )

# Display first few rows
kable(head(data), caption = "Data with Renamed Columns")
Data with Renamed Columns
CO_ID date id TSE Sector name Open(NTD) High(NTD) Low(NTD) price Volume(1000S) Amount(NTD1000) cap
1101 TCC 20240102 1101 01 TCC 32.5373 32.5373 32.3038 32.3972 14937 518751 262026
1102 ACC 20240102 1102 01 ACC 37.2642 37.4442 36.9492 37.0392 6223 256522 145941
1103 CHC 20240102 1103 01 CHC 17.7825 17.7825 17.5953 17.6421 171 3240 14896
1104 UCC 20240102 1104 01 UCC 26.0628 26.1505 25.9750 26.0628 260 7736 19995
1108 Lucky Cement 20240102 1108 01 Lucky Cement 14.1679 14.1679 14.0343 14.0788 442 6992 6395
1109 HSINGTA 20240102 1109 01 HSINGTA 16.1807 16.2696 16.1362 16.1807 228 4159 6209

3.3 Question 3: Transform to Wide Format

Select relevant columns, convert data types, and reshape from long to wide format.

# First, check the date format
cat("Sample dates before conversion:\n")
## Sample dates before conversion:
print(head(data$date, 3))
## [1] 20240102 20240102 20240102
data_wide <- data %>%
  select(id, date, price) %>%
  mutate(
    id = as.character(id),
    # Handle different date formats
    date = case_when(
      # If date is numeric like 20240101
      !grepl("[^0-9]", as.character(date)) ~ ymd(as.character(date)),
      # If date has slashes
      grepl("/", as.character(date)) ~ as.Date(date, format = "%Y/%m/%d"),
      # If date has dashes
      grepl("-", as.character(date)) ~ as.Date(date, format = "%Y-%m-%d"),
      # Default
      TRUE ~ ymd(as.character(date))
    )
  ) %>%
  pivot_wider(
    names_from = id,
    values_from = price
  )

# Show results
kable(head(data_wide[, 1:6]), caption = "Wide Format Data (First 5 Stocks)")
Wide Format Data (First 5 Stocks)
date 1101 1102 1103 1104 1108
2024-01-02 32.3972 37.0392 17.6421 26.0628 14.0788
2024-01-03 31.9304 36.5892 17.5017 25.8873 14.0343
2024-01-04 31.9304 37.0392 17.5017 25.5363 14.0788
2024-01-05 32.0704 36.9942 17.5485 25.7995 14.0788
2024-01-08 31.9771 37.1742 17.5953 25.7118 14.1679
2024-01-09 31.7903 36.9042 17.5485 25.3607 13.8115

Data Dimensions: 358 rows × 947 columns

4 Data Quality Analysis

4.1 Question 4: Identify Stocks with NA Values

na_summary <- data_wide %>%
  summarise(across(-date, ~sum(is.na(.)))) %>%
  pivot_longer(everything(), names_to = "stock_id", values_to = "na_count") %>%
  filter(na_count > 0) %>%
  arrange(desc(na_count))

if(nrow(na_summary) > 0) {
  datatable(na_summary, 
            caption = "Stocks with Missing Values",
            options = list(pageLength = 10))
} else {
  cat("No missing values found in the dataset.")
}

Total stocks with NA values: 10

4.2 Question 5: Fill Missing Values

Replace NA values with the closest available stock prices using na.locf().

data_filled <- data_wide %>%
  mutate(across(-date, ~na.locf(., na.rm = FALSE)))

# Check remaining NAs
remaining_na <- data_filled %>%
  summarise(across(-date, ~sum(is.na(.)))) %>%
  sum()

cat("Remaining NA values after filling:", remaining_na, "\n")
## Remaining NA values after filling: 1321

4.3 Question 6: Remove Stocks with Remaining NA Values

# Identify which columns still have NAs
cols_with_na <- data_wide %>%
  summarise(across(-date, ~any(is.na(.)))) %>%
  pivot_longer(everything(), names_to = "stock_id", values_to = "has_na") %>%
  filter(has_na) %>%
  pull(stock_id)

# Remove those columns
data_clean <- data_wide %>%
  select(-all_of(cols_with_na))

# Show dimensions
cat("Original dimensions:", nrow(data_wide), "rows ×", ncol(data_wide), "columns\n")
## Original dimensions: 358 rows × 947 columns
cat("Cleaned dimensions:", nrow(data_clean), "rows ×", ncol(data_clean), "columns\n")
## Cleaned dimensions: 358 rows × 937 columns
cat("Stocks removed:", length(cols_with_na), "\n")
## Stocks removed: 10

5 Return Calculations

5.1 Question 7: Daily Returns

Convert data to time series format (xts) and calculate daily returns.

# Convert to xts
data_xts <- data_clean %>%
  tk_xts(date_var = date)

# Calculate daily returns (discrete)
daily_returns <- Return.calculate(data_xts, method = "discrete")

# Delete first row and show first 5 stocks, first 5 days
daily_returns_clean <- daily_returns[-1, ]

kable(daily_returns_clean[1:5, 1:5], 
      caption = "Daily Returns: First 5 Stocks, First 5 Days",
      digits = 6)
Daily Returns: First 5 Stocks, First 5 Days
1101 1102 1103 1104 1108
-0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
0.000000000 0.012298711 0.000000000 -0.013558772 0.003170803
0.004384536 -0.001214929 0.002674026 0.010306896 0.000000000
-0.002909225 0.004865628 0.002666895 -0.003399291 0.006328664
-0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457

Daily Returns Summary:

# Calculate summary statistics
daily_stats <- data.frame(
  Statistic = c("Mean", "Median", "Std Dev", "Min", "Max"),
  Value = c(
    mean(daily_returns_clean, na.rm = TRUE),
    median(daily_returns_clean, na.rm = TRUE),
    sd(daily_returns_clean, na.rm = TRUE),
    min(daily_returns_clean, na.rm = TRUE),
    max(daily_returns_clean, na.rm = TRUE)
  )
)

kable(daily_stats, digits = 6, caption = "Daily Returns Summary Statistics")
Daily Returns Summary Statistics
Statistic Value
Mean 0.000209
Median 0.000000
Std Dev 0.024961
Min -0.354955
Max 0.466552

5.2 Question 8: Monthly Returns

Calculate monthly returns from the daily price data.

# Convert to monthly prices
monthly_prices <- to.monthly(data_xts, OHLC = FALSE)

# Calculate monthly returns
monthly_returns <- Return.calculate(monthly_prices, method = "discrete")

# Delete first row and show first 5 stocks, first 5 months
monthly_returns_clean <- monthly_returns[-1, ]

kable(monthly_returns_clean[1:5, 1:5], 
      caption = "Monthly Returns: First 5 Stocks, First 5 Months",
      digits = 6)
Monthly Returns: First 5 Stocks, First 5 Months
1101 1102 1103 1104 1108
0.006272034 0.01760804 -0.008404066 0.01887014 0.036185231
0.001554899 0.02101398 -0.016950585 0.06397241 0.003170803
-0.003108301 0.05811288 0.057470064 0.11234002 0.072790295
0.029639309 -0.04920108 0.032611536 -0.03271487 0.000000000
0.036364817 0.05535680 -0.036845213 0.04852830 -0.011805133

Monthly Returns Summary:

# Calculate summary statistics
monthly_stats <- data.frame(
  Statistic = c("Mean", "Median", "Std Dev", "Min", "Max"),
  Value = c(
    mean(monthly_returns_clean, na.rm = TRUE),
    median(monthly_returns_clean, na.rm = TRUE),
    sd(monthly_returns_clean, na.rm = TRUE),
    min(monthly_returns_clean, na.rm = TRUE),
    max(monthly_returns_clean, na.rm = TRUE)
  )
)

kable(monthly_stats, digits = 6, caption = "Monthly Returns Summary Statistics")
Monthly Returns Summary Statistics
Statistic Value
Mean 0.004060
Median -0.005655
Std Dev 0.109047
Min -0.636090
Max 1.868217

6 Market Cap Analysis

6.1 Question 9: Top 20 Firms by Market Capitalization

Find the 20 largest firms by market cap at year-end 2024 and 2025.

# Get the last date of each year in the dataset
year_ends <- data %>%
  mutate(
    date = case_when(
      !grepl("[^0-9]", as.character(date)) ~ ymd(as.character(date)),
      grepl("/", as.character(date)) ~ as.Date(date, format = "%Y/%m/%d"),
      grepl("-", as.character(date)) ~ as.Date(date, format = "%Y-%m-%d"),
      TRUE ~ ymd(as.character(date))
    )
  ) %>%
  group_by(year = year(date)) %>%
  summarise(last_date = max(date)) %>%
  filter(year %in% c(2024, 2025))

# Find top 20 firms for each year-end
top_firms <- data %>%
  select(id, date, name, cap) %>%
  mutate(
    date = case_when(
      !grepl("[^0-9]", as.character(date)) ~ ymd(as.character(date)),
      grepl("/", as.character(date)) ~ as.Date(date, format = "%Y/%m/%d"),
      grepl("-", as.character(date)) ~ as.Date(date, format = "%Y-%m-%d"),
      TRUE ~ ymd(as.character(date))
    )
  ) %>%
  filter(date %in% year_ends$last_date) %>%
  group_by(date) %>%
  arrange(desc(cap)) %>%
  slice(1:20) %>%
  ungroup()

# Show combined results
datatable(top_firms, 
          caption = "Top 20 Firms by Market Cap (2024 & 2025)",
          options = list(pageLength = 20))

6.1.1 Top 20 Firms in 2024

top_2024 <- top_firms %>% filter(year(date) == 2024)
if(nrow(top_2024) > 0) {
  datatable(top_2024, 
            caption = "Top 20 Firms - Year End 2024",
            options = list(pageLength = 20))
} else {
  cat("No data available for 2024.")
}

6.1.2 Top 20 Firms in 2025

top_2025 <- top_firms %>% filter(year(date) == 2025)
if(nrow(top_2025) > 0) {
  datatable(top_2025, 
            caption = "Top 20 Firms - June 2025",
            options = list(pageLength = 20))
} else {
  cat("No data available for 2025.")
}

7 Conclusion

This analysis has successfully:

  1. ✓ Imported and cleaned TEJ stock price data
  2. ✓ Transformed data from long to wide format
  3. ✓ Identified and handled missing values
  4. ✓ Calculated daily and monthly returns
  5. ✓ Identified the top 20 firms by market capitalization

Key Findings:

  • Total stocks analyzed: 936
  • Date range: 2024-01-02 to 2025-06-30
  • Stocks removed due to missing data: 10

Report generated on 2025-10-28