1 Introduction

This report analyzes TEJ stock price data from 2024 to June 2025. The analysis includes data import, cleaning, transformation, and calculation of stock returns.

2 Setup

2.1 Load Required Packages

# Clean environment
rm(list=ls())

# Load packages
library(tidyverse)
library(tidyquant)
library(timetk)
library(zoo)
library(xts)
library(PerformanceAnalytics)
library(knitr)
library(DT)

3 Data Import and Preparation

3.1 Question 1: Import the Data File

We’ll try different import functions to read the TEJ data file.

# Try read_delim() - auto-detect delimiter (recommended)
data_delim <- read_delim("tej_day_price_2024_20250630.txt")

# Show structure of imported data
glimpse(data_delim)

## Rows: 337,347
## Columns: 12
## $ CO_ID                 <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date                  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID`              <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector`          <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name`  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)`           <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)`           <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)`            <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)`          <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)`       <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)`     <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…

Summary: The data has been successfully imported with 337347 rows and 12 columns.

3.2 Question 2: Rename Columns

Replace columns 2, 3, 5, 9, and 12 with meaningful names.

data <- data_delim %>%
  rename(
    date = 2,
    id = 3,
    name = 5,
    price = 9,
    cap = 12
  )

# Display first few rows
kable(head(data), caption = "Data with Renamed Columns")

Data with Renamed Columns
CO_ID	date	id	TSE Sector	name	Open(NTD)	High(NTD)	Low(NTD)	price	Volume(1000S)	Amount(NTD1000)	cap
1101 TCC	20240102	1101	01	TCC	32.5373	32.5373	32.3038	32.3972	14937	518751	262026
1102 ACC	20240102	1102	01	ACC	37.2642	37.4442	36.9492	37.0392	6223	256522	145941
1103 CHC	20240102	1103	01	CHC	17.7825	17.7825	17.5953	17.6421	171	3240	14896
1104 UCC	20240102	1104	01	UCC	26.0628	26.1505	25.9750	26.0628	260	7736	19995
1108 Lucky Cement	20240102	1108	01	Lucky Cement	14.1679	14.1679	14.0343	14.0788	442	6992	6395
1109 HSINGTA	20240102	1109	01	HSINGTA	16.1807	16.2696	16.1362	16.1807	228	4159	6209

3.3 Question 3: Transform to Wide Format

Select relevant columns, convert data types, and reshape from long to wide format.

# First, check the date format
cat("Sample dates before conversion:\n")

## Sample dates before conversion:

print(head(data$date, 3))

## [1] 20240102 20240102 20240102

data_wide <- data %>%
  select(id, date, price) %>%
  mutate(
    id = as.character(id),
    # Handle different date formats
    date = case_when(
      # If date is numeric like 20240101
      !grepl("[^0-9]", as.character(date)) ~ ymd(as.character(date)),
      # If date has slashes
      grepl("/", as.character(date)) ~ as.Date(date, format = "%Y/%m/%d"),
      # If date has dashes
      grepl("-", as.character(date)) ~ as.Date(date, format = "%Y-%m-%d"),
      # Default
      TRUE ~ ymd(as.character(date))
    )
  ) %>%
  pivot_wider(
    names_from = id,
    values_from = price
  )

# Show results
kable(head(data_wide[, 1:6]), caption = "Wide Format Data (First 5 Stocks)")

Wide Format Data (First 5 Stocks)
date	1101	1102	1103	1104	1108
2024-01-02	32.3972	37.0392	17.6421	26.0628	14.0788
2024-01-03	31.9304	36.5892	17.5017	25.8873	14.0343
2024-01-04	31.9304	37.0392	17.5017	25.5363	14.0788
2024-01-05	32.0704	36.9942	17.5485	25.7995	14.0788
2024-01-08	31.9771	37.1742	17.5953	25.7118	14.1679
2024-01-09	31.7903	36.9042	17.5485	25.3607	13.8115

Data Dimensions: 358 rows × 947 columns

4 Data Quality Analysis

4.1 Question 4: Identify Stocks with NA Values

na_summary <- data_wide %>%
  summarise(across(-date, ~sum(is.na(.)))) %>%
  pivot_longer(everything(), names_to = "stock_id", values_to = "na_count") %>%
  filter(na_count > 0) %>%
  arrange(desc(na_count))

if(nrow(na_summary) > 0) {
  datatable(na_summary, 
            caption = "Stocks with Missing Values",
            options = list(pageLength = 10))
} else {
  cat("No missing values found in the dataset.")
}

Total stocks with NA values: 10

4.2 Question 5: Fill Missing Values

Replace NA values with the closest available stock prices using na.locf().

data_filled <- data_wide %>%
  mutate(across(-date, ~na.locf(., na.rm = FALSE)))

# Check remaining NAs
remaining_na <- data_filled %>%
  summarise(across(-date, ~sum(is.na(.)))) %>%
  sum()

cat("Remaining NA values after filling:", remaining_na, "\n")

## Remaining NA values after filling: 1321

4.3 Question 6: Remove Stocks with Remaining NA Values

# Identify which columns still have NAs
cols_with_na <- data_wide %>%
  summarise(across(-date, ~any(is.na(.)))) %>%
  pivot_longer(everything(), names_to = "stock_id", values_to = "has_na") %>%
  filter(has_na) %>%
  pull(stock_id)

# Remove those columns
data_clean <- data_wide %>%
  select(-all_of(cols_with_na))

# Show dimensions
cat("Original dimensions:", nrow(data_wide), "rows ×", ncol(data_wide), "columns\n")

## Original dimensions: 358 rows × 947 columns

cat("Cleaned dimensions:", nrow(data_clean), "rows ×", ncol(data_clean), "columns\n")

## Cleaned dimensions: 358 rows × 937 columns

cat("Stocks removed:", length(cols_with_na), "\n")

## Stocks removed: 10

5 Return Calculations

5.1 Question 7: Daily Returns

Convert data to time series format (xts) and calculate daily returns.

# Convert to xts
data_xts <- data_clean %>%
  tk_xts(date_var = date)

# Calculate daily returns (discrete)
daily_returns <- Return.calculate(data_xts, method = "discrete")

# Delete first row and show first 5 stocks, first 5 days
daily_returns_clean <- daily_returns[-1, ]

kable(daily_returns_clean[1:5, 1:5], 
      caption = "Daily Returns: First 5 Stocks, First 5 Days",
      digits = 6)

Daily Returns: First 5 Stocks, First 5 Days
1101	1102	1103	1104	1108
-0.014408653	-0.012149290	-0.007958236	-0.006733735	-0.003160781
0.000000000	0.012298711	0.000000000	-0.013558772	0.003170803
0.004384536	-0.001214929	0.002674026	0.010306896	0.000000000
-0.002909225	0.004865628	0.002666895	-0.003399291	0.006328664
-0.005841680	-0.007263102	-0.002659801	-0.013655209	-0.025155457

Daily Returns Summary:

# Calculate summary statistics
daily_stats <- data.frame(
  Statistic = c("Mean", "Median", "Std Dev", "Min", "Max"),
  Value = c(
    mean(daily_returns_clean, na.rm = TRUE),
    median(daily_returns_clean, na.rm = TRUE),
    sd(daily_returns_clean, na.rm = TRUE),
    min(daily_returns_clean, na.rm = TRUE),
    max(daily_returns_clean, na.rm = TRUE)
  )
)

kable(daily_stats, digits = 6, caption = "Daily Returns Summary Statistics")

Daily Returns Summary Statistics
Statistic	Value
Mean	0.000209
Median	0.000000
Std Dev	0.024961
Min	-0.354955
Max	0.466552

5.2 Question 8: Monthly Returns

Calculate monthly returns from the daily price data.

# Convert to monthly prices
monthly_prices <- to.monthly(data_xts, OHLC = FALSE)

# Calculate monthly returns
monthly_returns <- Return.calculate(monthly_prices, method = "discrete")

# Delete first row and show first 5 stocks, first 5 months
monthly_returns_clean <- monthly_returns[-1, ]

kable(monthly_returns_clean[1:5, 1:5], 
      caption = "Monthly Returns: First 5 Stocks, First 5 Months",
      digits = 6)

Monthly Returns: First 5 Stocks, First 5 Months
1101	1102	1103	1104	1108
0.006272034	0.01760804	-0.008404066	0.01887014	0.036185231
0.001554899	0.02101398	-0.016950585	0.06397241	0.003170803
-0.003108301	0.05811288	0.057470064	0.11234002	0.072790295
0.029639309	-0.04920108	0.032611536	-0.03271487	0.000000000
0.036364817	0.05535680	-0.036845213	0.04852830	-0.011805133

Monthly Returns Summary:

# Calculate summary statistics
monthly_stats <- data.frame(
  Statistic = c("Mean", "Median", "Std Dev", "Min", "Max"),
  Value = c(
    mean(monthly_returns_clean, na.rm = TRUE),
    median(monthly_returns_clean, na.rm = TRUE),
    sd(monthly_returns_clean, na.rm = TRUE),
    min(monthly_returns_clean, na.rm = TRUE),
    max(monthly_returns_clean, na.rm = TRUE)
  )
)

kable(monthly_stats, digits = 6, caption = "Monthly Returns Summary Statistics")

Monthly Returns Summary Statistics
Statistic	Value
Mean	0.004060
Median	-0.005655
Std Dev	0.109047
Min	-0.636090
Max	1.868217

6 Market Cap Analysis

6.1 Question 9: Top 20 Firms by Market Capitalization

Find the 20 largest firms by market cap at year-end 2024 and 2025.

# Get the last date of each year in the dataset
year_ends <- data %>%
  mutate(
    date = case_when(
      !grepl("[^0-9]", as.character(date)) ~ ymd(as.character(date)),
      grepl("/", as.character(date)) ~ as.Date(date, format = "%Y/%m/%d"),
      grepl("-", as.character(date)) ~ as.Date(date, format = "%Y-%m-%d"),
      TRUE ~ ymd(as.character(date))
    )
  ) %>%
  group_by(year = year(date)) %>%
  summarise(last_date = max(date)) %>%
  filter(year %in% c(2024, 2025))

# Find top 20 firms for each year-end
top_firms <- data %>%
  select(id, date, name, cap) %>%
  mutate(
    date = case_when(
      !grepl("[^0-9]", as.character(date)) ~ ymd(as.character(date)),
      grepl("/", as.character(date)) ~ as.Date(date, format = "%Y/%m/%d"),
      grepl("-", as.character(date)) ~ as.Date(date, format = "%Y-%m-%d"),
      TRUE ~ ymd(as.character(date))
    )
  ) %>%
  filter(date %in% year_ends$last_date) %>%
  group_by(date) %>%
  arrange(desc(cap)) %>%
  slice(1:20) %>%
  ungroup()

# Show combined results
datatable(top_firms, 
          caption = "Top 20 Firms by Market Cap (2024 & 2025)",
          options = list(pageLength = 20))

6.1.1 Top 20 Firms in 2024

top_2024 <- top_firms %>% filter(year(date) == 2024)
if(nrow(top_2024) > 0) {
  datatable(top_2024, 
            caption = "Top 20 Firms - Year End 2024",
            options = list(pageLength = 20))
} else {
  cat("No data available for 2024.")
}

6.1.2 Top 20 Firms in 2025

top_2025 <- top_firms %>% filter(year(date) == 2025)
if(nrow(top_2025) > 0) {
  datatable(top_2025, 
            caption = "Top 20 Firms - June 2025",
            options = list(pageLength = 20))
} else {
  cat("No data available for 2025.")
}

7 Conclusion

This analysis has successfully:

✓ Imported and cleaned TEJ stock price data
✓ Transformed data from long to wide format
✓ Identified and handled missing values
✓ Calculated daily and monthly returns
✓ Identified the top 20 firms by market capitalization

Key Findings:

Total stocks analyzed: 936
Date range: 2024-01-02 to 2025-06-30
Stocks removed due to missing data: 10

Report generated on 2025-10-28

TEJ Stock Data Analysis Report

Temuulen Sukhbat

2025/10/28