title: “Midterm – Erdenesaikhan J” author: “Erdenesaikhan” date: “2025-12-09” output: html_document editor_options: markdown: wrap: 72 —

Introduction

Finanical Database Analysis and Application TEJ Stock Data Analysis

# First we have to load our packages also import the "tej_day_price_2024_20250630"
library(tidyverse), 
library(tidyquant), 
library(timetk),
library(zoo), 
library(xts), 
library(lubridate), 
library(knitr),
library(DT)

\> setwd("C:/Users/pc/Downloads")

\> cat("Current working directory:", getwd(), "\n")

Question 1: Import Data

  • > cat("Data imported successfully\n")
cat("Dimensions:", nrow(data), "rows ×", ncol(data), "columns\n")
#Dimensions: 337347 rows × 12 columns
  • {glimpse(data)}

Rows: 337,347Columns: 12$ CO_ID <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …$ Date <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…$ TSE ID <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …$ TSE Sector <chr> "01", "01", "01", "01", "01", "01", "01", "02", …$ English Short Name <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…$ Open(NTD) <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…$ High(NTD) <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…$ Low(NTD) <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…$ Close(NTD) <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…$ Volume(1000S) <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…$ Amount(NTD1000) <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…$ Market Cap.(NTD MN) <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…

Question 2: Replace Column Names

  • colnames(data)[c(2, 3, 5, 9, 12)] <- c("date", "id", "name", "price", "cap")
    
    cat("Column names updated:\n")
    ## Column names updated:

print(colnames(data)) [1] "CO_ID" "date" "id" "TSE Sector"[5] "name" "Open(NTD)" "High(NTD)" "Low(NTD)"[9] "price" "Volume(1000S)" "Amount(NTD1000)" "cap"

Question 3: Convert to Wide Format

  • > data_wide <- data %>%
    +     select(id, date, price) %>%
    +     mutate(
    +         id = as.character(id),
    +         date = as.Date(date)
    +     ) %>%
    +     pivot_wider(names_from = id, values_from = price)
    > 
    > cat("Data converted to wide format:\n")

Data converted to wide format:

cat("Dimensions:", nrow(data_wide), "rows ×", ncol(data_wide), "columns\n")

Dimensions: 358 rows × 947 columns

  • > kable(data_wide[1:5, 1:6], caption = "First 5 rows and 5 stocks")

Table: First 5 rows and 5 stocks

|date        |    1101|    1102|    1103|    1104|    1108|
|:-----------|-------:|-------:|-------:|-------:|-------:| 
|57385-07-08 | 32.3972| 37.0392| 17.6421| 26.0628| 14.0788|
|57385-07-09 | 31.9304| 36.5892| 17.5017| 25.8873| 14.0343|
|57385-07-10 | 31.9304| 37.0392| 17.5017| 25.5363| 14.0788|
|57385-07-11 | 32.0704| 36.9942| 17.5485| 25.7995| 14.0788|
|57385-07-14 | 31.9771| 37.1742| 17.5953| 25.7118| 14.1679|

Question 4: Identify Stocks with NA Values

  • > na_summary <- data_wide %>%
    +     summarise(across(-date, ~sum(is.na(.)))) %>%
    +     pivot_longer(everything(), names_to = "id", values_to = "na_count") %>%
    +     filter(na_count > 0) %>%
    +     arrange(desc(na_count))
    > 
    > cat("Total stocks with NA values:", nrow(na_summary), "\n")
  • > datatable(
    +     na_summary,
    +     caption = "Stocks with Missing Values",
    +     options = list(pageLength = 10)
    + )

Question 5: Fill NA Values

  • > data_filled <- data_wide %>%
    +     mutate(across(-date, ~na.locf(., na.rm = FALSE)))
    > remaining_nas <- data_filled %>%
    +     summarise(across(-date, ~sum(is.na(.)))) %>%
    +     pivot_longer(everything(), names_to = "id", values_to = "na_count") %>%
    +     filter(na_count > 0)
    > 
    > cat("Remaining NA values after forward fill:", nrow(remaining_nas), "\n")

Remaining NA values after forward fill: 10

Question 6: Remove Stocks with NA Values

  • > data_clean <- data_wide %>%
    +     select(-all_of(stocks_with_na))
    > 
    > cat("Original dimensions:", nrow(data_wide), "×", ncol(data_wide), "\n")

Original dimensions: 358 x 947

  • cat("After removing NA stocks:", nrow(data_clean), "×", ncol(data_clean), "\n")

After removing NA stocks: 358 × 937

  • > cat("Stocks removed:", length(stocks_with_na), "\n") 

Stocks removed: 10

Question 7: Daily Returns

  • > data_xts <- data_clean %>%
    +     tk_xts(date_var = date)
    >
    > cat("Converted to xts format\n")

Converted to xts format

  • cat("Dimensions:", nrow(data_xts), "×", ncol(data_xts), "\n")

Dimensions: 358 × 936

  • cat("Date range:", as.character(range(index(data_xts))), "\n")

Date range: 57385-07-08 57414-05-06

  • > daily_returns <- Return.calculate(data_xts, method = "discrete")
    > daily_returns_clean <- daily_returns[-1, ]
    > cat("Daily returns calculated\n")

Daily returns calculated

  • cat("Dimensions:", nrow(daily_returns_clean), "×", ncol(daily_returns_clean), "\n")

Dimensions: 357 × 936

  • > n_display <- min(5, ncol(daily_returns_clean))
    > kable(
    +     daily_returns_clean[1:5, 1:n_display],
    +     caption = "First 5 Days of Daily Returns (First 5 Stocks)",
    +     digits = 6
    + )

Table: First 5 Days of Daily Returns (First 5 Stocks) | 1101| 1102| 1103| 1104| 1108| |------------:|------------:|------------:|------------:|------------:| | -0.014408653| -0.012149290| -0.007958236| -0.006733735| -0.003160781| | 0.000000000| 0.012298711| 0.000000000| -0.013558772| 0.003170803| | 0.004384536| -0.001214929| 0.002674026| 0.010306896| 0.000000000| | -0.002909225| 0.004865628| 0.002666895| -0.003399291| 0.006328664| | -0.005841680| -0.007263102| -0.002659801| -0.013655209| -0.025155457|

Question 8: Monthly Returns

  • > data_monthly <- data_clean %>%
    +     mutate(year_month = floor_date(date, "month")) %>%
    +     group_by(year_month) %>%
    +     summarise(across(where(is.numeric), last, .names = "{.col}")) %>%
    +     ungroup() %>%
    +     tk_xts(date_var = year_month)
    Warning message:
    Non-numeric columns being dropped: year_month 
    > 
    > cat("Monthly data created\n")

Monthly data created

  • cat("Dimensions:", nrow(data_monthly), "×", ncol(data_monthly), "\n")

Dimensions: 34 × 936

  • cat("Date range:", as.character(range(index(data_monthly))), "\n")

Date range: 57385-07-01 57414-05-01

  • monthly_returns <- Return.calculate(data_monthly, method = "discrete")
    monthly_returns_clean <- monthly_returns[-1, ]
    
    cat("Monthly returns calculated\n")

Monthly returns calculated

  • cat("Dimensions:", nrow(monthly_returns_clean), "×", ncol(monthly_returns_clean), "\n")

Dimensions: 33 × 936

  • > n_rows <- min(5, nrow(monthly_returns_clean))
    > n_cols <- min(5, ncol(monthly_returns_clean))
    > 
    > if (n_rows > 0 && n_cols > 0) {
    +     kable(
    +         monthly_returns_clean[1:n_rows, 1:n_cols],
    +         caption = paste("First", n_rows, "Months of Returns (First", n_cols, "Stocks)"),
    +         digits = 6
    +     )
    + }

Table: First 5 Months of Returns (First 5 Stocks)

| 1101| 1102| 1103| 1104| 1108|
|————:|————:|————:|————:|————:|
| -0.024464541| -0.006249288| -0.011081251| 0.006908198| -0.003274780|
| 0.000000000| -0.003775947| 0.000000000| -0.005144624| 0.006578462|
| 0.006272034| 0.021465043| -0.008404066| 0.024138947| 0.029413275|
| -0.007791176| -0.001235955| -0.022600780| 0.045451755| -0.006348731|
| 0.009419464| 0.022277466| 0.005780847| 0.017715452| 0.009580357|

Question 9: Top 20 Largest Cap Firms

  • > top20_2024 <- data %>%
    +     select(id, date, name, cap) %>%
    +     filter(date == as.Date("2024-12-31")) %>%
    +     group_by(id) %>%
    +     slice(1) %>%
    +     ungroup() %>%
    +     arrange(desc(cap)) %>%
    +     slice(1:20
  • datatable(
      top20_2024,
      caption = "Top 20 Largest Market Cap Firms (Dec 31, 2024)",
      options = list(pageLength = 20)
    )

Summary Statistics

  • daily_summary <- data.frame(
      Metric = c("Mean", "Median", "Std Dev", "Min", "Max"),
      Value = c(
        mean(daily_returns_clean, na.rm = TRUE),
        median(daily_returns_clean, na.rm = TRUE),
        sd(daily_returns_clean, na.rm = TRUE),
        min(daily_returns_clean, na.rm = TRUE),
        max(daily_returns_clean, na.rm = TRUE)
      )
    )
    
    kable(daily_summary, 
          caption = "Daily Returns Summary Statistics",
          digits = 6)

Table: Daily Returns Summary Statistics |Metric | Value| |:-------|---------:| |Mean | 0.000209| |Median | 0.000000| |Std Dev | 0.024961| |Min | -0.354955| |Max | 0.466552|

Conclusion


This analysis covered:
Data import and cleaning (358 days × 936 stocks)
Handling missing values
Daily and monthly returns calculation
Market capitalization analysis

Key Findings:

Total stocks analyzed: 936
Date range: 57385-07-08 to 57414-05-06
Daily observations: 358
Monthly observations: 33