title: “Financial Database Analysis and Application – Midterm Solutions” author: “Bat-Erdene” date: “October 27, 2025” output: html_document: toc: true toc_depth: 2 number_sections: true theme: united highlight: tango —————-

1. Data Import

tej_data <- read_delim("tej_day_price_2024_20250630.txt", delim = "\t")
glimpse(tej_data)
## Rows: 337,347
## Columns: 12
## $ CO_ID                 <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date                  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID`              <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector`          <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name`  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)`           <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)`           <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)`            <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)`          <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)`       <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)`     <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…

2. Variable Selection and Renaming

tej_data_clean <- tej_data %>%
  select(2, 3, 5, 9, 12) %>%
  rename(
    date  = 1,
    id    = 2,
    name  = 3,
    price = 4,
    cap   = 5
  )

head(tej_data_clean)
## # A tibble: 6 × 5
##       date    id name         price    cap
##      <dbl> <dbl> <chr>        <dbl>  <dbl>
## 1 20240102  1101 TCC           32.4 262026
## 2 20240102  1102 ACC           37.0 145941
## 3 20240102  1103 CHC           17.6  14896
## 4 20240102  1104 UCC           26.1  19995
## 5 20240102  1108 Lucky Cement  14.1   6395
## 6 20240102  1109 HSINGTA       16.2   6209

3. Transform Data from Long to Wide Format

tej_wide <- tej_data_clean %>%
  mutate(
    id   = as.character(id),
    date = as.Date(as.character(date), "%Y%m%d")
  ) %>%
  select(id, date, price) %>%
  spread(key = id, value = price)

head(tej_wide)
## # A tibble: 6 × 947
##   date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##   <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 2024-01-02   32.4   37.0   17.6   26.1   14.1   16.2   18.3   18.2   55.3
## 2 2024-01-03   31.9   36.6   17.5   25.9   14.0   16.1   18.2   18.2   54.7
## 3 2024-01-04   31.9   37.0   17.5   25.5   14.1   16.1   18.4   18.1   54.1
## 4 2024-01-05   32.1   37.0   17.5   25.8   14.1   16.1   18.3   18.1   54.9
## 5 2024-01-08   32.0   37.2   17.6   25.7   14.2   16.1   18.3   18.1   54.6
## 6 2024-01-09   31.8   36.9   17.5   25.4   13.8   16.0   18.4   18.1   54.6
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## #   `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## #   `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## #   `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## #   `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## #   `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## #   `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …

4. Missing Value Analysis

na_summary <- tej_wide %>%
  gather(key = "stock_id", value = "price", -date) %>%
  group_by(stock_id) %>%
  summarise(missing_count = sum(is.na(price))) %>%
  filter(missing_count > 0)

na_summary
## # A tibble: 10 × 2
##    stock_id missing_count
##    <chr>            <int>
##  1 3716               160
##  2 4585               177
##  3 7722                18
##  4 7732                43
##  5 7736                53
##  6 7750               108
##  7 7765               151
##  8 7780               196
##  9 7788               198
## 10 7799               217

5. Imputation of Missing Data

tej_filled <- tej_wide %>%
  mutate_if(is.numeric, ~na.locf(., na.rm = FALSE))

head(tej_filled)
## # A tibble: 6 × 947
##   date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##   <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 2024-01-02   32.4   37.0   17.6   26.1   14.1   16.2   18.3   18.2   55.3
## 2 2024-01-03   31.9   36.6   17.5   25.9   14.0   16.1   18.2   18.2   54.7
## 3 2024-01-04   31.9   37.0   17.5   25.5   14.1   16.1   18.4   18.1   54.1
## 4 2024-01-05   32.1   37.0   17.5   25.8   14.1   16.1   18.3   18.1   54.9
## 5 2024-01-08   32.0   37.2   17.6   25.7   14.2   16.1   18.3   18.1   54.6
## 6 2024-01-09   31.8   36.9   17.5   25.4   13.8   16.0   18.4   18.1   54.6
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## #   `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## #   `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## #   `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## #   `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## #   `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## #   `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …

6. Removing Incomplete Stocks

tej_filtered <- tej_filled %>%
  select(where(~!any(is.na(.))))

dim(tej_filtered)
## [1] 358 937

7. Conversion to Time Series and Daily Returns

tej_xts <- tej_filtered %>%
  tk_xts(date_var = date)

daily_returns <- Return.calculate(tej_xts, method = "discrete")[-1, ]

head(daily_returns[, 1:5], 5)
##                    1101         1102         1103         1104         1108
## 2024-01-03 -0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
## 2024-01-04  0.000000000  0.012298711  0.000000000 -0.013558772  0.003170803
## 2024-01-05  0.004384536 -0.001214929  0.002674026  0.010306896  0.000000000
## 2024-01-08 -0.002909225  0.004865628  0.002666895 -0.003399291  0.006328664
## 2024-01-09 -0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457

8. Monthly Return Calculation (Fixed)

# Convert daily prices to end-of-month data
tej_monthly <- to.monthly(tej_xts, indexAt = "lastof", OHLC = FALSE)

# Compute discrete monthly returns
monthly_returns <- Return.calculate(tej_monthly, method = "discrete")[-1, ]

# Display first few rows
head(monthly_returns[, 1:5], 5)
##                    1101        1102         1103        1104         1108
## 2024-02-29  0.006272034  0.01760804 -0.008404066  0.01887014  0.036185231
## 2024-03-31  0.001554899  0.02101398 -0.016950585  0.06397241  0.003170803
## 2024-04-30 -0.003108301  0.05811288  0.057470064  0.11234002  0.072790295
## 2024-05-31  0.029639309 -0.04920108  0.032611536 -0.03271487  0.000000000
## 2024-06-30  0.036364817  0.05535680 -0.036845213  0.04852830 -0.011805133

9. Top 20 Firms by Market Capitalization

top_cap_firms <- tej_data_clean %>%
  mutate(
    date  = as.Date(as.character(date), "%Y%m%d"),
    year  = format(date, "%Y")
  ) %>%
  filter(date %in% as.Date(c("2024-12-31", "2025-12-31"))) %>%
  group_by(year) %>%
  arrange(desc(cap)) %>%
  slice(1:20) %>%
  ungroup()

top_cap_firms
## # A tibble: 20 × 6
##    date          id name             price      cap year 
##    <date>     <dbl> <chr>            <dbl>    <dbl> <chr>
##  1 2024-12-31  2330 TSMC            1061.  27877688 2024 
##  2 2024-12-31  2317 Hon Hai          178.   2556073 2024 
##  3 2024-12-31  2454 MediaTek        1359.   2266389 2024 
##  4 2024-12-31  2881 Fubon Financial   83.8  1234015 2024 
##  5 2024-12-31  2308 DELTA            423.   1118242 2024 
##  6 2024-12-31  2382 QCI              274.   1108574 2024 
##  7 2024-12-31  2882 CATHAY FHC        64.8  1001907 2024 
##  8 2024-12-31  2412 CHT              119.    958045 2024 
##  9 2024-12-31  2891 CTBC Holding      37.0   767154 2024 
## 10 2024-12-31  3711 ASEH             156.    715219 2024 
## 11 2024-12-31  2886 Mega FHC          37.3   574052 2024 
## 12 2024-12-31  2303 UMC               40.4   540739 2024 
## 13 2024-12-31  2603 EMC              195.    487135 2024 
## 14 2024-12-31  6669 Wiwynn          2544.    486903 2024 
## 15 2024-12-31  1216 Uni-President     78.0   459675 2024 
## 16 2024-12-31  2357 Asustek          586.    457540 2024 
## 17 2024-12-31  2885 Yuanta Group      31.5   440057 2024 
## 18 2024-12-31  2345 Accton           763.    433744 2024 
## 19 2024-12-31  2884 E.S.F.H           25.7   431087 2024 
## 20 2024-12-31  3045 TWM              109.    422590 2024

10. Conclusion

## Analysis completed successfully. Report generated on October 27, 2025 ✅