###question 1
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.28     ✔ xts                  0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first()  masks xts::first()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::last()   masks xts::last()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(timetk)
## 
## Attaching package: 'timetk'
## 
## The following object is masked from 'package:tidyquant':
## 
##     FANG
tej_day_price_csv <- read_csv("downloads/tej_day_price_2024_20250630.txt")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 337347 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): CO_ID   Date    TSE ID  TSE Sector  English Short Name  Open(NTD)   High(NTD)...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(tej_day_price_csv)
## # A tibble: 6 × 1
##   CO_ID\tDate\tTSE ID\tTSE Sector\tEnglish Short Name\tOpen(NTD)\tHigh(NTD)\tL…¹
##   <chr>                                                                         
## 1 "1101 TCC\t20240102\t1101\t01\tTCC\t32.5373\t32.5373\t32.3038\t32.3972\t14937…
## 2 "1102 ACC\t20240102\t1102\t01\tACC\t37.2642\t37.4442\t36.9492\t37.0392\t6223\…
## 3 "1103 CHC\t20240102\t1103\t01\tCHC\t17.7825\t17.7825\t17.5953\t17.6421\t171\t…
## 4 "1104 UCC\t20240102\t1104\t01\tUCC\t26.0628\t26.1505\t25.9750\t26.0628\t260\t…
## 5 "1108 Lucky Cement\t20240102\t1108\t01\tLucky Cement\t14.1679\t14.1679\t14.03…
## 6 "1109 HSINGTA\t20240102\t1109\t01\tHSINGTA\t16.1807\t16.2696\t16.1362\t16.180…
## # ℹ abbreviated name:
## #   ¹​`CO_ID\tDate\tTSE ID\tTSE Sector\tEnglish Short Name\tOpen(NTD)\tHigh(NTD)\tLow(NTD)\tClose(NTD)\tVolume(1000S)\tAmount(NTD1000)\tMarket Cap.(NTD MN)`
tej_day_price_tsv <- read_tsv("downloads/tej_day_price_2024_20250630.txt")
## Rows: 337347 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): CO_ID, TSE Sector, English Short Name
## dbl (9): Date, TSE ID, Open(NTD), High(NTD), Low(NTD), Close(NTD), Volume(10...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse (tej_day_price_tsv)
## Rows: 337,347
## Columns: 12
## $ CO_ID                 <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date                  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID`              <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector`          <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name`  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)`           <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)`           <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)`            <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)`          <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)`       <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)`     <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…
head(tej_day_price_tsv)
## # A tibble: 6 × 12
##   CO_ID              Date `TSE ID` `TSE Sector` `English Short Name` `Open(NTD)`
##   <chr>             <dbl>    <dbl> <chr>        <chr>                      <dbl>
## 1 1101 TCC         2.02e7     1101 01           TCC                         32.5
## 2 1102 ACC         2.02e7     1102 01           ACC                         37.3
## 3 1103 CHC         2.02e7     1103 01           CHC                         17.8
## 4 1104 UCC         2.02e7     1104 01           UCC                         26.1
## 5 1108 Lucky Ceme… 2.02e7     1108 01           Lucky Cement                14.2
## 6 1109 HSINGTA     2.02e7     1109 01           HSINGTA                     16.2
## # ℹ 6 more variables: `High(NTD)` <dbl>, `Low(NTD)` <dbl>, `Close(NTD)` <dbl>,
## #   `Volume(1000S)` <dbl>, `Amount(NTD1000)` <dbl>, `Market Cap.(NTD MN)` <dbl>
tej_day_price_delim <- read_delim("downloads/tej_day_price_2024_20250630.txt")
## Rows: 337347 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): CO_ID, TSE Sector, English Short Name
## dbl (9): Date, TSE ID, Open(NTD), High(NTD), Low(NTD), Close(NTD), Volume(10...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(tej_day_price_delim)
## Rows: 337,347
## Columns: 12
## $ CO_ID                 <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date                  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID`              <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector`          <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name`  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)`           <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)`           <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)`            <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)`          <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)`       <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)`     <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…
head(tej_day_price_delim)
## # A tibble: 6 × 12
##   CO_ID              Date `TSE ID` `TSE Sector` `English Short Name` `Open(NTD)`
##   <chr>             <dbl>    <dbl> <chr>        <chr>                      <dbl>
## 1 1101 TCC         2.02e7     1101 01           TCC                         32.5
## 2 1102 ACC         2.02e7     1102 01           ACC                         37.3
## 3 1103 CHC         2.02e7     1103 01           CHC                         17.8
## 4 1104 UCC         2.02e7     1104 01           UCC                         26.1
## 5 1108 Lucky Ceme… 2.02e7     1108 01           Lucky Cement                14.2
## 6 1109 HSINGTA     2.02e7     1109 01           HSINGTA                     16.2
## # ℹ 6 more variables: `High(NTD)` <dbl>, `Low(NTD)` <dbl>, `Close(NTD)` <dbl>,
## #   `Volume(1000S)` <dbl>, `Amount(NTD1000)` <dbl>, `Market Cap.(NTD MN)` <dbl>
## 2. Replace column 2, 3, 5, 9 and 12 with new column names: “date”, “id”, “name”, “price”, “cap”. Show your results.


new_delim <- tej_day_price_delim %>% 
rename(
  "date"= `Date`,
  id= `TSE ID`,
  name =`English Short Name`,
  price =`Close(NTD)`,
  cap =`Market Cap.(NTD MN)`
)
glimpse(new_delim)
## Rows: 337,347
## Columns: 12
## $ CO_ID             <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", "110…
## $ date              <dbl> 20240102, 20240102, 20240102, 20240102, 20240102, 20…
## $ id                <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203…
## $ `TSE Sector`      <chr> "01", "01", "01", "01", "01", "01", "01", "02", "02"…
## $ name              <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSINGTA…
## $ `Open(NTD)`       <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.1807…
## $ `High(NTD)`       <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.2696…
## $ `Low(NTD)`        <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.1362…
## $ price             <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.1807…
## $ `Volume(1000S)`   <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 1849, …
## $ `Amount(NTD1000)` <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 2409, …
## $ cap               <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754, 964…
##3.1 Change format
data_sel <- new_delim %>%
  select(id, date, price, ) %>%  
  mutate(
    id = as.character(id),            
    date = ymd(date),     
  )
str(data_sel)
## tibble [337,347 × 3] (S3: tbl_df/tbl/data.frame)
##  $ id   : chr [1:337347] "1101" "1102" "1103" "1104" ...
##  $ date : Date[1:337347], format: "2024-01-02" "2024-01-02" ...
##  $ price: num [1:337347] 32.4 37 17.6 26.1 14.1 ...
##3.2 long to wide
data_spread <- data_sel %>%
  spread(
    key = date,        
    value = price      
  )

view(data_spread)
## Question 4
data_filled <- data_spread
data_filled[,-1] <- t(apply(data_spread[,-1], 1, function(x)
  na.locf(na.locf(x, na.rm = FALSE), fromLast = TRUE)
))


data_by_date <- data_filled %>%
  pivot_longer(-id, names_to = "date", values_to = "price") %>%
  pivot_wider(names_from = id, values_from = price) %>%
  arrange(as.Date(date))


data_by_date
## # A tibble: 358 × 947
##    date    `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203` `1210`
##    <chr>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 2024-0…   32.4   37.0   17.6   26.1   14.1   16.2   18.3   18.2   55.3   54.0
##  2 2024-0…   31.9   36.6   17.5   25.9   14.0   16.1   18.2   18.2   54.7   53.9
##  3 2024-0…   31.9   37.0   17.5   25.5   14.1   16.1   18.4   18.1   54.1   53.4
##  4 2024-0…   32.1   37.0   17.5   25.8   14.1   16.1   18.3   18.1   54.9   52.8
##  5 2024-0…   32.0   37.2   17.6   25.7   14.2   16.1   18.3   18.1   54.6   53.0
##  6 2024-0…   31.8   36.9   17.5   25.4   13.8   16.0   18.4   18.1   54.6   52.3
##  7 2024-0…   31.5   36.6   17.4   25.4   13.7   16.0   18.5   18.0   53.4   52.3
##  8 2024-0…   31.5   36.7   17.5   25.6   13.8   16.1   18.5   18.0   55.1   52.1
##  9 2024-0…   31.5   36.6   17.5   25.6   13.8   16.0   18.4   18.0   54.4   52.3
## 10 2024-0…   31.4   36.6   17.4   25.4   13.8   16.0   18.3   18.0   53.8   53.2
## # ℹ 348 more rows
## # ℹ 936 more variables: `1213` <dbl>, `1215` <dbl>, `1216` <dbl>, `1217` <dbl>,
## #   `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>, `1227` <dbl>,
## #   `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>, `1234` <dbl>,
## #   `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>, `1304` <dbl>,
## #   `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>, `1310` <dbl>,
## #   `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>, `1316` <dbl>, …
## Question 5
setdiff(names(data_by_date), c("date", unique(data_spread$id)))
## character(0)
data_filled <- data_spread
data_filled[,-1] <- t(apply(data_spread[,-1], 1, function(x)
  na.locf(na.locf(x, na.rm = FALSE), fromLast = TRUE)
))

data_filled <- data_filled %>%
  mutate(id = str_trim(id)) %>%              # remove spaces
  filter(id != "" & id != "NA") %>%          # drop blank or literal "NA"
  distinct(id, .keep_all = TRUE)             # remove duplicates

data_by_date <- data_filled %>%
  pivot_longer(-id, names_to = "date", values_to = "price") %>%
  pivot_wider(names_from = id, values_from = price) %>%
  arrange(as.Date(date))
head(data_by_date)
## # A tibble: 6 × 947
##   date     `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203` `1210`
##   <chr>     <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 2024-01…   32.4   37.0   17.6   26.1   14.1   16.2   18.3   18.2   55.3   54.0
## 2 2024-01…   31.9   36.6   17.5   25.9   14.0   16.1   18.2   18.2   54.7   53.9
## 3 2024-01…   31.9   37.0   17.5   25.5   14.1   16.1   18.4   18.1   54.1   53.4
## 4 2024-01…   32.1   37.0   17.5   25.8   14.1   16.1   18.3   18.1   54.9   52.8
## 5 2024-01…   32.0   37.2   17.6   25.7   14.2   16.1   18.3   18.1   54.6   53.0
## 6 2024-01…   31.8   36.9   17.5   25.4   13.8   16.0   18.4   18.1   54.6   52.3
## # ℹ 936 more variables: `1213` <dbl>, `1215` <dbl>, `1216` <dbl>, `1217` <dbl>,
## #   `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>, `1227` <dbl>,
## #   `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>, `1234` <dbl>,
## #   `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>, `1304` <dbl>,
## #   `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>, `1310` <dbl>,
## #   `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>, `1316` <dbl>,
## #   `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, `1325` <dbl>, …
## Question 6
data_filtered <- data_by_date%>%
  select(where(~ !any(is.na(.))))   # keep only columns with no NAs

dim(data_filtered)
## [1] 358 947
##Question 7

library(timetk)
library(PerformanceAnalytics)
data_by_date <- data_by_date %>%
  mutate(date = ymd(date))
data_xts <- tk_xts(data_by_date, select = -date, date_var = date)
data_ret <- Return.calculate(data_xts, method = "discrete")

data_ret <- data_ret[-1, ]

# Step 5 – Show first 5 stocks × first 5 days of returns
head(data_ret[, 1:5], 5)
##                    1101         1102         1103         1104         1108
## 2024-01-03 -0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
## 2024-01-04  0.000000000  0.012298711  0.000000000 -0.013558772  0.003170803
## 2024-01-05  0.004384536 -0.001214929  0.002674026  0.010306896  0.000000000
## 2024-01-08 -0.002909225  0.004865628  0.002666895 -0.003399291  0.006328664
## 2024-01-09 -0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457
##Question 8
monthly_data <- to.monthly(data_xts, indexAt = "lastof", OHLC = FALSE)

monthly_ret <- Return.calculate(monthly_data, method = "discrete")

monthly_ret <- monthly_ret[-1, ]

round(head(monthly_ret[, 1:5], 5), 6)
##                 1101      1102      1103      1104      1108
## 2024-02-29  0.006272  0.017608 -0.008404  0.018870  0.036185
## 2024-03-31  0.001555  0.021014 -0.016951  0.063972  0.003171
## 2024-04-30 -0.003108  0.058113  0.057470  0.112340  0.072790
## 2024-05-31  0.029639 -0.049201  0.032612 -0.032715  0.000000
## 2024-06-30  0.036365  0.055357 -0.036845  0.048528 -0.011805
##Question 9
library(readr)
library(dplyr)
library(lubridate)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
# --- Load and clean data ---
largest_caps <- read_delim("downloads/tej_day_price_2024_20250630.txt", delim = "\t") %>%
  # Convert YYYYMMDD numeric to proper Date
  mutate(Date = ymd(Date)) %>%
  
  # Rename columns to match teacher’s convention
  rename(
    date = Date,
    id   = `CO_ID`,
    name = `English Short Name`,
    cap  = `Market Cap.(NTD MN)`
  ) %>%
  
  # Extract year for grouping
  mutate(year1 = year(date)) %>%
  
  # Automatically pick each year's last available trading day
  group_by(year1) %>%
  filter(date == max(date)) %>%
  ungroup() %>%
  
  # Rank top 20 firms by market cap for each date
  group_by(date, year1) %>%
  arrange(desc(cap)) %>%
  slice(1:20) %>%
  ungroup() %>%
  
  # Add formatted market cap column ($xx,xxx,xxx)
  mutate(cap1 = dollar(cap, prefix = "$", big.mark = ",", accuracy = 1)) %>%
  
  # Reorder columns to match teacher output
  select(date, year1, cap, cap1, id, name)
## Rows: 337347 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): CO_ID, TSE Sector, English Short Name
## dbl (9): Date, TSE ID, Open(NTD), High(NTD), Low(NTD), Close(NTD), Volume(10...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# ✅ Show final result
largest_caps
## # A tibble: 40 × 6
##    date       year1      cap cap1        id                   name           
##    <date>     <dbl>    <dbl> <chr>       <chr>                <chr>          
##  1 2024-12-31  2024 27877688 $27,877,688 2330 TSMC            TSMC           
##  2 2024-12-31  2024  2556073 $2,556,073  2317 Hon Hai         Hon Hai        
##  3 2024-12-31  2024  2266389 $2,266,389  2454 MediaTek        MediaTek       
##  4 2024-12-31  2024  1234015 $1,234,015  2881 Fubon Financial Fubon Financial
##  5 2024-12-31  2024  1118242 $1,118,242  2308 DELTA           DELTA          
##  6 2024-12-31  2024  1108574 $1,108,574  2382 QCI             QCI            
##  7 2024-12-31  2024  1001907 $1,001,907  2882 CATHAY FHC      CATHAY FHC     
##  8 2024-12-31  2024   958045 $958,045    2412 CHT             CHT            
##  9 2024-12-31  2024   767154 $767,154    2891 CTBC Holding    CTBC Holding   
## 10 2024-12-31  2024   715219 $715,219    3711 ASEH            ASEH           
## # ℹ 30 more rows