###question 1
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8 ✔ TTR 0.24.4
## ✔ quantmod 0.4.28 ✔ xts 0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks xts::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks xts::last()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(timetk)
##
## Attaching package: 'timetk'
##
## The following object is masked from 'package:tidyquant':
##
## FANG
tej_day_price_csv <- read_csv("downloads/tej_day_price_2024_20250630.txt")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 337347 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): CO_ID Date TSE ID TSE Sector English Short Name Open(NTD) High(NTD)...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(tej_day_price_csv)
## # A tibble: 6 × 1
## CO_ID\tDate\tTSE ID\tTSE Sector\tEnglish Short Name\tOpen(NTD)\tHigh(NTD)\tL…¹
## <chr>
## 1 "1101 TCC\t20240102\t1101\t01\tTCC\t32.5373\t32.5373\t32.3038\t32.3972\t14937…
## 2 "1102 ACC\t20240102\t1102\t01\tACC\t37.2642\t37.4442\t36.9492\t37.0392\t6223\…
## 3 "1103 CHC\t20240102\t1103\t01\tCHC\t17.7825\t17.7825\t17.5953\t17.6421\t171\t…
## 4 "1104 UCC\t20240102\t1104\t01\tUCC\t26.0628\t26.1505\t25.9750\t26.0628\t260\t…
## 5 "1108 Lucky Cement\t20240102\t1108\t01\tLucky Cement\t14.1679\t14.1679\t14.03…
## 6 "1109 HSINGTA\t20240102\t1109\t01\tHSINGTA\t16.1807\t16.2696\t16.1362\t16.180…
## # ℹ abbreviated name:
## # ¹`CO_ID\tDate\tTSE ID\tTSE Sector\tEnglish Short Name\tOpen(NTD)\tHigh(NTD)\tLow(NTD)\tClose(NTD)\tVolume(1000S)\tAmount(NTD1000)\tMarket Cap.(NTD MN)`
tej_day_price_tsv <- read_tsv("downloads/tej_day_price_2024_20250630.txt")
## Rows: 337347 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): CO_ID, TSE Sector, English Short Name
## dbl (9): Date, TSE ID, Open(NTD), High(NTD), Low(NTD), Close(NTD), Volume(10...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse (tej_day_price_tsv)
## Rows: 337,347
## Columns: 12
## $ CO_ID <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID` <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector` <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name` <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)` <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)` <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)` <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)` <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)` <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)` <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…
head(tej_day_price_tsv)
## # A tibble: 6 × 12
## CO_ID Date `TSE ID` `TSE Sector` `English Short Name` `Open(NTD)`
## <chr> <dbl> <dbl> <chr> <chr> <dbl>
## 1 1101 TCC 2.02e7 1101 01 TCC 32.5
## 2 1102 ACC 2.02e7 1102 01 ACC 37.3
## 3 1103 CHC 2.02e7 1103 01 CHC 17.8
## 4 1104 UCC 2.02e7 1104 01 UCC 26.1
## 5 1108 Lucky Ceme… 2.02e7 1108 01 Lucky Cement 14.2
## 6 1109 HSINGTA 2.02e7 1109 01 HSINGTA 16.2
## # ℹ 6 more variables: `High(NTD)` <dbl>, `Low(NTD)` <dbl>, `Close(NTD)` <dbl>,
## # `Volume(1000S)` <dbl>, `Amount(NTD1000)` <dbl>, `Market Cap.(NTD MN)` <dbl>
tej_day_price_delim <- read_delim("downloads/tej_day_price_2024_20250630.txt")
## Rows: 337347 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): CO_ID, TSE Sector, English Short Name
## dbl (9): Date, TSE ID, Open(NTD), High(NTD), Low(NTD), Close(NTD), Volume(10...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(tej_day_price_delim)
## Rows: 337,347
## Columns: 12
## $ CO_ID <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID` <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector` <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name` <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)` <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)` <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)` <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)` <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)` <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)` <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…
head(tej_day_price_delim)
## # A tibble: 6 × 12
## CO_ID Date `TSE ID` `TSE Sector` `English Short Name` `Open(NTD)`
## <chr> <dbl> <dbl> <chr> <chr> <dbl>
## 1 1101 TCC 2.02e7 1101 01 TCC 32.5
## 2 1102 ACC 2.02e7 1102 01 ACC 37.3
## 3 1103 CHC 2.02e7 1103 01 CHC 17.8
## 4 1104 UCC 2.02e7 1104 01 UCC 26.1
## 5 1108 Lucky Ceme… 2.02e7 1108 01 Lucky Cement 14.2
## 6 1109 HSINGTA 2.02e7 1109 01 HSINGTA 16.2
## # ℹ 6 more variables: `High(NTD)` <dbl>, `Low(NTD)` <dbl>, `Close(NTD)` <dbl>,
## # `Volume(1000S)` <dbl>, `Amount(NTD1000)` <dbl>, `Market Cap.(NTD MN)` <dbl>
## 2. Replace column 2, 3, 5, 9 and 12 with new column names: “date”, “id”, “name”, “price”, “cap”. Show your results.
new_delim <- tej_day_price_delim %>%
rename(
"date"= `Date`,
id= `TSE ID`,
name =`English Short Name`,
price =`Close(NTD)`,
cap =`Market Cap.(NTD MN)`
)
glimpse(new_delim)
## Rows: 337,347
## Columns: 12
## $ CO_ID <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", "110…
## $ date <dbl> 20240102, 20240102, 20240102, 20240102, 20240102, 20…
## $ id <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203…
## $ `TSE Sector` <chr> "01", "01", "01", "01", "01", "01", "01", "02", "02"…
## $ name <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSINGTA…
## $ `Open(NTD)` <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.1807…
## $ `High(NTD)` <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.2696…
## $ `Low(NTD)` <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.1362…
## $ price <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.1807…
## $ `Volume(1000S)` <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 1849, …
## $ `Amount(NTD1000)` <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 2409, …
## $ cap <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754, 964…
##3.1 Change format
data_sel <- new_delim %>%
select(id, date, price, ) %>%
mutate(
id = as.character(id),
date = ymd(date),
)
str(data_sel)
## tibble [337,347 × 3] (S3: tbl_df/tbl/data.frame)
## $ id : chr [1:337347] "1101" "1102" "1103" "1104" ...
## $ date : Date[1:337347], format: "2024-01-02" "2024-01-02" ...
## $ price: num [1:337347] 32.4 37 17.6 26.1 14.1 ...
##3.2 long to wide
data_spread <- data_sel %>%
spread(
key = date,
value = price
)
view(data_spread)
## Question 4
data_filled <- data_spread
data_filled[,-1] <- t(apply(data_spread[,-1], 1, function(x)
na.locf(na.locf(x, na.rm = FALSE), fromLast = TRUE)
))
data_by_date <- data_filled %>%
pivot_longer(-id, names_to = "date", values_to = "price") %>%
pivot_wider(names_from = id, values_from = price) %>%
arrange(as.Date(date))
data_by_date
## # A tibble: 358 × 947
## date `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203` `1210`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2024-0… 32.4 37.0 17.6 26.1 14.1 16.2 18.3 18.2 55.3 54.0
## 2 2024-0… 31.9 36.6 17.5 25.9 14.0 16.1 18.2 18.2 54.7 53.9
## 3 2024-0… 31.9 37.0 17.5 25.5 14.1 16.1 18.4 18.1 54.1 53.4
## 4 2024-0… 32.1 37.0 17.5 25.8 14.1 16.1 18.3 18.1 54.9 52.8
## 5 2024-0… 32.0 37.2 17.6 25.7 14.2 16.1 18.3 18.1 54.6 53.0
## 6 2024-0… 31.8 36.9 17.5 25.4 13.8 16.0 18.4 18.1 54.6 52.3
## 7 2024-0… 31.5 36.6 17.4 25.4 13.7 16.0 18.5 18.0 53.4 52.3
## 8 2024-0… 31.5 36.7 17.5 25.6 13.8 16.1 18.5 18.0 55.1 52.1
## 9 2024-0… 31.5 36.6 17.5 25.6 13.8 16.0 18.4 18.0 54.4 52.3
## 10 2024-0… 31.4 36.6 17.4 25.4 13.8 16.0 18.3 18.0 53.8 53.2
## # ℹ 348 more rows
## # ℹ 936 more variables: `1213` <dbl>, `1215` <dbl>, `1216` <dbl>, `1217` <dbl>,
## # `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>, `1227` <dbl>,
## # `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>, `1234` <dbl>,
## # `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>, `1304` <dbl>,
## # `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>, `1310` <dbl>,
## # `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>, `1316` <dbl>, …
## Question 5
setdiff(names(data_by_date), c("date", unique(data_spread$id)))
## character(0)
data_filled <- data_spread
data_filled[,-1] <- t(apply(data_spread[,-1], 1, function(x)
na.locf(na.locf(x, na.rm = FALSE), fromLast = TRUE)
))
data_filled <- data_filled %>%
mutate(id = str_trim(id)) %>% # remove spaces
filter(id != "" & id != "NA") %>% # drop blank or literal "NA"
distinct(id, .keep_all = TRUE) # remove duplicates
data_by_date <- data_filled %>%
pivot_longer(-id, names_to = "date", values_to = "price") %>%
pivot_wider(names_from = id, values_from = price) %>%
arrange(as.Date(date))
head(data_by_date)
## # A tibble: 6 × 947
## date `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203` `1210`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2024-01… 32.4 37.0 17.6 26.1 14.1 16.2 18.3 18.2 55.3 54.0
## 2 2024-01… 31.9 36.6 17.5 25.9 14.0 16.1 18.2 18.2 54.7 53.9
## 3 2024-01… 31.9 37.0 17.5 25.5 14.1 16.1 18.4 18.1 54.1 53.4
## 4 2024-01… 32.1 37.0 17.5 25.8 14.1 16.1 18.3 18.1 54.9 52.8
## 5 2024-01… 32.0 37.2 17.6 25.7 14.2 16.1 18.3 18.1 54.6 53.0
## 6 2024-01… 31.8 36.9 17.5 25.4 13.8 16.0 18.4 18.1 54.6 52.3
## # ℹ 936 more variables: `1213` <dbl>, `1215` <dbl>, `1216` <dbl>, `1217` <dbl>,
## # `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>, `1227` <dbl>,
## # `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>, `1234` <dbl>,
## # `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>, `1304` <dbl>,
## # `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>, `1310` <dbl>,
## # `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>, `1316` <dbl>,
## # `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, `1325` <dbl>, …
## Question 6
data_filtered <- data_by_date%>%
select(where(~ !any(is.na(.)))) # keep only columns with no NAs
dim(data_filtered)
## [1] 358 947
##Question 7
library(timetk)
library(PerformanceAnalytics)
data_by_date <- data_by_date %>%
mutate(date = ymd(date))
data_xts <- tk_xts(data_by_date, select = -date, date_var = date)
data_ret <- Return.calculate(data_xts, method = "discrete")
data_ret <- data_ret[-1, ]
# Step 5 – Show first 5 stocks × first 5 days of returns
head(data_ret[, 1:5], 5)
## 1101 1102 1103 1104 1108
## 2024-01-03 -0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
## 2024-01-04 0.000000000 0.012298711 0.000000000 -0.013558772 0.003170803
## 2024-01-05 0.004384536 -0.001214929 0.002674026 0.010306896 0.000000000
## 2024-01-08 -0.002909225 0.004865628 0.002666895 -0.003399291 0.006328664
## 2024-01-09 -0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457
##Question 8
monthly_data <- to.monthly(data_xts, indexAt = "lastof", OHLC = FALSE)
monthly_ret <- Return.calculate(monthly_data, method = "discrete")
monthly_ret <- monthly_ret[-1, ]
round(head(monthly_ret[, 1:5], 5), 6)
## 1101 1102 1103 1104 1108
## 2024-02-29 0.006272 0.017608 -0.008404 0.018870 0.036185
## 2024-03-31 0.001555 0.021014 -0.016951 0.063972 0.003171
## 2024-04-30 -0.003108 0.058113 0.057470 0.112340 0.072790
## 2024-05-31 0.029639 -0.049201 0.032612 -0.032715 0.000000
## 2024-06-30 0.036365 0.055357 -0.036845 0.048528 -0.011805
##Question 9
library(readr)
library(dplyr)
library(lubridate)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# --- Load and clean data ---
largest_caps <- read_delim("downloads/tej_day_price_2024_20250630.txt", delim = "\t") %>%
# Convert YYYYMMDD numeric to proper Date
mutate(Date = ymd(Date)) %>%
# Rename columns to match teacher’s convention
rename(
date = Date,
id = `CO_ID`,
name = `English Short Name`,
cap = `Market Cap.(NTD MN)`
) %>%
# Extract year for grouping
mutate(year1 = year(date)) %>%
# Automatically pick each year's last available trading day
group_by(year1) %>%
filter(date == max(date)) %>%
ungroup() %>%
# Rank top 20 firms by market cap for each date
group_by(date, year1) %>%
arrange(desc(cap)) %>%
slice(1:20) %>%
ungroup() %>%
# Add formatted market cap column ($xx,xxx,xxx)
mutate(cap1 = dollar(cap, prefix = "$", big.mark = ",", accuracy = 1)) %>%
# Reorder columns to match teacher output
select(date, year1, cap, cap1, id, name)
## Rows: 337347 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): CO_ID, TSE Sector, English Short Name
## dbl (9): Date, TSE ID, Open(NTD), High(NTD), Low(NTD), Close(NTD), Volume(10...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# ✅ Show final result
largest_caps
## # A tibble: 40 × 6
## date year1 cap cap1 id name
## <date> <dbl> <dbl> <chr> <chr> <chr>
## 1 2024-12-31 2024 27877688 $27,877,688 2330 TSMC TSMC
## 2 2024-12-31 2024 2556073 $2,556,073 2317 Hon Hai Hon Hai
## 3 2024-12-31 2024 2266389 $2,266,389 2454 MediaTek MediaTek
## 4 2024-12-31 2024 1234015 $1,234,015 2881 Fubon Financial Fubon Financial
## 5 2024-12-31 2024 1118242 $1,118,242 2308 DELTA DELTA
## 6 2024-12-31 2024 1108574 $1,108,574 2382 QCI QCI
## 7 2024-12-31 2024 1001907 $1,001,907 2882 CATHAY FHC CATHAY FHC
## 8 2024-12-31 2024 958045 $958,045 2412 CHT CHT
## 9 2024-12-31 2024 767154 $767,154 2891 CTBC Holding CTBC Holding
## 10 2024-12-31 2024 715219 $715,219 3711 ASEH ASEH
## # ℹ 30 more rows