Questions:

1. Please load up packages tidyquant, tidyverse and timetk. Import the data file from Tronclass: tej_day_price_2024_20250630.txt. Try using functions read_csv(), read_tsv() and read_delim() to import data. Show me the imported results of by using one of the three functions: glimpse(), head() or str() to show the results. (Note: If you are asked to show the results, you can use head() or glimpse() when your data in your answers is very big and long.)

rm(list=ls())
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo 
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.28     ✔ xts                  0.14.1── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ dplyr::filter()                masks stats::filter()
## ✖ xts::first()                   masks dplyr::first()
## ✖ dplyr::lag()                   masks stats::lag()
## ✖ xts::last()                    masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(timetk)
## 
## Attaching package: 'timetk'
## 
## The following object is masked from 'package:tidyquant':
## 
##     FANG
# Import data using read_tsv()
data <- read_tsv("C:/Users/ASUS/Downloads/tej_day_price_2024_20250630.txt")
## Rows: 337347 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): CO_ID, TSE Sector, English Short Name
## dbl (9): Date, TSE ID, Open(NTD), High(NTD), Low(NTD), Close(NTD), Volume(10...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show results using glimpse()
glimpse(data)
## Rows: 337,347
## Columns: 12
## $ CO_ID                 <chr> "1101 TCC", "1102 ACC", "1103 CHC", "1104 UCC", …
## $ Date                  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102…
## $ `TSE ID`              <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, …
## $ `TSE Sector`          <chr> "01", "01", "01", "01", "01", "01", "01", "02", …
## $ `English Short Name`  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSI…
## $ `Open(NTD)`           <dbl> 32.5373, 37.2642, 17.7825, 26.0628, 14.1679, 16.…
## $ `High(NTD)`           <dbl> 32.5373, 37.4442, 17.7825, 26.1505, 14.1679, 16.…
## $ `Low(NTD)`            <dbl> 32.3038, 36.9492, 17.5953, 25.9750, 14.0343, 16.…
## $ `Close(NTD)`          <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.…
## $ `Volume(1000S)`       <dbl> 14937, 6223, 171, 260, 442, 228, 57, 126, 48, 18…
## $ `Amount(NTD1000)`     <dbl> 518751, 256522, 3240, 7736, 6992, 4159, 1075, 24…
## $ `Market Cap.(NTD MN)` <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754,…

2. Replace column 2, 3, 5, 9 and 12 with new column names: “date”, “id”, “name”, “price”, “cap”. Show your results.

data <- data %>%
  rename(
    date = 2,      # Date column
    id = 3,        # TSE ID column
    name = 5,      # English Short Name column
    price = 9,     # Close(NTD) column
    cap = 12       # Market Cap.(NTD MN) column
  ) %>%
  select(id, name, date, price, cap)

glimpse(data)
## Rows: 337,347
## Columns: 5
## $ id    <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203, 1210, 1213…
## $ name  <chr> "TCC", "ACC", "CHC", "UCC", "Lucky Cement", "HSINGTA", "Tuna Cem…
## $ date  <dbl> 20240102, 20240102, 20240102, 20240102, 20240102, 20240102, 2024…
## $ price <dbl> 32.3972, 37.0392, 17.6421, 26.0628, 14.0788, 16.1807, 18.3336, 1…
## $ cap   <dbl> 262026, 145941, 14896, 19995, 6395, 6209, 10754, 9640, 13992, 52…

3. Select column id, date, price, and change id format to text, date format to date. Also change the data format from long to wide and show your results (Hint: you can use dcast() or spread() function).

data_wide <- data %>%
  select(id, date, price) %>%
  mutate(
    id = as.character(id),
    date = ymd(as.character(date))
  ) %>%
  pivot_wider(names_from = id, values_from = price)

head(data_wide, 10)
## # A tibble: 10 × 947
##    date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##    <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 2024-01-02   32.4   37.0   17.6   26.1   14.1   16.2   18.3   18.2   55.3
##  2 2024-01-03   31.9   36.6   17.5   25.9   14.0   16.1   18.2   18.2   54.7
##  3 2024-01-04   31.9   37.0   17.5   25.5   14.1   16.1   18.4   18.1   54.1
##  4 2024-01-05   32.1   37.0   17.5   25.8   14.1   16.1   18.3   18.1   54.9
##  5 2024-01-08   32.0   37.2   17.6   25.7   14.2   16.1   18.3   18.1   54.6
##  6 2024-01-09   31.8   36.9   17.5   25.4   13.8   16.0   18.4   18.1   54.6
##  7 2024-01-10   31.5   36.6   17.4   25.4   13.7   16.0   18.5   18.0   53.4
##  8 2024-01-11   31.5   36.7   17.5   25.6   13.8   16.1   18.5   18.0   55.1
##  9 2024-01-12   31.5   36.6   17.5   25.6   13.8   16.0   18.4   18.0   54.4
## 10 2024-01-15   31.4   36.6   17.4   25.4   13.8   16.0   18.3   18.0   53.8
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## #   `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## #   `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## #   `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## #   `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## #   `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## #   `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …

4. Show the stock ids with NA values and compute the number of NA for each stock.

na_summary <- data_wide %>%
  summarise(across(everything(), ~sum(is.na(.)))) %>%
  pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
  filter(value > 0) %>%
  arrange(value)

na_summary
## # A tibble: 10 × 2
##    key   value
##    <chr> <int>
##  1 7722     18
##  2 7732     43
##  3 7736     53
##  4 7750    108
##  5 7765    151
##  6 3716    160
##  7 4585    177
##  8 7780    196
##  9 7788    198
## 10 7799    217

5. Replace NA values with the closest available stock prices (Hint: you can use na.locf()).

library(zoo)

data_wide <- data_wide %>%
  mutate(across(-date, ~na.locf(., na.rm = FALSE)))

head(data_wide, 10)
## # A tibble: 10 × 947
##    date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##    <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 2024-01-02   32.4   37.0   17.6   26.1   14.1   16.2   18.3   18.2   55.3
##  2 2024-01-03   31.9   36.6   17.5   25.9   14.0   16.1   18.2   18.2   54.7
##  3 2024-01-04   31.9   37.0   17.5   25.5   14.1   16.1   18.4   18.1   54.1
##  4 2024-01-05   32.1   37.0   17.5   25.8   14.1   16.1   18.3   18.1   54.9
##  5 2024-01-08   32.0   37.2   17.6   25.7   14.2   16.1   18.3   18.1   54.6
##  6 2024-01-09   31.8   36.9   17.5   25.4   13.8   16.0   18.4   18.1   54.6
##  7 2024-01-10   31.5   36.6   17.4   25.4   13.7   16.0   18.5   18.0   53.4
##  8 2024-01-11   31.5   36.7   17.5   25.6   13.8   16.1   18.5   18.0   55.1
##  9 2024-01-12   31.5   36.6   17.5   25.6   13.8   16.0   18.4   18.0   54.4
## 10 2024-01-15   31.4   36.6   17.4   25.4   13.8   16.0   18.3   18.0   53.8
## # ℹ 937 more variables: `1210` <dbl>, `1213` <dbl>, `1215` <dbl>, `1216` <dbl>,
## #   `1217` <dbl>, `1218` <dbl>, `1219` <dbl>, `1220` <dbl>, `1225` <dbl>,
## #   `1227` <dbl>, `1229` <dbl>, `1231` <dbl>, `1232` <dbl>, `1233` <dbl>,
## #   `1234` <dbl>, `1235` <dbl>, `1236` <dbl>, `1301` <dbl>, `1303` <dbl>,
## #   `1304` <dbl>, `1305` <dbl>, `1307` <dbl>, `1308` <dbl>, `1309` <dbl>,
## #   `1310` <dbl>, `1312` <dbl>, `1313` <dbl>, `1314` <dbl>, `1315` <dbl>,
## #   `1316` <dbl>, `1319` <dbl>, `1321` <dbl>, `1323` <dbl>, `1324` <dbl>, …

6. Delete the stock which contains prices of NA in your data in question 4. Show the updated number of rows and columns in your filtered data.

# Get stocks with NA values
stocks_with_na <- na_summary$key

# Remove those stocks from data
data_wide <- data_wide %>%
  select(-all_of(stocks_with_na))

# Show dimensions
dim(data_wide)
## [1] 358 937

7. Convert data in Question 6 into time series data (xts) (Hint: you can use tk_xts()). And calculate daily rate of returns (Hint: use Return.calculate() and compute discrete returns). Delete the first row and show the first five stocks with first five days of returns.

# Convert to xts
data_xts <- data_wide %>%
  tk_xts(date_var = date)

# Calculate daily returns
daily_returns <- Return.calculate(data_xts, method = "discrete")

# Delete first row and show first 5 stocks with first 5 days
daily_returns[-1, 1:5][1:5, ]
##                    1101         1102         1103         1104         1108
## 2024-01-03 -0.014408653 -0.012149290 -0.007958236 -0.006733735 -0.003160781
## 2024-01-04  0.000000000  0.012298711  0.000000000 -0.013558772  0.003170803
## 2024-01-05  0.004384536 -0.001214929  0.002674026  0.010306896  0.000000000
## 2024-01-08 -0.002909225  0.004865628  0.002666895 -0.003399291  0.006328664
## 2024-01-09 -0.005841680 -0.007263102 -0.002659801 -0.013655209 -0.025155457

8. Compute monthly returns. Delete the first row and show the first five stocks with first five months of returns (Hint: you can use to.period() or to.monthly()).

# Convert to monthly returns
monthly_returns <- daily_returns %>%
  to.monthly(indexAt = "lastof", OHLC = FALSE)

# Delete first row and show first 5 stocks with first 5 months
monthly_returns[-1, 1:5][1:5, ]
##                    1101         1102         1103        1104         1108
## 2024-02-29  0.007852356  0.008727188 -0.008404066 0.015384975  0.012860762
## 2024-03-31  0.014194342  0.008546035  0.000000000 0.003176291 -0.003157880
## 2024-04-30 -0.009273852  0.002293315 -0.018668262 0.000000000 -0.014530578
## 2024-05-31  0.004564721 -0.010715712  0.061451680 0.021021905  0.008931255
## 2024-06-30 -0.001460420  0.000000000  0.000000000 0.005641527  0.002990431

9. Find the 20 largest cap firms in the year end of 2024 and 2025. Show the results (Hint: you can use select(), filter(), group_by(), arrange(), slice(), ungroup()).

top_20 <- data %>%
  mutate(
    date = ymd(as.character(date)),
    year1 = year(date)
  ) %>%
  filter(date == "2024-12-31" | date == "2025-06-30") %>%
  group_by(year1) %>%
  arrange(desc(cap)) %>%
  slice(1:20) %>%
  ungroup() %>%
  mutate(cap1 = scales::dollar(cap, prefix = "$", big.mark = ",")) %>%
  select(date, year1, cap, cap1, id, name)

top_20
## # A tibble: 40 × 6
##    date       year1      cap cap1           id name           
##    <date>     <dbl>    <dbl> <chr>       <dbl> <chr>          
##  1 2024-12-31  2024 27877688 $27,877,688  2330 TSMC           
##  2 2024-12-31  2024  2556073 $2,556,073   2317 Hon Hai        
##  3 2024-12-31  2024  2266389 $2,266,389   2454 MediaTek       
##  4 2024-12-31  2024  1234015 $1,234,015   2881 Fubon Financial
##  5 2024-12-31  2024  1118242 $1,118,242   2308 DELTA          
##  6 2024-12-31  2024  1108574 $1,108,574   2382 QCI            
##  7 2024-12-31  2024  1001907 $1,001,907   2882 CATHAY FHC     
##  8 2024-12-31  2024   958045 $958,045     2412 CHT            
##  9 2024-12-31  2024   767154 $767,154     2891 CTBC Holding   
## 10 2024-12-31  2024   715219 $715,219     3711 ASEH           
## # ℹ 30 more rows