#Q1. Install library tidyquant, timetk and quantmod. Import data tej_2016_2018.txt.
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(timetk)
library(tidyquant)
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## ══ Need to Learn tidyquant? ════════════════════════════════════════════════════
## Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
rm(list=ls())
stock_day_3_year<-read_tsv("tej_2016_2018.txt")
## Rows: 609010 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): CoName, TSE Sector, MV%
## dbl (9): CO_ID, Date, Open, High, Low, Close, Volume, Market Cap., Shares
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(stock_day_3_year)
## Rows: 609,010
## Columns: 12
## $ CO_ID <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203, 12…
## $ CoName <chr> "Taiwan Cement", "Asia Cement", "Chia Hsin Cement", "Uni…
## $ `TSE Sector` <chr> "01", "01", "01", "01", "01", "01", "01", "02", "02", "0…
## $ Date <dbl> 20160104, 20160104, 20160104, 20160104, 20160104, 201601…
## $ Open <dbl> 19.14, 23.33, 8.41, 17.41, 8.69, 9.59, 14.95, 17.14, 19.…
## $ High <dbl> 19.14, 23.33, 8.41, 17.41, 8.69, 9.59, 14.95, 17.24, 19.…
## $ Low <dbl> 18.33, 22.09, 8.28, 16.98, 8.61, 9.50, 14.52, 16.56, 19.…
## $ Close <dbl> 18.33, 22.39, 8.28, 16.98, 8.61, 9.50, 14.71, 16.80, 19.…
## $ Volume <dbl> 10437, 6852, 133, 231, 81, 15, 37, 1417, 9, 1504, 2, 395…
## $ `Market Cap.` <dbl> 96550, 88237, 7282, 13602, 4047, 3789, 8694, 8729, 5496,…
## $ Shares <dbl> 3692175, 3361447, 774780, 628289, 404738, 378900, 572000…
## $ `MV%` <chr> "0.406", "0.371", "0.031", "0.057", "0.017", "0.016", "0…
#Q2. Select column CO_ID, DATE, Close. Convert CO_ID into text format, DATE into date format and change data from long format to wide format. (Hint: You can use `spread()`)
stock_day_3_year$CO_ID <- as.character(stock_day_3_year$CO_ID)
stock_day_3_year$Date <- as.character(stock_day_3_year$Date)
stock_day_3_year$Date <- as.Date(stock_day_3_year$Date, "%Y%m%d")
stock_day_3_year %>%
select(CO_ID, Date, Close) %>%
spread(key = CO_ID, value = Close)
## # A tibble: 737 × 856
## Date `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016-01-04 18.3 22.4 8.28 17.0 8.61 9.5 14.7 16.8 19.2
## 2 2016-01-05 18.5 22.2 8.24 16.9 8.6 9.5 14.7 16.6 19.2
## 3 2016-01-06 18.3 22.1 8.17 16.8 8.57 9.49 15.0 16.6 19.2
## 4 2016-01-07 19.2 22.3 8.16 16.8 8.44 9.5 14.7 16.6 19.0
## 5 2016-01-08 18.9 22.2 8.14 16.7 8.5 9.45 14.3 16.6 19.2
## 6 2016-01-11 18.2 21.9 8.02 16.7 8.45 9.4 13.6 16.5 19.2
## 7 2016-01-12 18.3 21.8 7.97 16.4 8.43 9.4 13.7 16.4 19.1
## 8 2016-01-13 18.8 22.0 7.97 16.6 8.36 9.4 13.6 16.5 19.0
## 9 2016-01-14 18.8 21.8 7.99 16.2 8.23 9.41 13.4 16.6 19.0
## 10 2016-01-15 18.9 21.8 7.99 16.1 8.15 9.42 13.6 16.5 18.9
## # … with 727 more rows, and 846 more variables: 1210 <dbl>, 1213 <dbl>,
## # 1215 <dbl>, 1216 <dbl>, 1217 <dbl>, 1218 <dbl>, 1219 <dbl>, 1220 <dbl>,
## # 1225 <dbl>, 1227 <dbl>, 1229 <dbl>, 1231 <dbl>, 1232 <dbl>, 1233 <dbl>,
## # 1234 <dbl>, 1235 <dbl>, 1236 <dbl>, 1301 <dbl>, 1303 <dbl>, 1304 <dbl>,
## # 1305 <dbl>, 1307 <dbl>, 1308 <dbl>, 1309 <dbl>, 1310 <dbl>, 1312 <dbl>,
## # 1313 <dbl>, 1314 <dbl>, 1315 <dbl>, 1316 <dbl>, 1319 <dbl>, 1321 <dbl>,
## # 1323 <dbl>, 1324 <dbl>, 1325 <dbl>, 1326 <dbl>, 1339 <dbl>, 1402 <dbl>, …
#Q3. Convert data into xts format (Hint: you can use`tk_xts()`). Compute daily returns (you can use log or `Return.calculate()`). Show the first five daily returns.
ret_day <- stock_day_3_year %>%
tk_xts(select = -Date, date_var = Date) %>%
Return.calculate(method = "log")
## Warning: Non-numeric columns being dropped: CO_ID, CoName, TSE Sector, MV%
ret_day[is.na(ret_day)] <- 0
head(ret_day)
## Open High Low Close Volume
## 2016-01-04 0.00000000 0.00000000 0.00000000 0.00000000 0.0000000
## 2016-01-04 0.19795970 0.19795970 0.18658596 0.20007537 -0.4208166
## 2016-01-04 -1.02031861 -1.02031861 -0.98128205 -0.99477146 -3.9419467
## 2016-01-04 0.72762328 0.72762328 0.71819321 0.71819321 0.5520686
## 2016-01-04 -0.69487181 -0.69487181 -0.67911186 -0.67911186 -1.0479686
## 2016-01-04 0.09854795 0.09854795 0.09836748 0.09836748 -1.6863990
## Market Cap. Shares
## 2016-01-04 0.00000000 0.00000000
## 2016-01-04 -0.09003463 -0.09384418
## 2016-01-04 -2.49462083 -1.46754770
## 2016-01-04 0.62481129 -0.20957887
## 2016-01-04 -1.21224097 -0.43976031
## 2016-01-04 -0.06587373 -0.06596763
#Q4. Select the first 10 stocks and compute monthly returns. (Hint: you can use `to.monthly()` or `to.period()` to convert daily data into monthly data. Show the first 10 monthly returns.
ret_mon <- stock_day_3_year %>%
tk_xts(select = -Date, date_var = Date) %>%
to.period(period = "months",
indexAt = "lastof",
OHLC = FALSE) %>%
Return.calculate(method = 'log')
## Warning: Non-numeric columns being dropped: CO_ID, CoName, TSE Sector, MV%
ret_mon[is.na(ret_mon)] <- 0
ret_mon[1:10,]
## Open High Low Close Volume
## 2016-01-31 0.000000000 0.000000000 0.000000000 0.000000000 0.0000000
## 2016-02-29 0.081081669 0.105360516 0.099263988 0.094687321 1.8640801
## 2016-03-31 0.000000000 -0.018349139 -0.009389740 -0.005205634 -1.3785723
## 2016-04-30 -0.014644613 -0.023947006 -0.014783796 -0.018967903 0.9132091
## 2016-05-31 -0.034301326 -0.034301326 -0.036840569 -0.029143995 -0.5337194
## 2016-06-30 -0.012068166 -0.009863094 -0.008869238 -0.007696575 -1.2580400
## 2016-07-31 0.008791265 0.019629856 0.016565812 0.009884759 0.4883528
## 2016-08-31 0.038631413 0.026639828 0.032330402 0.030142217 0.6346510
## 2016-09-30 0.001052078 0.000000000 -0.002123143 0.004232811 -0.7552790
## 2016-10-31 0.014614039 0.019782009 0.025184962 0.023996978 -0.4446858
## Market Cap. Shares
## 2016-01-31 0.000000000 0
## 2016-02-29 0.094237684 0
## 2016-03-31 -0.004609483 0
## 2016-04-30 -0.019176540 0
## 2016-05-31 -0.029741969 0
## 2016-06-30 -0.008119124 0
## 2016-07-31 -0.030347156 0
## 2016-08-31 0.030347156 0
## 2016-09-30 0.004338402 0
## 2016-10-31 0.023530497 0
#Q5. Find the 20 largest market capitalization firms at the end of 2017 and 2018. And compute each firm’s market capitalization share of the total 20 largest firms. (Hint: `filter()`, `arrange()`, `slice()`, `sum()`)。
largest20_2017 <- stock_day_3_year %>%
select(CO_ID, CoName, Date, Shares) %>%
filter(Date == "2017-12-29") %>%
mutate(year1 = year(Date)) %>%
select(Date, year1, Shares, CO_ID, CoName) %>%
arrange(desc(Shares)) %>%
slice(1:20) %>%
ungroup()
glimpse(largest20_2017)
## Rows: 20
## Columns: 5
## $ Date <date> 2017-12-29, 2017-12-29, 2017-12-29, 2017-12-29, 2017-12-29, 20…
## $ year1 <dbl> 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 201…
## $ Shares <dbl> 25930380, 19496989, 17328738, 15734860, 14976819, 13599823, 126…
## $ CO_ID <chr> "2330", "2891", "2317", "2002", "2883", "2886", "2303", "2882",…
## $ CoName <chr> "TSMC", "CTBC Holding", "Hon Hai Precision", "China Steel", "Ch…
largest20_2018 <- stock_day_3_year %>%
select(CO_ID, CoName, Date, Shares) %>%
filter(Date == "2018-12-28") %>%
mutate(year2 = year(Date)) %>%
select(Date, year2, Shares, CO_ID, CoName) %>%
arrange(desc(Shares)) %>%
slice(1:20) %>%
ungroup()
glimpse(largest20_2018)
## Rows: 20
## Columns: 5
## $ Date <date> 2018-12-28, 2018-12-28, 2018-12-28, 2018-12-28, 2018-12-28, 20…
## $ year2 <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 201…
## $ Shares <dbl> 25930380, 19496989, 15734860, 14963355, 13862990, 13599823, 125…
## $ CO_ID <chr> "2330", "2891", "2002", "2883", "2317", "2886", "5880", "2882",…
## $ CoName <chr> "TSMC", "CTBC Holding", "China Steel", "China Deve. FHC", "Hon …
#Q6. Plot the share of each firm’s market cap in Q5 in descending order.
largest20_2017%>%
ggplot(aes(x = reorder(CO_ID, Shares, desc), y = Shares)) +
geom_col(fill = "red") +
labs(x = "CO_ID") +
labs(y = 'Market share %')+
labs(title = 'The share of each firm’s market cap in Q5 in descending order')

largest20_2018%>%
ggplot(aes(x = reorder(CO_ID, Shares, desc), y = Shares)) +
geom_col(fill = "blue") +
labs(x = "CO_ID") +
labs(y = 'Market share %')+
labs(title = 'The share of each firm’s market cap in Q5 in descending order')

#Q7. Based on the 20 firms selected from Q5, find their daily returns in 2018. (Hint: `filter()`)
ret_day_2018 <- largest20_2018 %>%
tk_xts(select = -Date, date_var = Date) %>%
Return.calculate(method = "log")
## Warning: Non-numeric columns being dropped: CO_ID, CoName
ret_day_2018[is.na(ret_day_2018)] <- 0
head(ret_day_2018)
## year2 Shares
## 2018-12-28 0 0.00000000
## 2018-12-28 0 -0.28515521
## 2018-12-28 0 -0.21438141
## 2018-12-28 0 -0.05027442
## 2018-12-28 0 -0.07638151
## 2018-12-28 0 -0.01916592