#Q1. Install library tidyquant, timetk and quantmod. Import data tej_2016_2018.txt.
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(timetk)
library(tidyquant)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
## ══ Need to Learn tidyquant? ════════════════════════════════════════════════════
## Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
## 
##     first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
rm(list=ls())
stock_day_3_year<-read_tsv("tej_2016_2018.txt")
## Rows: 609010 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): CoName, TSE Sector, MV%
## dbl (9): CO_ID, Date, Open, High, Low, Close, Volume, Market Cap., Shares
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(stock_day_3_year)
## Rows: 609,010
## Columns: 12
## $ CO_ID         <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203, 12…
## $ CoName        <chr> "Taiwan Cement", "Asia Cement", "Chia Hsin Cement", "Uni…
## $ `TSE Sector`  <chr> "01", "01", "01", "01", "01", "01", "01", "02", "02", "0…
## $ Date          <dbl> 20160104, 20160104, 20160104, 20160104, 20160104, 201601…
## $ Open          <dbl> 19.14, 23.33, 8.41, 17.41, 8.69, 9.59, 14.95, 17.14, 19.…
## $ High          <dbl> 19.14, 23.33, 8.41, 17.41, 8.69, 9.59, 14.95, 17.24, 19.…
## $ Low           <dbl> 18.33, 22.09, 8.28, 16.98, 8.61, 9.50, 14.52, 16.56, 19.…
## $ Close         <dbl> 18.33, 22.39, 8.28, 16.98, 8.61, 9.50, 14.71, 16.80, 19.…
## $ Volume        <dbl> 10437, 6852, 133, 231, 81, 15, 37, 1417, 9, 1504, 2, 395…
## $ `Market Cap.` <dbl> 96550, 88237, 7282, 13602, 4047, 3789, 8694, 8729, 5496,…
## $ Shares        <dbl> 3692175, 3361447, 774780, 628289, 404738, 378900, 572000…
## $ `MV%`         <chr> "0.406", "0.371", "0.031", "0.057", "0.017", "0.016", "0…
#Q2. Select column CO_ID, DATE, Close. Convert CO_ID into text format, DATE into date format and change data from long format to wide format. (Hint: You can use `spread()`)
stock_day_3_year$CO_ID <- as.character(stock_day_3_year$CO_ID)
stock_day_3_year$Date <- as.character(stock_day_3_year$Date)
stock_day_3_year$Date <- as.Date(stock_day_3_year$Date, "%Y%m%d")
stock_day_3_year %>% 
  select(CO_ID, Date, Close) %>%
  spread(key = CO_ID, value = Close)
## # A tibble: 737 × 856
##    Date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##    <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 2016-01-04   18.3   22.4   8.28   17.0   8.61   9.5    14.7   16.8   19.2
##  2 2016-01-05   18.5   22.2   8.24   16.9   8.6    9.5    14.7   16.6   19.2
##  3 2016-01-06   18.3   22.1   8.17   16.8   8.57   9.49   15.0   16.6   19.2
##  4 2016-01-07   19.2   22.3   8.16   16.8   8.44   9.5    14.7   16.6   19.0
##  5 2016-01-08   18.9   22.2   8.14   16.7   8.5    9.45   14.3   16.6   19.2
##  6 2016-01-11   18.2   21.9   8.02   16.7   8.45   9.4    13.6   16.5   19.2
##  7 2016-01-12   18.3   21.8   7.97   16.4   8.43   9.4    13.7   16.4   19.1
##  8 2016-01-13   18.8   22.0   7.97   16.6   8.36   9.4    13.6   16.5   19.0
##  9 2016-01-14   18.8   21.8   7.99   16.2   8.23   9.41   13.4   16.6   19.0
## 10 2016-01-15   18.9   21.8   7.99   16.1   8.15   9.42   13.6   16.5   18.9
## # … with 727 more rows, and 846 more variables: 1210 <dbl>, 1213 <dbl>,
## #   1215 <dbl>, 1216 <dbl>, 1217 <dbl>, 1218 <dbl>, 1219 <dbl>, 1220 <dbl>,
## #   1225 <dbl>, 1227 <dbl>, 1229 <dbl>, 1231 <dbl>, 1232 <dbl>, 1233 <dbl>,
## #   1234 <dbl>, 1235 <dbl>, 1236 <dbl>, 1301 <dbl>, 1303 <dbl>, 1304 <dbl>,
## #   1305 <dbl>, 1307 <dbl>, 1308 <dbl>, 1309 <dbl>, 1310 <dbl>, 1312 <dbl>,
## #   1313 <dbl>, 1314 <dbl>, 1315 <dbl>, 1316 <dbl>, 1319 <dbl>, 1321 <dbl>,
## #   1323 <dbl>, 1324 <dbl>, 1325 <dbl>, 1326 <dbl>, 1339 <dbl>, 1402 <dbl>, …
#Q3. Convert data into xts format (Hint: you can use`tk_xts()`). Compute daily returns (you can use log or `Return.calculate()`). Show the first five daily returns. 
ret_day <- stock_day_3_year %>% 
  tk_xts(select = -Date, date_var = Date) %>% 
  Return.calculate(method = "log") 
## Warning: Non-numeric columns being dropped: CO_ID, CoName, TSE Sector, MV%
ret_day[is.na(ret_day)] <- 0
head(ret_day)
##                   Open        High         Low       Close     Volume
## 2016-01-04  0.00000000  0.00000000  0.00000000  0.00000000  0.0000000
## 2016-01-04  0.19795970  0.19795970  0.18658596  0.20007537 -0.4208166
## 2016-01-04 -1.02031861 -1.02031861 -0.98128205 -0.99477146 -3.9419467
## 2016-01-04  0.72762328  0.72762328  0.71819321  0.71819321  0.5520686
## 2016-01-04 -0.69487181 -0.69487181 -0.67911186 -0.67911186 -1.0479686
## 2016-01-04  0.09854795  0.09854795  0.09836748  0.09836748 -1.6863990
##            Market Cap.      Shares
## 2016-01-04  0.00000000  0.00000000
## 2016-01-04 -0.09003463 -0.09384418
## 2016-01-04 -2.49462083 -1.46754770
## 2016-01-04  0.62481129 -0.20957887
## 2016-01-04 -1.21224097 -0.43976031
## 2016-01-04 -0.06587373 -0.06596763
#Q4. Select the first 10 stocks and compute monthly returns. (Hint: you can use `to.monthly()` or `to.period()` to convert daily data into monthly data. Show the first 10 monthly returns. 
ret_mon <- stock_day_3_year %>% 
  tk_xts(select = -Date, date_var = Date) %>%
  to.period(period  = "months",
            indexAt = "lastof",
            OHLC    = FALSE)  %>%
  Return.calculate(method = 'log')
## Warning: Non-numeric columns being dropped: CO_ID, CoName, TSE Sector, MV%
ret_mon[is.na(ret_mon)] <- 0
ret_mon[1:10,]
##                    Open         High          Low        Close     Volume
## 2016-01-31  0.000000000  0.000000000  0.000000000  0.000000000  0.0000000
## 2016-02-29  0.081081669  0.105360516  0.099263988  0.094687321  1.8640801
## 2016-03-31  0.000000000 -0.018349139 -0.009389740 -0.005205634 -1.3785723
## 2016-04-30 -0.014644613 -0.023947006 -0.014783796 -0.018967903  0.9132091
## 2016-05-31 -0.034301326 -0.034301326 -0.036840569 -0.029143995 -0.5337194
## 2016-06-30 -0.012068166 -0.009863094 -0.008869238 -0.007696575 -1.2580400
## 2016-07-31  0.008791265  0.019629856  0.016565812  0.009884759  0.4883528
## 2016-08-31  0.038631413  0.026639828  0.032330402  0.030142217  0.6346510
## 2016-09-30  0.001052078  0.000000000 -0.002123143  0.004232811 -0.7552790
## 2016-10-31  0.014614039  0.019782009  0.025184962  0.023996978 -0.4446858
##             Market Cap. Shares
## 2016-01-31  0.000000000      0
## 2016-02-29  0.094237684      0
## 2016-03-31 -0.004609483      0
## 2016-04-30 -0.019176540      0
## 2016-05-31 -0.029741969      0
## 2016-06-30 -0.008119124      0
## 2016-07-31 -0.030347156      0
## 2016-08-31  0.030347156      0
## 2016-09-30  0.004338402      0
## 2016-10-31  0.023530497      0
#Q5. Find the 20 largest market capitalization firms at the end of 2017 and 2018. And compute each firm’s market capitalization share of the total 20 largest firms. (Hint: `filter()`, `arrange()`, `slice()`, `sum()`)。
largest20_2017 <- stock_day_3_year %>% 
  select(CO_ID, CoName, Date, Shares) %>%
  filter(Date == "2017-12-29") %>%
  mutate(year1 = year(Date)) %>%
  select(Date, year1, Shares, CO_ID, CoName) %>%
  arrange(desc(Shares)) %>%
  slice(1:20) %>%
  ungroup()
glimpse(largest20_2017)
## Rows: 20
## Columns: 5
## $ Date   <date> 2017-12-29, 2017-12-29, 2017-12-29, 2017-12-29, 2017-12-29, 20…
## $ year1  <dbl> 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 201…
## $ Shares <dbl> 25930380, 19496989, 17328738, 15734860, 14976819, 13599823, 126…
## $ CO_ID  <chr> "2330", "2891", "2317", "2002", "2883", "2886", "2303", "2882",…
## $ CoName <chr> "TSMC", "CTBC Holding", "Hon Hai Precision", "China Steel", "Ch…
largest20_2018 <- stock_day_3_year %>% 
  select(CO_ID, CoName, Date, Shares) %>%
  filter(Date == "2018-12-28") %>%
  mutate(year2 = year(Date)) %>%
  select(Date, year2, Shares, CO_ID, CoName) %>%
  arrange(desc(Shares)) %>%
  slice(1:20) %>%
  ungroup()
glimpse(largest20_2018)
## Rows: 20
## Columns: 5
## $ Date   <date> 2018-12-28, 2018-12-28, 2018-12-28, 2018-12-28, 2018-12-28, 20…
## $ year2  <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 201…
## $ Shares <dbl> 25930380, 19496989, 15734860, 14963355, 13862990, 13599823, 125…
## $ CO_ID  <chr> "2330", "2891", "2002", "2883", "2317", "2886", "5880", "2882",…
## $ CoName <chr> "TSMC", "CTBC Holding", "China Steel", "China Deve. FHC", "Hon …
#Q6. Plot the share of each firm’s market cap in Q5 in descending order.
largest20_2017%>% 
  ggplot(aes(x = reorder(CO_ID, Shares, desc), y = Shares)) + 
  geom_col(fill = "red") + 
  labs(x = "CO_ID") + 
  labs(y = 'Market share %')+ 
  labs(title = 'The share of each firm’s market cap in Q5 in descending order')

largest20_2018%>% 
  ggplot(aes(x = reorder(CO_ID, Shares, desc), y = Shares)) + 
  geom_col(fill = "blue") + 
  labs(x = "CO_ID") + 
  labs(y = 'Market share %')+ 
  labs(title = 'The share of each firm’s market cap in Q5 in descending order')

#Q7. Based on the 20 firms selected from Q5, find their daily returns in 2018. (Hint: `filter()`)
ret_day_2018 <- largest20_2018 %>% 
  tk_xts(select = -Date, date_var = Date) %>% 
  Return.calculate(method = "log") 
## Warning: Non-numeric columns being dropped: CO_ID, CoName
ret_day_2018[is.na(ret_day_2018)] <- 0
head(ret_day_2018)
##            year2      Shares
## 2018-12-28     0  0.00000000
## 2018-12-28     0 -0.28515521
## 2018-12-28     0 -0.21438141
## 2018-12-28     0 -0.05027442
## 2018-12-28     0 -0.07638151
## 2018-12-28     0 -0.01916592