# Q1. Install library tidyquant, timetk and quantmod. Import data tej_2016_2018.txt.
library(timetk)
library(tidyquant)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ══ Need to Learn tidyquant? ════════════════════════════════════════════════════
## Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(quantmod)
library(tidyr)
library(ggplot2)
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
## 
##     first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
rm(list = ls())
tej_stock <- read_tsv("tej_2016_2018.txt")
## Rows: 605034 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (2): CoName, MV%
## dbl (10): CO_ID, Date, Open, High, Low, Close, Volume, Amount, Shares, Marke...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(tej_stock)
## Rows: 605,034
## Columns: 12
## $ CO_ID         <dbl> 1101, 1102, 1103, 1104, 1108, 1109, 1110, 1201, 1203, 12…
## $ CoName        <chr> "Taiwan Cement", "Asia Cement", "Chia Hsin Cement", "Uni…
## $ Date          <dbl> 20160104, 20160104, 20160104, 20160104, 20160104, 201601…
## $ Open          <dbl> 16.1059, 20.3594, 7.4338, 15.6636, 8.0961, 8.4734, 14.78…
## $ High          <dbl> 16.1059, 20.3594, 7.4338, 15.6636, 8.0961, 8.4734, 14.78…
## $ Low           <dbl> 15.4274, 19.2801, 7.3169, 15.2755, 8.0160, 8.3895, 14.35…
## $ Close         <dbl> 15.4274, 19.5406, 7.3247, 15.2755, 8.0160, 8.3895, 14.54…
## $ Volume        <dbl> 10437, 6852, 133, 231, 81, 15, 37, 1417, 9, 1504, 2, 395…
## $ Amount        <dbl> 275489, 179618, 1254, 5076, 813, 156, 563, 24270, 215, 3…
## $ Shares        <dbl> 3692175, 3361447, 774780, 628289, 404738, 378900, 572000…
## $ `Market Cap.` <dbl> 96550, 88237, 7282, 13602, 4047, 3789, 8694, 8729, 5496,…
## $ `MV%`         <chr> "0.406", "0.371", "0.031", "0.057", "0.017", "0.016", "0…
# Q2. Select column CO_ID, DATE, Close. Convert CO_ID into text format, DATE into date format and change data from long format to wide format. (Hint: You can use `spread()`)
tej_stock$CO_ID <- as.character(tej_stock$CO_ID)
tej_stock$Date <- as.character(tej_stock$Date)
tej_stock$Date <- as.Date(tej_stock$Date, "%Y%m%d")
tej_stock %>% 
  select(CO_ID, Date, Close) %>%
  spread(key = CO_ID, value = Close)
## # A tibble: 737 × 850
##    Date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##    <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 2016-01-04   15.4   19.5   7.32   15.3   8.02   8.39   14.5   15.4   18.0
##  2 2016-01-05   15.6   19.4   7.29   15.2   8.01   8.39   14.5   15.3   18.0
##  3 2016-01-06   15.4   19.3   7.22   15.1   7.98   8.38   14.8   15.2   18.0
##  4 2016-01-07   16.1   19.5   7.22   15.1   7.86   8.39   14.5   15.2   17.8
##  5 2016-01-08   15.9   19.4   7.19   15.1   7.92   8.35   14.2   15.3   18.0
##  6 2016-01-11   15.3   19.1   7.09   15.0   7.87   8.31   13.5   15.1   18.0
##  7 2016-01-12   15.4   19.0   7.04   14.7   7.86   8.31   13.6   15.0   17.9
##  8 2016-01-13   15.8   19.2   7.04   14.9   7.79   8.31   13.4   15.1   17.8
##  9 2016-01-14   15.8   19.0   7.06   14.5   7.66   8.31   13.2   15.2   17.8
## 10 2016-01-15   15.9   19.0   7.07   14.5   7.59   8.32   13.4   15.1   17.7
## # … with 727 more rows, and 840 more variables: 1210 <dbl>, 1213 <dbl>,
## #   1215 <dbl>, 1216 <dbl>, 1217 <dbl>, 1218 <dbl>, 1219 <dbl>, 1220 <dbl>,
## #   1225 <dbl>, 1227 <dbl>, 1229 <dbl>, 1231 <dbl>, 1232 <dbl>, 1233 <dbl>,
## #   1234 <dbl>, 1235 <dbl>, 1236 <dbl>, 1301 <dbl>, 1303 <dbl>, 1304 <dbl>,
## #   1305 <dbl>, 1307 <dbl>, 1308 <dbl>, 1309 <dbl>, 1310 <dbl>, 1312 <dbl>,
## #   1313 <dbl>, 1314 <dbl>, 1315 <dbl>, 1316 <dbl>, 1319 <dbl>, 1321 <dbl>,
## #   1323 <dbl>, 1324 <dbl>, 1325 <dbl>, 1326 <dbl>, 1339 <dbl>, 1402 <dbl>, …
#Q3. Convert data into xts format (Hint: you can use`tk_xts()`). Compute daily returns (you can use log or `Return.calculate()`). Show the first five daily returns. 
ret_day <- tej_stock %>% 
  tk_xts(select = -Date, date_var = Date) %>% 
  Return.calculate(method = "log") 
## Warning: Non-numeric columns being dropped: CO_ID, CoName, MV%
ret_day[is.na(ret_day)] <- 0
head(ret_day)
##                   Open        High         Low       Close     Volume
## 2016-01-04  0.00000000  0.00000000  0.00000000  0.00000000  0.0000000
## 2016-01-04  0.23435706  0.23435706  0.22292833  0.23634920 -0.4208166
## 2016-01-04 -1.00750555 -1.00750555 -0.96888673 -0.98124215 -3.9419467
## 2016-01-04  0.74530238  0.74530238  0.73606350  0.73499804  0.5520686
## 2016-01-04 -0.65995708 -0.65995708 -0.64481069 -0.64481069 -1.0479686
## 2016-01-04  0.04554938  0.04554938  0.04554138  0.04554138 -1.6863990
##                Amount      Shares Market Cap.
## 2016-01-04  0.0000000  0.00000000  0.00000000
## 2016-01-04 -0.4277153 -0.09384418 -0.09003463
## 2016-01-04 -4.9644939 -1.46754770 -2.49462083
## 2016-01-04  1.3981851 -0.20957887  0.62481129
## 2016-01-04 -1.8315477 -0.43976031 -1.21224097
## 2016-01-04 -1.6508751 -0.06596763 -0.06587373
#Q4. Select the first 10 stocks and compute monthly returns. (Hint: you can use `to.monthly()` or `to.period()` to convert daily data into monthly data. Show the first 10 monthly returns. 
ret_mon <- tej_stock %>% 
  tk_xts(select = -Date, date_var = Date) %>%
  to.period(period  = "months",
            indexAt = "lastof",
            OHLC    = FALSE)  %>%
  Return.calculate(method = 'log')
## Warning: Non-numeric columns being dropped: CO_ID, CoName, MV%
ret_mon[is.na(ret_mon)] <- 0
ret_mon[1:10,]
##                    Open         High          Low        Close     Volume
## 2016-01-31  0.000000000  0.000000000  0.000000000  0.000000000  0.0000000
## 2016-02-29  0.080908333  0.105868001  0.099580194  0.094358498  1.8640801
## 2016-03-31  0.000000000 -0.018604401 -0.009523469 -0.004750398 -1.3785723
## 2016-04-30 -0.014319432 -0.023753060 -0.014457450 -0.019230521  0.9132091
## 2016-05-31 -0.034231650 -0.034231650 -0.036581773 -0.029557478 -0.5337194
## 2016-06-30 -0.013019121 -0.009999629 -0.009104348 -0.008031805 -1.2580400
## 2016-07-31  0.009076666  0.019485946  0.016128643  0.010109097  0.4883528
## 2016-08-31  0.038774859  0.026350180  0.032784691  0.030707399  0.6346510
## 2016-09-30  0.001004093  0.000000000 -0.002014410  0.004027639 -0.7552790
## 2016-10-31  0.014887914  0.019798136  0.024941445  0.023809618 -0.4446858
##                Amount Shares  Market Cap.
## 2016-01-31  0.0000000      0  0.000000000
## 2016-02-29  1.9572593      0  0.094237684
## 2016-03-31 -1.3801249      0 -0.004609483
## 2016-04-30  0.8872181      0 -0.019176540
## 2016-05-31 -0.5699491      0 -0.029741969
## 2016-06-30 -1.2569868      0 -0.008119124
## 2016-07-31  0.4653632      0 -0.030347156
## 2016-08-31  0.6588446      0  0.030347156
## 2016-09-30 -0.7412156      0  0.004338402
## 2016-10-31 -0.4130697      0  0.023530497
# Q5. Find the 20 largest market capitalization firms at the end of 2017 and 2018. And compute each firm’s market capitalization share of the total 20 largest firms. (Hint: `filter()`, `arrange()`, `slice()`, `sum()`)。
largest20_2017 <- tej_stock %>% 
  select(CO_ID, CoName, Date, Shares) %>%
  filter(Date == "2017-12-29") %>%
  mutate(year1 = year(Date)) %>%
  select(Date, year1, Shares, CO_ID, CoName) %>%
  arrange(desc(Shares)) %>%
  slice(1:20) %>%
  ungroup()
glimpse(largest20_2017)
## Rows: 20
## Columns: 5
## $ Date   <date> 2017-12-29, 2017-12-29, 2017-12-29, 2017-12-29, 2017-12-29, 20…
## $ year1  <dbl> 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 201…
## $ Shares <dbl> 25930380, 19496989, 17328738, 15734860, 14976819, 13599823, 126…
## $ CO_ID  <chr> "2330", "2891", "2317", "2002", "2883", "2886", "2303", "2882",…
## $ CoName <chr> "TSMC", "CTBC Holding", "Hon Hai Precision", "China Steel", "Ch…
largest20_2018 <- tej_stock %>% 
  select(CO_ID, CoName, Date, Shares) %>%
  filter(Date == "2018-12-28") %>%
  mutate(year2 = year(Date)) %>%
  select(Date, year2, Shares, CO_ID, CoName) %>%
  arrange(desc(Shares)) %>%
  slice(1:20) %>%
  ungroup()
glimpse(largest20_2018)
## Rows: 20
## Columns: 5
## $ Date   <date> 2018-12-28, 2018-12-28, 2018-12-28, 2018-12-28, 2018-12-28, 20…
## $ year2  <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 201…
## $ Shares <dbl> 25930380, 19496990, 15734861, 14963356, 13862991, 13599824, 125…
## $ CO_ID  <chr> "2330", "2891", "2002", "2883", "2317", "2886", "5880", "2882",…
## $ CoName <chr> "TSMC", "CTBC Holding", "China Steel", "China Deve. FHC", "Hon …
#Q6. Plot the share of each firm’s market cap in Q5 in descending order.
largest20_2017%>% 
  ggplot(aes(x = reorder(CO_ID, Shares, desc), y = Shares)) + 
  geom_col(fill = "black") + 
  labs(x = "CO_ID") + 
  labs(y = 'Market share %')+
  labs(title = 'The share of each firm’s market cap in Q5 in descending order') +
theme(axis.text.x = element_text(face="bold", color="#001000", size=8, angle=90), axis.text.y = element_text(face="bold", color="#001000", size=8, angle=0))

largest20_2018%>% 
  ggplot(aes(x = reorder(CO_ID, Shares, desc), y = Shares)) + 
  geom_col(fill = "grey") + 
  labs(x = "CO_ID") +
  labs(y = 'Market share %')+
  labs(title = 'The share of each firm’s market cap in Q5 in descending order') +
theme(axis.text.x = element_text(face="bold", color="#001000", size=8, angle=90), axis.text.y = element_text(face="bold", color="#001000", size=8, angle=0))

#Q7. Based on the 20 firms selected from Q5, find their daily returns in 2018. (Hint: `filter()`)
ret_day_2018 <- largest20_2018 %>% 
  tk_xts(select = -Date, date_var = Date) %>% 
  Return.calculate(method = "log") 
## Warning: Non-numeric columns being dropped: CO_ID, CoName
ret_day_2018[is.na(ret_day_2018)] <- 0
head(ret_day_2018)
##            year2      Shares
## 2018-12-28     0  0.00000000
## 2018-12-28     0 -0.28515516
## 2018-12-28     0 -0.21438140
## 2018-12-28     0 -0.05027442
## 2018-12-28     0 -0.07638151
## 2018-12-28     0 -0.01916592