library(tidyverse)
## ── Attaching packages ────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(tidyquant)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ══ Need to Learn tidyquant? ═══════════════════════════════════════════════════
## Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(timetk)
# Q1
stock_day <- read_tsv('tej_2016_2018.txt')
## Parsed with column specification:
## cols(
##   CO_ID = col_double(),
##   CoName = col_character(),
##   `TSE Sector` = col_character(),
##   Date = col_double(),
##   Open = col_double(),
##   High = col_double(),
##   Low = col_double(),
##   Close = col_double(),
##   Volume = col_double(),
##   `Market Cap.` = col_double(),
##   Shares = col_double(),
##   `MV%` = col_character()
## )
stock_day <- stock_day %>% 
  dplyr::rename(id = CO_ID, 
         name = CoName, 
         date = Date, 
         cap_share = `MV%`)
stock_day <- stock_day %>% 
  mutate(date = as.Date(as.character(.$date),'%Y%m%d'))
# Q2
price_day1 <- stock_day %>% 
  select(id, date, Open, High, Low, Close)
price_day1[1:10,]
## # A tibble: 10 x 6
##       id date        Open  High   Low Close
##    <dbl> <date>     <dbl> <dbl> <dbl> <dbl>
##  1  1101 2016-01-04 19.1  19.1  18.3  18.3 
##  2  1102 2016-01-04 23.3  23.3  22.1  22.4 
##  3  1103 2016-01-04  8.41  8.41  8.28  8.28
##  4  1104 2016-01-04 17.4  17.4  17.0  17.0 
##  5  1108 2016-01-04  8.69  8.69  8.61  8.61
##  6  1109 2016-01-04  9.59  9.59  9.5   9.5 
##  7  1110 2016-01-04 15.0  15.0  14.5  14.7 
##  8  1201 2016-01-04 17.1  17.2  16.6  16.8 
##  9  1203 2016-01-04 19.2  19.2  19.2  19.2 
## 10  1210 2016-01-04 15.8  15.9  15.4  15.7
#Q3
price_day <- price_day1 %>% 
  select(id, date, Close) %>% 
  spread(key = id, value = Close)
price_day[1:10, 1:10]
## # A tibble: 10 x 10
##    date       `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
##    <date>      <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 2016-01-04   18.3   22.4   8.28   17.0   8.61   9.5    14.7   16.8   19.2
##  2 2016-01-05   18.5   22.2   8.24   16.9   8.6    9.5    14.7   16.6   19.2
##  3 2016-01-06   18.3   22.1   8.17   16.8   8.57   9.49   15.0   16.6   19.2
##  4 2016-01-07   19.2   22.3   8.16   16.8   8.44   9.5    14.7   16.6   19.0
##  5 2016-01-08   18.9   22.2   8.14   16.7   8.5    9.45   14.3   16.6   19.2
##  6 2016-01-11   18.2   21.9   8.02   16.7   8.45   9.4    13.6   16.5   19.2
##  7 2016-01-12   18.3   21.8   7.97   16.4   8.43   9.4    13.7   16.4   19.1
##  8 2016-01-13   18.8   22.0   7.97   16.6   8.36   9.4    13.6   16.5   19.0
##  9 2016-01-14   18.8   21.8   7.99   16.2   8.23   9.41   13.4   16.6   19.0
## 10 2016-01-15   18.9   21.8   7.99   16.1   8.15   9.42   13.6   16.5   18.9
# Q4
number_of_na <- price_day %>% 
  map_df(~sum(is.na(.))) %>% 
  gather() %>% 
  filter(value!= 0)
glimpse(number_of_na)
## Rows: 53
## Columns: 2
## $ key   <chr> "1587", "1598", "1760", "1776", "2025", "2069", "2429", "2630",…
## $ value <int> 555, 173, 505, 80, 170, 49, 20, 521, 197, 88, 216, 322, 234, 61…
# Q5
price_close <- price_day %>% 
  na.locf(fromLast = TRUE, na.rm = FALSE)

# check stocks with trailing NAs
check_trailing <- price_close %>% 
  map_df(~sum(is.na(.))) %>% 
  gather() %>% 
  filter(value!= 0)

# replace trailing with 0
price_close[is.na(price_close)] <- 0

# check stock price id 2025
tail(price_close$`2025`)
## [1] 0 0 0 0 0 0
# Q6
library(PerformanceAnalytics)
ret_day <- price_close %>% 
  mutate(date = as.Date(as.character(.$date),'%Y-%m-%d')) %>%
  tk_xts(select = -date, date_var = date) %>% 
  Return.calculate(method = 'log')
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
ret_day[1:11, 1:11]
##                    1101         1102         1103         1104         1108
## 2016-01-04           NA           NA           NA           NA           NA
## 2016-01-05  0.009772065 -0.009423449 -0.004842624 -0.007092228 -0.001162115
## 2016-01-06 -0.013598251 -0.001805055 -0.008531435 -0.004756252 -0.003494471
## 2016-01-07  0.048633682  0.007649081 -0.001224740  0.000000000 -0.015285424
## 2016-01-08 -0.012598592 -0.005844027 -0.002453989 -0.002386636  0.007083855
## 2016-01-11 -0.038228073 -0.011791520 -0.014851758 -0.002392346 -0.005899722
## 2016-01-12  0.003834571 -0.005948313 -0.006253929 -0.018737327 -0.002369669
## 2016-01-13  0.024839290  0.011861453  0.000000000  0.009714709 -0.008338345
## 2016-01-14  0.000000000 -0.013698844  0.002506267 -0.024466052 -0.015672412
## 2016-01-15  0.005849525  0.000000000  0.000000000 -0.001859313 -0.009768087
## 2016-01-18 -0.011197131 -0.003684942 -0.007537724 -0.004975135  0.003674223
##                    1109         1110         1201         1203         1210
## 2016-01-04           NA           NA           NA           NA           NA
## 2016-01-05  0.000000000 -0.003404838 -0.008968670 -0.004166673 -0.029043668
## 2016-01-06 -0.001053186  0.019588603 -0.003007521  0.000000000 -0.005252803
## 2016-01-07  0.001053186 -0.019588603 -0.002412546 -0.006808091 -0.017264705
## 2016-01-08 -0.005277057 -0.023465535  0.005420067  0.006808091  0.029695353
## 2016-01-11 -0.005305052 -0.047917640 -0.011477021  0.000000000 -0.009800797
## 2016-01-12  0.000000000  0.006571765 -0.006093864 -0.004184107 -0.015216966
## 2016-01-13  0.000000000 -0.013924739  0.009126927 -0.002623985  0.012587116
## 2016-01-14  0.001063264 -0.014121379  0.003023891 -0.002104156 -0.009923998
## 2016-01-15  0.001062135  0.017804625 -0.006056954 -0.004221642  0.007287213
## 2016-01-18 -0.002125399  0.003669729  0.046299070  0.000000000 -0.020000667
##                    1213
## 2016-01-04           NA
## 2016-01-05 -0.019875516
## 2016-01-06  0.010355122
## 2016-01-07 -0.003685961
## 2016-01-08  0.000000000
## 2016-01-11 -0.006669161
## 2016-01-12  0.030027888
## 2016-01-13 -0.002890175
## 2016-01-14 -0.010181906
## 2016-01-15  0.002919710
## 2016-01-18  0.003637690
# Q7
ret_mon <- price_close %>% 
  mutate(date = as.Date(as.character(.$date),'%Y-%m-%d')) %>% 
  tk_xts(select = -date, date.var = `date`)
## Using column `date` for date_var.
ret_mon <- ret_mon %>% 
  to.period(period = 'months',
            indexAt = 'lastof',
            OHLC = FALSE) %>%
  Return.calculate(method = 'log')
ret_mon[1:11, 1:11]
##                   1101         1102         1103         1104         1108
## 2016-01-31          NA           NA           NA           NA           NA
## 2016-02-29  0.09416960  0.050086671  0.029631798  0.016610656  0.025075941
## 2016-03-31  0.04872223  0.063369614  0.012092046  0.067242306  0.039311420
## 2016-04-30  0.04342729 -0.022581605 -0.029270382 -0.079519931 -0.010256500
## 2016-05-31 -0.09072401 -0.145487929 -0.091974166 -0.029459981 -0.004592431
## 2016-06-30  0.06480558  0.115684399 -0.001357773  0.088229888  0.000000000
## 2016-07-31  0.09502693  0.045583749  0.065726674  0.036596422  0.011441772
## 2016-08-31  0.06691328  0.007598517  0.023885485  0.120462082 -0.006849342
## 2016-09-30 -0.01679429 -0.034038943 -0.003733670  0.014331849 -0.008050647
## 2016-10-31  0.05976386  0.005344309 -0.008766493  0.000000000 -0.017472780
## 2016-11-30 -0.02645268  0.017879387 -0.003780723  0.008306913  0.020930997
##                    1109         1110         1201         1203         1210
## 2016-01-31           NA           NA           NA           NA           NA
## 2016-02-29  0.022353246  0.020946956  0.010719426 -0.013252248  0.106818338
## 2016-03-31 -0.007395702  0.046779908  0.078746867 -0.034196005 -0.062712617
## 2016-04-30  0.007395702  0.029572047  0.007749974 -0.002211167  0.051325020
## 2016-05-31  0.004201687  0.000000000  0.017347374  0.011554461  0.164175182
## 2016-06-30 -0.004201687 -0.026167209  0.038695304 -0.002190581  0.132507851
## 2016-07-31  0.034147136  0.019522666 -0.050909196  0.004376375 -0.085058251
## 2016-08-31  0.053478088  0.003327790 -0.040757992  0.024799115  0.021236319
## 2016-09-30 -0.043356544  0.009917437 -0.078177781  0.027830623  0.005714301
## 2016-10-31 -0.024466052 -0.025992800  0.005176888  0.002069323  0.059003287
## 2016-11-30 -0.005173317  0.000000000  0.008569033  0.020461072  0.023884373
##                    1213
## 2016-01-31           NA
## 2016-02-29  0.067139303
## 2016-03-31  0.131136182
## 2016-04-30 -0.053567746
## 2016-05-31  0.197906831
## 2016-06-30 -0.107265113
## 2016-07-31 -0.009286196
## 2016-08-31 -0.069409451
## 2016-09-30 -0.064538521
## 2016-10-31  0.058268908
## 2016-11-30  0.049089610
# Q8
stock_50_cap <- stock_day %>% 
  select(id, name, date, cap_share) %>% 
  filter(date == "2016-12-30") %>% 
  mutate(year1 = year(date)) %>% 
  select(date, year1, cap_share, id, name) %>% 
  arrange(desc(cap_share)) %>% 
  slice(1:50)
stock_50_cap
## # A tibble: 50 x 5
##    date       year1 cap_share    id name                
##    <date>     <dbl> <chr>     <dbl> <chr>               
##  1 2016-12-30  2016 5.365      2317 Hon Hai Precision   
##  2 2016-12-30  2016 3.923      6505 Formosa Petrochem.  
##  3 2016-12-30  2016 2.895      2412 Chunghwa Telecom    
##  4 2016-12-30  2016 2.226      2882 Cathay Holdings     
##  5 2016-12-30  2016 2.088      1301 Formosa Plastics    
##  6 2016-12-30  2016 2.076      1303 Nan Ya Plastics     
##  7 2016-12-30  2016 2.075      1326 Formosa Chem & Fibre
##  8 2016-12-30  2016 17.304     2330 TSMC                
##  9 2016-12-30  2016 1.919      2881 Fubon FHC           
## 10 2016-12-30  2016 1.869      3008 Largan              
## # … with 40 more rows
# Q9
stock_10_cap <- stock_50_cap %>% 
  slice(1:10)
stock_10_cap %>% 
  ggplot(aes(x = reorder(id, cap_share, desc), y = cap_share)) +
  geom_col(fill = 'red') + 
  labs (x = 'id') +
  labs (y = 'Market share %') +
  labs(title = ' The ten largest market capitalization stocks in TWSE as of 2016') +
  geom_text(aes(label = cap_share), vjust = -0.5, size = 3)

# Q10
ret_mon['2017::2018', as.character(stock_50_cap$id[1:5])] %>% 
  tk_tbl(preserve_index = TRUE, rename_index = 'date') %>% 
  gather(asset, returns, -date) %>% 
  group_by(asset) %>% 
  ggplot(aes(x = returns, color = asset)) +
  geom_density(alpha = 1) +
  ggtitle('Monthly Returns Density from 2017-2018') +
  xlab('monthly returns') +
  ylab('distribution')

# Q11
str(stock_day)
## tibble [609,010 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ id         : num [1:609010] 1101 1102 1103 1104 1108 ...
##  $ name       : chr [1:609010] "Taiwan Cement" "Asia Cement" "Chia Hsin Cement" "Universal Cement" ...
##  $ TSE Sector : chr [1:609010] "01" "01" "01" "01" ...
##  $ date       : Date[1:609010], format: "2016-01-04" "2016-01-04" ...
##  $ Open       : num [1:609010] 19.14 23.33 8.41 17.41 8.69 ...
##  $ High       : num [1:609010] 19.14 23.33 8.41 17.41 8.69 ...
##  $ Low        : num [1:609010] 18.33 22.09 8.28 16.98 8.61 ...
##  $ Close      : num [1:609010] 18.33 22.39 8.28 16.98 8.61 ...
##  $ Volume     : num [1:609010] 10437 6852 133 231 81 ...
##  $ Market Cap.: num [1:609010] 96550 88237 7282 13602 4047 ...
##  $ Shares     : num [1:609010] 3692175 3361447 774780 628289 404738 ...
##  $ cap_share  : chr [1:609010] "0.406" "0.371" "0.031" "0.057" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   CO_ID = col_double(),
##   ..   CoName = col_character(),
##   ..   `TSE Sector` = col_character(),
##   ..   Date = col_double(),
##   ..   Open = col_double(),
##   ..   High = col_double(),
##   ..   Low = col_double(),
##   ..   Close = col_double(),
##   ..   Volume = col_double(),
##   ..   `Market Cap.` = col_double(),
##   ..   Shares = col_double(),
##   ..   `MV%` = col_character()
##   .. )
stock_id_sector <- stock_day %>% 
  select(id, `TSE Sector`) %>% 
  mutate(`TSE Sector` = as.factor(`TSE Sector`)) %>% 
  unique() %>% 
  mutate(stock_id = id) %>% 
  select(stock_id, `TSE Sector`)

stock_id_sector %>% 
  ggplot(aes(x = `TSE Sector`)) +
  geom_bar() +
  geom_text(stat = 'count', aes(label = ..count..), vjust = -0.5, size = 3)