library(tidyverse)
## ── Attaching packages ────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidyquant)
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ══ Need to Learn tidyquant? ═══════════════════════════════════════════════════
## Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(timetk)
# Q1
stock_day <- read_tsv('tej_2016_2018.txt')
## Parsed with column specification:
## cols(
## CO_ID = col_double(),
## CoName = col_character(),
## `TSE Sector` = col_character(),
## Date = col_double(),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## Volume = col_double(),
## `Market Cap.` = col_double(),
## Shares = col_double(),
## `MV%` = col_character()
## )
stock_day <- stock_day %>%
dplyr::rename(id = CO_ID,
name = CoName,
date = Date,
cap_share = `MV%`)
stock_day <- stock_day %>%
mutate(date = as.Date(as.character(.$date),'%Y%m%d'))
# Q2
price_day1 <- stock_day %>%
select(id, date, Open, High, Low, Close)
price_day1[1:10,]
## # A tibble: 10 x 6
## id date Open High Low Close
## <dbl> <date> <dbl> <dbl> <dbl> <dbl>
## 1 1101 2016-01-04 19.1 19.1 18.3 18.3
## 2 1102 2016-01-04 23.3 23.3 22.1 22.4
## 3 1103 2016-01-04 8.41 8.41 8.28 8.28
## 4 1104 2016-01-04 17.4 17.4 17.0 17.0
## 5 1108 2016-01-04 8.69 8.69 8.61 8.61
## 6 1109 2016-01-04 9.59 9.59 9.5 9.5
## 7 1110 2016-01-04 15.0 15.0 14.5 14.7
## 8 1201 2016-01-04 17.1 17.2 16.6 16.8
## 9 1203 2016-01-04 19.2 19.2 19.2 19.2
## 10 1210 2016-01-04 15.8 15.9 15.4 15.7
#Q3
price_day <- price_day1 %>%
select(id, date, Close) %>%
spread(key = id, value = Close)
price_day[1:10, 1:10]
## # A tibble: 10 x 10
## date `1101` `1102` `1103` `1104` `1108` `1109` `1110` `1201` `1203`
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2016-01-04 18.3 22.4 8.28 17.0 8.61 9.5 14.7 16.8 19.2
## 2 2016-01-05 18.5 22.2 8.24 16.9 8.6 9.5 14.7 16.6 19.2
## 3 2016-01-06 18.3 22.1 8.17 16.8 8.57 9.49 15.0 16.6 19.2
## 4 2016-01-07 19.2 22.3 8.16 16.8 8.44 9.5 14.7 16.6 19.0
## 5 2016-01-08 18.9 22.2 8.14 16.7 8.5 9.45 14.3 16.6 19.2
## 6 2016-01-11 18.2 21.9 8.02 16.7 8.45 9.4 13.6 16.5 19.2
## 7 2016-01-12 18.3 21.8 7.97 16.4 8.43 9.4 13.7 16.4 19.1
## 8 2016-01-13 18.8 22.0 7.97 16.6 8.36 9.4 13.6 16.5 19.0
## 9 2016-01-14 18.8 21.8 7.99 16.2 8.23 9.41 13.4 16.6 19.0
## 10 2016-01-15 18.9 21.8 7.99 16.1 8.15 9.42 13.6 16.5 18.9
# Q4
number_of_na <- price_day %>%
map_df(~sum(is.na(.))) %>%
gather() %>%
filter(value!= 0)
glimpse(number_of_na)
## Rows: 53
## Columns: 2
## $ key <chr> "1587", "1598", "1760", "1776", "2025", "2069", "2429", "2630",…
## $ value <int> 555, 173, 505, 80, 170, 49, 20, 521, 197, 88, 216, 322, 234, 61…
# Q5
price_close <- price_day %>%
na.locf(fromLast = TRUE, na.rm = FALSE)
# check stocks with trailing NAs
check_trailing <- price_close %>%
map_df(~sum(is.na(.))) %>%
gather() %>%
filter(value!= 0)
# replace trailing with 0
price_close[is.na(price_close)] <- 0
# check stock price id 2025
tail(price_close$`2025`)
## [1] 0 0 0 0 0 0
# Q6
library(PerformanceAnalytics)
ret_day <- price_close %>%
mutate(date = as.Date(as.character(.$date),'%Y-%m-%d')) %>%
tk_xts(select = -date, date_var = date) %>%
Return.calculate(method = 'log')
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
ret_day[1:11, 1:11]
## 1101 1102 1103 1104 1108
## 2016-01-04 NA NA NA NA NA
## 2016-01-05 0.009772065 -0.009423449 -0.004842624 -0.007092228 -0.001162115
## 2016-01-06 -0.013598251 -0.001805055 -0.008531435 -0.004756252 -0.003494471
## 2016-01-07 0.048633682 0.007649081 -0.001224740 0.000000000 -0.015285424
## 2016-01-08 -0.012598592 -0.005844027 -0.002453989 -0.002386636 0.007083855
## 2016-01-11 -0.038228073 -0.011791520 -0.014851758 -0.002392346 -0.005899722
## 2016-01-12 0.003834571 -0.005948313 -0.006253929 -0.018737327 -0.002369669
## 2016-01-13 0.024839290 0.011861453 0.000000000 0.009714709 -0.008338345
## 2016-01-14 0.000000000 -0.013698844 0.002506267 -0.024466052 -0.015672412
## 2016-01-15 0.005849525 0.000000000 0.000000000 -0.001859313 -0.009768087
## 2016-01-18 -0.011197131 -0.003684942 -0.007537724 -0.004975135 0.003674223
## 1109 1110 1201 1203 1210
## 2016-01-04 NA NA NA NA NA
## 2016-01-05 0.000000000 -0.003404838 -0.008968670 -0.004166673 -0.029043668
## 2016-01-06 -0.001053186 0.019588603 -0.003007521 0.000000000 -0.005252803
## 2016-01-07 0.001053186 -0.019588603 -0.002412546 -0.006808091 -0.017264705
## 2016-01-08 -0.005277057 -0.023465535 0.005420067 0.006808091 0.029695353
## 2016-01-11 -0.005305052 -0.047917640 -0.011477021 0.000000000 -0.009800797
## 2016-01-12 0.000000000 0.006571765 -0.006093864 -0.004184107 -0.015216966
## 2016-01-13 0.000000000 -0.013924739 0.009126927 -0.002623985 0.012587116
## 2016-01-14 0.001063264 -0.014121379 0.003023891 -0.002104156 -0.009923998
## 2016-01-15 0.001062135 0.017804625 -0.006056954 -0.004221642 0.007287213
## 2016-01-18 -0.002125399 0.003669729 0.046299070 0.000000000 -0.020000667
## 1213
## 2016-01-04 NA
## 2016-01-05 -0.019875516
## 2016-01-06 0.010355122
## 2016-01-07 -0.003685961
## 2016-01-08 0.000000000
## 2016-01-11 -0.006669161
## 2016-01-12 0.030027888
## 2016-01-13 -0.002890175
## 2016-01-14 -0.010181906
## 2016-01-15 0.002919710
## 2016-01-18 0.003637690
# Q7
ret_mon <- price_close %>%
mutate(date = as.Date(as.character(.$date),'%Y-%m-%d')) %>%
tk_xts(select = -date, date.var = `date`)
## Using column `date` for date_var.
ret_mon <- ret_mon %>%
to.period(period = 'months',
indexAt = 'lastof',
OHLC = FALSE) %>%
Return.calculate(method = 'log')
ret_mon[1:11, 1:11]
## 1101 1102 1103 1104 1108
## 2016-01-31 NA NA NA NA NA
## 2016-02-29 0.09416960 0.050086671 0.029631798 0.016610656 0.025075941
## 2016-03-31 0.04872223 0.063369614 0.012092046 0.067242306 0.039311420
## 2016-04-30 0.04342729 -0.022581605 -0.029270382 -0.079519931 -0.010256500
## 2016-05-31 -0.09072401 -0.145487929 -0.091974166 -0.029459981 -0.004592431
## 2016-06-30 0.06480558 0.115684399 -0.001357773 0.088229888 0.000000000
## 2016-07-31 0.09502693 0.045583749 0.065726674 0.036596422 0.011441772
## 2016-08-31 0.06691328 0.007598517 0.023885485 0.120462082 -0.006849342
## 2016-09-30 -0.01679429 -0.034038943 -0.003733670 0.014331849 -0.008050647
## 2016-10-31 0.05976386 0.005344309 -0.008766493 0.000000000 -0.017472780
## 2016-11-30 -0.02645268 0.017879387 -0.003780723 0.008306913 0.020930997
## 1109 1110 1201 1203 1210
## 2016-01-31 NA NA NA NA NA
## 2016-02-29 0.022353246 0.020946956 0.010719426 -0.013252248 0.106818338
## 2016-03-31 -0.007395702 0.046779908 0.078746867 -0.034196005 -0.062712617
## 2016-04-30 0.007395702 0.029572047 0.007749974 -0.002211167 0.051325020
## 2016-05-31 0.004201687 0.000000000 0.017347374 0.011554461 0.164175182
## 2016-06-30 -0.004201687 -0.026167209 0.038695304 -0.002190581 0.132507851
## 2016-07-31 0.034147136 0.019522666 -0.050909196 0.004376375 -0.085058251
## 2016-08-31 0.053478088 0.003327790 -0.040757992 0.024799115 0.021236319
## 2016-09-30 -0.043356544 0.009917437 -0.078177781 0.027830623 0.005714301
## 2016-10-31 -0.024466052 -0.025992800 0.005176888 0.002069323 0.059003287
## 2016-11-30 -0.005173317 0.000000000 0.008569033 0.020461072 0.023884373
## 1213
## 2016-01-31 NA
## 2016-02-29 0.067139303
## 2016-03-31 0.131136182
## 2016-04-30 -0.053567746
## 2016-05-31 0.197906831
## 2016-06-30 -0.107265113
## 2016-07-31 -0.009286196
## 2016-08-31 -0.069409451
## 2016-09-30 -0.064538521
## 2016-10-31 0.058268908
## 2016-11-30 0.049089610
# Q8
stock_50_cap <- stock_day %>%
select(id, name, date, cap_share) %>%
filter(date == "2016-12-30") %>%
mutate(year1 = year(date)) %>%
select(date, year1, cap_share, id, name) %>%
arrange(desc(cap_share)) %>%
slice(1:50)
stock_50_cap
## # A tibble: 50 x 5
## date year1 cap_share id name
## <date> <dbl> <chr> <dbl> <chr>
## 1 2016-12-30 2016 5.365 2317 Hon Hai Precision
## 2 2016-12-30 2016 3.923 6505 Formosa Petrochem.
## 3 2016-12-30 2016 2.895 2412 Chunghwa Telecom
## 4 2016-12-30 2016 2.226 2882 Cathay Holdings
## 5 2016-12-30 2016 2.088 1301 Formosa Plastics
## 6 2016-12-30 2016 2.076 1303 Nan Ya Plastics
## 7 2016-12-30 2016 2.075 1326 Formosa Chem & Fibre
## 8 2016-12-30 2016 17.304 2330 TSMC
## 9 2016-12-30 2016 1.919 2881 Fubon FHC
## 10 2016-12-30 2016 1.869 3008 Largan
## # … with 40 more rows
# Q9
stock_10_cap <- stock_50_cap %>%
slice(1:10)
stock_10_cap %>%
ggplot(aes(x = reorder(id, cap_share, desc), y = cap_share)) +
geom_col(fill = 'red') +
labs (x = 'id') +
labs (y = 'Market share %') +
labs(title = ' The ten largest market capitalization stocks in TWSE as of 2016') +
geom_text(aes(label = cap_share), vjust = -0.5, size = 3)

# Q10
ret_mon['2017::2018', as.character(stock_50_cap$id[1:5])] %>%
tk_tbl(preserve_index = TRUE, rename_index = 'date') %>%
gather(asset, returns, -date) %>%
group_by(asset) %>%
ggplot(aes(x = returns, color = asset)) +
geom_density(alpha = 1) +
ggtitle('Monthly Returns Density from 2017-2018') +
xlab('monthly returns') +
ylab('distribution')

# Q11
str(stock_day)
## tibble [609,010 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ id : num [1:609010] 1101 1102 1103 1104 1108 ...
## $ name : chr [1:609010] "Taiwan Cement" "Asia Cement" "Chia Hsin Cement" "Universal Cement" ...
## $ TSE Sector : chr [1:609010] "01" "01" "01" "01" ...
## $ date : Date[1:609010], format: "2016-01-04" "2016-01-04" ...
## $ Open : num [1:609010] 19.14 23.33 8.41 17.41 8.69 ...
## $ High : num [1:609010] 19.14 23.33 8.41 17.41 8.69 ...
## $ Low : num [1:609010] 18.33 22.09 8.28 16.98 8.61 ...
## $ Close : num [1:609010] 18.33 22.39 8.28 16.98 8.61 ...
## $ Volume : num [1:609010] 10437 6852 133 231 81 ...
## $ Market Cap.: num [1:609010] 96550 88237 7282 13602 4047 ...
## $ Shares : num [1:609010] 3692175 3361447 774780 628289 404738 ...
## $ cap_share : chr [1:609010] "0.406" "0.371" "0.031" "0.057" ...
## - attr(*, "spec")=
## .. cols(
## .. CO_ID = col_double(),
## .. CoName = col_character(),
## .. `TSE Sector` = col_character(),
## .. Date = col_double(),
## .. Open = col_double(),
## .. High = col_double(),
## .. Low = col_double(),
## .. Close = col_double(),
## .. Volume = col_double(),
## .. `Market Cap.` = col_double(),
## .. Shares = col_double(),
## .. `MV%` = col_character()
## .. )
stock_id_sector <- stock_day %>%
select(id, `TSE Sector`) %>%
mutate(`TSE Sector` = as.factor(`TSE Sector`)) %>%
unique() %>%
mutate(stock_id = id) %>%
select(stock_id, `TSE Sector`)
stock_id_sector %>%
ggplot(aes(x = `TSE Sector`)) +
geom_bar() +
geom_text(stat = 'count', aes(label = ..count..), vjust = -0.5, size = 3)
