library(readr)
## Warning: package 'readr' was built under R version 3.5.3
# Read the daily ETF price file. The encoding is passed to readr through the
# locale so the Big5 column names and text are decoded correctly.
etf4_csv <- read_csv(
  file = "ETF4_2000_2018_d.csv",
  locale = locale(encoding = "big5")
)
## Parsed with column specification:
## cols(
##   證券代碼 = col_character(),
##   簡稱 = col_character(),
##   日期 = col_double(),
##   `未調整收盤價(元)` = col_double(),
##   `當日均價(元)` = col_double()
## )
# Keep the id, date and price columns by dropping columns 2 and 4 by position.
# Per the column specification printed by read_csv(), those are the short name
# and the unadjusted close, so "price" below is the daily average price.
etf4.c <- etf4_csv[, c(-2, -4)]
# Drop the first row (NOTE(review): presumably a non-data row -- confirm).
etf4.c <- etf4.c[-1, ]
colnames(etf4.c) <- c("id", "date", "price")
library(reshape2)
# Reshape long -> wide: one row per date, one column per ETF id.
# value.var is stated explicitly so dcast() does not have to guess the value
# column (it otherwise emits "Using price as value column").
etf4.reorder <- dcast(etf4.c, date ~ id, value.var = "price")
# The date column holds yyyymmdd numbers; convert it to Date.
etf4.reorder$date <- as.Date(as.character(etf4.reorder$date), "%Y%m%d")
dim(etf4.reorder)
## [1] 2474    5
head(etf4.reorder)
##         date  0050  0056 006205 00646
## 1 2009-01-05    NA 14.00     NA    NA
## 2 2009-01-06 34.21 14.02     NA    NA
## 3 2009-01-07 34.59 14.28     NA    NA
## 4 2009-01-08 33.21 13.86     NA    NA
## 5 2009-01-09 32.32 13.61     NA    NA
## 6 2009-01-10 31.91 13.55     NA    NA
str(etf4.reorder)
## 'data.frame':    2474 obs. of  5 variables:
##  $ date  : Date, format: "2009-01-05" "2009-01-06" ...
##  $ 0050  : num  NA 34.2 34.6 33.2 32.3 ...
##  $ 0056  : num  14 14 14.3 13.9 13.6 ...
##  $ 006205: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ 00646 : num  NA NA NA NA NA NA NA NA NA NA ...
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Build the xts object: every column except the first (date) is data, and the
# date column becomes the time index.
etf4.xts <- xts(x = etf4.reorder[, -1], order.by = etf4.reorder[["date"]])
head(etf4.xts)
##             0050  0056 006205 00646
## 2009-01-05    NA 14.00     NA    NA
## 2009-01-06 34.21 14.02     NA    NA
## 2009-01-07 34.59 14.28     NA    NA
## 2009-01-08 33.21 13.86     NA    NA
## 2009-01-09 32.32 13.61     NA    NA
## 2009-01-10 31.91 13.55     NA    NA
tail(etf4.xts)
##             0050  0056 006205 00646
## 2018-12-22 74.75 24.15  25.08 22.93
## 2018-12-24 74.67 24.16  25.25 22.72
## 2018-12-25 73.57 23.90  24.90 22.51
## 2018-12-26 73.87 23.83  25.16 22.13
## 2018-12-27 74.81 23.96  25.30 22.95
## 2018-12-28 75.21 23.92  25.24 23.16
str(etf4.xts)
## An 'xts' object on 2009-01-05/2018-12-28 containing:
##   Data: num [1:2474, 1:4] NA 34.2 34.6 33.2 32.3 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:4] "0050" "0056" "006205" "00646"
##   Indexed by objects of class: [Date] TZ: UTC
##   xts Attributes:  
##  NULL
library(quantmod)
## Warning: package 'quantmod' was built under R version 3.5.3
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.5.3
## Version 0.4-0 included new data defaults. See ?getSymbols.
# Collapse the daily series to monthly frequency, keeping the original columns
# instead of OHLC bars (OHLC = FALSE) and stamping each period with the last
# day of the month (indexAt = "lastof").
# The captured warning that follows shows that missing values are removed, so
# the monthly series only starts once all four ETFs have data.
etf4_monthly <- to.monthly(etf4.xts, indexAt = "lastof", OHLC=FALSE)
## Warning in to.period(x, "months", indexAt = indexAt, name = name, ...):
## missing values removed from data
head(etf4_monthly)
##             0050  0056 006205 00646
## 2015-12-31 60.75 21.81  31.97 20.07
## 2016-01-31 59.51 21.48  26.21 19.21
## 2016-02-29 61.25 22.39  25.90 19.22
## 2016-03-31 64.82 22.44  27.83 19.73
## 2016-04-30 61.63 21.58  27.17 19.82
## 2016-05-31 63.10 21.86  27.10 20.31
library(PerformanceAnalytics)
## Warning: package 'PerformanceAnalytics' was built under R version 3.5.3
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
library(magrittr)
# Monthly log returns. Rows containing NA (the first month has no previous
# price to compare against) are dropped.
etf4_returns_xts <- na.omit(Return.calculate(etf4_monthly, method = "log"))
head(etf4_returns_xts)
##                   0050        0056       006205         00646
## 2016-01-31 -0.02062272 -0.01524631 -0.198656946 -0.0437951860
## 2016-02-29  0.02881948  0.04149216 -0.011898049  0.0005204268
## 2016-03-31  0.05665035  0.00223065  0.071871607  0.0261889165
## 2016-04-30 -0.05046543 -0.03907812 -0.024001152  0.0045512089
## 2016-05-31  0.02357200  0.01289152 -0.002579696  0.0244218467
## 2016-06-30  0.03335201  0.03196311 -0.017119882 -0.0249265149
dim(etf4_returns_xts)
## [1] 36  4
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
# Plot all return series, asking for the default x axis to be suppressed
# (xaxt = 'n'), then draw a bottom axis (side 1) with a tick at every index
# date, labelled as year/month.
# NOTE(review): whether plot.xts honours xaxt and a later axis() call depends
# on the installed xts version -- confirm the rendered axis.
plot(etf4_returns_xts, xaxt='n')
axis(1, index(etf4_returns_xts), format(index(etf4_returns_xts), "%Y/%m"))

library(ggplot2)
# Convert the xts returns to a wide data frame for plotting; the dates end up
# in an Index column next to one column per ETF (see the head() output below).
etf4_ret.df1<-fortify(etf4_returns_xts)
head(etf4_ret.df1)
##        Index        0050        0056       006205         00646
## 1 2016-01-31 -0.02062272 -0.01524631 -0.198656946 -0.0437951860
## 2 2016-02-29  0.02881948  0.04149216 -0.011898049  0.0005204268
## 3 2016-03-31  0.05665035  0.00223065  0.071871607  0.0261889165
## 4 2016-04-30 -0.05046543 -0.03907812 -0.024001152  0.0045512089
## 5 2016-05-31  0.02357200  0.01289152 -0.002579696  0.0244218467
## 6 2016-06-30  0.03335201  0.03196311 -0.017119882 -0.0249265149
# Base-graphics scatter plot of 0050 monthly returns (x) against 00646 monthly
# returns (y), drawn with small solid dark-red points.
plot(
  x = etf4_ret.df1[["0050"]],
  y = etf4_ret.df1[["00646"]],
  pch = 20,
  col = "darkred",
  main = "0050 vs. 00646 monthly returns",
  xlab = "0050",
  ylab = "00646 S&P500"
)

#畫出etf4_returns_xts的圖 探討兩個數值之間的關係使用plot

xaxt = 'n' - 不繪製預設的 x 軸

axis 1-控制下方的X軸

format(index(etf4_returns_xts), "%Y/%m") - 以「年/月」的形式呈現日期

使用fortify將xts轉換為df,繪製0050和00646的散點圖

pch(plotting character)- 代表點的圖形

col(color)- 代表顏色

pch與col可分別調整資料當中點的形狀和顏色

main - 代表圖最上方的標題

xlab - x軸標題/ylab - y軸標題

(*要加上13-37行才可運作)

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages ------------------------------------------------------ tidyverse 1.2.1 --
## √ tibble  2.0.1       √ dplyr   0.8.0.1
## √ tidyr   0.8.3       √ stringr 1.4.0  
## √ purrr   0.3.1       √ forcats 0.4.0
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts --------------------------------------------------------- tidyverse_conflicts() --
## x tidyr::extract()   masks magrittr::extract()
## x dplyr::filter()    masks stats::filter()
## x dplyr::first()     masks xts::first()
## x dplyr::lag()       masks stats::lag()
## x dplyr::last()      masks xts::last()
## x purrr::set_names() masks magrittr::set_names()
library(ggplot2)

library(lubridate)
## Warning: package 'lubridate' was built under R version 3.5.3
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
# Add calendar month and year columns taken from the xts index.
etf4_ret.df2 <- cbind(
  etf4_ret.df1,
  month = month(index(etf4_returns_xts)),
  year = year(index(etf4_returns_xts))
)

# Scatter plots of 0050 vs. 0056 monthly returns, mapping month to colour,
# size and alpha in turn. Columns are referenced by bare (backticked) name
# inside aes(): ggplot2 looks them up in `data`, whereas `df$col` bypasses the
# data argument, is discouraged by ggplot2, and produces axis titles such as
# "etf4_ret.df2$`0050`".
ggplot(data = etf4_ret.df2) +
  geom_point(mapping = aes(x = `0050`, y = `0056`, color = month))

ggplot(data = etf4_ret.df2) +
  geom_point(mapping = aes(x = `0050`, y = `0056`, size = month))

ggplot(data = etf4_ret.df2) +
  geom_point(mapping = aes(x = `0050`, y = `0056`, alpha = month))

library(PerformanceAnalytics)
library(magrittr)
# Simple (arithmetic) monthly returns computed by hand: each month's price
# divided by the previous month's price, minus 1. coredata() strips the time
# index, so the result is a plain matrix with one row fewer than etf4_monthly.
etf4_ret <- coredata(etf4_monthly[-1, ]) /
  coredata(etf4_monthly[-nrow(etf4_monthly), ]) - 1
head(etf4_ret)
##             0050        0056       006205         00646
## [1,] -0.02041152 -0.01513067 -0.180168908 -0.0428500249
## [2,]  0.02923878  0.04236499 -0.011827547  0.0005205622
## [3,]  0.05828571  0.00223314  0.074517375  0.0265348595
## [4,] -0.04921321 -0.03832442 -0.023715415  0.0045615813
## [5,]  0.02385202  0.01297498 -0.002576371  0.0247225025
## [6,]  0.03391442  0.03247941 -0.016974170 -0.0246184146
class(etf4_ret)
## [1] "matrix"
etf4_ret
##                0050         0056       006205         00646
##  [1,] -0.0204115226 -0.015130674 -0.180168908 -0.0428500249
##  [2,]  0.0292387834  0.042364991 -0.011827547  0.0005205622
##  [3,]  0.0582857143  0.002233140  0.074517375  0.0265348595
##  [4,] -0.0492132058 -0.038324421 -0.023715415  0.0045615813
##  [5,]  0.0238520201  0.012974977 -0.002576371  0.0247225025
##  [6,]  0.0339144216  0.032479414 -0.016974170 -0.0246184146
##  [7,]  0.0554874310  0.080194949  0.012012012  0.0312973246
##  [8,]  0.0124891083  0.013125513  0.024480712  0.0024473813
##  [9,]  0.0192197361  0.013765182 -0.038015930 -0.0219726562
## [10,]  0.0153391500 -0.053913738  0.020700038 -0.0054917624
## [11,] -0.0059598060 -0.018151119  0.057522124  0.0411646586
## [12,]  0.0006971556 -0.008598452 -0.058577406  0.0385728062
## [13,]  0.0197854257  0.019947962  0.018148148 -0.0129990715
## [14,] -0.0012296762  0.039115646  0.003273918  0.0145813735
## [15,]  0.0103967168  0.013093290 -0.031182016  0.0000000000
## [16,]  0.0083942594 -0.012116317 -0.015344311 -0.0023180343
## [17,]  0.0296723953  0.015944399  0.035347777  0.0041821561
## [18,]  0.0445951232  0.027766600  0.060939794  0.0143452106
## [19,]  0.0154787168  0.021534847  0.043944637  0.0159671533
## [20,]  0.0195451752  0.000000000  0.038448790 -0.0094297261
## [21,] -0.0227875573 -0.006132618 -0.006383658  0.0385312783
## [22,]  0.0502159161 -0.015426147  0.024734982  0.0139676997
## [23,] -0.0300751880 -0.020368194  0.010344828  0.0111924236
## [24,] -0.0064195736  0.000000000 -0.004964319  0.0166028097
## [25,]  0.0295014019  0.053178729  0.082008107  0.0335008375
## [26,] -0.0197750148 -0.012528474 -0.036599424 -0.0113452188
## [27,]  0.0013288234 -0.001922338 -0.056536045 -0.0385245902
## [28,] -0.0384847388 -0.030046225 -0.016487001  0.0166240409
## [29,]  0.0127979925  0.040508340 -0.001611863  0.0310272537
## [30,]  0.0012388503 -0.023664122 -0.083629319  0.0166734445
## [31,]  0.0570403366  0.057857701  0.009866103  0.0340000000
## [32,]  0.0155683015  0.022172949 -0.038381019  0.0398452611
## [33,]  0.0051867220 -0.013738250  0.030478955  0.0000000000
## [34,] -0.1145510836 -0.130865103 -0.076408451 -0.0662202381
## [35,] -0.0012950013  0.032053986  0.001143729  0.0139442231
## [36,] -0.0247665975 -0.022476502 -0.038842346 -0.0899803536
# Attach dates to the simple-return matrix. The matrix carries no index, so
# the dates are taken from the log-return xts object, which covers the same
# months. data.frame() turns the numeric-looking column names into X0050,
# X0056, ... (see the head() output below).
etf4_ret.tmp<-data.frame(date = index(etf4_returns_xts), etf4_ret)
head(etf4_ret.tmp)
##         date       X0050       X0056      X006205        X00646
## 1 2016-01-31 -0.02041152 -0.01513067 -0.180168908 -0.0428500249
## 2 2016-02-29  0.02923878  0.04236499 -0.011827547  0.0005205622
## 3 2016-03-31  0.05828571  0.00223314  0.074517375  0.0265348595
## 4 2016-04-30 -0.04921321 -0.03832442 -0.023715415  0.0045615813
## 5 2016-05-31  0.02385202  0.01297498 -0.002576371  0.0247225025
## 6 2016-06-30  0.03391442  0.03247941 -0.016974170 -0.0246184146

運用ggplot繪圖

安裝並執行tidyverse、ggplot2、lubridate

將xts轉換為ggplot可以使用的數據資料

將xts中的日期拆為年、月和日的資料數列

geom(geometric objects) - 代表幾何的意思

geom_point表示散佈圖

aes(aesthetic mappings)是用來綁定至 X 軸與 Y 軸

size(點的大小)/alpha(點的透明度)

ggplot(data = etf4_ret.df2) + geom_point(mapping = aes(x = etf4_ret.df2$`0050`, y = etf4_ret.df2$`0056`, color = month)) - 點的顏色隨月份改變

ggplot(data = etf4_ret.df2) + geom_point(mapping = aes(x = etf4_ret.df2$`0050`, y = etf4_ret.df2$`0056`, size = month)) - 點的大小隨月份改變

ggplot(data = etf4_ret.df2) + geom_point(mapping = aes(x = etf4_ret.df2$`0050`, y = etf4_ret.df2$`0056`, alpha = month)) - 點的透明度隨月份改變

將etf4_ret轉換成etf4_ret.tmp的暫存檔

(*要加上90-94行才可運作)

# Convert the xts returns to a long data frame (melt = TRUE): one row per
# date and series, with columns Index, Series and Value (see the head() output
# below).
etf4_ret.df<-fortify(etf4_returns_xts, melt=TRUE)
head(etf4_ret.df)
##        Index Series       Value
## 1 2016-01-31   0050 -0.02062272
## 2 2016-02-29   0050  0.02881948
## 3 2016-03-31   0050  0.05665035
## 4 2016-04-30   0050 -0.05046543
## 5 2016-05-31   0050  0.02357200
## 6 2016-06-30   0050  0.03335201
# Line chart of the returns over time, one coloured line per series.
p <- ggplot(data = etf4_ret.df, mapping = aes(x = Index, y = Value)) +
  geom_line(mapping = aes(color = Series), size = 1)
# Same chart with the date axis labelled as year/month.
p + scale_x_date(date_labels = "%Y/%m")

# Chart with the default date labels.
p

#將數據轉換為長格式並化成圖 etf4_ret.df<-fortify(etf4_returns_xts, melt=TRUE)將數據轉換為長格式

也可以使用 etf4_ret.tmp <- etf4_returns_xts %>% data.frame(date = index(.)) %>% remove_rownames() %>% gather(asset, return, -date) 將數據轉換為長格式

利用melt將寬資料轉換成長資料的形式

p<-ggplot(etf4_ret.df, aes(x = Index, y = Value))+ geom_line(aes(color = Series), size = 1) 將資料存於p當中

geom_line表示線圖

aes(aesthetic mappings)是用來綁定至 X 軸與 Y 軸

scale_x_date - 日期方面的設置函數

scale_x_date(date_labels = "%Y/%m") - 為調整日期顯示的格式

# Centre plot titles for the rest of the session.
# theme_update() changes the active theme and invisibly returns the PREVIOUS
# theme, so it must be called on its own: adding its result to a plot with `+`
# would apply the old, un-centred theme to that plot.
theme_update(plot.title = element_text(hjust = 0.5))

# Histogram of monthly returns, filled by series.
q <- etf4_ret.df %>%
  ggplot(aes(x = Value, fill = Series)) +
  geom_histogram(alpha = 0.45, binwidth = .005) +
  ggtitle("Monthly Returns")
# One panel per series.
q + facet_wrap(~Series)

# All series overlaid in a single panel.
q

#繪製成直方圖 geom_histogram 為直方圖

binwidth(調整寬度參數)

ggtitle(圖表標題)

facet_wrap函數 - 以分類變數輸入facet,方便比較不同分類下的數值趨勢的差異

theme_update函數 - 修改主題的函數,用於增量更新

hjust - 控制文字的水平對齊位置,值介於0-1(0.5為置中)

q<-etf4_ret.df %>% ggplot(aes(x =Value, fill = Series)) + geom_histogram(alpha = 0.45, binwidth = .005) + ggtitle(“Monthly Returns”) - 將資料存於q當中

# Density curve of monthly returns, one coloured curve per series.
# The title is centred with theme(), which modifies only this plot.
# theme_update() is not used here because it returns the previous session
# theme, so adding it to a plot does not apply the requested change.
etf4_ret.df %>%
  ggplot(aes(x = Value, colour = Series)) +
  geom_density(alpha = 1) +
  ggtitle("Monthly Returns Density Since 2016") +
  xlab("monthly returns") +
  ylab("distribution") +
  theme(plot.title = element_text(hjust = 0.5))

#繪製密度圖 %>%(將多個函數呼叫,串連的流程)

geom_density - 密度曲線圖所使用的圖層函數

ggtitle(圖表標題)

xlab - x軸標題/ylab - y軸標題

theme_update函數 - 修改主題的函數,用於增量更新

hjust - 控制文字的水平對齊位置,值介於0-1(0.5為置中)

# Density curve and histogram of monthly returns in the same chart, one panel
# per series.
etf4_ret.df %>%
  ggplot(aes(x = Value)) +
  geom_density(aes(color = Series), alpha = 1) +
  geom_histogram(aes(fill = Series), alpha = 0.45, binwidth = .01) +
  # Hide the fill legend; "none" is the non-deprecated spelling of FALSE.
  guides(fill = "none") +
  facet_wrap(~Series) +
  ggtitle("Monthly Returns Since 2016") +
  xlab("monthly returns") +
  ylab("distribution") +
  # Centre the title on this plot only. theme_update() would return the
  # previous session theme rather than the requested change.
  theme(plot.title = element_text(hjust = 0.5))

#將密度圖和直方圖繪製成同一張圖 geom_density - 密度曲線圖所使用的圖層函數

geom_histogram 為直方圖

alpha(調整透明度的數值)/binwidth(調整每個直條寬度的參數)

facet_wrap函數 - 以分類變數輸入facet,方便比較不同分類下的數值趨勢的差異

fill - 改變條形圖的填充色

xlab - x軸標題/ylab - y軸標題

hjust - 控制文字的水平對齊位置,值介於0-1(0.5為置中)

theme_update函數 - 修改主題的函數,用於增量更新

library(plotly)
## Warning: package 'plotly' was built under R version 3.5.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Interactive plotly chart of the simple monthly returns: 0050 as a line,
# 0056 added as a second trace drawn with lines and markers.
p1 <- plot_ly(
  data = etf4_ret.tmp, x = ~date, y = ~X0050,
  name = "0050", type = "scatter", mode = "lines"
) %>%
  add_trace(y = ~X0056, name = "0056", mode = "lines+markers") %>%
  layout(
    xaxis = list(title = "year"),
    yaxis = list(title = "monthly returns")
  )
p1

#視覺化圖表 plotly - 是個交互式視覺化的工具

scatter(散佈圖)

add_trace - 用在已有的圖形上,添加新的圖形

layout - 用來設置圖形外觀